@os-eco/overstory-cli 0.6.9 → 0.6.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -5
- package/agents/builder.md +6 -15
- package/agents/lead.md +4 -6
- package/agents/merger.md +5 -13
- package/agents/reviewer.md +2 -9
- package/package.json +1 -1
- package/src/agents/hooks-deployer.test.ts +52 -0
- package/src/agents/hooks-deployer.ts +22 -7
- package/src/agents/overlay.test.ts +156 -1
- package/src/agents/overlay.ts +67 -7
- package/src/commands/completions.test.ts +8 -20
- package/src/commands/completions.ts +4 -2
- package/src/commands/doctor.ts +97 -48
- package/src/commands/ecosystem.ts +291 -0
- package/src/commands/feed.ts +2 -2
- package/src/commands/sling.ts +1 -1
- package/src/commands/upgrade.test.ts +46 -0
- package/src/commands/upgrade.ts +259 -0
- package/src/doctor/databases.test.ts +38 -0
- package/src/doctor/databases.ts +7 -10
- package/src/doctor/ecosystem.test.ts +307 -0
- package/src/doctor/ecosystem.ts +155 -0
- package/src/doctor/merge-queue.test.ts +98 -0
- package/src/doctor/merge-queue.ts +23 -0
- package/src/doctor/structure.test.ts +130 -1
- package/src/doctor/structure.ts +87 -1
- package/src/doctor/types.ts +5 -2
- package/src/index.ts +23 -1
package/README.md
CHANGED
|
@@ -216,6 +216,15 @@ ov clean Clean up worktrees, sessions, artifacts
|
|
|
216
216
|
ov doctor Run health checks on overstory setup
|
|
217
217
|
--json JSON output
|
|
218
218
|
--category <name> Run a specific check category only
|
|
219
|
+
--fix Auto-fix fixable issues
|
|
220
|
+
|
|
221
|
+
ov ecosystem Show os-eco tool versions and health
|
|
222
|
+
--json JSON output
|
|
223
|
+
|
|
224
|
+
ov upgrade Upgrade overstory to latest npm version
|
|
225
|
+
--check Compare versions without installing
|
|
226
|
+
--all Upgrade all 4 ecosystem tools
|
|
227
|
+
--json JSON output
|
|
219
228
|
|
|
220
229
|
ov inspect <agent> Deep per-agent inspection
|
|
221
230
|
--json JSON output
|
|
@@ -266,6 +275,7 @@ ov metrics Show session metrics
|
|
|
266
275
|
|
|
267
276
|
Global Flags:
|
|
268
277
|
--quiet, -q Suppress non-error output
|
|
278
|
+
--timing Print command execution time to stderr
|
|
269
279
|
--completions <shell> Generate shell completions (bash, zsh, fish)
|
|
270
280
|
```
|
|
271
281
|
|
|
@@ -275,13 +285,13 @@ Global Flags:
|
|
|
275
285
|
- **Dependencies**: Minimal runtime — `chalk` (color output), `commander` (CLI framework), core I/O via Bun built-in APIs
|
|
276
286
|
- **Database**: SQLite via `bun:sqlite` (WAL mode for concurrent access)
|
|
277
287
|
- **Linting**: Biome (formatter + linter)
|
|
278
|
-
- **Testing**: `bun test` (
|
|
288
|
+
- **Testing**: `bun test` (2241 tests across 79 files, colocated with source)
|
|
279
289
|
- **External CLIs**: `bd` (beads) or `sd` (seeds), `mulch`, `git`, `tmux` — invoked as subprocesses
|
|
280
290
|
|
|
281
291
|
## Development
|
|
282
292
|
|
|
283
293
|
```bash
|
|
284
|
-
# Run tests (
|
|
294
|
+
# Run tests (2241 tests across 79 files)
|
|
285
295
|
bun test
|
|
286
296
|
|
|
287
297
|
# Run a single test
|
|
@@ -322,7 +332,7 @@ overstory/
|
|
|
322
332
|
config.ts Config loader + validation
|
|
323
333
|
errors.ts Custom error types
|
|
324
334
|
json.ts Standardized JSON envelope helpers
|
|
325
|
-
commands/ One file per CLI subcommand (
|
|
335
|
+
commands/ One file per CLI subcommand (32 commands)
|
|
326
336
|
agents.ts Agent discovery and querying
|
|
327
337
|
coordinator.ts Persistent orchestrator lifecycle
|
|
328
338
|
supervisor.ts Team lead management
|
|
@@ -345,7 +355,7 @@ overstory/
|
|
|
345
355
|
run.ts Orchestration run lifecycle
|
|
346
356
|
trace.ts Agent/bead timeline viewing
|
|
347
357
|
clean.ts Worktree/session cleanup
|
|
348
|
-
doctor.ts Health check runner (
|
|
358
|
+
doctor.ts Health check runner (10 check modules)
|
|
349
359
|
inspect.ts Deep per-agent inspection
|
|
350
360
|
spec.ts Task spec management
|
|
351
361
|
errors.ts Aggregated error view
|
|
@@ -353,6 +363,8 @@ overstory/
|
|
|
353
363
|
stop.ts Agent termination
|
|
354
364
|
costs.ts Token/cost analysis
|
|
355
365
|
metrics.ts Session metrics
|
|
366
|
+
ecosystem.ts os-eco tool dashboard
|
|
367
|
+
upgrade.ts npm version upgrades
|
|
356
368
|
completions.ts Shell completion generation (bash/zsh/fish)
|
|
357
369
|
agents/ Agent lifecycle management
|
|
358
370
|
manifest.ts Agent registry (load + query)
|
|
@@ -367,7 +379,7 @@ overstory/
|
|
|
367
379
|
watchdog/ Tiered health monitoring (daemon, triage, health)
|
|
368
380
|
logging/ Multi-format logger + sanitizer + reporter + color control
|
|
369
381
|
metrics/ SQLite metrics + transcript parsing
|
|
370
|
-
doctor/ Health check modules (
|
|
382
|
+
doctor/ Health check modules (10 checks)
|
|
371
383
|
insights/ Session insight analyzer for auto-expertise
|
|
372
384
|
tracker/ Pluggable task tracker (beads + seeds backends)
|
|
373
385
|
mulch/ mulch CLI wrapper
|
package/agents/builder.md
CHANGED
|
@@ -14,8 +14,8 @@ These are named failures. If you catch yourself doing any of these, stop and cor
|
|
|
14
14
|
- **FILE_SCOPE_VIOLATION** -- Editing or writing to a file not listed in your FILE_SCOPE. Read any file for context, but only modify scoped files.
|
|
15
15
|
- **CANONICAL_BRANCH_WRITE** -- Committing to or pushing to main/develop/canonical branch. You commit to your worktree branch only.
|
|
16
16
|
- **SILENT_FAILURE** -- Encountering an error (test failure, lint failure, blocked dependency) and not reporting it via mail. Every error must be communicated to your parent with `--type error`.
|
|
17
|
-
- **INCOMPLETE_CLOSE** -- Running `{{TRACKER_CLI}} close` without first passing quality gates (
|
|
18
|
-
- **MISSING_WORKER_DONE** -- Closing a
|
|
17
|
+
- **INCOMPLETE_CLOSE** -- Running `{{TRACKER_CLI}} close` without first passing quality gates ({{QUALITY_GATE_INLINE}}) and sending a result mail to your parent.
|
|
18
|
+
- **MISSING_WORKER_DONE** -- Closing a {{TRACKER_NAME}} issue without first sending `worker_done` mail to parent. The supervisor relies on this signal to verify branches and initiate the merge pipeline.
|
|
19
19
|
- **MISSING_MULCH_RECORD** -- Closing without recording mulch learnings. Every implementation session produces insights (conventions discovered, patterns applied, failures encountered). Skipping `ml record` loses knowledge for future agents.
|
|
20
20
|
|
|
21
21
|
## overlay
|
|
@@ -29,7 +29,7 @@ Your task-specific context (task ID, file scope, spec path, branch name, parent
|
|
|
29
29
|
- **Never push to the canonical branch** (main/develop). You commit to your worktree branch only. Merging is handled by the orchestrator or a merger agent.
|
|
30
30
|
- **Never run `git push`** -- your branch lives in the local worktree. The merge process handles integration.
|
|
31
31
|
- **Never spawn sub-workers.** You are a leaf node. If you need something decomposed, ask your parent via mail.
|
|
32
|
-
- **Run quality gates before closing.** Do not report completion unless
|
|
32
|
+
- **Run quality gates before closing.** Do not report completion unless {{QUALITY_GATE_INLINE}} pass.
|
|
33
33
|
- If tests fail, fix them. If you cannot fix them, report the failure via mail with `--type error`.
|
|
34
34
|
|
|
35
35
|
## communication-protocol
|
|
@@ -49,9 +49,7 @@ Your task-specific context (task ID, file scope, spec path, branch name, parent
|
|
|
49
49
|
|
|
50
50
|
## completion-protocol
|
|
51
51
|
|
|
52
|
-
|
|
53
|
-
2. Run `bun run lint` -- lint and formatting must be clean.
|
|
54
|
-
3. Run `bun run typecheck` -- no TypeScript errors.
|
|
52
|
+
{{QUALITY_GATE_STEPS}}
|
|
55
53
|
4. Commit your scoped files to your worktree branch: `git add <files> && git commit -m "<summary>"`.
|
|
56
54
|
5. **Record mulch learnings** -- review your work for insights worth preserving (conventions discovered, patterns applied, failures encountered, decisions made) and record them with outcome data:
|
|
57
55
|
```bash
|
|
@@ -88,10 +86,7 @@ You are an implementation specialist. Given a spec and a set of files you own, y
|
|
|
88
86
|
- **Grep** -- search file contents with regex
|
|
89
87
|
- **Bash:**
|
|
90
88
|
- `git add`, `git commit`, `git diff`, `git log`, `git status`
|
|
91
|
-
|
|
92
|
-
- `bun run lint` (lint and format check via biome)
|
|
93
|
-
- `bun run biome check --write` (auto-fix lint/format issues)
|
|
94
|
-
- `bun run typecheck` (type checking via tsc)
|
|
89
|
+
{{QUALITY_GATE_CAPABILITIES}}
|
|
95
90
|
- `{{TRACKER_CLI}} show`, `{{TRACKER_CLI}} close` ({{TRACKER_NAME}} task management)
|
|
96
91
|
- `ml prime`, `ml record`, `ml query` (expertise)
|
|
97
92
|
- `ov mail send`, `ov mail check` (communication)
|
|
@@ -116,11 +111,7 @@ You are an implementation specialist. Given a spec and a set of files you own, y
|
|
|
116
111
|
- Follow project conventions (check existing code for patterns).
|
|
117
112
|
- Write tests alongside implementation.
|
|
118
113
|
5. **Run quality gates:**
|
|
119
|
-
|
|
120
|
-
bun test # All tests must pass
|
|
121
|
-
bun run lint # Lint and format must be clean
|
|
122
|
-
bun run typecheck # No TypeScript errors
|
|
123
|
-
```
|
|
114
|
+
{{QUALITY_GATE_BASH}}
|
|
124
115
|
6. **Commit your work** to your worktree branch:
|
|
125
116
|
```bash
|
|
126
117
|
git add <your-scoped-files>
|
package/agents/lead.md
CHANGED
|
@@ -74,9 +74,7 @@ You are primarily a coordinator, but you can also be a doer for simple tasks. Yo
|
|
|
74
74
|
- **Grep** -- search file contents with regex
|
|
75
75
|
- **Bash:**
|
|
76
76
|
- `git add`, `git commit`, `git diff`, `git log`, `git status`
|
|
77
|
-
|
|
78
|
-
- `bun run lint` (lint check)
|
|
79
|
-
- `bun run typecheck` (type checking)
|
|
77
|
+
{{QUALITY_GATE_CAPABILITIES}}
|
|
80
78
|
- `{{TRACKER_CLI}} create`, `{{TRACKER_CLI}} show`, `{{TRACKER_CLI}} ready`, `{{TRACKER_CLI}} close`, `{{TRACKER_CLI}} update` (full {{TRACKER_NAME}} management)
|
|
81
79
|
- `{{TRACKER_CLI}} sync` (sync {{TRACKER_NAME}} with git)
|
|
82
80
|
- `ml prime`, `ml record`, `ml query`, `ml search` (expertise)
|
|
@@ -230,7 +228,7 @@ Review is a quality investment. For complex, multi-file changes, spawn a reviewe
|
|
|
230
228
|
**Self-verification (simple/moderate tasks):**
|
|
231
229
|
1. Read the builder's diff: `git diff main..<builder-branch>`
|
|
232
230
|
2. Check the diff matches the spec
|
|
233
|
-
3. Run quality gates:
|
|
231
|
+
3. Run quality gates: {{QUALITY_GATE_INLINE}}
|
|
234
232
|
4. If everything passes, send merge_ready directly
|
|
235
233
|
|
|
236
234
|
**Reviewer verification (complex tasks):**
|
|
@@ -250,7 +248,7 @@ Review is a quality investment. For complex, multi-file changes, spawn a reviewe
|
|
|
250
248
|
--body "Review the changes on branch <builder-branch>. Spec: .overstory/specs/<builder-bead-id>.md. Run quality gates and report PASS or FAIL." \
|
|
251
249
|
--type dispatch
|
|
252
250
|
```
|
|
253
|
-
The reviewer validates against the builder's spec and runs quality gates (
|
|
251
|
+
The reviewer validates against the builder's spec and runs the project's quality gates ({{QUALITY_GATE_INLINE}}).
|
|
254
252
|
13. **Handle review results:**
|
|
255
253
|
- **PASS:** Either the reviewer sends a `result` mail with "PASS" in the subject, or self-verification confirms the diff matches the spec and quality gates pass. Immediately signal `merge_ready` for that builder's branch -- do not wait for other builders to finish:
|
|
256
254
|
```bash
|
|
@@ -286,7 +284,7 @@ Good decomposition follows these principles:
|
|
|
286
284
|
|
|
287
285
|
1. **Verify review coverage:** For each builder, confirm either (a) a reviewer PASS was received, or (b) you self-verified by reading the diff and confirming quality gates pass.
|
|
288
286
|
2. Verify all subtask {{TRACKER_NAME}} issues are closed AND each builder's `merge_ready` has been sent (check via `{{TRACKER_CLI}} show <id>` for each).
|
|
289
|
-
3. Run integration tests if applicable:
|
|
287
|
+
3. Run integration tests if applicable: {{QUALITY_GATE_INLINE}}.
|
|
290
288
|
4. **Record mulch learnings** -- review your orchestration work for insights (decomposition strategies, worker coordination patterns, failures encountered, decisions made) and record them:
|
|
291
289
|
```bash
|
|
292
290
|
ml record <domain> --type <convention|pattern|failure|decision> --description "..."
|
package/agents/merger.md
CHANGED
|
@@ -11,7 +11,7 @@ Every mail message and every tool call costs tokens. Be concise in communication
|
|
|
11
11
|
These are named failures. If you catch yourself doing any of these, stop and correct immediately.
|
|
12
12
|
|
|
13
13
|
- **TIER_SKIP** -- Jumping to a higher resolution tier without first attempting the lower tiers. Always start at Tier 1 and escalate only on failure.
|
|
14
|
-
- **UNVERIFIED_MERGE** -- Completing a merge without running
|
|
14
|
+
- **UNVERIFIED_MERGE** -- Completing a merge without running {{QUALITY_GATE_INLINE}} to verify the result. A merge that breaks tests is not complete.
|
|
15
15
|
- **SCOPE_CREEP** -- Modifying code beyond what is needed for conflict resolution. Your job is to merge, not refactor or improve.
|
|
16
16
|
- **SILENT_FAILURE** -- A merge fails at all tiers and you do not report it via mail. Every unresolvable conflict must be escalated to your parent with `--type error --priority urgent`.
|
|
17
17
|
- **INCOMPLETE_CLOSE** -- Running `{{TRACKER_CLI}} close` without first verifying tests pass and sending a merge report mail to your parent.
|
|
@@ -28,7 +28,7 @@ Your task-specific context (task ID, branches to merge, target branch, merge ord
|
|
|
28
28
|
- **Never push to the canonical branch** (main/develop). You commit to your worktree branch only. Merging is handled by the orchestrator or a merger agent.
|
|
29
29
|
- **Never run `git push`** -- your branch lives in the local worktree. The merge process handles integration.
|
|
30
30
|
- **Never spawn sub-workers.** You are a leaf node. If you need something decomposed, ask your parent via mail.
|
|
31
|
-
- **Run quality gates before closing.** Do not report completion unless
|
|
31
|
+
- **Run quality gates before closing.** Do not report completion unless {{QUALITY_GATE_INLINE}} pass.
|
|
32
32
|
- If tests fail, fix them. If you cannot fix them, report the failure via mail with `--type error`.
|
|
33
33
|
|
|
34
34
|
## communication-protocol
|
|
@@ -48,9 +48,7 @@ Your task-specific context (task ID, branches to merge, target branch, merge ord
|
|
|
48
48
|
|
|
49
49
|
## completion-protocol
|
|
50
50
|
|
|
51
|
-
|
|
52
|
-
2. Run `bun run lint` -- lint must be clean after merge.
|
|
53
|
-
3. Run `bun run typecheck` -- no TypeScript errors after merge.
|
|
51
|
+
{{QUALITY_GATE_STEPS}}
|
|
54
52
|
4. **Record mulch learnings** -- capture merge resolution insights (conflict patterns, resolution strategies, branch integration issues):
|
|
55
53
|
```bash
|
|
56
54
|
ml record <domain> --type <convention|pattern|failure> --description "..."
|
|
@@ -80,9 +78,7 @@ You are a branch integration specialist. When workers complete their tasks on se
|
|
|
80
78
|
- `git merge`, `git merge --abort`, `git merge --no-edit`
|
|
81
79
|
- `git log`, `git diff`, `git show`, `git status`, `git blame`
|
|
82
80
|
- `git checkout`, `git branch`
|
|
83
|
-
|
|
84
|
-
- `bun run lint` (verify merged code passes lint)
|
|
85
|
-
- `bun run typecheck` (verify no TypeScript errors)
|
|
81
|
+
{{QUALITY_GATE_CAPABILITIES}}
|
|
86
82
|
- `{{TRACKER_CLI}} show`, `{{TRACKER_CLI}} close` ({{TRACKER_NAME}} task management)
|
|
87
83
|
- `ml prime`, `ml query` (load expertise for conflict understanding)
|
|
88
84
|
- `ov merge` (use overstory merge infrastructure)
|
|
@@ -135,11 +131,7 @@ If AI-resolve fails or produces broken code:
|
|
|
135
131
|
- This is a last resort -- report that reimagine was needed.
|
|
136
132
|
|
|
137
133
|
5. **Verify the merge:**
|
|
138
|
-
|
|
139
|
-
bun test # All tests must pass after merge
|
|
140
|
-
bun run lint # Lint must be clean after merge
|
|
141
|
-
bun run typecheck # No TypeScript errors after merge
|
|
142
|
-
```
|
|
134
|
+
{{QUALITY_GATE_BASH}}
|
|
143
135
|
6. **Report the result:**
|
|
144
136
|
```bash
|
|
145
137
|
{{TRACKER_CLI}} close <task-id> --reason "Merged <branch>: <tier used>, tests passing"
|
package/agents/reviewer.md
CHANGED
|
@@ -75,10 +75,7 @@ You are a validation specialist. Given code to review, you check it for correctn
|
|
|
75
75
|
- **Glob** -- find files by name pattern
|
|
76
76
|
- **Grep** -- search file contents with regex
|
|
77
77
|
- **Bash** (observation and test commands only):
|
|
78
|
-
|
|
79
|
-
- `bun test <specific-file>` (run targeted tests)
|
|
80
|
-
- `bun run lint` (lint and format check)
|
|
81
|
-
- `bun run typecheck` (type checking)
|
|
78
|
+
{{QUALITY_GATE_CAPABILITIES}}
|
|
82
79
|
- `git log`, `git diff`, `git show`, `git blame`
|
|
83
80
|
- `git diff <base-branch>...<feature-branch>` (review changes)
|
|
84
81
|
- `{{TRACKER_CLI}} show`, `{{TRACKER_CLI}} ready` (read {{TRACKER_NAME}} state)
|
|
@@ -107,11 +104,7 @@ You are a validation specialist. Given code to review, you check it for correctn
|
|
|
107
104
|
- Check for: security issues, hardcoded secrets, missing input validation.
|
|
108
105
|
- Check for: adequate test coverage, meaningful test assertions.
|
|
109
106
|
5. **Run quality gates:**
|
|
110
|
-
|
|
111
|
-
bun test # Do all tests pass?
|
|
112
|
-
bun run lint # Does lint and formatting pass?
|
|
113
|
-
bun run typecheck # Are there any TypeScript errors?
|
|
114
|
-
```
|
|
107
|
+
{{QUALITY_GATE_BASH}}
|
|
115
108
|
6. **Report results** via `{{TRACKER_CLI}} close` with a clear pass/fail summary:
|
|
116
109
|
```bash
|
|
117
110
|
{{TRACKER_CLI}} close <task-id> --reason "PASS: <summary>"
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@os-eco/overstory-cli",
|
|
3
|
-
"version": "0.6.9",
|
|
3
|
+
"version": "0.6.10",
|
|
4
4
|
"description": "Multi-agent orchestration for Claude Code — spawn worker agents in git worktrees via tmux, coordinate through SQLite mail, merge with tiered conflict resolution",
|
|
5
5
|
"author": "Jaymin West",
|
|
6
6
|
"license": "MIT",
|
package/src/agents/hooks-deployer.test.ts
CHANGED
|
@@ -9,6 +9,7 @@ import {
|
|
|
9
9
|
buildPathBoundaryGuardScript,
|
|
10
10
|
deployHooks,
|
|
11
11
|
escapeForSingleQuotedShell,
|
|
12
|
+
extractQualityGatePrefixes,
|
|
12
13
|
getBashPathBoundaryGuards,
|
|
13
14
|
getCapabilityGuards,
|
|
14
15
|
getDangerGuards,
|
|
@@ -1234,6 +1235,49 @@ describe("getDangerGuards", () => {
|
|
|
1234
1235
|
);
|
|
1235
1236
|
}
|
|
1236
1237
|
});
|
|
1238
|
+
|
|
1239
|
+
test("custom quality gates appear in safe prefix list for non-implementation capabilities", () => {
|
|
1240
|
+
const guards = getCapabilityGuards("scout", [
|
|
1241
|
+
{ name: "Test", command: "pytest", description: "all tests pass" },
|
|
1242
|
+
{ name: "Lint", command: "ruff check .", description: "no lint errors" },
|
|
1243
|
+
]);
|
|
1244
|
+
// Find the Bash guard for file modifications (last Bash entry for non-implementation)
|
|
1245
|
+
const bashGuards = guards.filter((g) => g.matcher === "Bash");
|
|
1246
|
+
const fileGuard = bashGuards.find((g) =>
|
|
1247
|
+
g.hooks.some((h) => h.command.includes("cannot modify files")),
|
|
1248
|
+
);
|
|
1249
|
+
expect(fileGuard).toBeDefined();
|
|
1250
|
+
const command = fileGuard?.hooks[0]?.command ?? "";
|
|
1251
|
+
expect(command).toContain("pytest");
|
|
1252
|
+
expect(command).toContain("ruff check .");
|
|
1253
|
+
// Should NOT contain default bun commands
|
|
1254
|
+
expect(command).not.toContain("bun test");
|
|
1255
|
+
});
|
|
1256
|
+
});
|
|
1257
|
+
|
|
1258
|
+
describe("extractQualityGatePrefixes", () => {
|
|
1259
|
+
test("extracts command from each gate", () => {
|
|
1260
|
+
const gates = [
|
|
1261
|
+
{ name: "Test", command: "bun test", description: "all tests pass" },
|
|
1262
|
+
{ name: "Lint", command: "bun run lint", description: "zero errors" },
|
|
1263
|
+
];
|
|
1264
|
+
const prefixes = extractQualityGatePrefixes(gates);
|
|
1265
|
+
expect(prefixes).toEqual(["bun test", "bun run lint"]);
|
|
1266
|
+
});
|
|
1267
|
+
|
|
1268
|
+
test("returns empty array for empty gates", () => {
|
|
1269
|
+
expect(extractQualityGatePrefixes([])).toEqual([]);
|
|
1270
|
+
});
|
|
1271
|
+
|
|
1272
|
+
test("works with non-bun quality gates", () => {
|
|
1273
|
+
const gates = [
|
|
1274
|
+
{ name: "Test", command: "pytest", description: "all tests pass" },
|
|
1275
|
+
{ name: "Lint", command: "ruff check .", description: "no lint errors" },
|
|
1276
|
+
{ name: "Type", command: "mypy src/", description: "type check" },
|
|
1277
|
+
];
|
|
1278
|
+
const prefixes = extractQualityGatePrefixes(gates);
|
|
1279
|
+
expect(prefixes).toEqual(["pytest", "ruff check .", "mypy src/"]);
|
|
1280
|
+
});
|
|
1237
1281
|
});
|
|
1238
1282
|
|
|
1239
1283
|
describe("buildBashFileGuardScript", () => {
|
|
@@ -1261,6 +1305,14 @@ describe("buildBashFileGuardScript", () => {
|
|
|
1261
1305
|
expect(script).toContain("git log");
|
|
1262
1306
|
expect(script).toContain("git diff");
|
|
1263
1307
|
expect(script).toContain("mulch ");
|
|
1308
|
+
// Quality gate commands (bun test, bun run lint, etc.) are no longer
|
|
1309
|
+
// hardcoded in SAFE_BASH_PREFIXES — they come from config via
|
|
1310
|
+
// extractQualityGatePrefixes() and are passed as extraSafePrefixes
|
|
1311
|
+
// through getCapabilityGuards().
|
|
1312
|
+
});
|
|
1313
|
+
|
|
1314
|
+
test("includes quality gate prefixes when passed as extraSafePrefixes", () => {
|
|
1315
|
+
const script = buildBashFileGuardScript("scout", ["bun test", "bun run lint"]);
|
|
1264
1316
|
expect(script).toContain("bun test");
|
|
1265
1317
|
expect(script).toContain("bun run lint");
|
|
1266
1318
|
});
|
package/src/agents/hooks-deployer.ts
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import { mkdir } from "node:fs/promises";
|
|
2
2
|
import { dirname, join } from "node:path";
|
|
3
|
+
import { DEFAULT_QUALITY_GATES } from "../config.ts";
|
|
3
4
|
import { AgentError } from "../errors.ts";
|
|
5
|
+
import type { QualityGate } from "../types.ts";
|
|
4
6
|
|
|
5
7
|
/**
|
|
6
8
|
* Capabilities that must never modify project files.
|
|
@@ -117,12 +119,20 @@ const SAFE_BASH_PREFIXES = [
|
|
|
117
119
|
"git blame",
|
|
118
120
|
"git branch",
|
|
119
121
|
"mulch ",
|
|
120
|
-
"bun test",
|
|
121
|
-
"bun run lint",
|
|
122
|
-
"bun run typecheck",
|
|
123
|
-
"bun run biome",
|
|
124
122
|
];
|
|
125
123
|
|
|
124
|
+
/**
|
|
125
|
+
* Extract command prefixes from quality gate configurations.
|
|
126
|
+
*
|
|
127
|
+
* Each gate's command is used as a safe prefix so non-implementation agents
|
|
128
|
+
* can still run quality gate commands (e.g., reviewers running tests).
|
|
129
|
+
* This makes the safe prefix list configurable instead of hardcoding
|
|
130
|
+
* specific tool commands like "bun test".
|
|
131
|
+
*/
|
|
132
|
+
export function extractQualityGatePrefixes(gates: QualityGate[]): string[] {
|
|
133
|
+
return gates.map((g) => g.command);
|
|
134
|
+
}
|
|
135
|
+
|
|
126
136
|
/** Hook entry shape matching Claude Code's settings.local.json format. */
|
|
127
137
|
interface HookEntry {
|
|
128
138
|
matcher: string;
|
|
@@ -470,8 +480,10 @@ export function getBashPathBoundaryGuards(): HookEntry[] {
|
|
|
470
480
|
*
|
|
471
481
|
* Note: All capabilities also receive Bash danger guards via getDangerGuards().
|
|
472
482
|
*/
|
|
473
|
-
export function getCapabilityGuards(capability: string): HookEntry[] {
|
|
483
|
+
export function getCapabilityGuards(capability: string, qualityGates?: QualityGate[]): HookEntry[] {
|
|
474
484
|
const guards: HookEntry[] = [];
|
|
485
|
+
const gates = qualityGates ?? DEFAULT_QUALITY_GATES;
|
|
486
|
+
const gatePrefixes = extractQualityGatePrefixes(gates);
|
|
475
487
|
|
|
476
488
|
// Block Claude Code native team/task tools for ALL overstory agents.
|
|
477
489
|
// Agents must use `overstory sling` for delegation, not native Task/Team tools.
|
|
@@ -501,7 +513,9 @@ export function getCapabilityGuards(capability: string): HookEntry[] {
|
|
|
501
513
|
guards.push(...toolGuards);
|
|
502
514
|
|
|
503
515
|
// Coordination capabilities get git add/commit whitelisted for beads/mulch sync
|
|
504
|
-
const extraSafe = COORDINATION_CAPABILITIES.has(capability)
|
|
516
|
+
const extraSafe = COORDINATION_CAPABILITIES.has(capability)
|
|
517
|
+
? [...COORDINATION_SAFE_PREFIXES, ...gatePrefixes]
|
|
518
|
+
: gatePrefixes;
|
|
505
519
|
const bashFileGuard: HookEntry = {
|
|
506
520
|
matcher: "Bash",
|
|
507
521
|
hooks: [
|
|
@@ -560,6 +574,7 @@ export async function deployHooks(
|
|
|
560
574
|
worktreePath: string,
|
|
561
575
|
agentName: string,
|
|
562
576
|
capability = "builder",
|
|
577
|
+
qualityGates?: QualityGate[],
|
|
563
578
|
): Promise<void> {
|
|
564
579
|
const templatePath = getTemplatePath();
|
|
565
580
|
const file = Bun.file(templatePath);
|
|
@@ -606,7 +621,7 @@ export async function deployHooks(
|
|
|
606
621
|
// and do not require PATH extension.
|
|
607
622
|
const pathGuards = getPathBoundaryGuards();
|
|
608
623
|
const dangerGuards = getDangerGuards(agentName);
|
|
609
|
-
const capabilityGuards = getCapabilityGuards(capability);
|
|
624
|
+
const capabilityGuards = getCapabilityGuards(capability, qualityGates);
|
|
610
625
|
const allGuards = [...pathGuards, ...dangerGuards, ...capabilityGuards];
|
|
611
626
|
|
|
612
627
|
if (allGuards.length > 0) {
|
package/src/agents/overlay.test.ts
CHANGED
|
@@ -4,7 +4,15 @@ import { tmpdir } from "node:os";
|
|
|
4
4
|
import { join } from "node:path";
|
|
5
5
|
import { AgentError } from "../errors.ts";
|
|
6
6
|
import type { OverlayConfig, QualityGate } from "../types.ts";
|
|
7
|
-
import {
|
|
7
|
+
import {
|
|
8
|
+
formatQualityGatesBash,
|
|
9
|
+
formatQualityGatesCapabilities,
|
|
10
|
+
formatQualityGatesInline,
|
|
11
|
+
formatQualityGatesSteps,
|
|
12
|
+
generateOverlay,
|
|
13
|
+
isCanonicalRoot,
|
|
14
|
+
writeOverlay,
|
|
15
|
+
} from "./overlay.ts";
|
|
8
16
|
|
|
9
17
|
const SAMPLE_BASE_DEFINITION = `# Builder Agent
|
|
10
18
|
|
|
@@ -674,3 +682,150 @@ describe("isCanonicalRoot", () => {
|
|
|
674
682
|
expect(isCanonicalRoot(worktreePath, canonicalRoot)).toBe(false);
|
|
675
683
|
});
|
|
676
684
|
});
|
|
685
|
+
|
|
686
|
+
describe("formatQualityGatesInline", () => {
|
|
687
|
+
test("formats default gates as inline backtick list", () => {
|
|
688
|
+
const result = formatQualityGatesInline(undefined);
|
|
689
|
+
expect(result).toBe("`bun test`, `bun run lint`, `bun run typecheck`");
|
|
690
|
+
});
|
|
691
|
+
|
|
692
|
+
test("formats custom gates as inline backtick list", () => {
|
|
693
|
+
const gates: QualityGate[] = [
|
|
694
|
+
{ name: "Test", command: "pytest", description: "all tests pass" },
|
|
695
|
+
{ name: "Lint", command: "ruff check .", description: "no lint errors" },
|
|
696
|
+
];
|
|
697
|
+
const result = formatQualityGatesInline(gates);
|
|
698
|
+
expect(result).toBe("`pytest`, `ruff check .`");
|
|
699
|
+
});
|
|
700
|
+
|
|
701
|
+
test("falls back to defaults for empty array", () => {
|
|
702
|
+
const result = formatQualityGatesInline([]);
|
|
703
|
+
expect(result).toContain("`bun test`");
|
|
704
|
+
});
|
|
705
|
+
});
|
|
706
|
+
|
|
707
|
+
describe("formatQualityGatesSteps", () => {
|
|
708
|
+
test("formats default gates as numbered steps", () => {
|
|
709
|
+
const result = formatQualityGatesSteps(undefined);
|
|
710
|
+
expect(result).toContain("1. Run `bun test`");
|
|
711
|
+
expect(result).toContain("2. Run `bun run lint`");
|
|
712
|
+
expect(result).toContain("3. Run `bun run typecheck`");
|
|
713
|
+
});
|
|
714
|
+
|
|
715
|
+
test("formats custom gates as numbered steps", () => {
|
|
716
|
+
const gates: QualityGate[] = [
|
|
717
|
+
{ name: "Build", command: "cargo build", description: "compilation succeeds" },
|
|
718
|
+
{ name: "Test", command: "cargo test", description: "all tests pass" },
|
|
719
|
+
];
|
|
720
|
+
const result = formatQualityGatesSteps(gates);
|
|
721
|
+
expect(result).toBe(
|
|
722
|
+
"1. Run `cargo build` -- compilation succeeds.\n2. Run `cargo test` -- all tests pass.",
|
|
723
|
+
);
|
|
724
|
+
});
|
|
725
|
+
});
|
|
726
|
+
|
|
727
|
+
describe("formatQualityGatesBash", () => {
|
|
728
|
+
test("formats as fenced bash block with aligned comments", () => {
|
|
729
|
+
const result = formatQualityGatesBash(undefined);
|
|
730
|
+
expect(result).toContain("```bash");
|
|
731
|
+
expect(result).toContain("bun test");
|
|
732
|
+
expect(result).toContain("bun run lint");
|
|
733
|
+
expect(result).toContain("bun run typecheck");
|
|
734
|
+
expect(result).toContain("```");
|
|
735
|
+
});
|
|
736
|
+
|
|
737
|
+
test("capitalizes first letter of description in comments", () => {
|
|
738
|
+
const gates: QualityGate[] = [
|
|
739
|
+
{ name: "Test", command: "pytest", description: "all tests pass" },
|
|
740
|
+
];
|
|
741
|
+
const result = formatQualityGatesBash(gates);
|
|
742
|
+
expect(result).toContain("# All tests pass");
|
|
743
|
+
});
|
|
744
|
+
|
|
745
|
+
test("custom gates produce correct bash block", () => {
|
|
746
|
+
const gates: QualityGate[] = [
|
|
747
|
+
{ name: "Test", command: "npm test", description: "tests pass" },
|
|
748
|
+
{ name: "Lint", command: "npm run lint", description: "lint clean" },
|
|
749
|
+
];
|
|
750
|
+
const result = formatQualityGatesBash(gates);
|
|
751
|
+
expect(result).toContain("npm test");
|
|
752
|
+
expect(result).toContain("npm run lint");
|
|
753
|
+
expect(result).not.toContain("bun");
|
|
754
|
+
});
|
|
755
|
+
});
|
|
756
|
+
|
|
757
|
+
describe("formatQualityGatesCapabilities", () => {
|
|
758
|
+
test("formats as indented bullet list", () => {
|
|
759
|
+
const result = formatQualityGatesCapabilities(undefined);
|
|
760
|
+
expect(result).toContain(" - `bun test`");
|
|
761
|
+
expect(result).toContain(" - `bun run lint`");
|
|
762
|
+
expect(result).toContain(" - `bun run typecheck`");
|
|
763
|
+
});
|
|
764
|
+
|
|
765
|
+
test("custom gates produce correct capability bullets", () => {
|
|
766
|
+
const gates: QualityGate[] = [
|
|
767
|
+
{ name: "Test", command: "pytest", description: "run tests" },
|
|
768
|
+
{ name: "Type", command: "mypy .", description: "type check" },
|
|
769
|
+
];
|
|
770
|
+
const result = formatQualityGatesCapabilities(gates);
|
|
771
|
+
expect(result).toBe(" - `pytest` (run tests)\n - `mypy .` (type check)");
|
|
772
|
+
});
|
|
773
|
+
});
|
|
774
|
+
|
|
775
|
+
describe("quality gate placeholders in base definitions", () => {
|
|
776
|
+
test("QUALITY_GATE_INLINE in base definition gets replaced", async () => {
|
|
777
|
+
const config = makeConfig({
|
|
778
|
+
baseDefinition: "Run {{QUALITY_GATE_INLINE}} before closing.",
|
|
779
|
+
});
|
|
780
|
+
const output = await generateOverlay(config);
|
|
781
|
+
expect(output).toContain("`bun test`, `bun run lint`, `bun run typecheck`");
|
|
782
|
+
expect(output).not.toContain("{{QUALITY_GATE_INLINE}}");
|
|
783
|
+
});
|
|
784
|
+
|
|
785
|
+
test("QUALITY_GATE_STEPS in base definition gets replaced", async () => {
|
|
786
|
+
const config = makeConfig({
|
|
787
|
+
baseDefinition: "## Steps\n{{QUALITY_GATE_STEPS}}",
|
|
788
|
+
});
|
|
789
|
+
const output = await generateOverlay(config);
|
|
790
|
+
expect(output).toContain("1. Run `bun test`");
|
|
791
|
+
expect(output).not.toContain("{{QUALITY_GATE_STEPS}}");
|
|
792
|
+
});
|
|
793
|
+
|
|
794
|
+
test("QUALITY_GATE_BASH in base definition gets replaced", async () => {
|
|
795
|
+
const config = makeConfig({
|
|
796
|
+
baseDefinition: "## Workflow\n{{QUALITY_GATE_BASH}}",
|
|
797
|
+
});
|
|
798
|
+
const output = await generateOverlay(config);
|
|
799
|
+
expect(output).toContain("```bash");
|
|
800
|
+
expect(output).toContain("bun test");
|
|
801
|
+
expect(output).not.toContain("{{QUALITY_GATE_BASH}}");
|
|
802
|
+
});
|
|
803
|
+
|
|
804
|
+
test("QUALITY_GATE_CAPABILITIES in base definition gets replaced", async () => {
|
|
805
|
+
const config = makeConfig({
|
|
806
|
+
baseDefinition: "## Caps\n{{QUALITY_GATE_CAPABILITIES}}",
|
|
807
|
+
});
|
|
808
|
+
const output = await generateOverlay(config);
|
|
809
|
+
expect(output).toContain(" - `bun test`");
|
|
810
|
+
expect(output).not.toContain("{{QUALITY_GATE_CAPABILITIES}}");
|
|
811
|
+
});
|
|
812
|
+
|
|
813
|
+
test("custom quality gates in base definition get custom commands", async () => {
|
|
814
|
+
const gates: QualityGate[] = [
|
|
815
|
+
{ name: "Test", command: "pytest", description: "all tests pass" },
|
|
816
|
+
{ name: "Lint", command: "ruff check .", description: "no lint errors" },
|
|
817
|
+
];
|
|
818
|
+
const config = makeConfig({
|
|
819
|
+
capability: "builder",
|
|
820
|
+
qualityGates: gates,
|
|
821
|
+
baseDefinition:
|
|
822
|
+
"Run {{QUALITY_GATE_INLINE}} before closing.\n{{QUALITY_GATE_BASH}}\n{{QUALITY_GATE_STEPS}}",
|
|
823
|
+
});
|
|
824
|
+
const output = await generateOverlay(config);
|
|
825
|
+
expect(output).toContain("`pytest`, `ruff check .`");
|
|
826
|
+
expect(output).toContain("pytest");
|
|
827
|
+
expect(output).toContain("ruff check .");
|
|
828
|
+
expect(output).not.toContain("bun test");
|
|
829
|
+
expect(output).not.toContain("{{QUALITY_GATE");
|
|
830
|
+
});
|
|
831
|
+
});
|