@fredericboyer/dev-team 0.8.1 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/create-agent.js +20 -6
- package/dist/create-agent.js.map +1 -1
- package/dist/init.d.ts +8 -1
- package/dist/init.js +71 -5
- package/dist/init.js.map +1 -1
- package/dist/status.js +12 -6
- package/dist/status.js.map +1 -1
- package/dist/update.d.ts +6 -0
- package/dist/update.js +107 -0
- package/dist/update.js.map +1 -1
- package/package.json +2 -2
- package/templates/CLAUDE.md +25 -11
- package/templates/agent-memory/dev-team-beck/MEMORY.md +20 -6
- package/templates/agent-memory/dev-team-borges/MEMORY.md +20 -6
- package/templates/agent-memory/dev-team-brooks/MEMORY.md +20 -6
- package/templates/agent-memory/dev-team-conway/MEMORY.md +20 -6
- package/templates/agent-memory/dev-team-deming/MEMORY.md +21 -7
- package/templates/agent-memory/dev-team-drucker/MEMORY.md +20 -6
- package/templates/agent-memory/dev-team-hamilton/MEMORY.md +20 -6
- package/templates/agent-memory/dev-team-knuth/MEMORY.md +20 -6
- package/templates/agent-memory/dev-team-mori/MEMORY.md +20 -6
- package/templates/agent-memory/dev-team-szabo/MEMORY.md +20 -6
- package/templates/agent-memory/dev-team-tufte/MEMORY.md +20 -6
- package/templates/agent-memory/dev-team-voss/MEMORY.md +20 -6
- package/templates/agents/dev-team-beck.md +3 -0
- package/templates/agents/dev-team-borges.md +119 -11
- package/templates/agents/dev-team-brooks.md +10 -0
- package/templates/agents/dev-team-conway.md +3 -0
- package/templates/agents/dev-team-deming.md +3 -0
- package/templates/agents/dev-team-drucker.md +114 -2
- package/templates/agents/dev-team-hamilton.md +3 -0
- package/templates/agents/dev-team-knuth.md +10 -0
- package/templates/agents/dev-team-mori.md +3 -0
- package/templates/agents/dev-team-szabo.md +10 -0
- package/templates/agents/dev-team-tufte.md +3 -0
- package/templates/agents/dev-team-voss.md +3 -0
- package/templates/dev-team-learnings.md +3 -1
- package/templates/dev-team-metrics.md +18 -0
- package/templates/hooks/dev-team-post-change-review.js +71 -0
- package/templates/skills/dev-team-assess/SKILL.md +20 -0
- package/templates/skills/dev-team-audit/SKILL.md +1 -1
- package/templates/skills/dev-team-review/SKILL.md +36 -3
- package/templates/skills/dev-team-task/SKILL.md +30 -10
- package/templates/{skills → workflow-skills}/dev-team-security-status/SKILL.md +1 -1
- package/templates/{skills → workflow-skills}/dev-team-merge/SKILL.md +0 -0
|
@@ -233,8 +233,79 @@ if (flags.length === 0) {
|
|
|
233
233
|
process.exit(0);
|
|
234
234
|
}
|
|
235
235
|
|
|
236
|
+
// ─── Complexity-based triage ─────────────────────────────────────────────────
|
|
237
|
+
// Score the change to determine review depth: LIGHT, STANDARD, or DEEP.
|
|
238
|
+
// Uses available tool_input data (old_string/new_string for Edit, content for Write).
|
|
239
|
+
|
|
240
|
+
/**
 * Score a change so the caller can pick a review depth.
 *
 * The score is the sum of:
 *   - the number of lines touched (the larger of the old/new line counts, capped at 50),
 *   - 2 points for every complexity keyword match found in the new content,
 *   - a flat 20 points when `filePath` matches any entry of SECURITY_PATTERNS.
 *
 * @param {object} toolInput - Tool input; reads `old_string`/`new_string` (Edit) or `content` (Write).
 * @param {string} filePath - Path of the changed file, tested against SECURITY_PATTERNS.
 * @returns {number} Non-negative complexity score.
 */
function scoreComplexity(toolInput, filePath) {
  // Missing or non-string fields count as empty text so .split/.match can never throw.
  const asText = (value) => (typeof value === "string" ? value : "");
  // A trailing newline terminates the last line; it must not be counted as one more line.
  const countLines = (text) => (text === "" ? 0 : text.replace(/\r?\n$/, "").split("\n").length);

  const fields = toolInput || {};
  const oldStr = asText(fields.old_string);
  const newStr = asText(fields.new_string) || asText(fields.content);

  // Lines changed: |new - old| + min(old, new) is simply the larger of the two counts.
  const linesChanged = Math.max(countLines(oldStr), countLines(newStr));
  let score = Math.min(linesChanged, 50); // Cap at 50 to avoid single large file dominating

  // Complexity indicators in the new content
  const complexityPatterns = [
    /\bfunction\b/g, // new functions
    /\bclass\b/g, // new classes
    /\bif\b.*\belse\b/g, // control flow (only when `if` and `else` share a line: `.` stops at newlines)
    /\bcatch\b/g, // error handling
    /\bthrow\b/g, // error throwing
    /\basync\b/g, // async operations
    /\bawait\b/g, // async operations
    /\bexport\b/g, // API surface changes
  ];

  for (const pattern of complexityPatterns) {
    const matches = newStr.match(pattern);
    if (matches) score += matches.length * 2;
  }

  // Security-sensitive files get a boost
  if (SECURITY_PATTERNS.some((p) => p.test(filePath))) {
    score += 20;
  }

  return score;
}
|
|
275
|
+
|
|
276
|
+
// Read configurable thresholds from .dev-team/config.json, or use defaults.
// A configured value is honoured whenever it is a finite number (numeric strings
// are converted), so an explicit 0 is respected instead of being treated as unset.
let lightThreshold = 10;
let deepThreshold = 40;
try {
  const fs = require("fs");
  const configPath = path.join(process.cwd(), ".dev-team", "config.json");
  const config = JSON.parse(fs.readFileSync(configPath, "utf-8"));

  // Returns the configured threshold when it is usable, otherwise the fallback.
  const toThreshold = (value, fallback) => {
    const parsed = typeof value === "string" && value.trim() !== "" ? Number(value) : value;
    return typeof parsed === "number" && Number.isFinite(parsed) ? parsed : fallback;
  };

  const thresholds = config && config.reviewThresholds;
  if (thresholds) {
    lightThreshold = toThreshold(thresholds.light, lightThreshold);
    deepThreshold = toThreshold(thresholds.deep, deepThreshold);
  }
} catch {
  // Best effort: a missing or unparsable config file means the defaults apply.
}

// Scores below the light threshold get a LIGHT review, scores at or above the
// deep threshold get a DEEP review, and everything in between stays STANDARD.
const complexityScore = scoreComplexity(input.tool_input || {}, fullPath);
let reviewDepth = "STANDARD";
if (complexityScore < lightThreshold) {
  reviewDepth = "LIGHT";
} else if (complexityScore >= deepThreshold) {
  reviewDepth = "DEEP";
}
|
|
298
|
+
|
|
236
299
|
// Print the review request as a DIRECTIVE rather than a suggestion: the header,
// the chosen depth with its score, an optional depth-specific instruction, and
// finally one line per flagged reviewer.
console.log(`[dev-team] ACTION REQUIRED — spawn these agents as background reviewers:`);
console.log(`[dev-team] Review depth: ${reviewDepth} (complexity score: ${complexityScore})`);

switch (reviewDepth) {
  case "LIGHT":
    console.log(`[dev-team] LIGHT review: findings are advisory only — do not classify as [DEFECT].`);
    break;
  case "DEEP":
    console.log(
      `[dev-team] DEEP review: high complexity — request thorough analysis from all reviewers.`,
    );
    break;
  default:
    // STANDARD depth needs no extra instruction.
    break;
}

for (let index = 0; index < flags.length; index += 1) {
  const flag = flags[index];
  console.log(` → ${flag}`);
}
|
|
@@ -22,6 +22,7 @@ This skill audits **only update-safe files** — files that survive `dev-team up
|
|
|
22
22
|
- All `.dev-team/agent-memory/*/MEMORY.md` files (use Glob to discover them)
|
|
23
23
|
- The project's `CLAUDE.md` (root of repo)
|
|
24
24
|
- `.dev-team/config.json` (to know which agents are installed)
|
|
25
|
+
- `.dev-team/metrics.md` (if it exists — calibration metrics log)
|
|
25
26
|
|
|
26
27
|
2. If `$ARGUMENTS` specifies a focus area (e.g., "learnings", "memory", "claude.md"), scope the audit to that area only. Otherwise, audit all areas.
|
|
27
28
|
|
|
@@ -91,6 +92,24 @@ Check the project's `CLAUDE.md` for:
|
|
|
91
92
|
### Learnings promotion
|
|
92
93
|
- Mature learnings that have been stable for multiple sessions and should be promoted to `CLAUDE.md` instructions
|
|
93
94
|
|
|
95
|
+
## Phase 4: Calibration metrics audit (`.dev-team/metrics.md`)
|
|
96
|
+
|
|
97
|
+
If `.dev-team/metrics.md` exists and contains entries, analyze:
|
|
98
|
+
|
|
99
|
+
### Acceptance rates per agent
|
|
100
|
+
- Calculate rolling acceptance rate (last 10 entries) for each reviewer agent
|
|
101
|
+
- Flag agents with acceptance rate below 50% — they may be generating more noise than signal
|
|
102
|
+
- Identify trend direction: improving, stable, or degrading
|
|
103
|
+
|
|
104
|
+
### Signal quality
|
|
105
|
+
- Are DEFECT findings being overruled frequently? This suggests over-flagging
|
|
106
|
+
- Are SUGGESTION findings dominating? This suggests agents are not calibrated to the project's conventions
|
|
107
|
+
- Are review rounds consistently high (3+)? This suggests systemic quality issues or miscalibrated reviewers
|
|
108
|
+
|
|
109
|
+
### Delegation patterns
|
|
110
|
+
- Which implementing agents are used most frequently?
|
|
111
|
+
- Are reviewers consistently finding issues in specific domains? This may indicate an implementing agent needs calibration
|
|
112
|
+
|
|
94
113
|
## Report
|
|
95
114
|
|
|
96
115
|
Produce a structured health report:
|
|
@@ -145,6 +164,7 @@ Provide a simple health score:
|
|
|
145
164
|
| Learnings | healthy / needs attention / unhealthy | count by severity |
|
|
146
165
|
| Agent Memory | healthy / needs attention / unhealthy | count by severity |
|
|
147
166
|
| CLAUDE.md | healthy / needs attention / unhealthy | count by severity |
|
|
167
|
+
| Metrics | healthy / needs attention / unhealthy | count by severity |
|
|
148
168
|
| **Overall** | **status** | **total** |
|
|
149
169
|
|
|
150
170
|
Thresholds:
|
|
@@ -86,7 +86,7 @@ Numbered list of concrete actions, ordered by priority. Each action should refer
|
|
|
86
86
|
|
|
87
87
|
### Security preamble
|
|
88
88
|
|
|
89
|
-
Before starting the audit, check for open security alerts: run `/dev-team:security-status` if available, or
|
|
89
|
+
Before starting the audit, check for open security alerts: run `/dev-team:security-status` if available, or use the project's security monitoring tools. Include these in the audit scope.
|
|
90
90
|
|
|
91
91
|
### Completion
|
|
92
92
|
|
|
@@ -27,6 +27,12 @@ Run a multi-agent parallel review of: $ARGUMENTS
|
|
|
27
27
|
|
|
28
28
|
3. Always include @dev-team-szabo and @dev-team-knuth — they review all code changes.
|
|
29
29
|
|
|
30
|
+
## Pre-review validation
|
|
31
|
+
|
|
32
|
+
Before spawning reviewers, verify the changes are reviewable:
|
|
33
|
+
1. **Non-empty diff**: The diff contains actual changes to review. If empty, report "nothing to review" and stop.
|
|
34
|
+
2. **Tests pass**: If the project has a test command, confirm tests pass. Flag test failures in the review report header.
|
|
35
|
+
|
|
30
36
|
## Execution
|
|
31
37
|
|
|
32
38
|
1. Spawn each selected agent as a **parallel background subagent** using the Agent tool with `subagent_type: "general-purpose"`.
|
|
@@ -39,6 +45,18 @@ Run a multi-agent parallel review of: $ARGUMENTS
|
|
|
39
45
|
|
|
40
46
|
3. Wait for all agents to complete.
|
|
41
47
|
|
|
48
|
+
## Filter findings (judge pass)
|
|
49
|
+
|
|
50
|
+
Before producing the report, filter raw findings to maximize signal quality:
|
|
51
|
+
1. **Remove contradictions**: Drop findings that contradict existing ADRs (`docs/adr/`), learnings (`.dev-team/learnings.md`), or agent memory (`.dev-team/agent-memory/*/MEMORY.md`)
|
|
52
|
+
2. **Deduplicate**: When multiple agents flag the same issue, keep the most specific finding
|
|
53
|
+
3. **Consolidate suggestions**: Group `[SUGGESTION]`-level items into a single summary block
|
|
54
|
+
4. **Suppress generated file findings**: Skip findings on generated, vendored, or build artifacts
|
|
55
|
+
5. **Validate DEFECTs**: Each `[DEFECT]` must include a concrete scenario — downgrade to `[RISK]` if not
|
|
56
|
+
6. **Accept silence**: "No substantive findings" from a reviewer is a valid positive signal — do not request re-review
|
|
57
|
+
|
|
58
|
+
Log filtered findings in a "Filtered" section for calibration tracking.
|
|
59
|
+
|
|
42
60
|
## Report
|
|
43
61
|
|
|
44
62
|
Produce a unified review summary:
|
|
@@ -60,6 +78,14 @@ Group by severity:
|
|
|
60
78
|
- **[QUESTION]** — decisions needing justification
|
|
61
79
|
- **[SUGGESTION]** — specific improvements
|
|
62
80
|
|
|
81
|
+
### Filtered
|
|
82
|
+
|
|
83
|
+
List findings removed during the judge pass, with the reason for filtering:
|
|
84
|
+
```
|
|
85
|
+
**Filtered** @agent-name — reason (contradicts ADR-NNN / duplicate of above / no concrete scenario / generated file)
|
|
86
|
+
Original finding summary.
|
|
87
|
+
```
|
|
88
|
+
|
|
63
89
|
### Verdict
|
|
64
90
|
|
|
65
91
|
- **Approve** — No `[DEFECT]` findings. Advisory items noted.
|
|
@@ -69,12 +95,19 @@ State the verdict clearly. List what must be fixed for approval if requesting ch
|
|
|
69
95
|
|
|
70
96
|
### Security preamble
|
|
71
97
|
|
|
72
|
-
Before starting the review, check for open security alerts: run `/dev-team:security-status` if available, or
|
|
98
|
+
Before starting the review, check for open security alerts: run `/dev-team:security-status` if available, or use the project's security monitoring tools. Flag any critical findings in the review report.
|
|
73
99
|
|
|
74
100
|
### Completion
|
|
75
101
|
|
|
76
102
|
After the review report is delivered:
|
|
77
|
-
1. You MUST spawn **@dev-team-borges** (Librarian) as the final step
|
|
103
|
+
1. You MUST spawn **@dev-team-borges** (Librarian) as the final step. Pass Borges the **finding outcome log**: every finding with its classification, source agent, and outcome (accepted/overruled/ignored), including reasoning for overrules. Borges will:
|
|
104
|
+
- **Extract structured memory entries** from the review findings (each classified finding becomes a memory entry for the reviewer who produced it)
|
|
105
|
+
- **Reinforce accepted patterns** and **record overruled findings** for reviewer calibration
|
|
106
|
+
- **Generate calibration rules** when 3+ findings on the same tag are overruled
|
|
107
|
+
- **Record metrics** to `.dev-team/metrics.md`
|
|
108
|
+
- Write entries to each participating agent's MEMORY.md using the structured format
|
|
109
|
+
- Update shared learnings in `.dev-team/learnings.md`
|
|
110
|
+
- Check cross-agent coherence
|
|
78
111
|
2. If Borges was not spawned, the review is INCOMPLETE.
|
|
79
|
-
3. **
|
|
112
|
+
3. **Memory formation gate**: After Borges runs, verify that each participating reviewer's MEMORY.md contains at least one new structured entry from this review.
|
|
80
113
|
4. Include Borges's recommendations in the final report.
|
|
@@ -38,11 +38,21 @@ Before the first iteration, the implementing agent should research current best
|
|
|
38
38
|
Track iterations in conversation context (no state files). For each iteration:
|
|
39
39
|
|
|
40
40
|
1. The implementing agent works on the task.
|
|
41
|
-
2.
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
41
|
+
2. **Validate implementation output** before spawning reviewers:
|
|
42
|
+
- Non-empty diff: `git diff` shows actual changes
|
|
43
|
+
- Tests pass: test command executed with exit code 0
|
|
44
|
+
- Relevance: changed files relate to the stated issue
|
|
45
|
+
- Clean working tree: no uncommitted debris
|
|
46
|
+
- If validation fails, route back to the implementer with the specific failure reason. If it fails twice, escalate to the human.
|
|
47
|
+
3. After validation passes, spawn review agents in parallel as background tasks.
|
|
48
|
+
4. Collect classified challenges from reviewers.
|
|
49
|
+
5. If any `[DEFECT]` challenges exist, **compact the context** before the next iteration:
|
|
50
|
+
- Produce a structured summary: DEFECTs found (agent, file, status), files changed, outstanding items
|
|
51
|
+
- New reviewers in subsequent waves receive: current diff + compact summary + agent definition
|
|
52
|
+
- They do NOT receive raw conversation history from prior waves
|
|
53
|
+
6. Address defects in the next iteration.
|
|
54
|
+
7. If no `[DEFECT]` remains, output DONE to exit the loop.
|
|
55
|
+
8. If max iterations reached without convergence, report remaining defects and exit.
|
|
46
56
|
|
|
47
57
|
The convergence check happens in conversation context: count iterations, check for `[DEFECT]` findings, and decide whether to continue or exit.
|
|
48
58
|
|
|
@@ -50,6 +60,8 @@ The convergence check happens in conversation context: count iterations, check f
|
|
|
50
60
|
|
|
51
61
|
When multiple issues are being addressed in a single session, the task loop switches to parallel orchestration (see ADR-019). Drucker coordinates all phases in conversation context.
|
|
52
62
|
|
|
63
|
+
**Mode selection:** If agent teams are enabled (check `.dev-team/config.json` for `"agentTeams": true`), use team lead mode for batches of 3+ issues. Otherwise, use standard worktree subagent mode. For single issues, always use standard mode.
|
|
64
|
+
|
|
53
65
|
### Phase 0: Brooks pre-assessment (batch)
|
|
54
66
|
Spawn @dev-team-brooks once with all issues. Brooks identifies:
|
|
55
67
|
- **File independence**: which issues touch overlapping files (conflict groups that must run sequentially)
|
|
@@ -65,7 +77,7 @@ Drucker spawns one implementing agent per independent issue, each on its own bra
|
|
|
65
77
|
Reviews do **not** start until **all** implementation agents have completed (Agent tool provides completion notifications as the sync barrier). Once all are done, spawn review agents (Szabo + Knuth, plus conditional reviewers) in parallel across all branches simultaneously. Each reviewer receives the diff for one specific branch and produces classified findings scoped to that branch.
|
|
66
78
|
|
|
67
79
|
### Phase 3: Defect routing
|
|
68
|
-
Collect all findings. Route `[DEFECT]` items back to the original implementing agent for each branch. Agents fix defects on their own branch.
|
|
80
|
+
Collect all findings. Route `[DEFECT]` items back to the original implementing agent for each branch. Agents fix defects on their own branch. Before spawning the next review wave, **compact context**: produce a structured summary of prior findings, their status (fixed/disputed/pending), and files changed. New reviewers receive current diff + compact summary only — not full conversation history from prior waves. Continue until no `[DEFECT]` findings remain or the per-branch iteration limit is reached.
|
|
69
81
|
|
|
70
82
|
### Phase 4: Borges completion
|
|
71
83
|
Borges runs **once** across all branches after the final review wave clears. This ensures cross-branch coherence: memory files are consistent, learnings are not duplicated, and system improvement recommendations consider the full batch.
|
|
@@ -77,16 +89,24 @@ Parallel mode is complete when:
|
|
|
77
89
|
|
|
78
90
|
## Security preamble
|
|
79
91
|
|
|
80
|
-
Before starting work, check for open security alerts: run `/dev-team:security-status` if available, or
|
|
92
|
+
Before starting work, check for open security alerts: run `/dev-team:security-status` if available, or use the project's security monitoring tools. Flag any critical findings before proceeding.
|
|
81
93
|
|
|
82
94
|
## Completion
|
|
83
95
|
|
|
84
96
|
When the loop exits:
|
|
85
97
|
1. **Deliver the work**: If changes are on a feature branch, create the PR (body must include `Closes #<issue>`). Ensure the PR is ready to merge: CI green, reviews passed, branch up to date. Then follow the project's merge workflow — use `/dev-team:merge` if the project has it configured, otherwise report readiness. If merge fails (CI failures, merge conflicts, branch protection), report the blocker to the human rather than leaving work unattended.
|
|
86
98
|
2. **Clean up worktree**: If the work was done in a worktree, clean it up after the branch is pushed and the PR is created. Do not wait for merge to clean the worktree.
|
|
87
|
-
3. You MUST spawn **@dev-team-borges** (Librarian) as the final step
|
|
99
|
+
3. You MUST spawn **@dev-team-borges** (Librarian) as the final step. Pass Borges the **finding outcome log**: every finding with its classification, source agent, and outcome (accepted/overruled/ignored), including the human's reasoning for overrules. Borges will:
|
|
100
|
+
- **Extract structured memory entries** from review findings and implementation decisions
|
|
101
|
+
- **Reinforce accepted patterns** in the reviewer's memory (calibration feedback)
|
|
102
|
+
- **Record overruled findings** with context so reviewers generate fewer false positives
|
|
103
|
+
- **Generate calibration rules** when 3+ findings on the same tag are overruled
|
|
104
|
+
- **Record metrics** to `.dev-team/metrics.md` (acceptance rates, rounds to convergence)
|
|
105
|
+
- Write entries to each participating agent's MEMORY.md using the structured format
|
|
106
|
+
- Update shared learnings in `.dev-team/learnings.md`
|
|
107
|
+
- Check cross-agent coherence
|
|
108
|
+
- Report system improvement opportunities
|
|
88
109
|
4. If Borges was not spawned, the task is INCOMPLETE.
|
|
89
|
-
5. **
|
|
110
|
+
5. **Memory formation gate**: After Borges runs, verify that each participating agent's MEMORY.md contains at least one new structured entry from this task. Empty agent memory after a completed task is a system failure — Borges prevents this by automating extraction.
|
|
90
111
|
6. Summarize what was accomplished across all iterations.
|
|
91
112
|
7. Report any remaining `[RISK]` or `[SUGGESTION]` items, including Borges's recommendations.
|
|
92
|
-
8. Write key learnings to agent MEMORY.md files.
|
|
File without changes
|