npm - @fredericboyer/dev-team - Versions diffs - 0.8.1 → 0.10.0 - Mend

@fredericboyer/dev-team 0.8.1 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)

package/dist/create-agent.js +20 -6
package/dist/create-agent.js.map +1 -1
package/dist/init.d.ts +8 -1
package/dist/init.js +71 -5
package/dist/init.js.map +1 -1
package/dist/status.js +12 -6
package/dist/status.js.map +1 -1
package/dist/update.d.ts +6 -0
package/dist/update.js +107 -0
package/dist/update.js.map +1 -1
package/package.json +2 -2
package/templates/CLAUDE.md +25 -11
package/templates/agent-memory/dev-team-beck/MEMORY.md +20 -6
package/templates/agent-memory/dev-team-borges/MEMORY.md +20 -6
package/templates/agent-memory/dev-team-brooks/MEMORY.md +20 -6
package/templates/agent-memory/dev-team-conway/MEMORY.md +20 -6
package/templates/agent-memory/dev-team-deming/MEMORY.md +21 -7
package/templates/agent-memory/dev-team-drucker/MEMORY.md +20 -6
package/templates/agent-memory/dev-team-hamilton/MEMORY.md +20 -6
package/templates/agent-memory/dev-team-knuth/MEMORY.md +20 -6
package/templates/agent-memory/dev-team-mori/MEMORY.md +20 -6
package/templates/agent-memory/dev-team-szabo/MEMORY.md +20 -6
package/templates/agent-memory/dev-team-tufte/MEMORY.md +20 -6
package/templates/agent-memory/dev-team-voss/MEMORY.md +20 -6
package/templates/agents/dev-team-beck.md +3 -0
package/templates/agents/dev-team-borges.md +119 -11
package/templates/agents/dev-team-brooks.md +10 -0
package/templates/agents/dev-team-conway.md +3 -0
package/templates/agents/dev-team-deming.md +3 -0
package/templates/agents/dev-team-drucker.md +114 -2
package/templates/agents/dev-team-hamilton.md +3 -0
package/templates/agents/dev-team-knuth.md +10 -0
package/templates/agents/dev-team-mori.md +3 -0
package/templates/agents/dev-team-szabo.md +10 -0
package/templates/agents/dev-team-tufte.md +3 -0
package/templates/agents/dev-team-voss.md +3 -0
package/templates/dev-team-learnings.md +3 -1
package/templates/dev-team-metrics.md +18 -0
package/templates/hooks/dev-team-post-change-review.js +71 -0
package/templates/skills/dev-team-assess/SKILL.md +20 -0
package/templates/skills/dev-team-audit/SKILL.md +1 -1
package/templates/skills/dev-team-review/SKILL.md +36 -3
package/templates/skills/dev-team-task/SKILL.md +30 -10
package/templates/{skills → workflow-skills}/dev-team-security-status/SKILL.md +1 -1
package/templates/{skills → workflow-skills}/dev-team-merge/SKILL.md +0 -0

package/templates/hooks/dev-team-post-change-review.js CHANGED Viewed

@@ -233,8 +233,79 @@ if (flags.length === 0) {
   process.exit(0);
 }
+// ─── Complexity-based triage ─────────────────────────────────────────────────
+// Score the change to determine review depth: LIGHT, STANDARD, or DEEP.
+// Uses available tool_input data (old_string/new_string for Edit, content for Write).
+function scoreComplexity(toolInput, filePath) {
+  let score = 0;
+  // Lines changed
+  const oldStr = toolInput.old_string || "";
+  const newStr = toolInput.new_string || toolInput.content || "";
+  const oldLines = oldStr ? oldStr.split("\n").length : 0;
+  const newLines = newStr ? newStr.split("\n").length : 0;
+  const linesChanged = Math.abs(newLines - oldLines) + Math.min(oldLines, newLines);
+  score += Math.min(linesChanged, 50); // Cap at 50 to avoid single large file dominating
+  // Complexity indicators in the new content
+  const complexityPatterns = [
+    /\bfunction\b/g, // new functions
+    /\bclass\b/g, // new classes
+    /\bif\b.*\belse\b/g, // control flow
+    /\bcatch\b/g, // error handling
+    /\bthrow\b/g, // error throwing
+    /\basync\b/g, // async operations
+    /\bawait\b/g, // async operations
+    /\bexport\b/g, // API surface changes
+  ];
+  for (const pattern of complexityPatterns) {
+    const matches = newStr.match(pattern);
+    if (matches) score += matches.length * 2;
+  }
+  // Security-sensitive files get a boost
+  if (SECURITY_PATTERNS.some((p) => p.test(filePath))) {
+    score += 20;
+  }
+  return score;
+}
+// Read configurable thresholds from config.json, or use defaults
+let lightThreshold = 10;
+let deepThreshold = 40;
+try {
+  const fs = require("fs");
+  const configPath = path.join(process.cwd(), ".dev-team", "config.json");
+  const config = JSON.parse(fs.readFileSync(configPath, "utf-8"));
+  if (config.reviewThresholds) {
+    lightThreshold = config.reviewThresholds.light || lightThreshold;
+    deepThreshold = config.reviewThresholds.deep || deepThreshold;
+  }
+} catch {
+  // Use defaults
+}
+const complexityScore = scoreComplexity(input.tool_input || {}, fullPath);
+let reviewDepth = "STANDARD";
+if (complexityScore < lightThreshold) {
+  reviewDepth = "LIGHT";
+} else if (complexityScore >= deepThreshold) {
+  reviewDepth = "DEEP";
+}
 // Output as a DIRECTIVE, not a suggestion. CLAUDE.md instructs the LLM to act on this.
 console.log(`[dev-team] ACTION REQUIRED — spawn these agents as background reviewers:`);
+console.log(`[dev-team] Review depth: ${reviewDepth} (complexity score: ${complexityScore})`);
+if (reviewDepth === "LIGHT") {
+  console.log(`[dev-team] LIGHT review: findings are advisory only — do not classify as [DEFECT].`);
+} else if (reviewDepth === "DEEP") {
+  console.log(
+    `[dev-team] DEEP review: high complexity — request thorough analysis from all reviewers.`,
+  );
+}
 for (const flag of flags) {
   console.log(`  → ${flag}`);
 }

package/templates/skills/dev-team-assess/SKILL.md CHANGED Viewed

@@ -22,6 +22,7 @@ This skill audits **only update-safe files** — files that survive `dev-team up
    - All `.dev-team/agent-memory/*/MEMORY.md` files (use Glob to discover them)
    - The project's `CLAUDE.md` (root of repo)
    - `.dev-team/config.json` (to know which agents are installed)
+   - `.dev-team/metrics.md` (if it exists — calibration metrics log)
 2. If `$ARGUMENTS` specifies a focus area (e.g., "learnings", "memory", "claude.md", "metrics"), scope the audit to that area only. Otherwise, audit all areas.
@@ -91,6 +92,24 @@ Check the project's `CLAUDE.md` for:
 ### Learnings promotion
 - Mature learnings that have been stable for multiple sessions and should be promoted to `CLAUDE.md` instructions
+## Phase 4: Calibration metrics audit (`.dev-team/metrics.md`)
+If `.dev-team/metrics.md` exists and contains entries, analyze:
+### Acceptance rates per agent
+- Calculate rolling acceptance rate (last 10 entries) for each reviewer agent
+- Flag agents with acceptance rate below 50% — they may be generating more noise than signal
+- Identify trend direction: improving, stable, or degrading
+### Signal quality
+- Are DEFECT findings being overruled frequently? This suggests over-flagging
+- Are SUGGESTION findings dominating? This suggests agents are not calibrated to the project's conventions
+- Are review rounds consistently high (3+)? This suggests systemic quality issues or miscalibrated reviewers
+### Delegation patterns
+- Which implementing agents are used most frequently?
+- Are reviewers consistently finding issues in specific domains? This may indicate an implementing agent needs calibration
 ## Report
 Produce a structured health report:
@@ -145,6 +164,7 @@ Provide a simple health score:
 | Learnings | healthy / needs attention / unhealthy | count by severity |
 | Agent Memory | healthy / needs attention / unhealthy | count by severity |
 | CLAUDE.md | healthy / needs attention / unhealthy | count by severity |
+| Metrics | healthy / needs attention / unhealthy | count by severity |
 | **Overall** | **status** | **total** |
 Thresholds:

package/templates/skills/dev-team-audit/SKILL.md CHANGED Viewed

@@ -86,7 +86,7 @@ Numbered list of concrete actions, ordered by priority. Each action should refer
 ### Security preamble
-Before starting the audit, check for open security alerts: run `/dev-team:security-status` if available, or check `gh api repos/{owner}/{repo}/code-scanning/alerts?state=open` and `gh api repos/{owner}/{repo}/dependabot/alerts?state=open`. Include these in the audit scope.
+Before starting the audit, check for open security alerts: run `/dev-team:security-status` if available, or use the project's security monitoring tools. Include these in the audit scope.
 ### Completion

package/templates/skills/dev-team-review/SKILL.md CHANGED Viewed

@@ -27,6 +27,12 @@ Run a multi-agent parallel review of: $ARGUMENTS
 3. Always include @dev-team-szabo and @dev-team-knuth — they review all code changes.
+## Pre-review validation
+Before spawning reviewers, verify the changes are reviewable:
+1. **Non-empty diff**: The diff contains actual changes to review. If empty, report "nothing to review" and stop.
+2. **Tests pass**: If the project has a test command, confirm tests pass. Flag test failures in the review report header.
 ## Execution
 1. Spawn each selected agent as a **parallel background subagent** using the Agent tool with `subagent_type: "general-purpose"`.
@@ -39,6 +45,18 @@ Run a multi-agent parallel review of: $ARGUMENTS
 3. Wait for all agents to complete.
+## Filter findings (judge pass)
+Before producing the report, filter raw findings to maximize signal quality:
+1. **Remove contradictions**: Drop findings that contradict existing ADRs (`docs/adr/`), learnings (`.dev-team/learnings.md`), or agent memory (`.dev-team/agent-memory/*/MEMORY.md`)
+2. **Deduplicate**: When multiple agents flag the same issue, keep the most specific finding
+3. **Consolidate suggestions**: Group `[SUGGESTION]`-level items into a single summary block
+4. **Suppress generated file findings**: Skip findings on generated, vendored, or build artifacts
+5. **Validate DEFECTs**: Each `[DEFECT]` must include a concrete scenario — downgrade to `[RISK]` if not
+6. **Accept silence**: "No substantive findings" from a reviewer is a valid positive signal — do not request re-review
+Log filtered findings in a "Filtered" section for calibration tracking.
 ## Report
 Produce a unified review summary:
@@ -60,6 +78,14 @@ Group by severity:
 - **[QUESTION]** — decisions needing justification
 - **[SUGGESTION]** — specific improvements
+### Filtered
+List findings removed during the judge pass, with the reason for filtering:
+```
+**Filtered** @agent-name — reason (contradicts ADR-NNN / duplicate of above / no concrete scenario / generated file)
+Original finding summary.
+```
 ### Verdict
 - **Approve** — No `[DEFECT]` findings. Advisory items noted.
@@ -69,12 +95,19 @@ State the verdict clearly. List what must be fixed for approval if requesting ch
 ### Security preamble
-Before starting the review, check for open security alerts: run `/dev-team:security-status` if available, or check `gh api repos/{owner}/{repo}/code-scanning/alerts?state=open` and `gh api repos/{owner}/{repo}/dependabot/alerts?state=open`. Flag any critical findings in the review report.
+Before starting the review, check for open security alerts: run `/dev-team:security-status` if available, or use the project's security monitoring tools. Flag any critical findings in the review report.
 ### Completion
 After the review report is delivered:
-1. You MUST spawn **@dev-team-borges** (Librarian) as the final step to review memory freshness and capture any learnings from the review findings. Do NOT skip this.
+1. You MUST spawn **@dev-team-borges** (Librarian) as the final step. Pass Borges the **finding outcome log**: every finding with its classification, source agent, and outcome (accepted/overruled/ignored), including reasoning for overrules. Borges will:
+   - **Extract structured memory entries** from the review findings (each classified finding becomes a memory entry for the reviewer who produced it)
+   - **Reinforce accepted patterns** and **record overruled findings** for reviewer calibration
+   - **Generate calibration rules** when 3+ findings on the same tag are overruled
+   - **Record metrics** to `.dev-team/metrics.md`
+   - Write entries to each participating agent's MEMORY.md using the structured format
+   - Update shared learnings in `.dev-team/learnings.md`
+   - Check cross-agent coherence
 2. If Borges was not spawned, the review is INCOMPLETE.
-3. **Borges memory gate**: If Borges reports that any participating agent's MEMORY.md is empty or contains only boilerplate, this is a **[DEFECT]** that blocks review completion. The agent must write substantive learnings before the review can be marked done.
+3. **Memory formation gate**: After Borges runs, verify that each participating reviewer's MEMORY.md contains at least one new structured entry from this review.
 4. Include Borges's recommendations in the final report.

package/templates/skills/dev-team-task/SKILL.md CHANGED Viewed

@@ -38,11 +38,21 @@ Before the first iteration, the implementing agent should research current best
 Track iterations in conversation context (no state files). For each iteration:
 1. The implementing agent works on the task.
-2. After implementation, spawn review agents in parallel as background tasks.
-3. Collect classified challenges from reviewers.
-4. If any `[DEFECT]` challenges exist, address them in the next iteration.
-5. If no `[DEFECT]` remains, output DONE to exit the loop.
-6. If max iterations reached without convergence, report remaining defects and exit.
+2. **Validate implementation output** before spawning reviewers:
+   - Non-empty diff: `git diff` shows actual changes
+   - Tests pass: test command executed with exit code 0
+   - Relevance: changed files relate to the stated issue
+   - Clean working tree: no uncommitted debris
+   - If validation fails, route back to implementer with specific failure reason. If it fails twice, escalate to human.
+3. After validation passes, spawn review agents in parallel as background tasks.
+4. Collect classified challenges from reviewers.
+5. If any `[DEFECT]` challenges exist, **compact the context** before the next iteration:
+   - Produce a structured summary: DEFECTs found (agent, file, status), files changed, outstanding items
+   - New reviewers in subsequent waves receive: current diff + compact summary + agent definition
+   - They do NOT receive raw conversation history from prior waves
+6. Address defects in the next iteration.
+7. If no `[DEFECT]` remains, output DONE to exit the loop.
+8. If max iterations reached without convergence, report remaining defects and exit.
 The convergence check happens in conversation context: count iterations, check for `[DEFECT]` findings, and decide whether to continue or exit.
@@ -50,6 +60,8 @@ The convergence check happens in conversation context: count iterations, check f
 When multiple issues are being addressed in a single session, the task loop switches to parallel orchestration (see ADR-019). Drucker coordinates all phases in conversation context.
+**Mode selection:** If agent teams are enabled (check `.dev-team/config.json` for `"agentTeams": true`), use team lead mode for batches of 3+ issues. Otherwise, use standard worktree subagent mode. For single issues, always use standard mode.
 ### Phase 0: Brooks pre-assessment (batch)
 Spawn @dev-team-brooks once with all issues. Brooks identifies:
 - **File independence**: which issues touch overlapping files (conflict groups that must run sequentially)
@@ -65,7 +77,7 @@ Drucker spawns one implementing agent per independent issue, each on its own bra
 Reviews do **not** start until **all** implementation agents have completed (Agent tool provides completion notifications as the sync barrier). Once all are done, spawn review agents (Szabo + Knuth, plus conditional reviewers) in parallel across all branches simultaneously. Each reviewer receives the diff for one specific branch and produces classified findings scoped to that branch.
 ### Phase 3: Defect routing
-Collect all findings. Route `[DEFECT]` items back to the original implementing agent for each branch. Agents fix defects on their own branch. After fixes, another review wave runs. Continue until no `[DEFECT]` findings remain or the per-branch iteration limit is reached.
+Collect all findings. Route `[DEFECT]` items back to the original implementing agent for each branch. Agents fix defects on their own branch. Before spawning the next review wave, **compact context**: produce a structured summary of prior findings, their status (fixed/disputed/pending), and files changed. New reviewers receive current diff + compact summary only — not full conversation history from prior waves. Continue until no `[DEFECT]` findings remain or the per-branch iteration limit is reached.
 ### Phase 4: Borges completion
 Borges runs **once** across all branches after the final review wave clears. This ensures cross-branch coherence: memory files are consistent, learnings are not duplicated, and system improvement recommendations consider the full batch.
@@ -77,16 +89,24 @@ Parallel mode is complete when:
 ## Security preamble
-Before starting work, check for open security alerts: run `/dev-team:security-status` if available, or check `gh api repos/{owner}/{repo}/code-scanning/alerts?state=open` and `gh api repos/{owner}/{repo}/dependabot/alerts?state=open`. Flag any critical findings before proceeding.
+Before starting work, check for open security alerts: run `/dev-team:security-status` if available, or use the project's security monitoring tools. Flag any critical findings before proceeding.
 ## Completion
 When the loop exits:
 1. **Deliver the work**: If changes are on a feature branch, create the PR (body must include `Closes #<issue>`). Ensure the PR is ready to merge: CI green, reviews passed, branch up to date. Then follow the project's merge workflow — use `/dev-team:merge` if the project has it configured, otherwise report readiness. If merge fails (CI failures, merge conflicts, branch protection), report the blocker to the human rather than leaving work unattended.
 2. **Clean up worktree**: If the work was done in a worktree, clean it up after the branch is pushed and the PR is created. Do not wait for merge to clean the worktree.
-3. You MUST spawn **@dev-team-borges** (Librarian) as the final step to review memory freshness, cross-agent coherence, and system improvement opportunities. Do NOT skip this.
+3. You MUST spawn **@dev-team-borges** (Librarian) as the final step. Pass Borges the **finding outcome log**: every finding with its classification, source agent, and outcome (accepted/overruled/ignored), including the human's reasoning for overrules. Borges will:
+   - **Extract structured memory entries** from review findings and implementation decisions
+   - **Reinforce accepted patterns** in the reviewer's memory (calibration feedback)
+   - **Record overruled findings** with context so reviewers generate fewer false positives
+   - **Generate calibration rules** when 3+ findings on the same tag are overruled
+   - **Record metrics** to `.dev-team/metrics.md` (acceptance rates, rounds to convergence)
+   - Write entries to each participating agent's MEMORY.md using the structured format
+   - Update shared learnings in `.dev-team/learnings.md`
+   - Check cross-agent coherence
+   - Report system improvement opportunities
 4. If Borges was not spawned, the task is INCOMPLETE.
-5. **Borges memory gate**: If Borges reports that any implementing agent's MEMORY.md is empty or contains only boilerplate after a task, this is a **[DEFECT]** that blocks task completion. The implementing agent must write substantive learnings before the task can be marked done. Empty agent memory after a task means the enforcement pipeline failed.
+5. **Memory formation gate**: After Borges runs, verify that each participating agent's MEMORY.md contains at least one new structured entry from this task. Empty agent memory after a completed task is a system failure — Borges prevents this by automating extraction.
 6. Summarize what was accomplished across all iterations.
 7. Report any remaining `[RISK]` or `[SUGGESTION]` items, including Borges's recommendations.
-8. Write key learnings to agent MEMORY.md files.

package/templates/{skills → workflow-skills}/dev-team-security-status/SKILL.md RENAMED Viewed

@@ -1,5 +1,5 @@
 ---
-name: security-status
+name: dev-team:security-status
 description: Check GitHub security signals — code scanning, Dependabot, secret scanning, and compliance status. Use at session start and before releases.
 user_invocable: true
 ---

package/templates/{skills → workflow-skills}/dev-team-merge/SKILL.md RENAMED Viewed

File without changes