aiwcli 0.13.0 → 0.13.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/init/index.d.ts +8 -8
- package/dist/commands/init/index.js +24 -24
- package/dist/templates/_shared/.claude/settings.json +57 -2
- package/dist/templates/_shared/.windsurf/workflows/handoff.md +4 -221
- package/dist/templates/_shared/.windsurf/workflows/meta-plan.md +5 -341
- package/dist/templates/_shared/hooks-ts/context_monitor.ts +2 -2
- package/dist/templates/_shared/lib-ts/CLAUDE.md +27 -2
- package/dist/templates/_shared/lib-ts/base/lint-dispatch.ts +52 -0
- package/dist/templates/_shared/scripts/resolve-run.ts +1 -0
- package/dist/templates/_shared/skills/handoff-system/CLAUDE.md +4 -4
- package/dist/templates/_shared/skills/handoff-system/lib/document-generator.ts +8 -8
- package/dist/templates/_shared/skills/handoff-system/lib/handoff-reader.ts +3 -3
- package/dist/templates/_shared/skills/handoff-system/scripts/resume_handoff.ts +4 -4
- package/dist/templates/_shared/skills/handoff-system/scripts/save_handoff.ts +6 -6
- package/dist/templates/_shared/skills/handoff-system/workflows/handoff.md +2 -2
- package/dist/templates/_shared/skills/meta-plan/CLAUDE.md +8 -6
- package/dist/templates/_shared/skills/meta-plan/workflows/meta-plan.md +102 -172
- package/dist/templates/cc-native/.claude/settings.json +1 -127
- package/dist/templates/cc-native/_cc-native/artifacts/lib/format.ts +14 -14
- package/dist/templates/cc-native/_cc-native/hooks/CLAUDE.md +32 -2
- package/dist/templates/cc-native/_cc-native/hooks/plan_questions_early.ts +25 -0
- package/dist/templates/cc-native/_cc-native/lib-ts/plan-enhancement.ts +6 -1
- package/dist/templates/cc-native/_cc-native/plan-review/CLAUDE.md +1 -0
- package/dist/templates/cc-native/_cc-native/plan-review/CODING-STANDARDS-CHECKLIST.md +75 -0
- package/dist/templates/cc-native/_cc-native/plan-review/lib/corroboration.ts +69 -16
- package/oclif.manifest.json +1 -1
- package/package.json +1 -1
|
@@ -10,7 +10,7 @@ import type {
|
|
|
10
10
|
ReviewerResult,
|
|
11
11
|
DisplaySettings,
|
|
12
12
|
CorroborationResult,
|
|
13
|
-
} from "
|
|
13
|
+
} from "../../lib-ts/types.js";
|
|
14
14
|
|
|
15
15
|
// ---------------------------------------------------------------------------
|
|
16
16
|
// Markdown Formatting
|
|
@@ -75,14 +75,14 @@ export function formatCombinedMarkdown(
|
|
|
75
75
|
if (corroboration.blocking.length > 0) {
|
|
76
76
|
lines.push("### Blocking Dimensions\n");
|
|
77
77
|
for (const group of corroboration.blocking) {
|
|
78
|
-
lines.push(`- **${group.dimension}**: ${group.
|
|
78
|
+
lines.push(`- **${group.dimension}**: ${group.agentCount} agents agree (threshold: ≥${group.threshold} agents, ${group.issues.length} issues)`);
|
|
79
79
|
}
|
|
80
80
|
lines.push("");
|
|
81
81
|
}
|
|
82
82
|
if (corroboration.solo.length > 0) {
|
|
83
83
|
lines.push("### Solo Dimensions (informational)\n");
|
|
84
84
|
for (const s of corroboration.solo) {
|
|
85
|
-
lines.push(`- **${s.dimension}**: ${s.
|
|
85
|
+
lines.push(`- **${s.dimension}**: ${s.agentCount} agent${s.agentCount !== 1 ? "s" : ""} (threshold: ≥${s.threshold} agents, not met)`);
|
|
86
86
|
}
|
|
87
87
|
lines.push("");
|
|
88
88
|
}
|
|
@@ -192,7 +192,7 @@ export function buildInlineReviewSummary(
|
|
|
192
192
|
if (corroboration && dim) {
|
|
193
193
|
const group = corroboration.blocking.find(g => g.dimension === dim);
|
|
194
194
|
if (group) {
|
|
195
|
-
annotation = ` [CORROBORATED — ${group.
|
|
195
|
+
annotation = ` [CORROBORATED — ${group.agentCount} agents agree, threshold ≥${group.threshold}]`;
|
|
196
196
|
} else {
|
|
197
197
|
annotation = " [perspective]";
|
|
198
198
|
}
|
|
@@ -259,10 +259,10 @@ export function buildHighIssuesDocument(
|
|
|
259
259
|
): string {
|
|
260
260
|
if (corroboration && corroboration.blocking.length > 0) {
|
|
261
261
|
const lines = ["# Corroborated High-Severity Issues\n"];
|
|
262
|
-
lines.push("> Only issues from dimensions where
|
|
262
|
+
lines.push("> Only issues from dimensions where enough distinct agents independently agreed are shown.\n");
|
|
263
263
|
|
|
264
264
|
for (const group of corroboration.blocking) {
|
|
265
|
-
lines.push(`## ${group.dimension} (${group.
|
|
265
|
+
lines.push(`## ${group.dimension} (${group.agentCount} agents agree, threshold: ≥${group.threshold} agents, ${group.issues.length} issues)\n`);
|
|
266
266
|
for (const { agent, issue } of group.issues) {
|
|
267
267
|
const cat = issue.category ?? "general";
|
|
268
268
|
const text = String(issue.issue ?? "").trim();
|
|
@@ -275,7 +275,7 @@ export function buildHighIssuesDocument(
|
|
|
275
275
|
|
|
276
276
|
if (corroboration.solo.length > 0) {
|
|
277
277
|
lines.push("---\n");
|
|
278
|
-
lines.push(`> ${corroboration.solo.length} dimension${corroboration.solo.length !== 1 ? "s" : ""} had
|
|
278
|
+
lines.push(`> ${corroboration.solo.length} dimension${corroboration.solo.length !== 1 ? "s" : ""} had insufficient agent agreement (not blocking): ${corroboration.solo.map(s => `${s.dimension} (${s.agentCount}/${s.threshold} agents)`).join(", ")}\n`);
|
|
279
279
|
}
|
|
280
280
|
|
|
281
281
|
return lines.join("\n");
|
|
@@ -332,18 +332,18 @@ export function buildCorroborationReport(
|
|
|
332
332
|
if (corroborationResult.blocking.length > 0) {
|
|
333
333
|
lines.push("## Blocking Issues (Corroborated)");
|
|
334
334
|
lines.push("");
|
|
335
|
-
lines.push("| Dimension |
|
|
336
|
-
lines.push("
|
|
335
|
+
lines.push("| Dimension | Agents Agreeing | Threshold | Issues | Status |");
|
|
336
|
+
lines.push("|-----------|----------------|-----------|--------|--------|");
|
|
337
337
|
|
|
338
338
|
for (const group of corroborationResult.blocking) {
|
|
339
339
|
lines.push(
|
|
340
|
-
`| ${group.dimension} | ${group.
|
|
340
|
+
`| ${group.dimension} | ${group.agentCount} | ≥${group.threshold} | ${group.issues.length} | ⛔ CORROBORATED |`
|
|
341
341
|
);
|
|
342
342
|
}
|
|
343
343
|
lines.push("");
|
|
344
344
|
|
|
345
345
|
for (const group of corroborationResult.blocking) {
|
|
346
|
-
lines.push(`### ${group.dimension} (${group.issues.length} issues)`);
|
|
346
|
+
lines.push(`### ${group.dimension} (${group.agentCount} agents, ${group.issues.length} issues)`);
|
|
347
347
|
lines.push("");
|
|
348
348
|
for (const {agent, issue} of group.issues) {
|
|
349
349
|
lines.push(`- **[${agent}]** ${issue.issue || "No description"}`);
|
|
@@ -355,12 +355,12 @@ export function buildCorroborationReport(
|
|
|
355
355
|
if (corroborationResult.solo.length > 0) {
|
|
356
356
|
lines.push("## Solo Findings (Below Threshold)");
|
|
357
357
|
lines.push("");
|
|
358
|
-
lines.push("| Dimension |
|
|
359
|
-
lines.push("
|
|
358
|
+
lines.push("| Dimension | Agents Agreeing | Threshold | Issues | Status |");
|
|
359
|
+
lines.push("|-----------|----------------|-----------|--------|--------|");
|
|
360
360
|
|
|
361
361
|
for (const group of corroborationResult.solo) {
|
|
362
362
|
lines.push(
|
|
363
|
-
`| ${group.dimension} | ${group.
|
|
363
|
+
`| ${group.dimension} | ${group.agentCount} | ≥${group.threshold} | ${group.issues.length} | ℹ️ SOLO |`
|
|
364
364
|
);
|
|
365
365
|
}
|
|
366
366
|
lines.push("");
|
|
@@ -9,8 +9,11 @@
|
|
|
9
9
|
| Hook | Trigger | Purpose |
|
|
10
10
|
|------|---------|---------|
|
|
11
11
|
| `cc-native-plan-review.ts` | PreToolUse: ExitPlanMode | Questions gate + plan review before user approval |
|
|
12
|
-
| `
|
|
12
|
+
| `mark_questions_asked.ts` | PostToolUse: AskUserQuestion | Marks questions-asked state after user answers |
|
|
13
|
+
| `enhance_plan_post_subagent.ts` | PostToolUse: Task | Post-subagent plan enhancement |
|
|
14
|
+
| `enhance_plan_post_write.ts` | PostToolUse: Write | Post-write plan enhancement |
|
|
13
15
|
| `plan_questions_early.ts` | UserPromptSubmit | Inject Phase A clarification prompt in plan mode |
|
|
16
|
+
| `validate_task_prompt.ts` | PreToolUse: TaskCreate | Validates task creation prompts |
|
|
14
17
|
|
|
15
18
|
### Plan Review Architecture
|
|
16
19
|
|
|
@@ -217,7 +220,7 @@ Validate TypeScript syntax after editing hooks:
|
|
|
217
220
|
bun --print "import('.aiwcli/_cc-native/hooks/cc-native-plan-review.ts')" 2>&1 | head -5
|
|
218
221
|
|
|
219
222
|
# Or check imports resolve (dry run)
|
|
220
|
-
bun build --no-bundle .aiwcli/_cc-native/hooks/
|
|
223
|
+
bun build --no-bundle .aiwcli/_cc-native/hooks/mark_questions_asked.ts --outdir /dev/null 2>&1
|
|
221
224
|
```
|
|
222
225
|
|
|
223
226
|
Hooks fail silently on import errors — verify after any import path changes.
|
|
@@ -235,3 +238,30 @@ Hooks fail silently on import errors — verify after any import path changes.
|
|
|
235
238
|
| 2026-02-10 | **Migrated cc-native hooks from Python to TypeScript.** `cc-native-plan-review.ts` (async, parallel agent reviews via `Promise.all()`), `add_plan_context.ts`, `plan_questions_early.ts`. All hooks use `runHook()`/`runHookAsync()` entry points. Library code in `_cc-native/lib-ts/` (18 files). Settings.json updated to use `bun` runner. Python `.py` files kept as fallback until TS hooks verified. |
|
|
236
239
|
| 2026-02-10 | Flipped TS logger stderr default to opt-in (`opts?.stderr === true`). Added `logBlocking()` for intentional stderr visibility. Removed redundant `{stderr: false}` from hook-utils.ts, user_prompt_submit.ts, context_monitor.ts. Added "Hook Error Visibility" section documenting visibility tiers and exit code behavior. |
|
|
237
240
|
| 2026-02-10 | Fixed `debug.py` `context_path` crash. Added local try/catch around `maybeActivate` in `user_prompt_submit.ts` and `context_monitor.ts` to prevent stderr error display on non-critical I/O failures. Removed dead `context_path` from `_emitHookEnd` in `hook-utils.ts`. Added "Error Handling" section to CLAUDE.md. |
|
|
241
|
+
| 2026-02-21 | **Coding standards nudge injected in plan mode.** `plan_questions_early.ts` now emits `CODING_STANDARDS_NUDGE` after Phase A prompt — covers test-first design, file structure fit, and extensibility analysis. Standards reference doc at `plan-review/CODING-STANDARDS-CHECKLIST.md`. Post-write self-check added to `plan-enhancement.ts` `getPlanQualityReviewContext()`. |
|
|
242
|
+
| 2026-02-21 | **ContextLayer Audit:** Updated hook roster — removed stale `add_plan_context.ts`, added `mark_questions_asked.ts`, `enhance_plan_post_subagent.ts`, `enhance_plan_post_write.ts`, `validate_task_prompt.ts`. |
|
|
243
|
+
|
|
244
|
+
---
|
|
245
|
+
## Context Maintenance
|
|
246
|
+
|
|
247
|
+
**After modifying files in this directory:** scan the entries above — if any claim is now
|
|
248
|
+
false or incomplete, update this file before ending the task. Do not defer.
|
|
249
|
+
|
|
250
|
+
**Add** an entry only if an agent would fail without knowing it, it is not obvious from
|
|
251
|
+
the code, and it belongs at this scope (project-wide rule → root CLAUDE.md; WHY decision
|
|
252
|
+
→ inline comment or ADR; inferable from code → nowhere).
|
|
253
|
+
|
|
254
|
+
**Remove** any entry that fails the falsifiability test: if removing it would not change
|
|
255
|
+
how an agent acts here, remove it. If a convention here conflicts with the codebase,
|
|
256
|
+
the codebase wins — update this file, do not work around it. Prune aggressively.
|
|
257
|
+
|
|
258
|
+
**Staleness anchor:** This file assumes `cc-native-plan-review.ts` exists. If it doesn't, this file
|
|
259
|
+
is stale — update or regenerate before relying on it.
|
|
260
|
+
|
|
261
|
+
**Trigger Audit or Generate:**
|
|
262
|
+
- Rename/move files or dirs → Audit
|
|
263
|
+
- More than 20% of files changed → Generate
|
|
264
|
+
- 30+ days without touching this file → Audit
|
|
265
|
+
- Agent mistake caused by this file → fix immediately, then Audit
|
|
266
|
+
|
|
267
|
+
<!-- context-layer: generated=2026-02-10 | last-audited=2026-02-21 | version=2 | dir-commits-at-audit=58 -->
|
|
@@ -13,6 +13,30 @@ import { getProjectRoot } from "../../_shared/lib-ts/base/constants.js";
|
|
|
13
13
|
import { loadHookInput, runHook, logDebug, logInfo, emitContext } from "../../_shared/lib-ts/base/hook-utils.js";
|
|
14
14
|
import { wasEarlyQuestionsAsked } from "../lib-ts/cc-native-state.js";
|
|
15
15
|
|
|
16
|
+
// Unconditional injection by design — no code-detection gate.
|
|
17
|
+
// "When this plan involves code" is self-selecting; non-code plans ignore it.
|
|
18
|
+
// Soft framing per Anthropic Claude 4.x best practices (avoid MUST/MANDATORY overtriggering).
|
|
19
|
+
// Motivation per standard enables generalization better than threats.
|
|
20
|
+
// Generalizability disclaimer: not all codebases need all standards.
|
|
21
|
+
const CODING_STANDARDS_NUDGE = `## Coding Standards for Code Changes
|
|
22
|
+
|
|
23
|
+
When this plan creates or modifies production code, apply these standards — they address the
|
|
24
|
+
most common plan review failure modes:
|
|
25
|
+
|
|
26
|
+
1. **Test-First Design** — Design interfaces from the test perspective first. Plans that
|
|
27
|
+
describe "implement then test" consistently fail review. Structure tests before implementation.
|
|
28
|
+
2. **File Structure Fit** — Verify where similar things already live in this project before
|
|
29
|
+
proposing new files. Agents commonly pick plausible-but-wrong locations that don't match
|
|
30
|
+
existing conventions.
|
|
31
|
+
3. **Extensibility Analysis** — Identify what features most commonly follow this one. Designs
|
|
32
|
+
that resist extension require expensive rewrites later.
|
|
33
|
+
|
|
34
|
+
These standards apply to production code in established codebases. For prototypes, scripts,
|
|
35
|
+
or exploratory work, use judgment on which apply.
|
|
36
|
+
|
|
37
|
+
**Full checklist:** \`.aiwcli/_cc-native/plan-review/CODING-STANDARDS-CHECKLIST.md\`
|
|
38
|
+
Read this file for detailed guidance on each standard.`;
|
|
39
|
+
|
|
16
40
|
const PHASE_A_PROMPT = `## Plan Mode: Narrow the Approach After Exploration
|
|
17
41
|
|
|
18
42
|
After exploring the codebase, use AskUserQuestion — one call, 3-4 questions — before drafting the plan.
|
|
@@ -56,6 +80,7 @@ function main(): void {
|
|
|
56
80
|
|
|
57
81
|
logInfo("plan_questions_early", "Plan mode detected, injecting Phase A prompt");
|
|
58
82
|
emitContext(PHASE_A_PROMPT);
|
|
83
|
+
emitContext(CODING_STANDARDS_NUDGE);
|
|
59
84
|
}
|
|
60
85
|
|
|
61
86
|
runHook(main, "plan_questions_early");
|
|
@@ -37,5 +37,10 @@ Evaluate whether the plan captures decisions that would be lost when this sessio
|
|
|
37
37
|
- What constraints exist that aren't obvious from the code
|
|
38
38
|
- What would break if assumptions change
|
|
39
39
|
|
|
40
|
-
If the plan has gaps, address them before presenting to the user
|
|
40
|
+
If the plan has gaps, address them before presenting to the user.
|
|
41
|
+
|
|
42
|
+
### Coding Standards Check
|
|
43
|
+
If this plan modifies code, verify it against the coding standards you read earlier:
|
|
44
|
+
test-first design, file structure conventions, extensibility. Which standards did you apply,
|
|
45
|
+
and which did you consciously skip (with reasoning)?`;
|
|
41
46
|
}
|
|
@@ -13,6 +13,7 @@ When a Claude Code agent exits plan mode (`ExitPlanMode`), the plan review hook
|
|
|
13
13
|
```
|
|
14
14
|
plan-review/
|
|
15
15
|
├── CLAUDE.md ← This file
|
|
16
|
+
├── CODING-STANDARDS-CHECKLIST.md ← Full standards reference; plan_questions_early.ts injects a short nudge in plan mode that points to this file
|
|
16
17
|
├── agents/
|
|
17
18
|
│ ├── CLAUDE.md ← Agent file format, frontmatter fields, selection rules
|
|
18
19
|
│ ├── PLAN-ORCHESTRATOR.md ← Orchestrator agent (complexity analysis)
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# Coding Standards Checklist
|
|
2
|
+
|
|
3
|
+
Standards that address the most common plan review failure modes. Reference this
|
|
4
|
+
when planning code changes in established codebases.
|
|
5
|
+
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## 1. Test-First Design Thinking
|
|
9
|
+
|
|
10
|
+
Tests are an architectural constraint, not an afterthought. Design from the test
|
|
11
|
+
perspective first.
|
|
12
|
+
|
|
13
|
+
- **Interface-first:** Before describing implementation, ask: "Can I write the test
|
|
14
|
+
for this before the implementation exists?" If the answer is unclear, the interface
|
|
15
|
+
needs more thought.
|
|
16
|
+
- **Structure tests before code:** Plans that describe "implement then test" consistently
|
|
17
|
+
fail review. Restructure: define what the tests assert, then describe the implementation
|
|
18
|
+
that satisfies them.
|
|
19
|
+
- **Testability as architecture:** Design for dependency injection, interface seams, and
|
|
20
|
+
fakes. If a component can't be tested in isolation, the coupling is too tight.
|
|
21
|
+
- **Test categories:** Consider which test types apply — unit (isolated logic), integration
|
|
22
|
+
(module boundaries), contract (API surfaces), and characterization (existing behavior
|
|
23
|
+
preservation during refactoring).
|
|
24
|
+
- **Verification clarity:** Each planned change should have a corresponding verification
|
|
25
|
+
step that is binary-testable (pass/fail in one check, no subjective judgment).
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
## 2. File Structure & Codebase Convention Fit
|
|
30
|
+
|
|
31
|
+
Don't pick a "plausible" location — pick the location that matches the project's
|
|
32
|
+
established patterns.
|
|
33
|
+
|
|
34
|
+
- **Discover before proposing:** Before suggesting new files or directories, verify where
|
|
35
|
+
similar things already live in this project. Use Glob/Grep to find existing patterns.
|
|
36
|
+
- **Naming conventions:** Match existing module and file naming patterns. If the project
|
|
37
|
+
uses `kebab-case.ts`, don't introduce `camelCase.ts`. If hooks live in `hooks/`, don't
|
|
38
|
+
create a `hook-handlers/` directory.
|
|
39
|
+
- **Co-location patterns:** Check if the project follows co-location (tests next to source,
|
|
40
|
+
types with implementation) or separation (dedicated `__tests__/`, `types/` directories).
|
|
41
|
+
Follow what exists.
|
|
42
|
+
- **Import depth:** Verify that new files fit the existing import hierarchy. Adding a file
|
|
43
|
+
that requires imports to cross architectural boundaries (e.g., shared lib importing from
|
|
44
|
+
feature code) signals a structural problem.
|
|
45
|
+
- **Existing system boundaries:** Check if the project has documented system boundaries
|
|
46
|
+
(CLAUDE.md, architecture docs). New files should respect these boundaries rather than
|
|
47
|
+
create cross-cutting dependencies.
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
## 3. Extensibility & Future-Proofing Analysis
|
|
52
|
+
|
|
53
|
+
Balance: don't over-engineer (YAGNI), but don't create designs that actively resist
|
|
54
|
+
extension.
|
|
55
|
+
|
|
56
|
+
- **Adjacent features:** What features are most commonly built after this one? Does the
|
|
57
|
+
design accommodate those extensions without major restructuring?
|
|
58
|
+
- **Extension points:** Where would future developers need to hook in? Are those seams
|
|
59
|
+
accessible, or does the design require forking/copying to extend?
|
|
60
|
+
- **Configuration vs. code changes:** Will common customizations require code changes, or
|
|
61
|
+
can they be handled through configuration? Prefer the latter when the variation space
|
|
62
|
+
is predictable.
|
|
63
|
+
- **Data model flexibility:** Are data structures designed to accommodate likely additions
|
|
64
|
+
(new fields, new types) without breaking existing consumers?
|
|
65
|
+
- **Inversion of control:** Does the design allow callers to inject behavior, or does it
|
|
66
|
+
hardcode decisions that callers will need to override? Prefer interfaces and callbacks
|
|
67
|
+
over concrete implementations when variation is expected.
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## Applicability
|
|
72
|
+
|
|
73
|
+
These standards apply to production code in established codebases with existing conventions.
|
|
74
|
+
For prototypes, scripts, spike explorations, or greenfield projects without established
|
|
75
|
+
patterns, use judgment on which standards apply — not all will be relevant.
|
|
@@ -1,23 +1,35 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Corroboration-based verdict computation for plan review.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
4
|
+
* Uses agent-agreement thresholding: a dimension blocks only when a sufficient
|
|
5
|
+
* number of *distinct agents* independently flag it. This measures true
|
|
6
|
+
* corroboration (multiple independent reviewers converge) rather than issue
|
|
7
|
+
* density (one verbose agent floods a dimension).
|
|
7
8
|
*
|
|
8
|
-
* **
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
9
|
+
* **Algorithm:**
|
|
10
|
+
* For each dimension, compute: `effective_threshold = max(minAgreement, ceil(minRatio × totalAgents))`
|
|
11
|
+
* Block when `distinct_agents_in_dimension >= effective_threshold`.
|
|
12
|
+
*
|
|
13
|
+
* **Default config:** `minAgreement=2, minRatio=0.40`
|
|
14
|
+
* - At 6 agents: threshold=3 (ceil(0.40 × 6) = ceil(2.4) = 3, so 50% must agree)
|
|
15
|
+
* - At 10 agents: threshold=4 (40% must agree)
|
|
16
|
+
* - At 20 agents: threshold=8 (40% must agree)
|
|
17
|
+
*
|
|
18
|
+
* **Why agent-agreement over issue-density:**
|
|
19
|
+
* The previous system (issues >= 2×agents_in_dimension) allowed a single agent
|
|
20
|
+
* to self-corroborate by raising 2+ issues, and made blocking harder as more
|
|
21
|
+
* agents covered a dimension (inverted scaling). Agent-agreement fixes both:
|
|
22
|
+
* a single agent cannot self-corroborate (as long as minAgreement ≥ 2, the default), and more agents agreeing is a
|
|
23
|
+
* stronger signal, not a weaker one.
|
|
13
24
|
*
|
|
14
25
|
* **Convergence problem this solves:**
|
|
15
26
|
* Agents with opposing philosophies (simplicity-guardian vs completeness-gaps)
|
|
16
27
|
* produce contradictory high-severity issues. Because the old system treated
|
|
17
28
|
* every agent's finding as independently authoritative, plans oscillated —
|
|
18
|
-
* addressing one agent's feedback triggered the opposing agent.
|
|
29
|
+
* addressing one agent's feedback triggered the opposing agent. The minAgreement
|
|
30
|
+
* floor prevents any single agent's philosophy from blocking alone.
|
|
19
31
|
*
|
|
20
|
-
* **Revert path:** Change one line in
|
|
32
|
+
* **Revert path:** Change one line in review-pipeline.ts back to
|
|
21
33
|
* `computeReviewDecision(allVerdicts)`. Old function kept in verdict.ts.
|
|
22
34
|
*/
|
|
23
35
|
|
|
@@ -30,22 +42,55 @@ import type {
|
|
|
30
42
|
SoloFinding,
|
|
31
43
|
} from "../../lib-ts/types.js";
|
|
32
44
|
|
|
45
|
+
/** Configuration for corroboration thresholds */
|
|
46
|
+
export interface CorroborationConfig {
|
|
47
|
+
/** Minimum distinct agents that must agree to trigger blocking (default: 2) */
|
|
48
|
+
minAgreement?: number;
|
|
49
|
+
/** Minimum fraction of total agent pool that must agree (default: 0.40) */
|
|
50
|
+
minRatio?: number;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
const DEFAULT_MIN_AGREEMENT = 2;
|
|
54
|
+
const DEFAULT_MIN_RATIO = 0.40;
|
|
55
|
+
|
|
56
|
+
/**
|
|
57
|
+
* Compute the effective blocking threshold for a given agent pool size.
|
|
58
|
+
*
|
|
59
|
+
* Returns `max(minAgreement, ceil(minRatio × totalAgents))`.
|
|
60
|
+
* This ensures a fixed floor (no single-agent self-corroboration) while
|
|
61
|
+
* scaling proportionally at larger pool sizes.
|
|
62
|
+
*/
|
|
63
|
+
export function getEffectiveThreshold(
|
|
64
|
+
totalAgents: number,
|
|
65
|
+
config: CorroborationConfig = {},
|
|
66
|
+
): number {
|
|
67
|
+
const minAgreement = config.minAgreement ?? DEFAULT_MIN_AGREEMENT;
|
|
68
|
+
const minRatio = config.minRatio ?? DEFAULT_MIN_RATIO;
|
|
69
|
+
return Math.max(minAgreement, Math.ceil(totalAgents * minRatio));
|
|
70
|
+
}
|
|
71
|
+
|
|
33
72
|
/**
|
|
34
73
|
* Compute a corroboration-based review decision from all reviewer results.
|
|
35
74
|
*
|
|
36
75
|
* Algorithm:
|
|
37
76
|
* 1. Collect all high-severity issues with a `dimension` field
|
|
38
77
|
* 2. Group by dimension, tracking distinct agent names per group
|
|
39
|
-
* 3.
|
|
40
|
-
* 4.
|
|
41
|
-
* 5.
|
|
78
|
+
* 3. Compute effective threshold: `max(minAgreement, ceil(minRatio × totalAgents))`
|
|
79
|
+
* 4. For each dimension: block if `distinct_agents >= effective_threshold`
|
|
80
|
+
* 5. Issues without `dimension` are unclassified (logged as warning, never block)
|
|
81
|
+
* 6. Non-high issues are ignored (informational only)
|
|
42
82
|
*
|
|
43
83
|
* @param allResults - Map of reviewer name → ReviewerResult (CLI + agent)
|
|
84
|
+
* @param config - Optional threshold configuration
|
|
44
85
|
* @returns CorroborationResult with blocking groups, solo findings, and verdict
|
|
45
86
|
*/
|
|
46
87
|
export function computeCorroboratedDecision(
|
|
47
88
|
allResults: Record<string, ReviewerResult>,
|
|
89
|
+
config: CorroborationConfig = {},
|
|
48
90
|
): CorroborationResult {
|
|
91
|
+
const totalAgents = Object.keys(allResults).length;
|
|
92
|
+
const threshold = getEffectiveThreshold(totalAgents, config);
|
|
93
|
+
|
|
49
94
|
// Accumulator: dimension → { issues, agentNames }
|
|
50
95
|
const dimMap = new Map<
|
|
51
96
|
IssueDimension,
|
|
@@ -66,7 +111,7 @@ export function computeCorroboratedDecision(
|
|
|
66
111
|
// Only high-severity issues participate in corroboration
|
|
67
112
|
if (issue.severity !== "high") continue;
|
|
68
113
|
|
|
69
|
-
// Issues without dimension are unclassified — cannot block
|
|
114
|
+
// Issues without dimension are unclassified — logged but cannot block
|
|
70
115
|
if (!issue.dimension) {
|
|
71
116
|
unclassified.push({ agent: agentName, issue });
|
|
72
117
|
continue;
|
|
@@ -82,14 +127,22 @@ export function computeCorroboratedDecision(
|
|
|
82
127
|
}
|
|
83
128
|
}
|
|
84
129
|
|
|
130
|
+
// Warn about unclassified issues so they don't silently disappear
|
|
131
|
+
if (unclassified.length > 0) {
|
|
132
|
+
const agents = [...new Set(unclassified.map(u => u.agent))];
|
|
133
|
+
process.stderr.write(
|
|
134
|
+
`[corroboration] WARNING: ${unclassified.length} high-severity issue(s) from [${agents.join(", ")}] lack dimension classification and cannot participate in corroboration\n`,
|
|
135
|
+
);
|
|
136
|
+
}
|
|
137
|
+
|
|
85
138
|
const blocking: CorroboratedGroup[] = [];
|
|
86
139
|
const solo: SoloFinding[] = [];
|
|
87
140
|
|
|
88
141
|
for (const [dimension, group] of dimMap) {
|
|
89
142
|
const agentCount = group.agentNames.size;
|
|
90
|
-
const threshold = 2 * agentCount;
|
|
91
143
|
|
|
92
|
-
|
|
144
|
+
// Block when enough distinct agents independently flag this dimension
|
|
145
|
+
if (agentCount >= threshold) {
|
|
93
146
|
blocking.push({
|
|
94
147
|
dimension,
|
|
95
148
|
issues: group.issues,
|
package/oclif.manifest.json
CHANGED