aiwcli 0.13.0 → 0.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. package/dist/commands/init/index.d.ts +8 -8
  2. package/dist/commands/init/index.js +24 -24
  3. package/dist/templates/_shared/.claude/settings.json +57 -2
  4. package/dist/templates/_shared/.windsurf/workflows/handoff.md +4 -221
  5. package/dist/templates/_shared/.windsurf/workflows/meta-plan.md +5 -341
  6. package/dist/templates/_shared/hooks-ts/context_monitor.ts +2 -2
  7. package/dist/templates/_shared/lib-ts/CLAUDE.md +27 -2
  8. package/dist/templates/_shared/lib-ts/base/lint-dispatch.ts +52 -0
  9. package/dist/templates/_shared/scripts/resolve-run.ts +1 -0
  10. package/dist/templates/_shared/skills/handoff-system/CLAUDE.md +4 -4
  11. package/dist/templates/_shared/skills/handoff-system/lib/document-generator.ts +8 -8
  12. package/dist/templates/_shared/skills/handoff-system/lib/handoff-reader.ts +3 -3
  13. package/dist/templates/_shared/skills/handoff-system/scripts/resume_handoff.ts +4 -4
  14. package/dist/templates/_shared/skills/handoff-system/scripts/save_handoff.ts +6 -6
  15. package/dist/templates/_shared/skills/handoff-system/workflows/handoff.md +2 -2
  16. package/dist/templates/_shared/skills/meta-plan/CLAUDE.md +8 -6
  17. package/dist/templates/_shared/skills/meta-plan/workflows/meta-plan.md +102 -172
  18. package/dist/templates/cc-native/.claude/settings.json +1 -127
  19. package/dist/templates/cc-native/_cc-native/artifacts/lib/format.ts +14 -14
  20. package/dist/templates/cc-native/_cc-native/hooks/CLAUDE.md +32 -2
  21. package/dist/templates/cc-native/_cc-native/hooks/plan_questions_early.ts +25 -0
  22. package/dist/templates/cc-native/_cc-native/lib-ts/plan-enhancement.ts +6 -1
  23. package/dist/templates/cc-native/_cc-native/plan-review/CLAUDE.md +1 -0
  24. package/dist/templates/cc-native/_cc-native/plan-review/CODING-STANDARDS-CHECKLIST.md +75 -0
  25. package/dist/templates/cc-native/_cc-native/plan-review/lib/corroboration.ts +69 -16
  26. package/oclif.manifest.json +1 -1
  27. package/package.json +1 -1
package/dist/templates/cc-native/_cc-native/artifacts/lib/format.ts CHANGED
@@ -10,7 +10,7 @@ import type {
10
10
  ReviewerResult,
11
11
  DisplaySettings,
12
12
  CorroborationResult,
13
- } from "../types.js";
13
+ } from "../../lib-ts/types.js";
14
14
 
15
15
  // ---------------------------------------------------------------------------
16
16
  // Markdown Formatting
@@ -75,14 +75,14 @@ export function formatCombinedMarkdown(
75
75
  if (corroboration.blocking.length > 0) {
76
76
  lines.push("### Blocking Dimensions\n");
77
77
  for (const group of corroboration.blocking) {
78
- lines.push(`- **${group.dimension}**: ${group.issues.length} issues from ${group.agentCount} agents (threshold: ≥${group.threshold})`);
78
+ lines.push(`- **${group.dimension}**: ${group.agentCount} agents agree (threshold: ≥${group.threshold} agents, ${group.issues.length} issues)`);
79
79
  }
80
80
  lines.push("");
81
81
  }
82
82
  if (corroboration.solo.length > 0) {
83
83
  lines.push("### Solo Dimensions (informational)\n");
84
84
  for (const s of corroboration.solo) {
85
- lines.push(`- **${s.dimension}**: ${s.issues.length} issues from ${s.agentCount} agents (threshold: >${s.threshold}, not exceeded)`);
85
+ lines.push(`- **${s.dimension}**: ${s.agentCount} agent${s.agentCount !== 1 ? "s" : ""} (threshold: ≥${s.threshold} agents, not met)`);
86
86
  }
87
87
  lines.push("");
88
88
  }
@@ -192,7 +192,7 @@ export function buildInlineReviewSummary(
192
192
  if (corroboration && dim) {
193
193
  const group = corroboration.blocking.find(g => g.dimension === dim);
194
194
  if (group) {
195
- annotation = ` [CORROBORATED — ${group.issues.length} issues from ${group.agentCount} agents exceeds threshold ${group.threshold}]`;
195
+ annotation = ` [CORROBORATED — ${group.agentCount} agents agree, threshold ≥${group.threshold}]`;
196
196
  } else {
197
197
  annotation = " [perspective]";
198
198
  }
@@ -259,10 +259,10 @@ export function buildHighIssuesDocument(
259
259
  ): string {
260
260
  if (corroboration && corroboration.blocking.length > 0) {
261
261
  const lines = ["# Corroborated High-Severity Issues\n"];
262
- lines.push("> Only issues from dimensions where the total count exceeded the proportional threshold are shown.\n");
262
+ lines.push("> Only issues from dimensions where enough distinct agents independently agreed are shown.\n");
263
263
 
264
264
  for (const group of corroboration.blocking) {
265
- lines.push(`## ${group.dimension} (${group.issues.length} issues from ${group.agentCount} agents, threshold: ${group.threshold})\n`);
265
+ lines.push(`## ${group.dimension} (${group.agentCount} agents agree, threshold: ≥${group.threshold} agents, ${group.issues.length} issues)\n`);
266
266
  for (const { agent, issue } of group.issues) {
267
267
  const cat = issue.category ?? "general";
268
268
  const text = String(issue.issue ?? "").trim();
@@ -275,7 +275,7 @@ export function buildHighIssuesDocument(
275
275
 
276
276
  if (corroboration.solo.length > 0) {
277
277
  lines.push("---\n");
278
- lines.push(`> ${corroboration.solo.length} dimension${corroboration.solo.length !== 1 ? "s" : ""} had issues below threshold (not blocking): ${corroboration.solo.map(s => `${s.dimension} (${s.issues.length}/${s.threshold})`).join(", ")}\n`);
278
+ lines.push(`> ${corroboration.solo.length} dimension${corroboration.solo.length !== 1 ? "s" : ""} had insufficient agent agreement (not blocking): ${corroboration.solo.map(s => `${s.dimension} (${s.agentCount}/${s.threshold} agents)`).join(", ")}\n`);
279
279
  }
280
280
 
281
281
  return lines.join("\n");
@@ -332,18 +332,18 @@ export function buildCorroborationReport(
332
332
  if (corroborationResult.blocking.length > 0) {
333
333
  lines.push("## Blocking Issues (Corroborated)");
334
334
  lines.push("");
335
- lines.push("| Dimension | Issues | Agents | Threshold | Status |");
336
- lines.push("|-----------|--------|--------|-----------|--------|");
335
+ lines.push("| Dimension | Agents Agreeing | Threshold | Issues | Status |");
336
+ lines.push("|-----------|----------------|-----------|--------|--------|");
337
337
 
338
338
  for (const group of corroborationResult.blocking) {
339
339
  lines.push(
340
- `| ${group.dimension} | ${group.issues.length} | ${group.agentCount} | ${group.threshold} | ⛔ EXCEEDED |`
340
+ `| ${group.dimension} | ${group.agentCount} | ≥${group.threshold} | ${group.issues.length} | ⛔ CORROBORATED |`
341
341
  );
342
342
  }
343
343
  lines.push("");
344
344
 
345
345
  for (const group of corroborationResult.blocking) {
346
- lines.push(`### ${group.dimension} (${group.issues.length} issues)`);
346
+ lines.push(`### ${group.dimension} (${group.agentCount} agents, ${group.issues.length} issues)`);
347
347
  lines.push("");
348
348
  for (const {agent, issue} of group.issues) {
349
349
  lines.push(`- **[${agent}]** ${issue.issue || "No description"}`);
@@ -355,12 +355,12 @@ export function buildCorroborationReport(
355
355
  if (corroborationResult.solo.length > 0) {
356
356
  lines.push("## Solo Findings (Below Threshold)");
357
357
  lines.push("");
358
- lines.push("| Dimension | Issues | Agents | Threshold | Status |");
359
- lines.push("|-----------|--------|--------|-----------|--------|");
358
+ lines.push("| Dimension | Agents Agreeing | Threshold | Issues | Status |");
359
+ lines.push("|-----------|----------------|-----------|--------|--------|");
360
360
 
361
361
  for (const group of corroborationResult.solo) {
362
362
  lines.push(
363
- `| ${group.dimension} | ${group.issues.length} | ${group.agentCount} | ${group.threshold} | ℹ️ SOLO |`
363
+ `| ${group.dimension} | ${group.agentCount} | ≥${group.threshold} | ${group.issues.length} | ℹ️ SOLO |`
364
364
  );
365
365
  }
366
366
  lines.push("");
package/dist/templates/cc-native/_cc-native/hooks/CLAUDE.md CHANGED
@@ -9,8 +9,11 @@
9
9
  | Hook | Trigger | Purpose |
10
10
  |------|---------|---------|
11
11
  | `cc-native-plan-review.ts` | PreToolUse: ExitPlanMode | Questions gate + plan review before user approval |
12
- | `add_plan_context.ts` | PostToolUse: AskUserQuestion, PreToolUse: Task | Mark questions asked; nudge Plan subagent to ask questions first |
12
+ | `mark_questions_asked.ts` | PostToolUse: AskUserQuestion | Marks questions-asked state after user answers |
13
+ | `enhance_plan_post_subagent.ts` | PostToolUse: Task | Post-subagent plan enhancement |
14
+ | `enhance_plan_post_write.ts` | PostToolUse: Write | Post-write plan enhancement |
13
15
  | `plan_questions_early.ts` | UserPromptSubmit | Inject Phase A clarification prompt in plan mode |
16
+ | `validate_task_prompt.ts` | PreToolUse: TaskCreate | Validates task creation prompts |
14
17
 
15
18
  ### Plan Review Architecture
16
19
 
@@ -217,7 +220,7 @@ Validate TypeScript syntax after editing hooks:
217
220
  bun --print "import('.aiwcli/_cc-native/hooks/cc-native-plan-review.ts')" 2>&1 | head -5
218
221
 
219
222
  # Or check imports resolve (dry run)
220
- bun build --no-bundle .aiwcli/_cc-native/hooks/add_plan_context.ts --outdir /dev/null 2>&1
223
+ bun build --no-bundle .aiwcli/_cc-native/hooks/mark_questions_asked.ts --outdir /dev/null 2>&1
221
224
  ```
222
225
 
223
226
  Hooks fail silently on import errors — verify after any import path changes.
@@ -235,3 +238,30 @@ Hooks fail silently on import errors — verify after any import path changes.
235
238
  | 2026-02-10 | **Migrated cc-native hooks from Python to TypeScript.** `cc-native-plan-review.ts` (async, parallel agent reviews via `Promise.all()`), `add_plan_context.ts`, `plan_questions_early.ts`. All hooks use `runHook()`/`runHookAsync()` entry points. Library code in `_cc-native/lib-ts/` (18 files). Settings.json updated to use `bun` runner. Python `.py` files kept as fallback until TS hooks verified. |
236
239
  | 2026-02-10 | Flipped TS logger stderr default to opt-in (`opts?.stderr === true`). Added `logBlocking()` for intentional stderr visibility. Removed redundant `{stderr: false}` from hook-utils.ts, user_prompt_submit.ts, context_monitor.ts. Added "Hook Error Visibility" section documenting visibility tiers and exit code behavior. |
237
240
  | 2026-02-10 | Fixed `debug.py` `context_path` crash. Added local try/catch around `maybeActivate` in `user_prompt_submit.ts` and `context_monitor.ts` to prevent stderr error display on non-critical I/O failures. Removed dead `context_path` from `_emitHookEnd` in `hook-utils.ts`. Added "Error Handling" section to CLAUDE.md. |
241
+ | 2026-02-21 | **Coding standards nudge injected in plan mode.** `plan_questions_early.ts` now emits `CODING_STANDARDS_NUDGE` after Phase A prompt — covers test-first design, file structure fit, and extensibility analysis. Standards reference doc at `plan-review/CODING-STANDARDS-CHECKLIST.md`. Post-write self-check added to `plan-enhancement.ts` `getPlanQualityReviewContext()`. |
242
+ | 2026-02-21 | **ContextLayer Audit:** Updated hook roster — removed stale `add_plan_context.ts`, added `mark_questions_asked.ts`, `enhance_plan_post_subagent.ts`, `enhance_plan_post_write.ts`, `validate_task_prompt.ts`. |
243
+
244
+ ---
245
+ ## Context Maintenance
246
+
247
+ **After modifying files in this directory:** scan the entries above — if any claim is now
248
+ false or incomplete, update this file before ending the task. Do not defer.
249
+
250
+ **Add** an entry only if an agent would fail without knowing it, it is not obvious from
251
+ the code, and it belongs at this scope (project-wide rule → root CLAUDE.md; WHY decision
252
+ → inline comment or ADR; inferable from code → nowhere).
253
+
254
+ **Remove** any entry that fails the falsifiability test: if removing it would not change
255
+ how an agent acts here, remove it. If a convention here conflicts with the codebase,
256
+ the codebase wins — update this file, do not work around it. Prune aggressively.
257
+
258
+ **Staleness anchor:** This file assumes `cc-native-plan-review.ts` exists. If it doesn't, this file
259
+ is stale — update or regenerate before relying on it.
260
+
261
+ **Trigger Audit or Generate:**
262
+ - Rename/move files or dirs → Audit
263
+ - >20% of files changed → Generate
264
+ - 30+ days without touching this file → Audit
265
+ - Agent mistake caused by this file → fix immediately, then Audit
266
+
267
+ <!-- context-layer: generated=2026-02-10 | last-audited=2026-02-21 | version=2 | dir-commits-at-audit=58 -->
package/dist/templates/cc-native/_cc-native/hooks/plan_questions_early.ts CHANGED
@@ -13,6 +13,30 @@ import { getProjectRoot } from "../../_shared/lib-ts/base/constants.js";
13
13
  import { loadHookInput, runHook, logDebug, logInfo, emitContext } from "../../_shared/lib-ts/base/hook-utils.js";
14
14
  import { wasEarlyQuestionsAsked } from "../lib-ts/cc-native-state.js";
15
15
 
16
+ // Unconditional injection by design — no code-detection gate.
17
+ // "When this plan involves code" is self-selecting; non-code plans ignore it.
18
+ // Soft framing per Anthropic Claude 4.x best practices (avoid MUST/MANDATORY overtriggering).
19
+ // Motivation per standard enables generalization better than threats.
20
+ // Generalizability disclaimer: not all codebases need all standards.
21
+ const CODING_STANDARDS_NUDGE = `## Coding Standards for Code Changes
22
+
23
+ When this plan creates or modifies production code, apply these standards — they address the
24
+ most common plan review failure modes:
25
+
26
+ 1. **Test-First Design** — Design interfaces from the test perspective first. Plans that
27
+ describe "implement then test" consistently fail review. Structure tests before implementation.
28
+ 2. **File Structure Fit** — Verify where similar things already live in this project before
29
+ proposing new files. Agents commonly pick plausible-but-wrong locations that don't match
30
+ existing conventions.
31
+ 3. **Extensibility Analysis** — Identify what features most commonly follow this one. Designs
32
+ that resist extension require expensive rewrites later.
33
+
34
+ These standards apply to production code in established codebases. For prototypes, scripts,
35
+ or exploratory work, use judgment on which apply.
36
+
37
+ **Full checklist:** \`.aiwcli/_cc-native/plan-review/CODING-STANDARDS-CHECKLIST.md\`
38
+ Read this file for detailed guidance on each standard.`;
39
+
16
40
  const PHASE_A_PROMPT = `## Plan Mode: Narrow the Approach After Exploration
17
41
 
18
42
  After exploring the codebase, use AskUserQuestion — one call, 3-4 questions — before drafting the plan.
@@ -56,6 +80,7 @@ function main(): void {
56
80
 
57
81
  logInfo("plan_questions_early", "Plan mode detected, injecting Phase A prompt");
58
82
  emitContext(PHASE_A_PROMPT);
83
+ emitContext(CODING_STANDARDS_NUDGE);
59
84
  }
60
85
 
61
86
  runHook(main, "plan_questions_early");
package/dist/templates/cc-native/_cc-native/lib-ts/plan-enhancement.ts CHANGED
@@ -37,5 +37,10 @@ Evaluate whether the plan captures decisions that would be lost when this sessio
37
37
  - What constraints exist that aren't obvious from the code
38
38
  - What would break if assumptions change
39
39
 
40
- If the plan has gaps, address them before presenting to the user.`;
40
+ If the plan has gaps, address them before presenting to the user.
41
+
42
+ ### Coding Standards Check
43
+ If this plan modifies code, verify it against the coding standards you read earlier:
44
+ test-first design, file structure conventions, extensibility. Which standards did you apply,
45
+ and which did you consciously skip (with reasoning)?`;
41
46
  }
package/dist/templates/cc-native/_cc-native/plan-review/CLAUDE.md CHANGED
@@ -13,6 +13,7 @@ When a Claude Code agent exits plan mode (`ExitPlanMode`), the plan review hook
13
13
  ```
14
14
  plan-review/
15
15
  ├── CLAUDE.md ← This file
16
+ ├── CODING-STANDARDS-CHECKLIST.md ← Standards injected during plan mode via plan_questions_early.ts
16
17
  ├── agents/
17
18
  │ ├── CLAUDE.md ← Agent file format, frontmatter fields, selection rules
18
19
  │ ├── PLAN-ORCHESTRATOR.md ← Orchestrator agent (complexity analysis)
package/dist/templates/cc-native/_cc-native/plan-review/CODING-STANDARDS-CHECKLIST.md ADDED
@@ -0,0 +1,75 @@
1
+ # Coding Standards Checklist
2
+
3
+ Standards that address the most common plan review failure modes. Reference this
4
+ when planning code changes in established codebases.
5
+
6
+ ---
7
+
8
+ ## 1. Test-First Design Thinking
9
+
10
+ Tests are an architectural constraint, not an afterthought. Design from the test
11
+ perspective first.
12
+
13
+ - **Interface-first:** Before describing implementation, ask: "Can I write the test
14
+ for this before the implementation exists?" If the answer is unclear, the interface
15
+ needs more thought.
16
+ - **Structure tests before code:** Plans that describe "implement then test" consistently
17
+ fail review. Restructure: define what the tests assert, then describe the implementation
18
+ that satisfies them.
19
+ - **Testability as architecture:** Design for dependency injection, interface seams, and
20
+ fakes. If a component can't be tested in isolation, the coupling is too tight.
21
+ - **Test categories:** Consider which test types apply — unit (isolated logic), integration
22
+ (module boundaries), contract (API surfaces), and characterization (existing behavior
23
+ preservation during refactoring).
24
+ - **Verification clarity:** Each planned change should have a corresponding verification
25
+ step that is binary-testable (pass/fail in one check, no subjective judgment).
26
+
27
+ ---
28
+
29
+ ## 2. File Structure & Codebase Convention Fit
30
+
31
+ Don't pick a "plausible" location — pick the location that matches the project's
32
+ established patterns.
33
+
34
+ - **Discover before proposing:** Before suggesting new files or directories, verify where
35
+ similar things already live in this project. Use Glob/Grep to find existing patterns.
36
+ - **Naming conventions:** Match existing module and file naming patterns. If the project
37
+ uses `kebab-case.ts`, don't introduce `camelCase.ts`. If hooks live in `hooks/`, don't
38
+ create a `hook-handlers/` directory.
39
+ - **Co-location patterns:** Check if the project follows co-location (tests next to source,
40
+ types with implementation) or separation (dedicated `__tests__/`, `types/` directories).
41
+ Follow what exists.
42
+ - **Import depth:** Verify that new files fit the existing import hierarchy. Adding a file
43
+ that requires imports to cross architectural boundaries (e.g., shared lib importing from
44
+ feature code) signals a structural problem.
45
+ - **Existing system boundaries:** Check if the project has documented system boundaries
46
+ (CLAUDE.md, architecture docs). New files should respect these boundaries rather than
47
+ create cross-cutting dependencies.
48
+
49
+ ---
50
+
51
+ ## 3. Extensibility & Future-Proofing Analysis
52
+
53
+ Balance: don't over-engineer (YAGNI), but don't create designs that actively resist
54
+ extension.
55
+
56
+ - **Adjacent features:** What features are most commonly built after this one? Does the
57
+ design accommodate those extensions without major restructuring?
58
+ - **Extension points:** Where would future developers need to hook in? Are those seams
59
+ accessible, or does the design require forking/copying to extend?
60
+ - **Configuration vs. code changes:** Will common customizations require code changes, or
61
+ can they be handled through configuration? Prefer the latter when the variation space
62
+ is predictable.
63
+ - **Data model flexibility:** Are data structures designed to accommodate likely additions
64
+ (new fields, new types) without breaking existing consumers?
65
+ - **Inversion of control:** Does the design allow callers to inject behavior, or does it
66
+ hardcode decisions that callers will need to override? Prefer interfaces and callbacks
67
+ over concrete implementations when variation is expected.
68
+
69
+ ---
70
+
71
+ ## Applicability
72
+
73
+ These standards apply to production code in established codebases with existing conventions.
74
+ For prototypes, scripts, spike explorations, or greenfield projects without established
75
+ patterns, use judgment on which standards apply — not all will be relevant.
package/dist/templates/cc-native/_cc-native/plan-review/lib/corroboration.ts CHANGED
@@ -1,23 +1,35 @@
1
1
  /**
2
2
  * Corroboration-based verdict computation for plan review.
3
3
  *
4
- * Replaces the old per-verdict aggregation with proportional thresholding:
5
- * high-severity issues in a dimension only block when the total count
6
- * exceeds the number of distinct agents contributing to that dimension.
4
+ * Uses agent-agreement thresholding: a dimension blocks only when a sufficient
5
+ * number of *distinct agents* independently flag it. This measures true
6
+ * corroboration (multiple independent reviewers converge) rather than issue
7
+ * density (one verbose agent floods a dimension).
7
8
  *
8
- * **Why proportional thresholding:**
9
- * The agent pool has dimensional imbalance (e.g., 10 completeness agents vs
10
- * 1 maintainability agent). A fixed "2+ agents agree = block" would mean
11
- * any 2 completeness agents always block. Proportional scaling (issues > 2×agents)
12
- * sets a fair bar regardless of how many agents focus on each dimension.
9
+ * **Algorithm:**
10
+ * For each dimension, compute: `effective_threshold = max(minAgreement, ceil(minRatio × totalAgents))`
11
+ * Block when `distinct_agents_in_dimension >= effective_threshold`.
12
+ *
13
+ * **Default config:** `minAgreement=2, minRatio=0.40`
14
+ * - At 6 agents: threshold=3 (50% must agree)
15
+ * - At 10 agents: threshold=4 (40% must agree)
16
+ * - At 20 agents: threshold=8 (40% must agree)
17
+ *
18
+ * **Why agent-agreement over issue-density:**
19
+ * The previous system (issues >= 2×agents_in_dimension) allowed a single agent
20
+ * to self-corroborate by raising 2+ issues, and made blocking harder as more
21
+ * agents covered a dimension (inverted scaling). Agent-agreement fixes both:
22
+ * a single agent can never self-corroborate, and more agents agreeing is a
23
+ * stronger signal, not a weaker one.
13
24
  *
14
25
  * **Convergence problem this solves:**
15
26
  * Agents with opposing philosophies (simplicity-guardian vs completeness-gaps)
16
27
  * produce contradictory high-severity issues. Because the old system treated
17
28
  * every agent's finding as independently authoritative, plans oscillated —
18
- * addressing one agent's feedback triggered the opposing agent.
29
+ * addressing one agent's feedback triggered the opposing agent. The minAgreement
30
+ * floor prevents any single agent's philosophy from blocking alone.
19
31
  *
20
- * **Revert path:** Change one line in cc-native-plan-review.ts back to
32
+ * **Revert path:** Change one line in review-pipeline.ts back to
21
33
  * `computeReviewDecision(allVerdicts)`. Old function kept in verdict.ts.
22
34
  */
23
35
 
@@ -30,22 +42,55 @@ import type {
30
42
  SoloFinding,
31
43
  } from "../../lib-ts/types.js";
32
44
 
45
+ /** Configuration for corroboration thresholds */
46
+ export interface CorroborationConfig {
47
+ /** Minimum distinct agents that must agree to trigger blocking (default: 2) */
48
+ minAgreement?: number;
49
+ /** Minimum fraction of total agent pool that must agree (default: 0.40) */
50
+ minRatio?: number;
51
+ }
52
+
53
+ const DEFAULT_MIN_AGREEMENT = 2;
54
+ const DEFAULT_MIN_RATIO = 0.40;
55
+
56
+ /**
57
+ * Compute the effective blocking threshold for a given agent pool size.
58
+ *
59
+ * Returns `max(minAgreement, ceil(minRatio × totalAgents))`.
60
+ * This ensures a fixed floor (no single-agent self-corroboration) while
61
+ * scaling proportionally at larger pool sizes.
62
+ */
63
+ export function getEffectiveThreshold(
64
+ totalAgents: number,
65
+ config: CorroborationConfig = {},
66
+ ): number {
67
+ const minAgreement = config.minAgreement ?? DEFAULT_MIN_AGREEMENT;
68
+ const minRatio = config.minRatio ?? DEFAULT_MIN_RATIO;
69
+ return Math.max(minAgreement, Math.ceil(totalAgents * minRatio));
70
+ }
71
+
33
72
  /**
34
73
  * Compute a corroboration-based review decision from all reviewer results.
35
74
  *
36
75
  * Algorithm:
37
76
  * 1. Collect all high-severity issues with a `dimension` field
38
77
  * 2. Group by dimension, tracking distinct agent names per group
39
- * 3. For each dimension: block if `issues.length > 2 × agentCount`
40
- * 4. Issues without `dimension` are unclassified (never block)
41
- * 5. Non-high issues are ignored (informational only)
78
+ * 3. Compute effective threshold: `max(minAgreement, ceil(minRatio × totalAgents))`
79
+ * 4. For each dimension: block if `distinct_agents >= effective_threshold`
80
+ * 5. Issues without `dimension` are unclassified (logged as warning, never block)
81
+ * 6. Non-high issues are ignored (informational only)
42
82
  *
43
83
  * @param allResults - Map of reviewer name → ReviewerResult (CLI + agent)
84
+ * @param config - Optional threshold configuration
44
85
  * @returns CorroborationResult with blocking groups, solo findings, and verdict
45
86
  */
46
87
  export function computeCorroboratedDecision(
47
88
  allResults: Record<string, ReviewerResult>,
89
+ config: CorroborationConfig = {},
48
90
  ): CorroborationResult {
91
+ const totalAgents = Object.keys(allResults).length;
92
+ const threshold = getEffectiveThreshold(totalAgents, config);
93
+
49
94
  // Accumulator: dimension → { issues, agentNames }
50
95
  const dimMap = new Map<
51
96
  IssueDimension,
@@ -66,7 +111,7 @@ export function computeCorroboratedDecision(
66
111
  // Only high-severity issues participate in corroboration
67
112
  if (issue.severity !== "high") continue;
68
113
 
69
- // Issues without dimension are unclassified — cannot block
114
+ // Issues without dimension are unclassified — logged but cannot block
70
115
  if (!issue.dimension) {
71
116
  unclassified.push({ agent: agentName, issue });
72
117
  continue;
@@ -82,14 +127,22 @@ export function computeCorroboratedDecision(
82
127
  }
83
128
  }
84
129
 
130
+ // Warn about unclassified issues so they don't silently disappear
131
+ if (unclassified.length > 0) {
132
+ const agents = [...new Set(unclassified.map(u => u.agent))];
133
+ process.stderr.write(
134
+ `[corroboration] WARNING: ${unclassified.length} high-severity issue(s) from [${agents.join(", ")}] lack dimension classification and cannot participate in corroboration\n`,
135
+ );
136
+ }
137
+
85
138
  const blocking: CorroboratedGroup[] = [];
86
139
  const solo: SoloFinding[] = [];
87
140
 
88
141
  for (const [dimension, group] of dimMap) {
89
142
  const agentCount = group.agentNames.size;
90
- const threshold = 2 * agentCount;
91
143
 
92
- if (group.issues.length >= threshold) {
144
+ // Block when enough distinct agents independently flag this dimension
145
+ if (agentCount >= threshold) {
93
146
  blocking.push({
94
147
  dimension,
95
148
  issues: group.issues,
package/oclif.manifest.json CHANGED
@@ -416,5 +416,5 @@
416
416
  ]
417
417
  }
418
418
  },
419
- "version": "0.13.0"
419
+ "version": "0.13.1"
420
420
  }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "aiwcli",
3
3
  "description": "AI Workflow CLI - Command-line interface for AI-powered workflows",
4
- "version": "0.13.0",
4
+ "version": "0.13.1",
5
5
  "author": "jofu-tofu",
6
6
  "bin": {
7
7
  "aiw": "bin/run.js"