npm - aiwcli - Versions diffs - 0.13.0 → 0.13.1 - Mend

aiwcli 0.13.0 → 0.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

package/dist/templates/cc-native/_cc-native/artifacts/lib/format.ts CHANGED Viewed

@@ -10,7 +10,7 @@ import type {
   ReviewerResult,
   DisplaySettings,
   CorroborationResult,
-} from "../types.js";
+} from "../../lib-ts/types.js";
 // ---------------------------------------------------------------------------
 // Markdown Formatting
@@ -75,14 +75,14 @@ export function formatCombinedMarkdown(
     if (corroboration.blocking.length > 0) {
       lines.push("### Blocking Dimensions\n");
       for (const group of corroboration.blocking) {
-        lines.push(`- **${group.dimension}**: ${group.issues.length} issues from ${group.agentCount} agents (threshold: ≥${group.threshold})`);
+        lines.push(`- **${group.dimension}**: ${group.agentCount} agents agree (threshold: ≥${group.threshold} agents, ${group.issues.length} issues)`);
       }
       lines.push("");
     }
     if (corroboration.solo.length > 0) {
       lines.push("### Solo Dimensions (informational)\n");
       for (const s of corroboration.solo) {
-        lines.push(`- **${s.dimension}**: ${s.issues.length} issues from ${s.agentCount} agents (threshold: >${s.threshold}, not exceeded)`);
+        lines.push(`- **${s.dimension}**: ${s.agentCount} agent${s.agentCount !== 1 ? "s" : ""} (threshold: ≥${s.threshold} agents, not met)`);
       }
       lines.push("");
     }
@@ -192,7 +192,7 @@ export function buildInlineReviewSummary(
     if (corroboration && dim) {
       const group = corroboration.blocking.find(g => g.dimension === dim);
       if (group) {
-        annotation = ` [CORROBORATED — ${group.issues.length} issues from ${group.agentCount} agents exceeds threshold ${group.threshold}]`;
+        annotation = ` [CORROBORATED — ${group.agentCount} agents agree, threshold ≥${group.threshold}]`;
       } else {
         annotation = " [perspective]";
       }
@@ -259,10 +259,10 @@ export function buildHighIssuesDocument(
 ): string {
   if (corroboration && corroboration.blocking.length > 0) {
     const lines = ["# Corroborated High-Severity Issues\n"];
-    lines.push("> Only issues from dimensions where the total count exceeded the proportional threshold are shown.\n");
+    lines.push("> Only issues from dimensions where enough distinct agents independently agreed are shown.\n");
     for (const group of corroboration.blocking) {
-      lines.push(`## ${group.dimension} (${group.issues.length} issues from ${group.agentCount} agents, threshold: ${group.threshold})\n`);
+      lines.push(`## ${group.dimension} (${group.agentCount} agents agree, threshold: ≥${group.threshold} agents, ${group.issues.length} issues)\n`);
       for (const { agent, issue } of group.issues) {
         const cat = issue.category ?? "general";
         const text = String(issue.issue ?? "").trim();
@@ -275,7 +275,7 @@ export function buildHighIssuesDocument(
     if (corroboration.solo.length > 0) {
       lines.push("---\n");
-      lines.push(`> ${corroboration.solo.length} dimension${corroboration.solo.length !== 1 ? "s" : ""} had issues below threshold (not blocking): ${corroboration.solo.map(s => `${s.dimension} (${s.issues.length}/${s.threshold})`).join(", ")}\n`);
+      lines.push(`> ${corroboration.solo.length} dimension${corroboration.solo.length !== 1 ? "s" : ""} had insufficient agent agreement (not blocking): ${corroboration.solo.map(s => `${s.dimension} (${s.agentCount}/${s.threshold} agents)`).join(", ")}\n`);
     }
     return lines.join("\n");
@@ -332,18 +332,18 @@ export function buildCorroborationReport(
   if (corroborationResult.blocking.length > 0) {
     lines.push("## Blocking Issues (Corroborated)");
     lines.push("");
-    lines.push("| Dimension | Issues | Agents | Threshold | Status |");
-    lines.push("|-----------|--------|--------|-----------|--------|");
+    lines.push("| Dimension | Agents Agreeing | Threshold | Issues | Status |");
+    lines.push("|-----------|----------------|-----------|--------|--------|");
     for (const group of corroborationResult.blocking) {
       lines.push(
-        `| ${group.dimension} | ${group.issues.length} | ${group.agentCount} | ${group.threshold} | ⛔ EXCEEDED |`
+        `| ${group.dimension} | ${group.agentCount} | ≥${group.threshold} | ${group.issues.length} | ⛔ CORROBORATED |`
       );
     }
     lines.push("");
     for (const group of corroborationResult.blocking) {
-      lines.push(`### ${group.dimension} (${group.issues.length} issues)`);
+      lines.push(`### ${group.dimension} (${group.agentCount} agents, ${group.issues.length} issues)`);
       lines.push("");
       for (const {agent, issue} of group.issues) {
         lines.push(`- **[${agent}]** ${issue.issue || "No description"}`);
@@ -355,12 +355,12 @@ export function buildCorroborationReport(
   if (corroborationResult.solo.length > 0) {
     lines.push("## Solo Findings (Below Threshold)");
     lines.push("");
-    lines.push("| Dimension | Issues | Agents | Threshold | Status |");
-    lines.push("|-----------|--------|--------|-----------|--------|");
+    lines.push("| Dimension | Agents Agreeing | Threshold | Issues | Status |");
+    lines.push("|-----------|----------------|-----------|--------|--------|");
     for (const group of corroborationResult.solo) {
       lines.push(
-        `| ${group.dimension} | ${group.issues.length} | ${group.agentCount} | ${group.threshold} | ℹ️ SOLO |`
+        `| ${group.dimension} | ${group.agentCount} | ≥${group.threshold} | ${group.issues.length} | ℹ️ SOLO |`
       );
     }
     lines.push("");

package/dist/templates/cc-native/_cc-native/hooks/CLAUDE.md CHANGED Viewed

@@ -9,8 +9,11 @@
 | Hook | Trigger | Purpose |
 |------|---------|---------|
 | `cc-native-plan-review.ts` | PreToolUse: ExitPlanMode | Questions gate + plan review before user approval |
-| `add_plan_context.ts` | PostToolUse: AskUserQuestion, PreToolUse: Task | Mark questions asked; nudge Plan subagent to ask questions first |
+| `mark_questions_asked.ts` | PostToolUse: AskUserQuestion | Marks questions-asked state after user answers |
+| `enhance_plan_post_subagent.ts` | PostToolUse: Task | Post-subagent plan enhancement |
+| `enhance_plan_post_write.ts` | PostToolUse: Write | Post-write plan enhancement |
 | `plan_questions_early.ts` | UserPromptSubmit | Inject Phase A clarification prompt in plan mode |
+| `validate_task_prompt.ts` | PreToolUse: TaskCreate | Validates task creation prompts |
 ### Plan Review Architecture
@@ -217,7 +220,7 @@ Validate TypeScript syntax after editing hooks:
 bun --print "import('.aiwcli/_cc-native/hooks/cc-native-plan-review.ts')" 2>&1 | head -5
 # Or check imports resolve (dry run)
-bun build --no-bundle .aiwcli/_cc-native/hooks/add_plan_context.ts --outdir /dev/null 2>&1
+bun build --no-bundle .aiwcli/_cc-native/hooks/mark_questions_asked.ts --outdir /dev/null 2>&1
 ```
 Hooks fail silently on import errors — verify after any import path changes.
@@ -235,3 +238,30 @@ Hooks fail silently on import errors — verify after any import path changes.
 | 2026-02-10 | **Migrated cc-native hooks from Python to TypeScript.** `cc-native-plan-review.ts` (async, parallel agent reviews via `Promise.all()`), `add_plan_context.ts`, `plan_questions_early.ts`. All hooks use `runHook()`/`runHookAsync()` entry points. Library code in `_cc-native/lib-ts/` (18 files). Settings.json updated to use `bun` runner. Python `.py` files kept as fallback until TS hooks verified. |
 | 2026-02-10 | Flipped TS logger stderr default to opt-in (`opts?.stderr === true`). Added `logBlocking()` for intentional stderr visibility. Removed redundant `{stderr: false}` from hook-utils.ts, user_prompt_submit.ts, context_monitor.ts. Added "Hook Error Visibility" section documenting visibility tiers and exit code behavior. |
 | 2026-02-10 | Fixed `debug.py` `context_path` crash. Added local try/catch around `maybeActivate` in `user_prompt_submit.ts` and `context_monitor.ts` to prevent stderr error display on non-critical I/O failures. Removed dead `context_path` from `_emitHookEnd` in `hook-utils.ts`. Added "Error Handling" section to CLAUDE.md. |
+| 2026-02-21 | **Coding standards nudge injected in plan mode.** `plan_questions_early.ts` now emits `CODING_STANDARDS_NUDGE` after Phase A prompt — covers test-first design, file structure fit, and extensibility analysis. Standards reference doc at `plan-review/CODING-STANDARDS-CHECKLIST.md`. Post-write self-check added to `plan-enhancement.ts` `getPlanQualityReviewContext()`. |
+| 2026-02-21 | **ContextLayer Audit:** Updated hook roster — removed stale `add_plan_context.ts`, added `mark_questions_asked.ts`, `enhance_plan_post_subagent.ts`, `enhance_plan_post_write.ts`, `validate_task_prompt.ts`. |
+---
+## Context Maintenance
+**After modifying files in this directory:** scan the entries above — if any claim is now
+false or incomplete, update this file before ending the task. Do not defer.
+**Add** an entry only if an agent would fail without knowing it, it is not obvious from
+the code, and it belongs at this scope (project-wide rule → root CLAUDE.md; WHY decision
+→ inline comment or ADR; inferable from code → nowhere).
+**Remove** any entry that fails the falsifiability test: if removing it would not change
+how an agent acts here, remove it. If a convention here conflicts with the codebase,
+the codebase wins — update this file, do not work around it. Prune aggressively.
+**Staleness anchor:** This file assumes `cc-native-plan-review.ts` exists. If it doesn't, this file
+is stale — update or regenerate before relying on it.
+**Trigger Audit or Generate:**
+- Rename/move files or dirs → Audit
+- More than 20% of files changed → Generate
+- 30+ days without touching this file → Audit
+- Agent mistake caused by this file → fix immediately, then Audit
+<!-- context-layer: generated=2026-02-10 | last-audited=2026-02-21 | version=2 | dir-commits-at-audit=58 -->

package/dist/templates/cc-native/_cc-native/hooks/plan_questions_early.ts CHANGED Viewed

@@ -13,6 +13,30 @@ import { getProjectRoot } from "../../_shared/lib-ts/base/constants.js";
 import { loadHookInput, runHook, logDebug, logInfo, emitContext } from "../../_shared/lib-ts/base/hook-utils.js";
 import { wasEarlyQuestionsAsked } from "../lib-ts/cc-native-state.js";
+// Unconditional injection by design — no code-detection gate.
+// "When this plan involves code" is self-selecting; non-code plans ignore it.
+// Soft framing per Anthropic Claude 4.x best practices (avoid MUST/MANDATORY overtriggering).
+// Motivation per standard enables generalization better than threats.
+// Generalizability disclaimer: not all codebases need all standards.
+const CODING_STANDARDS_NUDGE = `## Coding Standards for Code Changes
+When this plan creates or modifies production code, apply these standards — they address the
+most common plan review failure modes:
+1. **Test-First Design** — Design interfaces from the test perspective first. Plans that
+   describe "implement then test" consistently fail review. Structure tests before implementation.
+2. **File Structure Fit** — Verify where similar things already live in this project before
+   proposing new files. Agents commonly pick plausible-but-wrong locations that don't match
+   existing conventions.
+3. **Extensibility Analysis** — Identify what features most commonly follow this one. Designs
+   that resist extension require expensive rewrites later.
+These standards apply to production code in established codebases. For prototypes, scripts,
+or exploratory work, use judgment on which apply.
+**Full checklist:** \`.aiwcli/_cc-native/plan-review/CODING-STANDARDS-CHECKLIST.md\`
+Read this file for detailed guidance on each standard.`;
 const PHASE_A_PROMPT = `## Plan Mode: Narrow the Approach After Exploration
 After exploring the codebase, use AskUserQuestion — one call, 3-4 questions — before drafting the plan.
@@ -56,6 +80,7 @@ function main(): void {
   logInfo("plan_questions_early", "Plan mode detected, injecting Phase A prompt");
   emitContext(PHASE_A_PROMPT);
+  emitContext(CODING_STANDARDS_NUDGE);
 }
 runHook(main, "plan_questions_early");

package/dist/templates/cc-native/_cc-native/lib-ts/plan-enhancement.ts CHANGED Viewed

@@ -37,5 +37,10 @@ Evaluate whether the plan captures decisions that would be lost when this sessio
 - What constraints exist that aren't obvious from the code
 - What would break if assumptions change
-If the plan has gaps, address them before presenting to the user.`;
+If the plan has gaps, address them before presenting to the user.
+### Coding Standards Check
+If this plan modifies code, verify it against the coding standards you read earlier:
+test-first design, file structure conventions, extensibility. Which standards did you apply,
+and which did you consciously skip (with reasoning)?`;
 }

package/dist/templates/cc-native/_cc-native/plan-review/CLAUDE.md CHANGED Viewed

@@ -13,6 +13,7 @@ When a Claude Code agent exits plan mode (`ExitPlanMode`), the plan review hook
 ```
 plan-review/
 ├── CLAUDE.md            ← This file
+├── CODING-STANDARDS-CHECKLIST.md ← Standards injected during plan mode via plan_questions_early.ts
 ├── agents/
 │   ├── CLAUDE.md        ← Agent file format, frontmatter fields, selection rules
 │   ├── PLAN-ORCHESTRATOR.md   ← Orchestrator agent (complexity analysis)

package/dist/templates/cc-native/_cc-native/plan-review/CODING-STANDARDS-CHECKLIST.md ADDED Viewed

@@ -0,0 +1,75 @@
+# Coding Standards Checklist
+Standards that address the most common plan review failure modes. Reference this
+when planning code changes in established codebases.
+---
+## 1. Test-First Design Thinking
+Tests are an architectural constraint, not an afterthought. Design from the test
+perspective first.
+- **Interface-first:** Before describing implementation, ask: "Can I write the test
+  for this before the implementation exists?" If the answer is unclear, the interface
+  needs more thought.
+- **Structure tests before code:** Plans that describe "implement then test" consistently
+  fail review. Restructure: define what the tests assert, then describe the implementation
+  that satisfies them.
+- **Testability as architecture:** Design for dependency injection, interface seams, and
+  fakes. If a component can't be tested in isolation, the coupling is too tight.
+- **Test categories:** Consider which test types apply — unit (isolated logic), integration
+  (module boundaries), contract (API surfaces), and characterization (existing behavior
+  preservation during refactoring).
+- **Verification clarity:** Each planned change should have a corresponding verification
+  step that is binary-testable (pass/fail in one check, no subjective judgment).
+---
+## 2. File Structure & Codebase Convention Fit
+Don't pick a "plausible" location — pick the location that matches the project's
+established patterns.
+- **Discover before proposing:** Before suggesting new files or directories, verify where
+  similar things already live in this project. Use Glob/Grep to find existing patterns.
+- **Naming conventions:** Match existing module and file naming patterns. If the project
+  uses `kebab-case.ts`, don't introduce `camelCase.ts`. If hooks live in `hooks/`, don't
+  create a `hook-handlers/` directory.
+- **Co-location patterns:** Check if the project follows co-location (tests next to source,
+  types with implementation) or separation (dedicated `__tests__/`, `types/` directories).
+  Follow what exists.
+- **Import depth:** Verify that new files fit the existing import hierarchy. Adding a file
+  that requires imports to cross architectural boundaries (e.g., shared lib importing from
+  feature code) signals a structural problem.
+- **Existing system boundaries:** Check if the project has documented system boundaries
+  (CLAUDE.md, architecture docs). New files should respect these boundaries rather than
+  create cross-cutting dependencies.
+---
+## 3. Extensibility & Future-Proofing Analysis
+Balance: don't over-engineer (YAGNI), but don't create designs that actively resist
+extension.
+- **Adjacent features:** What features are most commonly built after this one? Does the
+  design accommodate those extensions without major restructuring?
+- **Extension points:** Where would future developers need to hook in? Are those seams
+  accessible, or does the design require forking/copying to extend?
+- **Configuration vs. code changes:** Will common customizations require code changes, or
+  can they be handled through configuration? Prefer the latter when the variation space
+  is predictable.
+- **Data model flexibility:** Are data structures designed to accommodate likely additions
+  (new fields, new types) without breaking existing consumers?
+- **Inversion of control:** Does the design allow callers to inject behavior, or does it
+  hardcode decisions that callers will need to override? Prefer interfaces and callbacks
+  over concrete implementations when variation is expected.
+---
+## Applicability
+These standards apply to production code in established codebases with existing conventions.
+For prototypes, scripts, spike explorations, or greenfield projects without established
+patterns, use judgment on which standards apply — not all will be relevant.

package/dist/templates/cc-native/_cc-native/plan-review/lib/corroboration.ts CHANGED Viewed

@@ -1,23 +1,35 @@
 /**
  * Corroboration-based verdict computation for plan review.
  *
- * Replaces the old per-verdict aggregation with proportional thresholding:
- * high-severity issues in a dimension only block when the total count
- * exceeds 2× the number of distinct agents contributing to that dimension.
+ * Uses agent-agreement thresholding: a dimension blocks only when a sufficient
+ * number of *distinct agents* independently flag it. This measures true
+ * corroboration (multiple independent reviewers converge) rather than issue
+ * density (one verbose agent floods a dimension).
  *
- * **Why proportional thresholding:**
- * The agent pool has dimensional imbalance (e.g., 10 completeness agents vs
- * 1 maintainability agent). A fixed "2+ agents agree = block" would mean
- * any 2 completeness agents always block. Proportional scaling (issues > 2×agents)
- * sets a fair bar regardless of how many agents focus on each dimension.
+ * **Algorithm:**
+ * For each dimension, compute: `effective_threshold = max(minAgreement, ceil(minRatio × totalAgents))`
+ * Block when `distinct_agents_in_dimension >= effective_threshold`.
+ *
+ * **Default config:** `minAgreement=2, minRatio=0.40`
+ * - At 6 agents: threshold=3 (50% must agree)
+ * - At 10 agents: threshold=4 (40% must agree)
+ * - At 20 agents: threshold=8 (40% must agree)
+ *
+ * **Why agent-agreement over issue-density:**
+ * The previous system (issues >= 2×agents_in_dimension) allowed a single agent
+ * to self-corroborate by raising 2+ issues, and made blocking harder as more
+ * agents covered a dimension (inverted scaling). Agent-agreement fixes both:
+ * a single agent can never self-corroborate, and more agents agreeing is a
+ * stronger signal, not a weaker one.
  *
  * **Convergence problem this solves:**
  * Agents with opposing philosophies (simplicity-guardian vs completeness-gaps)
  * produce contradictory high-severity issues. Because the old system treated
  * every agent's finding as independently authoritative, plans oscillated —
- * addressing one agent's feedback triggered the opposing agent.
+ * addressing one agent's feedback triggered the opposing agent. The minAgreement
+ * floor prevents any single agent's philosophy from blocking alone.
  *
- * **Revert path:** Change one line in cc-native-plan-review.ts back to
+ * **Revert path:** Change one line in review-pipeline.ts back to
  * `computeReviewDecision(allVerdicts)`. Old function kept in verdict.ts.
  */
@@ -30,22 +42,55 @@ import type {
   SoloFinding,
 } from "../../lib-ts/types.js";
+/** Configuration for corroboration thresholds */
+export interface CorroborationConfig {
+  /** Minimum distinct agents that must agree to trigger blocking (default: 2) */
+  minAgreement?: number;
+  /** Minimum fraction of total agent pool that must agree (default: 0.40) */
+  minRatio?: number;
+}
+const DEFAULT_MIN_AGREEMENT = 2;
+const DEFAULT_MIN_RATIO = 0.40;
+/**
+ * Compute the effective blocking threshold for a given agent pool size.
+ *
+ * Returns `max(minAgreement, ceil(minRatio × totalAgents))`.
+ * This ensures a fixed floor (with the default `minAgreement` of 2, no single
+ * agent can self-corroborate) while scaling proportionally at larger pool sizes.
+ */
+export function getEffectiveThreshold(
+  totalAgents: number,
+  config: CorroborationConfig = {},
+): number {
+  const minAgreement = config.minAgreement ?? DEFAULT_MIN_AGREEMENT;
+  const minRatio = config.minRatio ?? DEFAULT_MIN_RATIO;
+  return Math.max(minAgreement, Math.ceil(totalAgents * minRatio));
+}
 /**
  * Compute a corroboration-based review decision from all reviewer results.
  *
  * Algorithm:
  * 1. Collect all high-severity issues with a `dimension` field
  * 2. Group by dimension, tracking distinct agent names per group
- * 3. For each dimension: block if `issues.length > 2 × agentCount`
- * 4. Issues without `dimension` are unclassified (never block)
- * 5. Non-high issues are ignored (informational only)
+ * 3. Compute effective threshold: `max(minAgreement, ceil(minRatio × totalAgents))`
+ * 4. For each dimension: block if `distinct_agents >= effective_threshold`
+ * 5. Issues without `dimension` are unclassified (logged as warning, never block)
+ * 6. Non-high issues are ignored (informational only)
  *
  * @param allResults - Map of reviewer name → ReviewerResult (CLI + agent)
+ * @param config - Optional threshold configuration
  * @returns CorroborationResult with blocking groups, solo findings, and verdict
  */
 export function computeCorroboratedDecision(
   allResults: Record<string, ReviewerResult>,
+  config: CorroborationConfig = {},
 ): CorroborationResult {
+  const totalAgents = Object.keys(allResults).length;
+  const threshold = getEffectiveThreshold(totalAgents, config);
   // Accumulator: dimension → { issues, agentNames }
   const dimMap = new Map<
     IssueDimension,
@@ -66,7 +111,7 @@ export function computeCorroboratedDecision(
       // Only high-severity issues participate in corroboration
       if (issue.severity !== "high") continue;
-      // Issues without dimension are unclassified — cannot block
+      // Issues without dimension are unclassified — logged but cannot block
       if (!issue.dimension) {
         unclassified.push({ agent: agentName, issue });
         continue;
@@ -82,14 +127,22 @@ export function computeCorroboratedDecision(
     }
   }
+  // Warn about unclassified issues so they don't silently disappear
+  if (unclassified.length > 0) {
+    const agents = [...new Set(unclassified.map(u => u.agent))];
+    process.stderr.write(
+      `[corroboration] WARNING: ${unclassified.length} high-severity issue(s) from [${agents.join(", ")}] lack dimension classification and cannot participate in corroboration\n`,
+    );
+  }
   const blocking: CorroboratedGroup[] = [];
   const solo: SoloFinding[] = [];
   for (const [dimension, group] of dimMap) {
     const agentCount = group.agentNames.size;
-    const threshold = 2 * agentCount;
-    if (group.issues.length >= threshold) {
+    // Block when enough distinct agents independently flag this dimension
+    if (agentCount >= threshold) {
       blocking.push({
         dimension,
         issues: group.issues,

package/oclif.manifest.json CHANGED Viewed

@@ -416,5 +416,5 @@
       ]
     }
   },
-  "version": "0.13.0"
+  "version": "0.13.1"
 }

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "aiwcli",
   "description": "AI Workflow CLI - Command-line interface for AI-powered workflows",
-  "version": "0.13.0",
+  "version": "0.13.1",
   "author": "jofu-tofu",
   "bin": {
     "aiw": "bin/run.js"