npm - cclaw-cli - Versions diffs - 0.34.1 → 0.36.0 - Mend

cclaw-cli 0.34.1 → 0.36.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/dist/config.js +41 -2
package/dist/content/meta-skill.js +21 -0
package/dist/content/protocols.js +53 -2
package/dist/content/stages/plan.js +2 -1
package/dist/content/stages/spec.js +2 -0
package/dist/content/stages/tdd.js +15 -3
package/dist/doctor.js +31 -0
package/dist/install.d.ts +2 -1
package/dist/install.js +2 -1
package/dist/types.d.ts +37 -0
package/package.json +1 -1

package/dist/config.js CHANGED Viewed

@@ -21,8 +21,11 @@ const ALLOWED_CONFIG_KEYS = new Set([
     "gitHookGuards",
     "defaultTrack",
     "languageRulePacks",
-    "trackHeuristics"
+    "trackHeuristics",
+    "sliceReview"
 ]);
+const DEFAULT_SLICE_REVIEW_THRESHOLD = 5;
+const DEFAULT_SLICE_REVIEW_TRACKS = ["standard"];
 function configFixExample() {
     return `harnesses:
   - claude
@@ -210,6 +213,41 @@ export async function readConfig(projectRoot) {
             tracks
         };
     }
+    const sliceReviewRaw = parsed.sliceReview;
+    let sliceReview = undefined;
+    if (Object.prototype.hasOwnProperty.call(parsed, "sliceReview")) {
+        if (!isRecord(sliceReviewRaw)) {
+            throw configValidationError(fullPath, `"sliceReview" must be an object`);
+        }
+        const enabledRaw = sliceReviewRaw.enabled;
+        if (enabledRaw !== undefined && typeof enabledRaw !== "boolean") {
+            throw configValidationError(fullPath, `"sliceReview.enabled" must be a boolean`);
+        }
+        const thresholdRaw = sliceReviewRaw.filesChangedThreshold;
+        if (thresholdRaw !== undefined &&
+            (typeof thresholdRaw !== "number" || !Number.isInteger(thresholdRaw) || thresholdRaw < 1)) {
+            throw configValidationError(fullPath, `"sliceReview.filesChangedThreshold" must be a positive integer`);
+        }
+        const touchTriggers = validateStringArray(sliceReviewRaw.touchTriggers, "sliceReview.touchTriggers", fullPath);
+        const enforceRaw = sliceReviewRaw.enforceOnTracks;
+        let enforceOnTracks;
+        if (enforceRaw !== undefined) {
+            if (!Array.isArray(enforceRaw)) {
+                throw configValidationError(fullPath, `"sliceReview.enforceOnTracks" must be an array`);
+            }
+            const invalidTracks = enforceRaw.filter((value) => typeof value !== "string" || !FLOW_TRACK_SET.has(value));
+            if (invalidTracks.length > 0) {
+                throw configValidationError(fullPath, `"sliceReview.enforceOnTracks" must contain only: ${SUPPORTED_TRACKS_TEXT}`);
+            }
+            enforceOnTracks = [...new Set(enforceRaw)];
+        }
+        sliceReview = {
+            enabled: typeof enabledRaw === "boolean" ? enabledRaw : false,
+            filesChangedThreshold: typeof thresholdRaw === "number" ? thresholdRaw : DEFAULT_SLICE_REVIEW_THRESHOLD,
+            touchTriggers: touchTriggers ?? [],
+            enforceOnTracks: enforceOnTracks ?? DEFAULT_SLICE_REVIEW_TRACKS
+        };
+    }
     return {
         version: parsed.version ?? CCLAW_VERSION,
         flowVersion: parsed.flowVersion ?? FLOW_VERSION,
@@ -220,7 +258,8 @@ export async function readConfig(projectRoot) {
         gitHookGuards,
         defaultTrack,
         languageRulePacks,
-        trackHeuristics
+        trackHeuristics,
+        sliceReview
     };
 }
 export async function writeConfig(projectRoot, config) {

package/dist/content/meta-skill.js CHANGED Viewed

@@ -18,6 +18,27 @@ description: "Routing brain for cclaw. Decide whether to start/resume a stage, a
 If the user explicitly overrides a stage rule, record it in the artifact.
+## Skill-before-response gate
+If \`.cclaw/state/flow-state.json\` exists and \`currentStage\` is set,
+load the matching stage SKILL before producing **substantive** work
+(artifact edits, code, structured clarifying questions). Do not improvise
+from memory. Also load a contextual utility skill when the task clearly
+triggers it (security, performance, debugging, docs, finishing-a-branch,
+verification-before-completion).
+Substantive vs. non-substantive:
+- **Substantive** (must load skill first): proposing design, editing an
+  artifact, running gates, dispatching subagents, asking a
+  \`Decision Protocol\` question, declaring a stage done.
+- **Non-substantive** (skill load optional): one-line acknowledgement,
+  clarifying a typo, confirming a prior answer, pure conversation.
+If the current stage is ambiguous because \`flow-state.json\` is missing
+or corrupt, stop and route through \`/cc\` before any substantive
+response.
 ## Routing flow
 \`\`\`

package/dist/content/protocols.js CHANGED Viewed

@@ -19,11 +19,62 @@ Shared format for decisions that require user confirmation.
    - OpenCode/Codex: plain text options
 5. Wait for user choice before proceeding.
+## Decision skeleton
+Every Decision Protocol call — regardless of harness — follows this
+four-part skeleton. Do not skip a part; if a part is trivially empty,
+say so explicitly (e.g. "Re-ground: same branch, same task as prior
+turn").
+1. **Re-ground (1-2 sentences).** State the project, the active
+   feature slug, the active stage (from \`flow-state.json\`), and the
+   decision's plain-English context. Pull these values from the source
+   of truth, not from conversation memory.
+2. **Simplify (2-4 sentences).** Explain the choice in plain English a
+   smart 16-year-old could follow. No internal jargon, no raw function
+   names, no implementation trivia. Say what each option DOES and
+   what changes for the user.
+3. **Recommend.** One line of the form
+   \`RECOMMENDATION: Choose [Letter] because [one-line reason]\`.
+   Always prefer the more complete option unless an explicit constraint
+   says otherwise (see Completeness calibration below). Never present
+   options as equivalent when they are not.
+4. **Options.** Lettered options \`A) ... B) ... C) ...\`. Each option
+   includes one-line \`Completeness: X/10\` plus, when effort differs
+   noticeably, a \`(human: ~Xh / agent: ~Ym)\` estimate.
+## Completeness calibration
+Use the same 1-10 scale for every option so comparisons stay honest:
+- **10** = complete implementation: all stated edges handled,
+  traceable to spec, no known deferred work.
+- **7** = covers the happy path; one or two non-critical edges
+  deferred with an explicit follow-up.
+- **5** = partial; either drops edge cases silently or hands off
+  required work to a future run.
+- **3** = shortcut; skips spec criteria, violates an Iron Law, or
+  defers significant work without tracking.
+- **1** = acknowledged placeholder (\`TBD\`, \`TODO\`, "static for now").
+Calibration rules:
+- Mark any option at \`Completeness: ≤5\` and require the user to
+  acknowledge the gap before picking it.
+- If two options are both \`≥8\`, recommend the higher one.
+- "Static for now" / "we will add later" phrasing always scores \`≤3\`
+  and must be surfaced in Simplify, not buried in an option label.
 ## Ask format
 - One question per call.
-- Option labels are short and unambiguous.
-- If tool schema fails once, fall back to plain text immediately.
+- Option labels are short and unambiguous; the full reasoning lives in
+  Simplify + per-option Completeness.
+- If tool schema fails once, fall back to plain text immediately but
+  keep the skeleton (Re-ground / Simplify / RECOMMENDATION / lettered
+  Options with Completeness scores).
+- Log the chosen letter into the stage artifact's decision log with
+  the Completeness score; do not rely on chat history.
 ## Retry and escalation

package/dist/content/stages/plan.js CHANGED Viewed

@@ -25,6 +25,7 @@ export const PLAN = {
         "Group tasks into dependency batches — batch N+1 cannot start until batch N has verification evidence.",
         "Slice into vertical tasks — each task targets 2-5 minutes, produces one testable outcome, and touches one coherent area.",
         "Attach verification — every task has an acceptance criterion mapping and a concrete verification command.",
+        "Annotate slice-review metadata — if `.cclaw/config.yaml::sliceReview.enabled` is true, every task row additionally carries `touchCount` (rough number of files expected to change) and `touchPaths` (glob hints, e.g. `migrations/**`, `src/auth/**`). A task may set `highRisk: true` to force a review pass regardless of thresholds. These fields feed the TDD stage's Per-Slice Review checkpoint; when `sliceReview` is disabled they are optional.",
         "Map scope Locked Decisions — every D-XX from scope is referenced by at least one plan task (or explicitly marked deferred with reason).",
         "Run anti-placeholder + anti-scope-reduction scans — block `TODO/TBD/...` and phrasing like `v1`, `for now`, `later` for locked boundaries.",
         "Define checkpoints — mark points where progress should be validated before continuing.",
@@ -143,7 +144,7 @@ export const PLAN = {
     artifactValidation: [
         { section: "Dependency Graph", required: true, validationRule: "Ordering and parallel opportunities explicit. No circular dependencies." },
         { section: "Dependency Batches", required: true, validationRule: "Every task belongs to a batch. Each batch has an exit gate and dependency statement." },
-        { section: "Task List", required: true, validationRule: "Each task row includes ID, description, acceptance criterion, verification command, and effort estimate (S/M/L). Every task must also carry a minutes estimate within the 2-5 minute budget." },
+        { section: "Task List", required: true, validationRule: "Each task row includes ID, description, acceptance criterion, verification command, and effort estimate (S/M/L). Every task must also carry a minutes estimate within the 2-5 minute budget. When the sliceReview feature is enabled in the cclaw config, each task row additionally declares touchCount, touchPaths, and an optional highRisk flag so the TDD stage can decide whether a Per-Slice Review pass is required." },
         { section: "Acceptance Mapping", required: true, validationRule: "Every spec criterion is covered by at least one task." },
         { section: "Locked Decision Coverage", required: false, validationRule: "Every locked decision ID (D-XX) from scope is listed with linked task IDs or explicit defer rationale." },
         { section: "Risk Assessment", required: false, validationRule: "If present: per-task or per-batch risk identification with likelihood, impact, and mitigation strategy." },

package/dist/content/stages/spec.js CHANGED Viewed

@@ -31,6 +31,7 @@ export const SPEC = {
         "Express each requirement in observable terms.",
         "Resolve ambiguity before moving to plan. Challenge vague language.",
         "Capture assumptions explicitly, not implicitly.",
+        "**Chunk acceptance criteria for review.** When presenting the spec to the user for sign-off, deliver acceptance criteria in batches of 3-5 and **pause for explicit ACK** (via Decision Protocol) before sending the next batch. Do not dump the full criteria wall in one message — small batches surface objections earlier and keep the sign-off meaningful. Full spec writeup still lands in `04-spec.md`, but the conversation itself must be digestible.",
         "Require user confirmation on the written spec. **STOP.** Do NOT proceed to plan until user approves.",
         "For each criterion, ask: how would you test this? If the answer is unclear, rewrite.",
         "When encountering ambiguity, classify it before acting: (A) ask user for missing info, (B) enumerate interpretations and pick one with justification, (C) propose hypothesis with validation path. Do NOT silently resolve ambiguity."
@@ -40,6 +41,7 @@ export const SPEC = {
         "Capture constraints, assumptions, and edge cases.",
         "Build testability map: criterion -> test description.",
         "Confirm testability for each criterion.",
+        "Present acceptance criteria to the user in 3-5-item batches, pausing for explicit ACK between batches (see Interaction Protocol).",
         "Write spec artifact and request approval."
     ],
     requiredGates: [

package/dist/content/stages/tdd.js CHANGED Viewed

@@ -30,6 +30,7 @@ export const TDD = {
         "REFACTOR: Improve code quality — without changing behavior. Document what you changed and why.",
         "Record evidence — capture RED failure, GREEN output, and REFACTOR notes in the TDD artifact.",
         "Annotate traceability — link to plan task ID and spec criterion.",
+        "Per-Slice Review (conditional) — if `.cclaw/config.yaml::sliceReview.enabled` is true and the slice meets any trigger (touchCount >= filesChangedThreshold, touchPaths match touchTriggers, or highRisk=true), append a `## Per-Slice Review` entry for this slice before moving on (see the dedicated section below).",
         "Repeat for each slice — return to step 1 for the next plan slice."
     ],
     interactionProtocol: [
@@ -41,7 +42,8 @@ export const TDD = {
         "Run full suite, not partial checks, for GREEN validation.",
         "Refactor without changing behavior and document rationale (REFACTOR).",
         "Stop if regressions appear and fix before proceeding.",
-        "If a test passes unexpectedly, investigate: does the behavior already exist, or is the test wrong?"
+        "If a test passes unexpectedly, investigate: does the behavior already exist, or is the test wrong?",
+        "**Per-Slice Review checkpoint (conditional, opt-in).** When `.cclaw/config.yaml::sliceReview.enabled` is true, check every slice against the triggers before declaring it DONE. Triggers: `touchCount >= filesChangedThreshold`, any `touchPaths` match a `touchTriggers` glob, or the plan row declares `highRisk: true`. On a trigger, run two passes on the slice alone — (1) Spec-Compliance: trace RED/GREEN/REFACTOR evidence back to its plan task + spec criterion, noting edge cases the tests skip; (2) Quality: diff-scan for naming, error handling, dead code, simpler alternatives. Record both under `## Per-Slice Review` in `06-tdd.md`, naming the trigger that fired. Dispatch the `reviewer` subagent natively when available (log `fulfillmentMode: \"isolated\"`); otherwise fulfil via in-session role switch (`fulfillmentMode: \"role-switch\"`). Never fabricate an isolated pass from memory. Tracks outside `sliceReview.enforceOnTracks` still emit the section; doctor only escalates missed reviews on enforced tracks."
     ],
     process: [
         "Select slice and map to acceptance criterion.",
@@ -51,7 +53,7 @@ export const TDD = {
         "Run full tests and build checks.",
         "Perform refactor pass preserving behavior.",
         "Record RED, GREEN, and REFACTOR evidence in artifact.",
-        "Annotate traceability to plan task and spec criterion."
+        "Annotate traceability to plan task and spec criterion; on `sliceReview` triggers, append a Per-Slice Review entry before closing the slice."
     ],
     requiredGates: [
         { id: "tdd_red_test_written", description: "Failing tests exist before implementation changes." },
@@ -149,6 +151,15 @@ export const TDD = {
             ],
             stopGate: false
         },
+        {
+            title: "Per-Slice Review Audit (conditional)",
+            evaluationPoints: [
+                "When `.cclaw/config.yaml::sliceReview.enabled` is true: does every triggered slice (touchCount >= threshold, touchPaths match, or highRisk=true) carry a Per-Slice Review entry with BOTH a Spec-Compliance pass (plan task <-> spec criterion + edge-case notes) AND a Quality pass (diff-level naming/errors/dead code/simpler alternatives)?",
+                "Is the delegation `fulfillmentMode` recorded (`isolated` for a dispatched reviewer subagent, `role-switch` for an in-session pass) and does it match an entry in `.cclaw/state/delegation-log.json`?",
+                "On tracks listed in `sliceReview.enforceOnTracks`, are there zero missed triggered slices (doctor also surfaces this as a warning)?"
+            ],
+            stopGate: false
+        },
         {
             title: "State-over-Interaction + Beyoncé Coverage",
             evaluationPoints: [
@@ -177,7 +188,8 @@ export const TDD = {
         { section: "Verification Ladder", required: false, validationRule: "If present: per-slice verification tier (static, command, behavioral, human) with evidence for highest tier reached." },
         { section: "Coverage Targets", required: false, validationRule: "If present: per-module or per-code-type coverage thresholds with current values and measurement commands." },
         { section: "Test Pyramid Shape", required: false, validationRule: "If present: per-slice count of Small/Medium/Large tests added, to let reviewers verify the suite is not drifting top-heavy." },
-        { section: "Prove-It Reproduction", required: false, validationRule: "Required for bug-fix slices: original failing reproduction test (RED without fix), passing output with fix (GREEN), and a note confirming the test fails again if the fix is reverted." }
+        { section: "Prove-It Reproduction", required: false, validationRule: "Required for bug-fix slices: original failing reproduction test (RED without fix), passing output with fix (GREEN), and a note confirming the test fails again if the fix is reverted." },
+        { section: "Per-Slice Review", required: false, validationRule: "When `.cclaw/config.yaml::sliceReview.enabled` is true: per triggered slice, a two-part record — Spec-Compliance (slice <-> plan task <-> spec criterion trace plus edge-case notes) and Quality (diff-focused review of naming, error handling, dead code, simpler alternatives). Each entry names the trigger (touchCount, touchPaths glob, or highRisk) and the delegation fulfillmentMode (`isolated` when a reviewer subagent was dispatched natively; `role-switch` when fulfilled in-session). Slices that did not meet any trigger may list `not triggered` instead of a full pass." }
     ],
     batchExecutionAllowed: true
 };

package/dist/doctor.js CHANGED Viewed

@@ -1382,6 +1382,37 @@ export async function doctorChecks(projectRoot, options = {}) {
             ? "all test slices map to acceptance-linked tasks"
             : `orphaned test slices: ${trace.orphanedTests.join(", ")}`
     });
+    // Slice-review warning (opt-in via config.sliceReview.enabled).
+    // Fires when:
+    //   - sliceReview.enabled is true
+    //   - current track is listed in sliceReview.enforceOnTracks
+    //   - 06-tdd.md exists (so the slice loop actually started)
+    //   - artifact contains at least one slice marker (a `### Slice`,
+    //     `## Acceptance Mapping`, or `## RED` heading) AND the
+    //     `## Per-Slice Review` heading is absent
+    // Non-blocking — warnings guide the user toward adding the review
+    // section without failing doctor.
+    const sliceReviewConfig = parsedConfig?.sliceReview;
+    const sliceReviewEnabled = sliceReviewConfig?.enabled === true;
+    const sliceReviewEnforcedTracks = sliceReviewConfig?.enforceOnTracks ?? ["standard"];
+    const sliceReviewEnforcedHere = sliceReviewEnabled && sliceReviewEnforcedTracks.includes(activeTrack);
+    if (sliceReviewEnforcedHere && tddExists) {
+        const tddMarkdown = await fs.readFile(path.join(artifactsDir, "06-tdd.md"), "utf8");
+        const hasSliceSignal = /^###\s+Slice\b/im.test(tddMarkdown)
+            || /^##\s+Acceptance Mapping\b/im.test(tddMarkdown)
+            || /^##\s+RED\b/im.test(tddMarkdown);
+        const hasReviewHeading = /^##\s+Per-Slice Review\b/im.test(tddMarkdown);
+        const missing = hasSliceSignal && !hasReviewHeading;
+        checks.push({
+            name: "warning:slice_review:missing_section",
+            ok: !missing,
+            details: missing
+                ? `warning: sliceReview is enabled for track "${activeTrack}" and 06-tdd.md contains slice evidence but no "## Per-Slice Review" section. Add a Per-Slice Review entry for every triggered slice (touchCount >= ${sliceReviewConfig?.filesChangedThreshold ?? 5}, touchPaths match, or highRisk=true), or record "not triggered" explicitly.`
+                : hasReviewHeading
+                    ? `sliceReview section present in 06-tdd.md (track "${activeTrack}")`
+                    : `sliceReview enabled but no slice evidence yet in 06-tdd.md (track "${activeTrack}")`
+        });
+    }
     const gateEvidence = await verifyCurrentStageGateEvidence(projectRoot, flowState);
     checks.push({
         name: "gates:evidence:current_stage",

package/dist/install.d.ts CHANGED Viewed

@@ -11,7 +11,8 @@ export declare function syncCclaw(projectRoot: string): Promise<void>;
  * artifacts, state, or custom config keys. Only the `version` + `flowVersion`
  * stamps are rewritten so the on-disk config reflects the installed CLI;
  * `promptGuardMode`, `tddEnforcement`, `gitHookGuards`, `languageRulePacks`,
- * and `trackHeuristics` are preserved verbatim from the existing config.
+ * `trackHeuristics`, and `sliceReview` are preserved verbatim from the
+ * existing config.
  *
  * For an explicit reset, run `cclaw-cli uninstall && cclaw-cli init`
  * (after optionally archiving the current run via `/cc-ops archive`).

package/dist/install.js CHANGED Viewed

@@ -1139,7 +1139,8 @@ export async function syncCclaw(projectRoot) {
  * artifacts, state, or custom config keys. Only the `version` + `flowVersion`
  * stamps are rewritten so the on-disk config reflects the installed CLI;
  * `promptGuardMode`, `tddEnforcement`, `gitHookGuards`, `languageRulePacks`,
- * and `trackHeuristics` are preserved verbatim from the existing config.
+ * `trackHeuristics`, and `sliceReview` are preserved verbatim from the
+ * existing config.
  *
  * For an explicit reset, run `cclaw-cli uninstall && cclaw-cli init`
  * (after optionally archiving the current run via `/cc-ops archive`).

package/dist/types.d.ts CHANGED Viewed

@@ -41,6 +41,35 @@ export interface TrackHeuristicsConfig {
     /** Per-track matching rules. */
     tracks?: Partial<Record<FlowTrack, TrackHeuristicRule>>;
 }
+/**
+ * Opt-in plan-slice review heuristic.
+ *
+ * When enabled, the TDD stage skill is instructed to insert a
+ * `## Per-Slice Review` section into `06-tdd.md` for every plan slice
+ * whose estimated `touchCount` meets `filesChangedThreshold`, whose
+ * `touchPaths` match any `touchTriggers` glob, or whose plan row is
+ * flagged `highRisk: true`. The section records a short spec-compliance
+ * pass plus a short quality pass (delegated to the `reviewer` subagent
+ * when the harness supports native dispatch, otherwise fulfilled via
+ * an explicit in-session role switch with evidence).
+ *
+ * Track gating: `enforceOnTracks` lists the tracks where the doctor
+ * check escalates to a warning. Tracks outside this list still see
+ * the skill prose but leave the decision to the user.
+ *
+ * All fields optional; sensible defaults: disabled, threshold 5, no
+ * touch triggers, `enforceOnTracks: ["standard"]`.
+ */
+export interface SliceReviewConfig {
+    /** Turn the heuristic on (disabled by default). */
+    enabled?: boolean;
+    /** Minimum estimated touchCount for a slice to be eligible. */
+    filesChangedThreshold?: number;
+    /** Glob hints; any plan-task touchPath match triggers review. */
+    touchTriggers?: string[];
+    /** Tracks on which missed reviews escalate to a doctor warning. */
+    enforceOnTracks?: FlowTrack[];
+}
 export interface VibyConfig {
     version: string;
     flowVersion: string;
@@ -67,6 +96,14 @@ export interface VibyConfig {
      * If omitted, cclaw uses built-in defaults.
      */
     trackHeuristics?: TrackHeuristicsConfig;
+    /**
+     * Opt-in per-slice review heuristic. When enabled, the TDD skill
+     * requires a `## Per-Slice Review` section in `06-tdd.md` for slices
+     * that meet `filesChangedThreshold`, match `touchTriggers`, or set `highRisk`.
+     * Keeps obra's "fresh subagent + spec-then-quality review per task"
+     * discipline tractable without forcing it on tiny quick-track fixes.
+     */
+    sliceReview?: SliceReviewConfig;
 }
 export interface TransitionRule {
     from: FlowStage;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "cclaw-cli",
-  "version": "0.34.1",
+  "version": "0.36.0",
   "description": "Installer-first flow toolkit for coding agents",
   "type": "module",
   "bin": {