npm - cclaw-cli - Versions diffs - 7.0.2 → 7.0.4 - Mend

cclaw-cli 7.0.2 → 7.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/dist/artifact-linter/plan.js +79 -0
package/dist/content/meta-skill.js +11 -1
package/dist/content/skills.d.ts +7 -0
package/dist/content/skills.js +48 -1
package/dist/content/stage-schema.js +1 -0
package/dist/content/stages/plan.js +2 -0
package/dist/content/stages/review.js +1 -0
package/package.json +1 -1

package/dist/artifact-linter/plan.js CHANGED Viewed

@@ -5,6 +5,37 @@ import { FORBIDDEN_PLACEHOLDER_TOKENS, CONFIDENCE_FINDING_REGEX_SOURCE } from ".
 import fs from "node:fs/promises";
 import path from "node:path";
 import { PLAN_SPLIT_SMALL_PLAN_THRESHOLD, parseImplementationUnits, parseImplementationUnitParallelFields } from "../internal/plan-split-waves.js";
+const PARALLEL_EXEC_MANAGED_START = "<!-- parallel-exec-managed-start -->"; // opening marker of the parallel-exec managed block in the plan markdown
+const PARALLEL_EXEC_MANAGED_END = "<!-- parallel-exec-managed-end -->"; // closing marker paired with the start marker
+const TASK_ID_PATTERN = /\bT-\d{3}[a-z]?(?:\.\d{1,3})?\b/giu; // "T-" + exactly 3 digits, optional single letter, optional "." + 1-3 digits; flags: global, case-insensitive, unicode
+/**
+ * Extract every distinct T-NNN[a-z]?(.NNN)? id from a markdown body.
+ *
+ * Used by the `plan_parallel_exec_full_coverage` linter to compute the
+ * authored task set (from `## Task List`) vs. the wave-claimed task set
+ * (from inside `<!-- parallel-exec-managed-start -->`).
+ *
+ * TASK_ID_PATTERN matches case-insensitively, so each match is folded to
+ * one canonical spelling (uppercase `T`, lowercase suffix letter) before
+ * it is stored. Without the fold, `t-001` written in a wave row and
+ * `T-001` written in the Task List would be two different set members
+ * and the coverage comparison would report the task as uncovered.
+ *
+ * @param {string} body - Markdown text to scan; an empty string yields an empty set.
+ * @returns {Set<string>} Distinct task ids in canonical spelling.
+ */
+function extractTaskIds(body) {
+    const ids = new Set();
+    for (const [rawId] of body.matchAll(TASK_ID_PATTERN)) {
+        // Only the leading "T" and the optional suffix letter carry case;
+        // digits, "-" and "." pass through toLowerCase() unchanged.
+        ids.add(`T${rawId.slice(1).toLowerCase()}`);
+    }
+    return ids;
+}
+/**
+ * Return the body between the parallel-exec managed comment markers, or
+ * an empty string if the block is absent. The TDD wave parser uses the
+ * same delimiters; keeping the marker lookup local avoids cross-package
+ * import cycles in the linter.
+ *
+ * The end marker is looked up only after the first start marker. An
+ * end-marker string that happens to appear earlier in the document
+ * (for example quoted in prose) therefore does not hide a well-formed
+ * block that follows it.
+ *
+ * @param {string} planMarkdown - Full plan markdown text.
+ * @returns {string} Text strictly between the markers, or "" when the
+ *   start marker is missing or no end marker follows it.
+ */
+function extractParallelExecManagedBody(planMarkdown) {
+    const startIdx = planMarkdown.indexOf(PARALLEL_EXEC_MANAGED_START);
+    if (startIdx === -1) {
+        return "";
+    }
+    const bodyStart = startIdx + PARALLEL_EXEC_MANAGED_START.length;
+    // Begin the search at bodyStart so only a closing marker that comes
+    // after the opening marker can terminate the block.
+    const endIdx = planMarkdown.indexOf(PARALLEL_EXEC_MANAGED_END, bodyStart);
+    if (endIdx === -1) {
+        return "";
+    }
+    return planMarkdown.slice(bodyStart, endIdx);
+}
 export async function lintPlanStage(ctx) {
     const { projectRoot, track, raw, absFile, sections, findings, parsedFrontmatter, brainstormShortCircuitBody, brainstormShortCircuitActivated, staleDiagramAuditEnabled, isTrivialOverride } = ctx;
     evaluateInvestigationTrace(ctx, "Implementation Units");
@@ -277,4 +308,52 @@ export async function lintPlanStage(ctx) {
                 : "Parallel-ready units detected or plan is single-unit."
         });
     }
+    // plan_parallel_exec_full_coverage: every T-NNN task listed in the
+    // plan's Task List must be assigned to a slice inside the
+    // <!-- parallel-exec-managed-start --> block. Without this, TDD
+    // cannot fan out work the plan never authored as waves; the previous
+    // failure mode was `stage-complete tdd` succeeding when only the
+    // first batch of tasks had been wave-assigned.
+    //
+    // Spike rows (`S-N`) live in the same Task List but are excluded
+    // because they are wall-clock spikes that produce evidence files
+    // and are not part of the regular slice fan-out. A task is also
+    // excluded when its id appears under a `## Deferred Tasks` (or
+    // `## Backlog`) heading inside the plan. The plan should state an
+    // explicit reason there; this check only looks for the id itself.
+    if (strictPlanGuards) {
+        const taskListSection = sectionBodyByName(sections, "Task List") ?? "";
+        const authoredTaskIds = extractTaskIds(taskListSection);
+        // Collect deferred / backlog task ids so they don't trigger the
+        // "uncovered" finding. Both heading variants are accepted.
+        const deferredBody = (sectionBodyByName(sections, "Deferred Tasks") ?? "") +
+            "\n" +
+            (sectionBodyByName(sections, "Backlog") ?? "");
+        const deferredIds = extractTaskIds(deferredBody);
+        const parallelExecBody = extractParallelExecManagedBody(raw);
+        const claimedIds = extractTaskIds(parallelExecBody);
+        const uncovered = [];
+        for (const id of authoredTaskIds) {
+            if (claimedIds.has(id))
+                continue;
+            if (deferredIds.has(id))
+                continue;
+            uncovered.push(id);
+        }
+        uncovered.sort();
+        const blockPresent = parallelExecBody.length > 0;
+        const taskListPresent = authoredTaskIds.size > 0;
+        findings.push({
+            section: "plan_parallel_exec_full_coverage",
+            required: taskListPresent,
+            rule: "Every T-NNN task in `## Task List` must be assigned to at least one slice inside the `<!-- parallel-exec-managed-start -->` block (or moved to an explicit `## Deferred Tasks` / `## Backlog` section). TDD cannot fan out waves the plan never authored.",
+            found: taskListPresent && blockPresent && uncovered.length === 0,
+            details: !taskListPresent
+                ? "Task List section is empty or missing T-NNN ids; full-coverage check skipped."
+                : !blockPresent
+                    ? "`<!-- parallel-exec-managed-start -->` block is missing or empty. Author the Parallel Execution Plan with W-02..W-N covering every task before plan-final-approval."
+                    : uncovered.length === 0
+                        ? `Parallel Execution Plan covers all ${authoredTaskIds.size} authored task id(s); ${deferredIds.size} task id(s) are explicitly deferred.`
+                        : `Uncovered task id(s) — author waves for: ${uncovered.slice(0, 25).join(", ")}${uncovered.length > 25 ? `, … (${uncovered.length - 25} more)` : ""}. Either add slices for them inside <!-- parallel-exec-managed-start --> or move them under \`## Deferred Tasks\` with a reason.`
+        });
+    }
 }

package/dist/content/meta-skill.js CHANGED Viewed

@@ -57,6 +57,16 @@ If you think any of these, stop and follow the routing flow:
 - "I can answer from memory without loading the active stage skill." -> No. Load the skill first.
 - "Hook guard warned, but I can ignore it." -> No. Resolve the warning before continuing.
 - "I'll edit \`.cclaw/state\` directly to move faster." -> No. Use managed commands only.
+- "I'll just do the worker's job inline so we move faster." -> No. See the Controller dispatch discipline below.
+## Controller dispatch discipline (applies to every stage)
+cclaw stages have **mandatory delegations** (TDD: \`slice-builder\`; review: \`reviewer\` + \`security-reviewer\`; design: \`architect\`; scope: \`planner\`; etc.). The controller is the **orchestrator**, not the worker. When a stage declares a mandatory delegation:
+- **Dispatch via the harness Task tool.** Do NOT write the worker's output (slice code, review findings, architect notes) into the artifact yourself as a substitute for delegating. Editing \`06-tdd.md\` slice cards, \`07-review.md\` findings, or any other "result of mandatory worker" content inline in the controller chat is a protocol violation.
+- **Parallel by default when paths/lenses are independent.** TDD wave-fanout (disjoint \`claimedPaths\`) and review-army (independent reviewer lenses) MUST emit all parallel \`Task\` calls in a SINGLE controller message — not sequentially over multiple turns. The controller waits for all spans to return before reconciling.
+- **Record lifecycle on the same span** via \`delegation-record --status=scheduled|launched|acknowledged|completed\`; the worker emits its own \`--phase=…\` and evidence rows. A \`completed\` row without a matching ACK or dispatch surface is a forgery.
+- **Auto-advance when stage-complete returns ok.** When the helper reports a new \`currentStage\`, immediately load the next stage skill and continue. Announce \`Stage <prev> complete → entering <next>. Continuing.\` Do NOT pause for the user to retype \`/cc\` or say \"продолжай\" — that pause is the failure mode 7.0.2 explicitly removed. The only legitimate stop is a real blocker (missing user input, ambiguous decision, hook fail).
 ## Routing flow
@@ -140,7 +150,7 @@ Use session-injected knowledge digest first. Only stream full
 - Do not skip stages silently.
 - Do not claim gate completion without evidence.
-- Do not auto-advance after stage completion unless user asks.
+- DO auto-advance to the next stage after \`stage-complete\` returns ok (see Controller dispatch discipline). The user does not need to retype \`/cc\`.
 - Escalate after repeated failures (see decision protocol).
 `;
 }

package/dist/content/skills.d.ts CHANGED Viewed

@@ -26,6 +26,13 @@ export declare function behaviorAnchorBlock(stage: FlowStage): string;
  * Empty for non-TDD stages.
  */
 export declare function tddTopOfSkillBlock(stage: FlowStage): string;
+/**
+ * Review-only prelude: mandates parallel reviewer / security-reviewer dispatch
+ * via harness Task and forbids inline authoring of findings.
+ *
+ * Empty for non-review stages.
+ */
+export declare function reviewTopOfSkillBlock(stage: FlowStage): string;
 export declare function stageSkillFolder(stage: FlowStage): string;
 export declare function stageSkillMarkdown(stage: FlowStage, track?: FlowTrack, _packageVersion?: string | null): string;
 export declare function executingWavesSkillMarkdown(): string;

package/dist/content/skills.js CHANGED Viewed

@@ -217,6 +217,53 @@ The output names: \`waves[]\` (closed/open), \`nextDispatch.waveId\`, \`nextDisp
 Wave resume: reuse \`wave-status\` outputs and parallelize unfinished members instead of restarting finished slices.
+---
+`;
+}
+/**
+ * Prelude for the review stage skill: tells the controller to dispatch
+ * `reviewer` / `security-reviewer` in parallel through the harness Task
+ * tool and not to author findings inline.
+ *
+ * Any stage other than "review" gets an empty string.
+ */
+export function reviewTopOfSkillBlock(stage) {
+    const isReviewStage = stage === "review";
+    if (!isReviewStage) {
+        return "";
+    }
+    return `## Review orchestration primer
+**MANDATE — controller never authors findings inline.** In review the controller orchestrates; \`reviewer\` (functional/spec/correctness/architecture/perf/observability) and \`security-reviewer\` (security sweep + dependency/version audit) are the **mandatory delegated workers** that produce findings, lens coverage, and the verdict input. Typing \`## Layer 1 Findings\`, \`## Layer 2 Findings\`, \`## Lens Coverage\`, or \`## Final Verdict\` content directly into \`07-review.md\` in the controller chat is a protocol violation. The controller writes ONLY the reconciled multi-specialist verdict block AFTER all reviewer Tasks return.
+**Step 1 — Diff scope (always first):**
+\`git diff --stat <base>...HEAD\` and \`git diff --name-only <base>...HEAD\`.
+If the diff is empty, exit early with APPROVED (no changes to review).
+**Step 2 — Dispatch the review army in PARALLEL (single controller message):**
+| Lens                     | Worker                | Mandatory? |
+|--------------------------|-----------------------|------------|
+| Spec compliance / Layer 1 | \`reviewer\`         | yes        |
+| Layer 2 cross-slice / correctness / observability | \`reviewer\` | yes |
+| Security sweep + dep/version audit | \`security-reviewer\` | yes (or \`NO_SECURITY_IMPACT\` attestation) |
+| Adversarial second opinion | \`reviewer\` (adversarial framing) | only if trust boundaries moved OR diff is large+high-risk |
+Emit ONE \`Task\` per lens in a single controller message. For each lens:
+1. Append \`delegation-record --status=scheduled\` for the lens span (one row per lens; reuse the same \`spanId\` for the lens lifecycle).
+2. Append \`delegation-record --status=launched\` immediately after.
+3. Issue the harness Task call: \`Task(subagent_type=<harness reviewer/security-reviewer mapping>, description="<lens> review", prompt="<diff range, files, AC ids, upstream artifacts (spec, design, tdd Per-Slice Reviews), expected output schema for 07-review-army.json>")\`.
+4. The reviewer span ACKs locally and writes its findings/lens coverage to \`07-review-army.json\` (and the structured findings table in \`07-review.md\`) on its own — including \`NO_SECURITY_IMPACT\` rationale if a security pass yields zero findings.
+5. The controller waits for ALL lens spans to return before reconciling.
+**Step 3 — Reconcile and verdict:** after all lens spans complete:
+1. Run \`validateReviewArmy\` (helper or linter) on \`07-review-army.json\`.
+2. Dedup by fingerprint, mark multi-specialist confirmations.
+3. Confirm acceptance criteria coverage and Pre-Critic / Lens Coverage / Anti-sycophancy fields are present (linter requires them).
+4. Compute the final verdict: APPROVED, APPROVED_WITH_CONCERNS, or BLOCKED.
+5. If BLOCKED, emit \`ROUTE_BACK_TO_TDD\` with the blocking finding ids and the managed \`npx cclaw-cli internal rewind tdd\` command. Do NOT silently stop.
+**Step 4 — Auto-advance after stage-complete:** when \`stage-complete review\` returns \`ok\` with a new \`currentStage\` (typically \`ship\`), immediately load the next stage skill and continue. Announce \"Stage review complete → entering <next>. Continuing.\" and proceed without waiting for the user to retype \`/cc\`.
 ---
 `;
 }
@@ -643,7 +690,7 @@ If you are about to violate the Iron Law, STOP. No amount of urgency, partial pr
 </EXTREMELY-IMPORTANT>
-${renderTrackTerminology(tddTopOfSkillBlock(stage), trackContext)}${quickStartBlock(stage, track)}
+${renderTrackTerminology(tddTopOfSkillBlock(stage) + reviewTopOfSkillBlock(stage), trackContext)}${quickStartBlock(stage, track)}
 ${STAGE_LANGUAGE_POLICY_POINTER}
 ## Philosophy

package/dist/content/stage-schema.js CHANGED Viewed

@@ -294,6 +294,7 @@ const REQUIRED_GATE_IDS = {
         "plan_dependency_batches_defined",
         "plan_acceptance_mapped",
         "plan_execution_posture_recorded",
+        "plan_parallel_exec_full_coverage",
         "plan_wait_for_confirm"
     ],
     tdd: (track) => [

package/dist/content/stages/plan.js CHANGED Viewed

@@ -51,6 +51,7 @@ export const PLAN = {
             "Run anti-placeholder + anti-scope-reduction scans — block `TODO/TBD/...` and phrasing like `v1`, `for now`, `later` for locked boundaries.",
             "Define validation points — mark where progress must be checked before continuing, with concrete command and expected evidence.",
             "Define execution posture — record whether execution should be sequential, dependency-batched, parallel-safe, or blocked; include risk triggers and RED/GREEN/REFACTOR checkpoint/commit expectations when the repo workflow supports them. This fulfills the `plan_execution_posture_recorded` gate.",
+            "**Author the FULL Parallel Execution Plan.** Inside the `<!-- parallel-exec-managed-start -->` block, enumerate ALL waves W-02..W-N covering EVERY T-NNN task in `## Task List` — no `we'll author waves later`, `next batch only`, or open-ended Backlog handwave is acceptable. Each task gets a slice with `sliceId | taskId | dependsOn | claimedPaths | parallelizable | riskTier | lane`. Spike rows (`S-N`) and tasks marked `deferred` in an explicit `Deferred:` column may be omitted, but every other T-NNN must be claimed. This fulfills the `plan_parallel_exec_full_coverage` gate. The TDD stage downstream is a pure consumer of these waves — if the plan does not author them, TDD cannot fan out that work.",
             "WAIT_FOR_CONFIRM — write plan artifact and explicitly pause. **STOP.** Do NOT proceed until user confirms. Then close the stage with `node .cclaw/hooks/stage-complete.mjs plan` and tell user to run `/cc`."
         ],
         interactionProtocol: [
@@ -80,6 +81,7 @@ export const PLAN = {
             { id: "plan_dependency_batches_defined", description: "Tasks are grouped into executable batches with gate checks and execution posture." },
             { id: "plan_acceptance_mapped", description: "Each task maps to a spec acceptance criterion." },
             { id: "plan_execution_posture_recorded", description: "Execution posture is recorded before implementation handoff." },
+            { id: "plan_parallel_exec_full_coverage", description: "Every T-NNN task in `## Task List` (other than spikes/explicitly-deferred) is assigned to at least one slice inside the `<!-- parallel-exec-managed-start -->` block; TDD cannot fan out work that the plan never authored as waves." },
             { id: "plan_wait_for_confirm", description: "Execution blocked until explicit user confirmation." }
         ],
         requiredEvidence: [

package/dist/content/stages/review.js CHANGED Viewed

@@ -32,6 +32,7 @@ export const REVIEW = {
     },
     executionModel: {
         checklist: [
+            "**MANDATE — controller never authors review findings.** The controller orchestrates; `reviewer` and `security-reviewer` are mandatory delegated workers that produce the actual findings, lens coverage, and verdict input. **Dispatch them in parallel as harness Task subagents in a single controller message** — one Task per lens. Do NOT type `## Layer 1 Findings`, `## Layer 2 Findings`, `## Lens Coverage`, or `## Final Verdict` content into `07-review.md` yourself as a substitute for delegating. The controller's only writes to the review artifact are: structural scaffolding (section headings if the template did not pre-render them) and the reconciled multi-specialist verdict block AFTER all reviewer Tasks return.",
             "**Boundary with TDD (do NOT re-classify slice findings).** `tdd.Per-Slice Review` OWNS severity-classified findings WITHIN a single slice (correctness, edge cases, regression for that slice). `review` OWNS whole-diff Layer 1 (spec compliance) plus Layer 2 (cross-slice integration findings, security sweep, dependency/version audit, observability). When the same finding ID appears in both `06-tdd.md > Per-Slice Review` and `07-review.md` / `07-review-army.json`, the severity/disposition MUST match — the cross-artifact-duplication linter blocks otherwise.",
             "Diff Scope — Run `git diff` against base branch. If no diff, exit early with APPROVED (no changes to review). Scope the review to changed files unless blast-radius analysis requires wider inspection.",
             "Change-Size Check — ~100 lines = normal. ~300 lines = consider splitting. ~1000+ lines = strongly recommend stacked PRs. Flag large diffs to the user.",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "cclaw-cli",
-  "version": "7.0.2",
+  "version": "7.0.4",
   "description": "Installer-first flow toolkit for coding agents",
   "type": "module",
   "bin": {