npm - cclaw-cli - Versions diffs - 0.51.23 → 0.51.25 - Mend

cclaw-cli 0.51.23 → 0.51.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42)

package/README.md +135 -414
package/dist/artifact-linter.js +10 -6
package/dist/config.d.ts +1 -1
package/dist/config.js +28 -3
package/dist/content/core-agents.d.ts +128 -2
package/dist/content/core-agents.js +291 -13
package/dist/content/examples.js +21 -10
package/dist/content/next-command.js +10 -6
package/dist/content/reference-patterns.d.ts +18 -0
package/dist/content/reference-patterns.js +391 -0
package/dist/content/seed-shelf.js +73 -8
package/dist/content/skills.js +39 -34
package/dist/content/stage-common-guidance.js +19 -3
package/dist/content/stage-schema.d.ts +12 -0
package/dist/content/stage-schema.js +224 -24
package/dist/content/stages/_lint-metadata/index.js +3 -2
package/dist/content/stages/brainstorm.js +27 -18
package/dist/content/stages/design.js +27 -18
package/dist/content/stages/review.js +20 -9
package/dist/content/stages/schema-types.d.ts +9 -2
package/dist/content/stages/scope.js +21 -10
package/dist/content/stages/ship.js +3 -2
package/dist/content/stages/tdd.js +18 -13
package/dist/content/start-command.js +3 -2
package/dist/content/status-command.js +9 -4
package/dist/content/subagents.js +336 -38
package/dist/content/templates.js +182 -25
package/dist/delegation.d.ts +2 -0
package/dist/delegation.js +27 -6
package/dist/doctor.js +167 -25
package/dist/flow-state.d.ts +1 -0
package/dist/flow-state.js +1 -0
package/dist/gate-evidence.js +25 -2
package/dist/install.js +72 -8
package/dist/internal/advance-stage.js +179 -26
package/dist/knowledge-store.js +30 -6
package/dist/run-archive.js +11 -0
package/dist/run-persistence.js +35 -10
package/dist/tdd-verification-evidence.d.ts +17 -0
package/dist/tdd-verification-evidence.js +43 -0
package/dist/types.d.ts +10 -0
package/package.json +1 -1

package/dist/content/templates.js CHANGED

@@ -29,10 +29,33 @@ export const ARTIFACT_TEMPLATES = {
 ### Discovered context
 - (paths, prior artifacts, seeds, prompt fragments — referenced by downstream stages, or \`- None.\`)
-## Problem
-- **What we're solving:**
-- **Success criteria:**
-- **Constraints:**
+## Reference Pattern Candidates
+| Pattern / source | Reusable invariant | Disposition (accept/reject/defer) | Why |
+|---|---|---|---|
+|  |  |  |  |
+## Problem Decision Record
+- **Depth:** lite | standard | deep
+- **Frame type:** product | technical-maintenance
+### Product framing (use when applicable)
+- **Persona / user:**
+- **Job to be done:**
+- **Pain / trigger:**
+- **Value hypothesis:**
+- **Evidence / signal:**
+- **Success metric:**
+- **Why now:**
+- **Do-nothing consequence:**
+- **Non-goals:**
+### Technical-maintenance framing (use when product framing is not applicable)
+- **Affected operator/developer:**
+- **Current failure mode:**
+- **Expected operational improvement:**
+- **Verification signal:**
+- **Do-nothing cost:**
+- **Non-goals:**
 ## Premise Check
 - **Right problem?** (yes/no + one-line justification — take a position)
@@ -42,12 +65,17 @@ export const ARTIFACT_TEMPLATES = {
 ## How Might We
 - *How might we …?* — one line naming the user, the desired outcome, and the binding constraint.
+## Clarity Gate
+- Ambiguity score (0.00-1.00):
+- Decision boundaries (what this stage will decide):
+- Reaffirmed non-goals:
+- Residual-risk handoff to scope:
 ## Sharpening Questions
+> Ask one decision-changing question at a time. For concrete early exits, record \`None - early exit\` with rationale.
 | # | Question | Answer / Assumption | Decision impact |
 |---|---|---|---|
 | 1 |  |  |  |
-| 2 |  |  |  |
-| 3 |  |  |  |
 ## Clarifying Questions
 | # | Question | Answer | Decision impact |
@@ -55,7 +83,7 @@ export const ARTIFACT_TEMPLATES = {
 | 1 |  |  |  |
 ## Approach Tier
-- Tier: Lightweight | Standard | Deep
+- Tier: lite | standard | deep
 - Why this tier:
 ## Short-Circuit Decision
@@ -64,7 +92,7 @@ export const ARTIFACT_TEMPLATES = {
 - Scope handoff:
 ## Approaches
-| Approach | Role | Upside | Architecture | Trade-offs | Reuses | Recommendation |
+| Approach | Role | Upside | Architecture | Trade-offs | Reuses / reference pattern | Recommendation |
 |---|---|---|---|---|---|---|
 | A | baseline | modest |  |  |  |  |
 | B | challenger | high |  |  |  |  |
@@ -80,7 +108,7 @@ export const ARTIFACT_TEMPLATES = {
 - **Approach:**
 - **Rationale:** Trace this to the prior Approach Reaction.
 - **Approval:** pending
-- **Next-stage handoff:** On standard track, hand this to \`scope\`; on medium track, hand this directly to \`spec\` with explicit requirements/constraints.
+- **Next-stage handoff:** On standard track, hand this to \`scope\`; on medium track, hand this directly to \`spec\`. Include upstream decisions used, drift, confidence, unresolved questions, risk hints, and non-goals.
 ## Not Doing
 - (3-5 things this brainstorm is *not* committing to — distinct from \`Deferred\`. These will not appear in scope unless the user explicitly opts in.)
@@ -165,8 +193,35 @@ ${SEED_SHELF_SECTION}
 | HOUR 4-5 (integration) |  |  |  |
 | HOUR 6+ (polish/tests) |  |  |  |
+## Scope Contract
+- **Selected mode:** HOLD SCOPE | SELECTIVE EXPANSION | SCOPE EXPANSION | SCOPE REDUCTION
+- **In scope:**
+- **Out of scope:**
+- **Requirements:**
+- **Locked decisions:**
+- **Discretion areas:**
+- **Deferred ideas:**
+- **Accepted reference ideas:**
+- **Rejected reference ideas:**
+- **Success definition:**
+- **Design handoff:**
+## Decision Drivers
+| Driver | Weight (1-5) | Option A | Option B | Option C | Notes |
+|---|---|---|---|---|---|
+| Value impact |  |  |  |  |  |
+| Risk reduction |  |  |  |  |  |
+| Reversibility |  |  |  |  |  |
+| Delivery effort |  |  |  |  |  |
+| Timeline fit |  |  |  |  |  |
+## Scope Completeness Score
+- Score (0.00-1.00):
+- What is still uncertain:
+- Blockers requiring escalation:
 ## Scope Mode
-- [ ] SCOPE EXPANSION — dream bigger; user explicitly opts into the larger product slice.
+- [ ] SCOPE EXPANSION — explore ambitious alternatives; user explicitly opts into the larger product slice.
 - [ ] SELECTIVE EXPANSION — hold baseline scope and cherry-pick one high-leverage addition.
 - [ ] HOLD SCOPE — preserve the approved brainstorm direction with maximum rigor.
 - [ ] SCOPE REDUCTION — strip to the smallest useful wedge when risk/blast radius is too high.
@@ -174,9 +229,29 @@ ${SEED_SHELF_SECTION}
 ## Mode-Specific Analysis
 | Selected mode | Rationale | Depth |
 |---|---|---|
-|  |  | default / deep |
+|  |  | lite / standard / deep |
+> Default path: one selected-mode row plus rationale. Deep/high-risk scope may expand with optional evidence headings below.
+## Landscape Check
+- Optional for EXPAND/SELECTIVE/deep; omit for compact HOLD SCOPE.
+## Taste Calibration
+- Optional quality-bar references from in-repo modules/files.
+## Reference Pattern Registry
+| Pattern / source | Invariant to preserve | Disposition (accepted/rejected/deferred) | Scope boundary impact |
+|---|---|---|---|
+|  |  |  |  |
-> Default path: one selected-mode row plus rationale. Deep/high-risk scope may expand below with mode-specific analysis.
+## Reference Pull
+- Optional evidence from \`/Users/zuevrs/Downloads/references\`; list accepted/rejected ideas or \`Not needed - compact scope\`.
+## Ambitious Alternatives
+- Optional for SCOPE EXPANSION/SELECTIVE; list larger alternatives and disposition.
+## Ruthless Minimum Slice
+- Optional for SCOPE REDUCTION/high-risk scope; define the smallest useful wedge.
 ## Requirements (stable IDs)
 | ID | Requirement (observable outcome) | Priority | Source (origin doc / prompt line) |
@@ -241,6 +316,9 @@ ${SEED_SHELF_SECTION}
 ## Scope Summary
 - Selected mode: (one of \`SCOPE EXPANSION\` | \`SELECTIVE EXPANSION\` | \`HOLD SCOPE\` | \`SCOPE REDUCTION\`)
+- Confidence: high | medium | low
+- Drift from brainstorm: None / <specific drift>
+- Unresolved questions: None / <questions>
 - Strongest challenges resolved:
 - Recommended path:
 - Accepted scope:
@@ -291,7 +369,7 @@ ${SEED_SHELF_SECTION}
 ## Compact-First Scaffold
 - Default to the compact design spine unless risk requires Standard/Deep add-ons.
-- Compact required spine: Codebase Investigation, Architecture Boundaries, Architecture Diagram, Data Flow, Failure Mode Table, Test Strategy, and Completion Dashboard.
+- Compact required spine: Upstream Handoff, Codebase Investigation, Engineering Lock, Architecture Boundaries, Architecture Diagram, Data Flow, Failure Mode Table, Test Strategy, Spec Handoff, and Completion Dashboard.
 - Mark optional Standard/Deep sections as \`Omitted - compact design\` when they do not apply; do not expand the scaffold just to fill empty tables.
 ## Upstream Handoff
@@ -302,9 +380,19 @@ ${SEED_SHELF_SECTION}
 - Drift from upstream (or \`None\`):
 ## Codebase Investigation
-| File | Current responsibility | Patterns discovered |
-|---|---|---|
-|  |  |  |
+| File | Current responsibility | Patterns discovered | Existing fit / reuse candidate |
+|---|---|---|---|
+|  |  |  |  |
+## Engineering Lock
+| Decision area | Chosen path | Shadow alternative | Switch trigger | Failure/rescue/degraded behavior | Verification evidence | Confidence |
+|---|---|---|---|---|---|---|
+|  |  |  |  |  |  |  |
+## Architecture Decision Record (ADR)
+| ADR ID | Context | Decision | Alternatives considered | Consequences | Reversal trigger |
+|---|---|---|---|---|---|
+| ADR-1 |  |  |  |  |  |
 ## Search Before Building
 | Layer | Label | What to reuse first |
@@ -336,9 +424,9 @@ ${MARKDOWN_CODE_FENCE}
 ## Data-Flow Shadow Paths
 - Standard/Deep add-on; omit when compact design does not need a shadow path.
 <!-- diagram: data-flow-shadow-paths -->
-| Path | Trigger | Fallback/Degrade behavior |
-|---|---|---|
-|  |  |  |
+| Chosen path | Shadow alternative | Switch trigger | Failure/rescue/degraded behavior | Verification evidence |
+|---|---|---|---|---|
+|  |  |  |  |  |
 ## Error Flow Diagram
 - Standard/Deep add-on; omit when the Failure Mode Table is sufficient.
@@ -387,6 +475,8 @@ ${MARKDOWN_CODE_FENCE}
 |  |  |  |  |
 ## Data Flow
+- Data/state flow:
+- Critical path:
 - Happy path:
 - Nil/empty input path:
 - Upstream error path:
@@ -411,11 +501,21 @@ ${MARKDOWN_CODE_FENCE}
 |---|---|---|---|
 |  |  |  |  |
+## Pre-mortem
+| Scenario | Earliest warning signal | Mitigation owner | Containment action |
+|---|---|---|---|
+|  |  |  |  |
 ## Test Strategy
 - Unit:
 - Integration:
 - E2E:
+## Test-Diagram Mapping
+| Critical flow | Test coverage (ID/command) | Diagram anchor | Gap status |
+|---|---|---|---|
+|  |  |  | covered/gap |
 ## Performance Budget
 | Critical path | Metric | Target | Measurement method |
 |---|---|---|---|
@@ -431,6 +531,23 @@ ${MARKDOWN_CODE_FENCE}
 |---|---|---|
 |  |  |  |
+## Rejected Alternatives
+| Alternative | Why rejected | Revival signal |
+|---|---|---|
+|  |  |  |
+## Design Decisions
+| Decision Ref | Requirement / LD refs | Decision | Spec impact |
+|---|---|---|---|
+| DD-1 |  |  |  |
+## Spec Handoff
+- Requirements to carry forward:
+- Design decisions to encode:
+- Risks and rescue paths:
+- Test/performance expectations:
+- Unresolved questions (or \`None\`):
 ## Outside Voice Findings
 | ID | Dimension | Finding | Disposition | Rationale |
 |---|---|---|---|---|
@@ -458,6 +575,11 @@ ${MARKDOWN_CODE_FENCE}
 |---|---|---|
 |  |  |  |
+## Reference-Grade Contracts
+| Pattern / source | Reusable invariant | Local adaptation | Rejection boundary | Verification signal |
+|---|---|---|---|---|
+|  |  |  |  |  |
 ## Interface Contracts
 - Standard/Deep add-on when module boundaries or APIs change; omit for compact local changes.
 | Module | Produces | Consumes |
@@ -486,6 +608,9 @@ ${SEED_SHELF_SECTION}
 **Decisions made:** 0 | **Unresolved:** 0
+## Learning Capture Hint
+For meaningful design work, replace the Learnings sentinel with 1-3 JSON learning bullets, for example: \`- {"type":"lesson","trigger":"when design chooses a risky fallback path","action":"record the switch trigger and rollback signal in Spec Handoff","confidence":"medium","domain":"architecture","stage":"design"}\`
 ## Learnings
 - None this stage.
 `,
@@ -663,7 +788,7 @@ Execution rule: complete and verify each batch before starting the next batch.
 ## Execution Posture
 - Posture: sequential | dependency-batched | blocked
-- RED/GREEN/REFACTOR checkpoint plan:
+- Vertical-slice RED/GREEN/REFACTOR checkpoint plan:
 - Incremental commits: yes/no/deferred because
 ## RED Evidence
@@ -672,7 +797,7 @@ Execution rule: complete and verify each batch before starting the next batch.
 | S-1 |  |  |  |
 ## Acceptance Mapping
-| Slice | Source item ID | Spec criterion ID |
+| Vertical slice | Source item ID | Spec criterion ID |
 |---|---|---|
 | S-1 | SRC-1 | AC-1 |
@@ -721,6 +846,9 @@ Execution rule: complete and verify each batch before starting the next batch.
 |---|---|---|---|---|
 | S-1 |  |  |  |  |
+## Learning Capture Hint
+For meaningful TDD work, replace the Learnings sentinel with 1-3 JSON learning bullets, for example: \`- {"type":"pattern","trigger":"when a regression only fails after state rewind","action":"keep the RED fixture and add a cycle-log assertion before GREEN","confidence":"medium","domain":"testing","stage":"tdd"}\`
 ## Learnings
 - None this stage.
 `,
@@ -735,16 +863,40 @@ Execution rule: complete and verify each batch before starting the next batch.
 - Open questions:
 - Drift from upstream (or \`None\`):
+## Review Evidence Scope
+- Base/head:
+- Files inspected:
+- Changed-file coverage summary:
+- Diagnostics run:
+- Omitted files with explicit reason:
+- Reviewer delegation evidence:
+- Security-reviewer delegation evidence:
+## Changed-File Coverage
+| File | Coverage status | Evidence / no-impact reason |
+|---|---|---|
+|  | inspected / broader-module / omitted-no-impact |  |
 ## Layer 1 Verdict
 | Criterion | Verdict | Evidence |
 |---|---|---|
 | AC-1 | PASS/FAIL |  |
 ## Layer 2 Findings
-| ID | Severity | Category | Description | Status |
-|---|---|---|---|---|
-| R-1 | Critical/Important/Suggestion | correctness/security/performance/architecture/external-safety |  | open/resolved |
-- NO_CHANGE_ATTESTATION: <required when Category=security has no entries; explain why no security-relevant changes were detected>
+| ID | Severity | Category | File:line / no-line reason | Description | Status |
+|---|---|---|---|---|---|
+| R-1 | Critical/Important/Suggestion | correctness/security/performance/architecture/external-safety | path:line |  | open/resolved |
+- NO_FINDINGS_ATTESTATION: <required when no findings are reported; cite inspected coverage>
+## Security Sweep Attestation
+- Result: findings | NO_CHANGE_ATTESTATION | NO_SECURITY_IMPACT
+- Inspected surfaces:
+- Rationale:
+## Dependency & Version Audit
+- Relevant: yes/no
+- Manifests/lockfiles/generated clients/CI/runtime config/external APIs inspected:
+- Result / no-impact rationale:
 ## Incoming Feedback Queue
 | ID | Source | Severity | File:line | Request | Status | Evidence |
@@ -757,6 +909,7 @@ Execution rule: complete and verify each batch before starting the next batch.
 ## Review Readiness Snapshot
+- Victory Detector: pass | fail (Layer 1, Layer 2, security sweep, structured findings, trace evidence, unresolved-critical status)
 - Completed checks: Layer 1, Layer 2 tags, security sweep, schema validation
 - Delegation log: \`.cclaw/state/delegation-log.json\` required/completed/waived/pending
 - Staleness signal: commit at last review pass vs current commit
@@ -797,6 +950,9 @@ Execution rule: complete and verify each batch before starting the next batch.
 ## Final Verdict
 - APPROVED | APPROVED_WITH_CONCERNS | BLOCKED
+## Learning Capture Hint
+For meaningful review work, replace the Learnings sentinel with 1-3 JSON learning bullets, for example: \`- {"type":"lesson","trigger":"when security sweep finds no issues but touches trust boundaries","action":"record NO_SECURITY_IMPACT with inspected surfaces and rationale","confidence":"medium","domain":"security","stage":"review"}\`
 ## Learnings
 - None this stage.
 `,
@@ -865,6 +1021,7 @@ ${SHIP_FINALIZATION_ENUM_LINES}
 - NO_VCS handoff target + artifact path (if FINALIZE_NO_VCS):
 ## Completion Status
+- Victory Detector: pass | fail (review verdict valid, preflight fresh, rollback ready, one finalization enum selected, execution result present)
 - SHIPPED | SHIPPED_WITH_EXCEPTIONS | BLOCKED
 - Exceptions (if any):

package/dist/delegation.d.ts CHANGED

@@ -96,6 +96,8 @@ export declare function checkMandatoryDelegations(projectRoot: string, stage: Fl
     staleIgnored: string[];
     /** Delegation rows missing required evidence under a role-switch fallback. */
     missingEvidence: string[];
+    /** Current-run scheduled rows with no terminal row sharing the same spanId. */
+    staleWorkers: string[];
     /** Expected fulfillment mode for the active harness set. */
     expectedMode: DelegationFulfillmentMode;
 }>;

package/dist/delegation.js CHANGED

@@ -9,6 +9,7 @@ import { HARNESS_ADAPTERS } from "./harness-adapters.js";
 import { readFlowState } from "./runs.js";
 import { stageSchema } from "./content/stage-schema.js";
 const execFileAsync = promisify(execFile);
+const TERMINAL_DELEGATION_STATUSES = new Set(["completed", "failed", "waived"]);
 function delegationLogPath(projectRoot) {
     return path.join(projectRoot, RUNTIME_ROOT, "state", "delegation-log.json");
 }
@@ -135,6 +136,11 @@ function isDelegationEntry(value) {
         o.status === "waived";
     const timestampOk = typeof o.ts === "string" ||
         typeof o.startTs === "string";
+    const terminalStatus = o.status === "completed" || o.status === "failed" || o.status === "waived";
+    const lifecycleOk = o.status !== "scheduled" || o.endTs === undefined;
+    const terminalLifecycleOk = !terminalStatus ||
+        o.endTs === undefined ||
+        typeof o.endTs === "string";
     const retryOk = o.retryCount === undefined ||
         (typeof o.retryCount === "number" &&
             Number.isFinite(o.retryCount) &&
@@ -146,6 +152,8 @@ function isDelegationEntry(value) {
         modeOk &&
         statusOk &&
         timestampOk &&
+        lifecycleOk &&
+        terminalLifecycleOk &&
         (o.spanId === undefined || typeof o.spanId === "string") &&
         (o.parentSpanId === undefined || typeof o.parentSpanId === "string") &&
         (o.startTs === undefined || typeof o.startTs === "string") &&
@@ -185,6 +193,7 @@ function parseLedger(raw, runId) {
                     ...item,
                     spanId: item.spanId ?? createSpanId(),
                     startTs: ts,
+                    endTs: TERMINAL_DELEGATION_STATUSES.has(item.status) ? (item.endTs ?? ts) : undefined,
                     ts,
                     retryCount: typeof item.retryCount === "number" && Number.isInteger(item.retryCount) && item.retryCount >= 0
                         ? item.retryCount
@@ -226,6 +235,12 @@ export async function appendDelegation(projectRoot, entry) {
         stamped.spanId = entry.spanId ?? createSpanId();
         stamped.startTs = startTs;
         stamped.ts = startTs;
+        if (TERMINAL_DELEGATION_STATUSES.has(stamped.status) && !stamped.endTs) {
+            stamped.endTs = new Date().toISOString();
+        }
+        if (stamped.status === "scheduled") {
+            delete stamped.endTs;
+        }
         stamped.schemaVersion = 1;
         if (stamped.retryCount === undefined ||
             !Number.isInteger(stamped.retryCount) ||
@@ -247,11 +262,10 @@ export async function appendDelegation(projectRoot, entry) {
                 stamped.fulfillmentMode = expectedFulfillmentMode(fallbacks);
             }
         }
-        // Idempotency: if a caller (or a retried hook) tries to append a row
-        // with a spanId that already exists in the ledger, treat it as a no-op
-        // instead of growing the log with duplicate entries that subsequent
-        // delegation checks would mis-count.
-        if (prior.entries.some((existing) => existing.spanId === stamped.spanId)) {
+        // Idempotency: a retried hook may replay the same lifecycle row. Allow a
+        // terminal row to close an existing scheduled span, but drop exact same
+        // span/status duplicates so checks do not mis-count repeated writes.
+        if (prior.entries.some((existing) => existing.spanId === stamped.spanId && existing.status === stamped.status)) {
             return;
         }
         const ledger = {
@@ -293,6 +307,12 @@ export async function checkMandatoryDelegations(projectRoot, stage, options = {}
     const missing = [];
     const waived = [];
     const missingEvidence = [];
+    const terminalSpanIds = new Set(forRun
+        .filter((entry) => TERMINAL_DELEGATION_STATUSES.has(entry.status) && entry.spanId)
+        .map((entry) => entry.spanId));
+    const staleWorkers = forRun
+        .filter((entry) => entry.status === "scheduled" && entry.spanId && !terminalSpanIds.has(entry.spanId))
+        .map((entry) => `${entry.agent}(spanId=${entry.spanId})`);
     const config = await readConfig(projectRoot).catch(() => null);
     const harnesses = config?.harnesses ?? [];
     const configuredFallbacks = harnesses.map((h) => HARNESS_ADAPTERS[h].capabilities.subagentFallback);
@@ -324,11 +344,12 @@ export async function checkMandatoryDelegations(projectRoot, stage, options = {}
         }
     }
     return {
-        satisfied: missing.length === 0 && missingEvidence.length === 0,
+        satisfied: missing.length === 0 && missingEvidence.length === 0 && staleWorkers.length === 0,
         missing,
         waived,
         staleIgnored,
         missingEvidence,
+        staleWorkers,
         expectedMode
     };
 }