npm - cclaw-cli - Versions diffs - 0.5.12 → 0.5.14 - Mend

cclaw-cli 0.5.12 → 0.5.14

This diff shows the differences between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in their respective public registries.

Files changed (5)

package/dist/artifact-linter.js +26 -0
package/dist/content/examples.js +62 -103
package/dist/content/stage-schema.js +46 -10
package/dist/content/templates.js +17 -2
package/package.json +1 -1

package/dist/artifact-linter.js CHANGED Viewed

@@ -199,6 +199,7 @@ function validateSectionBody(sectionBody, rule) {
                     details: `Rule expects exactly one selected token (${tokens.join(", ")}); found ${selected.size}.`
                 };
             }
+            return { ok: true, details: "Exactly one token selected as expected." };
         }
     }
     if (/Status:\s*pending\s+until/iu.test(rule)) {
@@ -419,9 +420,34 @@ export async function validateReviewArmy(projectRoot) {
         if (!Array.isArray(rec.conflicts)) {
             errors.push("reconciliation.conflicts must be an array.");
         }
+        else {
+            rec.conflicts.forEach((c, ci) => {
+                if (c === null || typeof c !== "object" || Array.isArray(c)) {
+                    errors.push(`reconciliation.conflicts[${ci}] must be an object.`);
+                    return;
+                }
+                const co = c;
+                if (!isNonEmptyString(co.findingId)) {
+                    errors.push(`reconciliation.conflicts[${ci}].findingId must be a non-empty string.`);
+                }
+                else if (!findingIds.has(co.findingId)) {
+                    errors.push(`reconciliation.conflicts[${ci}].findingId references unknown finding "${co.findingId}".`);
+                }
+                if (!isNonEmptyString(co.description)) {
+                    errors.push(`reconciliation.conflicts[${ci}].description must be a non-empty string.`);
+                }
+            });
+        }
         if (!isStringArray(rec.multiSpecialistConfirmed)) {
             errors.push("reconciliation.multiSpecialistConfirmed must be an array of finding ids.");
         }
+        else {
+            for (const msId of rec.multiSpecialistConfirmed) {
+                if (!findingIds.has(msId)) {
+                    errors.push(`reconciliation.multiSpecialistConfirmed references unknown finding id "${msId}".`);
+                }
+            }
+        }
         if (!isStringArray(rec.shipBlockers)) {
             errors.push("reconciliation.shipBlockers must be an array of finding ids.");
         }

package/dist/content/examples.js CHANGED Viewed

@@ -311,134 +311,93 @@ Execution rule: complete and verify each wave before starting the next wave.
 | AC-2 (idempotency) | T-1, T-2 |
 | AC-3 (failure visibility) | T-3 |
+### Risk Assessment
+| Task/Wave | Risk | Likelihood | Impact | Mitigation |
+| --- | --- | --- | --- | --- |
+| T-3 (Wave 3) | SSE reconnect logic complex | Medium | High | Spike reconnect in isolation before integrating with feed UI |
+| Wave 2 → 3 | Publisher API contract may shift | Low | Medium | Pin contract in T-1 schema; T-2 integration test validates |
 ### WAIT_FOR_CONFIRM
 - Status: pending
 - Confirmed by:`,
-    tdd: `### RED test (Vitest) — written before production code
-\`\`\`typescript
-import { describe, it, expect } from "vitest";
-import { summarizeDedupedFeed } from "../notificationFeed";
-describe("summarizeDedupedFeed", () => {
-  it("counts unique keys and unread items", () => {
-    const summary = summarizeDedupedFeed([
-      { dedupeKey: "a", read: false },
-      { dedupeKey: "a", read: true },
-      { dedupeKey: "b", read: false },
-    ]);
-    expect(summary).toEqual({ uniqueKeys: 2, unread: 1 });
-  });
-});
-\`\`\`
-### Expected output (FAIL)
-\`\`\`bash
- FAIL  src/notificationFeed.test.ts
-Error: Cannot find module '../notificationFeed' imported from src/notificationFeed.test.ts
-\`\`\`
-> **Annotation:** This test MUST fail before any production code is written.
-### Iron Law verification
-1. **Run** the test command (for example: \`pnpm vitest run src/notificationFeed.test.ts\`).
-2. **Read output** and confirm the failure is due to the module/function not existing (or the function throwing “not implemented”), not due to a typo in assertions.
-3. **Confirm** the failure reason matches the intentional gap: **missing implementation**, not a flaky environment or misconfigured test runner.
-### Common mistakes to avoid
-- “GREEN” that secretly imports a helper that already implements the behavior (that is skipping RED).
-- Assertions that pass because the function returns \`undefined\` and the matcher is too loose.
-### GREEN (minimal implementation to pass RED)
+    tdd: `### RED Evidence
-\`\`\`typescript
-export type FeedItem = { dedupeKey: string; read: boolean };
+| Slice | Test name | Command | Failure output summary |
+| --- | --- | --- | --- |
+| S-1 (event schema + dedupe) | counts unique keys and unread items | \`\`\`pnpm vitest run tests/unit/dedupe-feed.test.ts\`\`\` | Cannot find module '../notificationFeed' |
+| S-2 (publisher outbox) | publishes event to outbox with dedupe key | \`\`\`pnpm vitest run tests/integration/publisher.test.ts\`\`\` | publishToOutbox is not a function |
+| S-3 (client feed + fallback) | shows notification within 5s via SSE | \`\`\`pnpm playwright test tests/e2e/notification-feed.spec.ts\`\`\` | Element [data-testid="feed-item"] not found |
-export function summarizeDedupedFeed(items: FeedItem[]) {
-  // Last write wins per dedupeKey (stable ordering: later items override earlier ones).
-  const latestReadByKey = new Map<string, boolean>();
+### Acceptance Mapping
-  for (const item of items) {
-    latestReadByKey.set(item.dedupeKey, item.read);
-  }
+| Slice | Plan task ID | Spec criterion ID |
+| --- | --- | --- |
+| S-1 | T-1 | AC-1, AC-2 |
+| S-2 | T-2 | AC-1 |
+| S-3 | T-3 | AC-1, AC-2, AC-3 |
-  let unread = 0;
-  for (const read of latestReadByKey.values()) {
-    if (!read) unread += 1;
-  }
+### Failure Analysis
-  return { uniqueKeys: latestReadByKey.size, unread };
-}
-\`\`\`
+| Slice | Expected missing behavior | Actual failure reason |
+| --- | --- | --- |
+| S-1 | notificationFeed module does not exist yet | Module import fails — correct: implementation missing |
+| S-2 | publishToOutbox function not implemented | Function not found — correct: write path missing |
+| S-3 | Feed UI not rendered, SSE not connected | DOM element missing — correct: client component not built |
-### REFACTOR (keep tests green)
+### GREEN Evidence
-Keep semantics identical, but make the merge step explicit and easier to unit test in isolation:
+- Full suite command: \`\`\`pnpm vitest run && pnpm playwright test\`\`\`
+- Full suite result: 47 tests passed (3 new + 44 existing), 0 failed, 0 skipped
-\`\`\`typescript
-export type FeedItem = { dedupeKey: string; read: boolean };
+### REFACTOR Notes
-function mergeLatestByDedupeKey(items: FeedItem[]) {
-  const latestReadByKey = new Map<string, boolean>();
-  for (const item of items) latestReadByKey.set(item.dedupeKey, item.read);
-  return latestReadByKey;
-}
+- What changed: Extracted \`\`\`mergeLatestByDedupeKey\`\`\` helper from inline loop in \`\`\`summarizeDedupedFeed\`\`\`; moved SSE reconnect logic into \`\`\`useSSEConnection\`\`\` hook.
+- Why: Dedupe merge logic is reused by both publisher and client; reconnect logic was duplicated across components.
+- Behavior preserved: Full suite re-run confirms 47/47 pass after refactor.
-export function summarizeDedupedFeed(items: FeedItem[]) {
-  const latestReadByKey = mergeLatestByDedupeKey(items);
+### Traceability
-  let unread = 0;
-  for (const read of latestReadByKey.values()) {
-    if (!read) unread += 1;
-  }
+- Plan task IDs: T-1, T-2, T-3
+- Spec criterion IDs: AC-1, AC-2, AC-3`,
+    review: `### Layer 1 Verdict
-  return { uniqueKeys: latestReadByKey.size, unread };
-}
-\`\`\`
-### Sample terminal output (GREEN)
+| Criterion | Verdict | Evidence |
+| --- | --- | --- |
+| AC-1: Delivery within 5s without reload | PASS | \`notification-feed.e2e.ts:44-88\` asserts SSE-to-UI timing under mock clock |
+| AC-2: Dedupe — one visible item per key | PARTIAL | Unit tests cover publisher dedupe; UI merge path lacks test for race reordering (\`feedStore.test.ts\` missing case) |
+| AC-3: Degraded mode + REST snapshot | PASS | \`NotificationsPanel.tsx:112-140\` renders banner + calls snapshot endpoint |
-\`\`\`bash
- RUN  v2.1.0 /Users/dev/app
+### Layer 2 Findings
- ✓ src/notificationFeed.test.ts (1 test) 12ms
+| ID | Severity | Category | Description | Status |
+| --- | --- | --- | --- | --- |
+| R-1 | Critical | correctness | Snapshot endpoint returns newest N rows but does not guarantee consistency with stream cursor — users can miss items between snapshot and subscribe. | open |
+| R-2 | Important | performance | \`feedStore.merge()\` does full-array scan on every SSE event; O(n) per event where n is feed length. | open |
+| R-3 | Suggestion | architecture | SSE reconnect logic duplicated across \`useNotifications\` and \`usePresence\`; extract shared hook. | open |
- Test Files  1 passed (1)
-      Tests  1 passed (1)
- Tests: 1 passed.
-\`\`\``,
-    review: `### Layer 1 — Spec compliance (per-criterion)
+### Review Army Contract
-| Criterion | Status | Evidence |
-| --- | --- | --- |
-| Delivery within 5s without reload | PASS | \`notification-feed.e2e.ts:44-88\` asserts SSE-to-UI timing under mock clock |
-| Dedupe: one visible item per key | PARTIAL | Unit tests cover publisher dedupe; UI merge path lacks test for race reordering (\`feedStore.test.ts\` missing case) |
-| Degraded mode + REST snapshot | PASS | \`NotificationsPanel.tsx:112-140\` renders banner + calls snapshot endpoint |
+- See \`07-review-army.json\`
+- Reconciliation summary: 1 duplicate collapsed (R-1 reported by spec-reviewer and code-reviewer), 0 conflicts
-### Layer 2 — Engineering finding (sample)
+### Review Readiness Dashboard
-- **Severity:** Major
-- **Description:** Snapshot endpoint returns newest N rows but does not guarantee consistency with stream cursor, so users can miss items that arrived between snapshot and subscribe.
-- **File:line:** \`server/routes/notifications.ts:208\`
-- **Recommendation:** Return a monotonic cursor with snapshot and initialize SSE from that cursor; add contract tests for gapless delivery.
-- **Resolution options:**
-  1. Add cursor field + server-side reconciliation on subscribe (preferred).
-  2. Client-side “fetch since last seen id” merge pass (more complex, easier to get wrong).
-  3. Temporary mitigation: widen polling window when SSE is unhealthy (acceptable only as a short-term bridge).
+- Layer 1 complete: yes (3/3 criteria)
+- Layer 2 complete: yes (5 sections reviewed)
+- Review army schema valid: yes
+- Open critical blockers: 1 (R-1)
+- Ship recommendation: BLOCKED until R-1 resolved
-### Layer 0 — hygiene checks (sample)
+### Severity Summary
-- **Dependency freshness:** no critical CVEs in direct server dependencies (scanner report linked in PR).
-- **Secrets:** no new env vars committed; rotation playbook unchanged.
+- Critical: 1
+- Important: 1
+- Suggestion: 1
-### Exit criteria (sample)
+### Final Verdict
-- All **Major** findings resolved or explicitly accepted with a time-bounded follow-up ticket.
-- **PARTIAL** spec compliance items have a named owner and a test plan before ship.`,
+- BLOCKED`,
     ship: `### Preflight checklist (sample)
 - tests ✅ (\`pnpm test\` green on main)

package/dist/content/stage-schema.js CHANGED Viewed

@@ -905,6 +905,7 @@ const PLAN = {
         { section: "Task List", required: true, validationRule: "Each task: ID, description, acceptance criterion link, verification command, and effort estimate (S/M/L)." },
         { section: "Acceptance Mapping", required: true, validationRule: "Every spec criterion is covered by at least one task." },
         { section: "Risk Assessment", required: false, validationRule: "If present: per-task or per-wave risk identification with likelihood, impact, and mitigation strategy." },
+        { section: "Boundary Map", required: false, validationRule: "If present: per-wave or per-task interface contracts listing what each task produces (exports) and consumes (imports) from other tasks." },
         { section: "WAIT_FOR_CONFIRM", required: true, validationRule: "Explicit marker present. Status: pending until user approves." }
     ],
     namedAntiPattern: {
@@ -1015,7 +1016,8 @@ const TDD = {
         { claim: "One broad integration test is enough.", reality: "Slice-level RED tests are required for precise failure signal." },
         { claim: "Refactor can be skipped for speed.", reality: "Skipping refactor accumulates debt and weakens maintainability." },
         { claim: "Only changed tests need to pass.", reality: "Full-suite checks are needed to detect regressions." },
-        { claim: "Traceability is implied by commit diff.", reality: "Explicit mapping avoids ambiguity in review and rollback." }
+        { claim: "Traceability is implied by commit diff.", reality: "Explicit mapping avoids ambiguity in review and rollback." },
+        { claim: "Tests written after implementation achieve the same goals.", reality: "Post-hoc tests confirm assumptions, not behavior. They test what you built, not what you should have built. TDD forces you to think about behavior before you have an implementation to be anchored by." }
     ],
     redFlags: [
         "No failing test output (RED missing)",
@@ -1033,14 +1035,39 @@ const TDD = {
         { name: "Failure-First Thinking", description: "The failing test IS the specification. Until you see the right failure, you do not understand what you are building. Wrong failures are information." },
         { name: "Minimal Viable Change", description: "The best implementation is the smallest one that passes all RED tests. Every extra line is risk. Resist the urge to 'improve while you are here.'" },
         { name: "Regression Paranoia", description: "Assume every change breaks something until the full suite proves otherwise. Partial test runs are lies of omission." },
-        { name: "Refactor-as-Hygiene", description: "Refactoring is not optional cleanup — it is the third leg of TDD. GREEN without REFACTOR accumulates mess. REFACTOR without GREEN breaks things." }
+        { name: "Refactor-as-Hygiene", description: "Refactoring is not optional cleanup — it is the third leg of TDD. GREEN without REFACTOR accumulates mess. REFACTOR without GREEN breaks things." },
+        { name: "Evidence Over Anecdote", description: "Every claim about test state must be backed by captured output. 'It passed' without terminal evidence is not evidence. 'I saw it fail' without the failure output is not RED. Capture commands, outputs, and results — not summaries from memory." },
+        { name: "Characterization First", description: "Before changing existing behavior, write characterization tests that capture current behavior as-is. These tests document what the system does today — even if that behavior is wrong. Only after the characterization suite is green do you add the new RED test for the desired change. This prevents accidental behavior destruction during refactoring." }
+    ],
+    reviewSections: [
+        {
+            title: "RED Evidence Audit",
+            evaluationPoints: [
+                "Does every slice have a captured failing test output?",
+                "Does each failure reason match the expected missing behavior (not a typo or config error)?",
+                "Were tests written BEFORE any production code for that slice?",
+                "Does each RED test assert observable behavior, not implementation details?",
+                "Is there a test for each acceptance criterion mapped in the plan?"
+            ],
+            stopGate: true
+        },
+        {
+            title: "GREEN/REFACTOR Audit",
+            evaluationPoints: [
+                "Does GREEN evidence show a FULL suite pass (not partial)?",
+                "Is the GREEN implementation minimal — no features beyond what RED tests require?",
+                "Does the REFACTOR step preserve all existing behavior (no new failures)?",
+                "Are REFACTOR notes documented with rationale?",
+                "Is traceability complete: every change links to plan task ID and spec criterion?"
+            ],
+            stopGate: true
+        }
     ],
-    reviewSections: [],
     completionStatus: ["DONE", "DONE_WITH_CONCERNS", "BLOCKED"],
     crossStageTrace: {
-        readsFrom: [".cclaw/artifacts/05-plan.md", ".cclaw/artifacts/04-spec.md"],
+        readsFrom: [".cclaw/artifacts/05-plan.md", ".cclaw/artifacts/04-spec.md", ".cclaw/artifacts/03-design.md"],
         writesTo: [".cclaw/artifacts/06-tdd.md"],
-        traceabilityRule: "Every RED test traces to a plan task. Every GREEN change traces to a RED test. Every plan task traces to a spec criterion. Evidence chain must be unbroken."
+        traceabilityRule: "Every RED test traces to a plan task. Every GREEN change traces to a RED test. Every plan task traces to a spec criterion. Design decisions inform test strategy. Evidence chain must be unbroken."
     },
     artifactValidation: [
         { section: "RED Evidence", required: true, validationRule: "Failing test output captured per slice." },
@@ -1048,8 +1075,14 @@ const TDD = {
         { section: "Failure Analysis", required: true, validationRule: "Failure reason matches expected missing behavior." },
         { section: "GREEN Evidence", required: true, validationRule: "Full suite pass output captured." },
         { section: "REFACTOR Notes", required: true, validationRule: "What changed, why, behavior preservation confirmed." },
-        { section: "Traceability", required: true, validationRule: "Plan task ID and spec criterion linked." }
+        { section: "Traceability", required: true, validationRule: "Plan task ID and spec criterion linked." },
+        { section: "Verification Ladder", required: false, validationRule: "If present: per-slice verification tier (static, command, behavioral, human) with evidence for highest tier reached." },
+        { section: "Coverage Targets", required: false, validationRule: "If present: per-module or per-code-type coverage thresholds with current values and measurement commands." }
     ],
+    namedAntiPattern: {
+        title: "Code Before Failing Test",
+        description: "Production code written before a failing test is not TDD — it is guessing validated after the fact. Tests written after implementation confirm assumptions, not behavior. If you wrote code first, delete it and start with RED. Delete means delete — not 'keep as reference.' The failing test IS the specification."
+    },
     waveExecutionAllowed: true
 };
 // ---------------------------------------------------------------------------
@@ -1113,7 +1146,8 @@ const REVIEW = {
         { id: "review_layer2_performance", description: "Performance review completed." },
         { id: "review_layer2_architecture", description: "Architecture fit review completed." },
         { id: "review_severity_classified", description: "All findings are severity-tagged." },
-        { id: "review_criticals_resolved", description: "No unresolved critical blockers remain." }
+        { id: "review_criticals_resolved", description: "No unresolved critical blockers remain." },
+        { id: "review_army_json_valid", description: "07-review-army.json passes schema validation (validateReviewArmy)." }
     ],
     requiredEvidence: [
         "Artifact written to `.cclaw/artifacts/07-review.md`.",
@@ -1148,7 +1182,9 @@ const REVIEW = {
         { claim: "Passing tests mean spec compliance by default.", reality: "Tests can miss requirement mismatches; explicit spec review is mandatory." },
         { claim: "Severity labels are unnecessary.", reality: "Without severity, release decisions become inconsistent." },
         { claim: "Critical issues can be fixed after ship.", reality: "Critical blockers must be resolved before release handoff." },
-        { claim: "Security review is not needed for internal tools.", reality: "Internal tools become external surface area. Security is always in scope." }
+        { claim: "Security review is not needed for internal tools.", reality: "Internal tools become external surface area. Security is always in scope." },
+        { claim: "A quick skim is sufficient for small diffs.", reality: "Small diffs hide high-impact changes. A 3-line auth bypass is still critical. Every diff gets layered review regardless of size." },
+        { claim: "The author already reviewed their own code.", reality: "Self-review misses blind spots by definition. Independent review exists precisely because authors cannot objectively evaluate their own assumptions." }
     ],
     redFlags: [
         "No separate Layer 1/Layer 2 outcomes",
@@ -1245,7 +1281,7 @@ const REVIEW = {
     completionStatus: ["APPROVED", "APPROVED_WITH_CONCERNS", "BLOCKED"],
     crossStageTrace: {
         readsFrom: [".cclaw/artifacts/06-tdd.md", ".cclaw/artifacts/04-spec.md", ".cclaw/artifacts/05-plan.md"],
-        writesTo: [".cclaw/artifacts/07-review.md"],
+        writesTo: [".cclaw/artifacts/07-review.md", ".cclaw/artifacts/07-review-army.json"],
         traceabilityRule: "Review verdict must reference specific spec criteria and TDD evidence. Downstream ship stage must reference review verdict."
     },
     artifactValidation: [
@@ -1253,7 +1289,7 @@ const REVIEW = {
         { section: "Layer 2 Findings", required: true, validationRule: "Each finding has severity, description, and resolution status." },
         { section: "Review Army Contract", required: true, validationRule: "Structured findings include id/severity/confidence/fingerprint/reportedBy/status with dedup reconciliation summary." },
         { section: "Review Readiness Dashboard", required: true, validationRule: "At least 4 readiness checklist lines including blocker and recommendation status." },
-        { section: "Severity Summary", required: true, validationRule: "Counts: N critical, N important, N suggestion." },
+        { section: "Severity Summary", required: true, validationRule: "Per-severity count lines for critical, important, and suggestion buckets." },
         { section: "Final Verdict", required: true, validationRule: "Exactly one of: APPROVED, APPROVED_WITH_CONCERNS, BLOCKED." }
     ],
     namedAntiPattern: {

package/dist/content/templates.js CHANGED Viewed

@@ -278,6 +278,11 @@ Execution rule: complete and verify each wave before starting the next wave.
 |---|---|---|---|---|
 |  |  |  |  |  |
+## Boundary Map
+| Task/Wave | Produces (exports) | Consumes (imports from) |
+|---|---|---|
+|  |  |  |
 ## WAIT_FOR_CONFIRM
 - Status: pending
 - Confirmed by:
@@ -311,6 +316,17 @@ Execution rule: complete and verify each wave before starting the next wave.
 ## Traceability
 - Plan task IDs:
 - Spec criterion IDs:
+## Verification Ladder
+| Slice | Tier reached | Evidence |
+|---|---|---|
+| S-1 |  |  |
+## Coverage Targets
+| Code type | Target | Current | Command |
+|---|---|---|---|
+|  |  |  |  |
 `,
     "07-review.md": `# Review Artifact
@@ -359,8 +375,7 @@ Execution rule: complete and verify each wave before starting the next wave.
       "confidence": 7,
       "category": "correctness",
       "location": {
-        "file": "",
-        "line": 0
+        "file": ""
       },
       "fingerprint": "",
       "reportedBy": [],

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "cclaw-cli",
-  "version": "0.5.12",
+  "version": "0.5.14",
   "description": "Installer-first flow toolkit for coding agents",
   "type": "module",
   "bin": {