cclaw-cli 0.5.13 → 0.5.14

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -199,6 +199,7 @@ function validateSectionBody(sectionBody, rule) {
199
199
  details: `Rule expects exactly one selected token (${tokens.join(", ")}); found ${selected.size}.`
200
200
  };
201
201
  }
202
+ return { ok: true, details: "Exactly one token selected as expected." };
202
203
  }
203
204
  }
204
205
  if (/Status:\s*pending\s+until/iu.test(rule)) {
@@ -419,9 +420,34 @@ export async function validateReviewArmy(projectRoot) {
419
420
  if (!Array.isArray(rec.conflicts)) {
420
421
  errors.push("reconciliation.conflicts must be an array.");
421
422
  }
423
+ else {
424
+ rec.conflicts.forEach((c, ci) => {
425
+ if (c === null || typeof c !== "object" || Array.isArray(c)) {
426
+ errors.push(`reconciliation.conflicts[${ci}] must be an object.`);
427
+ return;
428
+ }
429
+ const co = c;
430
+ if (!isNonEmptyString(co.findingId)) {
431
+ errors.push(`reconciliation.conflicts[${ci}].findingId must be a non-empty string.`);
432
+ }
433
+ else if (!findingIds.has(co.findingId)) {
434
+ errors.push(`reconciliation.conflicts[${ci}].findingId references unknown finding "${co.findingId}".`);
435
+ }
436
+ if (!isNonEmptyString(co.description)) {
437
+ errors.push(`reconciliation.conflicts[${ci}].description must be a non-empty string.`);
438
+ }
439
+ });
440
+ }
422
441
  if (!isStringArray(rec.multiSpecialistConfirmed)) {
423
442
  errors.push("reconciliation.multiSpecialistConfirmed must be an array of finding ids.");
424
443
  }
444
+ else {
445
+ for (const msId of rec.multiSpecialistConfirmed) {
446
+ if (!findingIds.has(msId)) {
447
+ errors.push(`reconciliation.multiSpecialistConfirmed references unknown finding id "${msId}".`);
448
+ }
449
+ }
450
+ }
425
451
  if (!isStringArray(rec.shipBlockers)) {
426
452
  errors.push("reconciliation.shipBlockers must be an array of finding ids.");
427
453
  }
@@ -360,34 +360,44 @@ Execution rule: complete and verify each wave before starting the next wave.
360
360
 
361
361
  - Plan task IDs: T-1, T-2, T-3
362
362
  - Spec criterion IDs: AC-1, AC-2, AC-3`,
363
- review: `### Layer 1 — Spec compliance (per-criterion)
363
+ review: `### Layer 1 Verdict
364
364
 
365
- | Criterion | Status | Evidence |
365
+ | Criterion | Verdict | Evidence |
366
366
  | --- | --- | --- |
367
- | Delivery within 5s without reload | PASS | \`notification-feed.e2e.ts:44-88\` asserts SSE-to-UI timing under mock clock |
368
- | Dedupe: one visible item per key | PARTIAL | Unit tests cover publisher dedupe; UI merge path lacks test for race reordering (\`feedStore.test.ts\` missing case) |
369
- | Degraded mode + REST snapshot | PASS | \`NotificationsPanel.tsx:112-140\` renders banner + calls snapshot endpoint |
367
+ | AC-1: Delivery within 5s without reload | PASS | \`notification-feed.e2e.ts:44-88\` asserts SSE-to-UI timing under mock clock |
368
+ | AC-2: Dedupe — one visible item per key | PARTIAL | Unit tests cover publisher dedupe; UI merge path lacks test for race reordering (\`feedStore.test.ts\` missing case) |
369
+ | AC-3: Degraded mode + REST snapshot | PASS | \`NotificationsPanel.tsx:112-140\` renders banner + calls snapshot endpoint |
370
370
 
371
- ### Layer 2 — Engineering finding (sample)
371
+ ### Layer 2 Findings
372
372
 
373
- - **Severity:** Major
374
- - **Description:** Snapshot endpoint returns newest N rows but does not guarantee consistency with stream cursor, so users can miss items that arrived between snapshot and subscribe.
375
- - **File:line:** \`server/routes/notifications.ts:208\`
376
- - **Recommendation:** Return a monotonic cursor with snapshot and initialize SSE from that cursor; add contract tests for gapless delivery.
377
- - **Resolution options:**
378
- 1. Add cursor field + server-side reconciliation on subscribe (preferred).
379
- 2. Client-side “fetch since last seen id” merge pass (more complex, easier to get wrong).
380
- 3. Temporary mitigation: widen polling window when SSE is unhealthy (acceptable only as a short-term bridge).
373
+ | ID | Severity | Category | Description | Status |
374
+ | --- | --- | --- | --- | --- |
375
+ | R-1 | Critical | correctness | Snapshot endpoint returns newest N rows but does not guarantee consistency with stream cursor — users can miss items between snapshot and subscribe. | open |
376
+ | R-2 | Important | performance | \`feedStore.merge()\` does full-array scan on every SSE event; O(n) per event where n is feed length. | open |
377
+ | R-3 | Suggestion | architecture | SSE reconnect logic duplicated across \`useNotifications\` and \`usePresence\`; extract shared hook. | open |
378
+
379
+ ### Review Army Contract
380
+
381
+ - See \`07-review-army.json\`
382
+ - Reconciliation summary: 1 duplicate collapsed (R-1 reported by spec-reviewer and code-reviewer), 0 conflicts
383
+
384
+ ### Review Readiness Dashboard
385
+
386
+ - Layer 1 complete: yes (3/3 criteria)
387
+ - Layer 2 complete: yes (5 sections reviewed)
388
+ - Review army schema valid: yes
389
+ - Open critical blockers: 1 (R-1)
390
+ - Ship recommendation: BLOCKED until R-1 resolved
381
391
 
382
- ### Layer 0 — hygiene checks (sample)
392
+ ### Severity Summary
383
393
 
384
- - **Dependency freshness:** no critical CVEs in direct server dependencies (scanner report linked in PR).
385
- - **Secrets:** no new env vars committed; rotation playbook unchanged.
394
+ - Critical: 1
395
+ - Important: 1
396
+ - Suggestion: 1
386
397
 
387
- ### Exit criteria (sample)
398
+ ### Final Verdict
388
399
 
389
- - All **Major** findings resolved or explicitly accepted with a time-bounded follow-up ticket.
390
- - **PARTIAL** spec compliance items have a named owner and a test plan before ship.`,
400
+ - BLOCKED`,
391
401
  ship: `### Preflight checklist (sample)
392
402
 
393
403
  - tests ✅ (\`pnpm test\` green on main)
@@ -1016,7 +1016,8 @@ const TDD = {
1016
1016
  { claim: "One broad integration test is enough.", reality: "Slice-level RED tests are required for precise failure signal." },
1017
1017
  { claim: "Refactor can be skipped for speed.", reality: "Skipping refactor accumulates debt and weakens maintainability." },
1018
1018
  { claim: "Only changed tests need to pass.", reality: "Full-suite checks are needed to detect regressions." },
1019
- { claim: "Traceability is implied by commit diff.", reality: "Explicit mapping avoids ambiguity in review and rollback." }
1019
+ { claim: "Traceability is implied by commit diff.", reality: "Explicit mapping avoids ambiguity in review and rollback." },
1020
+ { claim: "Tests written after implementation achieve the same goals.", reality: "Post-hoc tests confirm assumptions, not behavior. They test what you built, not what you should have built. TDD forces you to think about behavior before you have an implementation to be anchored by." }
1020
1021
  ],
1021
1022
  redFlags: [
1022
1023
  "No failing test output (RED missing)",
@@ -1035,7 +1036,8 @@ const TDD = {
1035
1036
  { name: "Minimal Viable Change", description: "The best implementation is the smallest one that passes all RED tests. Every extra line is risk. Resist the urge to 'improve while you are here.'" },
1036
1037
  { name: "Regression Paranoia", description: "Assume every change breaks something until the full suite proves otherwise. Partial test runs are lies of omission." },
1037
1038
  { name: "Refactor-as-Hygiene", description: "Refactoring is not optional cleanup — it is the third leg of TDD. GREEN without REFACTOR accumulates mess. REFACTOR without GREEN breaks things." },
1038
- { name: "Evidence Over Anecdote", description: "Every claim about test state must be backed by captured output. 'It passed' without terminal evidence is not evidence. 'I saw it fail' without the failure output is not RED. Capture commands, outputs, and results — not summaries from memory." }
1039
+ { name: "Evidence Over Anecdote", description: "Every claim about test state must be backed by captured output. 'It passed' without terminal evidence is not evidence. 'I saw it fail' without the failure output is not RED. Capture commands, outputs, and results — not summaries from memory." },
1040
+ { name: "Characterization First", description: "Before changing existing behavior, write characterization tests that capture current behavior as-is. These tests document what the system does today — even if that behavior is wrong. Only after the characterization suite is green do you add the new RED test for the desired change. This prevents accidental behavior destruction during refactoring." }
1039
1041
  ],
1040
1042
  reviewSections: [
1041
1043
  {
@@ -1144,7 +1146,8 @@ const REVIEW = {
1144
1146
  { id: "review_layer2_performance", description: "Performance review completed." },
1145
1147
  { id: "review_layer2_architecture", description: "Architecture fit review completed." },
1146
1148
  { id: "review_severity_classified", description: "All findings are severity-tagged." },
1147
- { id: "review_criticals_resolved", description: "No unresolved critical blockers remain." }
1149
+ { id: "review_criticals_resolved", description: "No unresolved critical blockers remain." },
1150
+ { id: "review_army_json_valid", description: "07-review-army.json passes schema validation (validateReviewArmy)." }
1148
1151
  ],
1149
1152
  requiredEvidence: [
1150
1153
  "Artifact written to `.cclaw/artifacts/07-review.md`.",
@@ -1179,7 +1182,9 @@ const REVIEW = {
1179
1182
  { claim: "Passing tests mean spec compliance by default.", reality: "Tests can miss requirement mismatches; explicit spec review is mandatory." },
1180
1183
  { claim: "Severity labels are unnecessary.", reality: "Without severity, release decisions become inconsistent." },
1181
1184
  { claim: "Critical issues can be fixed after ship.", reality: "Critical blockers must be resolved before release handoff." },
1182
- { claim: "Security review is not needed for internal tools.", reality: "Internal tools become external surface area. Security is always in scope." }
1185
+ { claim: "Security review is not needed for internal tools.", reality: "Internal tools become external surface area. Security is always in scope." },
1186
+ { claim: "A quick skim is sufficient for small diffs.", reality: "Small diffs hide high-impact changes. A 3-line auth bypass is still critical. Every diff gets layered review regardless of size." },
1187
+ { claim: "The author already reviewed their own code.", reality: "Self-review misses blind spots by definition. Independent review exists precisely because authors cannot objectively evaluate their own assumptions." }
1183
1188
  ],
1184
1189
  redFlags: [
1185
1190
  "No separate Layer 1/Layer 2 outcomes",
@@ -1276,7 +1281,7 @@ const REVIEW = {
1276
1281
  completionStatus: ["APPROVED", "APPROVED_WITH_CONCERNS", "BLOCKED"],
1277
1282
  crossStageTrace: {
1278
1283
  readsFrom: [".cclaw/artifacts/06-tdd.md", ".cclaw/artifacts/04-spec.md", ".cclaw/artifacts/05-plan.md"],
1279
- writesTo: [".cclaw/artifacts/07-review.md"],
1284
+ writesTo: [".cclaw/artifacts/07-review.md", ".cclaw/artifacts/07-review-army.json"],
1280
1285
  traceabilityRule: "Review verdict must reference specific spec criteria and TDD evidence. Downstream ship stage must reference review verdict."
1281
1286
  },
1282
1287
  artifactValidation: [
@@ -1284,7 +1289,7 @@ const REVIEW = {
1284
1289
  { section: "Layer 2 Findings", required: true, validationRule: "Each finding has severity, description, and resolution status." },
1285
1290
  { section: "Review Army Contract", required: true, validationRule: "Structured findings include id/severity/confidence/fingerprint/reportedBy/status with dedup reconciliation summary." },
1286
1291
  { section: "Review Readiness Dashboard", required: true, validationRule: "At least 4 readiness checklist lines including blocker and recommendation status." },
1287
- { section: "Severity Summary", required: true, validationRule: "Counts: N critical, N important, N suggestion." },
1292
+ { section: "Severity Summary", required: true, validationRule: "Per-severity count lines for critical, important, and suggestion buckets." },
1288
1293
  { section: "Final Verdict", required: true, validationRule: "Exactly one of: APPROVED, APPROVED_WITH_CONCERNS, BLOCKED." }
1289
1294
  ],
1290
1295
  namedAntiPattern: {
@@ -375,8 +375,7 @@ Execution rule: complete and verify each wave before starting the next wave.
375
375
  "confidence": 7,
376
376
  "category": "correctness",
377
377
  "location": {
378
- "file": "",
379
- "line": 0
378
+ "file": ""
380
379
  },
381
380
  "fingerprint": "",
382
381
  "reportedBy": [],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cclaw-cli",
3
- "version": "0.5.13",
3
+ "version": "0.5.14",
4
4
  "description": "Installer-first flow toolkit for coding agents",
5
5
  "type": "module",
6
6
  "bin": {