cclaw-cli 0.51.13 → 0.51.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
1
1
  import fs from "node:fs/promises";
2
+ import { createHash } from "node:crypto";
2
3
  import path from "node:path";
3
4
  import { resolveArtifactPath as resolveStageArtifactPath } from "./artifact-paths.js";
4
5
  import { readConfig } from "./config.js";
@@ -94,6 +95,15 @@ function sectionBodyByAnyName(sections, sectionNames) {
94
95
  return null;
95
96
  return bodies.join("\n");
96
97
  }
98
/**
 * Find the first section whose heading begins with `prefix` and return its body.
 *
 * The prefix is passed through `normalizeHeadingTitle` and lowercased; each
 * heading is only lowercased before the comparison.
 * NOTE(review): this presumes the keys of `sections` are already normalized
 * heading titles — confirm against `extractH2Sections`.
 *
 * @param {{ entries(): Iterable<[string, string]> }} sections - Heading-to-body
 *   collection; iterated in its own order, so the first match wins.
 * @param {string} prefix - Heading prefix to look for (case-insensitive).
 * @returns {string | null} The matching section body, or `null` when nothing
 *   matches or the prefix is empty.
 */
function sectionBodyByHeadingPrefix(sections, prefix) {
  // Guard before normalizing: "".startsWith-style matching would otherwise
  // accept every heading and silently return whichever section comes first.
  if (typeof prefix !== "string" || prefix.trim().length === 0) {
    return null;
  }
  const want = normalizeHeadingTitle(prefix).toLowerCase();
  // Normalization may strip the prefix down to nothing; same hazard as above.
  if (want.length === 0) {
    return null;
  }
  for (const [heading, body] of sections.entries()) {
    if (heading.toLowerCase().startsWith(want)) {
      return body;
    }
  }
  return null;
}
97
107
  export function extractMarkdownSectionBody(markdown, section) {
98
108
  return sectionBodyByName(extractH2Sections(markdown), section);
99
109
  }
@@ -675,6 +685,52 @@ function validateRequirementsTaxonomy(sectionBody) {
675
685
  details: "Requirements table uses canonical Priority values."
676
686
  };
677
687
  }
688
/**
 * Validate the `LD#<sha8>` anchors listed in a Locked Decisions section body.
 *
 * Two sources of anchors are inspected:
 *  - table rows from `getMarkdownTableRows` (presumably data rows as arrays of
 *    cell strings — confirm against that helper): cell 0 must be an anchor and,
 *    when cell 1 (the Decision text) is non-empty, must equal the hash that
 *    `lockedDecisionHash` derives from that text;
 *  - bullet lines (`-` or `*`): each must contain an anchor somewhere.
 *
 * All anchors are compared and returned in lowercase.
 * NOTE(review): a body with no rows and no bullets yields `ok: true` with zero
 * anchors; the final "no anchors found" issue can only fire when every
 * row/bullet has already produced its own issue. Confirm this leniency is
 * intended.
 *
 * @param {string} sectionBody - Raw markdown body of the section.
 * @returns {{ ok: boolean, anchors: string[], details: string }} `anchors` is
 *   the de-duplicated list in first-seen order; `details` is either a success
 *   summary or the issues joined with "; ".
 */
function validateLockedDecisionAnchors(sectionBody) {
  const rows = getMarkdownTableRows(sectionBody);
  const lines = sectionBody
    .split(/\r?\n/u)
    .map((line) => line.trim())
    .filter((line) => /^[-*]\s+\S/u.test(line));
  const anchors = [];
  const issues = [];

  for (const [index, row] of rows.entries()) {
    const anchor = (row[0] ?? "").trim().toLowerCase();
    const decisionText = (row[1] ?? "").trim();
    if (!/^ld#[0-9a-f]{8}$/u.test(anchor)) {
      issues.push(`row ${index + 1} has invalid anchor "${row[0] ?? ""}"`);
      continue;
    }
    anchors.push(anchor);
    // The anchor is content-derived: only verifiable when Decision text exists.
    if (decisionText.length > 0) {
      const expected = lockedDecisionHash(decisionText).toLowerCase();
      if (anchor !== expected) {
        issues.push(`row ${index + 1} anchor should be ${expected} for its Decision text`);
      }
    }
  }

  for (const [index, line] of lines.entries()) {
    const anchor = /\bLD#[0-9a-f]{8}\b/iu.exec(line)?.[0]?.toLowerCase();
    if (!anchor) {
      issues.push(`bullet ${index + 1} is missing an LD#<sha8> anchor`);
      continue;
    }
    anchors.push(anchor);
  }

  // Single pass instead of indexOf-inside-filter: `seen` keeps first-seen
  // order, `duplicateAnchors` records each repeated anchor once, in the order
  // its first repeat is encountered.
  const seen = new Set();
  const duplicateAnchors = new Set();
  for (const anchor of anchors) {
    if (seen.has(anchor)) {
      duplicateAnchors.add(anchor);
    } else {
      seen.add(anchor);
    }
  }
  if (duplicateAnchors.size > 0) {
    issues.push(`duplicate anchors: ${[...duplicateAnchors].join(", ")}`);
  }
  if (anchors.length === 0 && (rows.length > 0 || lines.length > 0)) {
    issues.push("no LD#<sha8> anchors found");
  }

  return {
    ok: issues.length === 0,
    anchors: [...seen],
    details: issues.length === 0
      ? `${anchors.length} LD#hash anchor(s) recorded with no duplicates.`
      : issues.join("; ")
  };
}
678
734
  const INTERACTION_EDGE_CASE_REQUIREMENTS = [
679
735
  { label: "double-click", pattern: /\bdouble[\s-]?click\b/iu },
680
736
  {
@@ -1286,6 +1342,18 @@ function extractDecisionIds(text) {
1286
1342
  const ids = text.match(/\bD-\d+\b/gu) ?? [];
1287
1343
  return [...new Set(ids)];
1288
1344
  }
1345
/**
 * Collect the distinct requirement IDs (`R` followed by digits, delimited by
 * word boundaries) that appear in `text`.
 *
 * @param {string} text - Markdown to scan.
 * @returns {string[]} Unique IDs in order of first appearance; empty when none.
 */
function extractRequirementIdsFromMarkdown(text) {
  const matches = text.match(/\bR\d+\b/gu);
  if (matches === null) {
    return [];
  }
  return Array.from(new Set(matches));
}
1349
/**
 * Collect the distinct `LD#<8 hex>` anchors that appear in `text`.
 *
 * Matching is case-insensitive and every anchor is returned fully lowercased
 * (including the `ld#` prefix), so callers can compare against lowercased text.
 *
 * @param {string} text - Markdown to scan.
 * @returns {string[]} Unique lowercase anchors in order of first appearance.
 */
function extractLockedDecisionAnchors(text) {
  const ids = text.match(/\bLD#[0-9a-f]{8}\b/giu) ?? [];
  // Lowercasing alone is the whole normalization; rewriting the prefix to
  // "LD#" first had no effect because it was lowercased again right after.
  return [...new Set(ids.map((id) => id.toLowerCase()))];
}
1353
/**
 * Derive the content anchor for a locked decision.
 *
 * The text is normalized (whitespace runs collapsed to one space, trimmed,
 * lowercased), hashed with SHA-256, and the first 8 hex characters of the
 * digest are appended to the `LD#` prefix.
 *
 * @param {string} value - Decision text.
 * @returns {string} Anchor of the form `LD#xxxxxxxx`.
 */
function lockedDecisionHash(value) {
  const collapsed = value.replace(/\s+/gu, " ");
  const normalized = collapsed.trim().toLowerCase();
  const hasher = createHash("sha256");
  hasher.update(normalized);
  const shortDigest = hasher.digest("hex").slice(0, 8);
  return `LD#${shortDigest}`;
}
1289
1357
  function collectPatternHits(text, patterns) {
1290
1358
  const hits = [];
1291
1359
  for (const pattern of patterns) {
@@ -1401,7 +1469,7 @@ function validateSectionBody(sectionBody, rule, sectionName) {
1401
1469
  if (sectionNameNormalized === "premise challenge") {
1402
1470
  return validatePremiseChallenge(sectionBody);
1403
1471
  }
1404
- if (sectionNameNormalized === "requirements") {
1472
+ if (sectionNameNormalized.startsWith("requirements")) {
1405
1473
  return validateRequirementsTaxonomy(sectionBody);
1406
1474
  }
1407
1475
  if (sectionNameNormalized === "data flow") {
@@ -1823,9 +1891,9 @@ export async function lintArtifact(projectRoot, stage, track = "standard") {
1823
1891
  });
1824
1892
  }
1825
1893
  if (stage === "scope") {
1826
- const lockedDecisionsBody = sectionBodyByName(sections, "Locked Decisions (D-XX)") ?? "";
1894
+ const lockedDecisionsBody = sectionBodyByHeadingPrefix(sections, "Locked Decisions") ?? "";
1827
1895
  const strictScopeGuards = parsedFrontmatter.hasFrontmatter ||
1828
- headingPresent(sections, "Locked Decisions (D-XX)");
1896
+ sectionBodyByHeadingPrefix(sections, "Locked Decisions") !== null;
1829
1897
  const scopeSections = [
1830
1898
  sectionBodyByAnyName(sections, ["In Scope / Out of Scope", "In Scope", "Out of Scope"]) ?? "",
1831
1899
  sectionBodyByName(sections, "Scope Summary") ?? "",
@@ -1841,12 +1909,18 @@ export async function lintArtifact(projectRoot, stage, track = "standard") {
1841
1909
  ? "No scope-reduction phrases detected in scope boundary sections."
1842
1910
  : `Detected scope-reduction phrase(s): ${reductionHits.join(", ")}.`
1843
1911
  });
1844
- // When the Locked Decisions section is present we must enforce the
1845
- // D-XX ID contract at runtime (previously this was prose-only in the
1846
- // artifactValidation rule). Empty body, missing IDs, and duplicate
1847
- // IDs all fail the lint; absence of the section remains advisory so
1848
- // scope stays optional for small/quick tracks.
1849
- if (headingPresent(sections, "Locked Decisions (D-XX)")) {
1912
+ if (sectionBodyByHeadingPrefix(sections, "Locked Decisions") !== null) {
1913
+ const anchorValidation = validateLockedDecisionAnchors(lockedDecisionsBody);
1914
+ findings.push({
1915
+ section: "Locked Decisions Hash Integrity",
1916
+ required: true,
1917
+ rule: "Locked Decisions section must list unique LD#<sha8> content-derived anchors.",
1918
+ found: anchorValidation.ok,
1919
+ details: anchorValidation.details
1920
+ });
1921
+ // Legacy D-XX rows remain advisory for older artifacts, but new templates
1922
+ // use LD#hash anchors. This check keeps D-XX duplicates visible without
1923
+ // making old artifacts the primary contract.
1850
1924
  const listDecisionLines = lockedDecisionsBody
1851
1925
  .split(/\r?\n/u)
1852
1926
  .map((line) => line.trim())
@@ -1881,7 +1955,7 @@ export async function lintArtifact(projectRoot, stage, track = "standard") {
1881
1955
  }
1882
1956
  findings.push({
1883
1957
  section: "Locked Decisions ID Integrity",
1884
- required: true,
1958
+ required: false,
1885
1959
  rule: "Locked Decisions section must list each decision with a unique stable D-XX ID.",
1886
1960
  found: issues.length === 0,
1887
1961
  details: issues.length === 0
@@ -1890,6 +1964,46 @@ export async function lintArtifact(projectRoot, stage, track = "standard") {
1890
1964
  });
1891
1965
  }
1892
1966
  }
1967
+ if (["design", "spec", "plan", "review"].includes(stage)) {
1968
+ const scopeArtifact = await resolveStageArtifactPath("scope", {
1969
+ projectRoot,
1970
+ track,
1971
+ intent: "read"
1972
+ });
1973
+ if (await exists(scopeArtifact.absPath)) {
1974
+ const scopeRaw = await fs.readFile(scopeArtifact.absPath, "utf8");
1975
+ const scopeSections = extractH2Sections(scopeRaw);
1976
+ const requirementsBody = sectionBodyByHeadingPrefix(scopeSections, "Requirements") ?? "";
1977
+ const lockedDecisionsBody = sectionBodyByHeadingPrefix(scopeSections, "Locked Decisions") ?? "";
1978
+ const requirementIds = extractRequirementIdsFromMarkdown(requirementsBody);
1979
+ const lockedDecisionAnchors = extractLockedDecisionAnchors(lockedDecisionsBody);
1980
+ const missingRequirementRefs = requirementIds.filter((id) => !raw.includes(id));
1981
+ const normalizedCurrentRaw = raw.toLowerCase();
1982
+ const missingDecisionRefs = lockedDecisionAnchors.filter((id) => !normalizedCurrentRaw.includes(id));
1983
+ findings.push({
1984
+ section: "Scope Requirement Reference Integrity",
1985
+ required: requirementIds.length > 0,
1986
+ rule: "Every R# requirement ID from scope must be referenced by downstream artifacts.",
1987
+ found: missingRequirementRefs.length === 0,
1988
+ details: requirementIds.length === 0
1989
+ ? "No R# requirement IDs found in scope artifact; reference check skipped."
1990
+ : missingRequirementRefs.length === 0
1991
+ ? `All ${requirementIds.length} scope requirement ID(s) are referenced.`
1992
+ : `Missing scope requirement reference(s): ${missingRequirementRefs.join(", ")}.`
1993
+ });
1994
+ findings.push({
1995
+ section: "Locked Decision Hash Reference Integrity",
1996
+ required: lockedDecisionAnchors.length > 0,
1997
+ rule: "Every LD#hash locked decision anchor from scope must be referenced by downstream artifacts.",
1998
+ found: missingDecisionRefs.length === 0,
1999
+ details: lockedDecisionAnchors.length === 0
2000
+ ? "No LD#hash anchors found in scope artifact; reference check skipped."
2001
+ : missingDecisionRefs.length === 0
2002
+ ? `All ${lockedDecisionAnchors.length} locked decision anchor(s) are referenced.`
2003
+ : `Missing locked decision reference(s): ${missingDecisionRefs.join(", ")}.`
2004
+ });
2005
+ }
2006
+ }
1893
2007
  const passed = findings.every((f) => !f.required || f.found);
1894
2008
  return { stage, file: relFile, passed, findings };
1895
2009
  }
@@ -135,6 +135,9 @@ This is the gate function for completion claims. No "done", "all good", or
135
135
  "tests pass" unless fresh evidence from this turn proves it.
136
136
 
137
137
  - Run verification commands (tests/build/lint/type-check) for the changed scope.
138
+ - Before \`tdd -> review\` and \`review -> ship\`, discover the real test command
139
+ from repo config (package scripts, pytest/go/cargo/maven/gradle signals) and
140
+ cite that exact command in the gate evidence.
138
141
  - Confirm output directly; do not infer success from prior runs or green memories.
139
142
  - If this is a bug fix, capture RED -> GREEN evidence for the regression path.
140
143
  - If a command fails, report the failure as diagnostic evidence and stop before completion.
@@ -46,7 +46,7 @@ export const PLAN = {
46
46
  "Slice into vertical tasks — each task targets 2-5 minutes, produces one testable outcome, and touches one coherent area.",
47
47
  "Task Contract — every task has one coherent outcome, AC mapping, exact verification command/manual step, and expected evidence snippet or pass condition. Avoid vague `run tests` wording.",
48
48
  "Annotate slice-review metadata — if `.cclaw/config.yaml::sliceReview.enabled` is true, every task row additionally carries `touchCount` (rough number of files expected to change) and `touchPaths` (glob hints, e.g. `migrations/**`, `src/auth/**`). A task may set `highRisk: true` to force a review pass regardless of thresholds. These fields feed the TDD stage's Per-Slice Review point; when `sliceReview` is disabled they are optional.",
49
- "Map scope Locked Decisions — every D-XX from scope is referenced by at least one plan task (or explicitly marked deferred with reason).",
49
+ "Map scope Locked Decisions — every LD#hash anchor from scope is referenced by at least one plan task (or explicitly marked deferred with reason).",
50
50
  "Run anti-placeholder + anti-scope-reduction scans — block `TODO/TBD/...` and phrasing like `v1`, `for now`, `later` for locked boundaries.",
51
51
  "Define validation points — mark where progress must be checked before continuing, with concrete command and expected evidence.",
52
52
  "Define execution posture — record whether execution should be sequential, dependency-batched, parallel-safe, or blocked; include risk triggers and RED/GREEN/REFACTOR checkpoint/commit expectations when the repo workflow supports them.",
@@ -72,7 +72,7 @@ export const REVIEW = {
72
72
  { id: "review_layer_coverage_complete", description: "Layer coverage map in 07-review-army.json confirms spec/correctness/security/performance/architecture/external-safety tags were considered." },
73
73
  { id: "review_criticals_resolved", description: "No unresolved critical blockers remain." },
74
74
  { id: "review_army_json_valid", description: "07-review-army.json passes schema validation (validateReviewArmy)." },
75
- { id: "review_trace_matrix_clean", description: "Trace matrix has no orphaned criteria/tasks/test slices for the active run." }
75
+ { id: "review_trace_matrix_clean", description: "Trace matrix has no orphaned criteria/tasks/test slices for the active run, and evidence cites a discovered real test command before ship handoff." }
76
76
  ],
77
77
  requiredEvidence: [
78
78
  "Artifact written to `.cclaw/artifacts/07-review.md`.",
@@ -82,6 +82,7 @@ export const REVIEW = {
82
82
  "Layer 2 sections completed with findings.",
83
83
  "Severity log includes critical/important/suggestion buckets.",
84
84
  "Explicit final verdict: APPROVED, APPROVED_WITH_CONCERNS, or BLOCKED.",
85
+ "Fresh verification command discovery recorded, and the command cited in `review_trace_matrix_clean` evidence before ship handoff.",
85
86
  "If BLOCKED: include explicit remediation route (`ROUTE_BACK_TO_TDD`) with blocking finding IDs."
86
87
  ],
87
88
  inputs: ["implementation diff", "spec and plan artifacts", "test/build evidence"],
@@ -87,7 +87,7 @@ export const SCOPE = {
87
87
  "In-scope and out-of-scope lists are explicit.",
88
88
  "Discretion areas are explicit (or marked as `None`).",
89
89
  "Selected mode and rationale are documented.",
90
- "Locked Decisions section lists stable D-XX IDs for non-negotiable boundaries.",
90
+ "Locked Decisions section lists stable LD#hash anchors for non-negotiable boundaries.",
91
91
  "Premise challenge findings documented.",
92
92
  "Outside Voice findings and dispositions are recorded (accept/reject/defer with rationale) before final approval.",
93
93
  `Spec review loop summary includes a table with columns Iteration, Quality Score, Findings, plus Stop reason, Target score, and Max iterations. This is outside-voice evidence only; it does not satisfy user approval. ${reviewLoopPolicySummary("scope")}`,
@@ -143,7 +143,7 @@ export const SCOPE = {
143
143
  { section: "Landscape Check", required: false, validationRule: "When mode is EXPAND/SELECTIVE, include at least one external reference insight and its impact on scope." },
144
144
  { section: "Taste Calibration", required: false, validationRule: "Must reference 2-3 strong in-repo modules/files that define the quality bar or explicitly justify omission." },
145
145
  { section: "Requirements", required: false, validationRule: "Table of stable requirement IDs (R1, R2, R3…) one per row with observable outcome, priority, and source. IDs are assigned once and never renumbered across scope/design/spec/plan/review; dropped requirements stay with Priority `DROPPED`." },
146
- { section: "Locked Decisions (D-XX)", required: false, validationRule: "List of stable locked decisions with IDs D-01, D-02... Each ID appears once, includes rationale, and is intended for downstream cross-stage traceability." },
146
+ { section: "Locked Decisions (LD#hash)", required: false, validationRule: "List of stable locked decisions with unique `LD#<sha8>` anchors. Each anchor is derived from the normalized Decision cell and is referenced downstream for cross-stage traceability." },
147
147
  { section: "Implementation Alternatives", required: false, validationRule: "2-3 options with Name, Summary, Effort, Risk, Pros, Cons, and Reuses. Must include minimal viable and ideal architecture options." },
148
148
  { section: "Scope Mode", required: true, validationRule: "Must state selected mode and rationale with default heuristic justification." },
149
149
  { section: "Mode-Specific Analysis", required: false, validationRule: "Deep/complex scope only: document the analysis matching the selected mode. Default path may record a concise mode rationale instead." },
@@ -183,10 +183,14 @@ ${SEED_SHELF_SECTION}
183
183
  > is later dropped, keep the row and mark Priority \`DROPPED\`; if a new one is
184
184
  > added mid-flow, append with the next free R-number — do NOT reuse numbers.
185
185
 
186
- ## Locked Decisions (D-XX)
187
- | Decision ID | Decision | Why locked now | Downstream impact |
186
+ ## Locked Decisions (LD#hash)
187
+ | Decision Anchor | Decision | Why locked now | Downstream impact |
188
188
  |---|---|---|---|
189
- | D-01 | | | |
189
+ | LD#<sha8> | | | |
190
+
191
+ > Decision Anchor is \`LD#\` + the first 8 lowercase hex chars of SHA-256 over
192
+ > the normalized \`Decision\` cell (trim, collapse whitespace, lowercase). Downstream
193
+ > design/spec/plan/review artifacts reference these anchors verbatim.
190
194
 
191
195
  ## In Scope / Out of Scope
192
196
 
@@ -308,9 +312,9 @@ ${SEED_SHELF_SECTION}
308
312
  | pitfalls-researcher | | | |
309
313
 
310
314
  ## Architecture Boundaries
311
- | Component | Responsibility | Owner |
312
- |---|---|---|
313
- | | | |
315
+ | Component | Responsibility | Requirement Refs (R#) | Decision Refs (LD#hash) | Owner |
316
+ |---|---|---|---|---|
317
+ | | | | | |
314
318
 
315
319
  ## Architecture Diagram
316
320
 
@@ -377,11 +381,11 @@ ${MARKDOWN_CODE_FENCE}
377
381
  ### Interaction Edge Case Matrix
378
382
  | Edge case | Handled? | Design response | Deferred item (if not handled) |
379
383
  |---|---|---|---|
380
- | double-click | yes/no | | None / D-XX |
381
- | nav-away-mid-request | yes/no | | None / D-XX |
382
- | 10K-result dataset | yes/no | | None / D-XX |
383
- | background-job abandonment | yes/no | | None / D-XX |
384
- | zombie connection | yes/no | | None / D-XX |
384
+ | double-click | yes/no | | None / LD#hash |
385
+ | nav-away-mid-request | yes/no | | None / LD#hash |
386
+ | 10K-result dataset | yes/no | | None / LD#hash |
387
+ | background-job abandonment | yes/no | | None / LD#hash |
388
+ | zombie connection | yes/no | | None / LD#hash |
385
389
 
386
390
  ## Security & Threat Model
387
391
  | Boundary | Threat | Mitigation | Owner |
@@ -480,7 +484,7 @@ ${SEED_SHELF_SECTION}
480
484
  - Drift from upstream (or \`None\`):
481
485
 
482
486
  ## Acceptance Criteria
483
- | ID | Requirement Ref (R#) | Criterion (observable/measurable/falsifiable) | Design Decision Ref |
487
+ | ID | Requirement Ref (R#) | Criterion (observable/measurable/falsifiable) | Design Decision Ref (LD#hash) |
484
488
  |---|---|---|---|
485
489
  | AC-1 | R1 | | |
486
490
 
@@ -585,9 +589,9 @@ Execution rule: complete and verify each batch before starting the next batch.
585
589
  - TDD checkpoint plan: RED commit/checkpoint -> GREEN commit/checkpoint -> REFACTOR commit/checkpoint (or deferred because: )
586
590
 
587
591
  ## Locked Decision Coverage
588
- | Decision ID | Source section | Plan tasks implementing decision | Status |
592
+ | Decision Ref (LD#hash) | Source section | Plan tasks implementing decision | Status |
589
593
  |---|---|---|---|
590
- | D-01 | 02-scope.md > Locked Decisions | T-1 | covered |
594
+ | LD#<sha8> | 02-scope.md > Locked Decisions | T-1 | covered |
591
595
 
592
596
  ## Risk Assessment
593
597
  | Task/Batch | Risk | Likelihood | Impact | Mitigation |
@@ -605,7 +609,7 @@ Execution rule: complete and verify each batch before starting the next batch.
605
609
 
606
610
  ## No Scope Reduction Language Scan
607
611
  - Scanned phrases: \`v1\`, \`for now\`, \`later\`, \`temporary\`, \`placeholder\`, \`mock for now\`, \`hardcoded for now\`, \`will improve later\`.
608
- - Hits: 0 (required when Locked Decisions section is non-empty).
612
+ - Hits: 0 (required when Locked Decisions section is non-empty; use LD#hash anchors).
609
613
 
610
614
  ## WAIT_FOR_CONFIRM
611
615
  - Status: pending
@@ -750,6 +754,11 @@ Execution rule: complete and verify each batch before starting the next batch.
750
754
  - Orphaned tests: 0
751
755
  - Evidence ref:
752
756
 
757
+ ## Verification Command Discovery
758
+ | Source | Discovered command | Result | Evidence ref |
759
+ |---|---|---|---|
760
+ | package.json / pytest / go.mod / Cargo.toml / pom.xml / gradle | | PASS/FAIL | |
761
+
753
762
  ## Blocked Route
754
763
  - ROUTE_BACK_TO_TDD: only when Final Verdict = BLOCKED
755
764
  - Target stage: tdd
@@ -44,6 +44,64 @@ function sameStringArray(a, b) {
44
44
  return false;
45
45
  return a.every((value, index) => value === b[index]);
46
46
  }
47
/**
 * Best-effort read of a JSON file that is expected to hold an object.
 *
 * Any failure while reading or parsing is swallowed and reported as `null`,
 * as is any parsed value that is not a non-array object.
 *
 * @param {string} filePath - Path of the file to read as UTF-8.
 * @returns {Promise<object | null>} The parsed object, or `null`.
 */
async function readJsonFile(filePath) {
  let parsed;
  try {
    const text = await fs.readFile(filePath, "utf8");
    parsed = JSON.parse(text);
  } catch {
    return null;
  }
  const isPlainObject = Boolean(parsed) && typeof parsed === "object" && !Array.isArray(parsed);
  return isPlainObject ? parsed : null;
}
58
/**
 * Build the list of test commands that gate evidence may cite for a project.
 *
 * Sources, in order:
 *  1. `package.json` scripts named `test` or `test:*` whose value is a string.
 *     For each script, both the shorthand (`npm test`, `pnpm test:unit`, ...)
 *     and the explicit `run` form (`npm run test`, `yarn run test:unit`, ...)
 *     are listed for npm, pnpm, yarn, and bun, so evidence that cites either
 *     spelling is recognized. `bun test` is kept for compatibility even though
 *     it invokes Bun's own test runner; `bun run <script>` is the form that
 *     runs the package.json script.
 *  2. Marker files in `projectRoot` for pytest, Go, Cargo, Maven, and Gradle.
 *
 * @param {string} projectRoot - Directory to inspect.
 * @returns {Promise<string[]>} Commands after passing through `unique`
 *   (presumably order-preserving de-duplication — confirm against that helper).
 */
async function discoverRealTestCommands(projectRoot) {
  const commands = [];
  const packageJson = await readJsonFile(path.join(projectRoot, "package.json"));
  const scripts = packageJson?.scripts;
  if (scripts && typeof scripts === "object" && !Array.isArray(scripts)) {
    const scriptNames = Object.keys(scripts).filter((name) => {
      const value = scripts[name];
      return typeof value === "string" && (name === "test" || name.startsWith("test:"));
    });
    for (const name of scriptNames.sort()) {
      const isDefault = name === "test";
      // Shorthand spellings (the historically accepted set).
      commands.push(isDefault ? "npm test" : `npm run ${name}`);
      commands.push(isDefault ? "pnpm test" : `pnpm ${name}`);
      commands.push(isDefault ? "yarn test" : `yarn ${name}`);
      commands.push(isDefault ? "bun test" : `bun run ${name}`);
      // Explicit `run` spellings; overlaps with the above are removed by unique().
      commands.push(
        `npm run ${name}`,
        `pnpm run ${name}`,
        `yarn run ${name}`,
        `bun run ${name}`
      );
    }
  }

  // Each entry: if ANY listed file exists in projectRoot, add its commands.
  const markers = [
    { files: ["pyproject.toml", "pytest.ini"], commands: ["pytest"] },
    { files: ["go.mod"], commands: ["go test ./..."] },
    { files: ["Cargo.toml"], commands: ["cargo test"] },
    { files: ["pom.xml"], commands: ["mvn test"] },
    { files: ["build.gradle", "build.gradle.kts"], commands: ["gradle test", "./gradlew test"] }
  ];
  // The existence checks are independent, so run them together; results are
  // applied in table order to keep the output order stable.
  const present = await Promise.all(markers.map(async ({ files }) => {
    const hits = await Promise.all(files.map((file) => exists(path.join(projectRoot, file))));
    return hits.some(Boolean);
  }));
  markers.forEach((marker, index) => {
    if (present[index]) {
      commands.push(...marker.commands);
    }
  });

  return unique(commands);
}
90
/**
 * Check that the guard evidence for a verification gate cites one of the
 * project's discovered test commands.
 *
 * Only two stage/gate pairs are checked: `tdd` + `tdd_verified_before_complete`
 * and `review` + `review_trace_matrix_clean`. Matching is a case-insensitive
 * substring test of each discovered command against the evidence string.
 *
 * @param {string} projectRoot - Project directory used for command discovery.
 * @param {string} stage - Current stage name.
 * @param {string} gateId - Gate identifier being verified.
 * @param {{ guardEvidence: Record<string, unknown> }} flowState - Flow state
 *   whose `guardEvidence[gateId]` holds the evidence text.
 * @returns {Promise<string | null>} An issue message when the evidence cites
 *   none of the discovered commands; `null` when the gate is not one of the
 *   checked pairs, no commands were discovered, or a command is cited.
 */
async function verifyDiscoveredCommandEvidence(projectRoot, stage, gateId, flowState) {
  const isTddGate = stage === "tdd" && gateId === "tdd_verified_before_complete";
  const isReviewGate = stage === "review" && gateId === "review_trace_matrix_clean";
  if (!isTddGate && !isReviewGate) {
    return null;
  }

  const commands = await discoverRealTestCommands(projectRoot);
  if (commands.length === 0) {
    return null;
  }

  const evidence = flowState.guardEvidence[gateId];
  // Non-string evidence is treated as empty text, so nothing can match it.
  const haystack = typeof evidence === "string" ? evidence.toLowerCase() : "";
  for (const command of commands) {
    if (haystack.includes(command.toLowerCase())) {
      return null;
    }
  }
  return `${stage} verification gate blocked (${gateId}): guard evidence must cite one discovered real test command: ${commands.join(", ")}.`;
}
47
105
  const RECONCILIATION_NOTICES_FILE = "reconciliation-notices.json";
48
106
  const RECONCILIATION_NOTICES_SCHEMA_VERSION = 1;
49
107
  const DESIGN_RESEARCH_REQUIRED_SECTIONS = [
@@ -203,6 +261,11 @@ export async function verifyCurrentStageGateEvidence(projectRoot, flowState) {
203
261
  const evidence = flowState.guardEvidence[gateId];
204
262
  if (typeof evidence !== "string" || evidence.trim().length === 0) {
205
263
  issues.push(`passed gate "${gateId}" is missing guardEvidence entry.`);
264
+ continue;
265
+ }
266
+ const discoveredCommandIssue = await verifyDiscoveredCommandEvidence(projectRoot, stage, gateId, flowState);
267
+ if (discoveredCommandIssue) {
268
+ issues.push(discoveredCommandIssue);
206
269
  }
207
270
  }
208
271
  for (const gateId of catalog.blocked) {
@@ -203,6 +203,15 @@ const GATE_EVIDENCE_VALIDATORS = {
203
203
  }
204
204
  return null;
205
205
  },
206
+ "review:review_trace_matrix_clean": (evidence) => {
207
+ if (!TEST_COMMAND_HINT_PATTERN.test(evidence)) {
208
+ return "must include the fresh verification command that was run before ship handoff (for example `npm test`, `pytest`, `go test`, or equivalent).";
209
+ }
210
+ if (!PASS_STATUS_PATTERN.test(evidence)) {
211
+ return "must include explicit success status (for example `PASS` or `GREEN`).";
212
+ }
213
+ return null;
214
+ },
206
215
  "ship:ship_finalization_executed": (evidence) => {
207
216
  if (!SHIP_FINALIZATION_MODE_PATTERN.test(evidence)) {
208
217
  return `must name the finalization mode that ran (for example ${SHIP_FINALIZATION_MODE_HINT}).`;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cclaw-cli",
3
- "version": "0.51.13",
3
+ "version": "0.51.15",
4
4
  "description": "Installer-first flow toolkit for coding agents",
5
5
  "type": "module",
6
6
  "bin": {