cclaw-cli 6.4.0 → 6.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,10 @@
1
1
  import fs from "node:fs/promises";
2
2
  import path from "node:path";
3
- import { checkCriticPredictionsContract, evaluateQaLogFloor, sectionBodyByName, validateApproachesTaxonomy, headingLineIndex, meaningfulLineCount, getMarkdownTableRows, parseShortCircuitStatus, validateCalibratedSelfReview, markdownFieldRegex } from "./shared.js";
3
+ import { checkCriticPredictionsContract, evaluateInvestigationTrace, evaluateQaLogFloor, sectionBodyByName, validateApproachesTaxonomy, headingLineIndex, meaningfulLineCount, getMarkdownTableRows, parseShortCircuitStatus, validateCalibratedSelfReview, markdownFieldRegex } from "./shared.js";
4
4
  import { readFlowState } from "../run-persistence.js";
5
5
  export async function lintBrainstormStage(ctx) {
6
6
  const { projectRoot, track, raw, absFile, sections, findings, parsedFrontmatter, brainstormShortCircuitBody, brainstormShortCircuitActivated, staleDiagramAuditEnabled, isTrivialOverride } = ctx;
7
+ evaluateInvestigationTrace(ctx, "Q&A Log");
7
8
  const qaLogBody = sectionBodyByName(sections, "Q&A Log");
8
9
  const qaLogRows = qaLogBody ? getMarkdownTableRows(qaLogBody) : [];
9
10
  const qaLogOk = qaLogBody !== null && qaLogRows.length > 0;
@@ -3,7 +3,7 @@ import path from "node:path";
3
3
  import { resolveArtifactPath as resolveStageArtifactPath } from "../artifact-paths.js";
4
4
  import { exists } from "../fs-utils.js";
5
5
  import { CONFIDENCE_FINDING_REGEX_SOURCE } from "../content/skills.js";
6
- import { checkCriticPredictionsContract, evaluateLayeredDocumentReviewStatus, evaluateQaLogFloor, extractMarkdownSectionBody, getMarkdownTableRows, meaningfulLineCount, sectionBodyByName, markdownFieldRegex } from "./shared.js";
6
+ import { checkCriticPredictionsContract, evaluateInvestigationTrace, evaluateLayeredDocumentReviewStatus, evaluateQaLogFloor, extractMarkdownSectionBody, getMarkdownTableRows, meaningfulLineCount, sectionBodyByName, markdownFieldRegex } from "./shared.js";
7
7
  const DESIGN_DIAGRAM_REQUIREMENTS = {
8
8
  lightweight: [
9
9
  {
@@ -268,6 +268,7 @@ async function runStaleDiagramAudit(projectRoot, artifactPath, artifactRaw, code
268
268
  }
269
269
  export async function lintDesignStage(ctx) {
270
270
  const { projectRoot, track, raw, absFile, sections, findings, parsedFrontmatter, brainstormShortCircuitBody, brainstormShortCircuitActivated, staleDiagramAuditEnabled, isTrivialOverride, activeStageFlags } = ctx;
271
+ evaluateInvestigationTrace(ctx, "Codebase Investigation");
271
272
  const qaLogBody = sectionBodyByName(sections, "Q&A Log");
272
273
  const qaLogRows = qaLogBody ? getMarkdownTableRows(qaLogBody) : [];
273
274
  const qaLogOk = qaLogBody !== null && qaLogRows.length > 0;
@@ -1,10 +1,11 @@
1
- import { evaluateLayeredDocumentReviewStatus, headingPresent, sectionBodyByName, collectPatternHits, PLACEHOLDER_PATTERNS, extractDecisionIds, SCOPE_REDUCTION_PATTERNS } from "./shared.js";
1
+ import { evaluateInvestigationTrace, evaluateLayeredDocumentReviewStatus, headingPresent, sectionBodyByName, collectPatternHits, PLACEHOLDER_PATTERNS, extractDecisionIds, SCOPE_REDUCTION_PATTERNS } from "./shared.js";
2
2
  import { resolveArtifactPath as resolveStageArtifactPath } from "../artifact-paths.js";
3
3
  import { exists } from "../fs-utils.js";
4
4
  import { FORBIDDEN_PLACEHOLDER_TOKENS, CONFIDENCE_FINDING_REGEX_SOURCE } from "../content/skills.js";
5
5
  import fs from "node:fs/promises";
6
6
  export async function lintPlanStage(ctx) {
7
7
  const { projectRoot, track, raw, absFile, sections, findings, parsedFrontmatter, brainstormShortCircuitBody, brainstormShortCircuitActivated, staleDiagramAuditEnabled, isTrivialOverride } = ctx;
8
+ evaluateInvestigationTrace(ctx, "Implementation Units");
8
9
  const strictPlanGuards = parsedFrontmatter.hasFrontmatter ||
9
10
  headingPresent(sections, "Plan Quality Scan") ||
10
11
  headingPresent(sections, "Locked Decision Coverage");
@@ -1,7 +1,8 @@
1
- import { markdownFieldRegex, sectionBodyByName } from "./shared.js";
1
+ import { evaluateInvestigationTrace, markdownFieldRegex, sectionBodyByName } from "./shared.js";
2
2
  import { checkReviewTddNoCrossArtifactDuplication } from "./review-army.js";
3
3
  export async function lintReviewStage(ctx) {
4
4
  const { projectRoot, track, raw, absFile, sections, findings, parsedFrontmatter, brainstormShortCircuitBody, brainstormShortCircuitActivated, staleDiagramAuditEnabled, isTrivialOverride } = ctx;
5
+ evaluateInvestigationTrace(ctx, "Changed-File Coverage");
5
6
  // Universal Layer 2.7 structural checks (superpowers requesting + receiving).
6
7
  const frameBody = sectionBodyByName(sections, "Pre-Critic Self-Review");
7
8
  if (frameBody !== null) {
@@ -1,9 +1,10 @@
1
- import { checkCriticPredictionsContract, evaluateQaLogFloor, sectionBodyByHeadingPrefix, sectionBodyByName, extractCanonicalScopeMode, getMarkdownTableRows } from "./shared.js";
1
+ import { checkCriticPredictionsContract, evaluateInvestigationTrace, evaluateQaLogFloor, sectionBodyByHeadingPrefix, sectionBodyByName, extractCanonicalScopeMode, getMarkdownTableRows } from "./shared.js";
2
2
  import { readDelegationLedger, recordExpansionStrategistSkippedByTrack } from "../delegation.js";
3
3
  import { shouldDemoteArtifactValidationByTrack } from "../content/stage-schema.js";
4
4
  import { readFlowState } from "../run-persistence.js";
5
5
  export async function lintScopeStage(ctx) {
6
6
  const { projectRoot, track, raw, absFile, sections, findings, parsedFrontmatter, brainstormShortCircuitBody, brainstormShortCircuitActivated, staleDiagramAuditEnabled, isTrivialOverride, activeStageFlags, taskClass } = ctx;
7
+ evaluateInvestigationTrace(ctx, "Q&A Log");
7
8
  const lockedDecisionsBody = sectionBodyByHeadingPrefix(sections, "Locked Decisions") ?? "";
8
9
  const scopeSummaryBody = sectionBodyByName(sections, "Scope Summary") ?? "";
9
10
  const selectedScopeMode = extractCanonicalScopeMode(scopeSummaryBody);
@@ -43,11 +44,11 @@ export async function lintScopeStage(ctx) {
43
44
  const skipByTrack = shouldDemoteArtifactValidationByTrack(track, taskClass);
44
45
  if (skipByTrack) {
45
46
  findings.push({
46
- section: "Expansion Strategist Delegation",
47
+ section: "Product Discovery Delegation (Strategist Mode)",
47
48
  required: false,
48
49
  rule: "When Scope Summary selects SCOPE EXPANSION or SELECTIVE EXPANSION, a completed `product-discovery` delegation for the active run with non-empty evidenceRefs is required.",
49
50
  found: true,
50
- details: `Expansion Strategist delegation requirement skipped for track="${track}"` +
51
+ details: `Product-discovery delegation requirement skipped for track="${track}"` +
51
52
  (taskClass ? `, taskClass="${taskClass}"` : "") +
52
53
  ` (Wave 25: lite-tier escape; selectedMode=${selectedScopeMode}).`
53
54
  });
@@ -78,12 +79,12 @@ export async function lintScopeStage(ctx) {
78
79
  const hasCompleted = discoveryRows.length > 0;
79
80
  const hasEvidence = discoveryRows.some((entry) => Array.isArray(entry.evidenceRefs) && entry.evidenceRefs.length > 0);
80
81
  findings.push({
81
- section: "Expansion Strategist Delegation",
82
+ section: "Product Discovery Delegation (Strategist Mode)",
82
83
  required: true,
83
84
  rule: "When Scope Summary selects SCOPE EXPANSION or SELECTIVE EXPANSION, a completed `product-discovery` delegation for the active run with non-empty evidenceRefs is required.",
84
85
  found: hasCompleted && hasEvidence,
85
86
  details: !hasCompleted
86
- ? `Scope mode ${selectedScopeMode} requires a completed product-discovery delegation row for active run ${delegationLedger.runId}.`
87
+ ? `Scope mode ${selectedScopeMode} requires a completed product-discovery delegation row for active run ${delegationLedger.runId}. In SELECTIVE EXPANSION / SCOPE EXPANSION, run product-discovery (mode=proactive) BEFORE stage-complete.`
87
88
  : hasEvidence
88
89
  ? `product-discovery delegation satisfied for mode ${selectedScopeMode}.`
89
90
  : "product-discovery delegation exists but evidenceRefs is empty; add at least one artifact/code evidence reference."
@@ -403,6 +403,60 @@ export declare function parseLearningSeedEntry(raw: unknown, index: number): {
403
403
  error?: string;
404
404
  };
405
405
  export declare function parseLearningsSection(sectionBody: string): LearningsParseResult;
406
+ /**
407
+ * Round 5 (v6.6.0) — file-path / reference detector for the
408
+ * `investigation_path_first_missing` advisory rule.
409
+ *
410
+ * The detector is intentionally permissive: it only needs to recognize
411
+ * "the author wrote down a path or ref" — the linter does NOT validate
412
+ * the path resolves on disk. Patterns matched (any one is enough):
413
+ * - Path ending in a recognized extension (TS/JS/MD/JSON/YAML plus other common source/config types)
414
+ * (`src/foo/bar.ts`, `tests/spec.test.ts`, `docs/quality-gates.md`).
415
+ * - Slash-bearing path under a known repo root prefix
416
+ * (`src/...`, `tests/...`, `docs/...`, `scripts/...`,
417
+ * `.cclaw/...`, `.cursor/...`, `node_modules/...`,
418
+ * `examples/...`, `e2e/...`).
419
+ * - GitHub-style ref (`owner/repo#123`, `org/repo@sha`,
420
+ * `path:line`, `path:line-line`).
421
+ * - Explicit `path:` / `paths:` / `ref:` / `refs:` / `file:` / `files:` / `cite:` / `citation:` marker.
422
+ * - Stable cclaw IDs (`R1`, `D-12`, `AC-3`, `T-4`, `S-2`, `DD-5`,
423
+ * `ADR-1`, `R-1`, `F-1`, `CR-1`, `I-1`, `QS-1`).
424
+ * - Backticked path-like token containing a slash.
425
+ *
426
+ * Exposed for unit tests (`tests/unit/investigation-trace-evaluator.test.ts`).
427
+ */
428
+ export declare const INVESTIGATION_TRACE_PATH_PATTERNS: readonly RegExp[];
429
+ export interface InvestigationTraceFinding {
430
+ ok: boolean;
431
+ details: string;
432
+ }
433
+ /**
434
+ * Internal core that does NOT depend on `StageLintContext`. Returned
435
+ * shape is consumed by `evaluateInvestigationTrace` (which pushes a
436
+ * finding into the context) and by unit tests that exercise the
437
+ * detector directly.
438
+ *
439
+ * Returns `null` for sections that are missing, empty, or contain only
440
+ * template scaffolding (table headers, separators, placeholder rows
441
+ * with empty cells, lone `- None.` lines). Callers treat `null` as
442
+ * silent — no finding is emitted.
443
+ */
444
+ export declare function checkInvestigationTrace(sectionBody: string | null): InvestigationTraceFinding | null;
445
+ /**
446
+ * Round 5 (v6.6.0) — advisory rule wired into the brainstorm / scope /
447
+ * design / tdd / plan / review linters.
448
+ *
449
+ * Behavior contract:
450
+ * - Section missing or empty / placeholder-only: silent (no finding).
451
+ * - Section has substantive content with a recognizable file path /
452
+ * ref / explicit `path:`-style marker in the first non-empty rows:
453
+ * advisory pass (no finding).
454
+ * - Section has substantive content but no path/ref signal: advisory
455
+ * FAIL finding with ruleId `investigation_path_first_missing`.
456
+ *
457
+ * The rule is `required: false` so it never blocks `stage-complete`.
458
+ */
459
+ export declare function evaluateInvestigationTrace(ctx: StageLintContext, sectionName: string): void;
406
460
  export declare function lineContainsVagueAdjective(text: string): string | null;
407
461
  export interface ParsedFrontmatter {
408
462
  hasFrontmatter: boolean;
@@ -1715,6 +1715,148 @@ export function parseLearningsSection(sectionBody) {
1715
1715
  details: `Parsed ${entries.length} learning bullet(s) as knowledge-compatible JSON entries.`
1716
1716
  };
1717
1717
  }
1718
/**
 * Round 5 (v6.6.0) — file-path / reference detector for the
 * `investigation_path_first_missing` advisory rule.
 *
 * The detector is intentionally permissive: it only needs to recognize
 * "the author wrote down a path or ref" — the linter does NOT validate
 * the path resolves on disk. Patterns matched (any one is enough), in
 * array order:
 *   1. Slash-bearing path ending in a recognized extension
 *      (ts, tsx, js, jsx, mjs, cjs, md, mdx, json, yaml, yml, toml, sh,
 *      py, rs, go, java, kt, swift, rb, css, scss, html), e.g.
 *      `src/foo/bar.ts`, `tests/spec.test.ts`, `docs/quality-gates.md`.
 *   2. Slash-bearing path under a known repo root prefix
 *      (`src/...`, `tests/...`, `docs/...`, `scripts/...`, `e2e/...`,
 *      `examples/...`, `packages/...`, `apps/...`, `cmd/...`,
 *      `internal/...`, `pkg/...`, `lib/...`, `app/...`, `server/...`,
 *      `client/...`, `backend/...`, `frontend/...`, `.cclaw/...`,
 *      `.cursor/...`, `.github/...`, `node_modules/...`).
 *   3. `path:line`, `path:line-line` or `path:line:col` reference.
 *   4. GitHub-style ref (`owner/repo#123`, `org/repo@sha`).
 *   5. Explicit `path:` / `paths:` / `ref:` / `refs:` / `file:` /
 *      `files:` / `cite:` / `citation:` marker followed by a value.
 *   6. Stable cclaw IDs (`R1`, `D-12`, `AC-3`, `T-4`, `S-2`, `DD-5`,
 *      `ADR-1`, `R-1`, `F-1`, `CR-1`, `I-1`, `QS-1`).
 *   7. Backticked token containing a slash.
 *
 * None of the patterns carries the `g`/`y` flag, so `.test()` is stateless.
 *
 * Exposed for unit tests (`tests/unit/investigation-trace-evaluator.test.ts`).
 */
export const INVESTIGATION_TRACE_PATH_PATTERNS = [
    /(?:^|[\s`(\[])(?:[A-Za-z0-9_.-]+\/)+[A-Za-z0-9_.-]+\.(?:ts|tsx|js|jsx|mjs|cjs|md|mdx|json|yaml|yml|toml|sh|py|rs|go|java|kt|swift|rb|css|scss|html)\b/iu,
    /(?:^|[\s`(\[])(?:src|tests?|docs?|scripts?|e2e|examples?|packages?|apps?|cmd|internal|pkg|lib|app|server|client|backend|frontend|\.cclaw|\.cursor|\.github|node_modules)\/[A-Za-z0-9_./-]+/iu,
    /\b[A-Za-z0-9_./-]+(?:\.[A-Za-z0-9]+)?:\d+(?:[-:]\d+)?\b/u,
    /\b[A-Za-z0-9_.-]+\/[A-Za-z0-9_.-]+(?:#\d+|@[0-9a-f]{6,40})\b/iu,
    /(?:^|\s)(?:paths?|refs?|file|files|cite|citation)\s*:\s*\S/iu,
    /\b(?:R|D|AC|T|S|DD|ADR|F|CR|I|QS)-?\d+\b/u,
    /`[^`]*\/[^`]+`/u
];
/** Leading word that marks a cell / line as template filler. Tested against lower-cased text. */
const INVESTIGATION_TRACE_PLACEHOLDER_PATTERN = /^(?:none|none\.|n\/a|tbd|todo|fixme|placeholder|optional|fill[\s-]?in)\b/u;
/** A table cell that holds nothing but a row identifier such as `R1` or `AC-3`. */
const INVESTIGATION_TRACE_ID_ONLY_CELL = /^[A-Z]{1,4}-?\d+$/u;
/** Markdown table separator row, e.g. `| --- | :-: |`. */
const INVESTIGATION_TRACE_TABLE_SEPARATOR = /^\|[-:| ]+\|$/u;
/**
 * True when a table cell is empty or starts with a placeholder word once
 * markdown decoration characters (backtick, `*`, `_`, `>`, `#`) are removed.
 */
function isInvestigationTracePlaceholderCell(cell) {
    const stripped = cell.replace(/[`*_>#]/gu, "").trim();
    if (stripped.length === 0)
        return true;
    return INVESTIGATION_TRACE_PLACEHOLDER_PATTERN.test(stripped.toLowerCase());
}
/**
 * True when a non-table line is empty or starts with a placeholder word
 * (optionally wrapped in an opening parenthesis) once markdown decoration
 * characters, including list dashes, are removed.
 */
function isInvestigationTracePlaceholderProseLine(line) {
    const stripped = line.replace(/[`*_>#-]/gu, "").trim();
    if (stripped.length === 0)
        return true;
    const lower = stripped.toLowerCase();
    if (INVESTIGATION_TRACE_PLACEHOLDER_PATTERN.test(lower))
        return true;
    return /^\(\s*(?:none|n\/a|tbd|todo|fixme|placeholder|optional|fill[\s-]?in)\b/u.test(lower);
}
/**
 * Collapses one table data row into the text the detector should scan.
 * Returns `null` when every cell is a placeholder, or when the only
 * substantive cell is a bare row ID (a pre-numbered but unfilled row).
 */
function investigationTraceTableRowText(row) {
    const substantive = row
        .split("|")
        .slice(1, -1)
        .map((cell) => cell.trim())
        .filter((cell) => !isInvestigationTracePlaceholderCell(cell));
    if (substantive.length === 0)
        return null;
    if (substantive.length === 1 && INVESTIGATION_TRACE_ID_ONLY_CELL.test(substantive[0]))
        return null;
    return substantive.join(" ");
}
/**
 * Internal core that does NOT depend on `StageLintContext`. The returned
 * shape is consumed by `evaluateInvestigationTrace` and by unit tests that
 * exercise the detector directly.
 *
 * Only the first five substantive lines / rows are inspected.
 *
 * @param {string | null} sectionBody Raw markdown body of the section.
 * @returns {{ ok: boolean, details: string } | null} `null` when the body
 *   is not a string, is empty, or contains only template scaffolding (HTML
 *   comments, table headers, separators, placeholder rows, lone `- None.`
 *   lines); otherwise `ok` tells whether any inspected line matched one of
 *   `INVESTIGATION_TRACE_PATH_PATTERNS`.
 */
export function checkInvestigationTrace(sectionBody) {
    // Guard against `undefined` as well as `null` so a missing section can
    // never crash the linter on `.split`.
    if (typeof sectionBody !== "string")
        return null;
    const lines = sectionBody.split(/\r?\n/u);
    const candidates = [];
    // Template guidance often lives in multi-line `<!-- ... -->` blocks; the
    // interior of such a block is scaffolding, not author content.
    let insideHtmlComment = false;
    for (let index = 0; index < lines.length; index += 1) {
        const trimmed = (lines[index] ?? "").trim();
        if (insideHtmlComment) {
            if (trimmed.includes("-->"))
                insideHtmlComment = false;
            continue;
        }
        if (trimmed.length === 0)
            continue;
        if (trimmed.startsWith("<!--")) {
            // Search for the terminator after the 4-char opener so `<!-->`
            // is not mistaken for a closed comment.
            insideHtmlComment = !trimmed.includes("-->", 4);
            continue;
        }
        if (/^\|.*\|$/u.test(trimmed)) {
            if (INVESTIGATION_TRACE_TABLE_SEPARATOR.test(trimmed))
                continue; // separator row
            const next = (lines[index + 1] ?? "").trim();
            if (INVESTIGATION_TRACE_TABLE_SEPARATOR.test(next))
                continue; // header row (followed by separator)
            const rowText = investigationTraceTableRowText(trimmed);
            if (rowText !== null)
                candidates.push(rowText);
            continue;
        }
        if (isInvestigationTracePlaceholderProseLine(trimmed))
            continue;
        candidates.push(trimmed);
    }
    if (candidates.length === 0)
        return null;
    const detectorMatched = candidates
        .slice(0, 5)
        .some((line) => INVESTIGATION_TRACE_PATH_PATTERNS.some((pattern) => pattern.test(line)));
    if (detectorMatched) {
        return {
            ok: true,
            details: "Investigation trace cites file paths or refs in the first non-empty row(s)."
        };
    }
    return {
        ok: false,
        details: "Investigation trace has prose-only content in its first row(s). Pass paths and refs, not pasted file contents (e.g. `src/foo/bar.ts:42`, `D-12`, `AC-3`)."
    };
}
1833
/**
 * Round 5 (v6.6.0) — advisory rule wired into the brainstorm / scope /
 * design / tdd / plan / review linters.
 *
 * Looks up `sectionName` in `ctx.sections`, runs `checkInvestigationTrace`
 * on its body and records the outcome in `ctx.findings`:
 *   - detector returns `null` (section missing, empty or placeholder-only):
 *     nothing is pushed.
 *   - otherwise one finding is pushed under the
 *     `investigation_path_first_missing` key, with `found` mirroring the
 *     detector's `ok` flag (`true` = path/ref cited, `false` = prose only).
 *
 * The finding is always `required: false`.
 *
 * @param ctx Stage lint context; only `sections` and `findings` are used.
 * @param {string} sectionName Heading of the section to inspect.
 * @returns {void}
 */
export function evaluateInvestigationTrace(ctx, sectionName) {
    const outcome = checkInvestigationTrace(sectionBodyByName(ctx.sections, sectionName));
    if (outcome !== null) {
        const { ok, details } = outcome;
        ctx.findings.push({
            section: "investigation_path_first_missing",
            required: false,
            rule: `[P3] investigation_path_first_missing — \`## ${sectionName}\` should cite paths and refs in the first non-empty row(s); pass paths and refs, not content.`,
            found: ok,
            details
        });
    }
}
1718
1860
  export function lineContainsVagueAdjective(text) {
1719
1861
  const lower = text.toLowerCase();
1720
1862
  for (const adjective of VAGUE_AC_ADJECTIVES) {
@@ -1,9 +1,10 @@
1
1
  import fs from "node:fs/promises";
2
2
  import path from "node:path";
3
3
  import { readDelegationLedger } from "../delegation.js";
4
- import { sectionBodyByName } from "./shared.js";
4
+ import { evaluateInvestigationTrace, sectionBodyByName } from "./shared.js";
5
5
  export async function lintTddStage(ctx) {
6
6
  const { projectRoot, track, raw, absFile, sections, findings, parsedFrontmatter, brainstormShortCircuitBody, brainstormShortCircuitActivated, staleDiagramAuditEnabled, isTrivialOverride } = ctx;
7
+ evaluateInvestigationTrace(ctx, "Watched-RED Proof");
7
8
  // Universal Layer 2.6 structural checks (superpowers TDD + evanflow vertical slices).
8
9
  const ironLawBody = sectionBodyByName(sections, "Iron Law Acknowledgement");
9
10
  if (ironLawBody === null) {
@@ -367,7 +367,7 @@ export async function lintArtifact(projectRoot, stage, track = "standard", optio
367
367
  * - `Architecture Diagram` — sync/async + failure-edge enforcement
368
368
  * - `Data Flow` — Interaction Edge Case mandatory rows
369
369
  * - `Stale Diagram Drift Check` — blast-radius file mtime audit
370
- * - `Expansion Strategist Delegation` — product-discovery delegation
370
+ * - `Product Discovery Delegation (Strategist Mode)` — product-discovery delegation
371
371
  *
372
372
  * Findings remain in the result so the caller can surface them as
373
373
  * advisory hints; only `required` flips to `false`.
@@ -376,5 +376,5 @@ const ARTIFACT_VALIDATION_LITE_DEMOTE_SECTIONS = new Set([
376
376
  "Architecture Diagram",
377
377
  "Data Flow",
378
378
  "Stale Diagram Drift Check",
379
- "Expansion Strategist Delegation"
379
+ "Product Discovery Delegation (Strategist Mode)"
380
380
  ]);
@@ -1,4 +1,36 @@
1
1
  import type { FlowStage } from "../types.js";
2
+ /**
3
+ * Round 5 (v6.6.0) — short bad → good behavior anchor per stage.
4
+ *
5
+ * Each entry is rendered exactly once in the corresponding stage skill md
6
+ * (via `behaviorAnchorBlock` in `skills.ts`) and exactly once in the stage's
7
+ * artifact template (via `renderBehaviorAnchorTemplateLine`). Anchors are
8
+ * deliberately attached to a real artifact section name so the cross-check
9
+ * test in `tests/unit/behavior-anchors.test.ts` can verify the section
10
+ * exists in the stage's schema.
11
+ *
12
+ * Constraints enforced by the unit test:
13
+ * - Exactly one entry per FlowStage (8 total).
14
+ * - `bad` and `good` must be distinct across stages and ≤ 40 words each.
15
+ * - `section` must match a section name present in
16
+ * `stageSchema(stage).artifactRules.artifactValidation`.
17
+ */
18
+ export interface BehaviorAnchor {
19
+ stage: FlowStage;
20
+ section: string;
21
+ bad: string;
22
+ good: string;
23
+ ruleHint?: string;
24
+ }
25
+ export declare const BEHAVIOR_ANCHORS: ReadonlyArray<BehaviorAnchor>;
26
+ export declare function behaviorAnchorFor(stage: FlowStage): BehaviorAnchor | null;
27
+ /**
28
+ * Render the one-line "Behavior anchor (bad → good)" pointer used at the top
29
+ * of each artifact template (01..08). Templates carry the anchor inline so
30
+ * agents see it before they start filling sections; the prose itself lives
31
+ * only in `BEHAVIOR_ANCHORS` to avoid duplication.
32
+ */
33
+ export declare function renderBehaviorAnchorTemplateLine(stage: FlowStage): string;
2
34
  export declare function stageGoodBadExamples(stage: FlowStage): string;
3
35
  /**
4
36
  * Returns the full example artifact body for tests and internal quality checks.
@@ -1,3 +1,77 @@
1
+ export const BEHAVIOR_ANCHORS = [
2
+ {
3
+ stage: "brainstorm",
4
+ section: "Problem Decision Record",
5
+ bad: "Frame the problem broadly and quietly add a second outcome (\"and while we're at it, refresh the dashboard\") that no Q&A row sanctioned.",
6
+ good: "Name one affected user, one current failure mode, and one observable outcome; record any extra outcome as a separate row in `## Not Doing`.",
7
+ ruleHint: "Scope creep starts in framing — keep the Problem Decision Record single-target."
8
+ },
9
+ {
10
+ stage: "scope",
11
+ section: "Scope Contract",
12
+ bad: "Invent a contract from a hunch: \"I'll let the user choose 3 templates\" with no Q&A row, no user feedback citation, no upstream decision.",
13
+ good: "Cite the Q&A row or upstream decision (`brainstorm > Selected Direction`) that produced each in/out boundary; refuse to lock without that citation.",
14
+ ruleHint: "Every scope contract row must trace to a recorded user signal or carried-forward decision."
15
+ },
16
+ {
17
+ stage: "design",
18
+ section: "Codebase Investigation",
19
+ bad: "Open with \"Use a queue + worker pool\" before reading any file; the architecture choice precedes the trace and the diagram has no concrete node.",
20
+ good: "List 1-3 blast-radius files in `Codebase Investigation` with current responsibility and reuse candidate first; only then propose architecture in `ADR`.",
21
+ ruleHint: "Trace before lock — no architecture decision lands without a codebase citation."
22
+ },
23
+ {
24
+ stage: "spec",
25
+ section: "Acceptance Criteria",
26
+ bad: "AC: \"System should be fast and reliable\" — no measurable predicate, no verification approach, no design-decision ref.",
27
+ good: "AC: \"GET /feed returns ≤ 50 items in < 200 ms p95; verified via integration test `tests/feed.spec.ts` against scope `R-2`.\"",
28
+ ruleHint: "Every AC carries an observable predicate plus the exact evidence command or path that proves it."
29
+ },
30
+ {
31
+ stage: "plan",
32
+ section: "Execution Posture",
33
+ bad: "Posture: \"parallel-safe\" with three units that all edit the same `src/api/router.ts`; no shared interface contract, no boundary map.",
34
+ good: "Posture: \"parallel-safe\" only when each Implementation Unit owns disjoint files and the shared types live in one cited interface contract entry.",
35
+ ruleHint: "Parallelization needs disjoint units AND a single shared interface contract — claim otherwise and the next batch deadlocks."
36
+ },
37
+ {
38
+ stage: "tdd",
39
+ section: "RED Evidence",
40
+ bad: "RED: `expect(true).toBe(true)` then \"failing test observed\" — the assertion can never have caught the bug it claims to prove.",
41
+ good: "RED: `expect(api.fetchFeed()).rejects.toThrow(AuthError)`; the failure output names the missing guard and ties to AC-3.",
42
+ ruleHint: "Mental mutation test: name a plausible bug that would still pass the assertion. If you can, the assertion is too coarse."
43
+ },
44
+ {
45
+ stage: "review",
46
+ section: "Layer 2 Findings",
47
+ bad: "Slip in a rename of `userSvc` → `userService` and a folder reorg under \"Layer 2: cleanup\"; no acceptance criterion or finding ID demanded the change.",
48
+ good: "Findings name observed defects with `file:line`; refactors land as a separate slice with their own RED/GREEN, not bundled into the review pass.",
49
+ ruleHint: "Review surfaces findings; it does not refactor. Drive-by edits go back through TDD."
50
+ },
51
+ {
52
+ stage: "ship",
53
+ section: "Preflight Results",
54
+ bad: "Preflight: \"Looks good, tests passed last night\"; no fresh command output, no commit SHA, no exit code.",
55
+ good: "Preflight: paste the command, the exit code, and the commit SHA from this turn; if the suite was not re-run after the last edit, mark BLOCKED.",
56
+ ruleHint: "Victory-by-confidence is not a preflight. Re-run, capture, cite SHA — or stay BLOCKED."
57
+ }
58
+ ];
59
/** Stage → anchor index, built once from `BEHAVIOR_ANCHORS` at module load. */
const BEHAVIOR_ANCHOR_BY_STAGE = new Map();
for (const anchor of BEHAVIOR_ANCHORS) {
    BEHAVIOR_ANCHOR_BY_STAGE.set(anchor.stage, anchor);
}
/**
 * Looks up the behavior anchor registered for a stage.
 *
 * @param stage Stage identifier to look up.
 * @returns The matching `BEHAVIOR_ANCHORS` entry, or `null` when the stage
 *   has no entry.
 */
export function behaviorAnchorFor(stage) {
    const anchor = BEHAVIOR_ANCHOR_BY_STAGE.get(stage);
    return anchor ?? null;
}
63
/**
 * Builds the single-line "Behavior anchor (bad -> good)" blockquote for a
 * stage from its `BEHAVIOR_ANCHORS` entry: the anchor's section name
 * followed by its `bad` and `good` text.
 *
 * @param stage Stage whose anchor should be rendered.
 * @returns The rendered line, or an empty string when the stage has no
 *   anchor.
 */
export function renderBehaviorAnchorTemplateLine(stage) {
    const anchor = behaviorAnchorFor(stage);
    if (anchor) {
        const { section, bad, good } = anchor;
        return `> Behavior anchor (bad -> good) — ${section}: bad: ${bad} good: ${good}`;
    }
    return "";
}
1
75
  const STAGE_EXAMPLES = {
2
76
  brainstorm: `## Context
3
77
 
@@ -380,6 +380,47 @@ function buildRow(args, status, runId, now) {
380
380
  };
381
381
  }
382
382
 
383
// Takes the ledger lock by creating the directory delegation-log.json.lock
// inside stateDir (non-recursive mkdir, so an existing directory is reported
// as EEXIST). While the directory exists the call retries with a doubling
// delay (25ms up to 200ms, plus 0-24ms of random jitter). After 3000ms it
// writes a timeout message to stderr and exits the process with code 2.
// Any mkdir error other than EEXIST is rethrown.
// Returns the lock directory path, to be handed to releaseDelegationLogLock.
// NOTE(review): nothing in this function reclaims a lock directory left
// behind by a holder that died - confirm whether that is handled elsewhere.
// NOTE(review): this code appears to be emitted from a script template, so
// it deliberately sticks to plain string concatenation.
async function acquireDelegationLogLock(stateDir) {
  const lockDir = path.join(stateDir, "delegation-log.json.lock");
  const maxWaitMs = 3000;
  const deadlineMs = Date.now() + maxWaitMs;
  let delayMs = 25;
  for (;;) {
    try {
      await fs.mkdir(lockDir, { recursive: false });
      return lockDir;
    } catch (err) {
      const lockHeld = typeof err === "object" && err !== null && err.code === "EEXIST";
      if (!lockHeld) throw err;
    }
    if (Date.now() >= deadlineMs) {
      process.stderr.write(
        "[cclaw] delegation-record: timeout waiting for delegation-log.json.lock (max " + maxWaitMs + "ms)\\n"
      );
      process.exit(2);
    }
    const jitter = Math.floor(Math.random() * 25);
    const waitMs = delayMs + jitter;
    await new Promise((resolve) => setTimeout(resolve, waitMs));
    delayMs = Math.min(delayMs * 2, 200);
  }
}
407
+
408
// Removes the lock directory returned by acquireDelegationLogLock.
// Release is best-effort: a failed removal is ignored so it cannot mask the
// outcome of the work that was done while the lock was held.
async function releaseDelegationLogLock(lockDir) {
  const ignoreFailure = () => {};
  await fs.rm(lockDir, { recursive: true, force: true }).then(ignoreFailure, ignoreFailure);
}
415
+
416
// Serializes the ledger to a uniquely named temp file in the same directory
// as ledgerPath (pid + timestamp + random suffix, mode 0o600) and then
// renames it over ledgerPath, so the ledger file is replaced in one step.
// If the write or the rename fails, the temp file is removed (best effort)
// before the original error is rethrown, so failed writes do not litter the
// state directory with stale .tmp files.
async function writeDelegationLedgerAtomic(ledgerPath, ledger) {
  const dir = path.dirname(ledgerPath);
  const tmp =
    path.join(dir, ".delegation-log.json." + process.pid + "." + Date.now() + "." + Math.random().toString(16).slice(2) + ".tmp");
  try {
    await fs.writeFile(tmp, JSON.stringify(ledger, null, 2) + "\\n", { encoding: "utf8", mode: 0o600 });
    await fs.rename(tmp, ledgerPath);
  } catch (err) {
    // Cleanup must never replace the real failure, hence the swallowed rejection.
    await fs.rm(tmp, { force: true }).catch(() => {});
    throw err;
  }
}
423
+
383
424
  async function persistEntry(root, runId, clean, event, options = {}) {
384
425
  const stateDir = path.join(root, RUNTIME_ROOT, "state");
385
426
  await fs.mkdir(stateDir, { recursive: true });
@@ -387,29 +428,34 @@ async function persistEntry(root, runId, clean, event, options = {}) {
387
428
 
388
429
  const ledgerPath = path.join(stateDir, "delegation-log.json");
389
430
  let ledger = { runId, entries: [], schemaVersion: LEDGER_SCHEMA_VERSION };
431
+ const lockDir = await acquireDelegationLogLock(stateDir);
390
432
  try {
391
- ledger = JSON.parse(await fs.readFile(ledgerPath, "utf8"));
392
- if (!Array.isArray(ledger.entries)) ledger.entries = [];
393
- } catch {
394
- ledger = { runId, entries: [], schemaVersion: LEDGER_SCHEMA_VERSION };
395
- }
396
-
397
- // Rerecord semantics: replace any pre-existing row with the same spanId
398
- // (regardless of its status) so the legacy v1/v2 row is upgraded to v3
399
- // shape on disk. The append path keeps the historical dedup semantics:
400
- // an exact (spanId, status) duplicate is dropped to keep retried hooks
401
- // idempotent.
402
- if (options.replaceBySpanId) {
403
- ledger.entries = ledger.entries.filter((entry) => entry.spanId !== clean.spanId);
404
- ledger.entries.push(clean);
405
- ledger.runId = runId;
406
- ledger.schemaVersion = LEDGER_SCHEMA_VERSION;
407
- await fs.writeFile(ledgerPath, JSON.stringify(ledger, null, 2) + "\\n", { encoding: "utf8", mode: 0o600 });
408
- } else if (!ledger.entries.some((entry) => entry.spanId === clean.spanId && entry.status === clean.status)) {
409
- ledger.entries.push(clean);
410
- ledger.runId = runId;
411
- ledger.schemaVersion = LEDGER_SCHEMA_VERSION;
412
- await fs.writeFile(ledgerPath, JSON.stringify(ledger, null, 2) + "\\n", { encoding: "utf8", mode: 0o600 });
433
+ try {
434
+ ledger = JSON.parse(await fs.readFile(ledgerPath, "utf8"));
435
+ if (!Array.isArray(ledger.entries)) ledger.entries = [];
436
+ } catch {
437
+ ledger = { runId, entries: [], schemaVersion: LEDGER_SCHEMA_VERSION };
438
+ }
439
+
440
+ // Rerecord semantics: replace any pre-existing row with the same spanId
441
+ // (regardless of its status) so the legacy v1/v2 row is upgraded to v3
442
+ // shape on disk. The append path keeps the historical dedup semantics:
443
+ // an exact (spanId, status) duplicate is dropped to keep retried hooks
444
+ // idempotent.
445
+ if (options.replaceBySpanId) {
446
+ ledger.entries = ledger.entries.filter((entry) => entry.spanId !== clean.spanId);
447
+ ledger.entries.push(clean);
448
+ ledger.runId = runId;
449
+ ledger.schemaVersion = LEDGER_SCHEMA_VERSION;
450
+ await writeDelegationLedgerAtomic(ledgerPath, ledger);
451
+ } else if (!ledger.entries.some((entry) => entry.spanId === clean.spanId && entry.status === clean.status)) {
452
+ ledger.entries.push(clean);
453
+ ledger.runId = runId;
454
+ ledger.schemaVersion = LEDGER_SCHEMA_VERSION;
455
+ await writeDelegationLedgerAtomic(ledgerPath, ledger);
456
+ }
457
+ } finally {
458
+ await releaseDelegationLogLock(lockDir);
413
459
  }
414
460
 
415
461
  const active = ledger.entries.filter((entry) => ["scheduled", "launched", "acknowledged"].includes(entry.status));
@@ -8,6 +8,16 @@ export declare function outsideVoiceSlotBlock(): string;
8
8
  export declare function antiSycophancyBlock(): string;
9
9
  export declare function noPlaceholdersBlock(): string;
10
10
  export declare function watchedFailProofBlock(): string;
11
+ /**
12
+ * Stages that perform real investigation work. The shared
13
+ * `INVESTIGATION_DISCIPLINE_BLOCK` is rendered once per stage skill in this
14
+ * set so the search → graph → narrow-read → draft ladder appears verbatim
15
+ * across the elicitation/spec/plan/tdd/review pipeline. `ship` is excluded:
16
+ * it consumes the upstream trace rather than producing one.
17
+ */
18
+ export declare const INVESTIGATION_DISCIPLINE_STAGES: ReadonlySet<FlowStage>;
19
+ export declare function investigationDisciplineBlock(): string;
20
+ export declare function behaviorAnchorBlock(stage: FlowStage): string;
11
21
  export declare function stageSkillFolder(stage: FlowStage): string;
12
22
  export declare function stageSkillMarkdown(stage: FlowStage, track?: FlowTrack): string;
13
23
  export declare function executingWavesSkillMarkdown(): string;
@@ -1,7 +1,8 @@
1
1
  import { RUNTIME_ROOT, STAGE_TO_SKILL_FOLDER } from "../constants.js";
2
2
  import { nextStage as nextStageForTrack } from "../flow-state.js";
3
3
  import { FLOW_STAGES } from "../types.js";
4
- import { stageExamples } from "./examples.js";
4
+ import { behaviorAnchorFor, stageExamples } from "./examples.js";
5
+ import { INVESTIGATION_DISCIPLINE_BLOCK } from "./templates.js";
5
6
  import { reviewStackAwareRoutes, reviewStackAwareRoutingSummary, stageAutoSubagentDispatch, stageSchema, stageTrackRenderContext } from "./stage-schema.js";
6
7
  import { referencePatternsForStage } from "./reference-patterns.js";
7
8
  import { harnessDelegationRecipes } from "../harness-adapters.js";
@@ -104,6 +105,40 @@ Any "the failure is real" claim (failing test, broken build, regression catch, d
104
105
  For TDD specifically, this is the watched-RED proof and is required per new test before \`stage-complete\` accepts the stage.
105
106
  `;
106
107
  }
108
/**
 * Flow stages whose skills carry the shared investigation-discipline block.
 *
 * Membership in this set is what decides whether a stage skill renders
 * `INVESTIGATION_DISCIPLINE_BLOCK` (the search → graph → narrow-read → draft
 * ladder). `ship` is intentionally absent: it consumes the upstream trace
 * rather than producing one.
 */
export const INVESTIGATION_DISCIPLINE_STAGES = new Set([
    "brainstorm", "scope", "design", "spec",
    "plan", "tdd", "review"
]);
124
/**
 * Accessor for the shared investigation-discipline markdown.
 *
 * @returns The `INVESTIGATION_DISCIPLINE_BLOCK` string imported from
 *   `./templates.js`, unchanged.
 */
export function investigationDisciplineBlock() {
    const sharedBlock = INVESTIGATION_DISCIPLINE_BLOCK;
    return sharedBlock;
}
127
/**
 * Renders the "Behavior anchor" markdown section for a stage skill.
 *
 * The anchor is looked up through `behaviorAnchorFor(stage)`. When no anchor
 * is returned the result is an empty string. Otherwise the section names the
 * anchored artifact section, lists the bad/good pair, and appends a
 * "Rule hint" paragraph only when `ruleHint` has non-whitespace content.
 *
 * @param stage - Flow stage whose anchor should be rendered.
 * @returns Markdown for the section, or `""` when the stage has no anchor.
 */
export function behaviorAnchorBlock(stage) {
    const anchor = behaviorAnchorFor(stage);
    if (!anchor) {
        return "";
    }
    // Trim once; a missing or whitespace-only hint contributes nothing.
    const trimmedHint = anchor.ruleHint ? anchor.ruleHint.trim() : "";
    const hintSuffix = trimmedHint.length > 0 ? `\n\nRule hint: ${trimmedHint}` : "";
    const sectionLines = [
        "## Behavior anchor",
        "",
        `Anchored to artifact section: \`${anchor.section}\`.`,
        "",
        `- Bad: ${anchor.bad}`,
        `- Good: ${anchor.good}${hintSuffix}`,
        ""
    ];
    return sectionLines.join("\n");
}
107
142
  function crossCuttingMechanicsBlock(stage) {
108
143
  // All stages share the universal mechanics, but each stage's matching
109
144
  // linter rules decide what is mandatory vs. structural-only.
@@ -117,6 +152,13 @@ function crossCuttingMechanicsBlock(stage) {
117
152
  if (stage === "tdd" || stage === "review" || stage === "ship") {
118
153
  blocks.push(watchedFailProofBlock());
119
154
  }
155
+ if (INVESTIGATION_DISCIPLINE_STAGES.has(stage)) {
156
+ blocks.push(investigationDisciplineBlock());
157
+ }
158
+ const anchor = behaviorAnchorBlock(stage);
159
+ if (anchor.length > 0) {
160
+ blocks.push(anchor);
161
+ }
120
162
  return blocks.join("\n");
121
163
  }
122
164
  function whenNotToUseBlock(items) {
@@ -192,7 +234,7 @@ function autoSubagentDispatchBlock(stage, track) {
192
234
  |---|---|---|---|---|---|---|---|
193
235
  ${rows}
194
236
  Mandatory: ${mandatoryList}. Record lifecycle rows in \`${delegationLogRel}\` and append-only \`${delegationEventsRel}\` before completion.${runPhaseLegend}
195
- ### Harness Dispatch Contract — use true harness dispatch: Claude Task, Cursor generic dispatch, OpenCode \`.opencode/agents/<agent>.md\` via Task/@agent, Codex \`.codex/agents/<agent>.toml\`. Do not collapse OpenCode or Codex to role-switch by default. Worker ACK Contract: ACK must include \`spanId\`, \`dispatchId\`, \`dispatchSurface\`, \`agentDefinitionPath\`, and \`ackTs\`; never claim \`fulfillmentMode: "isolated"\` without matching lifecycle proof. Helper: \`.cclaw/hooks/delegation-record.mjs --status=<status> --span-id=<spanId> --dispatch-id=<dispatchId> --dispatch-surface=<surface> --agent-definition-path=<path> --json\`. Exact recipe: scheduled -> launched -> acknowledged -> completed with the same span; completed isolated/generic rows require a prior ACK event for that span or \`--ack-ts=<iso>\`.
237
+ ### Harness Dispatch Contract — use true harness dispatch: Claude Task, Cursor generic dispatch, OpenCode \`.opencode/agents/<agent>.md\` via Task/@agent, Codex \`.codex/agents/<agent>.toml\`. Do not collapse OpenCode or Codex to role-switch by default. Worker ACK Contract: ACK must include \`spanId\`, \`dispatchId\`, \`dispatchSurface\`, \`agentDefinitionPath\`, and \`ackTs\`; never claim \`fulfillmentMode: "isolated"\` without matching lifecycle proof. Canonical helper (same flags as \`delegation-record.mjs --help\`): \`node .cclaw/hooks/delegation-record.mjs --stage=<stage> --agent=<agent> --mode=<mandatory|proactive> --status=<scheduled|launched|acknowledged|completed|...> --span-id=<id> --dispatch-id=<id> --dispatch-surface=<surface> --agent-definition-path=<path> [--ack-ts=<iso>] [--evidence-ref=<ref>] --json\`. Lifecycle order: \`scheduled launched acknowledged completed\` on one span (reuse the same span id); completed isolated/generic rows require a prior ACK event for that span or \`--ack-ts=<iso>\`. For a partial audit trail, \`--repair --span-id=<id> --repair-reason="<why>"\` appends missing phases (see \`--help\`) instead of inventing shortcuts.
196
238
 
197
239
  ${perHarnessLifecycleRecipeBlock()}`;
198
240
  }
@@ -390,7 +432,7 @@ function completionParametersBlock(schema, track) {
390
432
  - If you edit any completed-stage artifact after it shipped (\`completedStageMeta\` timestamps exist), append a short \`## Amendments\` section with dated bullets (timestamp + reason) instead of overwriting the archived narrative silently — advisory linter rule \`stage_artifact_post_closure_mutation\` enforces visibility when this trail is missing.
391
433
  - Record mandatory delegation lifecycle in \`${RUNTIME_ROOT}/state/delegation-log.json\` and append proof events to \`${RUNTIME_ROOT}/state/delegation-events.jsonl\`; the ledger is current state, the event log is audit proof.${mandatoryAgents.length > 0 ? ` If a mandatory delegation cannot run in this harness, use \`--waive-delegation=${mandatoryAgents.join(",")} --waiver-reason="<why safe>"\` on the completion helper.` : ""} If proactive delegations were intentionally skipped, rerun only with \`--accept-proactive-waiver\` (optionally \`--accept-proactive-waiver-reason="<why safe>"\`) after explicit user approval.
392
434
  - Never edit raw \`flow-state.json\` to complete a stage, even in advisory mode; that bypasses validation, gate evidence, and Learnings harvest. If a helper fails, report a one-line human-readable failure plus fenced JSON diagnostics; never echo the invoking command line or apply a manual state workaround.
393
- - Stage completion claim requires \`stage-complete\` exit 0 in the current turn. Quote the success line; do not paraphrase, do not infer success from skipped retries.
435
+ - Stage completion claim requires \`stage-complete\` exit 0 in the current turn. Quote the single-line success JSON exactly as printed to stdout (for example \`{"ok":true,"command":"stage-complete",...}\` including \`completedStages\` / \`currentStage\` / \`runId\`); do not paraphrase. Do not infer success from empty stdout or from skipped retries (quiet mode always emits one JSON line on success).
394
436
  - Completion protocol: verify required gates, update the artifact, then use the completion helper with \`--evidence-json\` and \`--passed\` for every satisfied gate.
395
437
  `;
396
438
  }
@@ -412,7 +454,7 @@ ${completionBlock}
412
454
  - **NEVER paste the \`--evidence-json\` payload into chat.** It is structured data for the helper, not for the user. The same evidence already lives in the artifact section.
413
455
  - On failure, report a compact human-readable summary based on the helper's JSON \`findings\` array — list failing section names only (one line each), include the full helper JSON in a single fenced \`json\` block. Do not echo the invoking command.
414
456
  - **NEVER run shell hash commands** (\`shasum\`, \`sha256sum\`, \`md5sum\`, \`Get-FileHash\`, \`certutil\`, etc.) for hash compute. If the linter ever asks for a hash, that is a linter bug — report failure and stop, do not auto-fix in bash.
415
- - The helper defaults to quiet success (\`CCLAW_STAGE_COMPLETE_QUIET=1\`); rely on the resulting JSON, not stdout chatter.
457
+ - The helper defaults to quiet (\`CCLAW_STAGE_COMPLETE_QUIET=1\`): no pretty-printed chatter, but **stdout still prints exactly one line** of machine-readable success JSON (same contract as \`start-flow\` in quiet mode).
416
458
  `;
417
459
  }
418
460
  function quickStartBlock(stage, track) {
@@ -69,7 +69,9 @@ export const BRAINSTORM = {
69
69
  "For simple low-risk greenfield work, present a compact A/B choice with one recommended path and one higher-upside challenger; keep the artifact concise but structurally complete (Context, Premise, How Might We, Sharpening Questions, Approaches, Reaction, Selected Direction, Not Doing).",
70
70
  "Show approaches before the recommendation; include a higher-upside challenger and gather reaction first.",
71
71
  "Self-review before approval: re-read the artifact, fix contradictions/placeholders/weak trade-offs, then ask for approval. Do not ask for approval on a draft you have not re-read.",
72
- "State exactly what is being approved, then **STOP** until the user explicitly approves the artifact."
72
+ "State exactly what is being approved, then **STOP** until the user explicitly approves the artifact.",
73
+ "Investigation discipline: follow the shared `## Investigation Discipline` block (search -> graph/impact -> narrow read of 1-3 files -> draft) before any drafting or delegation; pass repo-relative paths and refs (never file bodies) in delegations.",
74
+ "Behavior anchor: see the shared `## Behavior anchor` block in this skill — the bad/good pair anchors how this stage's `Problem Decision Record` must be filled."
73
75
  ],
74
76
  process: [
75
77
  "Explore project context and align work to the run's discovery mode (lean / guided / deep).",
@@ -71,7 +71,9 @@ export const DESIGN = {
71
71
  "Classify ambiguity before acting. Only non-critical preference/default assumptions may continue; STOP on uncertainty about scope, architecture, security, data loss, public API, migration, auth/pricing, or required user approval. Design hypotheses must name validation path, rollback trigger, and owner before they can be carried forward.",
72
72
  "Before final approval, run the critic pass, reconcile material findings, and bound retries with the review-loop policy.",
73
73
  "For baseline approval, present the full design plus exact spec handoff and **STOP** until explicit approval.",
74
- "**STOP BEFORE ADVANCE.** Mandatory delegation `planner` runs **AFTER user approval of the design lock**, not before Q&A. Sequence is: Q&A loop -> draft design lock -> user approval -> `planner` delegation -> `stage-complete`. Legal fulfillment modes for `planner`: (a) **harness-native Task tool** — run the delegation, then record via `node .cclaw/hooks/delegation-record.mjs --stage=design --agent=planner --mode=mandatory --status=completed --span-id=<uuid> --dispatch-surface=cursor-task --agent-definition-path=<agent-md-path> --evidence-ref=<artifact#section>`; (b) **role-switch** — write planner output into the design artifact, then record with `--dispatch-surface=role-switch`; (c) **cclaw subagent helper** with `--dispatch-surface=isolated`. Run `node .cclaw/hooks/stage-complete.mjs design` from the tool layer (do not paste the command into chat); report only the resulting summary."
74
+ "**STOP BEFORE ADVANCE.** Mandatory delegation `planner` runs **AFTER user approval of the design lock**, not before Q&A. Sequence is: Q&A loop -> draft design lock -> user approval -> `planner` delegation -> `stage-complete`. Legal fulfillment modes for `planner`: (a) **harness-native Task tool** — run the delegation, then record via `node .cclaw/hooks/delegation-record.mjs --stage=design --agent=planner --mode=mandatory --status=completed --span-id=<uuid> --dispatch-surface=cursor-task --agent-definition-path=<agent-md-path> --evidence-ref=<artifact#section>`; (b) **role-switch** — write planner output into the design artifact, then record with `--dispatch-surface=role-switch`; (c) **cclaw subagent helper** with `--dispatch-surface=isolated`. Run `node .cclaw/hooks/stage-complete.mjs design` from the tool layer (do not paste the command into chat); report only the resulting summary.",
75
+ "Investigation discipline: follow the shared `## Investigation Discipline` block before drafting architecture — populate `Codebase Investigation` from a search/graph trace and pass paths/refs (never file bodies) to investigator/critic delegations.",
76
+ "Behavior anchor: see the shared `## Behavior anchor` block in this skill — the bad/good pair anchors how `Codebase Investigation` must precede any ADR commitment."
75
77
  ],
76
78
  process: [
77
79
  "Read upstream artifacts and current design docs.",
@@ -61,7 +61,9 @@ export const PLAN = {
61
61
  "Preserve locked scope boundaries: no silent scope reduction language in task rows.",
62
62
  "Enforce WAIT_FOR_CONFIRM: present the plan summary with options (A) Approve / (B) Revise / (C) Reject.",
63
63
  "**STOP.** Do NOT proceed until user explicitly approves.",
64
- "**STOP BEFORE ADVANCE.** Mandatory delegation `planner` must be marked completed or explicitly waived in `.cclaw/state/delegation-log.json`. Then close the stage via `node .cclaw/hooks/stage-complete.mjs plan` and tell the user to run `/cc`."
64
+ "**STOP BEFORE ADVANCE.** Mandatory delegation `planner` must be marked completed or explicitly waived in `.cclaw/state/delegation-log.json`. Then close the stage via `node .cclaw/hooks/stage-complete.mjs plan` and tell the user to run `/cc`.",
65
+ "Investigation discipline: follow the shared `## Investigation Discipline` block — when defining `Implementation Units`, list cited paths in the `Files` and `Patterns to follow` rows instead of pasting code into chat or delegations.",
66
+ "Behavior anchor: see the shared `## Behavior anchor` block in this skill — the bad/good pair anchors how `Execution Posture` may only claim parallel-safe with disjoint units and a cited interface contract."
65
67
  ],
66
68
  process: [
67
69
  "Build dependency graph and ordered slices.",
@@ -58,7 +58,9 @@ export const REVIEW = {
58
58
  "Resolve all critical blockers before ship. If verdict is BLOCKED, do not pass `review_criticals_resolved`; pass only the remediation route gate `review_verdict_blocked` when routing back to TDD.",
59
59
  "When verdict is BLOCKED, do not end with a passive stop: explicitly route remediation to TDD via `ROUTE_BACK_TO_TDD`, point to `npx cclaw-cli internal rewind tdd` with the blocking IDs, and tell the operator to ack the stale TDD marker only after rework is complete.",
60
60
  structuredAskSingleChoiceInstruction("final verdict", "verdict (APPROVED / APPROVED_WITH_CONCERNS / BLOCKED)"),
61
- "**STOP.** Do NOT proceed to ship until the user provides an explicit verdict."
61
+ "**STOP.** Do NOT proceed to ship until the user provides an explicit verdict.",
62
+ "Investigation discipline: follow the shared `## Investigation Discipline` block — `Changed-File Coverage` and Layer 2 findings cite `path:line`; delegate `reviewer`/`security-reviewer` with paths and refs, never with pasted file contents.",
63
+ "Behavior anchor: see the shared `## Behavior anchor` block in this skill — the bad/good pair anchors that `Layer 2 Findings` surface defects, not drive-by refactors."
62
64
  ],
63
65
  process: [
64
66
  "Layer 1: check acceptance criteria and requirement coverage.",
@@ -52,6 +52,7 @@ export const SCOPE = {
52
52
  "**Premise carry-forward (do NOT re-author)** — brainstorm OWNS the premise check (right problem / direct path / what if nothing). Cite brainstorm's `## Premise Check` section in `## Upstream Handoff > Decisions carried forward`. Add a row to `## Premise Drift` only when the scope-stage Q&A surfaced NEW evidence that materially changes the brainstorm answer (e.g. new constraint, new user signal). Otherwise mark `Premise Drift: None` — do not duplicate the brainstorm premise table.",
53
53
  "**Conditional 10-star boundary** — for deep/high-risk/product-strategy work, show what would make the product meaningfully better, then explicitly choose what ships now, what is deferred, and what is excluded without vague `later/for now` placeholders. Skip this for straightforward repair work and record `not needed: compact scope`.",
54
54
  "**Pick one operational mode with the user** — HOLD SCOPE preserves focus; SELECTIVE EXPANSION cherry-picks high-leverage reference ideas; SCOPE EXPANSION explores ambitious alternatives; SCOPE REDUCTION cuts to the essential wedge. Recommend one, state why and what signal would change it, then keep elicitation focused until the user either approves or asks to proceed with draft boundaries.",
55
+ "**Product-discovery is REQUIRED for SELECTIVE / SCOPE EXPANSION (hard gate)** — If the resolved scope mode is SELECTIVE EXPANSION or SCOPE EXPANSION, run \`product-discovery\` in proactive mode **after** adaptive elicitation converges and **before** \`stage-complete\`. Do not complete this stage until the delegation ledger shows \`product-discovery\` as \`completed\` with non-empty \`evidenceRefs\` pointing at this scope artifact. HOLD SCOPE and SCOPE REDUCTION do not require this row.",
55
56
  "**Run mode-specific analysis only to needed depth** — lean discovery keeps the selected-mode row compact; guided adds the standard contract rows; deep may add Landscape Check, Taste Calibration, Reference Pattern Registry, Reference Pull, Ambitious Alternatives, and Ruthless Minimum Slice evidence when mode/risk warrants it.",
56
57
  "**Decision-driver contract** — list weighted decision drivers (value, risk, reversibility, effort, timeline) and score candidate scope moves so the selected mode and boundaries are evidence-backed, not preference-led.",
57
58
  "**Architecture handoff (do NOT pick architecture tier here)** — design OWNS architecture choice (minimum-viable / product-grade / ideal). Scope only picks the SCOPE MODE (HOLD/SELECTIVE/EXPAND/REDUCE) and boundary; record in `## Scope Contract > Design handoff` what design must decide (e.g. `architecture-tier`, `framework`, `data-model`). Do NOT enumerate Implementation Alternatives in scope.",
@@ -73,7 +74,9 @@ export const SCOPE = {
73
74
  "If the user says no but cannot name the change, offer concrete moves: keep scope, add one obvious adjacent capability, reduce to wedge, or re-open stack/product direction.",
74
75
  "Before final approval, record outside-voice findings and a `## Scope Outside Voice Loop` table per the Scope Outside Voice Loop policy above.",
75
76
  "**STOP.** Wait for explicit user approval of the scope mode and scope contract before writing final approval language or advancing.",
76
- "**STOP BEFORE ADVANCE.** Mandatory delegation `planner` runs **AFTER user approval of the scope contract**, not before Q&A. Sequence is: Q&A loop -> propose contract -> user approval -> `planner` delegation -> `stage-complete`. If you delegate `planner` before the Q&A loop converges, you violate the elicitation contract and the linter will block stage-complete via `qa_log_unconverged`. Legal fulfillment modes for `planner`: (a) **harness-native Task tool** — run the delegation, then record the lifecycle row via `node .cclaw/hooks/delegation-record.mjs --stage=scope --agent=planner --mode=mandatory --status=completed --span-id=<uuid> --dispatch-surface=cursor-task --agent-definition-path=<agent-md-path> --evidence-ref=<artifact#section>` (the helper sets `fulfillmentMode: \"generic-dispatch\"` automatically); (b) **role-switch** — announce `## cclaw role-switch: scope/planner (mandatory)`, write the planner output/evidence into the scope artifact, then record the row with `--dispatch-surface=role-switch --agent-definition-path=<artifact-anchor>` (helper sets `fulfillmentMode: \"role-switch\"` automatically); (c) **cclaw subagent helper** if available, with `--dispatch-surface=isolated`. Run `node .cclaw/hooks/stage-complete.mjs scope` from the tool layer (do not paste the command into chat); report only the resulting summary."
77
+ "**STOP BEFORE ADVANCE.** Mandatory delegation `planner` runs **AFTER user approval of the scope contract**, not before Q&A. Sequence is: Q&A loop -> propose contract -> user approval -> `planner` delegation -> `stage-complete`. If you delegate `planner` before the Q&A loop converges, you violate the elicitation contract and the linter will block stage-complete via `qa_log_unconverged`. Legal fulfillment modes for `planner`: (a) **harness-native Task tool** — run the delegation, then record the lifecycle row via `node .cclaw/hooks/delegation-record.mjs --stage=scope --agent=planner --mode=mandatory --status=completed --span-id=<uuid> --dispatch-surface=cursor-task --agent-definition-path=<agent-md-path> --evidence-ref=<artifact#section>` (the helper sets `fulfillmentMode: \"generic-dispatch\"` automatically); (b) **role-switch** — announce `## cclaw role-switch: scope/planner (mandatory)`, write the planner output/evidence into the scope artifact, then record the row with `--dispatch-surface=role-switch --agent-definition-path=<artifact-anchor>` (helper sets `fulfillmentMode: \"role-switch\"` automatically); (c) **cclaw subagent helper** if available, with `--dispatch-surface=isolated`. Run `node .cclaw/hooks/stage-complete.mjs scope` from the tool layer (do not paste the command into chat); report only the resulting summary.",
78
+ "Investigation discipline: follow the shared `## Investigation Discipline` block (search -> graph/impact -> narrow read of 1-3 files -> draft); pass repo-relative paths and refs to any delegated planner/critic instead of pasting upstream content.",
79
+ "Behavior anchor: see the shared `## Behavior anchor` block in this skill — the bad/good pair anchors how this stage's `Scope Contract` must trace each row to a recorded user signal."
77
80
  ],
78
81
  process: [
79
82
  "Run pre-scope system audit (git log/diff/stash + debt-marker scan) — scope OWNS the repo audit; design will only diff the blast radius since this scope baseline.",
@@ -59,7 +59,8 @@ export const SHIP = {
59
59
  "Document release notes and rollback plan explicitly.",
60
60
  decisionProtocolInstruction("finalization mode", "present modes as labeled options (A/B/C/D/E) with consequences, and mark one as (recommended)", "recommend the mode that best addresses release blast-radius, rollback readiness, observability, and stakeholder communication — ties go to the most reversible option"),
61
61
  "Do not proceed if critical blockers remain from review.",
62
- "**STOP.** Present finalization options and wait for user selection before executing any finalization action."
62
+ "**STOP.** Present finalization options and wait for user selection before executing any finalization action.",
63
+ "Behavior anchor: see the shared `## Behavior anchor` block in this skill — the bad/good pair anchors that `Preflight Results` cite fresh command output, exit codes, and the commit SHA from this turn."
63
64
  ],
64
65
  process: [
65
66
  "Validate review and test gates.",
@@ -54,7 +54,9 @@ export const SPEC = {
54
54
  "**Chunk acceptance criteria for review.** When presenting the spec to the user for sign-off, deliver acceptance criteria in batches of 3-5 and **pause for explicit ACK** (via Decision Protocol) before sending the next batch. Do not dump the full criteria wall in one message — small batches surface objections earlier and keep the sign-off meaningful. Full spec writeup still lands in `04-spec.md`, but the conversation itself must be digestible.",
55
55
  "Require user confirmation on the written spec. **STOP.** Do NOT proceed to plan until user approves.",
56
56
  "For each criterion, ask: what exact evidence proves this passed? If the evidence or verification command/manual step is vague, rewrite.",
57
- "When encountering ambiguity, classify it before acting: (A) ask user for missing info, (B) enumerate non-critical interpretations and pick one with justification, (C) propose hypothesis with validation path. Do NOT silently resolve ambiguity. STOP on scope, architecture, security, data loss, public API, migration, auth/pricing, or user-approval uncertainty."
57
+ "When encountering ambiguity, classify it before acting: (A) ask user for missing info, (B) enumerate non-critical interpretations and pick one with justification, (C) propose hypothesis with validation path. Do NOT silently resolve ambiguity. STOP on scope, architecture, security, data loss, public API, migration, auth/pricing, or user-approval uncertainty.",
58
+ "Investigation discipline: follow the shared `## Investigation Discipline` block — derive ACs from cited upstream paths/refs (`02-scope.md#R-2`, `03-design.md#DD-1`) instead of pasting their bodies into delegation prompts.",
59
+ "Behavior anchor: see the shared `## Behavior anchor` block in this skill — the bad/good pair anchors how each `Acceptance Criteria` row must carry an observable predicate plus the evidence path."
58
60
  ],
59
61
  process: [
60
62
  "Define measurable acceptance criteria.",
@@ -71,7 +71,9 @@ export const TDD = {
71
71
  "Use incremental RED/GREEN/REFACTOR commits when the repository workflow and working tree make that appropriate; otherwise record the checkpoint boundaries in the artifact.",
72
72
  "Stop if regressions appear and fix before proceeding.",
73
73
  "If a test passes unexpectedly, investigate: does the behavior already exist, or is the test wrong?",
74
- "**Per-Slice Review point (conditional).** Check every slice against the triggers before declaring it DONE. Triggers: `touchCount >= filesChangedThreshold`, any `touchPaths` match a `touchTriggers` glob, or the plan row declares `highRisk: true`. On a trigger, run two passes on the slice alone — (1) Spec-Compliance: trace RED/GREEN/REFACTOR evidence back to its plan task + spec criterion, noting edge cases the tests skip; (2) Quality: diff-scan for naming, error handling, dead code, simpler alternatives. Record both under `## Per-Slice Review` in `06-tdd.md`, naming the trigger that fired. Dispatch the `reviewer` subagent natively when available (log `fulfillmentMode: \"isolated\"`); otherwise fulfil via in-session role switch (`fulfillmentMode: \"role-switch\"`). Never fabricate an isolated pass from memory."
74
+ "**Per-Slice Review point (conditional).** Check every slice against the triggers before declaring it DONE. Triggers: `touchCount >= filesChangedThreshold`, any `touchPaths` match a `touchTriggers` glob, or the plan row declares `highRisk: true`. On a trigger, run two passes on the slice alone — (1) Spec-Compliance: trace RED/GREEN/REFACTOR evidence back to its plan task + spec criterion, noting edge cases the tests skip; (2) Quality: diff-scan for naming, error handling, dead code, simpler alternatives. Record both under `## Per-Slice Review` in `06-tdd.md`, naming the trigger that fired. Dispatch the `reviewer` subagent natively when available (log `fulfillmentMode: \"isolated\"`); otherwise fulfil via in-session role switch (`fulfillmentMode: \"role-switch\"`). Never fabricate an isolated pass from memory.",
75
+ "Investigation discipline: follow the shared `## Investigation Discipline` block — `Watched-RED Proof` and `RED Evidence` rows must cite test paths and command logs, not pasted source bodies; delegate `test-author` with paths and refs only.",
76
+ "Behavior anchor: see the shared `## Behavior anchor` block in this skill — the bad/good pair anchors how `RED Evidence` must contain a falsifiable assertion (no tautologies)."
75
77
  ],
76
78
  process: [
77
79
  "Select one vertical slice and map it to acceptance criterion(s).",
@@ -1,3 +1,12 @@
1
+ /**
2
+ * Shared investigation discipline block (Round 5 / v6.6.0). Rendered once per
3
+ * investigation-stage skill (brainstorm, scope, design, spec, plan, tdd,
4
+ * review). The block enforces a four-step ladder before drafting and a
5
+ * path-passing rule for delegations so token cost and "jumped into code"
6
+ * regressions stay bounded. Stop-trigger count and ladder-step count are
7
+ * verified by `tests/unit/investigation-discipline-block.test.ts`.
8
+ */
9
+ export declare const INVESTIGATION_DISCIPLINE_BLOCK = "## Investigation Discipline\n\nUse this ladder before drafting or delegating; do not jump straight to the editor.\n\n1. **Search** \u2014 locate the surface (file path, symbol, ref) before reading. Use `rg` / glob / graph; record the query, not the chunk.\n2. **Graph / impact** \u2014 name what the change touches (callers, callees, tests, configs) and its blast radius before opening a file.\n3. **Narrow read** \u2014 read at most 1-3 files, only the sections needed; cite paths with `:line` ranges instead of pasting bodies.\n4. **Draft** \u2014 only after the trace exists; the trace is the authority, not chat history or memory.\n\n**Path-passing in delegations.** When delegating, pass repo-relative paths and refs (e.g. `src/foo/bar.ts:42`, `D-12`, `AC-3`) \u2014 never the file body. The subagent re-reads from path; pasting content fragments breaks freshness and inflates tokens.\n\n**Stop triggers** (any one means halt and re-enter the ladder):\n\n- You are about to read more than 3 files in one pass.\n- You are about to load file content into a delegation prompt instead of paths or refs.\n- You are about to start a draft before any trace (search log, graph note, narrow-read citation) exists.\n";
1
10
  export declare const ARTIFACT_TEMPLATES: Record<string, string>;
2
11
  export declare const RULEBOOK_MARKDOWN = "# Cclaw Rulebook\n\n## MUST_ALWAYS\n- Follow flow order: brainstorm -> scope -> design -> spec -> plan -> tdd -> review -> ship\n- Require explicit user confirmation after plan before TDD\n- Keep evidence artifacts in `.cclaw/artifacts/`\n- Enforce RED before GREEN in TDD\n- Run two-layer review (spec_compliance and code_quality) before ship\n- Validate all inputs before processing \u2014 never trust external data without sanitization\n- Prefer immutable data patterns and pure functions where the language supports them\n- Follow existing repo conventions, patterns, and directory structure \u2014 match the codebase\n- Verify claims with fresh evidence: \"tests pass\" requires running tests in this message\n- Use conventional commits: `type(scope): description` (feat, fix, refactor, test, docs, chore)\n\n## MUST_NEVER\n- Skip RED phase and jump directly to GREEN in TDD\n- Ship with critical review findings\n- Start implementation during /brainstorm\n- Modify generated cclaw files manually when CLI can regenerate them\n- Commit `.cclaw/` or generated shim files\n- Expose secrets, tokens, API keys, or absolute system paths in agent output\n- Duplicate existing functionality without explicit justification \u2014 search before building\n- Bypass security checks, linting hooks, or type checking to \"move faster\"\n- Claim success (\"Done,\" \"All good,\" \"Tests pass\") without running verification in this message\n- Make changes outside the blast radius of the current task without user consent\n\n## DELEGATION\nWhen a task requires specialist knowledge (security audit, performance profiling, database review),\ndelegate to a specialized agent or skill if the harness supports it. The primary agent should:\n1. Identify the specialist domain\n2. Provide focused context (relevant files, the specific concern)\n3. Evaluate the specialist output before acting on it \u2014 do not blindly apply recommendations\n";
3
12
  /**
@@ -7,5 +16,5 @@ export declare const RULEBOOK_MARKDOWN = "# Cclaw Rulebook\n\n## MUST_ALWAYS\n-
7
16
  * (premature draft, premature subagent dispatch, command-line echo to chat).
8
17
  */
9
18
  export declare const CURSOR_GUIDELINES_RULE_MDC = "---\ndescription: cclaw zero-install behavior baseline (always-on)\nglobs:\n - \"**/*\"\nalwaysApply: true\n---\n\n<!-- cclaw-managed-cursor-guidelines-rule -->\n\n# Cclaw Baseline Guidelines\n\nThese three rules apply to every Cursor agent session in this project,\nregardless of whether stage skills loaded.\n\n## 1. Q&A floor before drafting (brainstorm/scope/design)\n\nBefore drafting any `.cclaw/artifacts/01-brainstorm-*.md`,\n`02-scope-*.md`, or `03-design-*.md`, verify that the artifact's\n`## Q&A Log` table demonstrates Ralph-Loop convergence: every\nforcing-question topic id is tagged `[topic:<id>]` on at least one row\n(see the stage's forcing-questions checklist for the id list), the last\n2 turns produce no new decision-changing impact, OR an explicit user\nstop-signal row is recorded. Walk the stage forcing questions one at a\ntime via the `AskQuestion` tool. If you find yourself proposing a\ndraft after 1-2 questions while forcing topic ids remain untagged, STOP\nand continue the loop.\n\nThe `qa_log_unconverged` linter rule will block `stage-complete` when\nconvergence has not been reached. Wave 24 (v6.0.0) made `[topic:<id>]`\ntagging mandatory; the English keyword fallback was removed because it\nmis-reported convergence on RU/UA Q&A logs.\n\n## 2. Mandatory subagents run after Q&A approval\n\nFor brainstorm / scope / design, mandatory subagents (\n`product-discovery`, `critic`, `planner`, `architect`,\n`test-author`) run **only AFTER the user approves the elicitation\noutcome**, never before the Q&A loop converges. Dispatching them early\npreempts the user dialogue and violates the elicitation contract \u2014 the\nlinter will block stage-complete.\n\nSee each stage's \"Run Phase: post-elicitation\" rows in the materialized\nAutomatic Subagent Dispatch table.\n\n## 3. 
Never echo cclaw command lines to chat\n\nThe user does not run cclaw helpers (`node .cclaw/hooks/...`) manually.\nNEVER paste full command lines, `--evidence-json '{...}'` payloads,\n`--waive-delegation=...`, or shell hash commands (`shasum`,\n`sha256sum`, `Get-FileHash`, `certutil`, etc.) into chat. Run the\nhelper via the tool layer and report only the resulting summary. On\nfailure, report a compact human-readable summary plus the helper JSON in\na single fenced `json` block.\n";
10
- export declare const CURSOR_WORKFLOW_RULE_MDC = "---\ndescription: cclaw workflow guardrails for Cursor agent sessions\nglobs:\n - \"**/*\"\nalwaysApply: true\n---\n\n<!-- cclaw-managed-cursor-workflow-rule -->\n\n# Cclaw Workflow Guardrails\n\n## Activation Rule\n\nBefore responding to coding work:\n1. Read `.cclaw/state/flow-state.json`.\n2. Start with `/cc` or continue with `/cc`.\n3. If no software-stage flow applies, respond normally.\n\n## Stage Order\n\n`brainstorm -> scope -> design -> spec -> plan -> tdd -> review -> ship`\n\nTrack-specific skips are allowed only when `flow-state.track` + `skippedStages` explicitly say so.\n\n## Task Classification\n\n| Class | Route |\n|---|---|\n| non-trivial software work | `/cc <idea>` |\n| trivial software fix | `/cc <idea>` (quick track) |\n| bugfix with repro | `/cc <idea>` and enforce RED-first in tdd |\n| pure question / non-software | direct answer (no stage flow) |\n\n## Command Surface\n\n- `/cc` = entry and resume.\n- `/cc` = only progression path.\n- Knowledge capture and recall use the `learnings` skill when requested.\n\n## Verification Discipline\n\n- No completion claim without fresh command evidence in this turn.\n- Stage completion claim requires `stage-complete` exit 0 in the current turn. Quote the success line; do not paraphrase, do not infer success from skipped retries.\n\n## Protocol label hygiene\n\n`skip` wording means different things depending on phase: brainstorm/scope/design Q&A stop-signals may still literal **skip**/enough/move-on wording; structured ship closeout retros and compound clustering prompts should expose **no changes** (or accept-as-is language) rather than labeling the passive path as skip. 
Keep the verbs aligned with the harness question copy you present to the human.\n- Do not mark gates passed from memory.\n- Keep evidence in `.cclaw/artifacts/`; archive through closeout via `/cc` or cancel early via `node .cclaw/hooks/cancel-run.mjs`.\n\n## Delegation And Approvals\n\n- Machine-only checks in design/plan/tdd/review/ship should auto-dispatch when tooling supports it.\n- **For brainstorm / scope / design stages**: ask user input continuously via adaptive elicitation (one question per turn through the harness-native question tool \u2014 `AskQuestion` in Cursor). Walk the stage forcing-questions list one-by-one. **Tag each Q&A Log row's `Decision impact` cell with `[topic:<id>]`** (the id is given in the stage's forcing-questions checklist) so the linter can verify coverage in any natural language. Do NOT batch and do NOT defer to a single approval gate at the end. The `qa_log_unconverged` linter rule will block `stage-complete` when convergence is not reached (forcing topic ids untagged AND last 2 turns still produce decision-changing rows AND no stop-signal).\n- **For other stages** (spec/plan/tdd/build/review/ship): ask user input only at explicit approval gates (scope mode, plan approval, challenge resolution, ship finalization), not for routine progress updates.\n- If you find yourself proposing a draft after 1-2 questions in brainstorm/scope/design, STOP \u2014 go back to the forcing-questions list and continue.\n- Mandatory subagents in brainstorm/scope/design run only AFTER the user approves the elicitation outcome (see each stage's \"Run Phase: post-elicitation\" rows). Dispatching them before the Q&A loop converges violates the contract.\n- Never echo cclaw command lines (`node .cclaw/hooks/...`, `--evidence-json '{...}'`) to chat \u2014 the user does not run cclaw manually. 
Run helpers via the tool layer; report only the resulting summary.\n- If harness capabilities are partial, record waiver reasons in delegation logs.\n\n## Routing Source Of Truth\n\n- Primary router: `.cclaw/skills/using-cclaw/SKILL.md`.\n- Stage behavior: current stage skill plus `.cclaw/state/flow-state.json`.\n- Preamble budget: keep role/status announcements brief and avoid repeating\n them unless the stage or role changes.\n";
19
+ export declare const CURSOR_WORKFLOW_RULE_MDC = "---\ndescription: cclaw workflow guardrails for Cursor agent sessions\nglobs:\n - \"**/*\"\nalwaysApply: true\n---\n\n<!-- cclaw-managed-cursor-workflow-rule -->\n\n# Cclaw Workflow Guardrails\n\n## Activation Rule\n\nBefore responding to coding work:\n1. Read `.cclaw/state/flow-state.json`.\n2. Start with `/cc` or continue with `/cc`.\n3. If no software-stage flow applies, respond normally.\n\n## Stage Order\n\n`brainstorm -> scope -> design -> spec -> plan -> tdd -> review -> ship`\n\nTrack-specific skips are allowed only when `flow-state.track` + `skippedStages` explicitly say so.\n\n## Task Classification\n\n| Class | Route |\n|---|---|\n| non-trivial software work | `/cc <idea>` |\n| trivial software fix | `/cc <idea>` (quick track) |\n| bugfix with repro | `/cc <idea>` and enforce RED-first in tdd |\n| pure question / non-software | direct answer (no stage flow) |\n\n## Command Surface\n\n- `/cc` = entry and resume.\n- `/cc` = only progression path.\n- Knowledge capture and recall use the `learnings` skill when requested.\n\n## Verification Discipline\n\n- No completion claim without fresh command evidence in this turn.\n- Stage completion claim requires `stage-complete` exit 0 in the current turn. Quote the single-line success JSON printed to stdout (e.g. `{\"ok\":true,\"command\":\"stage-complete\",...}`); do not paraphrase, do not infer success from empty stdout or from skipped retries.\n\n## Protocol label hygiene\n\n`skip` wording means different things depending on phase: brainstorm/scope/design Q&A stop-signals may still literal **skip**/enough/move-on wording; structured ship closeout retros and compound clustering prompts should expose **no changes** (or accept-as-is language) rather than labeling the passive path as skip. 
Keep the verbs aligned with the harness question copy you present to the human.\n- Do not mark gates passed from memory.\n- Keep evidence in `.cclaw/artifacts/`; archive through closeout via `/cc` or cancel early via `node .cclaw/hooks/cancel-run.mjs`.\n\n## Delegation And Approvals\n\n- Machine-only checks in design/plan/tdd/review/ship should auto-dispatch when tooling supports it.\n- **For brainstorm / scope / design stages**: ask user input continuously via adaptive elicitation (one question per turn through the harness-native question tool \u2014 `AskQuestion` in Cursor). Walk the stage forcing-questions list one-by-one. **Tag each Q&A Log row's `Decision impact` cell with `[topic:<id>]`** (the id is given in the stage's forcing-questions checklist) so the linter can verify coverage in any natural language. Do NOT batch and do NOT defer to a single approval gate at the end. The `qa_log_unconverged` linter rule will block `stage-complete` when convergence is not reached (forcing topic ids untagged AND last 2 turns still produce decision-changing rows AND no stop-signal).\n- **For other stages** (spec/plan/tdd/build/review/ship): ask user input only at explicit approval gates (scope mode, plan approval, challenge resolution, ship finalization), not for routine progress updates.\n- If you find yourself proposing a draft after 1-2 questions in brainstorm/scope/design, STOP \u2014 go back to the forcing-questions list and continue.\n- Mandatory subagents in brainstorm/scope/design run only AFTER the user approves the elicitation outcome (see each stage's \"Run Phase: post-elicitation\" rows). Dispatching them before the Q&A loop converges violates the contract.\n- Never echo cclaw command lines (`node .cclaw/hooks/...`, `--evidence-json '{...}'`) to chat \u2014 the user does not run cclaw manually. 
Run helpers via the tool layer; report only the resulting summary.\n- If harness capabilities are partial, record waiver reasons in delegation logs.\n\n## Routing Source Of Truth\n\n- Primary router: `.cclaw/skills/using-cclaw/SKILL.md`.\n- Stage behavior: current stage skill plus `.cclaw/state/flow-state.json`.\n- Preamble budget: keep role/status announcements brief and avoid repeating\n them unless the stage or role changes.\n";
11
20
  export declare function buildRulesJson(): Record<string, unknown>;
@@ -1,4 +1,5 @@
1
1
  import { CCLAW_VERSION, SHIP_FINALIZATION_MODES } from "../constants.js";
2
+ import { renderBehaviorAnchorTemplateLine } from "./examples.js";
2
3
  import { orderedStageSchemas } from "./stage-schema.js";
3
4
  import { FLOW_STAGES } from "../types.js";
4
5
  const SHIP_FINALIZATION_ENUM_LINES = SHIP_FINALIZATION_MODES.map((mode) => ` - ${mode}`).join("\n");
@@ -17,11 +18,38 @@ const SEED_SHELF_SECTION = `## Seed Shelf Candidates (optional)
17
18
  | Seed file | Trigger when | Suggested action | Status (planted/deferred/ignored) |
18
19
  |---|---|---|---|
19
20
  | .cclaw/seeds/SEED-YYYY-MM-DD-<slug>.md | | | |`;
21
+ /**
22
+ * Shared investigation discipline block (Round 5 / v6.6.0). Rendered once per
23
+ * elicitation/spec stage skill (brainstorm, scope, design, spec, plan, tdd,
24
+ * review). The block enforces a four-step ladder before drafting and a
25
+ * path-passing rule for delegations so token cost and "jumped into code"
26
+ * regressions stay bounded. Stop-trigger count and ladder-step count are
27
+ * verified by `tests/unit/investigation-discipline-block.test.ts`.
28
+ */
29
+ export const INVESTIGATION_DISCIPLINE_BLOCK = `## Investigation Discipline
30
+
31
+ Use this ladder before drafting or delegating; do not jump straight to the editor.
32
+
33
+ 1. **Search** — locate the surface (file path, symbol, ref) before reading. Use \`rg\` / glob / graph; record the query, not the chunk.
34
+ 2. **Graph / impact** — name what the change touches (callers, callees, tests, configs) and its blast radius before opening a file.
35
+ 3. **Narrow read** — read at most 1-3 files, only the sections needed; cite paths with \`:line\` ranges instead of pasting bodies.
36
+ 4. **Draft** — only after the trace exists; the trace is the authority, not chat history or memory.
37
+
38
+ **Path-passing in delegations.** When delegating, pass repo-relative paths and refs (e.g. \`src/foo/bar.ts:42\`, \`D-12\`, \`AC-3\`) — never the file body. The subagent re-reads from path; pasting content fragments breaks freshness and inflates tokens.
39
+
40
+ **Stop triggers** (any one means halt and re-enter the ladder):
41
+
42
+ - You are about to read more than 3 files in one pass.
43
+ - You are about to load file content into a delegation prompt instead of paths or refs.
44
+ - You are about to start a draft before any trace (search log, graph note, narrow-read citation) exists.
45
+ `;
20
46
  export const ARTIFACT_TEMPLATES = {
21
47
  "01-brainstorm.md": `${artifactFrontmatter("brainstorm")}
22
48
 
23
49
  # Brainstorm Artifact
24
50
 
51
+ ${renderBehaviorAnchorTemplateLine("brainstorm")}
52
+
25
53
  ## Mode Block
26
54
  - **Mode:** STARTUP | BUILDER | ENGINEERING | OPS | RESEARCH (pick exactly one)
27
55
  - **Why this mode:** (one line; cite a concrete signal — repo state, user prompt, ownership, risk window)
@@ -200,6 +228,8 @@ ${MARKDOWN_CODE_FENCE}
200
228
 
201
229
  # Scope Artifact
202
230
 
231
+ ${renderBehaviorAnchorTemplateLine("scope")}
232
+
203
233
  ## Upstream Handoff
204
234
  - Source artifacts: \`00-idea.md\`, \`01-brainstorm-<slug>.md\`
205
235
  - Decisions carried forward:
@@ -434,6 +464,8 @@ ${MARKDOWN_CODE_FENCE}
434
464
 
435
465
  # Design Artifact
436
466
 
467
+ ${renderBehaviorAnchorTemplateLine("design")}
468
+
437
469
  ## Compact-First Scaffold
438
470
  - Default to the compact design spine unless risk requires Standard/Deep add-ons.
439
471
  - Compact required spine: Upstream Handoff, Codebase Investigation, Engineering Lock, Architecture Boundaries, Architecture Diagram, Data Flow, Failure Mode Table, Test Strategy, Spec Handoff, and Completion Dashboard.
@@ -698,6 +730,8 @@ ${MARKDOWN_CODE_FENCE}
698
730
 
699
731
  # Specification Artifact
700
732
 
733
+ ${renderBehaviorAnchorTemplateLine("spec")}
734
+
701
735
  ## Upstream Handoff
702
736
  - Source artifacts: standard uses \`02-scope-<slug>.md\` + \`03-design-<slug>.md\`; medium uses \`01-brainstorm-<slug>.md\` when present; quick uses \`00-idea.md\` plus reproduction context.
703
737
  - Decisions carried forward:
@@ -797,6 +831,8 @@ ${MARKDOWN_CODE_FENCE}
797
831
 
798
832
  # Plan Artifact
799
833
 
834
+ ${renderBehaviorAnchorTemplateLine("plan")}
835
+
800
836
  ## Plan Header
801
837
  - **Goal:** (one sentence — what this plan delivers)
802
838
  - **Architecture:** (2-3 sentences — approach + key boundaries)
@@ -930,6 +966,8 @@ Execution rule: complete and verify each batch before starting the next batch.
930
966
 
931
967
  # TDD Artifact
932
968
 
969
+ ${renderBehaviorAnchorTemplateLine("tdd")}
970
+
933
971
  ## Upstream Handoff
934
972
  - Source artifacts: \`04-spec.md\` plus the active track's upstream source item (plan slice on standard/medium, spec acceptance item or bug reproduction slice on quick).
935
973
  - Decisions carried forward:
@@ -1126,6 +1164,8 @@ Execution rule: complete and verify each batch before starting the next batch.
1126
1164
 
1127
1165
  # Review Artifact
1128
1166
 
1167
+ ${renderBehaviorAnchorTemplateLine("review")}
1168
+
1129
1169
  ## Upstream Handoff
1130
1170
  - Source artifacts: \`04-spec.md\`, \`06-tdd.md\`, plus the active track's upstream source item when available.
1131
1171
  - Decisions carried forward:
@@ -1298,6 +1338,8 @@ Execution rule: complete and verify each batch before starting the next batch.
1298
1338
 
1299
1339
  # Ship Artifact
1300
1340
 
1341
+ ${renderBehaviorAnchorTemplateLine("ship")}
1342
+
1301
1343
  ## Upstream Handoff
1302
1344
  - Source artifacts: \`06-tdd.md\`, \`07-review.md\`
1303
1345
  - Decisions carried forward:
@@ -1565,7 +1607,7 @@ Track-specific skips are allowed only when \`flow-state.track\` + \`skippedStage
1565
1607
  ## Verification Discipline
1566
1608
 
1567
1609
  - No completion claim without fresh command evidence in this turn.
1568
- - Stage completion claim requires \`stage-complete\` exit 0 in the current turn. Quote the success line; do not paraphrase, do not infer success from skipped retries.
1610
+ - Stage completion claim requires \`stage-complete\` exit 0 in the current turn. Quote the single-line success JSON printed to stdout (e.g. \`{"ok":true,"command":"stage-complete",...}\`); do not paraphrase, do not infer success from empty stdout or from skipped retries.
1569
1611
 
1570
1612
  ## Protocol label hygiene
1571
1613
 
@@ -601,7 +601,17 @@ export async function runAdvanceStage(projectRoot, args, io) {
601
601
  interactionHints
602
602
  };
603
603
  await writeFlowState(projectRoot, finalState);
604
- if (!args.quiet) {
604
+ if (args.quiet) {
605
+ io.stdout.write(`${JSON.stringify({
606
+ ok: true,
607
+ command: "stage-complete",
608
+ stage: args.stage,
609
+ completedStages: finalState.completedStages,
610
+ currentStage: finalState.currentStage,
611
+ runId: finalState.activeRunId
612
+ })}\n`);
613
+ }
614
+ else {
605
615
  io.stdout.write(`${JSON.stringify({
606
616
  ok: true,
607
617
  command: "advance-stage",
@@ -211,19 +211,31 @@ export async function runStartFlow(projectRoot, args, io) {
211
211
  nextState = { ...nextState, repoSignals };
212
212
  await writeFlowState(projectRoot, nextState, { allowReset: true });
213
213
  await appendIdeaArtifact(projectRoot, args, current);
214
- if (!args.quiet) {
214
+ const successPayload = {
215
+ ok: true,
216
+ command: "start-flow",
217
+ reclassify: args.reclassify,
218
+ track: nextState.track,
219
+ discoveryMode: nextState.discoveryMode,
220
+ taskClass: nextState.taskClass ?? null,
221
+ currentStage: nextState.currentStage,
222
+ skippedStages: nextState.skippedStages,
223
+ activeRunId: nextState.activeRunId,
224
+ repoSignals
225
+ };
226
+ if (args.quiet) {
215
227
  io.stdout.write(`${JSON.stringify({
216
228
  ok: true,
217
229
  command: "start-flow",
218
- reclassify: args.reclassify,
219
- track: nextState.track,
220
- discoveryMode: nextState.discoveryMode,
221
- taskClass: nextState.taskClass ?? null,
222
- currentStage: nextState.currentStage,
223
- skippedStages: nextState.skippedStages,
224
- activeRunId: nextState.activeRunId,
225
- repoSignals
226
- }, null, 2)}\n`);
230
+ track: successPayload.track,
231
+ discoveryMode: successPayload.discoveryMode,
232
+ currentStage: successPayload.currentStage,
233
+ activeRunId: successPayload.activeRunId,
234
+ repoSignals: successPayload.repoSignals
235
+ })}\n`);
236
+ }
237
+ else {
238
+ io.stdout.write(`${JSON.stringify(successPayload, null, 2)}\n`);
227
239
  }
228
240
  return 0;
229
241
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cclaw-cli",
3
- "version": "6.4.0",
3
+ "version": "6.6.0",
4
4
  "description": "Installer-first flow toolkit for coding agents",
5
5
  "type": "module",
6
6
  "bin": {