cclaw-cli 6.13.1 → 6.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -639,4 +639,19 @@ export interface StageLintContext {
639
639
  * v6.13.0 — effective worktree execution mode for TDD linters.
640
640
  */
641
641
  worktreeExecutionMode: "single-tree" | "worktree-first";
642
+ /**
643
+ * v6.14.0 — effective TDD checkpoint mode. `per-slice` enforces
644
+ * RED-before-GREEN per slice (the default for new projects);
645
+ * `global-red` keeps the v6.12/v6.13 wave-batch barrier (auto-applied
646
+ * for `legacyContinuation: true` projects on `cclaw-cli sync`).
647
+ */
648
+ tddCheckpointMode: "per-slice" | "global-red";
649
+ /**
650
+ * v6.14.0 — effective integration-overseer dispatch mode.
651
+ * `conditional` runs the overseer only when
652
+ * `integrationCheckRequired()` returns `required: true`; `always`
653
+ * preserves the v6.13 behavior of running it on every multi-slice
654
+ * wave.
655
+ */
656
+ integrationOverseerMode: "conditional" | "always";
642
657
  }
@@ -65,20 +65,50 @@ export declare function evaluateWavePlanDispatchIgnored(params: {
65
65
  legacyContinuation: boolean;
66
66
  }): Promise<LintFinding | null>;
67
67
  /**
68
- * v6.12.0 Phase W — RED checkpoint enforcement. The wave protocol
69
- * requires ALL Phase A REDs to land before ANY Phase B GREEN starts.
70
- * The rule is enforced on a per-wave basis, where a wave is defined by
71
- * the managed `## Parallel Execution Plan` block in `05-plan.md` and/or
72
- * `<artifacts-dir>/wave-plans/wave-NN.md` files. When no wave manifest
73
- * exists, the linter falls back to a conservative implicit detection: a
74
- * wave is a contiguous run of `phase=red` events with no other-phase
75
- * events between them; the rule fires only when the implicit wave has
76
- * 2+ members.
68
+ * v6.12.0 Phase W (legacy `global-red` mode) — RED checkpoint enforcement.
69
+ * The wave protocol requires ALL Phase A REDs to land before ANY Phase B
70
+ * GREEN starts. The rule is enforced on a per-wave basis, where a wave is
71
+ * defined by the managed `## Parallel Execution Plan` block in
72
+ * `05-plan.md` and/or `<artifacts-dir>/wave-plans/wave-NN.md` files. When
73
+ * no wave manifest exists, the linter falls back to a conservative
74
+ * implicit detection: a wave is a contiguous run of `phase=red` events
75
+ * with no other-phase events between them; the rule fires only when the
76
+ * implicit wave has 2+ members.
77
+ *
78
+ * v6.14.0: this function powers the `global-red` checkpoint mode. New
79
+ * projects default to `per-slice` mode (see
80
+ * `evaluatePerSliceRedBeforeGreen`); `legacyContinuation: true` projects
81
+ * auto-keep this behavior. Exported under both `evaluateGlobalRedCheckpoint`
82
+ * (canonical name) and `evaluateRedCheckpoint` (back-compat alias for
83
+ * existing tests/consumers).
77
84
  *
78
85
  * @param waveMembers Optional explicit wave manifest. Map key is wave
79
86
  * name (e.g. `"W-01"`); value is the set of slice ids in that wave.
80
87
  */
81
- export declare function evaluateRedCheckpoint(slices: Map<string, DelegationEntry[]>, waveMembers?: Map<string, Set<string>> | null): RedCheckpointResult;
88
+ export declare function evaluateGlobalRedCheckpoint(slices: Map<string, DelegationEntry[]>, waveMembers?: Map<string, Set<string>> | null): RedCheckpointResult;
89
+ /**
90
+ * Back-compat alias for `evaluateGlobalRedCheckpoint` (v6.12.0 Phase W
91
+ * behavior). Existing tests/consumers can keep importing
92
+ * `evaluateRedCheckpoint`. The v6.14.0 stream-style mode uses
93
+ * `evaluatePerSliceRedBeforeGreen` instead.
94
+ */
95
+ export declare const evaluateRedCheckpoint: typeof evaluateGlobalRedCheckpoint;
96
+ /**
97
+ * v6.14.0 — per-slice RED-before-GREEN enforcement (default mode).
98
+ *
99
+ * For each slice with both phase=red and phase=green completed events,
100
+ * fail if any green completedTs precedes the slice's last red completedTs.
101
+ * No global wave barrier — different slices may freely interleave their
102
+ * RED/GREEN/REFACTOR phases.
103
+ *
104
+ * Note: this is intentionally weaker than `evaluateGlobalRedCheckpoint`
105
+ * because the W-02 measurement on hox showed ~6 minutes of barrier
106
+ * overhead when slices were already disjoint (file-overlap scheduler did
107
+ * the parallelism job). The per-slice rule retains the only invariant
108
+ * that mattered for correctness: no slice goes GREEN before its own
109
+ * RED is observed failing.
110
+ */
111
+ export declare function evaluatePerSliceRedBeforeGreen(slices: Map<string, DelegationEntry[]>): RedCheckpointResult;
82
112
  export declare function parseVerticalSliceCycle(body: string): ParsedSliceCycleResult;
83
113
  interface VerificationLadderResult {
84
114
  ok: boolean;
@@ -1,6 +1,6 @@
1
1
  import fs from "node:fs/promises";
2
2
  import path from "node:path";
3
- import { loadTddReadySlicePool, readDelegationLedger, readDelegationEvents, selectReadySlices } from "../delegation.js";
3
+ import { integrationCheckRequired, loadTddReadySlicePool, readDelegationLedger, readDelegationEvents, selectReadySlices } from "../delegation.js";
4
4
  import { mergeParallelWaveDefinitions, parseParallelExecutionPlanWaves, parseWavePlanDirectory } from "../internal/plan-split-waves.js";
5
5
  import { evaluateInvestigationTrace, sectionBodyByName } from "./shared.js";
6
6
  const SLICE_SUMMARY_START = "<!-- auto-start: tdd-slice-summary -->";
@@ -27,7 +27,7 @@ const SLICES_INDEX_END = "<!-- auto-end: slices-index -->";
27
27
  * via `## Slices Index`.
28
28
  */
29
29
  export async function lintTddStage(ctx) {
30
- const { projectRoot, discoveryMode, raw, absFile, sections, findings, parsedFrontmatter, worktreeExecutionMode, legacyContinuation } = ctx;
30
+ const { projectRoot, discoveryMode, raw, absFile, sections, findings, parsedFrontmatter, worktreeExecutionMode, legacyContinuation, tddCheckpointMode, integrationOverseerMode } = ctx;
31
31
  void parsedFrontmatter;
32
32
  const artifactsDir = path.dirname(absFile);
33
33
  const planPath = path.join(artifactsDir, "05-plan.md");
@@ -144,21 +144,27 @@ export async function lintTddStage(ctx) {
144
144
  });
145
145
  }
146
146
  }
147
- // v6.12.0 Phase R — slice-documenter coverage is mandatory on every
148
- // TDD run regardless of discoveryMode. `discoveryMode` is now strictly
149
- // an early-stage knob (brainstorm/scope/design); TDD parallelism must
150
- // be uniform across lean/guided/deep so the controller cannot quietly
151
- // skip per-slice prose by picking a non-deep mode.
152
- void discoveryMode;
147
+ // v6.14.0 Phase 4 — slice-documenter coverage is mandatory only on
148
+ // `discoveryMode === "deep"` runs. lean/guided still emit the finding
149
+ // but as advisory (`required: false`) so the controller can choose to
150
+ // run a tighter inline-doc pass instead. The DOC role still exists;
151
+ // the linter just stops blocking the gate on lean/guided. Reference
152
+ // research report Section 4: "soften slice-documenter mandate".
153
153
  if (eventsActive) {
154
154
  const docResult = evaluateSliceDocumenterCoverage(slicesByEvents);
155
155
  if (docResult.missing.length > 0) {
156
+ const required = discoveryMode === "deep";
156
157
  findings.push({
157
158
  section: "tdd_slice_documenter_missing",
158
- required: true,
159
- rule: "Every TDD slice with a phase=green event must also carry a slice-documenter `phase=doc` event whose evidenceRefs reference `<artifacts-dir>/tdd-slices/S-<id>.md`. The requirement is independent of discoveryMode (v6.12.0 Phase R).",
159
+ required,
160
+ rule: required
161
+ ? "deep mode: every TDD slice with a phase=green event must also carry a slice-documenter `phase=doc` event whose evidenceRefs reference `<artifacts-dir>/tdd-slices/S-<id>.md`."
162
+ : "lean/guided modes (v6.14.0): the slice-documenter `phase=doc` event is advisory; controllers may use slice-implementer --finalize-doc inline instead. Required only for deep mode.",
160
163
  found: false,
161
- details: `Slices missing slice-documenter coverage: ${docResult.missing.join(", ")}. Dispatch slice-documenter --slice <id> --phase doc in parallel with slice-implementer --phase green for each slice.`
164
+ details: `Slices missing slice-documenter coverage: ${docResult.missing.join(", ")}. ` +
165
+ (required
166
+ ? "Dispatch slice-documenter --slice <id> --phase doc in parallel with slice-implementer --phase green for each slice."
167
+ : "Either dispatch slice-documenter --phase doc or call slice-implementer --finalize-doc inline at GREEN-completion.")
162
168
  });
163
169
  }
164
170
  }
@@ -179,23 +185,42 @@ export async function lintTddStage(ctx) {
179
185
  });
180
186
  }
181
187
  }
182
- // v6.12.0 Phase W — RED checkpoint enforcement. The wave protocol
183
- // requires ALL Phase A REDs to land before ANY Phase B GREEN starts.
184
- // Enforced per-wave: explicit `wave-plans/wave-NN.md` manifest if
185
- // present, otherwise implicit detection via contiguous red blocks
186
- // (size >= 2). Sequential per-slice runs (redgreen→refactor in a
187
- // tight loop) form size-1 implicit waves and are unaffected.
188
+ // v6.14.0 Phase 1 — RED checkpoint enforcement. The mode is selected
189
+ // by `flow-state.json::tddCheckpointMode`:
190
+ //
191
+ // - `per-slice` (default for new projects): enforce RED-before-GREEN
192
+ // per slice only. No global wave barrier; lanes run REDGREEN as
193
+ // soon as their dependsOn closes. Rule id:
194
+ // `tdd_slice_red_completed_before_green`.
195
+ // - `global-red` (auto-applied for legacyContinuation): enforce the
196
+ // v6.12 wave-batch barrier — every slice in a wave must complete
197
+ // phase=red before any slice in the same wave starts phase=green.
198
+ // Rule id: `tdd_red_checkpoint_violation` (legacy).
188
199
  if (eventsActive) {
189
- const waveManifest = await readMergedWaveManifestForCheckpoint(artifactsDir, planRaw);
190
- const checkpointResult = evaluateRedCheckpoint(slicesByEvents, waveManifest);
191
- if (!checkpointResult.ok) {
192
- findings.push({
193
- section: "tdd_red_checkpoint_violation",
194
- required: true,
195
- rule: "Wave Batch Mode (v6.12.0 Phase W): every slice in a wave must complete phase=red before any slice in the same wave starts phase=green. Detected: a phase=green completedTs precedes the last phase=red completedTs of the same wave.",
196
- found: false,
197
- details: checkpointResult.details
198
- });
200
+ if (tddCheckpointMode === "global-red") {
201
+ const waveManifest = await readMergedWaveManifestForCheckpoint(artifactsDir, planRaw);
202
+ const checkpointResult = evaluateGlobalRedCheckpoint(slicesByEvents, waveManifest);
203
+ if (!checkpointResult.ok) {
204
+ findings.push({
205
+ section: "tdd_red_checkpoint_violation",
206
+ required: true,
207
+ rule: "Wave Batch Mode (legacy global-red mode, v6.12.0 Phase W): every slice in a wave must complete phase=red before any slice in the same wave starts phase=green. Detected: a phase=green completedTs precedes the last phase=red completedTs of the same wave.",
208
+ found: false,
209
+ details: checkpointResult.details
210
+ });
211
+ }
212
+ }
213
+ else {
214
+ const perSliceResult = evaluatePerSliceRedBeforeGreen(slicesByEvents);
215
+ if (!perSliceResult.ok) {
216
+ findings.push({
217
+ section: "tdd_slice_red_completed_before_green",
218
+ required: true,
219
+ rule: "Stream-style TDD (v6.14.0): each slice's phase=green completedTs must be >= the same slice's last phase=red completedTs. No global wave barrier — lanes run independently.",
220
+ found: false,
221
+ details: perSliceResult.details
222
+ });
223
+ }
199
224
  }
200
225
  }
201
226
  // v6.12.0 Phase L — advisory backslide detection. When a cutover is
@@ -418,15 +443,74 @@ export async function lintTddStage(ctx) {
418
443
  const overseerStatusInArtifact = /\bintegration-overseer\b[\s\S]{0,200}\b(?:PASS_WITH_GAPS|PASS)\b/iu.test(raw);
419
444
  const integrationOverseerFound = completedOverseerRows.length > 0 &&
420
445
  (overseerStatusInEvidence || overseerStatusInArtifact);
446
+ // v6.14.0 Phase 3 — conditional integration-overseer dispatch. When
447
+ // `integrationOverseerMode === "conditional"` and
448
+ // `integrationCheckRequired()` returns required=false, the gate is
449
+ // soft (advisory) and an audit-only finding is emitted so the
450
+ // controller can record the deliberate skip in artifacts.
451
+ //
452
+ // v6.14.1 — also surface the audit row presence. When the controller
453
+ // skips `integration-overseer` dispatch (or the heuristic returns
454
+ // false), the run log MUST contain a
455
+ // `cclaw_integration_overseer_skipped` audit row for traceability.
456
+ // The advisory `tdd_integration_overseer_skipped_audit_missing`
457
+ // surfaces a missing audit row when 2+ closed slices closed without
458
+ // any overseer dispatch AND no audit was recorded.
459
+ let overseerVerdict = null;
460
+ let overseerRequired = true;
461
+ const skippedAuditRowCount = await countIntegrationOverseerSkippedAudits(projectRoot, delegationLedger.runId);
462
+ const skippedAuditRowFound = skippedAuditRowCount > 0;
463
+ if (integrationOverseerMode === "conditional") {
464
+ const eventsForVerdict = runEvents.length > 0 ? runEvents : [];
465
+ const auditsForVerdict = fanInAudits.filter((a) => a.runId === delegationLedger.runId);
466
+ overseerVerdict = integrationCheckRequired(eventsForVerdict, auditsForVerdict);
467
+ overseerRequired = overseerVerdict.required;
468
+ if (!overseerVerdict.required) {
469
+ const auditRowSuffix = skippedAuditRowFound
470
+ ? "audit row recorded — skip is fully traceable."
471
+ : "audit row MISSING — controller should append `cclaw_integration_overseer_skipped` for traceability (see `tdd_integration_overseer_skipped_audit_missing`).";
472
+ findings.push({
473
+ section: "tdd_integration_overseer_skipped_by_disjoint_paths",
474
+ required: false,
475
+ rule: "v6.14.0+ conditional integration-overseer mode: the heuristic returned `required: false` (disjoint claimedPaths, no high-risk slices, no fan-in conflicts). The controller may skip dispatching `integration-overseer` and emit a `cclaw_integration_overseer_skipped` audit row instead.",
476
+ found: true,
477
+ details: `integrationCheckRequired() reasons: ${overseerVerdict.reasons.join(", ")}. Skip is safe — ${auditRowSuffix}`
478
+ });
479
+ }
480
+ }
481
+ // v6.14.1 — `tdd_integration_overseer_skipped_audit_missing` (advisory).
482
+ // Fires when fan-out is detected (2+ completed slice-implementers),
483
+ // no `integration-overseer` was dispatched at all (no scheduled or
484
+ // completed row for the active run), AND no
485
+ // `cclaw_integration_overseer_skipped` audit row exists. This pairs
486
+ // with the controller skill text rule that the wave-closure decision
487
+ // ("dispatch overseer or skip") MUST leave a trail.
488
+ const overseerDispatched = activeRunEntries.some((entry) => entry.agent === "integration-overseer");
489
+ if (!overseerDispatched && !skippedAuditRowFound) {
490
+ findings.push({
491
+ section: "tdd_integration_overseer_skipped_audit_missing",
492
+ required: false,
493
+ rule: "v6.14.1: when a wave with 2+ closed slices closes without any integration-overseer dispatch, the controller should call `integrationCheckRequired()` and emit a `cclaw_integration_overseer_skipped` audit row so the decision is traceable. Advisory — never blocks stage-complete.",
494
+ found: false,
495
+ details: `Fan-out detected (${completedSliceImplementers.length} completed slice-implementer rows) but no integration-overseer dispatch row OR cclaw_integration_overseer_skipped audit row exists for active run. ` +
496
+ "Remediation: emit `node .cclaw/hooks/delegation-record.mjs --audit-kind=cclaw_integration_overseer_skipped --audit-reason=\"<reasons>\" --slice-ids=\"<S-1,S-2,...>\"` after wave closure."
497
+ });
498
+ }
421
499
  findings.push({
422
500
  section: "tdd.integration_overseer_missing",
423
- required: true,
424
- rule: "When fan-out is detected, require completed `integration-overseer` evidence with PASS or PASS_WITH_GAPS.",
501
+ required: overseerRequired,
502
+ rule: overseerRequired
503
+ ? "When fan-out is detected, require completed `integration-overseer` evidence with PASS or PASS_WITH_GAPS."
504
+ : "v6.14.0+ conditional integration-overseer mode: integration-overseer dispatch is advisory because `integrationCheckRequired()` returned required=false. Run it anyway if the run touches new boundaries.",
425
505
  found: integrationOverseerFound,
426
506
  details: integrationOverseerFound
427
507
  ? "integration-overseer completion recorded with PASS/PASS_WITH_GAPS evidence."
428
508
  : completedOverseerRows.length === 0
429
- ? "Fan-out detected but no completed integration-overseer delegation row exists for active run."
509
+ ? overseerRequired
510
+ ? "Fan-out detected but no completed integration-overseer delegation row exists for active run."
511
+ : skippedAuditRowFound
512
+ ? "Fan-out detected; integration-overseer not dispatched (conditional mode skipped on disjoint paths) and `cclaw_integration_overseer_skipped` audit row recorded. Audit-only."
513
+ : "Fan-out detected; integration-overseer not dispatched (conditional mode skipped on disjoint paths). Audit-only."
430
514
  : "integration-overseer completion exists, but PASS/PASS_WITH_GAPS evidence is missing in delegation evidenceRefs and artifact text."
431
515
  });
432
516
  }
@@ -500,6 +584,46 @@ export async function lintTddStage(ctx) {
500
584
  }
501
585
  }
502
586
  }
/**
 * v6.14.1 — tally the `cclaw_integration_overseer_skipped` audit rows
 * recorded in `<projectRoot>/.cclaw/state/delegation-events.jsonl`.
 *
 * The JSONL file is scanned directly instead of going through
 * `readDelegationEvents`: per the original note, audit rows are not
 * `DelegationEvent`s (no agent/status), so that reader drops them.
 *
 * Matching rules, applied line by line:
 *  - blank lines, unparseable lines, and JSON values that are not plain
 *    objects are ignored;
 *  - the row's `event` must equal `cclaw_integration_overseer_skipped`;
 *  - a row whose `runId` is a string must equal `runId`; a row without a
 *    string `runId` is counted regardless of the requested run.
 *
 * Best-effort by design: an unreadable or missing file yields 0.
 *
 * @param projectRoot Project root that contains the `.cclaw` directory.
 * @param runId Run identifier used to scope the count.
 * @returns Promise resolving to the number of matching audit rows.
 */
async function countIntegrationOverseerSkippedAudits(projectRoot, runId) {
    const eventsFile = path.join(projectRoot, ".cclaw/state/delegation-events.jsonl");
    let contents;
    try {
        contents = await fs.readFile(eventsFile, "utf8");
    }
    catch {
        // Missing/unreadable events file: nothing to count.
        return 0;
    }
    // True when one non-empty JSONL line is a skip-audit row for this run.
    const isSkipAuditRow = (text) => {
        let row;
        try {
            row = JSON.parse(text);
        }
        catch {
            return false;
        }
        const isPlainObject = Boolean(row) && typeof row === "object" && !Array.isArray(row);
        if (!isPlainObject || row.event !== "cclaw_integration_overseer_skipped") {
            return false;
        }
        // Rows that carry no string runId are not scoped to any run and always match.
        return typeof row.runId !== "string" || row.runId === runId;
    };
    return contents
        .split(/\r?\n/u)
        .map((rawLine) => rawLine.trim())
        .filter((text) => text.length > 0 && isSkipAuditRow(text))
        .length;
}
503
627
  async function listSliceFiles(slicesDir) {
504
628
  let entries = [];
505
629
  try {
@@ -632,19 +756,43 @@ export function evaluateEventsSliceCycle(slices) {
632
756
  });
633
757
  continue;
634
758
  }
635
- if (refactors.length === 0) {
759
+ // v6.14.0 refactorOutcome on phase=green satisfies REFACTOR coverage
760
+ // without a separate phase=refactor / phase=refactor-deferred row.
761
+ // - mode: "inline" → REFACTOR ran inline as part of GREEN.
762
+ // - mode: "deferred" → rationale required (carried in evidenceRefs[0]
763
+ // by the hook helper so legacy linters keep working).
764
+ const greenWithOutcome = greens.find((entry) => entry.refactorOutcome &&
765
+ (entry.refactorOutcome.mode === "inline" || entry.refactorOutcome.mode === "deferred"));
766
+ if (refactors.length === 0 && !greenWithOutcome) {
636
767
  errors.push(`${sliceId}: phase=refactor or phase=refactor-deferred event missing.`);
637
768
  findings.push({
638
769
  section: `tdd_slice_refactor_missing:${sliceId}`,
639
770
  required: true,
640
- rule: "Each TDD slice must close with a `phase=refactor` event or a `phase=refactor-deferred` event whose evidenceRefs / refactorRationale captures why refactor was deferred.",
771
+ rule: "Each TDD slice must close with a `phase=refactor` event, a `phase=refactor-deferred` event whose evidenceRefs / refactorRationale captures why refactor was deferred, OR a `phase=green` event carrying `refactorOutcome` (v6.14.0).",
641
772
  found: false,
642
- details: `${sliceId}: no phase=refactor or phase=refactor-deferred event.`
773
+ details: `${sliceId}: no phase=refactor / phase=refactor-deferred event and no refactorOutcome on phase=green.`
774
+ });
775
+ continue;
776
+ }
777
+ if (greenWithOutcome &&
778
+ greenWithOutcome.refactorOutcome?.mode === "deferred" &&
779
+ !greenWithOutcome.refactorOutcome.rationale &&
780
+ !(Array.isArray(greenWithOutcome.evidenceRefs) &&
781
+ greenWithOutcome.evidenceRefs.some((ref) => typeof ref === "string" && ref.trim().length > 0))) {
782
+ errors.push(`${sliceId}: phase=green refactorOutcome=deferred missing rationale.`);
783
+ findings.push({
784
+ section: `tdd_slice_refactor_missing:${sliceId}`,
785
+ required: true,
786
+ rule: "phase=green refactorOutcome=deferred requires a rationale (via --refactor-rationale or --evidence-ref).",
787
+ found: false,
788
+ details: `${sliceId}: phase=green refactorOutcome.mode=deferred recorded without rationale.`
643
789
  });
644
790
  continue;
645
791
  }
646
792
  const deferred = refactors.find((entry) => entry.phase === "refactor-deferred");
647
- if (deferred && refactors.every((entry) => entry.phase === "refactor-deferred")) {
793
+ if (refactors.length > 0 &&
794
+ deferred &&
795
+ refactors.every((entry) => entry.phase === "refactor-deferred")) {
648
796
  const refs = Array.isArray(deferred.evidenceRefs) ? deferred.evidenceRefs : [];
649
797
  const hasRationale = refs.some((ref) => typeof ref === "string" && ref.trim().length > 0);
650
798
  if (!hasRationale) {
@@ -814,20 +962,27 @@ export async function evaluateWavePlanDispatchIgnored(params) {
814
962
  return null;
815
963
  }
816
964
  /**
817
- * v6.12.0 Phase W — RED checkpoint enforcement. The wave protocol
818
- * requires ALL Phase A REDs to land before ANY Phase B GREEN starts.
819
- * The rule is enforced on a per-wave basis, where a wave is defined by
820
- * the managed `## Parallel Execution Plan` block in `05-plan.md` and/or
821
- * `<artifacts-dir>/wave-plans/wave-NN.md` files. When no wave manifest
822
- * exists, the linter falls back to a conservative implicit detection: a
823
- * wave is a contiguous run of `phase=red` events with no other-phase
824
- * events between them; the rule fires only when the implicit wave has
825
- * 2+ members.
965
+ * v6.12.0 Phase W (legacy `global-red` mode) — RED checkpoint enforcement.
966
+ * The wave protocol requires ALL Phase A REDs to land before ANY Phase B
967
+ * GREEN starts. The rule is enforced on a per-wave basis, where a wave is
968
+ * defined by the managed `## Parallel Execution Plan` block in
969
+ * `05-plan.md` and/or `<artifacts-dir>/wave-plans/wave-NN.md` files. When
970
+ * no wave manifest exists, the linter falls back to a conservative
971
+ * implicit detection: a wave is a contiguous run of `phase=red` events
972
+ * with no other-phase events between them; the rule fires only when the
973
+ * implicit wave has 2+ members.
974
+ *
975
+ * v6.14.0: this function powers the `global-red` checkpoint mode. New
976
+ * projects default to `per-slice` mode (see
977
+ * `evaluatePerSliceRedBeforeGreen`); `legacyContinuation: true` projects
978
+ * auto-keep this behavior. Exported under both `evaluateGlobalRedCheckpoint`
979
+ * (canonical name) and `evaluateRedCheckpoint` (back-compat alias for
980
+ * existing tests/consumers).
826
981
  *
827
982
  * @param waveMembers Optional explicit wave manifest. Map key is wave
828
983
  * name (e.g. `"W-01"`); value is the set of slice ids in that wave.
829
984
  */
830
- export function evaluateRedCheckpoint(slices, waveMembers = null) {
985
+ export function evaluateGlobalRedCheckpoint(slices, waveMembers = null) {
831
986
  const events = [];
832
987
  for (const [sliceId, rows] of slices.entries()) {
833
988
  for (const entry of rows) {
@@ -903,6 +1058,63 @@ export function evaluateRedCheckpoint(slices, waveMembers = null) {
903
1058
  "Dispatch ALL Phase A test-author --phase red calls in one message, verify every phase=red event lands with non-empty evidenceRefs, and only then dispatch Phase B slice-implementer --phase green + slice-documenter --phase doc fan-out."
904
1059
  };
905
1060
  }
1061
+ /**
1062
+ * Back-compat alias for `evaluateGlobalRedCheckpoint` (v6.12.0 Phase W
1063
+ * behavior). Existing tests/consumers can keep importing
1064
+ * `evaluateRedCheckpoint`. The v6.14.0 stream-style mode uses
1065
+ * `evaluatePerSliceRedBeforeGreen` instead.
1066
+ */
1067
+ export const evaluateRedCheckpoint = evaluateGlobalRedCheckpoint;
1068
+ /**
1069
+ * v6.14.0 — per-slice RED-before-GREEN enforcement (default mode).
1070
+ *
1071
+ * For each slice with both phase=red and phase=green completed events,
1072
+ * fail if any green completedTs precedes the slice's last red completedTs.
1073
+ * No global wave barrier — different slices may freely interleave their
1074
+ * RED/GREEN/REFACTOR phases.
1075
+ *
1076
+ * Note: this is intentionally weaker than `evaluateGlobalRedCheckpoint`
1077
+ * because the W-02 measurement on hox showed ~6 minutes of barrier
1078
+ * overhead when slices were already disjoint (file-overlap scheduler did
1079
+ * the parallelism job). The per-slice rule retains the only invariant
1080
+ * that mattered for correctness: no slice goes GREEN before its own
1081
+ * RED is observed failing.
1082
+ */
1083
+ export function evaluatePerSliceRedBeforeGreen(slices) {
1084
+ const violations = [];
1085
+ for (const [sliceId, rows] of slices.entries()) {
1086
+ const reds = rows.filter((entry) => entry.phase === "red");
1087
+ const greens = rows.filter((entry) => entry.phase === "green");
1088
+ if (reds.length === 0 || greens.length === 0)
1089
+ continue;
1090
+ const redTs = reds
1091
+ .map((entry) => entry.completedTs ?? entry.endTs ?? entry.ts ?? "")
1092
+ .filter((ts) => ts.length > 0)
1093
+ .sort();
1094
+ const greenTs = greens
1095
+ .map((entry) => entry.completedTs ?? entry.endTs ?? entry.ts ?? "")
1096
+ .filter((ts) => ts.length > 0)
1097
+ .sort();
1098
+ if (redTs.length === 0 || greenTs.length === 0)
1099
+ continue;
1100
+ const lastRed = redTs[redTs.length - 1];
1101
+ const earliestGreen = greenTs[0];
1102
+ if (earliestGreen < lastRed) {
1103
+ violations.push(`${sliceId}: phase=green completedTs (${earliestGreen}) precedes the slice's last phase=red completedTs (${lastRed})`);
1104
+ }
1105
+ }
1106
+ if (violations.length === 0) {
1107
+ return {
1108
+ ok: true,
1109
+ details: `Per-slice RED-before-GREEN holds: ${slices.size} slice(s) checked.`
1110
+ };
1111
+ }
1112
+ return {
1113
+ ok: false,
1114
+ details: `Per-slice RED-before-GREEN violation: ${violations.join("; ")}. ` +
1115
+ "Stream-style TDD requires each slice's RED to land before its own GREEN, but cross-lane interleaving is allowed."
1116
+ };
1117
+ }
906
1118
  const LEGACY_PER_SLICE_SECTIONS = [
907
1119
  "Test Discovery",
908
1120
  "RED Evidence",
@@ -1,7 +1,7 @@
1
1
  import fs from "node:fs/promises";
2
2
  import path from "node:path";
3
3
  import { resolveArtifactPath as resolveStageArtifactPath } from "./artifact-paths.js";
4
- import { effectiveWorktreeExecutionMode } from "./flow-state.js";
4
+ import { effectiveIntegrationOverseerMode, effectiveTddCheckpointMode, effectiveWorktreeExecutionMode } from "./flow-state.js";
5
5
  import { exists } from "./fs-utils.js";
6
6
  import { stageSchema } from "./content/stage-schema.js";
7
7
  import { readFlowState } from "./run-persistence.js";
@@ -124,6 +124,8 @@ export async function lintArtifact(projectRoot, stage, track = "standard", optio
124
124
  let completedStageMetaForAudit;
125
125
  let legacyContinuation = false;
126
126
  let worktreeExecutionMode = "single-tree";
127
+ let tddCheckpointMode = "per-slice";
128
+ let integrationOverseerMode = "always";
127
129
  try {
128
130
  const flowState = await readFlowState(projectRoot);
129
131
  const hint = flowState.interactionHints?.[stage];
@@ -136,6 +138,8 @@ export async function lintArtifact(projectRoot, stage, track = "standard", optio
136
138
  completedStageMetaForAudit = flowState.completedStageMeta;
137
139
  legacyContinuation = flowState.legacyContinuation === true;
138
140
  worktreeExecutionMode = effectiveWorktreeExecutionMode(flowState);
141
+ tddCheckpointMode = effectiveTddCheckpointMode(flowState);
142
+ integrationOverseerMode = effectiveIntegrationOverseerMode(flowState);
139
143
  }
140
144
  catch {
141
145
  activeStageFlags = [];
@@ -146,6 +150,8 @@ export async function lintArtifact(projectRoot, stage, track = "standard", optio
146
150
  completedStageMetaForAudit = undefined;
147
151
  legacyContinuation = false;
148
152
  worktreeExecutionMode = "single-tree";
153
+ tddCheckpointMode = "per-slice";
154
+ integrationOverseerMode = "always";
149
155
  }
150
156
  for (const extra of options.extraStageFlags ?? []) {
151
157
  if (typeof extra === "string" && extra.length > 0 && !activeStageFlags.includes(extra)) {
@@ -283,7 +289,9 @@ export async function lintArtifact(projectRoot, stage, track = "standard", optio
283
289
  activeStageFlags,
284
290
  taskClass,
285
291
  legacyContinuation,
286
- worktreeExecutionMode
292
+ worktreeExecutionMode,
293
+ tddCheckpointMode,
294
+ integrationOverseerMode
287
295
  };
288
296
  switch (stage) {
289
297
  case "brainstorm":
@@ -52,6 +52,56 @@ Before doing substantive work, return an ACK object that the parent can record:
52
52
 
53
53
  Finish with the required return schema plus the same \`spanId\` and \`dispatchId\`. The parent must not claim isolated completion unless ACK/result proof matches the ledger/event span.`;
54
54
  }
/**
 * v6.14.1 — TDD worker self-record contract. The parent records
 * `scheduled` and `launched` rows BEFORE dispatching the Task; the
 * worker is responsible for `acknowledged` (on entry) and `completed`
 * (on exit). This contract restores the v6.13.1 discipline that
 * v6.14.0 dropped — the controller-side fix in v6.14.1's TDD skill
 * text is paired with this worker-side self-record helper template.
 *
 * Timestamps in the rendered snippets are produced with
 * `node -e … toISOString()` rather than `date +…%3N`. The earlier
 * `date … %3N … || date …` form relied on `date` failing when `%N` is
 * unsupported, but BSD/macOS `date` exits successfully and prints a
 * literal `3N`, so the fallback never ran and an invalid timestamp was
 * recorded. `node` is already required by the snippet itself.
 *
 * @param agentName Agent the section is rendered for. Only
 *   `slice-implementer` gets the `--refactor-outcome` and lane/claim
 *   flags appended to the completion command.
 * @returns Markdown section text (no trailing newline).
 */
function tddWorkerSelfRecordContract(agentName) {
    const isImplementer = agentName === "slice-implementer";
    const refactorOutcomeFlag = isImplementer
        ? " --refactor-outcome=inline|deferred [--refactor-rationale=\"<why>\"]"
        : "";
    const laneFlags = isImplementer
        ? " [--claim-token=<t>] [--lane-id=<lane>] [--lease-until=<iso>]"
        : "";
    // Portable UTC ISO-8601 timestamp with milliseconds (same output on GNU and BSD userlands).
    const isoNowCommand = "$(node -e 'process.stdout.write(new Date().toISOString())')";
    return `## TDD Worker Self-Record Contract (v6.14.1)

You are a TDD worker dispatched via \`Task\`. The parent already wrote your \`scheduled\` and \`launched\` ledger rows BEFORE invoking you. **Your responsibility is to self-record \`acknowledged\` on entry and \`completed\` on exit** by invoking \`.cclaw/hooks/delegation-record.mjs\` directly. Do NOT skip these — the controller depends on them, the linter validates them, and back-fill via \`--repair\` is reserved for recovery only.

**On entry — record acknowledgement (BEFORE doing work):**

\`\`\`bash
ACK_TS="${isoNowCommand}"
node .cclaw/hooks/delegation-record.mjs \\
  --stage=tdd --agent=${agentName} --mode=mandatory \\
  --status=acknowledged \\
  --span-id=<spanId from controller dispatch> \\
  --dispatch-id=<dispatchId from controller dispatch> \\
  --dispatch-surface=<surface from controller dispatch> \\
  --agent-definition-path=.cclaw/agents/${agentName}.md \\
  --ack-ts="$ACK_TS" \\
  --json
\`\`\`

**On exit — record completion (AFTER work + verification):**

\`\`\`bash
COMPLETED_TS="${isoNowCommand}"
node .cclaw/hooks/delegation-record.mjs \\
  --stage=tdd --agent=${agentName} --mode=mandatory \\
  --status=completed \\
  --span-id=<same spanId> \\
  --completed-ts="$COMPLETED_TS" \\
  --evidence-ref="<test-path-or-artifact-ref>"${refactorOutcomeFlag}${laneFlags} \\
  --json
\`\`\`

Reuse the same \`<spanId>\` and \`<dispatchId>\` across both rows. \`--ack-ts\` and \`--completed-ts\` must be monotonic on the span (\`startTs ≤ launchedTs ≤ ackTs ≤ completedTs\`); the helper rejects out-of-order writes with \`delegation_timestamp_non_monotonic\`. If the helper rejects with \`dispatch_active_span_collision\` against a stale span, surface the conflicting \`spanId\` to the parent — do NOT silently retry with \`--allow-parallel\`.`;
}
55
105
  function formatReturnSchema(schema) {
56
106
  const lines = [
57
107
  `- Status field: \`${schema.statusField}\``,
@@ -600,6 +650,18 @@ export const CCLAW_AGENTS = [
600
650
  ].join("\n")
601
651
  }
602
652
  ];
653
+ /**
654
+ * v6.14.1 — agents whose rendered `.cclaw/agents/<name>.md` file gets the
655
+ * TDD worker self-record helper template. These agents are the ones the
656
+ * controller dispatches via `Task` during a TDD wave; they are
657
+ * responsible for `acknowledged` and `completed` ledger writes.
658
+ */
659
+ const TDD_WORKER_SELF_RECORD_AGENTS = new Set([
660
+ "test-author",
661
+ "slice-implementer",
662
+ "slice-documenter",
663
+ "integration-overseer"
664
+ ]);
603
665
  import { stageDelegationSummary } from "./stage-schema.js";
604
666
  /**
605
667
  * Render a complete cclaw agent markdown file (YAML frontmatter + body).
@@ -627,6 +689,9 @@ export function agentMarkdown(agent) {
627
689
  ].join("\n");
628
690
  const relatedStages = agent.relatedStages.length > 0 ? agent.relatedStages.join(", ") : "(none)";
629
691
  const taskDelegation = defaultTaskDelegationSection(agent.name);
692
+ const tddWorkerSelfRecordSection = TDD_WORKER_SELF_RECORD_AGENTS.has(agent.name)
693
+ ? `\n${tddWorkerSelfRecordContract(agent.name)}\n`
694
+ : "";
630
695
  return `${frontmatter}
631
696
 
632
697
  # ${agent.name}
@@ -639,7 +704,7 @@ ${agent.body}
639
704
  - Related stages: ${relatedStages}
640
705
 
641
706
  ${workerAckContract()}
642
-
707
+ ${tddWorkerSelfRecordSection}
643
708
  ## Required Return Schema
644
709
 
645
710
  STRICT_RETURN_SCHEMA: return a structured object matching this contract before any narrative when delegated. Include \`spanId\`, \`dispatchId\` or \`workerRunId\`, \`dispatchSurface\`, \`agentDefinitionPath\`, and lifecycle timestamps when provided by the parent.