cclaw-cli 6.6.0 → 6.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/dist/artifact-linter/findings-dedup.d.ts +56 -0
  2. package/dist/artifact-linter/findings-dedup.js +232 -0
  3. package/dist/artifact-linter/plan.js +3 -2
  4. package/dist/artifact-linter/shared.d.ts +49 -0
  5. package/dist/artifact-linter/shared.js +35 -0
  6. package/dist/artifact-linter.d.ts +1 -1
  7. package/dist/artifact-linter.js +45 -3
  8. package/dist/content/hooks.js +241 -7
  9. package/dist/content/node-hooks.js +43 -0
  10. package/dist/content/skills-elicitation.js +3 -6
  11. package/dist/content/skills.js +3 -1
  12. package/dist/content/stages/brainstorm.js +4 -4
  13. package/dist/content/stages/scope.js +2 -2
  14. package/dist/content/templates.js +3 -2
  15. package/dist/delegation.d.ts +107 -0
  16. package/dist/delegation.js +223 -6
  17. package/dist/internal/advance-stage/advance.js +23 -1
  18. package/dist/internal/advance-stage/parsers.d.ts +8 -0
  19. package/dist/internal/advance-stage/parsers.js +7 -0
  20. package/dist/internal/advance-stage/proactive-delegation-trace.d.ts +3 -0
  21. package/dist/internal/advance-stage/proactive-delegation-trace.js +8 -1
  22. package/dist/internal/advance-stage/rewind.js +2 -2
  23. package/dist/internal/advance-stage/start-flow.js +4 -1
  24. package/dist/internal/advance-stage.js +41 -2
  25. package/dist/internal/flow-state-repair.d.ts +13 -0
  26. package/dist/internal/flow-state-repair.js +65 -0
  27. package/dist/internal/waiver-grant.d.ts +62 -0
  28. package/dist/internal/waiver-grant.js +294 -0
  29. package/dist/run-persistence.d.ts +70 -0
  30. package/dist/run-persistence.js +215 -3
  31. package/dist/runs.d.ts +1 -1
  32. package/dist/runs.js +1 -1
  33. package/dist/runtime/run-hook.mjs +43 -0
  34. package/package.json +1 -1
package/dist/content/hooks.js +241 -7
@@ -191,7 +191,7 @@ export function cancelRunScript() {
191
191
  return internalHelperScript("cancel-run", "cancel-run", "Usage: node " + RUNTIME_ROOT + "/hooks/cancel-run.mjs --reason=<text> [--disposition=<cancelled|abandoned>] [--name=<slug>]");
192
192
  }
193
193
  export function stageCompleteScript() {
194
- return internalHelperScript("stage-complete", "advance-stage", "Usage: node " + RUNTIME_ROOT + "/hooks/stage-complete.mjs <stage> [--passed=...] [--evidence-json=...] [--waive-delegation=...] [--waiver-reason=...] [--accept-proactive-waiver] [--accept-proactive-waiver-reason=\"<why safe>\"] [--skip-questions] [--json]", {
194
+ return internalHelperScript("stage-complete", "advance-stage", "Usage: node " + RUNTIME_ROOT + "/hooks/stage-complete.mjs <stage> [--passed=...] [--evidence-json=...] [--waive-delegation=...] [--waiver-reason=...] [--accept-proactive-waiver=<token>] [--accept-proactive-waiver-reason=\"<why safe>\"] [--skip-questions] [--json]", {
195
195
  positionalArgName: "stage",
196
196
  positionalArgRequired: true,
197
197
  defaultQuietEnvVar: "CCLAW_STAGE_COMPLETE_QUIET"
@@ -199,6 +199,7 @@ export function stageCompleteScript() {
199
199
  }
200
200
  export function delegationRecordScript() {
201
201
  return `#!/usr/bin/env node
202
+ import { createHash } from "node:crypto";
202
203
  import fs from "node:fs/promises";
203
204
  import path from "node:path";
204
205
  import process from "node:process";
@@ -210,6 +211,37 @@ const VALID_DISPATCH_SURFACES = ${JSON.stringify([...DELEGATION_DISPATCH_SURFACE
210
211
  const VALID_DISPATCH_SURFACES_SET = new Set(VALID_DISPATCH_SURFACES);
211
212
  const SURFACE_PATH_PREFIXES = ${JSON.stringify(DELEGATION_DISPATCH_SURFACE_PATH_PREFIXES)};
212
213
  const LEDGER_SCHEMA_VERSION = 3;
214
+ const FLOW_STATE_GUARD_REL_PATH = RUNTIME_ROOT + "/.flow-state.guard.json";
215
+
216
+ async function verifyFlowStateGuardInline(root) {
217
+ const statePath = path.join(root, RUNTIME_ROOT, "state", "flow-state.json");
218
+ const guardPath = path.join(root, FLOW_STATE_GUARD_REL_PATH);
219
+ let raw;
220
+ try {
221
+ raw = await fs.readFile(statePath, "utf8");
222
+ } catch {
223
+ return;
224
+ }
225
+ let guard;
226
+ try {
227
+ const guardRaw = await fs.readFile(guardPath, "utf8");
228
+ guard = JSON.parse(guardRaw);
229
+ } catch {
230
+ return;
231
+ }
232
+ if (!guard || typeof guard !== "object" || typeof guard.sha256 !== "string") return;
233
+ const actual = createHash("sha256").update(raw, "utf8").digest("hex");
234
+ if (actual === guard.sha256) return;
235
+ process.stderr.write(
236
+ "[cclaw] delegation-record: flow-state guard mismatch: " + (guard.runId || "unknown-run") + "\\n" +
237
+ "expected sha: " + guard.sha256 + "\\n" +
238
+ "actual sha: " + actual + "\\n" +
239
+ "last writer: " + (guard.writerSubsystem || "unknown") + "@" + (guard.writtenAt || "unknown") + "\\n" +
240
+ "do not edit flow-state.json by hand. To recover, run:\\n" +
241
+ " cclaw-cli internal flow-state-repair --reason \\"manual_edit_recovery\\"\\n"
242
+ );
243
+ process.exit(2);
244
+ }
213
245
 
214
246
  function parseArgs(argv) {
215
247
  const args = {};
@@ -294,7 +326,7 @@ function hasPriorAck(events, args, runId) {
294
326
  function usage() {
295
327
  process.stderr.write([
296
328
  "Usage:",
297
- " node .cclaw/hooks/delegation-record.mjs --stage=<stage> --agent=<agent> --mode=<mandatory|proactive> --status=<scheduled|launched|acknowledged|completed|failed|waived|stale> --span-id=<id> [--dispatch-id=<id>] [--worker-run-id=<id>] [--dispatch-surface=<surface>] [--agent-definition-path=<path>] [--ack-ts=<iso>] [--launched-ts=<iso>] [--completed-ts=<iso>] [--evidence-ref=<ref>] [--waiver-reason=<text>] [--json]",
329
+ " node .cclaw/hooks/delegation-record.mjs --stage=<stage> --agent=<agent> --mode=<mandatory|proactive> --status=<scheduled|launched|acknowledged|completed|failed|waived|stale> --span-id=<id> [--dispatch-id=<id>] [--worker-run-id=<id>] [--dispatch-surface=<surface>] [--agent-definition-path=<path>] [--ack-ts=<iso>] [--launched-ts=<iso>] [--completed-ts=<iso>] [--evidence-ref=<ref>] [--waiver-reason=<text>] [--supersede=<prevSpanId>] [--allow-parallel] [--json]",
298
330
  " node .cclaw/hooks/delegation-record.mjs --rerecord --span-id=<id> --dispatch-id=<id> --dispatch-surface=<surface> --agent-definition-path=<path> [--ack-ts=<iso>] [--completed-ts=<iso>] [--evidence-ref=<ref>] [--json]",
299
331
  " node .cclaw/hooks/delegation-record.mjs --repair --span-id=<id> --repair-reason=\"<why>\" [--json]",
300
332
  "",
@@ -303,6 +335,10 @@ function usage() {
303
335
  "",
304
336
  "Per-surface allowed --agent-definition-path prefixes:",
305
337
  ...VALID_DISPATCH_SURFACES.map((surface) => " " + surface + ": " + (SURFACE_PATH_PREFIXES[surface].length === 0 ? "(any)" : SURFACE_PATH_PREFIXES[surface].join(", "))),
338
+ "",
339
+ "Dispatch dedup (v6.8.0):",
340
+ " --supersede=<prevSpanId> close the previous active span on this (stage, agent) as 'stale' before recording the new scheduled row",
341
+ " --allow-parallel record both spans as concurrent; new row is tagged allowParallel: true",
306
342
  ""
307
343
  ].join("\\n") + "\\n");
308
344
  }
@@ -318,6 +354,51 @@ function emitProblems(problems, json, code) {
318
354
  process.exitCode = exitCode;
319
355
  }
320
356
 
357
+ function emitErrorJson(error, details, json) {
358
+ if (json) {
359
+ process.stdout.write(JSON.stringify({ ok: false, error, details }, null, 2) + "\\n");
360
+ } else {
361
+ process.stderr.write("[cclaw] delegation-record: error: " + error + " — " + JSON.stringify(details) + "\\n");
362
+ }
363
+ process.exit(2);
364
+ }
365
+
366
+ // keep in sync with validateMonotonicTimestamps in src/delegation.ts
367
+ function validateMonotonicTimestampsInline(stamped, prior) {
368
+ const startTs = stamped.startTs;
369
+ if (stamped.launchedTs && startTs && stamped.launchedTs < startTs) {
370
+ return { field: "launchedTs", actual: stamped.launchedTs, bound: startTs };
371
+ }
372
+ if (stamped.ackTs) {
373
+ const ackBound = stamped.launchedTs || startTs;
374
+ if (ackBound && stamped.ackTs < ackBound) {
375
+ return { field: "ackTs", actual: stamped.ackTs, bound: ackBound };
376
+ }
377
+ }
378
+ if (stamped.completedTs) {
379
+ const completedBound = stamped.ackTs || stamped.launchedTs || startTs;
380
+ if (completedBound && stamped.completedTs < completedBound) {
381
+ return { field: "completedTs", actual: stamped.completedTs, bound: completedBound };
382
+ }
383
+ }
384
+ if (!stamped.spanId) return null;
385
+ const priorForSpan = (prior || []).filter((entry) => entry && entry.spanId === stamped.spanId);
386
+ if (priorForSpan.length === 0) return null;
387
+ const tsValues = priorForSpan
388
+ .map((entry) => entry.ts || entry.startTs || "")
389
+ .filter((ts) => ts.length > 0);
390
+ if (tsValues.length === 0) return null;
391
+ let latest = tsValues[0];
392
+ for (let i = 1; i < tsValues.length; i += 1) {
393
+ if (tsValues[i] > latest) latest = tsValues[i];
394
+ }
395
+ const stampedTs = stamped.ts || stamped.startTs || "";
396
+ if (stampedTs && stampedTs < latest) {
397
+ return { field: "ts", actual: stampedTs, bound: latest };
398
+ }
399
+ return null;
400
+ }
401
+
321
402
  function normalizeRelPath(value) {
322
403
  return String(value || "").replace(/\\\\/gu, "/").replace(/^\\.\\//u, "");
323
404
  }
@@ -350,12 +431,15 @@ function normalizeEvidenceRefs(args) {
350
431
  return [];
351
432
  }
352
433
 
353
- function buildRow(args, status, runId, now) {
434
+ function buildRow(args, status, runId, now, options) {
354
435
  const fulfillmentMode = args["dispatch-surface"] === "role-switch"
355
436
  ? "role-switch"
356
437
  : args["dispatch-surface"] === "cursor-task" || args["dispatch-surface"] === "generic-task"
357
438
  ? "generic-dispatch"
358
439
  : "isolated";
440
+ // Inherit the span's startTs from prior rows so monotonic validation
441
+ // can compare against the original schedule, not the row write time.
442
+ const startTs = (options && options.spanStartTs) || now;
359
443
  return {
360
444
  stage: args.stage,
361
445
  agent: args.agent,
@@ -370,13 +454,83 @@ function buildRow(args, status, runId, now) {
370
454
  waiverReason: args["waiver-reason"],
371
455
  evidenceRefs: normalizeEvidenceRefs(args),
372
456
  runId,
373
- startTs: now,
457
+ startTs,
374
458
  ts: now,
375
459
  launchedTs: args["launched-ts"] || (status === "launched" ? now : undefined),
376
460
  ackTs: args["ack-ts"] || (status === "acknowledged" ? now : undefined),
377
461
  completedTs: args["completed-ts"] || (status === "completed" ? now : undefined),
378
462
  endTs: TERMINAL.has(status) ? now : undefined,
379
- schemaVersion: LEDGER_SCHEMA_VERSION
463
+ schemaVersion: LEDGER_SCHEMA_VERSION,
464
+ allowParallel: args["allow-parallel"] === true ? true : undefined
465
+ };
466
+ }
467
+
468
+ async function readDelegationLedgerEntries(root) {
469
+ try {
470
+ const raw = await fs.readFile(path.join(root, RUNTIME_ROOT, "state", "delegation-log.json"), "utf8");
471
+ const parsed = JSON.parse(raw);
472
+ if (parsed && Array.isArray(parsed.entries)) return parsed.entries;
473
+ } catch {
474
+ // empty / missing ledger is fine for dedup + monotonicity checks
475
+ }
476
+ return [];
477
+ }
478
+
479
+ // keep in sync with findActiveSpanForPair / DispatchDuplicateError in src/delegation.ts
480
+ function findActiveSpanForPairInline(stage, agent, runId, entries) {
481
+ const ACTIVE_STATUSES = new Set(["scheduled", "launched", "acknowledged"]);
482
+ const effectiveTs = (entry) =>
483
+ entry.completedTs || entry.ackTs || entry.launchedTs || entry.endTs || entry.startTs || entry.ts || "";
484
+ const latestBySpan = new Map();
485
+ for (const entry of entries) {
486
+ if (!entry || typeof entry !== "object") continue;
487
+ if (typeof entry.spanId !== "string" || entry.spanId.length === 0) continue;
488
+ if (entry.runId && entry.runId !== runId) continue;
489
+ if (entry.stage !== stage || entry.agent !== agent) continue;
490
+ const existing = latestBySpan.get(entry.spanId);
491
+ if (!existing || effectiveTs(entry) >= effectiveTs(existing)) {
492
+ latestBySpan.set(entry.spanId, entry);
493
+ }
494
+ }
495
+ for (const entry of latestBySpan.values()) {
496
+ if (ACTIVE_STATUSES.has(entry.status)) return entry;
497
+ }
498
+ return null;
499
+ }
500
+
501
+ function enforceDispatchDedupInline(stamped, priorEntries, args) {
502
+ if (stamped.status !== "scheduled") return null;
503
+ if (args["allow-parallel"] === true) return null;
504
+ const existing = findActiveSpanForPairInline(
505
+ stamped.stage,
506
+ stamped.agent,
507
+ stamped.runId,
508
+ priorEntries
509
+ );
510
+ if (!existing || existing.spanId === stamped.spanId) return null;
511
+ if (typeof args.supersede === "string" && args.supersede.length > 0) {
512
+ if (args.supersede !== existing.spanId) {
513
+ return {
514
+ kind: "supersede-mismatch",
515
+ details: {
516
+ requested: args.supersede,
517
+ actualActiveSpanId: existing.spanId,
518
+ stage: stamped.stage,
519
+ agent: stamped.agent
520
+ }
521
+ };
522
+ }
523
+ return { kind: "supersede", existing };
524
+ }
525
+ return {
526
+ kind: "error",
527
+ details: {
528
+ existingSpanId: existing.spanId,
529
+ existingStatus: existing.status,
530
+ newSpanId: stamped.spanId,
531
+ pair: { stage: stamped.stage, agent: stamped.agent },
532
+ hint: "pass --supersede=" + existing.spanId + " to close the previous span as stale, or --allow-parallel to record both as concurrent"
533
+ }
380
534
  };
381
535
  }
382
536
 
@@ -458,7 +612,32 @@ async function persistEntry(root, runId, clean, event, options = {}) {
458
612
  await releaseDelegationLogLock(lockDir);
459
613
  }
460
614
 
461
- const active = ledger.entries.filter((entry) => ["scheduled", "launched", "acknowledged"].includes(entry.status));
615
+ // keep in sync with computeActiveSubagents in src/delegation.ts
616
+ const ACTIVE_STATUSES = new Set(["scheduled", "launched", "acknowledged"]);
617
+ const effectiveTs = (entry) =>
618
+ entry.completedTs || entry.ackTs || entry.launchedTs || entry.endTs || entry.startTs || entry.ts || "";
619
+ const latestBySpan = new Map();
620
+ for (const entry of ledger.entries) {
621
+ if (!entry || typeof entry !== "object" || typeof entry.spanId !== "string" || entry.spanId.length === 0) continue;
622
+ const existing = latestBySpan.get(entry.spanId);
623
+ if (!existing) {
624
+ latestBySpan.set(entry.spanId, entry);
625
+ continue;
626
+ }
627
+ if (effectiveTs(entry) >= effectiveTs(existing)) {
628
+ latestBySpan.set(entry.spanId, entry);
629
+ }
630
+ }
631
+ const active = [];
632
+ for (const entry of latestBySpan.values()) {
633
+ if (ACTIVE_STATUSES.has(entry.status)) active.push(entry);
634
+ }
635
+ active.sort((a, b) => {
636
+ const aKey = a.startTs || a.ts || "";
637
+ const bKey = b.startTs || b.ts || "";
638
+ if (aKey === bKey) return 0;
639
+ return aKey < bKey ? -1 : 1;
640
+ });
462
641
  await fs.writeFile(path.join(stateDir, "subagents.json"), JSON.stringify({ active, updatedAt: event.eventTs }, null, 2) + "\\n", { encoding: "utf8", mode: 0o600 });
463
642
  }
464
643
 
@@ -693,6 +872,9 @@ async function main() {
693
872
  const args = parseArgs(process.argv.slice(2));
694
873
  const json = args.json !== undefined;
695
874
 
875
+ const guardRoot = await detectRoot();
876
+ await verifyFlowStateGuardInline(guardRoot);
877
+
696
878
  if (args.repair) {
697
879
  await runRepair(args, json);
698
880
  return;
@@ -779,9 +961,61 @@ async function main() {
779
961
  }
780
962
 
781
963
  const status = args.status;
782
- const row = buildRow(args, status, runId, now);
964
+ const priorLedger = await readDelegationLedgerEntries(root);
965
+ const priorForSpan = priorLedger.filter((e) => e && e.spanId === args["span-id"]);
966
+ const inheritedStartTs = priorForSpan
967
+ .map((e) => e.startTs)
968
+ .filter((ts) => typeof ts === "string" && ts.length > 0)
969
+ .sort()[0];
970
+ // When no prior row exists, fall back to the earliest user-supplied
971
+ // event timestamp so the monotonic validator never sees the row write
972
+ // time overshoot the real event timestamps.
973
+ const lifecycleCandidates = [
974
+ inheritedStartTs,
975
+ args["launched-ts"],
976
+ args["ack-ts"],
977
+ args["completed-ts"],
978
+ now
979
+ ].filter((value) => typeof value === "string" && value.length > 0);
980
+ const spanStartTs = inheritedStartTs ||
981
+ lifecycleCandidates.reduce((min, candidate) => (candidate < min ? candidate : min), now);
982
+ const row = buildRow(args, status, runId, now, { spanStartTs });
783
983
  const clean = Object.fromEntries(Object.entries(row).filter(([, value]) => value !== undefined));
784
984
  const event = { ...clean, event: status, eventTs: now };
985
+
986
+ const violation = validateMonotonicTimestampsInline(clean, priorLedger);
987
+ if (violation) {
988
+ emitErrorJson("delegation_timestamp_non_monotonic", violation, json);
989
+ return;
990
+ }
991
+ const dedupViolation = enforceDispatchDedupInline(clean, priorLedger, args);
992
+ if (dedupViolation) {
993
+ if (dedupViolation.kind === "supersede") {
994
+ const stalenessTs = new Date(new Date(now).getTime() - 1).toISOString();
995
+ const staleRow = {
996
+ stage: dedupViolation.existing.stage,
997
+ agent: dedupViolation.existing.agent,
998
+ mode: dedupViolation.existing.mode,
999
+ status: "stale",
1000
+ spanId: dedupViolation.existing.spanId,
1001
+ runId,
1002
+ startTs: dedupViolation.existing.startTs || stalenessTs,
1003
+ ts: stalenessTs,
1004
+ endTs: stalenessTs,
1005
+ supersededBy: clean.spanId,
1006
+ schemaVersion: LEDGER_SCHEMA_VERSION
1007
+ };
1008
+ const staleEvent = { ...staleRow, event: "stale", eventTs: stalenessTs };
1009
+ await persistEntry(root, runId, staleRow, staleEvent);
1010
+ } else if (dedupViolation.kind === "error") {
1011
+ emitErrorJson("dispatch_duplicate", dedupViolation.details, json);
1012
+ return;
1013
+ } else if (dedupViolation.kind === "supersede-mismatch") {
1014
+ emitErrorJson("dispatch_supersede_mismatch", dedupViolation.details, json);
1015
+ return;
1016
+ }
1017
+ }
1018
+
785
1019
  await persistEntry(root, runId, clean, event);
786
1020
  process.stdout.write(JSON.stringify({ ok: true, event }, null, 2) + "\\n");
787
1021
  }
package/dist/content/node-hooks.js +43 -0
@@ -49,12 +49,14 @@ export function nodeHookRuntimeScript(options = {}) {
49
49
  const defaultDisabledHooks = [];
50
50
  const cliRuntime = resolveCliRuntimeForGeneratedHook();
51
51
  return `#!/usr/bin/env node
52
+ import { createHash } from "node:crypto";
52
53
  import fs from "node:fs/promises";
53
54
  import path from "node:path";
54
55
  import process from "node:process";
55
56
  import { spawn } from "node:child_process";
56
57
 
57
58
  const RUNTIME_ROOT = ${JSON.stringify(RUNTIME_ROOT)};
59
+ const FLOW_STATE_GUARD_REL_PATH = RUNTIME_ROOT + "/.flow-state.guard.json";
58
60
  // Single strictness default, derived from config.strictness at install time.
59
61
  // \`CCLAW_STRICTNESS\` env var overrides for the current process. All guards
60
62
  // (prompt, workflow, TDD, iron-laws) route through \`resolveStrictness()\`.
@@ -1017,6 +1019,40 @@ function extractCodePathsFromText(value) {
1017
1019
  return out;
1018
1020
  }
1019
1021
 
1022
+ async function verifyFlowStateGuardInline(root, hookName) {
1023
+ const statePath = path.join(root, RUNTIME_ROOT, "state", "flow-state.json");
1024
+ const guardPath = path.join(root, FLOW_STATE_GUARD_REL_PATH);
1025
+ let raw;
1026
+ try {
1027
+ raw = await fs.readFile(statePath, "utf8");
1028
+ } catch {
1029
+ return true;
1030
+ }
1031
+ let guard;
1032
+ try {
1033
+ const guardRaw = await fs.readFile(guardPath, "utf8");
1034
+ guard = JSON.parse(guardRaw);
1035
+ } catch {
1036
+ return true;
1037
+ }
1038
+ if (!guard || typeof guard !== "object" || typeof guard.sha256 !== "string") {
1039
+ return true;
1040
+ }
1041
+ const actual = createHash("sha256").update(raw, "utf8").digest("hex");
1042
+ if (actual === guard.sha256) return true;
1043
+ const hookLabel = typeof hookName === "string" && hookName.length > 0 ? hookName : "hook";
1044
+ process.stderr.write(
1045
+ "[cclaw] " + hookLabel + ": flow-state guard mismatch: " + (guard.runId || "unknown-run") + "\\n" +
1046
+ "expected sha: " + guard.sha256 + "\\n" +
1047
+ "actual sha: " + actual + "\\n" +
1048
+ "last writer: " + (guard.writerSubsystem || "unknown") + "@" + (guard.writtenAt || "unknown") + "\\n" +
1049
+ "do not edit flow-state.json by hand. To recover, run:\\n" +
1050
+ " cclaw-cli internal flow-state-repair --reason \\"manual_edit_recovery\\"\\n"
1051
+ );
1052
+ await recordHookError(root, hookLabel, "flow-state guard mismatch actual=" + actual + " expected=" + guard.sha256).catch(() => undefined);
1053
+ return false;
1054
+ }
1055
+
1020
1056
  async function readFlowState(root) {
1021
1057
  const statePath = path.join(root, RUNTIME_ROOT, "state", "flow-state.json");
1022
1058
  // Loud-on-corrupt: if flow-state.json exists but fails JSON.parse, log
@@ -2110,6 +2146,13 @@ async function main() {
2110
2146
  };
2111
2147
 
2112
2148
  try {
2149
+ if (hookName === "session-start" || hookName === "stop-handoff") {
2150
+ const guardOk = await verifyFlowStateGuardInline(runtime.root, hookName);
2151
+ if (!guardOk) {
2152
+ process.exitCode = 2;
2153
+ return;
2154
+ }
2155
+ }
2113
2156
  if (hookName === "session-start") {
2114
2157
  process.exitCode = await handleSessionStart(runtime);
2115
2158
  return;
package/dist/content/skills-elicitation.js +3 -6
@@ -29,7 +29,7 @@ Pinned anchor: "Don't tell it what to do, give it success criteria and watch it
29
29
  These behaviors are the exact reason this skill exists. The linter will block your stage-complete if you do them.
30
30
 
31
31
  - **Bad**: User asks for a "simple web app" -> agent asks 1 question about stack -> 1 question about auth -> drafts the brainstorm artifact and asks for approval.
32
- - **Good**: User asks for a "simple web app" -> agent asks Q1 (what pain) -> Q2 (direct path) -> Q3 (do-nothing cost) -> Q4 (first operator/user) -> Q5 (no-go boundaries) -> self-eval: clear -> drafts the brainstorm artifact.
32
+ - **Good**: User asks for a "simple web app" -> agent asks Q1 (what pain) -> Q2 (direct path) -> Q3 (first operator/user) -> Q4 (no-go boundaries) -> self-eval: clear -> drafts the brainstorm artifact.
33
33
 
34
34
  - **Bad**: Agent immediately dispatches a subagent (\`product-discovery\`, \`critic\`, \`planner\`) at the start of brainstorm/scope/design to "gather context" before any user dialogue.
35
35
  - **Good**: Agent walks the Q&A loop with the user first; subagent dispatch happens only after the user approves the elicitation outcome.
@@ -121,7 +121,7 @@ Default mapping note: \`lean\` maps to a lightweight specialist tier on early st
121
121
 
122
122
  ### Topic tagging (MANDATORY for forcing-question rows)
123
123
 
124
- Each forcing question has a stable topic id (kebab-case ASCII, e.g. \`pain\`, \`do-nothing\`, \`data-flow\`). Tag the matching Q&A Log row's \`Decision impact\` cell with \`[topic:<id>]\` so the linter can verify coverage in any natural language. This is a **HARD requirement** in Wave 24 (v6.0.0): the linter no longer keyword-matches English question prose, so an un-tagged row does NOT count toward coverage even if the answer fully addresses the topic.
124
+ Each forcing question has a stable topic id (kebab-case ASCII, e.g. \`pain\`, \`direct-path\`, \`data-flow\`). Tag the matching Q&A Log row's \`Decision impact\` cell with \`[topic:<id>]\` so the linter can verify coverage in any natural language. This is a **HARD requirement** in Wave 24 (v6.0.0): the linter no longer keyword-matches English question prose, so an un-tagged row does NOT count toward coverage even if the answer fully addresses the topic.
125
125
 
126
126
  RU example (after asking \`pain\` in Russian):
127
127
 
@@ -131,21 +131,18 @@ RU example (after asking \`pain\` in Russian):
131
131
  | 1 | Какую боль мы решаем? | Регистрация занимает 30 минут. | scope-shaping [topic:pain] |
132
132
  \`\`\`
133
133
 
134
- Multiple tags in one row are allowed when one answer covers several topics: \`[topic:pain] [topic:do-nothing]\`. Stop-signal rows do NOT need a tag.
134
+ Multiple tags in one row are allowed when one answer covers several topics: \`[topic:pain] [topic:direct-path]\`. Stop-signal rows do NOT need a tag.
135
135
 
136
136
  Stage forcing question lists (id → topic):
137
137
 
138
138
  - **Brainstorm**:
139
139
  - \`pain\` — What pain are we solving?
140
140
  - \`direct-path\` — What is the most direct path?
141
- - \`do-nothing\` — What happens if we do nothing?
142
141
  - \`operator\` — Who is the operator/user impacted first?
143
142
  - \`no-go\` — What are non-negotiable no-go boundaries?
144
143
  - **Scope**:
145
144
  - \`in-out\` — What is definitely in and definitely out?
146
145
  - \`locked-upstream\` — Which decisions are already locked upstream?
147
- - \`rollback\` — What is the rollback path if this fails?
148
- - \`failure-modes\` — What are the top failure modes we must design for?
149
146
  - **Design**:
150
147
  - \`data-flow\` — What is the data flow end-to-end?
151
148
  - \`seams\` — Where are the seams/interfaces and ownership boundaries?
package/dist/content/skills.js +3 -1
@@ -236,6 +236,8 @@ ${rows}
236
236
  Mandatory: ${mandatoryList}. Record lifecycle rows in \`${delegationLogRel}\` and append-only \`${delegationEventsRel}\` before completion.${runPhaseLegend}
237
237
  ### Harness Dispatch Contract — use true harness dispatch: Claude Task, Cursor generic dispatch, OpenCode \`.opencode/agents/<agent>.md\` via Task/@agent, Codex \`.codex/agents/<agent>.toml\`. Do not collapse OpenCode or Codex to role-switch by default. Worker ACK Contract: ACK must include \`spanId\`, \`dispatchId\`, \`dispatchSurface\`, \`agentDefinitionPath\`, and \`ackTs\`; never claim \`fulfillmentMode: "isolated"\` without matching lifecycle proof. Canonical helper (same flags as \`delegation-record.mjs --help\`): \`node .cclaw/hooks/delegation-record.mjs --stage=<stage> --agent=<agent> --mode=<mandatory|proactive> --status=<scheduled|launched|acknowledged|completed|...> --span-id=<id> --dispatch-id=<id> --dispatch-surface=<surface> --agent-definition-path=<path> [--ack-ts=<iso>] [--evidence-ref=<ref>] --json\`. Lifecycle order: \`scheduled → launched → acknowledged → completed\` on one span (reuse the same span id); completed isolated/generic rows require a prior ACK event for that span or \`--ack-ts=<iso>\`. For a partial audit trail, \`--repair --span-id=<id> --repair-reason="<why>"\` appends missing phases (see \`--help\`) instead of inventing shortcuts.
238
238
 
239
+ If you must re-dispatch the same agent in the same stage before the previous span has a terminal row, pass \`--supersede=<prevSpanId>\` (closes the previous span as \`stale\` with \`supersededBy=<newSpanId>\`) or \`--allow-parallel\` (records both spans as concurrently active and tags the new row with \`allowParallel: true\`). Without one of those flags, a duplicate scheduled write on the same \`(stage, agent)\` pair fails with \`exit 2\` and \`{ ok: false, error: "dispatch_duplicate" }\`. Lifecycle timestamps are also validated: \`startTs ≤ launchedTs ≤ ackTs ≤ completedTs\` and per-span \`ts\` is non-decreasing — non-monotonic values fail with \`exit 2\` and \`{ ok: false, error: "delegation_timestamp_non_monotonic" }\`.
240
+
239
241
  ${perHarnessLifecycleRecipeBlock()}`;
240
242
  }
241
243
  function perHarnessLifecycleRecipeBlock() {
@@ -430,7 +432,7 @@ function completionParametersBlock(schema, track) {
430
432
  - \`delegation lifecycle proof\`: use the delegation helper recipe in this section with explicit lifecycle rows: \`--status=scheduled\` -> \`--status=launched\` -> \`--status=acknowledged\` -> \`--status=completed\` (completed isolated/generic requires prior ACK for the same span or \`--ack-ts=<iso>\`).
431
433
  - Fill \`## Learnings\` before closeout: either \`- None this stage.\` or JSON bullets with required keys \`type\`, \`trigger\`, \`action\`, \`confidence\` (knowledge-schema compatible).
432
434
  - If you edit any completed-stage artifact after it shipped (\`completedStageMeta\` timestamps exist), append a short \`## Amendments\` section with dated bullets (timestamp + reason) instead of overwriting the archived narrative silently — advisory linter rule \`stage_artifact_post_closure_mutation\` enforces visibility when this trail is missing.
433
- - Record mandatory delegation lifecycle in \`${RUNTIME_ROOT}/state/delegation-log.json\` and append proof events to \`${RUNTIME_ROOT}/state/delegation-events.jsonl\`; the ledger is current state, the event log is audit proof.${mandatoryAgents.length > 0 ? ` If a mandatory delegation cannot run in this harness, use \`--waive-delegation=${mandatoryAgents.join(",")} --waiver-reason="<why safe>"\` on the completion helper.` : ""} If proactive delegations were intentionally skipped, rerun only with \`--accept-proactive-waiver\` (optionally \`--accept-proactive-waiver-reason="<why safe>"\`) after explicit user approval.
435
+ - Record mandatory delegation lifecycle in \`${RUNTIME_ROOT}/state/delegation-log.json\` and append proof events to \`${RUNTIME_ROOT}/state/delegation-events.jsonl\`; the ledger is current state, the event log is audit proof.${mandatoryAgents.length > 0 ? ` If a mandatory delegation cannot run in this harness, use \`--waive-delegation=${mandatoryAgents.join(",")} --waiver-reason="<why safe>"\` on the completion helper.` : ""} If proactive delegations were intentionally skipped, first issue a short-lived waiver token with \`cclaw-cli internal waiver-grant --stage <stage> --reason "<short-slug>"\`, then rerun the completion helper with \`--accept-proactive-waiver=<token> --accept-proactive-waiver-reason="<why safe>"\` after explicit user approval. Tokens expire in 30 minutes and are single-use; bare \`--accept-proactive-waiver\` is no longer accepted.
434
436
  - Never edit raw \`flow-state.json\` to complete a stage, even in advisory mode; that bypasses validation, gate evidence, and Learnings harvest. If a helper fails, report a one-line human-readable failure plus fenced JSON diagnostics; never echo the invoking command line or apply a manual state workaround.
435
437
  - Stage completion claim requires \`stage-complete\` exit 0 in the current turn. Quote the single-line success JSON exactly as printed to stdout (for example \`{"ok":true,"command":"stage-complete",...}\` including \`completedStages\` / \`currentStage\` / \`runId\`); do not paraphrase. Do not infer success from empty stdout or from skipped retries (quiet mode always emits one JSON line on success).
436
438
  - Completion protocol: verify required gates, update the artifact, then use the completion helper with \`--evidence-json\` and \`--passed\` for every satisfied gate.
@@ -38,10 +38,10 @@ export const BRAINSTORM = {
38
38
  checklist: [
39
39
  "**ADAPTIVE ELICITATION COMES FIRST (no exceptions, no subagent dispatch before).** Load `.cclaw/skills/adaptive-elicitation/SKILL.md`. Walk the brainstorm forcing questions one-at-a-time via the harness-native question tool, append one row to `## Q&A Log` (`Turn | Question | User answer (1-line) | Decision impact`) after each user answer **and stamp the row's `Decision impact` cell with the matching `[topic:<id>]` tag** (e.g. `[topic:pain]`). Continue until every forcing-question topic id is tagged on a row OR Ralph-Loop convergence detector says no new decision-changing rows in last 2 iterations OR user records an explicit stop-signal row. Only then proceed to delegations, drafts, or analysis. The linter `qa_log_unconverged` rule will block `stage-complete` if convergence is not reached.",
40
40
  "**Explore project context** — after the elicitation loop converges, inspect existing files/docs/recent activity to refine the Discovered context section; capture matching files/patterns/seeds in `Context > Discovered context` so downstream stages don't redo discovery.",
41
- "**Brainstorm forcing questions (must be covered or explicitly waived)** — `pain: what pain are we solving`; `direct-path: what is the direct path`; `do-nothing: what happens if we do nothing`; `operator: who is the first operator/user affected`; `no-go: what no-go boundaries are non-negotiable`. Tag the matching `## Q&A Log` row's `Decision impact` cell with `[topic:<id>]` (e.g. `[topic:pain]`) so the linter can verify coverage in any natural language. Tags are MANDATORY for forcing-question rows; un-tagged rows do NOT count toward coverage.",
41
+ "**Brainstorm forcing questions (must be covered or explicitly waived)** — `pain: what pain are we solving`; `direct-path: what is the direct path`; `operator: who is the first operator/user affected`; `no-go: what no-go boundaries are non-negotiable`. Tag the matching `## Q&A Log` row's `Decision impact` cell with `[topic:<id>]` (e.g. `[topic:pain]`) so the linter can verify coverage in any natural language. Tags are MANDATORY for forcing-question rows; un-tagged rows do NOT count toward coverage. Round 6 (v6.7.0) removed the counterfactual `do-nothing` topic; the Problem Decision Record already captures `Do-nothing consequence`.",
42
42
  "**Discovery posture (flow-state `discoveryMode`)** — follow `lean` / `guided` / `deep` from the active run. Use lean for smallest safe discovery pass; guided as the default balanced pass; escalate to deep when ambiguity, architecture, external dependency, security/data risk, or explicit think-bigger requests warrant fuller option pressure and mandatory specialist coverage.",
43
43
  "**Write the Problem Decision Record** — pick a free-form `Frame type` label that names how this work is framed (examples: product, technical-maintenance, research-spike, ops-incident, infrastructure), then fill the universal Framing fields: affected user/role/operator, current state/failure mode/opportunity, desired observable outcome, evidence/signal, why now, do-nothing consequence, and non-goals.",
44
- "**Premise check (one pass)** — answer the three gstack-style questions in the artifact body: *Right problem? Direct path? What if we do nothing?* Take a position; do not hedge.",
44
+ "**Premise check (one pass)** — answer the two gstack-style questions in the artifact body: *Right problem? Direct path?* Take a position; do not hedge. Round 6 (v6.7.0): the counterfactual premise line was retired; Do-nothing consequence already lives in the Problem Decision Record.",
45
45
  "**Reframe with How Might We** — write a single `How Might We …?` line that names the user/operator, the desired outcome, and the constraint. This is the altitude check before approaches.",
46
46
  "**Run Clarity Gate** — record ambiguity score (0.00-1.00), decision boundaries, reaffirmed non-goals, and residual-risk handoff before locking recommendations. If ambiguity remains high (>0.40), ask one decision-changing question before recommending.",
47
47
  "**Sharpening question discipline** — ask one decision-changing question at a time. Do not default to 3-5 batched questions; record only questions that changed the direction or a critical stop decision.",
@@ -62,7 +62,7 @@ export const BRAINSTORM = {
62
62
  "\"If something is unclear, stop. Name what's confusing. Ask.\"",
63
63
  "Start from observed project context; if the idea is vague, first narrow the project type with **one** structured question, then keep going.",
64
64
  "Honor the run's `discoveryMode` (`lean` | `guided` | `deep`) from flow-state: lean stays fastest, guided is the default breadth, deep pulls in fuller critique and mandatory delegations when the run is classified that way.",
65
- "Lead with the premise check (right problem / direct path / what if nothing) and the `How Might We` reframing before approaches; both go in the artifact, not just the chat.",
65
+ "Lead with the premise check (right problem / direct path) and the `How Might We` reframing before approaches; both go in the artifact, not just the chat. Round 6 (v6.7.0) removed the counterfactual premise line; Do-nothing consequence still lives in the Problem Decision Record.",
66
66
  "Ask at most one question per turn, only when decision-changing; if using a structured question tool, send exactly one question object, not a multi-question form.",
67
67
  "Run the shared adaptive elicitation cycle from `.cclaw/skills/adaptive-elicitation/SKILL.md`, including stop-signal handling (RU/EN/UA), smart-skip, conditional grilling triggers, and append-only `## Q&A Log` updates.",
68
68
  "Only non-critical preference/default assumptions may continue inline. STOP and ask when uncertainty affects scope, architecture, security, data loss, public API, migration, auth/pricing, or user approval.",
@@ -142,7 +142,7 @@ export const BRAINSTORM = {
142
142
  artifactValidation: [
143
143
  { section: "Context", required: true, validationRule: "Must reference project state and relevant existing code or patterns. A `Discovered context` subsection (or list) is recommended for downstream traceability." },
144
144
  { section: "Problem Decision Record", required: true, validationRule: "Must include a free-form `Frame type` label (examples only: product, technical-maintenance, research-spike, ops-incident, infrastructure) and the universal Framing fields: affected user/role/operator, current state/failure mode/opportunity, desired observable outcome, evidence/signal, why now, do-nothing consequence, non-goals. The linter checks that the section has meaningful content; the field labels themselves are the structural contract." },
145
- { section: "Premise Check", required: false, validationRule: "Recommended: explicit answers to `Right problem?`, `Direct path?`, `What if we do nothing?` — take a position, do not hedge." },
145
+ { section: "Premise Check", required: false, validationRule: "Recommended: explicit answers to `Right problem?` and `Direct path?` — take a position, do not hedge. Round 6 (v6.7.0) retired the counterfactual premise line; Do-nothing consequence already lives in the Problem Decision Record." },
146
146
  { section: "How Might We", required: false, validationRule: "Recommended: a single `How Might We …?` line naming the user, the outcome, and the binding constraint." },
147
147
  { section: "Clarity Gate", required: false, validationRule: "Recommended before recommendation lock: include ambiguity score (0.00-1.00), decision boundaries, reaffirmed non-goals, and residual-risk handoff for scope." },
148
148
  { section: "Sharpening Questions", required: false, validationRule: "Recommended only when needed: one decision-changing question per turn with explicit `Decision impact`; compact tasks may record `None - early exit` with rationale." },
@@ -47,9 +47,9 @@ export const SCOPE = {
47
47
  executionModel: {
48
48
  checklist: [
49
49
  "**ADAPTIVE ELICITATION COMES FIRST (no exceptions, no subagent dispatch before).** Load `.cclaw/skills/adaptive-elicitation/SKILL.md`. Walk the scope forcing questions one-at-a-time via the harness-native question tool, append one row to `## Q&A Log` (`Turn | Question | User answer (1-line) | Decision impact`) after each user answer **and stamp the row's `Decision impact` cell with the matching `[topic:<id>]` tag** (e.g. `[topic:in-out]`). Continue until every forcing-question topic id is tagged on a row OR Ralph-Loop convergence detector says no new decision-changing rows in last 2 iterations OR user records an explicit stop-signal row. Only then propose the scope contract draft, recommend a mode, or dispatch any delegations. The linter `qa_log_unconverged` rule will block `stage-complete` if convergence is not reached.",
50
- "**Scope forcing questions (must be covered or explicitly waived)** — `in-out: what is definitely in/out`; `locked-upstream: which upstream decisions are locked`; `rollback: what rollback path protects users if scope assumptions fail`; `failure-modes: what are the top failure modes we must design for`. Tag the matching `## Q&A Log` row's `Decision impact` cell with `[topic:<id>]` (e.g. `[topic:in-out]`) so the linter can verify coverage in any natural language. Tags are MANDATORY for forcing-question rows; un-tagged rows do NOT count toward coverage.",
50
+ "**Scope forcing questions (must be covered or explicitly waived)** — `in-out: what is definitely in/out`; `locked-upstream: which upstream decisions are locked`. Tag the matching `## Q&A Log` row's `Decision impact` cell with `[topic:<id>]` (e.g. `[topic:in-out]`) so the linter can verify coverage in any natural language. Tags are MANDATORY for forcing-question rows; un-tagged rows do NOT count toward coverage. Round 6 (v6.7.0) removed the counterfactual `rollback` and `failure-modes` topics from scope forcing questions; Design still owns the Failure Mode Table and rollback evidence.",
51
51
  "**Scope contract first** — read brainstorm handoff, name upstream decisions used, explicit drift, confidence, unresolved questions, and next-stage risk hints; draft the in-scope/out-of-scope/deferred/discretion contract before any design choice.",
52
- "**Premise carry-forward (do NOT re-author)** — brainstorm OWNS the premise check (right problem / direct path / what if nothing). Cite brainstorm's `## Premise Check` section in `## Upstream Handoff > Decisions carried forward`. Add a row to `## Premise Drift` only when the scope-stage Q&A surfaced NEW evidence that materially changes the brainstorm answer (e.g. new constraint, new user signal). Otherwise mark `Premise Drift: None` — do not duplicate the brainstorm premise table.",
52
+ "**Premise carry-forward (do NOT re-author)** — brainstorm OWNS the premise check (right problem / direct path). Cite brainstorm's `## Premise Check` section in `## Upstream Handoff > Decisions carried forward`. Add a row to `## Premise Drift` only when the scope-stage Q&A surfaced NEW evidence that materially changes the brainstorm answer (e.g. new constraint, new user signal). Otherwise mark `Premise Drift: None` — do not duplicate the brainstorm premise table.",
53
53
  "**Conditional 10-star boundary** — for deep/high-risk/product-strategy work, show what would make the product meaningfully better, then explicitly choose what ships now, what is deferred, and what is excluded without vague `later/for now` placeholders. Skip this for straightforward repair work and record `not needed: compact scope`.",
54
54
  "**Pick one operational mode with the user** — HOLD SCOPE preserves focus; SELECTIVE EXPANSION cherry-picks high-leverage reference ideas; SCOPE EXPANSION explores ambitious alternatives; SCOPE REDUCTION cuts to the essential wedge. Recommend one, state why and what signal would change it, then keep elicitation focused until the user either approves or asks to proceed with draft boundaries.",
55
55
  "**Product-discovery is REQUIRED for SELECTIVE / SCOPE EXPANSION (hard gate)** — If the resolved scope mode is SELECTIVE EXPANSION or SCOPE EXPANSION, run \`product-discovery\` in proactive mode **after** adaptive elicitation converges and **before** \`stage-complete\`. Do not complete this stage until the delegation ledger shows \`product-discovery\` as \`completed\` with non-empty \`evidenceRefs\` pointing at this scope artifact. HOLD SCOPE and SCOPE REDUCTION do not require this row.",
@@ -89,7 +89,6 @@ ${renderBehaviorAnchorTemplateLine("brainstorm")}
89
89
  ## Premise Check
90
90
  - **Right problem?** (yes/no + one-line justification — take a position)
91
91
  - **Direct path?** (yes/no + one-line justification)
92
- - **What if we do nothing?** (concrete consequence, not "nothing happens")
93
92
 
94
93
  ## How Might We
95
94
  - *How might we …?* — one line naming the user, the desired outcome, and the binding constraint.
@@ -117,7 +116,7 @@ ${renderBehaviorAnchorTemplateLine("brainstorm")}
117
116
  | 1 | | | scope-shaping [topic:pain] |
118
117
 
119
118
  > Append-only by turn. Add one row after each user answer; do not rewrite prior rows.
120
- > **Topic tag is MANDATORY for forcing-question rows.** Stamp \`[topic:<id>]\` in the \`Decision impact\` cell so the linter can verify coverage in any natural language (RU/EN/UA/etc.). Brainstorm IDs: \`pain\`, \`direct-path\`, \`do-nothing\`, \`operator\`, \`no-go\`. Multiple tags allowed when one answer covers several topics. Stop-signal rows do NOT need a tag. Wave 24 (v6.0.0) removed the English keyword fallback.
119
+ > **Topic tag is MANDATORY for forcing-question rows.** Stamp \`[topic:<id>]\` in the \`Decision impact\` cell so the linter can verify coverage in any natural language (RU/EN/UA/etc.). Brainstorm IDs: \`pain\`, \`direct-path\`, \`operator\`, \`no-go\`. Multiple tags allowed when one answer covers several topics. Stop-signal rows do NOT need a tag. Wave 24 (v6.0.0) removed the English keyword fallback; Round 6 (v6.7.0) retired the counterfactual \`do-nothing\` topic (Do-nothing consequence stays in the Problem Decision Record).
121
120
 
122
121
  ## Approach Tier
123
122
  - Tier: lite | standard | deep
@@ -948,12 +947,14 @@ Execution rule: complete and verify each batch before starting the next batch.
948
947
  - **Inline recipe (if Inline executor):** TDD loop unit-by-unit with batch checkpoints
949
948
 
950
949
  ## Plan Quality Scan
950
+ <!-- linter-meta -->
951
951
  - Placeholder scan:
952
952
  - Scanned tokens: \`TODO\`, \`TBD\`, \`FIXME\`, \`<fill-in>\`, \`<your-*-here>\`, \`xxx\`, bare ellipsis in task rows.
953
953
  - Hits: 0 (required for WAIT_FOR_CONFIRM to resolve).
954
954
  - Scope reduction language scan:
955
955
  - Scanned phrases: \`v1\`, \`for now\`, \`later\`, \`temporary\`, \`placeholder\`, \`mock for now\`, \`hardcoded for now\`, \`will improve later\`.
956
956
  - Hits: 0 (required when Locked Decisions section is non-empty; reference D-XX IDs from scope).
957
+ <!-- /linter-meta -->
957
958
 
958
959
  ## WAIT_FOR_CONFIRM
959
960
  - Status: pending