cclaw-cli 0.49.0 → 0.51.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183)
  1. package/README.md +57 -84
  2. package/dist/artifact-linter.d.ts +4 -0
  3. package/dist/artifact-linter.js +24 -3
  4. package/dist/cli.d.ts +1 -19
  5. package/dist/cli.js +49 -491
  6. package/dist/constants.d.ts +2 -13
  7. package/dist/constants.js +1 -43
  8. package/dist/content/closeout-guidance.d.ts +14 -0
  9. package/dist/content/closeout-guidance.js +42 -0
  10. package/dist/content/core-agents.js +55 -17
  11. package/dist/content/decision-protocol.d.ts +12 -0
  12. package/dist/content/decision-protocol.js +20 -0
  13. package/dist/content/diff-command.d.ts +1 -2
  14. package/dist/content/diff-command.js +8 -94
  15. package/dist/content/examples.d.ts +4 -10
  16. package/dist/content/examples.js +10 -20
  17. package/dist/content/hook-events.js +2 -2
  18. package/dist/content/hook-inline-snippets.d.ts +5 -2
  19. package/dist/content/hook-inline-snippets.js +33 -1
  20. package/dist/content/hook-manifest.d.ts +3 -4
  21. package/dist/content/hook-manifest.js +11 -12
  22. package/dist/content/hooks.js +44 -21
  23. package/dist/content/ideate-command.d.ts +2 -0
  24. package/dist/content/ideate-command.js +34 -25
  25. package/dist/content/iron-laws.d.ts +5 -5
  26. package/dist/content/iron-laws.js +5 -5
  27. package/dist/content/language-policy.d.ts +2 -0
  28. package/dist/content/language-policy.js +13 -0
  29. package/dist/content/learnings.d.ts +3 -4
  30. package/dist/content/learnings.js +26 -50
  31. package/dist/content/meta-skill.js +33 -22
  32. package/dist/content/next-command.js +41 -38
  33. package/dist/content/node-hooks.js +17 -345
  34. package/dist/content/opencode-plugin.js +5 -103
  35. package/dist/content/research-playbooks.js +14 -14
  36. package/dist/content/review-loop.d.ts +2 -0
  37. package/dist/content/review-loop.js +8 -0
  38. package/dist/content/session-hooks.js +15 -47
  39. package/dist/content/skills.d.ts +0 -5
  40. package/dist/content/skills.js +55 -128
  41. package/dist/content/stage-common-guidance.d.ts +0 -1
  42. package/dist/content/stage-common-guidance.js +17 -14
  43. package/dist/content/stage-schema.d.ts +26 -1
  44. package/dist/content/stage-schema.js +121 -40
  45. package/dist/content/stages/_lint-metadata/index.js +9 -15
  46. package/dist/content/stages/brainstorm.js +22 -43
  47. package/dist/content/stages/design.js +37 -57
  48. package/dist/content/stages/plan.js +22 -13
  49. package/dist/content/stages/review.js +24 -27
  50. package/dist/content/stages/scope.js +34 -46
  51. package/dist/content/stages/ship.js +7 -4
  52. package/dist/content/stages/spec.js +20 -9
  53. package/dist/content/stages/tdd.js +64 -44
  54. package/dist/content/start-command.js +13 -12
  55. package/dist/content/status-command.d.ts +2 -7
  56. package/dist/content/status-command.js +19 -146
  57. package/dist/content/subagents.d.ts +0 -5
  58. package/dist/content/subagents.js +51 -28
  59. package/dist/content/templates.d.ts +1 -1
  60. package/dist/content/templates.js +126 -135
  61. package/dist/content/track-render-context.d.ts +17 -0
  62. package/dist/content/track-render-context.js +44 -0
  63. package/dist/content/tree-command.d.ts +1 -2
  64. package/dist/content/tree-command.js +4 -87
  65. package/dist/content/utility-skills.d.ts +2 -29
  66. package/dist/content/utility-skills.js +2 -1534
  67. package/dist/content/view-command.js +31 -11
  68. package/dist/delegation.d.ts +1 -1
  69. package/dist/delegation.js +5 -15
  70. package/dist/doctor-registry.js +20 -21
  71. package/dist/doctor.js +88 -344
  72. package/dist/flow-state.d.ts +3 -0
  73. package/dist/flow-state.js +2 -0
  74. package/dist/harness-adapters.d.ts +1 -1
  75. package/dist/harness-adapters.js +51 -58
  76. package/dist/install.js +128 -358
  77. package/dist/internal/advance-stage.js +3 -9
  78. package/dist/internal/compound-readiness.d.ts +1 -1
  79. package/dist/internal/compound-readiness.js +1 -1
  80. package/dist/internal/tdd-loop-status.d.ts +1 -1
  81. package/dist/internal/tdd-loop-status.js +1 -1
  82. package/dist/knowledge-store.d.ts +16 -10
  83. package/dist/knowledge-store.js +51 -15
  84. package/dist/policy.js +16 -105
  85. package/dist/run-archive.d.ts +4 -6
  86. package/dist/run-archive.js +15 -20
  87. package/dist/run-persistence.d.ts +2 -2
  88. package/dist/run-persistence.js +3 -9
  89. package/package.json +1 -2
  90. package/dist/content/archive-command.d.ts +0 -2
  91. package/dist/content/archive-command.js +0 -124
  92. package/dist/content/compound-command.d.ts +0 -5
  93. package/dist/content/compound-command.js +0 -193
  94. package/dist/content/contexts.d.ts +0 -18
  95. package/dist/content/contexts.js +0 -24
  96. package/dist/content/contracts.d.ts +0 -2
  97. package/dist/content/contracts.js +0 -51
  98. package/dist/content/doctor-references.d.ts +0 -2
  99. package/dist/content/doctor-references.js +0 -150
  100. package/dist/content/eval-scaffold.d.ts +0 -15
  101. package/dist/content/eval-scaffold.js +0 -370
  102. package/dist/content/feature-command.d.ts +0 -2
  103. package/dist/content/feature-command.js +0 -123
  104. package/dist/content/flow-map.d.ts +0 -23
  105. package/dist/content/flow-map.js +0 -134
  106. package/dist/content/harness-doc.d.ts +0 -2
  107. package/dist/content/harness-doc.js +0 -202
  108. package/dist/content/harness-playbooks.d.ts +0 -24
  109. package/dist/content/harness-playbooks.js +0 -393
  110. package/dist/content/harness-tool-refs.d.ts +0 -20
  111. package/dist/content/harness-tool-refs.js +0 -268
  112. package/dist/content/ops-command.d.ts +0 -2
  113. package/dist/content/ops-command.js +0 -71
  114. package/dist/content/protocols.d.ts +0 -7
  115. package/dist/content/protocols.js +0 -215
  116. package/dist/content/retro-command.d.ts +0 -2
  117. package/dist/content/retro-command.js +0 -165
  118. package/dist/content/rewind-command.d.ts +0 -2
  119. package/dist/content/rewind-command.js +0 -106
  120. package/dist/content/tdd-log-command.d.ts +0 -2
  121. package/dist/content/tdd-log-command.js +0 -85
  122. package/dist/eval/agents/single-shot.d.ts +0 -27
  123. package/dist/eval/agents/single-shot.js +0 -79
  124. package/dist/eval/agents/with-tools.d.ts +0 -44
  125. package/dist/eval/agents/with-tools.js +0 -261
  126. package/dist/eval/agents/workflow.d.ts +0 -31
  127. package/dist/eval/agents/workflow.js +0 -155
  128. package/dist/eval/baseline.d.ts +0 -38
  129. package/dist/eval/baseline.js +0 -282
  130. package/dist/eval/config-loader.d.ts +0 -14
  131. package/dist/eval/config-loader.js +0 -395
  132. package/dist/eval/corpus.d.ts +0 -30
  133. package/dist/eval/corpus.js +0 -330
  134. package/dist/eval/cost-guard.d.ts +0 -102
  135. package/dist/eval/cost-guard.js +0 -190
  136. package/dist/eval/diff.d.ts +0 -64
  137. package/dist/eval/diff.js +0 -323
  138. package/dist/eval/llm-client.d.ts +0 -176
  139. package/dist/eval/llm-client.js +0 -267
  140. package/dist/eval/mode.d.ts +0 -28
  141. package/dist/eval/mode.js +0 -61
  142. package/dist/eval/progress.d.ts +0 -83
  143. package/dist/eval/progress.js +0 -59
  144. package/dist/eval/report.d.ts +0 -11
  145. package/dist/eval/report.js +0 -181
  146. package/dist/eval/rubric-loader.d.ts +0 -20
  147. package/dist/eval/rubric-loader.js +0 -143
  148. package/dist/eval/runner.d.ts +0 -81
  149. package/dist/eval/runner.js +0 -746
  150. package/dist/eval/runs.d.ts +0 -41
  151. package/dist/eval/runs.js +0 -114
  152. package/dist/eval/sandbox.d.ts +0 -38
  153. package/dist/eval/sandbox.js +0 -137
  154. package/dist/eval/tools/glob.d.ts +0 -2
  155. package/dist/eval/tools/glob.js +0 -163
  156. package/dist/eval/tools/grep.d.ts +0 -2
  157. package/dist/eval/tools/grep.js +0 -152
  158. package/dist/eval/tools/index.d.ts +0 -7
  159. package/dist/eval/tools/index.js +0 -35
  160. package/dist/eval/tools/read.d.ts +0 -2
  161. package/dist/eval/tools/read.js +0 -122
  162. package/dist/eval/tools/types.d.ts +0 -49
  163. package/dist/eval/tools/types.js +0 -41
  164. package/dist/eval/tools/write.d.ts +0 -2
  165. package/dist/eval/tools/write.js +0 -92
  166. package/dist/eval/types.d.ts +0 -561
  167. package/dist/eval/types.js +0 -47
  168. package/dist/eval/verifiers/judge.d.ts +0 -40
  169. package/dist/eval/verifiers/judge.js +0 -256
  170. package/dist/eval/verifiers/rules.d.ts +0 -24
  171. package/dist/eval/verifiers/rules.js +0 -218
  172. package/dist/eval/verifiers/structural.d.ts +0 -14
  173. package/dist/eval/verifiers/structural.js +0 -171
  174. package/dist/eval/verifiers/traceability.d.ts +0 -23
  175. package/dist/eval/verifiers/traceability.js +0 -84
  176. package/dist/eval/verifiers/workflow-consistency.d.ts +0 -21
  177. package/dist/eval/verifiers/workflow-consistency.js +0 -225
  178. package/dist/eval/workflow-corpus.d.ts +0 -7
  179. package/dist/eval/workflow-corpus.js +0 -207
  180. package/dist/feature-system.d.ts +0 -42
  181. package/dist/feature-system.js +0 -432
  182. package/dist/internal/knowledge-digest.d.ts +0 -7
  183. package/dist/internal/knowledge-digest.js +0 -93
package/dist/cli.js CHANGED
@@ -1,29 +1,19 @@
1
1
  #!/usr/bin/env node
2
- import { createReadStream, existsSync, realpathSync } from "node:fs";
3
- import { spawn } from "node:child_process";
4
- import fs from "node:fs/promises";
5
2
  import process from "node:process";
6
3
  import path from "node:path";
4
+ import { existsSync, realpathSync } from "node:fs";
7
5
  import { createInterface } from "node:readline/promises";
8
6
  import { fileURLToPath } from "node:url";
9
- import { FLOW_TRACKS, HARNESS_IDS } from "./types.js";
10
7
  import { doctorChecks, doctorSucceeded } from "./doctor.js";
11
8
  import { initCclaw, syncCclaw, uninstallCclaw, upgradeCclaw } from "./install.js";
12
9
  import { error, info } from "./logger.js";
10
+ import { FLOW_TRACKS, HARNESS_IDS } from "./types.js";
13
11
  import { archiveRun } from "./runs.js";
14
12
  import { CCLAW_VERSION, RUNTIME_ROOT } from "./constants.js";
15
13
  import { createDefaultConfig } from "./config.js";
16
14
  import { detectHarnesses } from "./init-detect.js";
17
15
  import { HARNESS_ADAPTERS } from "./harness-adapters.js";
18
16
  import { classifyCodexHooksFlag, codexConfigPath, patchCodexHooksFlag, readCodexConfig, writeCodexConfig } from "./codex-feature-flag.js";
19
- import { runEval } from "./eval/runner.js";
20
- import { createStderrProgressLogger } from "./eval/progress.js";
21
- import { writeBaselinesFromReport } from "./eval/baseline.js";
22
- import { writeJsonReport, writeMarkdownReport } from "./eval/report.js";
23
- import { formatDiffMarkdown, runEvalDiff } from "./eval/diff.js";
24
- import { ensureRunDir, generateRunId, isRunAlive, listRuns, readRunStatus, resolveRunId, runLogPath, writeRunStatus } from "./eval/runs.js";
25
- import { parseModeInput } from "./eval/mode.js";
26
- import { FLOW_STAGES } from "./types.js";
27
17
  import { runInternalCommand } from "./internal/advance-stage.js";
28
18
  const INSTALLER_COMMANDS = [
29
19
  "init",
@@ -32,7 +22,6 @@ const INSTALLER_COMMANDS = [
32
22
  "upgrade",
33
23
  "uninstall",
34
24
  "archive",
35
- "eval",
36
25
  "internal"
37
26
  ];
38
27
  export function usage() {
@@ -49,14 +38,18 @@ Commands:
49
38
  Flags: --harnesses=<list> Comma list of harnesses (claude,cursor,opencode,codex).
50
39
  --no-interactive Skip interactive prompts even on TTY (for CI/scripts).
51
40
  sync Reconcile generated runtime files with the current config.
41
+ doctor Check install/runtime wiring and print concrete fixes for failures.
42
+ Flags: --explain Include docs pointers for every check.
43
+ --json Emit machine-readable check results.
44
+ --quiet Show only failing checks.
45
+ --only=<filter> Limit displayed checks (error,warning,hook:,state:,...).
46
+ --reconcile-gates Refresh derived gate status before checking.
52
47
  upgrade Refresh generated files in .cclaw. Preserves your config.yaml.
53
- archive Archive the active run and reset flow state for next feature.
48
+ archive Archive the active run and reset flow state for the next run.
54
49
  Flags: --name=<slug> Override archive folder suffix.
55
50
  --skip-retro Skip retro gate only when runtime allows it.
56
51
  --retro-reason=<txt> Required rationale with --skip-retro.
57
52
  uninstall Remove .cclaw runtime and the generated harness shim files.
58
- eval Run cclaw evals. Maintainer surface — see docs/evals.md.
59
- Full flag reference: \`npx cclaw-cli eval --help\` or docs/evals.md.
60
53
 
61
54
  Global flags:
62
55
  -h, --help Show this help message and exit 0.
@@ -66,15 +59,16 @@ Examples:
66
59
  npx cclaw-cli
67
60
  npx cclaw-cli init --harnesses=claude,cursor --no-interactive
68
61
  npx cclaw-cli sync
69
- npx cclaw-cli archive --name=my-feature
62
+ npx cclaw-cli archive --name=my-run
70
63
  npx cclaw-cli upgrade
71
- npx cclaw-cli eval --dry-run
72
64
 
73
- Everything operational (retro, archive, worktrees, doctor, learnings)
74
- happens inside your harness via slash commands. The CLI is just a
75
- launcher. See README.md for the four user-facing slash commands.
65
+ Happy-path work happens inside your harness via /cc, /cc-next,
66
+ /cc-ideate, and /cc-view. Doctor is an operator/support surface:
67
+ it verifies install/runtime wiring, but a real harness smoke test is
68
+ still needed to prove provider auth and model execution.
76
69
 
77
70
  Docs: https://github.com/zuevrs/cclaw
71
+ Local: docs/config.md and docs/harnesses.md
78
72
  Issues: https://github.com/zuevrs/cclaw/issues
79
73
  `;
80
74
  }
@@ -96,25 +90,6 @@ function parseTrack(raw) {
96
90
  }
97
91
  return trimmed;
98
92
  }
99
- function parseLegacyTier(raw) {
100
- return parseModeInput(raw.toUpperCase(), {
101
- source: "cli",
102
- raw: `--tier=${raw}`
103
- });
104
- }
105
- function parseEvalMode(raw) {
106
- return parseModeInput(raw, {
107
- source: "cli",
108
- raw: `--mode=${raw}`
109
- });
110
- }
111
- function parseEvalStage(raw) {
112
- const trimmed = raw.trim();
113
- if (!FLOW_STAGES.includes(trimmed)) {
114
- throw new Error(`Unknown eval stage: ${raw}. Supported: ${FLOW_STAGES.join(", ")}`);
115
- }
116
- return trimmed;
117
- }
118
93
  function isInitPromptAllowed(ctx) {
119
94
  return Boolean(process.stdin.isTTY && ctx.stdout.isTTY);
120
95
  }
@@ -147,12 +122,10 @@ function buildInitSurfacePreview(harnesses) {
147
122
  ".cclaw/agents/*.md",
148
123
  ".cclaw/hooks/*",
149
124
  ".cclaw/rules/**",
150
- ".cclaw/features/** (legacy snapshots, read-only migration)",
151
125
  ".cclaw/runs/**",
152
126
  ".cclaw/artifacts/**",
153
127
  ".cclaw/knowledge.jsonl",
154
128
  ".cclaw/state/*.json|*.jsonl",
155
- ".cclaw/references/**",
156
129
  "AGENTS.md (managed block)"
157
130
  ];
158
131
  for (const harness of harnesses) {
@@ -376,7 +349,7 @@ function printDoctorText(ctx, checks, options) {
376
349
  if (!options.quiet) {
377
350
  ctx.stdout.write(` details: ${check.details}\n`);
378
351
  }
379
- if (options.explain) {
352
+ if (!check.ok || options.explain) {
380
353
  ctx.stdout.write(` fix: ${check.fix}\n`);
381
354
  if (check.docRef) {
382
355
  ctx.stdout.write(` docs: ${check.docRef}\n`);
@@ -396,18 +369,6 @@ function printDoctorText(ctx, checks, options) {
396
369
  ctx.stdout.write("Doctor status: HEALTHY (no failing error checks)\n");
397
370
  }
398
371
  }
399
- function resolveMaxCostOption(fromCli, env) {
400
- if (fromCli !== undefined)
401
- return { maxCostUsd: fromCli };
402
- const raw = env.CCLAW_EVAL_MAX_COST_USD;
403
- if (raw === undefined || raw.trim() === "")
404
- return {};
405
- const value = Number(raw);
406
- if (!Number.isFinite(value) || value <= 0) {
407
- throw new Error(`CCLAW_EVAL_MAX_COST_USD must be a positive number, got: ${raw}`);
408
- }
409
- return { maxCostUsd: value };
410
- }
411
372
  function parseArgs(argv) {
412
373
  const parsed = {};
413
374
  const helpFlag = argv.find((arg) => arg === "--help" || arg === "-h");
@@ -429,41 +390,34 @@ function parseArgs(argv) {
429
390
  parsed.internalArgs = [...rest];
430
391
  return parsed;
431
392
  }
432
- // For `eval`, the next non-flag argument is an optional subcommand. Any
433
- // subsequent non-flag tokens are captured as evalArgs (consumed by the
434
- // subcommand handler). This preserves backwards compat: callers that run
435
- // `cclaw eval --dry-run` see no subcommand and no positional args.
436
- let flags = rest;
437
- if (parsed.command === "eval") {
438
- const evalArgs = [];
439
- const remainder = [];
440
- let sawSubcommand = false;
441
- for (const token of rest) {
442
- if (token.startsWith("--")) {
443
- remainder.push(token);
444
- continue;
445
- }
446
- if (!sawSubcommand) {
447
- if (token === "diff") {
448
- parsed.evalSubcommand = "diff";
449
- sawSubcommand = true;
450
- }
451
- else if (token === "runs") {
452
- parsed.evalSubcommand = "runs";
453
- sawSubcommand = true;
454
- }
455
- else {
456
- evalArgs.push(token);
457
- }
458
- continue;
459
- }
460
- evalArgs.push(token);
393
+ const flags = rest;
394
+ const isAllowedForCommand = (flag) => {
395
+ if (parsed.command === "init") {
396
+ return flag.startsWith("--harnesses=") ||
397
+ flag.startsWith("--track=") ||
398
+ flag.startsWith("--profile=") ||
399
+ flag === "--interactive" ||
400
+ flag === "--no-interactive" ||
401
+ flag === "--dry-run";
402
+ }
403
+ if (parsed.command === "doctor") {
404
+ return flag === "--reconcile-gates" ||
405
+ flag === "--json" ||
406
+ flag === "--explain" ||
407
+ flag === "--quiet" ||
408
+ flag.startsWith("--only=");
409
+ }
410
+ if (parsed.command === "archive") {
411
+ return flag.startsWith("--name=") ||
412
+ flag === "--skip-retro" ||
413
+ flag.startsWith("--retro-reason=");
461
414
  }
462
- if (evalArgs.length > 0)
463
- parsed.evalArgs = evalArgs;
464
- flags = remainder;
465
- }
415
+ return false;
416
+ };
466
417
  for (const flag of flags) {
418
+ if (!isAllowedForCommand(flag)) {
419
+ throw new Error(`Flag ${flag} is not supported for ${parsed.command ?? "this command"}.`);
420
+ }
467
421
  if (flag.startsWith("--harnesses=")) {
468
422
  parsed.harnesses = parseHarnesses(flag.replace("--harnesses=", ""));
469
423
  continue;
@@ -519,281 +473,9 @@ function parseArgs(argv) {
519
473
  parsed.archiveSkipRetroReason = flag.replace("--retro-reason=", "").trim();
520
474
  continue;
521
475
  }
522
- if (flag.startsWith("--stage=")) {
523
- parsed.evalStage = parseEvalStage(flag.replace("--stage=", ""));
524
- continue;
525
- }
526
- if (flag.startsWith("--mode=")) {
527
- parsed.evalMode = parseEvalMode(flag.replace("--mode=", ""));
528
- continue;
529
- }
530
- if (flag.startsWith("--tier=")) {
531
- parsed.evalMode = parseLegacyTier(flag.replace("--tier=", ""));
532
- continue;
533
- }
534
- if (flag === "--schema-only") {
535
- parsed.evalSchemaOnly = true;
536
- continue;
537
- }
538
- if (flag === "--rules") {
539
- parsed.evalRules = true;
540
- continue;
541
- }
542
- if (flag === "--judge") {
543
- parsed.evalJudge = true;
544
- continue;
545
- }
546
- if (flag === "--no-write") {
547
- parsed.evalNoWrite = true;
548
- continue;
549
- }
550
- if (flag === "--update-baseline") {
551
- parsed.evalUpdateBaseline = true;
552
- continue;
553
- }
554
- if (flag === "--confirm") {
555
- parsed.evalConfirm = true;
556
- continue;
557
- }
558
- if (flag === "--background") {
559
- parsed.evalBackground = true;
560
- continue;
561
- }
562
- if (flag.startsWith("--compare-model=")) {
563
- const value = flag.replace("--compare-model=", "").trim();
564
- if (value.length === 0) {
565
- throw new Error(`--compare-model requires a non-empty model id (e.g. --compare-model=gpt-4o-mini).`);
566
- }
567
- parsed.evalCompareModel = value;
568
- continue;
569
- }
570
- if (flag.startsWith("--max-cost-usd=")) {
571
- const raw = flag.replace("--max-cost-usd=", "").trim();
572
- const value = Number(raw);
573
- if (!Number.isFinite(value) || value <= 0) {
574
- throw new Error(`--max-cost-usd requires a positive number, got: ${raw}`);
575
- }
576
- parsed.evalMaxCostUsd = value;
577
- continue;
578
- }
579
- }
580
- // `--json` is shared between doctor and eval. Disambiguate by command.
581
- if (parsed.command === "eval" && parsed.doctorJson === true) {
582
- parsed.evalJson = true;
583
- parsed.doctorJson = undefined;
584
- }
585
- // `--quiet` on `eval` silences the stderr progress logger. On doctor it
586
- // continues to mean "print only failing checks" — the flag slot is the
587
- // same, the semantics depend on which command owns the invocation.
588
- if (parsed.command === "eval" && parsed.doctorQuiet === true) {
589
- parsed.evalQuiet = true;
590
- parsed.doctorQuiet = undefined;
591
476
  }
592
477
  return parsed;
593
478
  }
594
- /**
595
- * Spawn `cclaw eval` (without `--background`) in a detached child process
596
- * and return immediately. The child's stdout+stderr are piped to
597
- * `.cclaw/evals/runs/<id>/run.log` so the user can attach later with
598
- * `cclaw eval runs tail`. We do NOT wait for the child — the whole point
599
- * is to free the terminal while a multi-minute workflow-mode run
600
- * proceeds in the background.
601
- */
602
- async function spawnBackgroundEval(parsed, ctx) {
603
- const id = generateRunId();
604
- await ensureRunDir(ctx.cwd, id);
605
- const logPath = runLogPath(ctx.cwd, id);
606
- const childArgv = process.argv.slice(2).filter((a) => a !== "--background");
607
- const cliEntry = process.argv[1];
608
- if (!cliEntry) {
609
- error(ctx, "Could not resolve cclaw entrypoint for --background.");
610
- return 1;
611
- }
612
- const logHandle = await fs.open(logPath, "a");
613
- try {
614
- const child = spawn(process.execPath, [cliEntry, ...childArgv], {
615
- cwd: ctx.cwd,
616
- detached: true,
617
- stdio: ["ignore", logHandle.fd, logHandle.fd],
618
- env: process.env
619
- });
620
- const pid = child.pid ?? -1;
621
- await writeRunStatus(ctx.cwd, {
622
- id,
623
- startedAt: new Date().toISOString(),
624
- pid,
625
- argv: childArgv,
626
- cwd: ctx.cwd,
627
- state: "running"
628
- });
629
- child.unref();
630
- const finalize = async (code) => {
631
- const current = await readRunStatus(ctx.cwd, id);
632
- if (!current)
633
- return;
634
- const exitCode = typeof code === "number" ? code : -1;
635
- await writeRunStatus(ctx.cwd, {
636
- ...current,
637
- endedAt: new Date().toISOString(),
638
- exitCode,
639
- state: exitCode === 0 ? "succeeded" : "failed"
640
- });
641
- };
642
- child.on("exit", (code) => {
643
- void finalize(code);
644
- });
645
- child.on("error", (err) => {
646
- void writeRunStatus(ctx.cwd, {
647
- id,
648
- startedAt: new Date().toISOString(),
649
- pid,
650
- argv: childArgv,
651
- cwd: ctx.cwd,
652
- endedAt: new Date().toISOString(),
653
- exitCode: -1,
654
- state: "failed"
655
- });
656
- error(ctx, `Background eval failed to start: ${err.message}`);
657
- });
658
- ctx.stdout.write(`cclaw eval: background run id=${id} pid=${pid}\n` +
659
- ` log: ${logPath}\n` +
660
- ` tail: cclaw eval runs tail ${id}\n` +
661
- ` status: cclaw eval runs status ${id}\n`);
662
- return 0;
663
- }
664
- finally {
665
- await logHandle.close();
666
- }
667
- }
668
- function formatRunRow(status) {
669
- const ended = status.endedAt ? ` ended=${status.endedAt}` : "";
670
- const exitCode = status.exitCode !== undefined ? ` exit=${status.exitCode}` : "";
671
- const alive = status.state === "running" ? (isRunAlive(status) ? "" : " (stale)") : "";
672
- return `${status.id} state=${status.state}${alive} pid=${status.pid} started=${status.startedAt}${ended}${exitCode}`;
673
- }
674
- async function runEvalRunsSubcommand(parsed, ctx) {
675
- const args = parsed.evalArgs ?? [];
676
- const action = args[0] ?? "list";
677
- if (action === "list") {
678
- const runs = await listRuns(ctx.cwd);
679
- if (runs.length === 0) {
680
- ctx.stdout.write("No eval runs recorded under .cclaw/evals/runs/.\n");
681
- return 0;
682
- }
683
- if (parsed.evalJson === true) {
684
- ctx.stdout.write(`${JSON.stringify(runs, null, 2)}\n`);
685
- return 0;
686
- }
687
- for (const run of runs)
688
- ctx.stdout.write(`${formatRunRow(run)}\n`);
689
- return 0;
690
- }
691
- if (action === "status") {
692
- const id = await resolveRunId(ctx.cwd, args[1]);
693
- if (!id) {
694
- error(ctx, `No such run: ${args[1] ?? "(none recorded)"}`);
695
- return 1;
696
- }
697
- const status = await readRunStatus(ctx.cwd, id);
698
- if (!status) {
699
- error(ctx, `Run ${id} has no status file.`);
700
- return 1;
701
- }
702
- if (parsed.evalJson === true) {
703
- ctx.stdout.write(`${JSON.stringify(status, null, 2)}\n`);
704
- }
705
- else {
706
- ctx.stdout.write(`${formatRunRow(status)}\n`);
707
- ctx.stdout.write(`log: ${runLogPath(ctx.cwd, id)}\n`);
708
- }
709
- return status.state === "failed" ? 1 : 0;
710
- }
711
- if (action === "tail") {
712
- const id = await resolveRunId(ctx.cwd, args[1]);
713
- if (!id) {
714
- error(ctx, `No such run: ${args[1] ?? "(none recorded)"}`);
715
- return 1;
716
- }
717
- const logFile = runLogPath(ctx.cwd, id);
718
- const stream = createReadStream(logFile, { encoding: "utf8" });
719
- await new Promise((resolve, reject) => {
720
- stream.on("data", (chunk) => ctx.stdout.write(chunk));
721
- stream.on("end", () => resolve());
722
- stream.on("error", reject);
723
- });
724
- return 0;
725
- }
726
- error(ctx, `Unknown \`cclaw eval runs\` action: ${action}. Use list | status | tail.`);
727
- return 1;
728
- }
729
- /**
730
- * Run the same corpus twice — once against the configured model, once
731
- * against `--compare-model=<id>` — and print a summary comparing the
732
- * two. Both reports are written to `.cclaw/evals/reports/` (unless
733
- * `--no-write` is set) and a unified diff is emitted to stdout. Exit
734
- * code is 1 when the override model regressed against the baseline
735
- * model, 0 otherwise.
736
- */
737
- async function runCompareModel(parsed, ctx, progress) {
738
- const baselineOpts = {
739
- projectRoot: ctx.cwd,
740
- stage: parsed.evalStage,
741
- mode: parsed.evalMode,
742
- schemaOnly: parsed.evalSchemaOnly === true,
743
- rules: parsed.evalRules === true,
744
- judge: parsed.evalJudge === true,
745
- ...(progress ? { progress } : {}),
746
- ...resolveMaxCostOption(parsed.evalMaxCostUsd, process.env)
747
- };
748
- ctx.stderr.write(`[cclaw eval] compare: running baseline model...\n`);
749
- const baseline = await runEval(baselineOpts);
750
- if ("kind" in baseline) {
751
- error(ctx, "--compare-model is incompatible with --dry-run.");
752
- return 1;
753
- }
754
- ctx.stderr.write(`[cclaw eval] compare: running ${parsed.evalCompareModel} ...\n`);
755
- const candidate = await runEval({
756
- ...baselineOpts,
757
- modelOverride: parsed.evalCompareModel
758
- });
759
- if ("kind" in candidate) {
760
- error(ctx, "--compare-model received an unexpected dry-run response.");
761
- return 1;
762
- }
763
- if (parsed.evalNoWrite !== true) {
764
- await writeJsonReport(ctx.cwd, baseline);
765
- await writeMarkdownReport(ctx.cwd, baseline);
766
- await writeJsonReport(ctx.cwd, candidate);
767
- await writeMarkdownReport(ctx.cwd, candidate);
768
- }
769
- const passDelta = candidate.summary.passed - baseline.summary.passed;
770
- const failDelta = candidate.summary.failed - baseline.summary.failed;
771
- const costDelta = candidate.summary.totalCostUsd - baseline.summary.totalCostUsd;
772
- if (parsed.evalJson === true) {
773
- ctx.stdout.write(`${JSON.stringify({
774
- baseline: {
775
- model: baseline.model,
776
- summary: baseline.summary
777
- },
778
- candidate: {
779
- model: candidate.model,
780
- summary: candidate.summary
781
- },
782
- delta: { passed: passDelta, failed: failDelta, costUsd: costDelta }
783
- }, null, 2)}\n`);
784
- }
785
- else {
786
- ctx.stdout.write(`cclaw eval compare-model:\n` +
787
- ` baseline ${baseline.model}: pass=${baseline.summary.passed}/${baseline.summary.totalCases} ` +
788
- `fail=${baseline.summary.failed} cost=$${baseline.summary.totalCostUsd.toFixed(4)}\n` +
789
- ` candidate ${candidate.model}: pass=${candidate.summary.passed}/${candidate.summary.totalCases} ` +
790
- `fail=${candidate.summary.failed} cost=$${candidate.summary.totalCostUsd.toFixed(4)}\n` +
791
- ` delta: passed=${passDelta >= 0 ? "+" : ""}${passDelta} ` +
792
- `failed=${failDelta >= 0 ? "+" : ""}${failDelta} ` +
793
- `cost=${costDelta >= 0 ? "+" : ""}$${costDelta.toFixed(4)}\n`);
794
- }
795
- return failDelta > 0 ? 1 : 0;
796
- }
797
479
  async function runCommand(parsed, ctx) {
798
480
  if (parsed.showHelp) {
799
481
  ctx.stdout.write(usage());
@@ -864,7 +546,8 @@ async function runCommand(parsed, ctx) {
864
546
  if (parsed.doctorJson === true) {
865
547
  const counts = doctorCountsBySeverity(filteredChecks);
866
548
  ctx.stdout.write(`${JSON.stringify({
867
- ok: doctorSucceeded(checks),
549
+ ok: doctorSucceeded(filteredChecks),
550
+ globalOk: doctorSucceeded(checks),
868
551
  filters: parsed.doctorOnly ?? [],
869
552
  counts,
870
553
  checks: filteredChecks
@@ -878,138 +561,13 @@ async function runCommand(parsed, ctx) {
878
561
  printDoctorText(ctx, filteredChecks, { explain, quiet });
879
562
  }
880
563
  }
881
- return doctorSucceeded(checks) ? 0 : 2;
564
+ return doctorSucceeded(filteredChecks) ? 0 : 2;
882
565
  }
883
566
  if (command === "upgrade") {
884
567
  await upgradeCclaw(ctx.cwd);
885
568
  info(ctx, "Upgraded .cclaw runtime and regenerated generated files");
886
569
  return 0;
887
570
  }
888
- if (command === "eval" && parsed.evalSubcommand === "runs") {
889
- return runEvalRunsSubcommand(parsed, ctx);
890
- }
891
- if (command === "eval" && parsed.evalBackground === true) {
892
- return spawnBackgroundEval(parsed, ctx);
893
- }
894
- if (command === "eval" && parsed.evalSubcommand === "diff") {
895
- const args = parsed.evalArgs ?? [];
896
- if (args.length !== 2) {
897
- error(ctx, `\`cclaw eval diff\` requires two arguments: <old> <new>. ` +
898
- `Example: cclaw eval diff 0.26.0 latest`);
899
- return 1;
900
- }
901
- const [oldSel, newSel] = args;
902
- try {
903
- const diff = await runEvalDiff({
904
- projectRoot: ctx.cwd,
905
- old: oldSel,
906
- new: newSel
907
- });
908
- if (parsed.evalJson === true) {
909
- ctx.stdout.write(`${JSON.stringify(diff, null, 2)}\n`);
910
- }
911
- else {
912
- ctx.stdout.write(formatDiffMarkdown(diff));
913
- }
914
- return diff.regressed ? 1 : 0;
915
- }
916
- catch (err) {
917
- error(ctx, err instanceof Error ? err.message : String(err));
918
- return 1;
919
- }
920
- }
921
- if (command === "eval") {
922
- const wantProgress = parsed.evalQuiet !== true &&
923
- parsed.dryRun !== true &&
924
- parsed.evalJson !== true;
925
- const progress = wantProgress
926
- ? createStderrProgressLogger({ writer: (s) => ctx.stderr.write(s) })
927
- : undefined;
928
- if (parsed.evalCompareModel !== undefined) {
929
- return runCompareModel(parsed, ctx, progress);
930
- }
931
- const result = await runEval({
932
- projectRoot: ctx.cwd,
933
- stage: parsed.evalStage,
934
- mode: parsed.evalMode,
935
- schemaOnly: parsed.evalSchemaOnly === true,
936
- rules: parsed.evalRules === true,
937
- judge: parsed.evalJudge === true,
938
- dryRun: parsed.dryRun === true,
939
- ...(progress ? { progress } : {}),
940
- ...resolveMaxCostOption(parsed.evalMaxCostUsd, process.env)
941
- });
942
- if ("kind" in result) {
943
- if (parsed.evalJson === true) {
944
- ctx.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
945
- return 0;
946
- }
947
- ctx.stdout.write(`cclaw eval dry-run\n`);
948
- ctx.stdout.write(` provider: ${result.config.provider}\n`);
949
- ctx.stdout.write(` baseUrl: ${result.config.baseUrl}\n`);
950
- ctx.stdout.write(` model: ${result.config.model}\n`);
951
- ctx.stdout.write(` source: ${result.config.source}\n`);
952
- ctx.stdout.write(` apiKey: ${result.config.apiKey ? "set" : "unset"}\n`);
953
- ctx.stdout.write(` mode: ${result.plannedMode}\n`);
954
- ctx.stdout.write(` corpus: ${result.corpus.total} case(s)\n`);
955
- for (const [stage, count] of Object.entries(result.corpus.byStage)) {
956
- ctx.stdout.write(` - ${stage}: ${count}\n`);
957
- }
958
- if (result.workflowCorpus.total > 0 || result.plannedMode === "workflow") {
959
- ctx.stdout.write(` workflow corpus: ${result.workflowCorpus.total} case(s)\n`);
960
- for (const wf of result.workflowCorpus.cases) {
961
- ctx.stdout.write(` - ${wf.id}: ${wf.stages.join(" → ")}\n`);
962
- }
963
- }
964
- ctx.stdout.write(` verifiers available:\n`);
965
- for (const [key, value] of Object.entries(result.verifiersAvailable)) {
966
- ctx.stdout.write(` - ${key}: ${value ? "yes" : "no"}\n`);
967
- }
968
- if (result.notes.length > 0) {
969
- ctx.stdout.write(` notes:\n`);
970
- for (const note of result.notes) {
971
- ctx.stdout.write(` - ${note}\n`);
972
- }
973
- }
974
- return 0;
975
- }
976
- if (parsed.evalUpdateBaseline === true && parsed.evalConfirm !== true) {
977
- error(ctx, "--update-baseline requires --confirm to prevent accidental baseline resets.");
978
- return 1;
979
- }
980
- if (parsed.evalUpdateBaseline === true) {
981
- if (result.summary.failed > 0) {
982
- error(ctx, `Refusing to update baselines: ${result.summary.failed} case(s) currently failing. Fix structural checks first.`);
983
- return 1;
984
- }
985
- const written = await writeBaselinesFromReport(ctx.cwd, result);
986
- for (const file of written) {
987
- info(ctx, `Baseline written: ${path.relative(ctx.cwd, file)}`);
988
- }
989
- }
990
- if (parsed.evalNoWrite !== true) {
991
- const jsonPath = await writeJsonReport(ctx.cwd, result);
992
- const mdPath = await writeMarkdownReport(ctx.cwd, result);
993
- info(ctx, `Report written: ${path.relative(ctx.cwd, jsonPath)}`);
994
- info(ctx, `Report written: ${path.relative(ctx.cwd, mdPath)}`);
995
- }
996
- const regressionCount = result.baselineDelta?.criticalFailures ?? 0;
997
- if (parsed.evalJson === true) {
998
- ctx.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
999
- }
1000
- else {
1001
- const regressionNote = regressionCount > 0 ? `, ${regressionCount} regression(s)` : "";
1002
- ctx.stdout.write(`cclaw eval: ${result.summary.totalCases} case(s), ` +
1003
- `${result.summary.passed} passed, ` +
1004
- `${result.summary.failed} failed, ` +
1005
- `${result.summary.skipped} skipped${regressionNote}\n`);
1006
- }
1007
- if (result.summary.failed > 0)
1008
- return 1;
1009
- if (regressionCount > 0)
1010
- return 1;
1011
- return 0;
1012
- }
1013
571
  if (command === "archive") {
1014
572
  const archived = await archiveRun(ctx.cwd, parsed.archiveName, {
1015
573
  skipRetro: parsed.archiveSkipRetro === true,
@@ -1021,13 +579,13 @@ async function runCommand(parsed, ctx) {
1021
579
  info(ctx, `Archived active artifacts to ${archived.archivePath}. Flow state reset to brainstorm.${snapshotSummary}`);
1022
580
  const k = archived.knowledge;
1023
581
  if (k.overThreshold) {
1024
- info(ctx, `Knowledge curation recommended: ${k.knowledgePath} now has ${k.activeEntryCount} active entries (soft threshold ${k.softThreshold}). Run \`/cc-learn curate\` to plan a soft-archive of stale/duplicate entries to ${RUNTIME_ROOT}/knowledge.archive.jsonl.`);
582
+ info(ctx, `Knowledge curation recommended: ${k.knowledgePath} now has ${k.activeEntryCount} active entries (soft threshold ${k.softThreshold}). Ask your harness to curate cclaw knowledge and plan a soft-archive of stale/duplicate entries to ${RUNTIME_ROOT}/knowledge.archive.jsonl.`);
1025
583
  }
1026
584
  else if (k.activeEntryCount > 0) {
1027
- info(ctx, `Knowledge: ${k.activeEntryCount}/${k.softThreshold} active entries. Run \`/cc-learn curate\` if you want a sweep before the next run.`);
585
+ info(ctx, `Knowledge: ${k.activeEntryCount}/${k.softThreshold} active entries. Ask your harness for a cclaw knowledge curation sweep before the next run if needed.`);
1028
586
  }
1029
587
  else {
1030
- info(ctx, `Knowledge: 0 active entries in ${k.knowledgePath}. Capture lessons from this run with \`/cc-learn add\` before they fade.`);
588
+ info(ctx, `Knowledge: 0 active entries in ${k.knowledgePath}. Capture lessons from this run through the learnings skill before they fade.`);
1031
589
  }
1032
590
  return 0;
1033
591
  }
@@ -1036,13 +594,13 @@ async function runCommand(parsed, ctx) {
1036
594
  return 0;
1037
595
  }
1038
596
  async function main() {
1039
- const parsed = parseArgs(process.argv.slice(2));
1040
597
  const ctx = {
1041
598
  cwd: process.cwd(),
1042
599
  stdout: process.stdout,
1043
600
  stderr: process.stderr
1044
601
  };
1045
602
  try {
603
+ const parsed = parseArgs(process.argv.slice(2));
1046
604
  const code = await runCommand(parsed, ctx);
1047
605
  process.exitCode = code;
1048
606
  }