cclaw-cli 0.49.0 → 0.51.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181)
  1. package/README.md +54 -82
  2. package/dist/artifact-linter.d.ts +4 -0
  3. package/dist/artifact-linter.js +24 -3
  4. package/dist/cli.d.ts +1 -19
  5. package/dist/cli.js +49 -491
  6. package/dist/constants.d.ts +2 -13
  7. package/dist/constants.js +1 -43
  8. package/dist/content/closeout-guidance.d.ts +14 -0
  9. package/dist/content/closeout-guidance.js +42 -0
  10. package/dist/content/core-agents.js +51 -9
  11. package/dist/content/decision-protocol.d.ts +12 -0
  12. package/dist/content/decision-protocol.js +20 -0
  13. package/dist/content/diff-command.d.ts +1 -2
  14. package/dist/content/diff-command.js +8 -94
  15. package/dist/content/examples.d.ts +4 -10
  16. package/dist/content/examples.js +10 -20
  17. package/dist/content/hook-events.js +2 -2
  18. package/dist/content/hook-inline-snippets.d.ts +5 -2
  19. package/dist/content/hook-inline-snippets.js +33 -1
  20. package/dist/content/hook-manifest.d.ts +3 -4
  21. package/dist/content/hook-manifest.js +11 -12
  22. package/dist/content/hooks.js +2 -0
  23. package/dist/content/ideate-command.d.ts +2 -0
  24. package/dist/content/ideate-command.js +31 -25
  25. package/dist/content/iron-laws.d.ts +5 -5
  26. package/dist/content/iron-laws.js +5 -5
  27. package/dist/content/learnings.d.ts +3 -4
  28. package/dist/content/learnings.js +24 -50
  29. package/dist/content/meta-skill.js +31 -21
  30. package/dist/content/next-command.js +38 -38
  31. package/dist/content/node-hooks.js +17 -343
  32. package/dist/content/opencode-plugin.js +2 -100
  33. package/dist/content/research-playbooks.js +14 -14
  34. package/dist/content/review-loop.d.ts +2 -0
  35. package/dist/content/review-loop.js +8 -0
  36. package/dist/content/session-hooks.js +14 -46
  37. package/dist/content/skills.d.ts +0 -5
  38. package/dist/content/skills.js +53 -128
  39. package/dist/content/stage-common-guidance.d.ts +0 -1
  40. package/dist/content/stage-common-guidance.js +15 -14
  41. package/dist/content/stage-schema.d.ts +26 -1
  42. package/dist/content/stage-schema.js +121 -40
  43. package/dist/content/stages/_lint-metadata/index.js +9 -15
  44. package/dist/content/stages/brainstorm.js +22 -43
  45. package/dist/content/stages/design.js +37 -57
  46. package/dist/content/stages/plan.js +22 -13
  47. package/dist/content/stages/review.js +24 -27
  48. package/dist/content/stages/scope.js +34 -46
  49. package/dist/content/stages/ship.js +7 -4
  50. package/dist/content/stages/spec.js +20 -9
  51. package/dist/content/stages/tdd.js +64 -44
  52. package/dist/content/start-command.js +10 -12
  53. package/dist/content/status-command.d.ts +2 -7
  54. package/dist/content/status-command.js +19 -146
  55. package/dist/content/subagents.d.ts +0 -5
  56. package/dist/content/subagents.js +47 -28
  57. package/dist/content/templates.d.ts +1 -1
  58. package/dist/content/templates.js +126 -135
  59. package/dist/content/track-render-context.d.ts +17 -0
  60. package/dist/content/track-render-context.js +44 -0
  61. package/dist/content/tree-command.d.ts +1 -2
  62. package/dist/content/tree-command.js +4 -87
  63. package/dist/content/utility-skills.d.ts +2 -29
  64. package/dist/content/utility-skills.js +2 -1534
  65. package/dist/content/view-command.js +29 -11
  66. package/dist/delegation.d.ts +1 -1
  67. package/dist/delegation.js +5 -15
  68. package/dist/doctor-registry.js +20 -21
  69. package/dist/doctor.js +88 -344
  70. package/dist/flow-state.d.ts +3 -0
  71. package/dist/flow-state.js +2 -0
  72. package/dist/harness-adapters.d.ts +1 -1
  73. package/dist/harness-adapters.js +48 -57
  74. package/dist/install.js +128 -358
  75. package/dist/internal/advance-stage.js +3 -9
  76. package/dist/internal/compound-readiness.d.ts +1 -1
  77. package/dist/internal/compound-readiness.js +1 -1
  78. package/dist/internal/tdd-loop-status.d.ts +1 -1
  79. package/dist/internal/tdd-loop-status.js +1 -1
  80. package/dist/knowledge-store.d.ts +16 -10
  81. package/dist/knowledge-store.js +51 -15
  82. package/dist/policy.js +16 -105
  83. package/dist/run-archive.d.ts +4 -6
  84. package/dist/run-archive.js +15 -20
  85. package/dist/run-persistence.d.ts +2 -2
  86. package/dist/run-persistence.js +3 -9
  87. package/package.json +1 -2
  88. package/dist/content/archive-command.d.ts +0 -2
  89. package/dist/content/archive-command.js +0 -124
  90. package/dist/content/compound-command.d.ts +0 -5
  91. package/dist/content/compound-command.js +0 -193
  92. package/dist/content/contexts.d.ts +0 -18
  93. package/dist/content/contexts.js +0 -24
  94. package/dist/content/contracts.d.ts +0 -2
  95. package/dist/content/contracts.js +0 -51
  96. package/dist/content/doctor-references.d.ts +0 -2
  97. package/dist/content/doctor-references.js +0 -150
  98. package/dist/content/eval-scaffold.d.ts +0 -15
  99. package/dist/content/eval-scaffold.js +0 -370
  100. package/dist/content/feature-command.d.ts +0 -2
  101. package/dist/content/feature-command.js +0 -123
  102. package/dist/content/flow-map.d.ts +0 -23
  103. package/dist/content/flow-map.js +0 -134
  104. package/dist/content/harness-doc.d.ts +0 -2
  105. package/dist/content/harness-doc.js +0 -202
  106. package/dist/content/harness-playbooks.d.ts +0 -24
  107. package/dist/content/harness-playbooks.js +0 -393
  108. package/dist/content/harness-tool-refs.d.ts +0 -20
  109. package/dist/content/harness-tool-refs.js +0 -268
  110. package/dist/content/ops-command.d.ts +0 -2
  111. package/dist/content/ops-command.js +0 -71
  112. package/dist/content/protocols.d.ts +0 -7
  113. package/dist/content/protocols.js +0 -215
  114. package/dist/content/retro-command.d.ts +0 -2
  115. package/dist/content/retro-command.js +0 -165
  116. package/dist/content/rewind-command.d.ts +0 -2
  117. package/dist/content/rewind-command.js +0 -106
  118. package/dist/content/tdd-log-command.d.ts +0 -2
  119. package/dist/content/tdd-log-command.js +0 -85
  120. package/dist/eval/agents/single-shot.d.ts +0 -27
  121. package/dist/eval/agents/single-shot.js +0 -79
  122. package/dist/eval/agents/with-tools.d.ts +0 -44
  123. package/dist/eval/agents/with-tools.js +0 -261
  124. package/dist/eval/agents/workflow.d.ts +0 -31
  125. package/dist/eval/agents/workflow.js +0 -155
  126. package/dist/eval/baseline.d.ts +0 -38
  127. package/dist/eval/baseline.js +0 -282
  128. package/dist/eval/config-loader.d.ts +0 -14
  129. package/dist/eval/config-loader.js +0 -395
  130. package/dist/eval/corpus.d.ts +0 -30
  131. package/dist/eval/corpus.js +0 -330
  132. package/dist/eval/cost-guard.d.ts +0 -102
  133. package/dist/eval/cost-guard.js +0 -190
  134. package/dist/eval/diff.d.ts +0 -64
  135. package/dist/eval/diff.js +0 -323
  136. package/dist/eval/llm-client.d.ts +0 -176
  137. package/dist/eval/llm-client.js +0 -267
  138. package/dist/eval/mode.d.ts +0 -28
  139. package/dist/eval/mode.js +0 -61
  140. package/dist/eval/progress.d.ts +0 -83
  141. package/dist/eval/progress.js +0 -59
  142. package/dist/eval/report.d.ts +0 -11
  143. package/dist/eval/report.js +0 -181
  144. package/dist/eval/rubric-loader.d.ts +0 -20
  145. package/dist/eval/rubric-loader.js +0 -143
  146. package/dist/eval/runner.d.ts +0 -81
  147. package/dist/eval/runner.js +0 -746
  148. package/dist/eval/runs.d.ts +0 -41
  149. package/dist/eval/runs.js +0 -114
  150. package/dist/eval/sandbox.d.ts +0 -38
  151. package/dist/eval/sandbox.js +0 -137
  152. package/dist/eval/tools/glob.d.ts +0 -2
  153. package/dist/eval/tools/glob.js +0 -163
  154. package/dist/eval/tools/grep.d.ts +0 -2
  155. package/dist/eval/tools/grep.js +0 -152
  156. package/dist/eval/tools/index.d.ts +0 -7
  157. package/dist/eval/tools/index.js +0 -35
  158. package/dist/eval/tools/read.d.ts +0 -2
  159. package/dist/eval/tools/read.js +0 -122
  160. package/dist/eval/tools/types.d.ts +0 -49
  161. package/dist/eval/tools/types.js +0 -41
  162. package/dist/eval/tools/write.d.ts +0 -2
  163. package/dist/eval/tools/write.js +0 -92
  164. package/dist/eval/types.d.ts +0 -561
  165. package/dist/eval/types.js +0 -47
  166. package/dist/eval/verifiers/judge.d.ts +0 -40
  167. package/dist/eval/verifiers/judge.js +0 -256
  168. package/dist/eval/verifiers/rules.d.ts +0 -24
  169. package/dist/eval/verifiers/rules.js +0 -218
  170. package/dist/eval/verifiers/structural.d.ts +0 -14
  171. package/dist/eval/verifiers/structural.js +0 -171
  172. package/dist/eval/verifiers/traceability.d.ts +0 -23
  173. package/dist/eval/verifiers/traceability.js +0 -84
  174. package/dist/eval/verifiers/workflow-consistency.d.ts +0 -21
  175. package/dist/eval/verifiers/workflow-consistency.js +0 -225
  176. package/dist/eval/workflow-corpus.d.ts +0 -7
  177. package/dist/eval/workflow-corpus.js +0 -207
  178. package/dist/feature-system.d.ts +0 -42
  179. package/dist/feature-system.js +0 -432
  180. package/dist/internal/knowledge-digest.d.ts +0 -7
  181. package/dist/internal/knowledge-digest.js +0 -93
package/dist/cli.js CHANGED
@@ -1,29 +1,19 @@
1
1
  #!/usr/bin/env node
2
- import { createReadStream, existsSync, realpathSync } from "node:fs";
3
- import { spawn } from "node:child_process";
4
- import fs from "node:fs/promises";
5
2
  import process from "node:process";
6
3
  import path from "node:path";
4
+ import { existsSync, realpathSync } from "node:fs";
7
5
  import { createInterface } from "node:readline/promises";
8
6
  import { fileURLToPath } from "node:url";
9
- import { FLOW_TRACKS, HARNESS_IDS } from "./types.js";
10
7
  import { doctorChecks, doctorSucceeded } from "./doctor.js";
11
8
  import { initCclaw, syncCclaw, uninstallCclaw, upgradeCclaw } from "./install.js";
12
9
  import { error, info } from "./logger.js";
10
+ import { FLOW_TRACKS, HARNESS_IDS } from "./types.js";
13
11
  import { archiveRun } from "./runs.js";
14
12
  import { CCLAW_VERSION, RUNTIME_ROOT } from "./constants.js";
15
13
  import { createDefaultConfig } from "./config.js";
16
14
  import { detectHarnesses } from "./init-detect.js";
17
15
  import { HARNESS_ADAPTERS } from "./harness-adapters.js";
18
16
  import { classifyCodexHooksFlag, codexConfigPath, patchCodexHooksFlag, readCodexConfig, writeCodexConfig } from "./codex-feature-flag.js";
19
- import { runEval } from "./eval/runner.js";
20
- import { createStderrProgressLogger } from "./eval/progress.js";
21
- import { writeBaselinesFromReport } from "./eval/baseline.js";
22
- import { writeJsonReport, writeMarkdownReport } from "./eval/report.js";
23
- import { formatDiffMarkdown, runEvalDiff } from "./eval/diff.js";
24
- import { ensureRunDir, generateRunId, isRunAlive, listRuns, readRunStatus, resolveRunId, runLogPath, writeRunStatus } from "./eval/runs.js";
25
- import { parseModeInput } from "./eval/mode.js";
26
- import { FLOW_STAGES } from "./types.js";
27
17
  import { runInternalCommand } from "./internal/advance-stage.js";
28
18
  const INSTALLER_COMMANDS = [
29
19
  "init",
@@ -32,7 +22,6 @@ const INSTALLER_COMMANDS = [
32
22
  "upgrade",
33
23
  "uninstall",
34
24
  "archive",
35
- "eval",
36
25
  "internal"
37
26
  ];
38
27
  export function usage() {
@@ -49,14 +38,18 @@ Commands:
49
38
  Flags: --harnesses=<list> Comma list of harnesses (claude,cursor,opencode,codex).
50
39
  --no-interactive Skip interactive prompts even on TTY (for CI/scripts).
51
40
  sync Reconcile generated runtime files with the current config.
41
+ doctor Check install/runtime wiring and print concrete fixes for failures.
42
+ Flags: --explain Include docs pointers for every check.
43
+ --json Emit machine-readable check results.
44
+ --quiet Show only failing checks.
45
+ --only=<filter> Limit displayed checks (error,warning,hook:,state:,...).
46
+ --reconcile-gates Refresh derived gate status before checking.
52
47
  upgrade Refresh generated files in .cclaw. Preserves your config.yaml.
53
- archive Archive the active run and reset flow state for next feature.
48
+ archive Archive the active run and reset flow state for the next run.
54
49
  Flags: --name=<slug> Override archive folder suffix.
55
50
  --skip-retro Skip retro gate only when runtime allows it.
56
51
  --retro-reason=<txt> Required rationale with --skip-retro.
57
52
  uninstall Remove .cclaw runtime and the generated harness shim files.
58
- eval Run cclaw evals. Maintainer surface — see docs/evals.md.
59
- Full flag reference: \`npx cclaw-cli eval --help\` or docs/evals.md.
60
53
 
61
54
  Global flags:
62
55
  -h, --help Show this help message and exit 0.
@@ -66,15 +59,16 @@ Examples:
66
59
  npx cclaw-cli
67
60
  npx cclaw-cli init --harnesses=claude,cursor --no-interactive
68
61
  npx cclaw-cli sync
69
- npx cclaw-cli archive --name=my-feature
62
+ npx cclaw-cli archive --name=my-run
70
63
  npx cclaw-cli upgrade
71
- npx cclaw-cli eval --dry-run
72
64
 
73
- Everything operational (retro, archive, worktrees, doctor, learnings)
74
- happens inside your harness via slash commands. The CLI is just a
75
- launcher. See README.md for the four user-facing slash commands.
65
+ Happy-path work happens inside your harness via /cc, /cc-next,
66
+ /cc-ideate, and /cc-view. Doctor is an operator/support surface:
67
+ it verifies install/runtime wiring, but a real harness smoke test is
68
+ still needed to prove provider auth and model execution.
76
69
 
77
70
  Docs: https://github.com/zuevrs/cclaw
71
+ Local: docs/config.md and docs/harnesses.md
78
72
  Issues: https://github.com/zuevrs/cclaw/issues
79
73
  `;
80
74
  }
@@ -96,25 +90,6 @@ function parseTrack(raw) {
96
90
  }
97
91
  return trimmed;
98
92
  }
99
- function parseLegacyTier(raw) {
100
- return parseModeInput(raw.toUpperCase(), {
101
- source: "cli",
102
- raw: `--tier=${raw}`
103
- });
104
- }
105
- function parseEvalMode(raw) {
106
- return parseModeInput(raw, {
107
- source: "cli",
108
- raw: `--mode=${raw}`
109
- });
110
- }
111
- function parseEvalStage(raw) {
112
- const trimmed = raw.trim();
113
- if (!FLOW_STAGES.includes(trimmed)) {
114
- throw new Error(`Unknown eval stage: ${raw}. Supported: ${FLOW_STAGES.join(", ")}`);
115
- }
116
- return trimmed;
117
- }
118
93
  function isInitPromptAllowed(ctx) {
119
94
  return Boolean(process.stdin.isTTY && ctx.stdout.isTTY);
120
95
  }
@@ -147,12 +122,10 @@ function buildInitSurfacePreview(harnesses) {
147
122
  ".cclaw/agents/*.md",
148
123
  ".cclaw/hooks/*",
149
124
  ".cclaw/rules/**",
150
- ".cclaw/features/** (legacy snapshots, read-only migration)",
151
125
  ".cclaw/runs/**",
152
126
  ".cclaw/artifacts/**",
153
127
  ".cclaw/knowledge.jsonl",
154
128
  ".cclaw/state/*.json|*.jsonl",
155
- ".cclaw/references/**",
156
129
  "AGENTS.md (managed block)"
157
130
  ];
158
131
  for (const harness of harnesses) {
@@ -376,7 +349,7 @@ function printDoctorText(ctx, checks, options) {
376
349
  if (!options.quiet) {
377
350
  ctx.stdout.write(` details: ${check.details}\n`);
378
351
  }
379
- if (options.explain) {
352
+ if (!check.ok || options.explain) {
380
353
  ctx.stdout.write(` fix: ${check.fix}\n`);
381
354
  if (check.docRef) {
382
355
  ctx.stdout.write(` docs: ${check.docRef}\n`);
@@ -396,18 +369,6 @@ function printDoctorText(ctx, checks, options) {
396
369
  ctx.stdout.write("Doctor status: HEALTHY (no failing error checks)\n");
397
370
  }
398
371
  }
399
- function resolveMaxCostOption(fromCli, env) {
400
- if (fromCli !== undefined)
401
- return { maxCostUsd: fromCli };
402
- const raw = env.CCLAW_EVAL_MAX_COST_USD;
403
- if (raw === undefined || raw.trim() === "")
404
- return {};
405
- const value = Number(raw);
406
- if (!Number.isFinite(value) || value <= 0) {
407
- throw new Error(`CCLAW_EVAL_MAX_COST_USD must be a positive number, got: ${raw}`);
408
- }
409
- return { maxCostUsd: value };
410
- }
411
372
  function parseArgs(argv) {
412
373
  const parsed = {};
413
374
  const helpFlag = argv.find((arg) => arg === "--help" || arg === "-h");
@@ -429,41 +390,34 @@ function parseArgs(argv) {
429
390
  parsed.internalArgs = [...rest];
430
391
  return parsed;
431
392
  }
432
- // For `eval`, the next non-flag argument is an optional subcommand. Any
433
- // subsequent non-flag tokens are captured as evalArgs (consumed by the
434
- // subcommand handler). This preserves backwards compat: callers that run
435
- // `cclaw eval --dry-run` see no subcommand and no positional args.
436
- let flags = rest;
437
- if (parsed.command === "eval") {
438
- const evalArgs = [];
439
- const remainder = [];
440
- let sawSubcommand = false;
441
- for (const token of rest) {
442
- if (token.startsWith("--")) {
443
- remainder.push(token);
444
- continue;
445
- }
446
- if (!sawSubcommand) {
447
- if (token === "diff") {
448
- parsed.evalSubcommand = "diff";
449
- sawSubcommand = true;
450
- }
451
- else if (token === "runs") {
452
- parsed.evalSubcommand = "runs";
453
- sawSubcommand = true;
454
- }
455
- else {
456
- evalArgs.push(token);
457
- }
458
- continue;
459
- }
460
- evalArgs.push(token);
393
+ const flags = rest;
394
+ const isAllowedForCommand = (flag) => {
395
+ if (parsed.command === "init") {
396
+ return flag.startsWith("--harnesses=") ||
397
+ flag.startsWith("--track=") ||
398
+ flag.startsWith("--profile=") ||
399
+ flag === "--interactive" ||
400
+ flag === "--no-interactive" ||
401
+ flag === "--dry-run";
402
+ }
403
+ if (parsed.command === "doctor") {
404
+ return flag === "--reconcile-gates" ||
405
+ flag === "--json" ||
406
+ flag === "--explain" ||
407
+ flag === "--quiet" ||
408
+ flag.startsWith("--only=");
409
+ }
410
+ if (parsed.command === "archive") {
411
+ return flag.startsWith("--name=") ||
412
+ flag === "--skip-retro" ||
413
+ flag.startsWith("--retro-reason=");
461
414
  }
462
- if (evalArgs.length > 0)
463
- parsed.evalArgs = evalArgs;
464
- flags = remainder;
465
- }
415
+ return false;
416
+ };
466
417
  for (const flag of flags) {
418
+ if (!isAllowedForCommand(flag)) {
419
+ throw new Error(`Flag ${flag} is not supported for ${parsed.command ?? "this command"}.`);
420
+ }
467
421
  if (flag.startsWith("--harnesses=")) {
468
422
  parsed.harnesses = parseHarnesses(flag.replace("--harnesses=", ""));
469
423
  continue;
@@ -519,281 +473,9 @@ function parseArgs(argv) {
519
473
  parsed.archiveSkipRetroReason = flag.replace("--retro-reason=", "").trim();
520
474
  continue;
521
475
  }
522
- if (flag.startsWith("--stage=")) {
523
- parsed.evalStage = parseEvalStage(flag.replace("--stage=", ""));
524
- continue;
525
- }
526
- if (flag.startsWith("--mode=")) {
527
- parsed.evalMode = parseEvalMode(flag.replace("--mode=", ""));
528
- continue;
529
- }
530
- if (flag.startsWith("--tier=")) {
531
- parsed.evalMode = parseLegacyTier(flag.replace("--tier=", ""));
532
- continue;
533
- }
534
- if (flag === "--schema-only") {
535
- parsed.evalSchemaOnly = true;
536
- continue;
537
- }
538
- if (flag === "--rules") {
539
- parsed.evalRules = true;
540
- continue;
541
- }
542
- if (flag === "--judge") {
543
- parsed.evalJudge = true;
544
- continue;
545
- }
546
- if (flag === "--no-write") {
547
- parsed.evalNoWrite = true;
548
- continue;
549
- }
550
- if (flag === "--update-baseline") {
551
- parsed.evalUpdateBaseline = true;
552
- continue;
553
- }
554
- if (flag === "--confirm") {
555
- parsed.evalConfirm = true;
556
- continue;
557
- }
558
- if (flag === "--background") {
559
- parsed.evalBackground = true;
560
- continue;
561
- }
562
- if (flag.startsWith("--compare-model=")) {
563
- const value = flag.replace("--compare-model=", "").trim();
564
- if (value.length === 0) {
565
- throw new Error(`--compare-model requires a non-empty model id (e.g. --compare-model=gpt-4o-mini).`);
566
- }
567
- parsed.evalCompareModel = value;
568
- continue;
569
- }
570
- if (flag.startsWith("--max-cost-usd=")) {
571
- const raw = flag.replace("--max-cost-usd=", "").trim();
572
- const value = Number(raw);
573
- if (!Number.isFinite(value) || value <= 0) {
574
- throw new Error(`--max-cost-usd requires a positive number, got: ${raw}`);
575
- }
576
- parsed.evalMaxCostUsd = value;
577
- continue;
578
- }
579
- }
580
- // `--json` is shared between doctor and eval. Disambiguate by command.
581
- if (parsed.command === "eval" && parsed.doctorJson === true) {
582
- parsed.evalJson = true;
583
- parsed.doctorJson = undefined;
584
- }
585
- // `--quiet` on `eval` silences the stderr progress logger. On doctor it
586
- // continues to mean "print only failing checks" — the flag slot is the
587
- // same, the semantics depend on which command owns the invocation.
588
- if (parsed.command === "eval" && parsed.doctorQuiet === true) {
589
- parsed.evalQuiet = true;
590
- parsed.doctorQuiet = undefined;
591
476
  }
592
477
  return parsed;
593
478
  }
594
- /**
595
- * Spawn `cclaw eval` (without `--background`) in a detached child process
596
- * and return immediately. The child's stdout+stderr are piped to
597
- * `.cclaw/evals/runs/<id>/run.log` so the user can attach later with
598
- * `cclaw eval runs tail`. We do NOT wait for the child — the whole point
599
- * is to free the terminal while a multi-minute workflow-mode run
600
- * proceeds in the background.
601
- */
602
- async function spawnBackgroundEval(parsed, ctx) {
603
- const id = generateRunId();
604
- await ensureRunDir(ctx.cwd, id);
605
- const logPath = runLogPath(ctx.cwd, id);
606
- const childArgv = process.argv.slice(2).filter((a) => a !== "--background");
607
- const cliEntry = process.argv[1];
608
- if (!cliEntry) {
609
- error(ctx, "Could not resolve cclaw entrypoint for --background.");
610
- return 1;
611
- }
612
- const logHandle = await fs.open(logPath, "a");
613
- try {
614
- const child = spawn(process.execPath, [cliEntry, ...childArgv], {
615
- cwd: ctx.cwd,
616
- detached: true,
617
- stdio: ["ignore", logHandle.fd, logHandle.fd],
618
- env: process.env
619
- });
620
- const pid = child.pid ?? -1;
621
- await writeRunStatus(ctx.cwd, {
622
- id,
623
- startedAt: new Date().toISOString(),
624
- pid,
625
- argv: childArgv,
626
- cwd: ctx.cwd,
627
- state: "running"
628
- });
629
- child.unref();
630
- const finalize = async (code) => {
631
- const current = await readRunStatus(ctx.cwd, id);
632
- if (!current)
633
- return;
634
- const exitCode = typeof code === "number" ? code : -1;
635
- await writeRunStatus(ctx.cwd, {
636
- ...current,
637
- endedAt: new Date().toISOString(),
638
- exitCode,
639
- state: exitCode === 0 ? "succeeded" : "failed"
640
- });
641
- };
642
- child.on("exit", (code) => {
643
- void finalize(code);
644
- });
645
- child.on("error", (err) => {
646
- void writeRunStatus(ctx.cwd, {
647
- id,
648
- startedAt: new Date().toISOString(),
649
- pid,
650
- argv: childArgv,
651
- cwd: ctx.cwd,
652
- endedAt: new Date().toISOString(),
653
- exitCode: -1,
654
- state: "failed"
655
- });
656
- error(ctx, `Background eval failed to start: ${err.message}`);
657
- });
658
- ctx.stdout.write(`cclaw eval: background run id=${id} pid=${pid}\n` +
659
- ` log: ${logPath}\n` +
660
- ` tail: cclaw eval runs tail ${id}\n` +
661
- ` status: cclaw eval runs status ${id}\n`);
662
- return 0;
663
- }
664
- finally {
665
- await logHandle.close();
666
- }
667
- }
668
- function formatRunRow(status) {
669
- const ended = status.endedAt ? ` ended=${status.endedAt}` : "";
670
- const exitCode = status.exitCode !== undefined ? ` exit=${status.exitCode}` : "";
671
- const alive = status.state === "running" ? (isRunAlive(status) ? "" : " (stale)") : "";
672
- return `${status.id} state=${status.state}${alive} pid=${status.pid} started=${status.startedAt}${ended}${exitCode}`;
673
- }
674
- async function runEvalRunsSubcommand(parsed, ctx) {
675
- const args = parsed.evalArgs ?? [];
676
- const action = args[0] ?? "list";
677
- if (action === "list") {
678
- const runs = await listRuns(ctx.cwd);
679
- if (runs.length === 0) {
680
- ctx.stdout.write("No eval runs recorded under .cclaw/evals/runs/.\n");
681
- return 0;
682
- }
683
- if (parsed.evalJson === true) {
684
- ctx.stdout.write(`${JSON.stringify(runs, null, 2)}\n`);
685
- return 0;
686
- }
687
- for (const run of runs)
688
- ctx.stdout.write(`${formatRunRow(run)}\n`);
689
- return 0;
690
- }
691
- if (action === "status") {
692
- const id = await resolveRunId(ctx.cwd, args[1]);
693
- if (!id) {
694
- error(ctx, `No such run: ${args[1] ?? "(none recorded)"}`);
695
- return 1;
696
- }
697
- const status = await readRunStatus(ctx.cwd, id);
698
- if (!status) {
699
- error(ctx, `Run ${id} has no status file.`);
700
- return 1;
701
- }
702
- if (parsed.evalJson === true) {
703
- ctx.stdout.write(`${JSON.stringify(status, null, 2)}\n`);
704
- }
705
- else {
706
- ctx.stdout.write(`${formatRunRow(status)}\n`);
707
- ctx.stdout.write(`log: ${runLogPath(ctx.cwd, id)}\n`);
708
- }
709
- return status.state === "failed" ? 1 : 0;
710
- }
711
- if (action === "tail") {
712
- const id = await resolveRunId(ctx.cwd, args[1]);
713
- if (!id) {
714
- error(ctx, `No such run: ${args[1] ?? "(none recorded)"}`);
715
- return 1;
716
- }
717
- const logFile = runLogPath(ctx.cwd, id);
718
- const stream = createReadStream(logFile, { encoding: "utf8" });
719
- await new Promise((resolve, reject) => {
720
- stream.on("data", (chunk) => ctx.stdout.write(chunk));
721
- stream.on("end", () => resolve());
722
- stream.on("error", reject);
723
- });
724
- return 0;
725
- }
726
- error(ctx, `Unknown \`cclaw eval runs\` action: ${action}. Use list | status | tail.`);
727
- return 1;
728
- }
729
- /**
730
- * Run the same corpus twice — once against the configured model, once
731
- * against `--compare-model=<id>` — and print a summary comparing the
732
- * two. Both reports are written to `.cclaw/evals/reports/` (unless
733
- * `--no-write` is set) and a unified diff is emitted to stdout. Exit
734
- * code is 1 when the override model regressed against the baseline
735
- * model, 0 otherwise.
736
- */
737
- async function runCompareModel(parsed, ctx, progress) {
738
- const baselineOpts = {
739
- projectRoot: ctx.cwd,
740
- stage: parsed.evalStage,
741
- mode: parsed.evalMode,
742
- schemaOnly: parsed.evalSchemaOnly === true,
743
- rules: parsed.evalRules === true,
744
- judge: parsed.evalJudge === true,
745
- ...(progress ? { progress } : {}),
746
- ...resolveMaxCostOption(parsed.evalMaxCostUsd, process.env)
747
- };
748
- ctx.stderr.write(`[cclaw eval] compare: running baseline model...\n`);
749
- const baseline = await runEval(baselineOpts);
750
- if ("kind" in baseline) {
751
- error(ctx, "--compare-model is incompatible with --dry-run.");
752
- return 1;
753
- }
754
- ctx.stderr.write(`[cclaw eval] compare: running ${parsed.evalCompareModel} ...\n`);
755
- const candidate = await runEval({
756
- ...baselineOpts,
757
- modelOverride: parsed.evalCompareModel
758
- });
759
- if ("kind" in candidate) {
760
- error(ctx, "--compare-model received an unexpected dry-run response.");
761
- return 1;
762
- }
763
- if (parsed.evalNoWrite !== true) {
764
- await writeJsonReport(ctx.cwd, baseline);
765
- await writeMarkdownReport(ctx.cwd, baseline);
766
- await writeJsonReport(ctx.cwd, candidate);
767
- await writeMarkdownReport(ctx.cwd, candidate);
768
- }
769
- const passDelta = candidate.summary.passed - baseline.summary.passed;
770
- const failDelta = candidate.summary.failed - baseline.summary.failed;
771
- const costDelta = candidate.summary.totalCostUsd - baseline.summary.totalCostUsd;
772
- if (parsed.evalJson === true) {
773
- ctx.stdout.write(`${JSON.stringify({
774
- baseline: {
775
- model: baseline.model,
776
- summary: baseline.summary
777
- },
778
- candidate: {
779
- model: candidate.model,
780
- summary: candidate.summary
781
- },
782
- delta: { passed: passDelta, failed: failDelta, costUsd: costDelta }
783
- }, null, 2)}\n`);
784
- }
785
- else {
786
- ctx.stdout.write(`cclaw eval compare-model:\n` +
787
- ` baseline ${baseline.model}: pass=${baseline.summary.passed}/${baseline.summary.totalCases} ` +
788
- `fail=${baseline.summary.failed} cost=$${baseline.summary.totalCostUsd.toFixed(4)}\n` +
789
- ` candidate ${candidate.model}: pass=${candidate.summary.passed}/${candidate.summary.totalCases} ` +
790
- `fail=${candidate.summary.failed} cost=$${candidate.summary.totalCostUsd.toFixed(4)}\n` +
791
- ` delta: passed=${passDelta >= 0 ? "+" : ""}${passDelta} ` +
792
- `failed=${failDelta >= 0 ? "+" : ""}${failDelta} ` +
793
- `cost=${costDelta >= 0 ? "+" : ""}$${costDelta.toFixed(4)}\n`);
794
- }
795
- return failDelta > 0 ? 1 : 0;
796
- }
797
479
  async function runCommand(parsed, ctx) {
798
480
  if (parsed.showHelp) {
799
481
  ctx.stdout.write(usage());
@@ -864,7 +546,8 @@ async function runCommand(parsed, ctx) {
864
546
  if (parsed.doctorJson === true) {
865
547
  const counts = doctorCountsBySeverity(filteredChecks);
866
548
  ctx.stdout.write(`${JSON.stringify({
867
- ok: doctorSucceeded(checks),
549
+ ok: doctorSucceeded(filteredChecks),
550
+ globalOk: doctorSucceeded(checks),
868
551
  filters: parsed.doctorOnly ?? [],
869
552
  counts,
870
553
  checks: filteredChecks
@@ -878,138 +561,13 @@ async function runCommand(parsed, ctx) {
878
561
  printDoctorText(ctx, filteredChecks, { explain, quiet });
879
562
  }
880
563
  }
881
- return doctorSucceeded(checks) ? 0 : 2;
564
+ return doctorSucceeded(filteredChecks) ? 0 : 2;
882
565
  }
883
566
  if (command === "upgrade") {
884
567
  await upgradeCclaw(ctx.cwd);
885
568
  info(ctx, "Upgraded .cclaw runtime and regenerated generated files");
886
569
  return 0;
887
570
  }
888
- if (command === "eval" && parsed.evalSubcommand === "runs") {
889
- return runEvalRunsSubcommand(parsed, ctx);
890
- }
891
- if (command === "eval" && parsed.evalBackground === true) {
892
- return spawnBackgroundEval(parsed, ctx);
893
- }
894
- if (command === "eval" && parsed.evalSubcommand === "diff") {
895
- const args = parsed.evalArgs ?? [];
896
- if (args.length !== 2) {
897
- error(ctx, `\`cclaw eval diff\` requires two arguments: <old> <new>. ` +
898
- `Example: cclaw eval diff 0.26.0 latest`);
899
- return 1;
900
- }
901
- const [oldSel, newSel] = args;
902
- try {
903
- const diff = await runEvalDiff({
904
- projectRoot: ctx.cwd,
905
- old: oldSel,
906
- new: newSel
907
- });
908
- if (parsed.evalJson === true) {
909
- ctx.stdout.write(`${JSON.stringify(diff, null, 2)}\n`);
910
- }
911
- else {
912
- ctx.stdout.write(formatDiffMarkdown(diff));
913
- }
914
- return diff.regressed ? 1 : 0;
915
- }
916
- catch (err) {
917
- error(ctx, err instanceof Error ? err.message : String(err));
918
- return 1;
919
- }
920
- }
921
- if (command === "eval") {
922
- const wantProgress = parsed.evalQuiet !== true &&
923
- parsed.dryRun !== true &&
924
- parsed.evalJson !== true;
925
- const progress = wantProgress
926
- ? createStderrProgressLogger({ writer: (s) => ctx.stderr.write(s) })
927
- : undefined;
928
- if (parsed.evalCompareModel !== undefined) {
929
- return runCompareModel(parsed, ctx, progress);
930
- }
931
- const result = await runEval({
932
- projectRoot: ctx.cwd,
933
- stage: parsed.evalStage,
934
- mode: parsed.evalMode,
935
- schemaOnly: parsed.evalSchemaOnly === true,
936
- rules: parsed.evalRules === true,
937
- judge: parsed.evalJudge === true,
938
- dryRun: parsed.dryRun === true,
939
- ...(progress ? { progress } : {}),
940
- ...resolveMaxCostOption(parsed.evalMaxCostUsd, process.env)
941
- });
942
- if ("kind" in result) {
943
- if (parsed.evalJson === true) {
944
- ctx.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
945
- return 0;
946
- }
947
- ctx.stdout.write(`cclaw eval dry-run\n`);
948
- ctx.stdout.write(` provider: ${result.config.provider}\n`);
949
- ctx.stdout.write(` baseUrl: ${result.config.baseUrl}\n`);
950
- ctx.stdout.write(` model: ${result.config.model}\n`);
951
- ctx.stdout.write(` source: ${result.config.source}\n`);
952
- ctx.stdout.write(` apiKey: ${result.config.apiKey ? "set" : "unset"}\n`);
953
- ctx.stdout.write(` mode: ${result.plannedMode}\n`);
954
- ctx.stdout.write(` corpus: ${result.corpus.total} case(s)\n`);
955
- for (const [stage, count] of Object.entries(result.corpus.byStage)) {
956
- ctx.stdout.write(` - ${stage}: ${count}\n`);
957
- }
958
- if (result.workflowCorpus.total > 0 || result.plannedMode === "workflow") {
959
- ctx.stdout.write(` workflow corpus: ${result.workflowCorpus.total} case(s)\n`);
960
- for (const wf of result.workflowCorpus.cases) {
961
- ctx.stdout.write(` - ${wf.id}: ${wf.stages.join(" → ")}\n`);
962
- }
963
- }
964
- ctx.stdout.write(` verifiers available:\n`);
965
- for (const [key, value] of Object.entries(result.verifiersAvailable)) {
966
- ctx.stdout.write(` - ${key}: ${value ? "yes" : "no"}\n`);
967
- }
968
- if (result.notes.length > 0) {
969
- ctx.stdout.write(` notes:\n`);
970
- for (const note of result.notes) {
971
- ctx.stdout.write(` - ${note}\n`);
972
- }
973
- }
974
- return 0;
975
- }
976
- if (parsed.evalUpdateBaseline === true && parsed.evalConfirm !== true) {
977
- error(ctx, "--update-baseline requires --confirm to prevent accidental baseline resets.");
978
- return 1;
979
- }
980
- if (parsed.evalUpdateBaseline === true) {
981
- if (result.summary.failed > 0) {
982
- error(ctx, `Refusing to update baselines: ${result.summary.failed} case(s) currently failing. Fix structural checks first.`);
983
- return 1;
984
- }
985
- const written = await writeBaselinesFromReport(ctx.cwd, result);
986
- for (const file of written) {
987
- info(ctx, `Baseline written: ${path.relative(ctx.cwd, file)}`);
988
- }
989
- }
990
- if (parsed.evalNoWrite !== true) {
991
- const jsonPath = await writeJsonReport(ctx.cwd, result);
992
- const mdPath = await writeMarkdownReport(ctx.cwd, result);
993
- info(ctx, `Report written: ${path.relative(ctx.cwd, jsonPath)}`);
994
- info(ctx, `Report written: ${path.relative(ctx.cwd, mdPath)}`);
995
- }
996
- const regressionCount = result.baselineDelta?.criticalFailures ?? 0;
997
- if (parsed.evalJson === true) {
998
- ctx.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
999
- }
1000
- else {
1001
- const regressionNote = regressionCount > 0 ? `, ${regressionCount} regression(s)` : "";
1002
- ctx.stdout.write(`cclaw eval: ${result.summary.totalCases} case(s), ` +
1003
- `${result.summary.passed} passed, ` +
1004
- `${result.summary.failed} failed, ` +
1005
- `${result.summary.skipped} skipped${regressionNote}\n`);
1006
- }
1007
- if (result.summary.failed > 0)
1008
- return 1;
1009
- if (regressionCount > 0)
1010
- return 1;
1011
- return 0;
1012
- }
1013
571
  if (command === "archive") {
1014
572
  const archived = await archiveRun(ctx.cwd, parsed.archiveName, {
1015
573
  skipRetro: parsed.archiveSkipRetro === true,
@@ -1021,13 +579,13 @@ async function runCommand(parsed, ctx) {
1021
579
  info(ctx, `Archived active artifacts to ${archived.archivePath}. Flow state reset to brainstorm.${snapshotSummary}`);
1022
580
  const k = archived.knowledge;
1023
581
  if (k.overThreshold) {
1024
- info(ctx, `Knowledge curation recommended: ${k.knowledgePath} now has ${k.activeEntryCount} active entries (soft threshold ${k.softThreshold}). Run \`/cc-learn curate\` to plan a soft-archive of stale/duplicate entries to ${RUNTIME_ROOT}/knowledge.archive.jsonl.`);
582
+ info(ctx, `Knowledge curation recommended: ${k.knowledgePath} now has ${k.activeEntryCount} active entries (soft threshold ${k.softThreshold}). Ask your harness to curate cclaw knowledge and plan a soft-archive of stale/duplicate entries to ${RUNTIME_ROOT}/knowledge.archive.jsonl.`);
1025
583
  }
1026
584
  else if (k.activeEntryCount > 0) {
1027
- info(ctx, `Knowledge: ${k.activeEntryCount}/${k.softThreshold} active entries. Run \`/cc-learn curate\` if you want a sweep before the next run.`);
585
+ info(ctx, `Knowledge: ${k.activeEntryCount}/${k.softThreshold} active entries. Ask your harness for a cclaw knowledge curation sweep before the next run if needed.`);
1028
586
  }
1029
587
  else {
1030
- info(ctx, `Knowledge: 0 active entries in ${k.knowledgePath}. Capture lessons from this run with \`/cc-learn add\` before they fade.`);
588
+ info(ctx, `Knowledge: 0 active entries in ${k.knowledgePath}. Capture lessons from this run through the learnings skill before they fade.`);
1031
589
  }
1032
590
  return 0;
1033
591
  }
@@ -1036,13 +594,13 @@ async function runCommand(parsed, ctx) {
1036
594
  return 0;
1037
595
  }
1038
596
  async function main() {
1039
- const parsed = parseArgs(process.argv.slice(2));
1040
597
  const ctx = {
1041
598
  cwd: process.cwd(),
1042
599
  stdout: process.stdout,
1043
600
  stderr: process.stderr
1044
601
  };
1045
602
  try {
603
+ const parsed = parseArgs(process.argv.slice(2));
1046
604
  const code = await runCommand(parsed, ctx);
1047
605
  process.exitCode = code;
1048
606
  }