cclaw-cli 0.48.35 → 0.51.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181)
  1. package/README.md +54 -82
  2. package/dist/artifact-linter.d.ts +4 -0
  3. package/dist/artifact-linter.js +24 -3
  4. package/dist/cli.d.ts +1 -19
  5. package/dist/cli.js +49 -495
  6. package/dist/constants.d.ts +2 -13
  7. package/dist/constants.js +1 -46
  8. package/dist/content/closeout-guidance.d.ts +14 -0
  9. package/dist/content/closeout-guidance.js +42 -0
  10. package/dist/content/core-agents.js +51 -9
  11. package/dist/content/decision-protocol.d.ts +12 -0
  12. package/dist/content/decision-protocol.js +20 -0
  13. package/dist/content/diff-command.d.ts +1 -2
  14. package/dist/content/diff-command.js +8 -94
  15. package/dist/content/examples.d.ts +4 -10
  16. package/dist/content/examples.js +10 -20
  17. package/dist/content/hook-events.js +2 -2
  18. package/dist/content/hook-inline-snippets.d.ts +5 -2
  19. package/dist/content/hook-inline-snippets.js +33 -1
  20. package/dist/content/hook-manifest.d.ts +3 -4
  21. package/dist/content/hook-manifest.js +11 -12
  22. package/dist/content/hooks.js +2 -0
  23. package/dist/content/ideate-command.d.ts +2 -0
  24. package/dist/content/ideate-command.js +31 -25
  25. package/dist/content/iron-laws.d.ts +5 -5
  26. package/dist/content/iron-laws.js +5 -5
  27. package/dist/content/learnings.d.ts +3 -4
  28. package/dist/content/learnings.js +24 -50
  29. package/dist/content/meta-skill.js +31 -24
  30. package/dist/content/next-command.js +38 -38
  31. package/dist/content/node-hooks.js +17 -343
  32. package/dist/content/opencode-plugin.js +2 -100
  33. package/dist/content/research-playbooks.js +14 -14
  34. package/dist/content/review-loop.d.ts +2 -0
  35. package/dist/content/review-loop.js +8 -0
  36. package/dist/content/session-hooks.js +14 -46
  37. package/dist/content/skills.d.ts +0 -5
  38. package/dist/content/skills.js +53 -128
  39. package/dist/content/stage-common-guidance.d.ts +0 -1
  40. package/dist/content/stage-common-guidance.js +15 -14
  41. package/dist/content/stage-schema.d.ts +26 -1
  42. package/dist/content/stage-schema.js +121 -40
  43. package/dist/content/stages/_lint-metadata/index.js +9 -15
  44. package/dist/content/stages/brainstorm.js +22 -43
  45. package/dist/content/stages/design.js +37 -57
  46. package/dist/content/stages/plan.js +22 -13
  47. package/dist/content/stages/review.js +24 -27
  48. package/dist/content/stages/scope.js +34 -46
  49. package/dist/content/stages/ship.js +7 -4
  50. package/dist/content/stages/spec.js +20 -9
  51. package/dist/content/stages/tdd.js +64 -44
  52. package/dist/content/start-command.js +10 -12
  53. package/dist/content/status-command.d.ts +2 -7
  54. package/dist/content/status-command.js +19 -146
  55. package/dist/content/subagents.d.ts +0 -5
  56. package/dist/content/subagents.js +47 -28
  57. package/dist/content/templates.d.ts +1 -1
  58. package/dist/content/templates.js +126 -135
  59. package/dist/content/track-render-context.d.ts +17 -0
  60. package/dist/content/track-render-context.js +44 -0
  61. package/dist/content/tree-command.d.ts +1 -2
  62. package/dist/content/tree-command.js +4 -87
  63. package/dist/content/utility-skills.d.ts +2 -29
  64. package/dist/content/utility-skills.js +2 -1533
  65. package/dist/content/view-command.js +29 -11
  66. package/dist/delegation.d.ts +1 -1
  67. package/dist/delegation.js +5 -15
  68. package/dist/doctor-registry.js +20 -21
  69. package/dist/doctor.js +88 -408
  70. package/dist/flow-state.d.ts +3 -0
  71. package/dist/flow-state.js +2 -0
  72. package/dist/harness-adapters.d.ts +1 -1
  73. package/dist/harness-adapters.js +48 -57
  74. package/dist/install.js +128 -520
  75. package/dist/internal/advance-stage.js +3 -9
  76. package/dist/internal/compound-readiness.d.ts +1 -1
  77. package/dist/internal/compound-readiness.js +1 -1
  78. package/dist/internal/tdd-loop-status.d.ts +1 -1
  79. package/dist/internal/tdd-loop-status.js +1 -1
  80. package/dist/knowledge-store.d.ts +16 -10
  81. package/dist/knowledge-store.js +51 -15
  82. package/dist/policy.js +16 -109
  83. package/dist/run-archive.d.ts +4 -6
  84. package/dist/run-archive.js +15 -20
  85. package/dist/run-persistence.d.ts +2 -2
  86. package/dist/run-persistence.js +3 -9
  87. package/package.json +1 -2
  88. package/dist/content/archive-command.d.ts +0 -2
  89. package/dist/content/archive-command.js +0 -124
  90. package/dist/content/compound-command.d.ts +0 -5
  91. package/dist/content/compound-command.js +0 -193
  92. package/dist/content/contexts.d.ts +0 -9
  93. package/dist/content/contexts.js +0 -65
  94. package/dist/content/contracts.d.ts +0 -2
  95. package/dist/content/contracts.js +0 -51
  96. package/dist/content/doctor-references.d.ts +0 -2
  97. package/dist/content/doctor-references.js +0 -150
  98. package/dist/content/eval-scaffold.d.ts +0 -15
  99. package/dist/content/eval-scaffold.js +0 -370
  100. package/dist/content/feature-command.d.ts +0 -2
  101. package/dist/content/feature-command.js +0 -123
  102. package/dist/content/flow-map.d.ts +0 -23
  103. package/dist/content/flow-map.js +0 -134
  104. package/dist/content/harness-doc.d.ts +0 -2
  105. package/dist/content/harness-doc.js +0 -202
  106. package/dist/content/harness-playbooks.d.ts +0 -24
  107. package/dist/content/harness-playbooks.js +0 -393
  108. package/dist/content/harness-tool-refs.d.ts +0 -20
  109. package/dist/content/harness-tool-refs.js +0 -268
  110. package/dist/content/ops-command.d.ts +0 -2
  111. package/dist/content/ops-command.js +0 -71
  112. package/dist/content/protocols.d.ts +0 -7
  113. package/dist/content/protocols.js +0 -215
  114. package/dist/content/retro-command.d.ts +0 -2
  115. package/dist/content/retro-command.js +0 -165
  116. package/dist/content/rewind-command.d.ts +0 -2
  117. package/dist/content/rewind-command.js +0 -106
  118. package/dist/content/tdd-log-command.d.ts +0 -2
  119. package/dist/content/tdd-log-command.js +0 -85
  120. package/dist/eval/agents/single-shot.d.ts +0 -27
  121. package/dist/eval/agents/single-shot.js +0 -79
  122. package/dist/eval/agents/with-tools.d.ts +0 -44
  123. package/dist/eval/agents/with-tools.js +0 -261
  124. package/dist/eval/agents/workflow.d.ts +0 -31
  125. package/dist/eval/agents/workflow.js +0 -155
  126. package/dist/eval/baseline.d.ts +0 -38
  127. package/dist/eval/baseline.js +0 -282
  128. package/dist/eval/config-loader.d.ts +0 -14
  129. package/dist/eval/config-loader.js +0 -395
  130. package/dist/eval/corpus.d.ts +0 -30
  131. package/dist/eval/corpus.js +0 -330
  132. package/dist/eval/cost-guard.d.ts +0 -102
  133. package/dist/eval/cost-guard.js +0 -190
  134. package/dist/eval/diff.d.ts +0 -64
  135. package/dist/eval/diff.js +0 -323
  136. package/dist/eval/llm-client.d.ts +0 -176
  137. package/dist/eval/llm-client.js +0 -267
  138. package/dist/eval/mode.d.ts +0 -28
  139. package/dist/eval/mode.js +0 -61
  140. package/dist/eval/progress.d.ts +0 -83
  141. package/dist/eval/progress.js +0 -59
  142. package/dist/eval/report.d.ts +0 -11
  143. package/dist/eval/report.js +0 -181
  144. package/dist/eval/rubric-loader.d.ts +0 -20
  145. package/dist/eval/rubric-loader.js +0 -143
  146. package/dist/eval/runner.d.ts +0 -81
  147. package/dist/eval/runner.js +0 -746
  148. package/dist/eval/runs.d.ts +0 -41
  149. package/dist/eval/runs.js +0 -114
  150. package/dist/eval/sandbox.d.ts +0 -38
  151. package/dist/eval/sandbox.js +0 -137
  152. package/dist/eval/tools/glob.d.ts +0 -2
  153. package/dist/eval/tools/glob.js +0 -163
  154. package/dist/eval/tools/grep.d.ts +0 -2
  155. package/dist/eval/tools/grep.js +0 -152
  156. package/dist/eval/tools/index.d.ts +0 -7
  157. package/dist/eval/tools/index.js +0 -35
  158. package/dist/eval/tools/read.d.ts +0 -2
  159. package/dist/eval/tools/read.js +0 -122
  160. package/dist/eval/tools/types.d.ts +0 -49
  161. package/dist/eval/tools/types.js +0 -41
  162. package/dist/eval/tools/write.d.ts +0 -2
  163. package/dist/eval/tools/write.js +0 -92
  164. package/dist/eval/types.d.ts +0 -561
  165. package/dist/eval/types.js +0 -47
  166. package/dist/eval/verifiers/judge.d.ts +0 -40
  167. package/dist/eval/verifiers/judge.js +0 -256
  168. package/dist/eval/verifiers/rules.d.ts +0 -24
  169. package/dist/eval/verifiers/rules.js +0 -218
  170. package/dist/eval/verifiers/structural.d.ts +0 -14
  171. package/dist/eval/verifiers/structural.js +0 -171
  172. package/dist/eval/verifiers/traceability.d.ts +0 -23
  173. package/dist/eval/verifiers/traceability.js +0 -84
  174. package/dist/eval/verifiers/workflow-consistency.d.ts +0 -21
  175. package/dist/eval/verifiers/workflow-consistency.js +0 -225
  176. package/dist/eval/workflow-corpus.d.ts +0 -7
  177. package/dist/eval/workflow-corpus.js +0 -207
  178. package/dist/feature-system.d.ts +0 -42
  179. package/dist/feature-system.js +0 -432
  180. package/dist/internal/knowledge-digest.d.ts +0 -7
  181. package/dist/internal/knowledge-digest.js +0 -93
@@ -1,150 +0,0 @@
1
- import { RUNTIME_ROOT } from "../constants.js";
2
- export const DOCTOR_REFERENCE_DIR = `${RUNTIME_ROOT}/references/doctor`;
3
- export const DOCTOR_REFERENCE_MARKDOWN = {
4
- "README.md": `# Doctor Reference Index
5
-
6
- Reference docs for \`cclaw doctor\` checks.
7
-
8
- ## Categories
9
-
10
- - \`runtime-layout.md\` - runtime directories, generated commands, and skill files
11
- - \`hooks-and-lifecycle.md\` - hook wiring and harness lifecycle integration
12
- - \`harness-and-routing.md\` - harness shims, AGENTS/CLAUDE routing blocks, cursor rule
13
- - \`state-and-gates.md\` - flow-state integrity and gate evidence contracts
14
- - \`delegation-and-preamble.md\` - mandatory delegations and lightweight announce discipline
15
- - \`traceability.md\` - spec/plan/tdd trace matrix expectations
16
- - \`tooling-capabilities.md\` - local runtime prerequisites (node only)
17
- - \`config-and-policy.md\` - config schema, rules policy, and validation references
18
- `,
19
- "runtime-layout.md": `# Runtime Layout
20
-
21
- ## Expected surfaces
22
-
23
- - \`.cclaw/\` root and generated subdirectories
24
- - stage command contracts under \`.cclaw/commands/\`
25
- - stage skills under \`.cclaw/skills/\`
26
- - utility command contracts (\`start\`, \`next\`, \`learn\`, \`status\`)
27
- - state files under \`.cclaw/state/\`
28
-
29
- ## Typical fixes
30
-
31
- 1. Run \`cclaw sync\` to re-materialize generated assets.
32
- 2. If runtime is severely drifted, run \`cclaw upgrade\`.
33
- 3. Avoid manual edits under generated runtime paths unless explicitly supported.
34
- `,
35
- "hooks-and-lifecycle.md": `# Hooks And Lifecycle
36
-
37
- ## Expected behavior
38
-
39
- - session start rehydrates flow + knowledge digest
40
- - pre-tool hooks run prompt/workflow guards
41
- - post-tool hooks run context monitor
42
- - stop hooks checkpoint progress
43
- - OpenCode uses plugin-based lifecycle integration
44
-
45
- ## Typical fixes
46
-
47
- 1. Re-run \`cclaw sync\` after harness config changes.
48
- 2. Ensure harness is enabled in \`.cclaw/config.yaml\`.
49
- 3. Validate hook JSON shape and remove malformed manual edits.
50
- `,
51
- "harness-and-routing.md": `# Harness And Routing
52
-
53
- ## Expected behavior
54
-
55
- - command shims exist for every enabled harness
56
- - managed routing block is present in \`AGENTS.md\` (and \`CLAUDE.md\` when applicable)
57
- - cursor rule mirrors workflow activation guidance
58
- - opencode plugin path is registered in opencode config
59
-
60
- ## Typical fixes
61
-
62
- 1. Confirm \`harnesses\` list in \`.cclaw/config.yaml\`.
63
- 2. Run \`cclaw sync\` to re-generate shims/routing files.
64
- 3. Remove stale harness artifacts for disabled harnesses via \`cclaw sync\`.
65
- `,
66
- "state-and-gates.md": `# State And Gates
67
-
68
- ## Expected behavior
69
-
70
- - \`flow-state.json\` has activeRunId, current stage, and consistent track/skippedStages
71
- - current-stage gate evidence is internally consistent
72
- - completed stages only include passed required gates
73
-
74
- ## Typical fixes
75
-
76
- 1. Run \`cclaw doctor --reconcile-gates\` to refresh current-stage gate catalog.
77
- 2. Repair inconsistent stage artifacts, then re-run doctor.
78
- 3. Do not manually mutate gate arrays without matching artifact evidence.
79
- `,
80
- "delegation-and-preamble.md": `# Delegation And Preamble
81
-
82
- ## Delegation contract
83
-
84
- - mandatory delegations for the current stage must be completed or waived
85
- - waivers should include an explicit reason
86
- - stale entries from previous runs are ignored by current-run checks
87
- - delegation entries use span-compatible fields (\`spanId\`, \`startTs\`, \`endTs\`, \`retryCount\`, \`evidenceRefs\`)
88
-
89
- ## Announce discipline contract
90
-
91
- - no dedicated preamble runtime log is required
92
- - substantial turns should still start with a concise announce (stage + goal + next action)
93
- - do not spam repeated announces when intent did not change
94
-
95
- ## Typical fixes
96
-
97
- 1. Append missing delegation records with \`completed\` or \`waived\` status.
98
- 2. Record harness-limitation waivers when native delegation is unavailable.
99
- 3. Keep announces concise and only refresh when plan/risk materially changes.
100
- `,
101
- "traceability.md": `# Traceability
102
-
103
- ## Expected behavior
104
-
105
- - spec criteria map to plan tasks
106
- - plan tasks map to tdd slices/tests
107
- - no orphaned criteria/tasks/tests when downstream artifacts exist
108
-
109
- ## Typical fixes
110
-
111
- 1. Add stable IDs to spec/plan/tdd sections.
112
- 2. Ensure mapping tables include every active criterion/task/slice.
113
- 3. Re-run \`cclaw doctor\` after artifact updates.
114
- `,
115
- "tooling-capabilities.md": `# Tooling Capabilities
116
-
117
- ## Required
118
-
119
- - \`node\` (>=20) — the only runtime dependency. All hooks, git-hook relays, and the
120
- \`cclaw\` CLI itself run on Node.js. No \`bash\`, \`python3\`, or \`jq\` required.
121
- - \`git\` — needed for worktree and pre-commit/pre-push relays.
122
-
123
- ## Not required (removed)
124
-
125
- Earlier releases relied on \`bash\` to execute generated shell hooks and on
126
- \`python3\`/\`jq\` as JSON fallback parsers. Node-only mode removes both: hooks
127
- dispatch through \`.cclaw/hooks/run-hook.cmd <hook-name>\` (which forwards to
128
- Node), so these tools
129
- are no longer part of the supported runtime contract.
130
-
131
- ## Typical fixes
132
-
133
- 1. Install Node.js 20 or newer (matches \`package.json\` \`engines\`) and ensure \`node\` is on \`PATH\`.
134
- 2. Re-run \`cclaw sync\` to regenerate hook configs after upgrading Node.
135
- `,
136
- "config-and-policy.md": `# Config And Policy
137
-
138
- ## Expected behavior
139
-
140
- - \`.cclaw/config.yaml\` parses and uses supported keys/values
141
- - \`.cclaw/rules/rules.json\` matches generated policy schema
142
- - policy needles and required sections remain present in generated contracts
143
-
144
- ## Typical fixes
145
-
146
- 1. Repair invalid config values and run \`cclaw sync\`.
147
- 2. Re-generate policy files via \`cclaw sync\` if drift is detected.
148
- 3. Keep generated contracts aligned with stage schemas and policy needles.
149
- `
150
- };
@@ -1,15 +0,0 @@
1
- /**
2
- * Static scaffold for `.cclaw/evals/`. Written on `cclaw init` and refreshed
3
- * on `cclaw sync` only if the files are missing (user content wins). The
4
- * scaffold is intentionally minimal: a usable default config plus short
5
- * READMEs that point at `docs/evals.md` for authoring guidance.
6
- */
7
- export declare const EVAL_CONFIG_YAML = "# cclaw eval config\n# See docs/evals.md for the full schema and rollout plan.\n#\n# All values can be overridden at runtime with CCLAW_EVAL_* environment\n# variables (env wins). Secrets like CCLAW_EVAL_API_KEY never live here.\nprovider: zai\nbaseUrl: https://api.z.ai/api/coding/paas/v4\nmodel: glm-5.1\n\n# Default evaluation mode when --mode is not supplied.\n# fixture = verify existing artifacts (cheap, LLM-free unless --judge is set)\n# agent = LLM drafts one stage's artifact in a sandbox with tools\n# workflow = LLM runs the full multi-stage flow (brainstorm \u2192 plan)\n# (Legacy alias --tier=A|B|C still works; A\u2192fixture, B\u2192agent, C\u2192workflow.)\ndefaultMode: fixture\n\n# Per-call timeout and retry budget.\ntimeoutMs: 120000\nmaxRetries: 2\n\n# Optional hard-stop on estimated USD spend per day. Leave unset for no cap.\n# dailyUsdCap: 5\n\n# Regression thresholds used by CI.\nregression:\n # Fail when overall score drops by more than this fraction (e.g. -0.15 = 15%).\n failIfDeltaBelow: -0.15\n # Fail when any single critical rubric drops below this absolute score.\n failIfCriticalBelow: 3.0\n";
8
- export declare const EVAL_CORPUS_README = "# Eval Corpus\n\nSeed cases live in `./<stage>/<id>.yaml`, one file per case.\nSee `docs/evals.md` for the schema.\n\nMinimal shape:\n\n```yaml\nid: brainstorm-01\nstage: brainstorm\ninput_prompt: |\n One short paragraph describing the user's task.\ncontext_files: []\nexpected:\n # verifier-specific hints; optional\n```\n\nStart with 3 structural cases per stage (24 total), then expand to 5 per\nstage (40 total) once rule verifiers land. Agent/workflow runs may add\n`context_files` pulled from real projects to exercise the sandbox.\n";
9
- export declare const EVAL_RUBRICS_README = "# Eval Rubrics\n\nLLM-judge rubrics. Each rubric is a short list of checks scored on a\n`1\u20135` scale with a rationale. The runner picks `<stage>.yaml` when\n`cclaw eval --judge` is invoked; every stage ships a starter rubric\nbelow \u2014 edit the checks to match what your team cares about, and add\n`critical: true` to the checks that should hard-fail nightly CI on\nregression.\n\n```yaml\nstage: brainstorm\nchecks:\n - id: distinctness\n prompt: \"Are the proposed directions genuinely distinct (not rephrasings)?\"\n scale: \"1-5 where 5=fully distinct approaches\"\n weight: 1.0\n critical: false\n```\n\nSee `docs/evals.md` for the full schema.\n";
10
- export declare const EVAL_RUBRIC_FILES: ReadonlyArray<{
11
- stage: string;
12
- contents: string;
13
- }>;
14
- export declare const EVAL_BASELINES_README = "# Eval Baselines\n\nFrozen score snapshots used by regression gates. Baselines are committed to\ngit and updated explicitly via `cclaw eval --update-baseline --confirm`.\n\nEach baseline file is a JSON document keyed by stage and case id. Do not edit\nby hand; CI will flag baseline churn.\n";
15
- export declare const EVAL_REPORTS_README = "# Eval Reports\n\nGenerated reports (JSON + Markdown) land here. This directory is gitignored.\nRun `cclaw eval --dry-run` to preview configuration without producing a\nreport.\n";
@@ -1,370 +0,0 @@
1
- /**
2
- * Static scaffold for `.cclaw/evals/`. Written on `cclaw init` and refreshed
3
- * on `cclaw sync` only if the files are missing (user content wins). The
4
- * scaffold is intentionally minimal: a usable default config plus short
5
- * READMEs that point at `docs/evals.md` for authoring guidance.
6
- */
7
- export const EVAL_CONFIG_YAML = `# cclaw eval config
8
- # See docs/evals.md for the full schema and rollout plan.
9
- #
10
- # All values can be overridden at runtime with CCLAW_EVAL_* environment
11
- # variables (env wins). Secrets like CCLAW_EVAL_API_KEY never live here.
12
- provider: zai
13
- baseUrl: https://api.z.ai/api/coding/paas/v4
14
- model: glm-5.1
15
-
16
- # Default evaluation mode when --mode is not supplied.
17
- # fixture = verify existing artifacts (cheap, LLM-free unless --judge is set)
18
- # agent = LLM drafts one stage's artifact in a sandbox with tools
19
- # workflow = LLM runs the full multi-stage flow (brainstorm → plan)
20
- # (Legacy alias --tier=A|B|C still works; A→fixture, B→agent, C→workflow.)
21
- defaultMode: fixture
22
-
23
- # Per-call timeout and retry budget.
24
- timeoutMs: 120000
25
- maxRetries: 2
26
-
27
- # Optional hard-stop on estimated USD spend per day. Leave unset for no cap.
28
- # dailyUsdCap: 5
29
-
30
- # Regression thresholds used by CI.
31
- regression:
32
- # Fail when overall score drops by more than this fraction (e.g. -0.15 = 15%).
33
- failIfDeltaBelow: -0.15
34
- # Fail when any single critical rubric drops below this absolute score.
35
- failIfCriticalBelow: 3.0
36
- `;
37
- export const EVAL_CORPUS_README = `# Eval Corpus
38
-
39
- Seed cases live in \`./<stage>/<id>.yaml\`, one file per case.
40
- See \`docs/evals.md\` for the schema.
41
-
42
- Minimal shape:
43
-
44
- \`\`\`yaml
45
- id: brainstorm-01
46
- stage: brainstorm
47
- input_prompt: |
48
- One short paragraph describing the user's task.
49
- context_files: []
50
- expected:
51
- # verifier-specific hints; optional
52
- \`\`\`
53
-
54
- Start with 3 structural cases per stage (24 total), then expand to 5 per
55
- stage (40 total) once rule verifiers land. Agent/workflow runs may add
56
- \`context_files\` pulled from real projects to exercise the sandbox.
57
- `;
58
- export const EVAL_RUBRICS_README = `# Eval Rubrics
59
-
60
- LLM-judge rubrics. Each rubric is a short list of checks scored on a
61
- \`1–5\` scale with a rationale. The runner picks \`<stage>.yaml\` when
62
- \`cclaw eval --judge\` is invoked; every stage ships a starter rubric
63
- below — edit the checks to match what your team cares about, and add
64
- \`critical: true\` to the checks that should hard-fail nightly CI on
65
- regression.
66
-
67
- \`\`\`yaml
68
- stage: brainstorm
69
- checks:
70
- - id: distinctness
71
- prompt: "Are the proposed directions genuinely distinct (not rephrasings)?"
72
- scale: "1-5 where 5=fully distinct approaches"
73
- weight: 1.0
74
- critical: false
75
- \`\`\`
76
-
77
- See \`docs/evals.md\` for the full schema.
78
- `;
79
- const STARTER_RUBRICS = [
80
- {
81
- stage: "brainstorm",
82
- checks: [
83
- {
84
- id: "distinctness",
85
- prompt: "Are the proposed directions genuinely distinct (different approaches, not rephrasings of one idea)?",
86
- scale: "1-5 where 5 = every direction uses a materially different approach",
87
- weight: 1.0,
88
- critical: true
89
- },
90
- {
91
- id: "coverage",
92
- prompt: "Do the directions cover the problem space (at least one tackling cost, one velocity, one risk)?",
93
- scale: "1-5 where 5 = each major trade-off dimension has a direction",
94
- weight: 1.0
95
- },
96
- {
97
- id: "actionability",
98
- prompt: "Could a reader pick one direction and start a scope doc tomorrow without asking clarifying questions?",
99
- scale: "1-5 where 5 = every direction is concrete enough to scope immediately",
100
- weight: 1.0
101
- },
102
- {
103
- id: "recommendation-clarity",
104
- prompt: "Is the Recommendation section explicit, single-voiced, and consistent with the highest-ranked direction?",
105
- scale: "1-5 where 5 = recommendation names the chosen direction and the decisive trade-off",
106
- weight: 1.0,
107
- critical: true
108
- }
109
- ]
110
- },
111
- {
112
- stage: "scope",
113
- checks: [
114
- {
115
- id: "problem-statement",
116
- prompt: "Is the problem statement anchored on user/system behavior (not on a proposed solution)?",
117
- scale: "1-5 where 5 = problem is described independently of any implementation choice",
118
- weight: 1.0,
119
- critical: true
120
- },
121
- {
122
- id: "non-goals",
123
- prompt: "Are non-goals explicit and mutually-exclusive with the goals (no overlap, no vague 'we might' entries)?",
124
- scale: "1-5 where 5 = every non-goal is a crisp decision a future reader can defend",
125
- weight: 1.0
126
- },
127
- {
128
- id: "decision-ids",
129
- prompt: "Does the Decisions section use stable D-NN ids and name who (or what) owns each decision?",
130
- scale: "1-5 where 5 = every decision has a D-NN id and an explicit owner",
131
- weight: 1.0,
132
- critical: true
133
- },
134
- {
135
- id: "risks",
136
- prompt: "Are risks concrete (named system, threshold, or scenario) rather than generic hedges?",
137
- scale: "1-5 where 5 = each risk is testable by observing a specific signal",
138
- weight: 0.8
139
- }
140
- ]
141
- },
142
- {
143
- stage: "design",
144
- checks: [
145
- {
146
- id: "decision-trace",
147
- prompt: "Does the design doc restate every scope D-NN that drives the architecture, and call out the ones it rejects?",
148
- scale: "1-5 where 5 = full D-NN trace with explicit kept/rejected markers",
149
- weight: 1.0,
150
- critical: true
151
- },
152
- {
153
- id: "diagram-or-flow",
154
- prompt: "Is there at least one diagram or clearly labeled flow section that shows data and control moving across the system?",
155
- scale: "1-5 where 5 = diagram covers read path, write path, and failure path",
156
- weight: 1.0
157
- },
158
- {
159
- id: "alternatives-considered",
160
- prompt: "Are concrete alternatives considered with explicit trade-offs (cost, complexity, latency)?",
161
- scale: "1-5 where 5 = at least two alternatives are rejected with reasons tied to measurable properties",
162
- weight: 0.8
163
- },
164
- {
165
- id: "interface-stability",
166
- prompt: "Are public interfaces (APIs, queues, tables) named, typed, and marked as SEMVER-stable or experimental?",
167
- scale: "1-5 where 5 = every interface has a name, a type/shape, and a stability tag",
168
- weight: 1.0
169
- }
170
- ]
171
- },
172
- {
173
- stage: "spec",
174
- checks: [
175
- {
176
- id: "acceptance-criteria",
177
- prompt: "Does the spec have explicit Acceptance Criteria bullets that are unambiguously verifiable?",
178
- scale: "1-5 where 5 = each AC states an observable condition with clear pass/fail",
179
- weight: 1.0,
180
- critical: true
181
- },
182
- {
183
- id: "edge-cases",
184
- prompt: "Are failure modes and edge cases enumerated (empty input, concurrent writers, partial outage)?",
185
- scale: "1-5 where 5 = at least three distinct edge cases with expected behavior",
186
- weight: 1.0
187
- },
188
- {
189
- id: "test-plan-hooks",
190
- prompt: "Does the spec name the test surfaces (unit, integration, e2e, synthetic probe) that will validate each AC?",
191
- scale: "1-5 where 5 = every AC maps to at least one test surface",
192
- weight: 1.0
193
- },
194
- {
195
- id: "traceability",
196
- prompt: "Does the spec cite the originating scope decisions (D-NN) and design sections so future engineers can trace back?",
197
- scale: "1-5 where 5 = every material choice links to a D-NN or design heading",
198
- weight: 0.8,
199
- critical: true
200
- }
201
- ]
202
- },
203
- {
204
- stage: "plan",
205
- checks: [
206
- {
207
- id: "task-granularity",
208
- prompt: "Are tasks sized so one engineer can land each in a single PR (<1 day of work)?",
209
- scale: "1-5 where 5 = every T-NN fits in a single reviewable PR",
210
- weight: 1.0,
211
- critical: true
212
- },
213
- {
214
- id: "tdd-loop",
215
- prompt: "Does each task have explicit RED/GREEN/REFACTOR expectations or an equivalent TDD-compatible exit condition?",
216
- scale: "1-5 where 5 = every task says what test fails first and what code makes it pass",
217
- weight: 1.0,
218
- critical: true
219
- },
220
- {
221
- id: "dependency-graph",
222
- prompt: "Is the dependency order between tasks explicit (and minimal), so parallelizable work is called out?",
223
- scale: "1-5 where 5 = every task lists its blockers and independent tasks are marked parallelizable",
224
- weight: 0.8
225
- },
226
- {
227
- id: "scope-traceability",
228
- prompt: "Does the plan reference the scope D-NN ids that drive each task, and does coverage leave no decision orphaned?",
229
- scale: "1-5 where 5 = every D-NN appears in at least one task and every task names its D-NN",
230
- weight: 1.0
231
- }
232
- ]
233
- },
234
- {
235
- stage: "tdd",
236
- checks: [
237
- {
238
- id: "red-first",
239
- prompt: "Does the artifact show a failing test (RED) before the implementation change (GREEN)?",
240
- scale: "1-5 where 5 = RED command output is quoted and the fix lands after",
241
- weight: 1.0,
242
- critical: true
243
- },
244
- {
245
- id: "refactor-evidence",
246
- prompt: "Is there a REFACTOR step with a diff or named improvement (not just passing tests)?",
247
- scale: "1-5 where 5 = REFACTOR names a specific code-quality win and cites the affected file(s)",
248
- weight: 0.8
249
- },
250
- {
251
- id: "gate-evidence",
252
- prompt: "Does the artifact quote the output of the required gates (lint, typecheck, tests) after the change?",
253
- scale: "1-5 where 5 = every gate command is reproduced with its exit status",
254
- weight: 1.0,
255
- critical: true
256
- },
257
- {
258
- id: "learnings",
259
- prompt: "Does the artifact capture at least one durable learning (pattern, pitfall, follow-up) for future runs?",
260
- scale: "1-5 where 5 = learning is specific, filed under knowledge.jsonl or an equivalent store",
261
- weight: 0.6
262
- }
263
- ]
264
- },
265
- {
266
- stage: "review",
267
- checks: [
268
- {
269
- id: "two-layer-structure",
270
- prompt: "Does the review show both layers (automated gates + human judgment) with distinct evidence?",
271
- scale: "1-5 where 5 = Layer 1 cites tool outputs, Layer 2 cites reviewer reasoning",
272
- weight: 1.0,
273
- critical: true
274
- },
275
- {
276
- id: "blocker-severity",
277
- prompt: "Are issues classified by severity (blocker / major / minor) with one-line rationales?",
278
- scale: "1-5 where 5 = every finding names severity + consequence if not fixed",
279
- weight: 1.0
280
- },
281
- {
282
- id: "security-posture",
283
- prompt: "Does the review cover security-relevant areas explicitly (secrets, authz, PII, deps)?",
284
- scale: "1-5 where 5 = each security dimension is addressed (with 'n/a' counted as a deliberate pass)",
285
- weight: 0.8,
286
- critical: true
287
- },
288
- {
289
- id: "follow-ups",
290
- prompt: "Are non-blocking follow-ups filed as explicit tickets or knowledge-log entries (not left as prose)?",
291
- scale: "1-5 where 5 = every follow-up has a home and an owner",
292
- weight: 0.8
293
- }
294
- ]
295
- },
296
- {
297
- stage: "ship",
298
- checks: [
299
- {
300
- id: "release-readiness",
301
- prompt: "Does the artifact prove release readiness (gates green, changelog, version bump)?",
302
- scale: "1-5 where 5 = each readiness item is linked to concrete evidence",
303
- weight: 1.0,
304
- critical: true
305
- },
306
- {
307
- id: "rollback",
308
- prompt: "Is there an explicit rollback path (command, feature-flag, migration reversal)?",
309
- scale: "1-5 where 5 = rollback is reproducible from the doc with no context rehydration",
310
- weight: 1.0,
311
- critical: true
312
- },
313
- {
314
- id: "monitoring",
315
- prompt: "Are monitoring and alerting hooks named (dashboards, logs, SLO tripwires)?",
316
- scale: "1-5 where 5 = each hook has a canonical URL or query",
317
- weight: 0.8
318
- },
319
- {
320
- id: "retro-seed",
321
- prompt: "Does the artifact leave a retro seed (what went well, what to change for the next run)?",
322
- scale: "1-5 where 5 = at least one distinct 'keep' and one 'change' statement",
323
- weight: 0.6
324
- }
325
- ]
326
- }
327
- ];
328
- function renderRubric(rubric) {
329
- const lines = [];
330
- lines.push(`# Starter rubric for the \`${rubric.stage}\` stage.`);
331
- lines.push(`# Edit the checks to reflect your team's bar before running`);
332
- lines.push(`# \`cclaw eval --judge\`. Every check id is used verbatim in`);
333
- lines.push(`# report output and baseline files, so keep slugs stable once`);
334
- lines.push(`# they start appearing in CI.`);
335
- lines.push(`stage: ${rubric.stage}`);
336
- lines.push(`checks:`);
337
- for (const check of rubric.checks) {
338
- lines.push(` - id: ${check.id}`);
339
- lines.push(` prompt: >-`);
340
- lines.push(` ${check.prompt}`);
341
- if (check.scale !== undefined) {
342
- lines.push(` scale: ${JSON.stringify(check.scale)}`);
343
- }
344
- if (check.weight !== undefined) {
345
- lines.push(` weight: ${check.weight}`);
346
- }
347
- if (check.critical === true) {
348
- lines.push(` critical: true`);
349
- }
350
- }
351
- return `${lines.join("\n")}\n`;
352
- }
353
- export const EVAL_RUBRIC_FILES = STARTER_RUBRICS.map((rubric) => ({
354
- stage: rubric.stage,
355
- contents: renderRubric(rubric)
356
- }));
357
- export const EVAL_BASELINES_README = `# Eval Baselines
358
-
359
- Frozen score snapshots used by regression gates. Baselines are committed to
360
- git and updated explicitly via \`cclaw eval --update-baseline --confirm\`.
361
-
362
- Each baseline file is a JSON document keyed by stage and case id. Do not edit
363
- by hand; CI will flag baseline churn.
364
- `;
365
- export const EVAL_REPORTS_README = `# Eval Reports
366
-
367
- Generated reports (JSON + Markdown) land here. This directory is gitignored.
368
- Run \`cclaw eval --dry-run\` to preview configuration without producing a
369
- report.
370
- `;
@@ -1,2 +0,0 @@
1
- export declare function featureCommandContract(): string;
2
- export declare function featureCommandSkillMarkdown(): string;
@@ -1,123 +0,0 @@
1
- import { RUNTIME_ROOT } from "../constants.js";
2
- const FEATURE_SKILL_FOLDER = "using-git-worktrees";
3
- const FEATURE_SKILL_NAME = "using-git-worktrees";
4
- function activeFeaturePath() {
5
- return `${RUNTIME_ROOT}/state/active-feature.json`;
6
- }
7
- function worktreeRegistryPath() {
8
- return `${RUNTIME_ROOT}/state/worktrees.json`;
9
- }
10
- function managedWorktreesRoot() {
11
- return `${RUNTIME_ROOT}/worktrees`;
12
- }
13
- function legacyFeaturesRoot() {
14
- return `${RUNTIME_ROOT}/features`;
15
- }
16
- export function featureCommandContract() {
17
- return `# /cc-ops feature
18
-
19
- ## Purpose
20
-
21
- Manage parallel feature execution using git worktrees (git-native isolation).
22
-
23
- Runtime state/artifacts are **never** copied between features anymore. Isolation is branch/worktree-level.
24
-
25
- ## HARD-GATE
26
-
27
- - Do not mutate feature context by copying \`${RUNTIME_ROOT}/artifacts\` or \`${RUNTIME_ROOT}/state\` between feature IDs.
28
- - Use \`git worktree add\` for new feature execution paths.
29
- - Keep \`${activeFeaturePath()}\` + \`${worktreeRegistryPath()}\` as the feature routing source of truth.
30
- - Treat \`${legacyFeaturesRoot()}/\` as read-only migration data.
31
-
32
- ## Subcommands
33
-
34
- ### \`/cc-ops feature status\`
35
- Show:
36
- - active feature id from \`${activeFeaturePath()}\`
37
- - resolved worktree entry from \`${worktreeRegistryPath()}\`
38
- - active workspace path
39
-
40
- ### \`/cc-ops feature list\`
41
- List registered feature worktrees from \`${worktreeRegistryPath()}\` and mark active entry.
42
-
43
- ### \`/cc-ops feature new <feature-id>\`
44
- 1. Validate \`feature-id\` (lowercase slug, letters/numbers/dashes).
45
- 2. Create worktree under \`${managedWorktreesRoot()}/<feature-id>\`.
46
- 3. Create/switch branch using \`git worktree add\` (prefer \`feature/<feature-id>\` naming).
47
- 4. Register entry in \`${worktreeRegistryPath()}\`.
48
-
49
- Optional flags:
50
- - \`--clone-active\`: seed from active branch HEAD (default behavior).
51
- - \`--switch\`: mark new feature as active after registration.
52
-
53
- ### \`/cc-ops feature switch <feature-id>\`
54
- 1. Validate that \`<feature-id>\` exists in \`${worktreeRegistryPath()}\`.
55
- 2. Update \`${activeFeaturePath()}\`.
56
- 3. Print target worktree path and instruct the operator/agent to continue from that workspace root.
57
-
58
- ## Migration note
59
-
60
- Legacy snapshot folders under \`${legacyFeaturesRoot()}/\` are supported as read-only references during migration and should not be used for new execution.
61
-
62
- ## Output
63
-
64
- Always print:
65
- - active feature before
66
- - active feature after
67
- - target workspace path
68
- - workspace source (\`git-worktree\` | \`workspace\` | \`legacy-snapshot\`)
69
-
70
- ## Primary skill
71
-
72
- **${RUNTIME_ROOT}/skills/${FEATURE_SKILL_FOLDER}/SKILL.md**
73
- `;
74
- }
75
- export function featureCommandSkillMarkdown() {
76
- return `---
77
- name: ${FEATURE_SKILL_NAME}
78
- description: "Manage cclaw feature isolation using git worktrees (status/list/new/switch)."
79
- ---
80
-
81
- # /cc-ops feature — Git Worktree Manager
82
-
83
- ## HARD-GATE
84
-
85
- Do not implement feature switching by copying runtime files between feature IDs. Use git worktrees and registry updates only.
86
-
87
- ## Paths
88
-
89
- - Active pointer: \`${activeFeaturePath()}\`
90
- - Worktree registry: \`${worktreeRegistryPath()}\`
91
- - Managed worktree root: \`${managedWorktreesRoot()}\`
92
- - Legacy snapshots (read-only): \`${legacyFeaturesRoot()}\`
93
-
94
- ## Protocol
95
-
96
- ### status
97
- 1. Read \`${activeFeaturePath()}\`.
98
- 2. Resolve active entry in \`${worktreeRegistryPath()}\`.
99
- 3. Print active id + workspace path + source.
100
-
101
- ### list
102
- 1. Enumerate entries in \`${worktreeRegistryPath()}\`.
103
- 2. Mark the active one.
104
- 3. Highlight any \`legacy-snapshot\` entries as migration-only.
105
-
106
- ### new <feature-id> [--clone-active] [--switch]
107
- 1. Validate \`feature-id\` and ensure not already registered.
108
- 2. Run \`git worktree add\` to create \`${managedWorktreesRoot()}/<feature-id>\`.
109
- 3. Register entry in \`${worktreeRegistryPath()}\` with branch + path + source.
110
- 4. If \`--switch\`, update \`${activeFeaturePath()}\`.
111
-
112
- ### switch <feature-id>
113
- 1. Validate target exists in \`${worktreeRegistryPath()}\`.
114
- 2. Update \`${activeFeaturePath()}\`.
115
- 3. Report target path and require continuation from that workspace root.
116
-
117
- ## Safety checks
118
-
119
- - If target feature does not exist: block and suggest \`/cc-ops feature new <id>\`.
120
- - If \`git worktree add\` fails: do not write partial registry updates.
121
- - If active feature maps to \`legacy-snapshot\`, report read-only migration warning.
122
- `;
123
- }