cclaw-cli 0.49.0 → 0.51.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183)
  1. package/README.md +57 -84
  2. package/dist/artifact-linter.d.ts +4 -0
  3. package/dist/artifact-linter.js +24 -3
  4. package/dist/cli.d.ts +1 -19
  5. package/dist/cli.js +49 -491
  6. package/dist/constants.d.ts +2 -13
  7. package/dist/constants.js +1 -43
  8. package/dist/content/closeout-guidance.d.ts +14 -0
  9. package/dist/content/closeout-guidance.js +42 -0
  10. package/dist/content/core-agents.js +55 -17
  11. package/dist/content/decision-protocol.d.ts +12 -0
  12. package/dist/content/decision-protocol.js +20 -0
  13. package/dist/content/diff-command.d.ts +1 -2
  14. package/dist/content/diff-command.js +8 -94
  15. package/dist/content/examples.d.ts +4 -10
  16. package/dist/content/examples.js +10 -20
  17. package/dist/content/hook-events.js +2 -2
  18. package/dist/content/hook-inline-snippets.d.ts +5 -2
  19. package/dist/content/hook-inline-snippets.js +33 -1
  20. package/dist/content/hook-manifest.d.ts +3 -4
  21. package/dist/content/hook-manifest.js +11 -12
  22. package/dist/content/hooks.js +44 -21
  23. package/dist/content/ideate-command.d.ts +2 -0
  24. package/dist/content/ideate-command.js +34 -25
  25. package/dist/content/iron-laws.d.ts +5 -5
  26. package/dist/content/iron-laws.js +5 -5
  27. package/dist/content/language-policy.d.ts +2 -0
  28. package/dist/content/language-policy.js +13 -0
  29. package/dist/content/learnings.d.ts +3 -4
  30. package/dist/content/learnings.js +26 -50
  31. package/dist/content/meta-skill.js +33 -22
  32. package/dist/content/next-command.js +41 -38
  33. package/dist/content/node-hooks.js +17 -345
  34. package/dist/content/opencode-plugin.js +5 -103
  35. package/dist/content/research-playbooks.js +14 -14
  36. package/dist/content/review-loop.d.ts +2 -0
  37. package/dist/content/review-loop.js +8 -0
  38. package/dist/content/session-hooks.js +15 -47
  39. package/dist/content/skills.d.ts +0 -5
  40. package/dist/content/skills.js +55 -128
  41. package/dist/content/stage-common-guidance.d.ts +0 -1
  42. package/dist/content/stage-common-guidance.js +17 -14
  43. package/dist/content/stage-schema.d.ts +26 -1
  44. package/dist/content/stage-schema.js +121 -40
  45. package/dist/content/stages/_lint-metadata/index.js +9 -15
  46. package/dist/content/stages/brainstorm.js +22 -43
  47. package/dist/content/stages/design.js +37 -57
  48. package/dist/content/stages/plan.js +22 -13
  49. package/dist/content/stages/review.js +24 -27
  50. package/dist/content/stages/scope.js +34 -46
  51. package/dist/content/stages/ship.js +7 -4
  52. package/dist/content/stages/spec.js +20 -9
  53. package/dist/content/stages/tdd.js +64 -44
  54. package/dist/content/start-command.js +13 -12
  55. package/dist/content/status-command.d.ts +2 -7
  56. package/dist/content/status-command.js +19 -146
  57. package/dist/content/subagents.d.ts +0 -5
  58. package/dist/content/subagents.js +51 -28
  59. package/dist/content/templates.d.ts +1 -1
  60. package/dist/content/templates.js +126 -135
  61. package/dist/content/track-render-context.d.ts +17 -0
  62. package/dist/content/track-render-context.js +44 -0
  63. package/dist/content/tree-command.d.ts +1 -2
  64. package/dist/content/tree-command.js +4 -87
  65. package/dist/content/utility-skills.d.ts +2 -29
  66. package/dist/content/utility-skills.js +2 -1534
  67. package/dist/content/view-command.js +31 -11
  68. package/dist/delegation.d.ts +1 -1
  69. package/dist/delegation.js +5 -15
  70. package/dist/doctor-registry.js +20 -21
  71. package/dist/doctor.js +88 -344
  72. package/dist/flow-state.d.ts +3 -0
  73. package/dist/flow-state.js +2 -0
  74. package/dist/harness-adapters.d.ts +1 -1
  75. package/dist/harness-adapters.js +51 -58
  76. package/dist/install.js +128 -358
  77. package/dist/internal/advance-stage.js +3 -9
  78. package/dist/internal/compound-readiness.d.ts +1 -1
  79. package/dist/internal/compound-readiness.js +1 -1
  80. package/dist/internal/tdd-loop-status.d.ts +1 -1
  81. package/dist/internal/tdd-loop-status.js +1 -1
  82. package/dist/knowledge-store.d.ts +16 -10
  83. package/dist/knowledge-store.js +51 -15
  84. package/dist/policy.js +16 -105
  85. package/dist/run-archive.d.ts +4 -6
  86. package/dist/run-archive.js +15 -20
  87. package/dist/run-persistence.d.ts +2 -2
  88. package/dist/run-persistence.js +3 -9
  89. package/package.json +1 -2
  90. package/dist/content/archive-command.d.ts +0 -2
  91. package/dist/content/archive-command.js +0 -124
  92. package/dist/content/compound-command.d.ts +0 -5
  93. package/dist/content/compound-command.js +0 -193
  94. package/dist/content/contexts.d.ts +0 -18
  95. package/dist/content/contexts.js +0 -24
  96. package/dist/content/contracts.d.ts +0 -2
  97. package/dist/content/contracts.js +0 -51
  98. package/dist/content/doctor-references.d.ts +0 -2
  99. package/dist/content/doctor-references.js +0 -150
  100. package/dist/content/eval-scaffold.d.ts +0 -15
  101. package/dist/content/eval-scaffold.js +0 -370
  102. package/dist/content/feature-command.d.ts +0 -2
  103. package/dist/content/feature-command.js +0 -123
  104. package/dist/content/flow-map.d.ts +0 -23
  105. package/dist/content/flow-map.js +0 -134
  106. package/dist/content/harness-doc.d.ts +0 -2
  107. package/dist/content/harness-doc.js +0 -202
  108. package/dist/content/harness-playbooks.d.ts +0 -24
  109. package/dist/content/harness-playbooks.js +0 -393
  110. package/dist/content/harness-tool-refs.d.ts +0 -20
  111. package/dist/content/harness-tool-refs.js +0 -268
  112. package/dist/content/ops-command.d.ts +0 -2
  113. package/dist/content/ops-command.js +0 -71
  114. package/dist/content/protocols.d.ts +0 -7
  115. package/dist/content/protocols.js +0 -215
  116. package/dist/content/retro-command.d.ts +0 -2
  117. package/dist/content/retro-command.js +0 -165
  118. package/dist/content/rewind-command.d.ts +0 -2
  119. package/dist/content/rewind-command.js +0 -106
  120. package/dist/content/tdd-log-command.d.ts +0 -2
  121. package/dist/content/tdd-log-command.js +0 -85
  122. package/dist/eval/agents/single-shot.d.ts +0 -27
  123. package/dist/eval/agents/single-shot.js +0 -79
  124. package/dist/eval/agents/with-tools.d.ts +0 -44
  125. package/dist/eval/agents/with-tools.js +0 -261
  126. package/dist/eval/agents/workflow.d.ts +0 -31
  127. package/dist/eval/agents/workflow.js +0 -155
  128. package/dist/eval/baseline.d.ts +0 -38
  129. package/dist/eval/baseline.js +0 -282
  130. package/dist/eval/config-loader.d.ts +0 -14
  131. package/dist/eval/config-loader.js +0 -395
  132. package/dist/eval/corpus.d.ts +0 -30
  133. package/dist/eval/corpus.js +0 -330
  134. package/dist/eval/cost-guard.d.ts +0 -102
  135. package/dist/eval/cost-guard.js +0 -190
  136. package/dist/eval/diff.d.ts +0 -64
  137. package/dist/eval/diff.js +0 -323
  138. package/dist/eval/llm-client.d.ts +0 -176
  139. package/dist/eval/llm-client.js +0 -267
  140. package/dist/eval/mode.d.ts +0 -28
  141. package/dist/eval/mode.js +0 -61
  142. package/dist/eval/progress.d.ts +0 -83
  143. package/dist/eval/progress.js +0 -59
  144. package/dist/eval/report.d.ts +0 -11
  145. package/dist/eval/report.js +0 -181
  146. package/dist/eval/rubric-loader.d.ts +0 -20
  147. package/dist/eval/rubric-loader.js +0 -143
  148. package/dist/eval/runner.d.ts +0 -81
  149. package/dist/eval/runner.js +0 -746
  150. package/dist/eval/runs.d.ts +0 -41
  151. package/dist/eval/runs.js +0 -114
  152. package/dist/eval/sandbox.d.ts +0 -38
  153. package/dist/eval/sandbox.js +0 -137
  154. package/dist/eval/tools/glob.d.ts +0 -2
  155. package/dist/eval/tools/glob.js +0 -163
  156. package/dist/eval/tools/grep.d.ts +0 -2
  157. package/dist/eval/tools/grep.js +0 -152
  158. package/dist/eval/tools/index.d.ts +0 -7
  159. package/dist/eval/tools/index.js +0 -35
  160. package/dist/eval/tools/read.d.ts +0 -2
  161. package/dist/eval/tools/read.js +0 -122
  162. package/dist/eval/tools/types.d.ts +0 -49
  163. package/dist/eval/tools/types.js +0 -41
  164. package/dist/eval/tools/write.d.ts +0 -2
  165. package/dist/eval/tools/write.js +0 -92
  166. package/dist/eval/types.d.ts +0 -561
  167. package/dist/eval/types.js +0 -47
  168. package/dist/eval/verifiers/judge.d.ts +0 -40
  169. package/dist/eval/verifiers/judge.js +0 -256
  170. package/dist/eval/verifiers/rules.d.ts +0 -24
  171. package/dist/eval/verifiers/rules.js +0 -218
  172. package/dist/eval/verifiers/structural.d.ts +0 -14
  173. package/dist/eval/verifiers/structural.js +0 -171
  174. package/dist/eval/verifiers/traceability.d.ts +0 -23
  175. package/dist/eval/verifiers/traceability.js +0 -84
  176. package/dist/eval/verifiers/workflow-consistency.d.ts +0 -21
  177. package/dist/eval/verifiers/workflow-consistency.js +0 -225
  178. package/dist/eval/workflow-corpus.d.ts +0 -7
  179. package/dist/eval/workflow-corpus.js +0 -207
  180. package/dist/feature-system.d.ts +0 -42
  181. package/dist/feature-system.js +0 -432
  182. package/dist/internal/knowledge-digest.d.ts +0 -7
  183. package/dist/internal/knowledge-digest.js +0 -93
@@ -1,30 +0,0 @@
1
- import type { FlowStage } from "../types.js";
2
- import type { EvalCase } from "./types.js";
3
- /**
4
- * Load all eval cases under `.cclaw/evals/corpus/**`. Optionally restrict to a
5
- * single stage. Returns an empty array for a fresh install.
6
- */
7
- export declare function loadCorpus(projectRoot: string, stage?: FlowStage): Promise<EvalCase[]>;
8
- /**
9
- * Resolve a case's `fixture` path to an absolute filesystem path. The fixture
10
- * field is interpreted relative to the case's stage directory (i.e., a
11
- * sibling subdirectory or file inside `.cclaw/evals/corpus/<stage>/`).
12
- */
13
- export declare function fixturePathFor(projectRoot: string, caseEntry: EvalCase): string | undefined;
14
- /**
15
- * Read the fixture artifact text for a case. Returns `undefined` if the case
16
- * has no fixture reference. Throws a descriptive error if the path exists in
17
- * the case but not on disk — structural fixtures ship alongside cases.
18
- */
19
- export declare function readFixtureArtifact(projectRoot: string, caseEntry: EvalCase): Promise<string | undefined>;
20
- /**
21
- * Resolve an entry from `extraFixtures` to an absolute filesystem path,
22
- * relative to the case's stage directory (same convention as `fixture`).
23
- */
24
- export declare function extraFixturePath(projectRoot: string, caseEntry: EvalCase, label: string): string | undefined;
25
- /**
26
- * Read every declared extra fixture for a case into a `{ label → text }`
27
- * map. Missing files throw so authoring mistakes surface immediately rather
28
- * than being silently skipped by cross-artifact verifiers.
29
- */
30
- export declare function readExtraFixtures(projectRoot: string, caseEntry: EvalCase): Promise<Record<string, string>>;
@@ -1,330 +0,0 @@
1
- import fs from "node:fs/promises";
2
- import path from "node:path";
3
- import { parse } from "yaml";
4
- import { EVALS_ROOT } from "../constants.js";
5
- import { exists } from "../fs-utils.js";
6
- import { FLOW_STAGES } from "../types.js";
7
- const FLOW_STAGE_SET = new Set(FLOW_STAGES);
8
- function corpusError(filePath, reason) {
9
- return new Error(`Invalid eval case at ${filePath}: ${reason}\n` +
10
- `Supported stages: ${FLOW_STAGES.join(", ")}`);
11
- }
12
- function isRecord(value) {
13
- return typeof value === "object" && value !== null && !Array.isArray(value);
14
- }
15
- function readStringArray(filePath, context, value) {
16
- if (value === undefined)
17
- return undefined;
18
- if (!Array.isArray(value) || value.some((item) => typeof item !== "string")) {
19
- throw corpusError(filePath, `"${context}" must be an array of strings`);
20
- }
21
- return value;
22
- }
23
- function readNonNegativeInteger(filePath, context, value) {
24
- if (value === undefined)
25
- return undefined;
26
- if (typeof value !== "number" || !Number.isFinite(value) || value < 0 || !Number.isInteger(value)) {
27
- throw corpusError(filePath, `"${context}" must be a non-negative integer`);
28
- }
29
- return value;
30
- }
31
- function parseStructural(filePath, raw) {
32
- if (raw === undefined)
33
- return undefined;
34
- if (!isRecord(raw)) {
35
- throw corpusError(filePath, `"expected.structural" must be a mapping`);
36
- }
37
- const requiredSections = readStringArray(filePath, "expected.structural.required_sections", raw.required_sections ?? raw.requiredSections);
38
- const forbiddenPatterns = readStringArray(filePath, "expected.structural.forbidden_patterns", raw.forbidden_patterns ?? raw.forbiddenPatterns);
39
- const requiredFrontmatterKeys = readStringArray(filePath, "expected.structural.required_frontmatter_keys", raw.required_frontmatter_keys ?? raw.requiredFrontmatterKeys);
40
- const minLines = readNonNegativeInteger(filePath, "expected.structural.min_lines", raw.min_lines ?? raw.minLines);
41
- const maxLines = readNonNegativeInteger(filePath, "expected.structural.max_lines", raw.max_lines ?? raw.maxLines);
42
- const minChars = readNonNegativeInteger(filePath, "expected.structural.min_chars", raw.min_chars ?? raw.minChars);
43
- const maxChars = readNonNegativeInteger(filePath, "expected.structural.max_chars", raw.max_chars ?? raw.maxChars);
44
- const structural = {};
45
- if (requiredSections)
46
- structural.requiredSections = requiredSections;
47
- if (forbiddenPatterns)
48
- structural.forbiddenPatterns = forbiddenPatterns;
49
- if (requiredFrontmatterKeys)
50
- structural.requiredFrontmatterKeys = requiredFrontmatterKeys;
51
- if (minLines !== undefined)
52
- structural.minLines = minLines;
53
- if (maxLines !== undefined)
54
- structural.maxLines = maxLines;
55
- if (minChars !== undefined)
56
- structural.minChars = minChars;
57
- if (maxChars !== undefined)
58
- structural.maxChars = maxChars;
59
- return structural;
60
- }
61
- function parseRegexRule(filePath, context, value) {
62
- if (typeof value === "string") {
63
- return { pattern: value };
64
- }
65
- if (!isRecord(value)) {
66
- throw corpusError(filePath, `"${context}" entries must be either a string or a mapping with "pattern"`);
67
- }
68
- const pattern = value.pattern;
69
- if (typeof pattern !== "string" || pattern.length === 0) {
70
- throw corpusError(filePath, `"${context}" mapping entry must include a non-empty "pattern" string`);
71
- }
72
- const flags = value.flags;
73
- if (flags !== undefined && typeof flags !== "string") {
74
- throw corpusError(filePath, `"${context}" flags must be a string`);
75
- }
76
- const description = value.description;
77
- if (description !== undefined && typeof description !== "string") {
78
- throw corpusError(filePath, `"${context}" description must be a string`);
79
- }
80
- const rule = { pattern };
81
- if (flags !== undefined)
82
- rule.flags = flags;
83
- if (description !== undefined)
84
- rule.description = description;
85
- return rule;
86
- }
87
- function parseRegexRules(filePath, context, value) {
88
- if (value === undefined)
89
- return undefined;
90
- if (!Array.isArray(value)) {
91
- throw corpusError(filePath, `"${context}" must be an array`);
92
- }
93
- return value.map((entry, index) => parseRegexRule(filePath, `${context}[${index}]`, entry));
94
- }
95
- function parseOccurrenceBounds(filePath, context, value) {
96
- if (value === undefined)
97
- return undefined;
98
- if (!isRecord(value)) {
99
- throw corpusError(filePath, `"${context}" must be a mapping of phrase → integer`);
100
- }
101
- const out = {};
102
- for (const [phrase, count] of Object.entries(value)) {
103
- if (typeof count !== "number" || !Number.isFinite(count) || !Number.isInteger(count) || count < 0) {
104
- throw corpusError(filePath, `"${context}.${phrase}" must be a non-negative integer`);
105
- }
106
- out[phrase] = count;
107
- }
108
- return out;
109
- }
110
- function parseRules(filePath, raw) {
111
- if (raw === undefined)
112
- return undefined;
113
- if (!isRecord(raw)) {
114
- throw corpusError(filePath, `"expected.rules" must be a mapping`);
115
- }
116
- const mustContain = readStringArray(filePath, "expected.rules.must_contain", raw.must_contain ?? raw.mustContain);
117
- const mustNotContain = readStringArray(filePath, "expected.rules.must_not_contain", raw.must_not_contain ?? raw.mustNotContain);
118
- const regexRequired = parseRegexRules(filePath, "expected.rules.regex_required", raw.regex_required ?? raw.regexRequired);
119
- const regexForbidden = parseRegexRules(filePath, "expected.rules.regex_forbidden", raw.regex_forbidden ?? raw.regexForbidden);
120
- const minOccurrences = parseOccurrenceBounds(filePath, "expected.rules.min_occurrences", raw.min_occurrences ?? raw.minOccurrences);
121
- const maxOccurrences = parseOccurrenceBounds(filePath, "expected.rules.max_occurrences", raw.max_occurrences ?? raw.maxOccurrences);
122
- const uniqueBulletsInSection = readStringArray(filePath, "expected.rules.unique_bullets_in_section", raw.unique_bullets_in_section ?? raw.uniqueBulletsInSection);
123
- const rules = {};
124
- if (mustContain)
125
- rules.mustContain = mustContain;
126
- if (mustNotContain)
127
- rules.mustNotContain = mustNotContain;
128
- if (regexRequired)
129
- rules.regexRequired = regexRequired;
130
- if (regexForbidden)
131
- rules.regexForbidden = regexForbidden;
132
- if (minOccurrences)
133
- rules.minOccurrences = minOccurrences;
134
- if (maxOccurrences)
135
- rules.maxOccurrences = maxOccurrences;
136
- if (uniqueBulletsInSection)
137
- rules.uniqueBulletsInSection = uniqueBulletsInSection;
138
- return Object.keys(rules).length === 0 ? undefined : rules;
139
- }
140
- function parseTraceability(filePath, raw) {
141
- if (raw === undefined)
142
- return undefined;
143
- if (!isRecord(raw)) {
144
- throw corpusError(filePath, `"expected.traceability" must be a mapping`);
145
- }
146
- const idPattern = raw.id_pattern ?? raw.idPattern;
147
- if (typeof idPattern !== "string" || idPattern.length === 0) {
148
- throw corpusError(filePath, `"expected.traceability.id_pattern" must be a non-empty regex source`);
149
- }
150
- const idFlags = raw.id_flags ?? raw.idFlags;
151
- if (idFlags !== undefined && typeof idFlags !== "string") {
152
- throw corpusError(filePath, `"expected.traceability.id_flags" must be a string`);
153
- }
154
- const source = raw.source;
155
- if (typeof source !== "string" || source.length === 0) {
156
- throw corpusError(filePath, `"expected.traceability.source" must be "self" or an extra_fixtures label`);
157
- }
158
- const requireInRaw = raw.require_in ?? raw.requireIn;
159
- const requireIn = readStringArray(filePath, "expected.traceability.require_in", requireInRaw);
160
- if (!requireIn || requireIn.length === 0) {
161
- throw corpusError(filePath, `"expected.traceability.require_in" must be a non-empty array`);
162
- }
163
- const out = { idPattern, source, requireIn };
164
- if (idFlags !== undefined)
165
- out.idFlags = idFlags;
166
- return out;
167
- }
168
- function parseExtraFixtures(filePath, raw) {
169
- if (raw === undefined)
170
- return undefined;
171
- if (!isRecord(raw)) {
172
- throw corpusError(filePath, `"extra_fixtures" must be a mapping of label → path`);
173
- }
174
- const out = {};
175
- for (const [label, value] of Object.entries(raw)) {
176
- if (typeof value !== "string" || value.length === 0) {
177
- throw corpusError(filePath, `"extra_fixtures.${label}" must be a non-empty path string`);
178
- }
179
- out[label] = value;
180
- }
181
- return Object.keys(out).length === 0 ? undefined : out;
182
- }
183
- function parseExpected(filePath, raw) {
184
- if (raw === undefined)
185
- return undefined;
186
- if (!isRecord(raw)) {
187
- throw corpusError(filePath, `"expected" must be a mapping`);
188
- }
189
- const shape = {};
190
- const structural = parseStructural(filePath, raw.structural);
191
- if (structural)
192
- shape.structural = structural;
193
- const rules = parseRules(filePath, raw.rules);
194
- if (rules)
195
- shape.rules = rules;
196
- const traceability = parseTraceability(filePath, raw.traceability);
197
- if (traceability)
198
- shape.traceability = traceability;
199
- if (raw.judge !== undefined) {
200
- if (!isRecord(raw.judge)) {
201
- throw corpusError(filePath, `"expected.judge" must be a mapping`);
202
- }
203
- shape.judge = raw.judge;
204
- }
205
- return Object.keys(shape).length === 0 ? undefined : shape;
206
- }
207
- function validateCase(filePath, raw) {
208
- if (!isRecord(raw)) {
209
- throw corpusError(filePath, "top-level value must be a mapping");
210
- }
211
- const id = raw.id;
212
- if (typeof id !== "string" || id.trim().length === 0) {
213
- throw corpusError(filePath, `"id" must be a non-empty string`);
214
- }
215
- const stageRaw = raw.stage;
216
- if (typeof stageRaw !== "string" || !FLOW_STAGE_SET.has(stageRaw)) {
217
- throw corpusError(filePath, `"stage" must be one of: ${FLOW_STAGES.join(", ")}`);
218
- }
219
- const inputPrompt = raw.input_prompt ?? raw.inputPrompt;
220
- if (typeof inputPrompt !== "string" || inputPrompt.trim().length === 0) {
221
- throw corpusError(filePath, `"input_prompt" must be a non-empty string`);
222
- }
223
- const contextFiles = readStringArray(filePath, "context_files", raw.context_files ?? raw.contextFiles);
224
- const expected = parseExpected(filePath, raw.expected);
225
- const fixture = typeof raw.fixture === "string" ? raw.fixture : undefined;
226
- const extraFixtures = parseExtraFixtures(filePath, raw.extra_fixtures ?? raw.extraFixtures);
227
- return {
228
- id: id.trim(),
229
- stage: stageRaw,
230
- inputPrompt: inputPrompt.trim(),
231
- contextFiles,
232
- expected,
233
- fixture,
234
- extraFixtures
235
- };
236
- }
237
- /**
238
- * Load all eval cases under `.cclaw/evals/corpus/**`. Optionally restrict to a
239
- * single stage. Returns an empty array for a fresh install.
240
- */
241
- export async function loadCorpus(projectRoot, stage) {
242
- const corpusRoot = path.join(projectRoot, EVALS_ROOT, "corpus");
243
- if (!(await exists(corpusRoot))) {
244
- return [];
245
- }
246
- const cases = [];
247
- const stageDirs = stage
248
- ? [path.join(corpusRoot, stage)]
249
- : (await fs.readdir(corpusRoot, { withFileTypes: true }))
250
- .filter((entry) => entry.isDirectory())
251
- .filter((entry) => FLOW_STAGE_SET.has(entry.name))
252
- .map((entry) => path.join(corpusRoot, entry.name));
253
- for (const stageDir of stageDirs) {
254
- if (!(await exists(stageDir)))
255
- continue;
256
- const entries = await fs.readdir(stageDir, { withFileTypes: true });
257
- for (const entry of entries) {
258
- if (!entry.isFile())
259
- continue;
260
- if (!entry.name.endsWith(".yaml") && !entry.name.endsWith(".yml"))
261
- continue;
262
- const filePath = path.join(stageDir, entry.name);
263
- let parsed;
264
- try {
265
- parsed = parse(await fs.readFile(filePath, "utf8"));
266
- }
267
- catch (err) {
268
- throw corpusError(filePath, err instanceof Error ? err.message : String(err));
269
- }
270
- cases.push(validateCase(filePath, parsed));
271
- }
272
- }
273
- cases.sort((a, b) => a.stage.localeCompare(b.stage) || a.id.localeCompare(b.id));
274
- return cases;
275
- }
276
- /**
277
- * Resolve a case's `fixture` path to an absolute filesystem path. The fixture
278
- * field is interpreted relative to the case's stage directory (i.e., a
279
- * sibling subdirectory or file inside `.cclaw/evals/corpus/<stage>/`).
280
- */
281
- export function fixturePathFor(projectRoot, caseEntry) {
282
- if (!caseEntry.fixture)
283
- return undefined;
284
- return path.resolve(projectRoot, EVALS_ROOT, "corpus", caseEntry.stage, caseEntry.fixture);
285
- }
286
- /**
287
- * Read the fixture artifact text for a case. Returns `undefined` if the case
288
- * has no fixture reference. Throws a descriptive error if the path exists in
289
- * the case but not on disk — structural fixtures ship alongside cases.
290
- */
291
- export async function readFixtureArtifact(projectRoot, caseEntry) {
292
- const fixturePath = fixturePathFor(projectRoot, caseEntry);
293
- if (!fixturePath)
294
- return undefined;
295
- if (!(await exists(fixturePath))) {
296
- throw new Error(`Fixture missing for case ${caseEntry.stage}/${caseEntry.id}: ${fixturePath}`);
297
- }
298
- return fs.readFile(fixturePath, "utf8");
299
- }
300
- /**
301
- * Resolve an entry from `extraFixtures` to an absolute filesystem path,
302
- * relative to the case's stage directory (same convention as `fixture`).
303
- */
304
- export function extraFixturePath(projectRoot, caseEntry, label) {
305
- const value = caseEntry.extraFixtures?.[label];
306
- if (!value)
307
- return undefined;
308
- return path.resolve(projectRoot, EVALS_ROOT, "corpus", caseEntry.stage, value);
309
- }
310
- /**
311
- * Read every declared extra fixture for a case into a `{ label → text }`
312
- * map. Missing files throw so authoring mistakes surface immediately rather
313
- * than being silently skipped by cross-artifact verifiers.
314
- */
315
- export async function readExtraFixtures(projectRoot, caseEntry) {
316
- const out = {};
317
- if (!caseEntry.extraFixtures)
318
- return out;
319
- for (const label of Object.keys(caseEntry.extraFixtures)) {
320
- const filePath = extraFixturePath(projectRoot, caseEntry, label);
321
- if (!filePath)
322
- continue;
323
- if (!(await exists(filePath))) {
324
- throw new Error(`Extra fixture missing for ${caseEntry.stage}/${caseEntry.id} ` +
325
- `(label="${label}"): ${filePath}`);
326
- }
327
- out[label] = await fs.readFile(filePath, "utf8");
328
- }
329
- return out;
330
- }
@@ -1,102 +0,0 @@
1
- import type { ChatUsage } from "./llm-client.js";
2
- import type { ResolvedEvalConfig, TokenPricing } from "./types.js";
3
- /**
4
- * Builtin pricing fallback. Intentionally conservative: when the user
5
- * hasn't configured pricing and we don't know the model, we default to a
6
- * "small model" USD schedule so the cap can still do something useful.
7
- *
8
- * Values are USD per 1K tokens. Sources are public pricing pages as of
9
- * 2026-04; update by editing this constant, not the guard logic.
10
- */
11
- export declare const DEFAULT_TOKEN_PRICING: Readonly<Record<string, TokenPricing>>;
12
- /** Hard default when neither config nor builtins know the model. */
13
- export declare const UNKNOWN_MODEL_PRICING: TokenPricing;
14
- export interface SpendLedger {
15
- /** ISO date (`YYYY-MM-DD` in UTC) — also embedded in the file name. */
16
- date: string;
17
- /** USD spent so far today across every call that hit the guard. */
18
- totalUsd: number;
19
- /** Number of `chat()` calls accounted for. */
20
- calls: number;
21
- /** Per-model breakdown for the report. */
22
- byModel: Record<string, {
23
- tokensIn: number;
24
- tokensOut: number;
25
- usd: number;
26
- }>;
27
- }
28
- export declare class DailyCostCapExceededError extends Error {
29
- readonly capUsd: number;
30
- readonly projectedUsd: number;
31
- readonly currentUsd: number;
32
- constructor(opts: {
33
- capUsd: number;
34
- projectedUsd: number;
35
- currentUsd: number;
36
- });
37
- }
38
- /**
39
- * Per-run cost cap — enforced in-memory, no ledger file. Complements the
40
- * daily cap so a single long workflow run can't blow the whole day's
41
- * budget even if the daily cap is generous. Opt-in via
42
- * `--max-cost-usd=<n>` on the CLI or `CCLAW_EVAL_MAX_COST_USD`.
43
- */
44
- export declare class RunCostCapExceededError extends Error {
45
- readonly capUsd: number;
46
- readonly projectedUsd: number;
47
- readonly currentUsd: number;
48
- constructor(opts: {
49
- capUsd: number;
50
- projectedUsd: number;
51
- currentUsd: number;
52
- });
53
- }
54
- declare function utcDate(now?: Date): string;
55
- declare function pricingFor(model: string, config: Pick<ResolvedEvalConfig, "tokenPricing">): TokenPricing;
56
- /**
57
- * Compute USD cost of a single `ChatUsage` using the given `model` pricing
58
- * schedule. Returns 0 when `usage.totalTokens` is 0 (e.g. transport error
59
- * before first token).
60
- */
61
- export declare function computeUsageUsd(model: string, usage: ChatUsage, config: Pick<ResolvedEvalConfig, "tokenPricing">): number;
62
- declare function ledgerPath(projectRoot: string, date: string): string;
63
- declare function readLedger(file: string, date: string): Promise<SpendLedger>;
64
- declare function writeLedger(file: string, ledger: SpendLedger): Promise<void>;
65
- /**
66
- * Guard a single LLM call against the daily USD cap. Returns the updated
67
- * ledger on success; throws `DailyCostCapExceededError` when the projected
68
- * total would cross the cap. When `config.dailyUsdCap` is unset, the guard
69
- * is a no-op — no file writes, no ledger — so non-judge runs never touch
70
- * the filesystem.
71
- */
72
- export interface CostGuard {
73
- /**
74
- * Commit the USD cost of a finished call to the ledger. When `dailyUsdCap`
75
- * is set, refuses the commit if the projected total would exceed the cap.
76
- */
77
- commit(model: string, usage: ChatUsage): Promise<number>;
78
- /** Snapshot the current ledger (or undefined when no cap is set). */
79
- snapshot(): Promise<SpendLedger | undefined>;
80
- }
81
- export interface CreateCostGuardOptions {
82
- /** Clock injection for tests. */
83
- now?: () => Date;
84
- /** Override the default filesystem root for the ledger. */
85
- ledgerPath?: string;
86
- /**
87
- * Per-run (in-memory) USD cap. Independent from the persisted daily
88
- * cap so a single `cclaw eval` invocation can be budgeted without
89
- * touching the shared nightly ledger. Undefined = unlimited.
90
- */
91
- runCapUsd?: number;
92
- }
93
- export declare function createCostGuard(projectRoot: string, config: Pick<ResolvedEvalConfig, "dailyUsdCap" | "tokenPricing">, options?: CreateCostGuardOptions): CostGuard;
94
- /** Exposed for tests. */
95
- export declare const __internal: {
96
- utcDate: typeof utcDate;
97
- pricingFor: typeof pricingFor;
98
- ledgerPath: typeof ledgerPath;
99
- readLedger: typeof readLedger;
100
- writeLedger: typeof writeLedger;
101
- };
102
- export {};
@@ -1,190 +0,0 @@
1
- /**
2
- * Cost guard for the cclaw eval subsystem.
3
- *
4
- * Two responsibilities:
5
- *
6
- * 1. Convert `ChatUsage` (prompt/completion token counts) into USD using
7
- * a per-model `TokenPricing` schedule. Pricing comes from
8
- * `config.tokenPricing[model]` first, then from the builtin fallback
9
- * schedule for well-known models (z.ai GLM 5.1 at publish time).
10
- * 2. Maintain a per-day running total persisted to
11
- * `.cclaw/evals/.spend-YYYY-MM-DD.json` so that a long eval session
12
- * (or a cron-run nightly) can't blow through the configured
13
- * `dailyUsdCap`. The counter is opt-in: no cap, no writes.
14
- *
15
- * The guard is deliberately pessimistic — amounts are rounded to 6 decimals
16
- * and only ever added, never subtracted, so a CI run that errors mid-flight
17
- * still shows the partial spend in the next report.
18
- */
19
- import fs from "node:fs/promises";
20
- import path from "node:path";
21
- import { EVALS_ROOT } from "../constants.js";
22
- import { exists } from "../fs-utils.js";
23
/**
 * Builtin pricing fallback for well-known models, consulted when the user
 * has not configured pricing for a model. Models missing from this table
 * fall through to `UNKNOWN_MODEL_PRICING`.
 *
 * Values are USD per 1K tokens. Sources are public pricing pages as of
 * 2026-04; update by editing this constant, not the guard logic.
 *
 * The table has a null prototype so that indexing it with an arbitrary
 * model name (for example "constructor" or "toString") can never resolve to
 * a member inherited from `Object.prototype`.
 */
export const DEFAULT_TOKEN_PRICING = Object.assign(Object.create(null), {
    "glm-5.1": { input: 0.0005, output: 0.0015 },
    "glm-4.6": { input: 0.0005, output: 0.0015 },
    "gpt-4o-mini": { input: 0.00015, output: 0.0006 },
    "gpt-4o": { input: 0.005, output: 0.015 }
});
37
/**
 * Last-resort schedule (USD per 1K tokens) applied when neither the user
 * config nor the builtin table has an entry for the model.
 */
export const UNKNOWN_MODEL_PRICING = {
    input: 0.001,
    output: 0.003
};
39
/**
 * Raised when committing a call would push the persisted per-day total past
 * the configured daily cap. Carries the cap, the projected total and the
 * total before the call so callers can report them.
 */
export class DailyCostCapExceededError extends Error {
    capUsd;
    projectedUsd;
    currentUsd;
    /**
     * @param {{ capUsd: number, projectedUsd: number, currentUsd: number }} opts
     */
    constructor(opts) {
        // Amounts are rendered with four decimals in the message.
        const usd = (amount) => `$${amount.toFixed(4)}`;
        const parts = [
            "Daily cost cap would be exceeded: ",
            `current=${usd(opts.currentUsd)}, `,
            `projected=${usd(opts.projectedUsd)}, `,
            `cap=${usd(opts.capUsd)}. `,
            "Unset CCLAW_EVAL_DAILY_USD_CAP or increase the cap to continue."
        ];
        super(parts.join(""));
        this.name = "DailyCostCapExceededError";
        const { capUsd, projectedUsd, currentUsd } = opts;
        this.capUsd = capUsd;
        this.projectedUsd = projectedUsd;
        this.currentUsd = currentUsd;
    }
}
55
/**
 * Per-run cost cap violation. The run cap is tracked in memory only (no
 * ledger file) and complements the daily cap, so one long workflow run
 * cannot consume the whole day's budget even when the daily cap is generous.
 * Opt-in via `--max-cost-usd=<n>` on the CLI or `CCLAW_EVAL_MAX_COST_USD`.
 */
export class RunCostCapExceededError extends Error {
    capUsd;
    projectedUsd;
    currentUsd;
    /**
     * @param {{ capUsd: number, projectedUsd: number, currentUsd: number }} opts
     */
    constructor(opts) {
        // Amounts are rendered with four decimals in the message.
        const usd = (amount) => `$${amount.toFixed(4)}`;
        const parts = [
            "Run cost cap would be exceeded: ",
            `current=${usd(opts.currentUsd)}, `,
            `projected=${usd(opts.projectedUsd)}, `,
            `cap=${usd(opts.capUsd)}. `,
            "Raise --max-cost-usd or drop it to run uncapped."
        ];
        super(parts.join(""));
        this.name = "RunCostCapExceededError";
        const { capUsd, projectedUsd, currentUsd } = opts;
        this.capUsd = capUsd;
        this.projectedUsd = projectedUsd;
        this.currentUsd = currentUsd;
    }
}
77
/**
 * Format a date as its UTC calendar day, i.e. the first ten characters of
 * the ISO-8601 timestamp (`YYYY-MM-DD` for ordinary years).
 *
 * @param {Date} [now] Instant to format; defaults to the current time.
 * @returns {string}
 */
function utcDate(now = new Date()) {
    const isoTimestamp = now.toISOString();
    return isoTimestamp.substring(0, 10);
}
80
/**
 * Resolve the pricing schedule for `model`.
 *
 * Lookup order: the user's `config.tokenPricing`, then the builtin
 * `DEFAULT_TOKEN_PRICING` table, then `UNKNOWN_MODEL_PRICING`.
 *
 * Only own properties are considered. A model name that happens to match an
 * `Object.prototype` member ("constructor", "toString", ...) would otherwise
 * resolve to a function, and every cost computed from it would be NaN.
 *
 * @param {string} model Model identifier as reported by the caller.
 * @param {{ tokenPricing?: object }} config Eval config carrying optional overrides.
 * @returns {{ input: number, output: number }} Schedule used by `computeUsageUsd` (divided per 1K tokens).
 */
function pricingFor(model, config) {
    const configured = config.tokenPricing;
    if (configured != null && Object.hasOwn(configured, model) && configured[model]) {
        return configured[model];
    }
    if (Object.hasOwn(DEFAULT_TOKEN_PRICING, model) && DEFAULT_TOKEN_PRICING[model]) {
        return DEFAULT_TOKEN_PRICING[model];
    }
    return UNKNOWN_MODEL_PRICING;
}
89
/**
 * Compute the USD cost of a single `ChatUsage` using the pricing schedule
 * resolved for `model`.
 *
 * Returns 0 when `usage` is missing or `usage.totalTokens` is 0 or negative
 * (e.g. a transport error before the first token). The result is rounded to
 * 6 decimals and is never negative.
 *
 * Missing or non-finite token counts are treated as 0. Without this a usage
 * object lacking `promptTokens` or `completionTokens` would produce NaN,
 * and NaN compares false against every cap, so the guard would let the call
 * through and corrupt the running totals.
 *
 * @param {string} model Model identifier used to pick the schedule.
 * @param {{ promptTokens?: number, completionTokens?: number, totalTokens?: number }} usage
 * @param {{ tokenPricing?: object }} config
 * @returns {number} Cost in USD.
 */
export function computeUsageUsd(model, usage, config) {
    if (!usage || usage.totalTokens <= 0)
        return 0;
    const schedule = pricingFor(model, config);
    const tokenCount = (value) => (Number.isFinite(value) ? value : 0);
    const inputUsd = (tokenCount(usage.promptTokens) * schedule.input) / 1_000;
    const outputUsd = (tokenCount(usage.completionTokens) * schedule.output) / 1_000;
    const cost = inputUsd + outputUsd;
    return Math.max(0, Number(cost.toFixed(6)));
}
102
/**
 * Build a fresh ledger for `date` with no spend recorded.
 *
 * @param {string} date Day the ledger belongs to.
 * @returns {{ date: string, totalUsd: number, calls: number, byModel: object }}
 */
function emptyLedger(date) {
    const ledger = { date };
    ledger.totalUsd = 0;
    ledger.calls = 0;
    ledger.byModel = {};
    return ledger;
}
105
/**
 * Location of the spend ledger for `date`: `.spend-<date>.json` inside the
 * evals root of the project.
 *
 * @param {string} projectRoot Project root directory.
 * @param {string} date Day the ledger belongs to.
 * @returns {string}
 */
function ledgerPath(projectRoot, date) {
    const fileName = [".spend-", date, ".json"].join("");
    const evalsDir = path.join(projectRoot, EVALS_ROOT);
    return path.join(evalsDir, fileName);
}
108
/**
 * Load the ledger stored at `file` for `date`.
 *
 * Falls back to an empty ledger when the file does not exist, cannot be
 * read or parsed, or records a different date. Fields of the stored ledger
 * with an unexpected type are replaced by their empty defaults.
 *
 * @param {string} file Path of the ledger file.
 * @param {string} date Day the caller expects the ledger to cover.
 * @returns {Promise<{ date: string, totalUsd: number, calls: number, byModel: object }>}
 */
async function readLedger(file, date) {
    const present = await exists(file);
    if (!present) {
        return emptyLedger(date);
    }
    const numberOrZero = (value) => {
        if (typeof value === "number") {
            return value;
        }
        return 0;
    };
    try {
        const text = await fs.readFile(file, "utf8");
        const stored = JSON.parse(text);
        if (stored?.date === date) {
            const totalUsd = numberOrZero(stored.totalUsd);
            const calls = numberOrZero(stored.calls);
            let byModel = {};
            if (stored.byModel && typeof stored.byModel === "object") {
                byModel = stored.byModel;
            }
            return { date, totalUsd, calls, byModel };
        }
        return emptyLedger(date);
    }
    catch {
        // Best-effort: an unreadable or malformed ledger counts as no spend.
        return emptyLedger(date);
    }
}
126
/**
 * Persist `ledger` to `file` as pretty-printed JSON followed by a newline,
 * creating the parent directory when needed.
 *
 * The JSON is first written to a sibling temporary file and then renamed
 * over `file`. A crash or a concurrent read in the middle of a write can
 * therefore never expose a truncated ledger — which `readLedger` would
 * treat as "no spend yet", silently resetting the day's total.
 *
 * @param {string} file Destination path of the ledger.
 * @param {object} ledger Ledger to serialise.
 * @returns {Promise<void>}
 * @throws Re-throws any filesystem error after removing the temporary file.
 */
async function writeLedger(file, ledger) {
    await fs.mkdir(path.dirname(file), { recursive: true });
    // Unique per process and per call so overlapping writers never share a temp file.
    const uniqueSuffix = `${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2)}`;
    const scratch = `${file}.${uniqueSuffix}.tmp`;
    try {
        await fs.writeFile(scratch, `${JSON.stringify(ledger, null, 2)}\n`, "utf8");
        await fs.rename(scratch, file);
    }
    catch (error) {
        // Best-effort cleanup; the original failure is what the caller needs to see.
        await fs.rm(scratch, { force: true }).catch(() => { });
        throw error;
    }
}
130
/**
 * Create a cost guard for one eval run.
 *
 * The guard enforces two independent, optional limits:
 *  - `options.runCapUsd`: an in-memory cap on the total committed through
 *    this guard instance;
 *  - `config.dailyUsdCap`: a cap on the per-day total persisted in the
 *    ledger file. When it is unset no ledger is read or written.
 *
 * @param {string} projectRoot Project root used to derive the default ledger path.
 * @param {{ dailyUsdCap?: number, tokenPricing?: object }} config
 * @param {{ now?: () => Date, ledgerPath?: string, runCapUsd?: number }} [options]
 * @returns {{ commit: Function, snapshot: Function }} The guard.
 */
export function createCostGuard(projectRoot, config, options = {}) {
    const now = options.now ?? (() => new Date());
    const runCap = options.runCapUsd;
    let runTotalUsd = 0;
    // Read the clock once per operation so the date stored in the ledger and
    // the date embedded in its file name cannot disagree across UTC midnight.
    const resolveTarget = () => {
        const date = utcDate(now());
        return { date, file: options.ledgerPath ?? ledgerPath(projectRoot, date) };
    };
    // All running totals are kept at 6-decimal precision.
    const addUsd = (total, delta) => Number((total + delta).toFixed(6));
    // Missing or non-finite counts must not turn the per-model tallies into NaN.
    const tokenCount = (value) => (Number.isFinite(value) ? value : 0);
    return {
        /**
         * Record the cost of a finished call.
         *
         * @returns {Promise<number>} USD cost of the call.
         * @throws {RunCostCapExceededError} When the run total would exceed the run cap.
         * @throws {DailyCostCapExceededError} When the day total would exceed the daily cap.
         */
        async commit(model, usage) {
            const usd = computeUsageUsd(model, usage, config);
            if (runCap !== undefined) {
                const projectedRun = addUsd(runTotalUsd, usd);
                if (projectedRun > runCap) {
                    throw new RunCostCapExceededError({
                        capUsd: runCap,
                        projectedUsd: projectedRun,
                        currentUsd: runTotalUsd
                    });
                }
            }
            if (config.dailyUsdCap === undefined) {
                runTotalUsd = addUsd(runTotalUsd, usd);
                return usd;
            }
            const { date, file } = resolveTarget();
            const ledger = await readLedger(file, date);
            const projected = addUsd(ledger.totalUsd, usd);
            if (projected > config.dailyUsdCap) {
                throw new DailyCostCapExceededError({
                    capUsd: config.dailyUsdCap,
                    projectedUsd: projected,
                    currentUsd: ledger.totalUsd
                });
            }
            ledger.totalUsd = projected;
            ledger.calls += 1;
            // Own-property check: a model named like an Object.prototype member
            // must start a fresh entry instead of mutating the inherited value.
            const entry = Object.hasOwn(ledger.byModel, model) && ledger.byModel[model] != null
                ? ledger.byModel[model]
                : { tokensIn: 0, tokensOut: 0, usd: 0 };
            // `usage` may be null here: computeUsageUsd prices a missing usage at 0.
            entry.tokensIn += tokenCount(usage?.promptTokens);
            entry.tokensOut += tokenCount(usage?.completionTokens);
            entry.usd = addUsd(entry.usd, usd);
            ledger.byModel[model] = entry;
            await writeLedger(file, ledger);
            // The run total only advances once the ledger write has succeeded.
            runTotalUsd = addUsd(runTotalUsd, usd);
            return usd;
        },
        /**
         * Read the current day's ledger.
         *
         * @returns {Promise<object | undefined>} The ledger, or undefined when no daily cap is set.
         */
        async snapshot() {
            if (config.dailyUsdCap === undefined)
                return undefined;
            const { date, file } = resolveTarget();
            return readLedger(file, date);
        }
    };
}
183
/** Module-private helpers, re-exported under one object for tests. */
export const __internal = {
    utcDate: utcDate,
    pricingFor: pricingFor,
    ledgerPath: ledgerPath,
    readLedger: readLedger,
    writeLedger: writeLedger
};