cclaw-cli 0.49.0 → 0.51.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. package/README.md +54 -82
  2. package/dist/artifact-linter.d.ts +4 -0
  3. package/dist/artifact-linter.js +24 -3
  4. package/dist/cli.d.ts +1 -19
  5. package/dist/cli.js +49 -491
  6. package/dist/constants.d.ts +2 -13
  7. package/dist/constants.js +1 -43
  8. package/dist/content/closeout-guidance.d.ts +14 -0
  9. package/dist/content/closeout-guidance.js +42 -0
  10. package/dist/content/core-agents.js +51 -9
  11. package/dist/content/decision-protocol.d.ts +12 -0
  12. package/dist/content/decision-protocol.js +20 -0
  13. package/dist/content/diff-command.d.ts +1 -2
  14. package/dist/content/diff-command.js +8 -94
  15. package/dist/content/examples.d.ts +4 -10
  16. package/dist/content/examples.js +10 -20
  17. package/dist/content/hook-events.js +2 -2
  18. package/dist/content/hook-inline-snippets.d.ts +5 -2
  19. package/dist/content/hook-inline-snippets.js +33 -1
  20. package/dist/content/hook-manifest.d.ts +3 -4
  21. package/dist/content/hook-manifest.js +11 -12
  22. package/dist/content/hooks.js +2 -0
  23. package/dist/content/ideate-command.d.ts +2 -0
  24. package/dist/content/ideate-command.js +31 -25
  25. package/dist/content/iron-laws.d.ts +5 -5
  26. package/dist/content/iron-laws.js +5 -5
  27. package/dist/content/learnings.d.ts +3 -4
  28. package/dist/content/learnings.js +24 -50
  29. package/dist/content/meta-skill.js +31 -21
  30. package/dist/content/next-command.js +38 -38
  31. package/dist/content/node-hooks.js +17 -343
  32. package/dist/content/opencode-plugin.js +2 -100
  33. package/dist/content/research-playbooks.js +14 -14
  34. package/dist/content/review-loop.d.ts +2 -0
  35. package/dist/content/review-loop.js +8 -0
  36. package/dist/content/session-hooks.js +14 -46
  37. package/dist/content/skills.d.ts +0 -5
  38. package/dist/content/skills.js +53 -128
  39. package/dist/content/stage-common-guidance.d.ts +0 -1
  40. package/dist/content/stage-common-guidance.js +15 -14
  41. package/dist/content/stage-schema.d.ts +26 -1
  42. package/dist/content/stage-schema.js +121 -40
  43. package/dist/content/stages/_lint-metadata/index.js +9 -15
  44. package/dist/content/stages/brainstorm.js +22 -43
  45. package/dist/content/stages/design.js +37 -57
  46. package/dist/content/stages/plan.js +22 -13
  47. package/dist/content/stages/review.js +24 -27
  48. package/dist/content/stages/scope.js +34 -46
  49. package/dist/content/stages/ship.js +7 -4
  50. package/dist/content/stages/spec.js +20 -9
  51. package/dist/content/stages/tdd.js +64 -44
  52. package/dist/content/start-command.js +10 -12
  53. package/dist/content/status-command.d.ts +2 -7
  54. package/dist/content/status-command.js +19 -146
  55. package/dist/content/subagents.d.ts +0 -5
  56. package/dist/content/subagents.js +47 -28
  57. package/dist/content/templates.d.ts +1 -1
  58. package/dist/content/templates.js +126 -135
  59. package/dist/content/track-render-context.d.ts +17 -0
  60. package/dist/content/track-render-context.js +44 -0
  61. package/dist/content/tree-command.d.ts +1 -2
  62. package/dist/content/tree-command.js +4 -87
  63. package/dist/content/utility-skills.d.ts +2 -29
  64. package/dist/content/utility-skills.js +2 -1534
  65. package/dist/content/view-command.js +29 -11
  66. package/dist/delegation.d.ts +1 -1
  67. package/dist/delegation.js +5 -15
  68. package/dist/doctor-registry.js +20 -21
  69. package/dist/doctor.js +88 -344
  70. package/dist/flow-state.d.ts +3 -0
  71. package/dist/flow-state.js +2 -0
  72. package/dist/harness-adapters.d.ts +1 -1
  73. package/dist/harness-adapters.js +48 -57
  74. package/dist/install.js +128 -358
  75. package/dist/internal/advance-stage.js +3 -9
  76. package/dist/internal/compound-readiness.d.ts +1 -1
  77. package/dist/internal/compound-readiness.js +1 -1
  78. package/dist/internal/tdd-loop-status.d.ts +1 -1
  79. package/dist/internal/tdd-loop-status.js +1 -1
  80. package/dist/knowledge-store.d.ts +16 -10
  81. package/dist/knowledge-store.js +51 -15
  82. package/dist/policy.js +16 -105
  83. package/dist/run-archive.d.ts +4 -6
  84. package/dist/run-archive.js +15 -20
  85. package/dist/run-persistence.d.ts +2 -2
  86. package/dist/run-persistence.js +3 -9
  87. package/package.json +1 -2
  88. package/dist/content/archive-command.d.ts +0 -2
  89. package/dist/content/archive-command.js +0 -124
  90. package/dist/content/compound-command.d.ts +0 -5
  91. package/dist/content/compound-command.js +0 -193
  92. package/dist/content/contexts.d.ts +0 -18
  93. package/dist/content/contexts.js +0 -24
  94. package/dist/content/contracts.d.ts +0 -2
  95. package/dist/content/contracts.js +0 -51
  96. package/dist/content/doctor-references.d.ts +0 -2
  97. package/dist/content/doctor-references.js +0 -150
  98. package/dist/content/eval-scaffold.d.ts +0 -15
  99. package/dist/content/eval-scaffold.js +0 -370
  100. package/dist/content/feature-command.d.ts +0 -2
  101. package/dist/content/feature-command.js +0 -123
  102. package/dist/content/flow-map.d.ts +0 -23
  103. package/dist/content/flow-map.js +0 -134
  104. package/dist/content/harness-doc.d.ts +0 -2
  105. package/dist/content/harness-doc.js +0 -202
  106. package/dist/content/harness-playbooks.d.ts +0 -24
  107. package/dist/content/harness-playbooks.js +0 -393
  108. package/dist/content/harness-tool-refs.d.ts +0 -20
  109. package/dist/content/harness-tool-refs.js +0 -268
  110. package/dist/content/ops-command.d.ts +0 -2
  111. package/dist/content/ops-command.js +0 -71
  112. package/dist/content/protocols.d.ts +0 -7
  113. package/dist/content/protocols.js +0 -215
  114. package/dist/content/retro-command.d.ts +0 -2
  115. package/dist/content/retro-command.js +0 -165
  116. package/dist/content/rewind-command.d.ts +0 -2
  117. package/dist/content/rewind-command.js +0 -106
  118. package/dist/content/tdd-log-command.d.ts +0 -2
  119. package/dist/content/tdd-log-command.js +0 -85
  120. package/dist/eval/agents/single-shot.d.ts +0 -27
  121. package/dist/eval/agents/single-shot.js +0 -79
  122. package/dist/eval/agents/with-tools.d.ts +0 -44
  123. package/dist/eval/agents/with-tools.js +0 -261
  124. package/dist/eval/agents/workflow.d.ts +0 -31
  125. package/dist/eval/agents/workflow.js +0 -155
  126. package/dist/eval/baseline.d.ts +0 -38
  127. package/dist/eval/baseline.js +0 -282
  128. package/dist/eval/config-loader.d.ts +0 -14
  129. package/dist/eval/config-loader.js +0 -395
  130. package/dist/eval/corpus.d.ts +0 -30
  131. package/dist/eval/corpus.js +0 -330
  132. package/dist/eval/cost-guard.d.ts +0 -102
  133. package/dist/eval/cost-guard.js +0 -190
  134. package/dist/eval/diff.d.ts +0 -64
  135. package/dist/eval/diff.js +0 -323
  136. package/dist/eval/llm-client.d.ts +0 -176
  137. package/dist/eval/llm-client.js +0 -267
  138. package/dist/eval/mode.d.ts +0 -28
  139. package/dist/eval/mode.js +0 -61
  140. package/dist/eval/progress.d.ts +0 -83
  141. package/dist/eval/progress.js +0 -59
  142. package/dist/eval/report.d.ts +0 -11
  143. package/dist/eval/report.js +0 -181
  144. package/dist/eval/rubric-loader.d.ts +0 -20
  145. package/dist/eval/rubric-loader.js +0 -143
  146. package/dist/eval/runner.d.ts +0 -81
  147. package/dist/eval/runner.js +0 -746
  148. package/dist/eval/runs.d.ts +0 -41
  149. package/dist/eval/runs.js +0 -114
  150. package/dist/eval/sandbox.d.ts +0 -38
  151. package/dist/eval/sandbox.js +0 -137
  152. package/dist/eval/tools/glob.d.ts +0 -2
  153. package/dist/eval/tools/glob.js +0 -163
  154. package/dist/eval/tools/grep.d.ts +0 -2
  155. package/dist/eval/tools/grep.js +0 -152
  156. package/dist/eval/tools/index.d.ts +0 -7
  157. package/dist/eval/tools/index.js +0 -35
  158. package/dist/eval/tools/read.d.ts +0 -2
  159. package/dist/eval/tools/read.js +0 -122
  160. package/dist/eval/tools/types.d.ts +0 -49
  161. package/dist/eval/tools/types.js +0 -41
  162. package/dist/eval/tools/write.d.ts +0 -2
  163. package/dist/eval/tools/write.js +0 -92
  164. package/dist/eval/types.d.ts +0 -561
  165. package/dist/eval/types.js +0 -47
  166. package/dist/eval/verifiers/judge.d.ts +0 -40
  167. package/dist/eval/verifiers/judge.js +0 -256
  168. package/dist/eval/verifiers/rules.d.ts +0 -24
  169. package/dist/eval/verifiers/rules.js +0 -218
  170. package/dist/eval/verifiers/structural.d.ts +0 -14
  171. package/dist/eval/verifiers/structural.js +0 -171
  172. package/dist/eval/verifiers/traceability.d.ts +0 -23
  173. package/dist/eval/verifiers/traceability.js +0 -84
  174. package/dist/eval/verifiers/workflow-consistency.d.ts +0 -21
  175. package/dist/eval/verifiers/workflow-consistency.js +0 -225
  176. package/dist/eval/workflow-corpus.d.ts +0 -7
  177. package/dist/eval/workflow-corpus.js +0 -207
  178. package/dist/feature-system.d.ts +0 -42
  179. package/dist/feature-system.js +0 -432
  180. package/dist/internal/knowledge-digest.d.ts +0 -7
  181. package/dist/internal/knowledge-digest.js +0 -93
@@ -1,30 +0,0 @@
1
- import type { FlowStage } from "../types.js";
2
- import type { EvalCase } from "./types.js";
3
- /**
4
- * Load all eval cases under `.cclaw/evals/corpus/**`. Optionally restrict to a
5
- * single stage. Returns an empty array for a fresh install.
6
- */
7
- export declare function loadCorpus(projectRoot: string, stage?: FlowStage): Promise<EvalCase[]>;
8
- /**
9
- * Resolve a case's `fixture` path to an absolute filesystem path. The fixture
10
- * field is interpreted relative to the case's stage directory (i.e., a
11
- * sibling subdirectory or file inside `.cclaw/evals/corpus/<stage>/`).
12
- */
13
- export declare function fixturePathFor(projectRoot: string, caseEntry: EvalCase): string | undefined;
14
- /**
15
- * Read the fixture artifact text for a case. Returns `undefined` if the case
16
- * has no fixture reference. Throws a descriptive error if the path exists in
17
- * the case but not on disk — structural fixtures ship alongside cases.
18
- */
19
- export declare function readFixtureArtifact(projectRoot: string, caseEntry: EvalCase): Promise<string | undefined>;
20
- /**
21
- * Resolve an entry from `extraFixtures` to an absolute filesystem path,
22
- * relative to the case's stage directory (same convention as `fixture`).
23
- */
24
- export declare function extraFixturePath(projectRoot: string, caseEntry: EvalCase, label: string): string | undefined;
25
- /**
26
- * Read every declared extra fixture for a case into a `{ label → text }`
27
- * map. Missing files throw so authoring mistakes surface immediately rather
28
- * than being silently skipped by cross-artifact verifiers.
29
- */
30
- export declare function readExtraFixtures(projectRoot: string, caseEntry: EvalCase): Promise<Record<string, string>>;
@@ -1,330 +0,0 @@
1
- import fs from "node:fs/promises";
2
- import path from "node:path";
3
- import { parse } from "yaml";
4
- import { EVALS_ROOT } from "../constants.js";
5
- import { exists } from "../fs-utils.js";
6
- import { FLOW_STAGES } from "../types.js";
7
- const FLOW_STAGE_SET = new Set(FLOW_STAGES);
8
- function corpusError(filePath, reason) {
9
- return new Error(`Invalid eval case at ${filePath}: ${reason}\n` +
10
- `Supported stages: ${FLOW_STAGES.join(", ")}`);
11
- }
12
- function isRecord(value) {
13
- return typeof value === "object" && value !== null && !Array.isArray(value);
14
- }
15
- function readStringArray(filePath, context, value) {
16
- if (value === undefined)
17
- return undefined;
18
- if (!Array.isArray(value) || value.some((item) => typeof item !== "string")) {
19
- throw corpusError(filePath, `"${context}" must be an array of strings`);
20
- }
21
- return value;
22
- }
23
- function readNonNegativeInteger(filePath, context, value) {
24
- if (value === undefined)
25
- return undefined;
26
- if (typeof value !== "number" || !Number.isFinite(value) || value < 0 || !Number.isInteger(value)) {
27
- throw corpusError(filePath, `"${context}" must be a non-negative integer`);
28
- }
29
- return value;
30
- }
31
- function parseStructural(filePath, raw) {
32
- if (raw === undefined)
33
- return undefined;
34
- if (!isRecord(raw)) {
35
- throw corpusError(filePath, `"expected.structural" must be a mapping`);
36
- }
37
- const requiredSections = readStringArray(filePath, "expected.structural.required_sections", raw.required_sections ?? raw.requiredSections);
38
- const forbiddenPatterns = readStringArray(filePath, "expected.structural.forbidden_patterns", raw.forbidden_patterns ?? raw.forbiddenPatterns);
39
- const requiredFrontmatterKeys = readStringArray(filePath, "expected.structural.required_frontmatter_keys", raw.required_frontmatter_keys ?? raw.requiredFrontmatterKeys);
40
- const minLines = readNonNegativeInteger(filePath, "expected.structural.min_lines", raw.min_lines ?? raw.minLines);
41
- const maxLines = readNonNegativeInteger(filePath, "expected.structural.max_lines", raw.max_lines ?? raw.maxLines);
42
- const minChars = readNonNegativeInteger(filePath, "expected.structural.min_chars", raw.min_chars ?? raw.minChars);
43
- const maxChars = readNonNegativeInteger(filePath, "expected.structural.max_chars", raw.max_chars ?? raw.maxChars);
44
- const structural = {};
45
- if (requiredSections)
46
- structural.requiredSections = requiredSections;
47
- if (forbiddenPatterns)
48
- structural.forbiddenPatterns = forbiddenPatterns;
49
- if (requiredFrontmatterKeys)
50
- structural.requiredFrontmatterKeys = requiredFrontmatterKeys;
51
- if (minLines !== undefined)
52
- structural.minLines = minLines;
53
- if (maxLines !== undefined)
54
- structural.maxLines = maxLines;
55
- if (minChars !== undefined)
56
- structural.minChars = minChars;
57
- if (maxChars !== undefined)
58
- structural.maxChars = maxChars;
59
- return structural;
60
- }
61
- function parseRegexRule(filePath, context, value) {
62
- if (typeof value === "string") {
63
- return { pattern: value };
64
- }
65
- if (!isRecord(value)) {
66
- throw corpusError(filePath, `"${context}" entries must be either a string or a mapping with "pattern"`);
67
- }
68
- const pattern = value.pattern;
69
- if (typeof pattern !== "string" || pattern.length === 0) {
70
- throw corpusError(filePath, `"${context}" mapping entry must include a non-empty "pattern" string`);
71
- }
72
- const flags = value.flags;
73
- if (flags !== undefined && typeof flags !== "string") {
74
- throw corpusError(filePath, `"${context}" flags must be a string`);
75
- }
76
- const description = value.description;
77
- if (description !== undefined && typeof description !== "string") {
78
- throw corpusError(filePath, `"${context}" description must be a string`);
79
- }
80
- const rule = { pattern };
81
- if (flags !== undefined)
82
- rule.flags = flags;
83
- if (description !== undefined)
84
- rule.description = description;
85
- return rule;
86
- }
87
- function parseRegexRules(filePath, context, value) {
88
- if (value === undefined)
89
- return undefined;
90
- if (!Array.isArray(value)) {
91
- throw corpusError(filePath, `"${context}" must be an array`);
92
- }
93
- return value.map((entry, index) => parseRegexRule(filePath, `${context}[${index}]`, entry));
94
- }
95
- function parseOccurrenceBounds(filePath, context, value) {
96
- if (value === undefined)
97
- return undefined;
98
- if (!isRecord(value)) {
99
- throw corpusError(filePath, `"${context}" must be a mapping of phrase → integer`);
100
- }
101
- const out = {};
102
- for (const [phrase, count] of Object.entries(value)) {
103
- if (typeof count !== "number" || !Number.isFinite(count) || !Number.isInteger(count) || count < 0) {
104
- throw corpusError(filePath, `"${context}.${phrase}" must be a non-negative integer`);
105
- }
106
- out[phrase] = count;
107
- }
108
- return out;
109
- }
110
- function parseRules(filePath, raw) {
111
- if (raw === undefined)
112
- return undefined;
113
- if (!isRecord(raw)) {
114
- throw corpusError(filePath, `"expected.rules" must be a mapping`);
115
- }
116
- const mustContain = readStringArray(filePath, "expected.rules.must_contain", raw.must_contain ?? raw.mustContain);
117
- const mustNotContain = readStringArray(filePath, "expected.rules.must_not_contain", raw.must_not_contain ?? raw.mustNotContain);
118
- const regexRequired = parseRegexRules(filePath, "expected.rules.regex_required", raw.regex_required ?? raw.regexRequired);
119
- const regexForbidden = parseRegexRules(filePath, "expected.rules.regex_forbidden", raw.regex_forbidden ?? raw.regexForbidden);
120
- const minOccurrences = parseOccurrenceBounds(filePath, "expected.rules.min_occurrences", raw.min_occurrences ?? raw.minOccurrences);
121
- const maxOccurrences = parseOccurrenceBounds(filePath, "expected.rules.max_occurrences", raw.max_occurrences ?? raw.maxOccurrences);
122
- const uniqueBulletsInSection = readStringArray(filePath, "expected.rules.unique_bullets_in_section", raw.unique_bullets_in_section ?? raw.uniqueBulletsInSection);
123
- const rules = {};
124
- if (mustContain)
125
- rules.mustContain = mustContain;
126
- if (mustNotContain)
127
- rules.mustNotContain = mustNotContain;
128
- if (regexRequired)
129
- rules.regexRequired = regexRequired;
130
- if (regexForbidden)
131
- rules.regexForbidden = regexForbidden;
132
- if (minOccurrences)
133
- rules.minOccurrences = minOccurrences;
134
- if (maxOccurrences)
135
- rules.maxOccurrences = maxOccurrences;
136
- if (uniqueBulletsInSection)
137
- rules.uniqueBulletsInSection = uniqueBulletsInSection;
138
- return Object.keys(rules).length === 0 ? undefined : rules;
139
- }
140
- function parseTraceability(filePath, raw) {
141
- if (raw === undefined)
142
- return undefined;
143
- if (!isRecord(raw)) {
144
- throw corpusError(filePath, `"expected.traceability" must be a mapping`);
145
- }
146
- const idPattern = raw.id_pattern ?? raw.idPattern;
147
- if (typeof idPattern !== "string" || idPattern.length === 0) {
148
- throw corpusError(filePath, `"expected.traceability.id_pattern" must be a non-empty regex source`);
149
- }
150
- const idFlags = raw.id_flags ?? raw.idFlags;
151
- if (idFlags !== undefined && typeof idFlags !== "string") {
152
- throw corpusError(filePath, `"expected.traceability.id_flags" must be a string`);
153
- }
154
- const source = raw.source;
155
- if (typeof source !== "string" || source.length === 0) {
156
- throw corpusError(filePath, `"expected.traceability.source" must be "self" or an extra_fixtures label`);
157
- }
158
- const requireInRaw = raw.require_in ?? raw.requireIn;
159
- const requireIn = readStringArray(filePath, "expected.traceability.require_in", requireInRaw);
160
- if (!requireIn || requireIn.length === 0) {
161
- throw corpusError(filePath, `"expected.traceability.require_in" must be a non-empty array`);
162
- }
163
- const out = { idPattern, source, requireIn };
164
- if (idFlags !== undefined)
165
- out.idFlags = idFlags;
166
- return out;
167
- }
168
- function parseExtraFixtures(filePath, raw) {
169
- if (raw === undefined)
170
- return undefined;
171
- if (!isRecord(raw)) {
172
- throw corpusError(filePath, `"extra_fixtures" must be a mapping of label → path`);
173
- }
174
- const out = {};
175
- for (const [label, value] of Object.entries(raw)) {
176
- if (typeof value !== "string" || value.length === 0) {
177
- throw corpusError(filePath, `"extra_fixtures.${label}" must be a non-empty path string`);
178
- }
179
- out[label] = value;
180
- }
181
- return Object.keys(out).length === 0 ? undefined : out;
182
- }
183
- function parseExpected(filePath, raw) {
184
- if (raw === undefined)
185
- return undefined;
186
- if (!isRecord(raw)) {
187
- throw corpusError(filePath, `"expected" must be a mapping`);
188
- }
189
- const shape = {};
190
- const structural = parseStructural(filePath, raw.structural);
191
- if (structural)
192
- shape.structural = structural;
193
- const rules = parseRules(filePath, raw.rules);
194
- if (rules)
195
- shape.rules = rules;
196
- const traceability = parseTraceability(filePath, raw.traceability);
197
- if (traceability)
198
- shape.traceability = traceability;
199
- if (raw.judge !== undefined) {
200
- if (!isRecord(raw.judge)) {
201
- throw corpusError(filePath, `"expected.judge" must be a mapping`);
202
- }
203
- shape.judge = raw.judge;
204
- }
205
- return Object.keys(shape).length === 0 ? undefined : shape;
206
- }
207
- function validateCase(filePath, raw) {
208
- if (!isRecord(raw)) {
209
- throw corpusError(filePath, "top-level value must be a mapping");
210
- }
211
- const id = raw.id;
212
- if (typeof id !== "string" || id.trim().length === 0) {
213
- throw corpusError(filePath, `"id" must be a non-empty string`);
214
- }
215
- const stageRaw = raw.stage;
216
- if (typeof stageRaw !== "string" || !FLOW_STAGE_SET.has(stageRaw)) {
217
- throw corpusError(filePath, `"stage" must be one of: ${FLOW_STAGES.join(", ")}`);
218
- }
219
- const inputPrompt = raw.input_prompt ?? raw.inputPrompt;
220
- if (typeof inputPrompt !== "string" || inputPrompt.trim().length === 0) {
221
- throw corpusError(filePath, `"input_prompt" must be a non-empty string`);
222
- }
223
- const contextFiles = readStringArray(filePath, "context_files", raw.context_files ?? raw.contextFiles);
224
- const expected = parseExpected(filePath, raw.expected);
225
- const fixture = typeof raw.fixture === "string" ? raw.fixture : undefined;
226
- const extraFixtures = parseExtraFixtures(filePath, raw.extra_fixtures ?? raw.extraFixtures);
227
- return {
228
- id: id.trim(),
229
- stage: stageRaw,
230
- inputPrompt: inputPrompt.trim(),
231
- contextFiles,
232
- expected,
233
- fixture,
234
- extraFixtures
235
- };
236
- }
237
- /**
238
- * Load all eval cases under `.cclaw/evals/corpus/**`. Optionally restrict to a
239
- * single stage. Returns an empty array for a fresh install.
240
- */
241
- export async function loadCorpus(projectRoot, stage) {
242
- const corpusRoot = path.join(projectRoot, EVALS_ROOT, "corpus");
243
- if (!(await exists(corpusRoot))) {
244
- return [];
245
- }
246
- const cases = [];
247
- const stageDirs = stage
248
- ? [path.join(corpusRoot, stage)]
249
- : (await fs.readdir(corpusRoot, { withFileTypes: true }))
250
- .filter((entry) => entry.isDirectory())
251
- .filter((entry) => FLOW_STAGE_SET.has(entry.name))
252
- .map((entry) => path.join(corpusRoot, entry.name));
253
- for (const stageDir of stageDirs) {
254
- if (!(await exists(stageDir)))
255
- continue;
256
- const entries = await fs.readdir(stageDir, { withFileTypes: true });
257
- for (const entry of entries) {
258
- if (!entry.isFile())
259
- continue;
260
- if (!entry.name.endsWith(".yaml") && !entry.name.endsWith(".yml"))
261
- continue;
262
- const filePath = path.join(stageDir, entry.name);
263
- let parsed;
264
- try {
265
- parsed = parse(await fs.readFile(filePath, "utf8"));
266
- }
267
- catch (err) {
268
- throw corpusError(filePath, err instanceof Error ? err.message : String(err));
269
- }
270
- cases.push(validateCase(filePath, parsed));
271
- }
272
- }
273
- cases.sort((a, b) => a.stage.localeCompare(b.stage) || a.id.localeCompare(b.id));
274
- return cases;
275
- }
276
- /**
277
- * Resolve a case's `fixture` path to an absolute filesystem path. The fixture
278
- * field is interpreted relative to the case's stage directory (i.e., a
279
- * sibling subdirectory or file inside `.cclaw/evals/corpus/<stage>/`).
280
- */
281
- export function fixturePathFor(projectRoot, caseEntry) {
282
- if (!caseEntry.fixture)
283
- return undefined;
284
- return path.resolve(projectRoot, EVALS_ROOT, "corpus", caseEntry.stage, caseEntry.fixture);
285
- }
286
- /**
287
- * Read the fixture artifact text for a case. Returns `undefined` if the case
288
- * has no fixture reference. Throws a descriptive error if the path exists in
289
- * the case but not on disk — structural fixtures ship alongside cases.
290
- */
291
- export async function readFixtureArtifact(projectRoot, caseEntry) {
292
- const fixturePath = fixturePathFor(projectRoot, caseEntry);
293
- if (!fixturePath)
294
- return undefined;
295
- if (!(await exists(fixturePath))) {
296
- throw new Error(`Fixture missing for case ${caseEntry.stage}/${caseEntry.id}: ${fixturePath}`);
297
- }
298
- return fs.readFile(fixturePath, "utf8");
299
- }
300
- /**
301
- * Resolve an entry from `extraFixtures` to an absolute filesystem path,
302
- * relative to the case's stage directory (same convention as `fixture`).
303
- */
304
- export function extraFixturePath(projectRoot, caseEntry, label) {
305
- const value = caseEntry.extraFixtures?.[label];
306
- if (!value)
307
- return undefined;
308
- return path.resolve(projectRoot, EVALS_ROOT, "corpus", caseEntry.stage, value);
309
- }
310
- /**
311
- * Read every declared extra fixture for a case into a `{ label → text }`
312
- * map. Missing files throw so authoring mistakes surface immediately rather
313
- * than being silently skipped by cross-artifact verifiers.
314
- */
315
- export async function readExtraFixtures(projectRoot, caseEntry) {
316
- const out = {};
317
- if (!caseEntry.extraFixtures)
318
- return out;
319
- for (const label of Object.keys(caseEntry.extraFixtures)) {
320
- const filePath = extraFixturePath(projectRoot, caseEntry, label);
321
- if (!filePath)
322
- continue;
323
- if (!(await exists(filePath))) {
324
- throw new Error(`Extra fixture missing for ${caseEntry.stage}/${caseEntry.id} ` +
325
- `(label="${label}"): ${filePath}`);
326
- }
327
- out[label] = await fs.readFile(filePath, "utf8");
328
- }
329
- return out;
330
- }
@@ -1,102 +0,0 @@
1
- import type { ChatUsage } from "./llm-client.js";
2
- import type { ResolvedEvalConfig, TokenPricing } from "./types.js";
3
- /**
4
- * Builtin pricing fallback. Intentionally conservative: when the user
5
- * hasn't configured pricing and we don't know the model, we default to a
6
- * "small model" USD schedule so the cap can still do something useful.
7
- *
8
- * Values are USD per 1K tokens. Sources are public pricing pages as of
9
- * 2026-04; update by editing this constant, not the guard logic.
10
- */
11
- export declare const DEFAULT_TOKEN_PRICING: Readonly<Record<string, TokenPricing>>;
12
- /** Hard default when neither config nor builtins know the model. */
13
- export declare const UNKNOWN_MODEL_PRICING: TokenPricing;
14
- export interface SpendLedger {
15
- /** ISO date (`YYYY-MM-DD` in UTC) — also embedded in the file name. */
16
- date: string;
17
- /** USD spent so far today across every call that hit the guard. */
18
- totalUsd: number;
19
- /** Number of `chat()` calls accounted for. */
20
- calls: number;
21
- /** Per-model breakdown for the report. */
22
- byModel: Record<string, {
23
- tokensIn: number;
24
- tokensOut: number;
25
- usd: number;
26
- }>;
27
- }
28
- export declare class DailyCostCapExceededError extends Error {
29
- readonly capUsd: number;
30
- readonly projectedUsd: number;
31
- readonly currentUsd: number;
32
- constructor(opts: {
33
- capUsd: number;
34
- projectedUsd: number;
35
- currentUsd: number;
36
- });
37
- }
38
- /**
39
- * Per-run cost cap — enforced in-memory, no ledger file. Complements the
40
- * daily cap so a single long workflow run can't blow the whole day's
41
- * budget even if the daily cap is generous. Opt-in via
42
- * `--max-cost-usd=<n>` on the CLI or `CCLAW_EVAL_MAX_COST_USD`.
43
- */
44
- export declare class RunCostCapExceededError extends Error {
45
- readonly capUsd: number;
46
- readonly projectedUsd: number;
47
- readonly currentUsd: number;
48
- constructor(opts: {
49
- capUsd: number;
50
- projectedUsd: number;
51
- currentUsd: number;
52
- });
53
- }
54
- declare function utcDate(now?: Date): string;
55
- declare function pricingFor(model: string, config: Pick<ResolvedEvalConfig, "tokenPricing">): TokenPricing;
56
- /**
57
- * Compute USD cost of a single `ChatUsage` using the given `model` pricing
58
- * schedule. Returns 0 when `usage.totalTokens` is 0 (e.g. transport error
59
- * before first token).
60
- */
61
- export declare function computeUsageUsd(model: string, usage: ChatUsage, config: Pick<ResolvedEvalConfig, "tokenPricing">): number;
62
- declare function ledgerPath(projectRoot: string, date: string): string;
63
- declare function readLedger(file: string, date: string): Promise<SpendLedger>;
64
- declare function writeLedger(file: string, ledger: SpendLedger): Promise<void>;
65
- /**
66
- * Guard a single LLM call against the daily USD cap. Returns the updated
67
- * ledger on success; throws `DailyCostCapExceededError` when the projected
68
- * total would cross the cap. When `config.dailyUsdCap` is unset, the guard
69
- * is a no-op — no file writes, no ledger — so non-judge runs never touch
70
- * the filesystem.
71
- */
72
- export interface CostGuard {
73
- /**
74
- * Commit the USD cost of a finished call to the ledger. When `dailyUsdCap`
75
- * is set, refuses the commit if the projected total would exceed the cap.
76
- */
77
- commit(model: string, usage: ChatUsage): Promise<number>;
78
- /** Snapshot the current ledger (or undefined when no cap is set). */
79
- snapshot(): Promise<SpendLedger | undefined>;
80
- }
81
- export interface CreateCostGuardOptions {
82
- /** Clock injection for tests. */
83
- now?: () => Date;
84
- /** Override the default filesystem root for the ledger. */
85
- ledgerPath?: string;
86
- /**
87
- * Per-run (in-memory) USD cap. Independent from the persisted daily
88
- * cap so a single `cclaw eval` invocation can be budgeted without
89
- * touching the shared nightly ledger. Undefined = unlimited.
90
- */
91
- runCapUsd?: number;
92
- }
93
- export declare function createCostGuard(projectRoot: string, config: Pick<ResolvedEvalConfig, "dailyUsdCap" | "tokenPricing">, options?: CreateCostGuardOptions): CostGuard;
94
- /** Exposed for tests. */
95
- export declare const __internal: {
96
- utcDate: typeof utcDate;
97
- pricingFor: typeof pricingFor;
98
- ledgerPath: typeof ledgerPath;
99
- readLedger: typeof readLedger;
100
- writeLedger: typeof writeLedger;
101
- };
102
- export {};
@@ -1,190 +0,0 @@
1
- /**
2
- * Cost guard for the cclaw eval subsystem.
3
- *
4
- * Two responsibilities:
5
- *
6
- * 1. Convert `ChatUsage` (prompt/completion token counts) into USD using
7
- * a per-model `TokenPricing` schedule. Pricing comes from
8
- * `config.tokenPricing[model]` first, then from the builtin fallback
9
- * schedule for well-known models (z.ai GLM 5.1 at publish time).
10
- * 2. Maintain a per-day running total persisted to
11
- * `.cclaw/evals/.spend-YYYY-MM-DD.json` so that a long eval session
12
- * (or a cron-run nightly) can't blow through the configured
13
- * `dailyUsdCap`. The counter is opt-in: no cap, no writes.
14
- *
15
- * The guard is deliberately conservative — it rounds USD to 6 decimals
16
- * and never subtracts, so a CI run that errors mid-flight still shows the
17
- * partial spend in the next report.
18
- */
19
- import fs from "node:fs/promises";
20
- import path from "node:path";
21
- import { EVALS_ROOT } from "../constants.js";
22
- import { exists } from "../fs-utils.js";
23
/**
 * Builtin pricing fallback for well-known models, consulted when the user
 * has not configured pricing for a model. Models missing from this table
 * fall back to a separate hard default.
 *
 * Values are USD per 1K tokens. Sources are public pricing pages as of
 * 2026-04; update by editing this constant, not the guard logic.
 *
 * The table and each schedule are frozen: this object is shared by every
 * importer and feeds the spend cap, so it must not be altered at runtime.
 */
export const DEFAULT_TOKEN_PRICING = Object.freeze({
    "glm-5.1": Object.freeze({ input: 0.0005, output: 0.0015 }),
    "glm-4.6": Object.freeze({ input: 0.0005, output: 0.0015 }),
    "gpt-4o-mini": Object.freeze({ input: 0.00015, output: 0.0006 }),
    "gpt-4o": Object.freeze({ input: 0.005, output: 0.015 })
});
37
/**
 * Hard default when neither config nor builtins know the model.
 * USD per 1K tokens. Frozen because the same object is handed out to every
 * caller that asks for an unknown model.
 */
export const UNKNOWN_MODEL_PRICING = Object.freeze({ input: 0.001, output: 0.003 });
39
/**
 * Raised when committing a call would push the daily spend total past the
 * configured cap. Exposes the cap, the projected total and the total before
 * the call (all USD) as `capUsd`, `projectedUsd` and `currentUsd`.
 */
export class DailyCostCapExceededError extends Error {
    capUsd;
    projectedUsd;
    currentUsd;
    constructor(opts) {
        // Amounts are formatted in the order current, projected, cap.
        const amounts = [
            `current=$${opts.currentUsd.toFixed(4)}`,
            `projected=$${opts.projectedUsd.toFixed(4)}`,
            `cap=$${opts.capUsd.toFixed(4)}`
        ].join(", ");
        super(`Daily cost cap would be exceeded: ${amounts}. ` +
            `Unset CCLAW_EVAL_DAILY_USD_CAP or increase the cap to continue.`);
        this.name = "DailyCostCapExceededError";
        const { capUsd, projectedUsd, currentUsd } = opts;
        this.capUsd = capUsd;
        this.projectedUsd = projectedUsd;
        this.currentUsd = currentUsd;
    }
}
55
/**
 * Per-run cost cap error. The run cap is tracked in memory only (no ledger
 * file) and complements the daily cap, so one long workflow run cannot use
 * up the whole day's budget even when the daily cap is generous. Opt-in via
 * `--max-cost-usd=<n>` on the CLI or `CCLAW_EVAL_MAX_COST_USD`.
 *
 * Exposes the cap, the projected run total and the run total before the
 * call (all USD) as `capUsd`, `projectedUsd` and `currentUsd`.
 */
export class RunCostCapExceededError extends Error {
    capUsd;
    projectedUsd;
    currentUsd;
    constructor(opts) {
        // Amounts are formatted in the order current, projected, cap.
        const amounts = [
            `current=$${opts.currentUsd.toFixed(4)}`,
            `projected=$${opts.projectedUsd.toFixed(4)}`,
            `cap=$${opts.capUsd.toFixed(4)}`
        ].join(", ");
        super(`Run cost cap would be exceeded: ${amounts}. ` +
            `Raise --max-cost-usd or drop it to run uncapped.`);
        this.name = "RunCostCapExceededError";
        const { capUsd, projectedUsd, currentUsd } = opts;
        this.capUsd = capUsd;
        this.projectedUsd = projectedUsd;
        this.currentUsd = currentUsd;
    }
}
77
/**
 * Return the date part of `now` in UTC: the first 10 characters of its ISO
 * timestamp (`YYYY-MM-DD` for four-digit years). Defaults to the current time.
 */
function utcDate(now = new Date()) {
    const isoStamp = now.toISOString();
    return isoStamp.substring(0, 10);
}
80
/**
 * Resolve the pricing schedule for `model`.
 *
 * Lookup order: `config.tokenPricing[model]`, then the builtin
 * `DEFAULT_TOKEN_PRICING[model]`, then `UNKNOWN_MODEL_PRICING`.
 *
 * Only OWN entries of either table count. A plain `table[model]` lookup
 * would also find inherited members such as `constructor` or `toString`
 * and return a function as the "schedule", which later produces a NaN cost.
 *
 * @param {string} model - Model identifier used as the table key.
 * @param {{ tokenPricing?: Record<string, { input: number, output: number }> }} config
 * @returns {{ input: number, output: number }} Schedule for the model.
 */
function pricingFor(model, config) {
    const configured = config.tokenPricing;
    if (configured !== undefined && configured !== null && Object.hasOwn(configured, model)) {
        const custom = configured[model];
        // A falsy entry (e.g. null) means "not configured": fall through.
        if (custom)
            return custom;
    }
    if (Object.hasOwn(DEFAULT_TOKEN_PRICING, model)) {
        const builtin = DEFAULT_TOKEN_PRICING[model];
        if (builtin)
            return builtin;
    }
    return UNKNOWN_MODEL_PRICING;
}
89
/**
 * Compute the USD cost of a single `ChatUsage` using the pricing schedule
 * resolved for `model` (prices are USD per 1K tokens).
 *
 * Returns 0 when `usage` is missing or `usage.totalTokens` is 0 or negative
 * (e.g. transport error before first token). The result is rounded to
 * 6 decimals and never negative.
 *
 * A missing or non-finite `promptTokens` / `completionTokens` counts as 0,
 * and a non-finite result collapses to 0. Without this the cost would be
 * NaN: `projected > cap` is false for NaN, so the commit would pass every
 * cap, and the NaN total would be serialized as `null`, which the ledger
 * reader replaces with 0.
 *
 * @param {string} model - Model identifier used to look up pricing.
 * @param {{ promptTokens: number, completionTokens: number, totalTokens: number }} usage
 * @param {object} config - Config carrying the optional `tokenPricing` table.
 * @returns {number} Cost in USD, >= 0, at most 6 decimals.
 */
export function computeUsageUsd(model, usage, config) {
    if (!usage || usage.totalTokens <= 0)
        return 0;
    const finiteOrZero = (count) => (Number.isFinite(count) ? count : 0);
    const schedule = pricingFor(model, config);
    const promptTokens = finiteOrZero(usage.promptTokens);
    const completionTokens = finiteOrZero(usage.completionTokens);
    const cost = (promptTokens * schedule.input) / 1_000 +
        (completionTokens * schedule.output) / 1_000;
    const rounded = Number(cost.toFixed(6));
    return Number.isFinite(rounded) ? Math.max(0, rounded) : 0;
}
102
/** Create a zeroed spend ledger for `date`: no spend, no calls, no per-model entries. */
function emptyLedger(date) {
    const zeroed = { totalUsd: 0, calls: 0, byModel: {} };
    return { date, ...zeroed };
}
105
/**
 * Build the location of the spend ledger for `date`: the file
 * `.spend-<date>.json` inside `EVALS_ROOT` under `projectRoot`.
 */
function ledgerPath(projectRoot, date) {
    const fileName = `.spend-${date}.json`;
    const segments = [projectRoot, EVALS_ROOT, fileName];
    return path.join(...segments);
}
108
/**
 * Load the spend ledger stored at `file` for the given `date`.
 *
 * Best-effort by design: a missing, unreadable or unparseable file, or one
 * recorded for another date, yields a fresh empty ledger instead of an
 * error. The file is read directly (no separate existence check), so a file
 * that disappears between check and read cannot cause a different outcome.
 *
 * Stored fields are validated individually:
 *  - `totalUsd` / `calls` must be finite, non-negative numbers, else 0.
 *    JSON such as `1e999` parses to Infinity, and a negative total would
 *    hand out extra budget.
 *  - `byModel` must be a non-array object, else `{}`.
 *
 * @param {string} file - Path of the ledger file.
 * @param {string} date - Expected ledger date (`YYYY-MM-DD`).
 * @returns {Promise<{date: string, totalUsd: number, calls: number, byModel: object}>}
 */
async function readLedger(file, date) {
    let raw;
    try {
        raw = JSON.parse(await fs.readFile(file, "utf8"));
    }
    catch {
        // Deliberate fallback: no usable ledger means "nothing spent yet".
        return emptyLedger(date);
    }
    if (raw?.date !== date)
        return emptyLedger(date);
    const amountOrZero = (value) => typeof value === "number" && Number.isFinite(value) && value >= 0 ? value : 0;
    const hasModelMap = raw.byModel !== null &&
        typeof raw.byModel === "object" &&
        !Array.isArray(raw.byModel);
    return {
        date,
        totalUsd: amountOrZero(raw.totalUsd),
        calls: amountOrZero(raw.calls),
        byModel: hasModelMap ? raw.byModel : {}
    };
}
126
/**
 * Persist `ledger` to `file` as pretty-printed JSON followed by a newline,
 * creating the parent directory when needed.
 *
 * The content is written to a uniquely named sibling temp file and then
 * renamed over `file`. An interrupted in-place write could leave a
 * truncated file, which the ledger reader treats as "nothing spent yet",
 * silently resetting the daily total.
 *
 * @param {string} file - Destination path of the ledger.
 * @param {object} ledger - Ledger to serialize.
 * @returns {Promise<void>}
 * @throws Rejects with the underlying filesystem error; the temp file is
 *   removed on failure.
 */
async function writeLedger(file, ledger) {
    await fs.mkdir(path.dirname(file), { recursive: true });
    // Unique per write, so overlapping writes never share a staging file.
    const uniqueTag = `${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2)}`;
    const staging = `${file}.${uniqueTag}.tmp`;
    try {
        await fs.writeFile(staging, `${JSON.stringify(ledger, null, 2)}\n`, "utf8");
        await fs.rename(staging, file);
    }
    catch (err) {
        // Cleanup is best-effort; the original failure is what the caller needs.
        await fs.unlink(staging).catch(() => { });
        throw err;
    }
}
130
/**
 * Create a cost guard that charges finished LLM calls against two
 * independent budgets:
 *
 *  - a per-run cap (`options.runCapUsd`), tracked in memory only;
 *  - a daily cap (`config.dailyUsdCap`), tracked in a ledger file. When the
 *    daily cap is unset the ledger is never read or written.
 *
 * @param {string} projectRoot - Root used to derive the default ledger path.
 * @param {{ dailyUsdCap?: number, tokenPricing?: object }} config
 * @param {{ now?: () => Date, ledgerPath?: string, runCapUsd?: number }} [options]
 *   `now` injects the clock, `ledgerPath` overrides the ledger file,
 *   `runCapUsd` sets the per-run cap (undefined = unlimited).
 * @returns {{ commit: Function, snapshot: Function }} The guard.
 *
 * NOTE: a daily commit is a read-modify-write of the ledger across awaits;
 * overlapping commits are not serialized here.
 */
export function createCostGuard(projectRoot, config, options = {}) {
    const now = options.now ?? (() => new Date());
    const runCap = options.runCapUsd;
    // USD sums are re-rounded to 6 decimals after every addition.
    const addUsd = (left, right) => Number((left + right).toFixed(6));
    // Date and ledger file come from ONE clock reading. Reading the clock
    // twice could pair yesterday's date with today's file at UTC midnight;
    // that entry would then be discarded as a date mismatch on the next read.
    const resolveLedger = () => {
        const date = utcDate(now());
        return { date, target: options.ledgerPath ?? ledgerPath(projectRoot, date) };
    };
    let runTotalUsd = 0;
    return {
        /**
         * Commit the cost of one finished call; resolves to that cost in USD.
         * Rejects with `RunCostCapExceededError` / `DailyCostCapExceededError`
         * (recording nothing) when the respective projected total exceeds its cap.
         */
        async commit(model, usage) {
            const usd = computeUsageUsd(model, usage, config);
            if (runCap !== undefined) {
                const runProjected = addUsd(runTotalUsd, usd);
                if (runProjected > runCap) {
                    throw new RunCostCapExceededError({
                        capUsd: runCap,
                        projectedUsd: runProjected,
                        currentUsd: runTotalUsd
                    });
                }
            }
            if (config.dailyUsdCap === undefined) {
                runTotalUsd = addUsd(runTotalUsd, usd);
                return usd;
            }
            const { date, target } = resolveLedger();
            const ledger = await readLedger(target, date);
            const dayProjected = addUsd(ledger.totalUsd, usd);
            if (dayProjected > config.dailyUsdCap) {
                throw new DailyCostCapExceededError({
                    capUsd: config.dailyUsdCap,
                    projectedUsd: dayProjected,
                    currentUsd: ledger.totalUsd
                });
            }
            ledger.totalUsd = dayProjected;
            ledger.calls += 1;
            // Own-property check: a model named like an Object.prototype member
            // (e.g. "toString") must not resolve to the inherited function.
            const modelEntry = Object.hasOwn(ledger.byModel, model) && ledger.byModel[model]
                ? ledger.byModel[model]
                : { tokensIn: 0, tokensOut: 0, usd: 0 };
            modelEntry.tokensIn += usage.promptTokens;
            modelEntry.tokensOut += usage.completionTokens;
            modelEntry.usd = addUsd(modelEntry.usd, usd);
            ledger.byModel[model] = modelEntry;
            await writeLedger(target, ledger);
            // Read runTotalUsd again after the awaits instead of reusing an
            // earlier projection.
            runTotalUsd = addUsd(runTotalUsd, usd);
            return usd;
        },
        /** Current daily ledger, or undefined when no daily cap is configured. */
        async snapshot() {
            if (config.dailyUsdCap === undefined)
                return undefined;
            const { date, target } = resolveLedger();
            return readLedger(target, date);
        }
    };
}
183
/** Module-private helpers, bundled in one object so tests can reach them. */
export const __internal = { utcDate, pricingFor, ledgerPath, readLedger, writeLedger };