cclaw-cli 0.49.0 → 0.51.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183) hide show
  1. package/README.md +57 -84
  2. package/dist/artifact-linter.d.ts +4 -0
  3. package/dist/artifact-linter.js +24 -3
  4. package/dist/cli.d.ts +1 -19
  5. package/dist/cli.js +49 -491
  6. package/dist/constants.d.ts +2 -13
  7. package/dist/constants.js +1 -43
  8. package/dist/content/closeout-guidance.d.ts +14 -0
  9. package/dist/content/closeout-guidance.js +42 -0
  10. package/dist/content/core-agents.js +55 -17
  11. package/dist/content/decision-protocol.d.ts +12 -0
  12. package/dist/content/decision-protocol.js +20 -0
  13. package/dist/content/diff-command.d.ts +1 -2
  14. package/dist/content/diff-command.js +8 -94
  15. package/dist/content/examples.d.ts +4 -10
  16. package/dist/content/examples.js +10 -20
  17. package/dist/content/hook-events.js +2 -2
  18. package/dist/content/hook-inline-snippets.d.ts +5 -2
  19. package/dist/content/hook-inline-snippets.js +33 -1
  20. package/dist/content/hook-manifest.d.ts +3 -4
  21. package/dist/content/hook-manifest.js +11 -12
  22. package/dist/content/hooks.js +44 -21
  23. package/dist/content/ideate-command.d.ts +2 -0
  24. package/dist/content/ideate-command.js +34 -25
  25. package/dist/content/iron-laws.d.ts +5 -5
  26. package/dist/content/iron-laws.js +5 -5
  27. package/dist/content/language-policy.d.ts +2 -0
  28. package/dist/content/language-policy.js +13 -0
  29. package/dist/content/learnings.d.ts +3 -4
  30. package/dist/content/learnings.js +26 -50
  31. package/dist/content/meta-skill.js +33 -22
  32. package/dist/content/next-command.js +41 -38
  33. package/dist/content/node-hooks.js +17 -345
  34. package/dist/content/opencode-plugin.js +5 -103
  35. package/dist/content/research-playbooks.js +14 -14
  36. package/dist/content/review-loop.d.ts +2 -0
  37. package/dist/content/review-loop.js +8 -0
  38. package/dist/content/session-hooks.js +15 -47
  39. package/dist/content/skills.d.ts +0 -5
  40. package/dist/content/skills.js +55 -128
  41. package/dist/content/stage-common-guidance.d.ts +0 -1
  42. package/dist/content/stage-common-guidance.js +17 -14
  43. package/dist/content/stage-schema.d.ts +26 -1
  44. package/dist/content/stage-schema.js +121 -40
  45. package/dist/content/stages/_lint-metadata/index.js +9 -15
  46. package/dist/content/stages/brainstorm.js +22 -43
  47. package/dist/content/stages/design.js +37 -57
  48. package/dist/content/stages/plan.js +22 -13
  49. package/dist/content/stages/review.js +24 -27
  50. package/dist/content/stages/scope.js +34 -46
  51. package/dist/content/stages/ship.js +7 -4
  52. package/dist/content/stages/spec.js +20 -9
  53. package/dist/content/stages/tdd.js +64 -44
  54. package/dist/content/start-command.js +13 -12
  55. package/dist/content/status-command.d.ts +2 -7
  56. package/dist/content/status-command.js +19 -146
  57. package/dist/content/subagents.d.ts +0 -5
  58. package/dist/content/subagents.js +51 -28
  59. package/dist/content/templates.d.ts +1 -1
  60. package/dist/content/templates.js +126 -135
  61. package/dist/content/track-render-context.d.ts +17 -0
  62. package/dist/content/track-render-context.js +44 -0
  63. package/dist/content/tree-command.d.ts +1 -2
  64. package/dist/content/tree-command.js +4 -87
  65. package/dist/content/utility-skills.d.ts +2 -29
  66. package/dist/content/utility-skills.js +2 -1534
  67. package/dist/content/view-command.js +31 -11
  68. package/dist/delegation.d.ts +1 -1
  69. package/dist/delegation.js +5 -15
  70. package/dist/doctor-registry.js +20 -21
  71. package/dist/doctor.js +88 -344
  72. package/dist/flow-state.d.ts +3 -0
  73. package/dist/flow-state.js +2 -0
  74. package/dist/harness-adapters.d.ts +1 -1
  75. package/dist/harness-adapters.js +51 -58
  76. package/dist/install.js +128 -358
  77. package/dist/internal/advance-stage.js +3 -9
  78. package/dist/internal/compound-readiness.d.ts +1 -1
  79. package/dist/internal/compound-readiness.js +1 -1
  80. package/dist/internal/tdd-loop-status.d.ts +1 -1
  81. package/dist/internal/tdd-loop-status.js +1 -1
  82. package/dist/knowledge-store.d.ts +16 -10
  83. package/dist/knowledge-store.js +51 -15
  84. package/dist/policy.js +16 -105
  85. package/dist/run-archive.d.ts +4 -6
  86. package/dist/run-archive.js +15 -20
  87. package/dist/run-persistence.d.ts +2 -2
  88. package/dist/run-persistence.js +3 -9
  89. package/package.json +1 -2
  90. package/dist/content/archive-command.d.ts +0 -2
  91. package/dist/content/archive-command.js +0 -124
  92. package/dist/content/compound-command.d.ts +0 -5
  93. package/dist/content/compound-command.js +0 -193
  94. package/dist/content/contexts.d.ts +0 -18
  95. package/dist/content/contexts.js +0 -24
  96. package/dist/content/contracts.d.ts +0 -2
  97. package/dist/content/contracts.js +0 -51
  98. package/dist/content/doctor-references.d.ts +0 -2
  99. package/dist/content/doctor-references.js +0 -150
  100. package/dist/content/eval-scaffold.d.ts +0 -15
  101. package/dist/content/eval-scaffold.js +0 -370
  102. package/dist/content/feature-command.d.ts +0 -2
  103. package/dist/content/feature-command.js +0 -123
  104. package/dist/content/flow-map.d.ts +0 -23
  105. package/dist/content/flow-map.js +0 -134
  106. package/dist/content/harness-doc.d.ts +0 -2
  107. package/dist/content/harness-doc.js +0 -202
  108. package/dist/content/harness-playbooks.d.ts +0 -24
  109. package/dist/content/harness-playbooks.js +0 -393
  110. package/dist/content/harness-tool-refs.d.ts +0 -20
  111. package/dist/content/harness-tool-refs.js +0 -268
  112. package/dist/content/ops-command.d.ts +0 -2
  113. package/dist/content/ops-command.js +0 -71
  114. package/dist/content/protocols.d.ts +0 -7
  115. package/dist/content/protocols.js +0 -215
  116. package/dist/content/retro-command.d.ts +0 -2
  117. package/dist/content/retro-command.js +0 -165
  118. package/dist/content/rewind-command.d.ts +0 -2
  119. package/dist/content/rewind-command.js +0 -106
  120. package/dist/content/tdd-log-command.d.ts +0 -2
  121. package/dist/content/tdd-log-command.js +0 -85
  122. package/dist/eval/agents/single-shot.d.ts +0 -27
  123. package/dist/eval/agents/single-shot.js +0 -79
  124. package/dist/eval/agents/with-tools.d.ts +0 -44
  125. package/dist/eval/agents/with-tools.js +0 -261
  126. package/dist/eval/agents/workflow.d.ts +0 -31
  127. package/dist/eval/agents/workflow.js +0 -155
  128. package/dist/eval/baseline.d.ts +0 -38
  129. package/dist/eval/baseline.js +0 -282
  130. package/dist/eval/config-loader.d.ts +0 -14
  131. package/dist/eval/config-loader.js +0 -395
  132. package/dist/eval/corpus.d.ts +0 -30
  133. package/dist/eval/corpus.js +0 -330
  134. package/dist/eval/cost-guard.d.ts +0 -102
  135. package/dist/eval/cost-guard.js +0 -190
  136. package/dist/eval/diff.d.ts +0 -64
  137. package/dist/eval/diff.js +0 -323
  138. package/dist/eval/llm-client.d.ts +0 -176
  139. package/dist/eval/llm-client.js +0 -267
  140. package/dist/eval/mode.d.ts +0 -28
  141. package/dist/eval/mode.js +0 -61
  142. package/dist/eval/progress.d.ts +0 -83
  143. package/dist/eval/progress.js +0 -59
  144. package/dist/eval/report.d.ts +0 -11
  145. package/dist/eval/report.js +0 -181
  146. package/dist/eval/rubric-loader.d.ts +0 -20
  147. package/dist/eval/rubric-loader.js +0 -143
  148. package/dist/eval/runner.d.ts +0 -81
  149. package/dist/eval/runner.js +0 -746
  150. package/dist/eval/runs.d.ts +0 -41
  151. package/dist/eval/runs.js +0 -114
  152. package/dist/eval/sandbox.d.ts +0 -38
  153. package/dist/eval/sandbox.js +0 -137
  154. package/dist/eval/tools/glob.d.ts +0 -2
  155. package/dist/eval/tools/glob.js +0 -163
  156. package/dist/eval/tools/grep.d.ts +0 -2
  157. package/dist/eval/tools/grep.js +0 -152
  158. package/dist/eval/tools/index.d.ts +0 -7
  159. package/dist/eval/tools/index.js +0 -35
  160. package/dist/eval/tools/read.d.ts +0 -2
  161. package/dist/eval/tools/read.js +0 -122
  162. package/dist/eval/tools/types.d.ts +0 -49
  163. package/dist/eval/tools/types.js +0 -41
  164. package/dist/eval/tools/write.d.ts +0 -2
  165. package/dist/eval/tools/write.js +0 -92
  166. package/dist/eval/types.d.ts +0 -561
  167. package/dist/eval/types.js +0 -47
  168. package/dist/eval/verifiers/judge.d.ts +0 -40
  169. package/dist/eval/verifiers/judge.js +0 -256
  170. package/dist/eval/verifiers/rules.d.ts +0 -24
  171. package/dist/eval/verifiers/rules.js +0 -218
  172. package/dist/eval/verifiers/structural.d.ts +0 -14
  173. package/dist/eval/verifiers/structural.js +0 -171
  174. package/dist/eval/verifiers/traceability.d.ts +0 -23
  175. package/dist/eval/verifiers/traceability.js +0 -84
  176. package/dist/eval/verifiers/workflow-consistency.d.ts +0 -21
  177. package/dist/eval/verifiers/workflow-consistency.js +0 -225
  178. package/dist/eval/workflow-corpus.d.ts +0 -7
  179. package/dist/eval/workflow-corpus.js +0 -207
  180. package/dist/feature-system.d.ts +0 -42
  181. package/dist/feature-system.js +0 -432
  182. package/dist/internal/knowledge-digest.d.ts +0 -7
  183. package/dist/internal/knowledge-digest.js +0 -93
@@ -1,256 +0,0 @@
1
- /**
2
- * LLM judge verifier — Step 3.
3
- *
4
- * Given an artifact and the stage's rubric, runs N judge samples (default
5
- * median-of-3) against the configured LLM, aggregates the per-check
6
- * scores, and returns one VerifierResult per rubric check plus one
7
- * aggregate result covering the whole stage.
8
- *
9
- * Deterministic pieces (JSON parsing, aggregation, scoring) are kept pure
10
- * so unit tests inject a stub EvalLlmClient and assert on the aggregate
11
- * math without touching the network.
12
- */
13
- import { EvalLlmError } from "../llm-client.js";
14
- import { computeUsageUsd } from "../cost-guard.js";
15
/** Lowest integer score a judge may assign to a rubric check. */
const SCALE_MIN = 1;
/** Highest integer score a judge may assign to a rubric check. */
const SCALE_MAX = 5;
/**
 * System message sent with every judge request. It spells out the meaning of
 * each point on the 1..5 scale and demands a JSON-only reply with `scores`
 * and `rationales` maps keyed by check id.
 */
const SYSTEM_PREAMBLE = [
    `You are a strict reviewer for software engineering artifacts. `,
    `You will receive a rubric and an artifact. `,
    `Score each rubric check on an integer 1..5 scale, where:\n`,
    ` 1 = does not meet the bar at all\n`,
    ` 2 = barely meets the bar, major gaps\n`,
    ` 3 = partially meets the bar, noticeable gaps\n`,
    ` 4 = mostly meets the bar, small gaps\n`,
    ` 5 = fully meets the bar\n`,
    `Respond with JSON only (no prose, no markdown fences). `,
    `Shape: {"scores": {"<check-id>": 1..5, ...}, "rationales": {"<check-id>": "one sentence", ...}}. `,
    `Include every check id in both maps. Use integer scores only.`
].join("");
28
/**
 * Median of a list of numbers.
 *
 * Returns 0 for an empty list. For an even count the two middle values are
 * averaged. The input array is not modified; sorting happens on a copy.
 */
function median(values) {
    const count = values.length;
    if (count === 0) {
        return 0;
    }
    const ordered = values.slice().sort((left, right) => left - right);
    const upper = Math.floor(count / 2);
    return count % 2 === 1
        ? ordered[upper]
        : (ordered[upper - 1] + ordered[upper]) / 2;
}
37
/**
 * Arithmetic mean of a list of numbers; an empty list yields 0 rather than
 * a division by zero.
 */
function mean(values) {
    const count = values.length;
    if (count === 0) {
        return 0;
    }
    let total = 0;
    values.forEach((value) => {
        total += value;
    });
    return total / count;
}
42
/**
 * Normalise one raw judge score.
 *
 * Anything that is not a finite number yields `undefined`. Finite numbers are
 * first limited to the SCALE_MIN..SCALE_MAX range and then rounded to the
 * nearest integer.
 */
function clampScore(raw) {
    const usable = typeof raw === "number" && Number.isFinite(raw);
    if (!usable) {
        return undefined;
    }
    const lifted = Math.max(raw, SCALE_MIN);
    const bounded = Math.min(lifted, SCALE_MAX);
    return Math.round(bounded);
}
48
/**
 * Trim a raw model reply and, when it starts with a markdown code fence,
 * remove the opening fence (optionally tagged `json`) and a trailing fence.
 * Replies that do not start with a fence are only trimmed.
 */
function stripFences(raw) {
    const text = raw.trim();
    if (text.startsWith("```")) {
        const withoutOpening = text.replace(/^```(?:json)?\s*/i, "");
        const withoutClosing = withoutOpening.replace(/```\s*$/i, "");
        return withoutClosing.trim();
    }
    return text;
}
54
/**
 * Turn one raw judge reply into `{ scores, rationales }`.
 *
 * Scores are handled strictly: a rubric check whose score is missing or not a
 * finite number is left out of `scores`. Rationales are handled leniently:
 * every rubric check gets an entry, falling back to an empty string.
 *
 * @param content Raw reply text; surrounding markdown fences are tolerated.
 * @param rubric  Object whose `checks` array supplies the check ids to read.
 * @throws Error when the reply is not valid JSON, is not a JSON object, or
 *         has no `scores` object.
 */
export function parseJudgeResponse(content, rubric) {
    const isPlainObject = (value) => Boolean(value) && typeof value === "object" && !Array.isArray(value);
    let payload;
    try {
        payload = JSON.parse(stripFences(content));
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        throw new Error(`Judge response was not valid JSON: ${reason}`);
    }
    if (!isPlainObject(payload)) {
        throw new Error("Judge response must be a JSON object with scores/rationales maps.");
    }
    const { scores: rawScores, rationales: rawRationales } = payload;
    if (!isPlainObject(rawScores)) {
        throw new Error('Judge response missing "scores" object.');
    }
    // A malformed rationales value is ignored rather than rejected.
    const rationaleSource = isPlainObject(rawRationales) ? rawRationales : undefined;
    const scores = {};
    const rationales = {};
    for (const { id } of rubric.checks) {
        const score = clampScore(rawScores[id]);
        if (score !== undefined) {
            scores[id] = score;
        }
        const text = rationaleSource?.[id];
        rationales[id] = typeof text === "string" ? text.trim() : "";
    }
    return { scores, rationales };
}
93
/**
 * Combine judge samples into one aggregate per rubric check.
 *
 * For each check the numeric scores found across the samples are collected;
 * the aggregate carries those scores, their median, their mean rounded to
 * four decimals, and a `coverage` flag that is true only when there is at
 * least one sample and every sample supplied a numeric score for the check.
 */
function aggregateSamples(rubric, samples) {
    const hasSamples = samples.length > 0;
    return rubric.checks.map(({ id }) => {
        const scored = [];
        let missing = 0;
        for (const { scores } of samples) {
            const candidate = scores[id];
            if (typeof candidate === "number") {
                scored.push(candidate);
            }
            else {
                missing += 1;
            }
        }
        return {
            checkId: id,
            samples: scored,
            median: median(scored),
            mean: Number(mean(scored).toFixed(4)),
            coverage: missing === 0 && hasSamples
        };
    });
}
113
/**
 * Build the two-message chat payload for one judge call: the shared system
 * preamble, followed by a user message that lists every rubric check (with
 * optional `[critical]` tag and scale note) and then the artifact wrapped in
 * triple double-quotes.
 */
function buildMessages(artifact, rubric) {
    const describeCheck = (check) => {
        const scaleNote = check.scale ? ` (${check.scale})` : "";
        const criticalTag = check.critical ? " [critical]" : "";
        return `- ${check.id}${criticalTag}: ${check.prompt}${scaleNote}`;
    };
    const header = `Rubric (stage=${rubric.stage}, rubric=${rubric.id}):`;
    const checkLines = rubric.checks.map(describeCheck);
    const trailer = [``, `Artifact:`, `"""`, artifact, `"""`, ``, `Return JSON only.`];
    const userContent = [header].concat(checkLines, trailer).join("\n");
    const systemMessage = { role: "system", content: SYSTEM_PREAMBLE };
    const userMessage = { role: "user", content: userContent };
    return [systemMessage, userMessage];
}
135
/**
 * Add up token usage across several judge calls.
 *
 * A usage entry that is missing, or a count that is not a finite number, is
 * counted as 0. Without this, one response lacking usage data would either
 * throw or turn every total (and any cost derived from it) into NaN.
 *
 * @param usages Array of `{ promptTokens, completionTokens, totalTokens }`.
 * @returns Object with the three summed counts.
 */
function sumUsage(usages) {
    const toCount = (value) => (typeof value === "number" && Number.isFinite(value) ? value : 0);
    const totals = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
    for (const usage of usages) {
        totals.promptTokens += toCount(usage?.promptTokens);
        totals.completionTokens += toCount(usage?.completionTokens);
        totals.totalTokens += toCount(usage?.totalTokens);
    }
    return totals;
}
146
/**
 * Run the judge against an artifact and return per-sample + aggregate data.
 *
 * The sample count comes from `caseHint.samples`, then `config.judgeSamples`,
 * then defaults to 3. The model is `config.judgeModel` falling back to
 * `config.model`; temperature is `config.judgeTemperature` falling back to 0.
 * Requests are issued one after another; when `baseSeed` is given, sample `i`
 * is sent with seed `baseSeed + i`.
 *
 * @param options `{ artifact, rubric, config, client, caseHint, baseSeed }`.
 * @returns `{ rubricId, samples, aggregates, usageUsd, durationMs }`.
 * @throws Error when the sample count is not a positive integer or is even.
 *         Errors raised by `client.chat` or by response parsing propagate
 *         unchanged.
 */
export async function runJudge(options) {
    const { artifact, rubric, config, client, caseHint, baseSeed } = options;
    const rawSamples = caseHint?.samples ?? config.judgeSamples ?? 3;
    if (!Number.isInteger(rawSamples) || rawSamples < 1) {
        throw new Error(`Invalid judge sample count: ${rawSamples}. Use a positive integer (1, 3, 5).`);
    }
    if (rawSamples % 2 === 0) {
        throw new Error(`Judge sample count must be odd (so a true median exists), got: ${rawSamples}.`);
    }
    const started = Date.now();
    const model = config.judgeModel ?? config.model;
    const temperature = config.judgeTemperature ?? 0;
    const messages = buildMessages(artifact, rubric);
    const samples = [];
    const usages = [];
    for (let i = 0; i < rawSamples; i += 1) {
        // No try/catch here: the previous handler rethrew every error as-is,
        // so letting failures propagate directly is equivalent.
        const response = await client.chat({
            model,
            messages,
            temperature,
            responseFormatJson: true,
            ...(baseSeed !== undefined ? { seed: baseSeed + i } : {}),
            timeoutMs: config.timeoutMs
        });
        usages.push(response.usage);
        samples.push(parseJudgeResponse(response.content, rubric));
    }
    const aggregates = aggregateSamples(rubric, samples);
    const usage = sumUsage(usages);
    const usageUsd = computeUsageUsd(model, usage, { tokenPricing: config.tokenPricing });
    return {
        rubricId: rubric.id,
        samples,
        aggregates,
        usageUsd,
        durationMs: Date.now() - started
    };
}
193
/** Verifier id for a rubric check: the check id prefixed with `judge:`. */
function verifierIdFor(check) {
    const { id } = check;
    return `judge:${id}`;
}
196
/**
 * Convert a judge invocation into verifier results for the runner.
 *
 * One result is emitted per aggregate whose check id exists in the rubric:
 * - `score` is the median mapped from SCALE_MIN..SCALE_MAX onto 0..1.
 * - `ok` is false when any sample failed to score the check (`coverage`
 *   false), or when the median is below the applicable floor. The floor is
 *   the larger of `caseHint.minimumScores[checkId]` and, for critical checks,
 *   `config.regression.failIfCriticalBelow`; with no floor the check passes.
 *
 * One extra `judge:required-checks` result (ok=false, score=0) is appended
 * when `caseHint.requiredChecks` names ids the rubric does not contain.
 *
 * @returns Array of `{ kind: "judge", id, ok, score, message, details }`.
 */
export function judgeResultsToVerifiers(rubric, invocation, config, caseHint) {
    const out = [];
    const failIfCriticalBelow = config.regression.failIfCriticalBelow;
    // Index the rubric once instead of scanning it per aggregate. The first
    // check wins on duplicate ids, matching what a linear search would find.
    const checksById = new Map();
    for (const check of rubric.checks) {
        if (!checksById.has(check.id)) {
            checksById.set(check.id, check);
        }
    }
    for (const aggregate of invocation.aggregates) {
        const check = checksById.get(aggregate.checkId);
        if (!check)
            continue;
        const normalized = (aggregate.median - SCALE_MIN) / (SCALE_MAX - SCALE_MIN);
        const caseMinimum = caseHint?.minimumScores?.[check.id];
        const criticalFloor = check.critical ? failIfCriticalBelow : undefined;
        const floors = [];
        if (typeof caseMinimum === "number")
            floors.push(caseMinimum);
        if (typeof criticalFloor === "number")
            floors.push(criticalFloor);
        const floor = floors.length > 0 ? Math.max(...floors) : undefined;
        const ok = !aggregate.coverage
            ? false
            : floor === undefined || aggregate.median >= floor;
        out.push({
            kind: "judge",
            id: verifierIdFor(check),
            ok,
            // Clamp because an uncovered check with no scores has median 0,
            // which maps below the 0..1 range.
            score: Number(Math.max(0, Math.min(1, normalized)).toFixed(4)),
            message: ok
                ? `median=${aggregate.median.toFixed(2)} across ${aggregate.samples.length} sample(s)`
                : aggregate.coverage
                    ? `median=${aggregate.median.toFixed(2)} below floor=${floor?.toFixed(2) ?? "n/a"}`
                    : `judge did not score every sample (${aggregate.samples.length}/${invocation.samples.length}); treated as failing`,
            details: {
                median: aggregate.median,
                mean: aggregate.mean,
                samples: aggregate.samples,
                coverage: aggregate.coverage,
                critical: check.critical === true,
                caseMinimum: caseMinimum ?? null,
                criticalFloor: criticalFloor ?? null
            }
        });
    }
    const required = caseHint?.requiredChecks ?? [];
    const missingRequired = required.filter((id) => !checksById.has(id));
    if (missingRequired.length > 0) {
        out.push({
            kind: "judge",
            id: "judge:required-checks",
            ok: false,
            score: 0,
            message: `Rubric is missing required check id(s): ${missingRequired.join(", ")}`,
            details: { missing: missingRequired, rubricId: rubric.id }
        });
    }
    return out;
}
@@ -1,24 +0,0 @@
1
- /**
2
- * Rule-based verifier: deterministic, zero-LLM checks that are richer than
3
- * structural heading/length assertions. Each rule produces exactly one
4
- * `VerifierResult` so baselines diff at the check level, and authoring a
5
- * rule sideways in YAML never silently skips.
6
- *
7
- * Semantics:
8
- *
9
- * - All substring matching is case-insensitive. Regex matching uses the
10
- * flags declared on the rule (default `"i"`).
11
- * - Rules operate on the artifact BODY (frontmatter stripped), mirroring
12
- * the structural verifier so min/max counts and length checks agree on
13
- * what "body" means.
14
- * - `uniqueBulletsInSection` takes, for each configured name, the first section
15
- * whose heading contains it (case-insensitive substring match) and flags
16
- * duplicate top-level bullets ("- item" or "* item"). A section ends at the next heading of any depth.
17
- */
18
- import type { RulesExpected, VerifierResult } from "../types.js";
19
- /**
20
- * Run every configured rule check against the artifact body. Returns `[]`
21
- * when `expected` is undefined or empty so the runner can distinguish
22
- * "no rules declared" from "all rules passed".
23
- */
24
- export declare function verifyRules(artifact: string, expected: RulesExpected | undefined): VerifierResult[];
@@ -1,218 +0,0 @@
1
- import { splitFrontmatter } from "./structural.js";
2
/**
 * Reduce free text to an id-friendly slug: lower-case, every run of
 * characters outside a-z/0-9 becomes one hyphen, a leading or trailing hyphen
 * is dropped, and the result is cut to 64 characters. When nothing is left
 * the literal "rule" is returned.
 */
function slugify(input) {
    const lowered = input.toLowerCase();
    const hyphenated = lowered.replace(/[^a-z0-9]+/g, "-");
    const trimmed = hyphenated.replace(/(^-|-$)/g, "");
    const capped = trimmed.slice(0, 64);
    return capped === "" ? "rule" : capped;
}
9
/**
 * Build one rules-verifier result. The score is 1 for a pass and 0 for a
 * failure; the `details` key is present only when a value was supplied.
 */
function result(id, ok, message, details) {
    const outcome = {
        kind: "rules",
        id,
        ok,
        score: ok ? 1 : 0,
        message
    };
    if (details !== undefined) {
        outcome.details = details;
    }
    return outcome;
}
19
/**
 * Count non-overlapping occurrences of `needle` in `haystack`, scanning left
 * to right. An empty needle counts as zero occurrences.
 */
function countOccurrences(haystack, needle) {
    const step = needle.length;
    if (step === 0) {
        return 0;
    }
    let total = 0;
    for (let at = haystack.indexOf(needle, 0); at >= 0; at = haystack.indexOf(needle, at + step)) {
        total += 1;
    }
    return total;
}
32
/**
 * Compile a rule's `pattern` into a RegExp using the rule's `flags`, or
 * case-insensitive ("i") when no flags are declared.
 *
 * @param rule `{ pattern, flags?, description? }`.
 * @returns The compiled RegExp.
 * @throws Error naming the rule (description, else pattern), pattern and
 *         flags when compilation fails. The engine's original error is
 *         attached as `cause` so its stack is not lost.
 */
function compileRegex(rule) {
    const flags = rule.flags ?? "i";
    try {
        return new RegExp(rule.pattern, flags);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        throw new Error(`Invalid regex for rule "${rule.description ?? rule.pattern}" ` +
            `(pattern=${JSON.stringify(rule.pattern)}, flags=${JSON.stringify(flags)}): ` +
            reason, { cause: err });
    }
}
43
/**
 * Human-readable label for a regex rule: its trimmed description when that
 * is non-empty, otherwise the raw pattern.
 */
function ruleLabel(rule) {
    const described = rule.description?.trim();
    return described ? described : rule.pattern;
}
46
/**
 * One result per required phrase: passes when the phrase occurs in the body,
 * compared case-insensitively.
 */
function checkMustContain(needles, body) {
    const haystack = body.toLowerCase();
    return needles.map((needle) => {
        const present = haystack.includes(needle.toLowerCase());
        const id = `rules:contains:${slugify(needle)}`;
        const message = present
            ? `Required phrase "${needle}" present.`
            : `Required phrase "${needle}" missing from body.`;
        return result(id, present, message, { phrase: needle });
    });
}
55
/**
 * One result per forbidden phrase: passes when the phrase never occurs in the
 * body (case-insensitive); failures report how often it appeared.
 */
function checkMustNotContain(needles, body) {
    const haystack = body.toLowerCase();
    return needles.map((needle) => {
        const occurrences = countOccurrences(haystack, needle.toLowerCase());
        const absent = occurrences === 0;
        const id = `rules:not-contains:${slugify(needle)}`;
        const message = absent
            ? `Forbidden phrase "${needle}" absent (as required).`
            : `Forbidden phrase "${needle}" appears ${occurrences} time(s).`;
        return result(id, absent, message, { phrase: needle, occurrences });
    });
}
66
/**
 * One result per required regex rule: passes when the pattern matches the
 * body at least once. The compiled regex is re-created with the global flag
 * so that every match is counted.
 */
function checkRegexRequired(rules, body) {
    return rules.map((rule) => {
        const label = ruleLabel(rule);
        const compiled = compileRegex(rule);
        const everyMatch = new RegExp(compiled.source, withGlobal(compiled.flags));
        const count = body.match(everyMatch)?.length ?? 0;
        const matched = count > 0;
        const message = matched
            ? `Required pattern /${rule.pattern}/ matched ${count} time(s).`
            : `Required pattern /${rule.pattern}/ did not match.`;
        const details = { pattern: rule.pattern, flags: rule.flags ?? "i", matches: count };
        return result(`rules:regex-required:${slugify(label)}`, matched, message, details);
    });
}
78
/**
 * One result per forbidden regex rule: passes when the pattern never matches
 * the body. The compiled regex is re-created with the global flag so that a
 * failure can report the total number of matches.
 */
function checkRegexForbidden(rules, body) {
    return rules.map((rule) => {
        const label = ruleLabel(rule);
        const compiled = compileRegex(rule);
        const everyMatch = new RegExp(compiled.source, withGlobal(compiled.flags));
        const count = body.match(everyMatch)?.length ?? 0;
        const absent = count === 0;
        const message = absent
            ? `Forbidden pattern /${rule.pattern}/ absent.`
            : `Forbidden pattern /${rule.pattern}/ matched ${count} time(s).`;
        const details = { pattern: rule.pattern, flags: rule.flags ?? "i", matches: count };
        return result(`rules:regex-forbidden:${slugify(label)}`, absent, message, details);
    });
}
90
/** Return the flag string with "g" appended unless it is already present. */
function withGlobal(flags) {
    if (flags.includes("g")) {
        return flags;
    }
    return `${flags}g`;
}
93
/**
 * One result per `phrase -> minimum` entry: passes when the phrase occurs in
 * the body (case-insensitive, non-overlapping) at least `minimum` times.
 */
function checkMinOccurrences(bounds, body) {
    const haystack = body.toLowerCase();
    return Object.keys(bounds).map((needle) => {
        const min = bounds[needle];
        const occurrences = countOccurrences(haystack, needle.toLowerCase());
        const enough = occurrences >= min;
        const message = enough
            ? `Phrase "${needle}" appears ${occurrences} time(s) (>= ${min}).`
            : `Phrase "${needle}" appears ${occurrences} time(s); expected at least ${min}.`;
        return result(`rules:min-occurrences:${slugify(needle)}`, enough, message, { phrase: needle, occurrences, min });
    });
}
103
/**
 * One result per `phrase -> maximum` entry: passes when the phrase occurs in
 * the body (case-insensitive, non-overlapping) at most `maximum` times.
 */
function checkMaxOccurrences(bounds, body) {
    const haystack = body.toLowerCase();
    return Object.keys(bounds).map((needle) => {
        const max = bounds[needle];
        const occurrences = countOccurrences(haystack, needle.toLowerCase());
        const withinLimit = occurrences <= max;
        const message = withinLimit
            ? `Phrase "${needle}" appears ${occurrences} time(s) (<= ${max}).`
            : `Phrase "${needle}" appears ${occurrences} time(s); expected at most ${max}.`;
        return result(`rules:max-occurrences:${slugify(needle)}`, withinLimit, message, { phrase: needle, occurrences, max });
    });
}
113
/**
 * Split a markdown body into sections, one per heading line (`#` to `######`,
 * leading whitespace allowed). Each section records the heading text, its
 * depth (number of `#`), and the raw lines up to the next heading of any
 * depth joined with "\n". Lines before the first heading are discarded.
 */
function sliceBySection(body) {
    const headingPattern = /^(#{1,6})\s+(.+?)\s*$/;
    const slices = [];
    let open = null;
    const closeOpenSection = () => {
        if (open) {
            slices.push({
                heading: open.heading,
                depth: open.depth,
                body: open.lines.join("\n")
            });
        }
    };
    body.split(/\r?\n/).forEach((rawLine) => {
        const heading = headingPattern.exec(rawLine.trimStart());
        if (heading) {
            closeOpenSection();
            open = { heading: heading[2].trim(), depth: heading[1].length, lines: [] };
            return;
        }
        if (open) {
            open.lines.push(rawLine);
        }
    });
    closeOpenSection();
    return slices;
}
143
/**
 * Collect the text of every unindented bullet (`- item` or `* item`) in a
 * section body. Indented bullets are treated as nested and skipped; trailing
 * whitespace on a line is ignored.
 */
function extractTopLevelBullets(sectionBody) {
    const bulletPattern = /^(\s*)[-*]\s+(.+)$/;
    return sectionBody
        .split(/\r?\n/)
        .map((rawLine) => bulletPattern.exec(rawLine.replace(/\s+$/, "")))
        .filter((hit) => hit !== null && hit[1].length === 0)
        .map((hit) => hit[2].trim());
}
156
/**
 * One result per requested section name. The first section whose heading
 * contains the name (case-insensitive) is inspected; the check fails when
 * that section is missing or when two of its top-level bullets are equal
 * ignoring case.
 */
function checkUniqueBulletsInSection(sections, body) {
    const slices = sliceBySection(body);
    return sections.map((needle) => {
        const wanted = needle.toLowerCase();
        const slice = slices.find((candidate) => candidate.heading.toLowerCase().includes(wanted));
        if (!slice) {
            return result(`rules:unique-in-section:${slugify(needle)}`, false, `Section matching "${needle}" not found; cannot check uniqueness.`, { section: needle, found: false });
        }
        const bullets = extractTopLevelBullets(slice.body);
        // Tally bullets by lower-cased text; Map keeps first-seen order.
        const tally = new Map();
        bullets.forEach((bullet) => {
            const key = bullet.toLowerCase();
            const previous = tally.has(key) ? tally.get(key) : 0;
            tally.set(key, previous + 1);
        });
        const duplicates = [];
        for (const [entry, count] of tally) {
            if (count > 1) {
                duplicates.push({ entry, count });
            }
        }
        const ok = duplicates.length === 0;
        const message = ok
            ? `Section "${slice.heading}" has ${bullets.length} unique bullet(s).`
            : `Section "${slice.heading}" has duplicate bullet(s): ${duplicates
                .map((d) => `"${d.entry}" x${d.count}`)
                .join(", ")}.`;
        return result(`rules:unique-in-section:${slugify(needle)}`, ok, message, {
            section: slice.heading,
            bullets: bullets.length,
            duplicates
        });
    });
}
185
/**
 * Run every configured rule family against the artifact body (frontmatter
 * removed via `splitFrontmatter`) and return the results in a fixed order:
 * mustContain, mustNotContain, regexRequired, regexForbidden,
 * minOccurrences, maxOccurrences, uniqueBulletsInSection.
 *
 * Returns `[]` when `expected` is missing or declares no rules, so the
 * runner can distinguish "no rules declared" from "all rules passed".
 */
export function verifyRules(artifact, expected) {
    if (!expected) {
        return [];
    }
    const { body } = splitFrontmatter(artifact);
    const collected = [];
    const runIf = (enabled, check, config) => {
        if (enabled) {
            collected.push(...check(config, body));
        }
    };
    runIf(expected.mustContain?.length, checkMustContain, expected.mustContain);
    runIf(expected.mustNotContain?.length, checkMustNotContain, expected.mustNotContain);
    runIf(expected.regexRequired?.length, checkRegexRequired, expected.regexRequired);
    runIf(expected.regexForbidden?.length, checkRegexForbidden, expected.regexForbidden);
    runIf(expected.minOccurrences && Object.keys(expected.minOccurrences).length, checkMinOccurrences, expected.minOccurrences);
    runIf(expected.maxOccurrences && Object.keys(expected.maxOccurrences).length, checkMaxOccurrences, expected.maxOccurrences);
    runIf(expected.uniqueBulletsInSection?.length, checkUniqueBulletsInSection, expected.uniqueBulletsInSection);
    return collected;
}
@@ -1,14 +0,0 @@
1
- import type { StructuralExpected, VerifierResult } from "../types.js";
2
- export interface ArtifactSplit {
3
- hasFrontmatter: boolean;
4
- frontmatterRaw: string;
5
- frontmatterParsed?: Record<string, unknown>;
6
- body: string;
7
- }
8
- export declare function splitFrontmatter(artifact: string): ArtifactSplit;
9
- /**
10
- * Run every configured structural check against the artifact text.
11
- * Returns [] when `expected` is undefined/empty so the runner can treat
12
- * "no structural expectations" as "no verifier results" rather than "pass".
13
- */
14
- export declare function verifyStructural(artifact: string, expected: StructuralExpected | undefined): VerifierResult[];
@@ -1,171 +0,0 @@
1
- /**
2
- * Structural verifier: deterministic, zero-LLM checks against a
3
- * single markdown artifact. Each structural expectation produces one
4
- * `VerifierResult` so baselines diff cleanly at the check level rather than
5
- * lumping everything into a single boolean.
6
- *
7
- * Design notes:
8
- *
9
- * - All pattern matching is case-insensitive. Authoring a check as
10
- * `"Directions"` matches `## Directions` and `### directions-suggested`.
11
- * - Frontmatter detection is permissive: it must start at byte 0 with `---\n`
12
- * and close on a subsequent `---` line. Anything else is treated as "no
13
- * frontmatter", which fails every `requiredFrontmatterKeys` entry
14
- * deterministically.
15
- * - `minLines`/`maxLines` intentionally exclude frontmatter so a rewrite that
16
- * adds metadata does not accidentally drop the body below the floor.
17
- * - Scoring: each check scores 0 or 1. The case `passed` becomes the AND of
18
- * all individual `ok` flags. This keeps the structural verifier
19
- * deterministic; the 0..1 rubric scale shows up later in the LLM judge.
20
- */
21
- import { parse as parseYaml } from "yaml";
22
// Opening fence: a `---` line at the very start of the text (LF or CRLF).
const FRONTMATTER_OPEN = /^---\r?\n/;
// Closing fence: a line break, then a line that is exactly `---`, ended by a
// newline (optionally preceded by CR) or by the end of the input.
const FRONTMATTER_CLOSE = /\r?\n---\r?(?:\n|$)/;
24
/**
 * Reduce free-form text to a lowercase, hyphen-separated token.
 *
 * Runs of characters outside `a-z0-9` collapse to a single hyphen, one
 * leading and one trailing hyphen are stripped, and the result is capped at
 * 64 characters. The cap is applied last, so a truncated slug may end in "-".
 *
 * @param {string} input Text to convert.
 * @returns {string} Slug of at most 64 characters (possibly empty).
 */
function slugify(input) {
    const lowered = input.toLowerCase();
    const hyphenated = lowered.replace(/[^a-z0-9]+/g, "-");
    const trimmed = hyphenated.replace(/(^-|-$)/g, "");
    return trimmed.slice(0, 64);
}
31
/**
 * Split a markdown artifact into YAML frontmatter and body.
 *
 * Frontmatter is recognised only when the artifact starts with a `---` line
 * and a later line that is exactly `---` closes it. An empty block (the
 * closing fence directly after the opening fence) counts as frontmatter with
 * an empty raw string. Anything else is reported as "no frontmatter" and the
 * whole artifact is returned as the body.
 *
 * @param {string} artifact Full artifact text.
 * @returns {{hasFrontmatter: boolean, frontmatterRaw: string, frontmatterParsed?: object, body: string}}
 *   `frontmatterParsed` is set only when the raw text parses to a non-array
 *   object; YAML parse errors leave it undefined rather than throwing.
 */
export function splitFrontmatter(artifact) {
    if (!FRONTMATTER_OPEN.test(artifact)) {
        return { hasFrontmatter: false, frontmatterRaw: "", body: artifact };
    }
    const afterOpen = artifact.replace(FRONTMATTER_OPEN, "");
    let frontmatterRaw;
    let body;
    // FRONTMATTER_CLOSE needs a line break *before* the closing fence, but the
    // opening fence already consumed it when the block is empty. Detect that
    // case explicitly so `---\n---\n` is not misread as "no frontmatter".
    const emptyClose = afterOpen.match(/^---\r?(?:\n|$)/);
    if (emptyClose) {
        frontmatterRaw = "";
        body = afterOpen.slice(emptyClose[0].length);
    }
    else {
        const closeMatch = afterOpen.match(FRONTMATTER_CLOSE);
        if (!closeMatch || closeMatch.index === undefined) {
            return { hasFrontmatter: false, frontmatterRaw: "", body: artifact };
        }
        frontmatterRaw = afterOpen.slice(0, closeMatch.index);
        body = afterOpen.slice(closeMatch.index + closeMatch[0].length);
    }
    let frontmatterParsed;
    try {
        const parsed = parseYaml(frontmatterRaw);
        // Only mappings are useful for key lookups; scalars, arrays and
        // null are treated as "nothing parsed".
        if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
            frontmatterParsed = parsed;
        }
    }
    catch {
        // Deliberate best-effort: malformed YAML still counts as frontmatter
        // being present, just without parsed keys.
        frontmatterParsed = undefined;
    }
    return {
        hasFrontmatter: true,
        frontmatterRaw,
        frontmatterParsed,
        body
    };
}
59
/**
 * Collect the lines of `body` that look like ATX headings.
 *
 * Leading whitespace is removed from each line before testing; a heading is
 * one to six `#` characters followed by whitespace and at least one
 * non-whitespace character. Matching lines are returned left-trimmed.
 *
 * @param {string} body Markdown text.
 * @returns {string[]} Heading lines in document order.
 */
function extractHeadingLines(body) {
    const headingPattern = /^#{1,6}\s+\S/;
    const headings = [];
    for (const rawLine of body.split(/\r?\n/)) {
        const candidate = rawLine.trimStart();
        if (headingPattern.test(candidate)) {
            headings.push(candidate);
        }
    }
    return headings;
}
65
/**
 * Build one structural verifier result.
 *
 * @param {string} id Check identifier.
 * @param {boolean} ok Whether the check passed.
 * @param {string} message Human-readable outcome.
 * @param {*} [details] Extra data; the `details` key is omitted when undefined.
 * @returns {object} Result with `kind: "structural"` and a 0/1 `score`.
 */
function result(id, ok, message, details) {
    const outcome = {
        kind: "structural",
        id,
        ok,
        score: ok ? 1 : 0,
        message,
    };
    if (details !== undefined) {
        outcome.details = details;
    }
    return outcome;
}
75
/**
 * Check that every required section name appears inside some heading.
 *
 * Matching is a case-insensitive substring test against the heading lines
 * of `body`; the section name is trimmed before comparison.
 *
 * @param {string[]} sections Required section names.
 * @param {string} body Artifact body (frontmatter already removed).
 * @returns {object[]} One result per entry of `sections`, in order.
 */
function checkRequiredSections(sections, body) {
    const loweredHeadings = [];
    for (const line of extractHeadingLines(body)) {
        loweredHeadings.push(line.toLowerCase());
    }
    const outcomes = [];
    for (const section of sections) {
        const needle = section.toLowerCase().trim();
        const found = loweredHeadings.some((heading) => heading.includes(needle));
        let message;
        if (found) {
            message = `Section matching "${section}" present.`;
        }
        else {
            message = `No heading contains "${section}".`;
        }
        const details = { pattern: section, searchedHeadings: loweredHeadings.length };
        outcomes.push(result(`structural:section:${slugify(section)}`, found, message, details));
    }
    return outcomes;
}
85
/**
 * Check that none of the forbidden substrings occur in the body.
 *
 * Comparison is case-insensitive: both the body and each pattern are
 * lowercased before counting.
 *
 * @param {string[]} patterns Substrings that must not appear.
 * @param {string} body Artifact body.
 * @returns {object[]} One result per pattern, carrying the occurrence count.
 */
function checkForbiddenPatterns(patterns, body) {
    const loweredBody = body.toLowerCase();
    const outcomes = [];
    for (const pattern of patterns) {
        const hits = countOccurrences(loweredBody, pattern.toLowerCase());
        const absent = hits === 0;
        let message;
        if (absent) {
            message = `Pattern "${pattern}" absent (as required).`;
        }
        else {
            message = `Pattern "${pattern}" appears ${hits} time(s); remove.`;
        }
        outcomes.push(result(`structural:forbidden:${slugify(pattern)}`, absent, message, { pattern, occurrences: hits }));
    }
    return outcomes;
}
96
/**
 * Count non-overlapping occurrences of `needle` in `haystack`, scanning
 * left to right.
 *
 * @param {string} haystack Text to search.
 * @param {string} needle Substring to count; an empty needle yields 0.
 * @returns {number} Number of non-overlapping matches.
 */
function countOccurrences(haystack, needle) {
    if (needle.length === 0) {
        return 0;
    }
    let total = 0;
    // Resume each search just past the previous hit so matches never overlap.
    for (let pos = haystack.indexOf(needle, 0); pos >= 0; pos = haystack.indexOf(needle, pos + needle.length)) {
        total += 1;
    }
    return total;
}
109
/**
 * Check the body's line and character counts against optional bounds.
 *
 * A lines result is emitted only when `minLines` or `maxLines` is set, and a
 * chars result only when `minChars` or `maxChars` is set. Unset bounds are
 * treated as unbounded on that side.
 *
 * @param {{minLines?: number, maxLines?: number, minChars?: number, maxChars?: number}} expected
 *   Bounds to enforce.
 * @param {string} body Artifact body (frontmatter already removed).
 * @returns {object[]} Zero, one or two results (lines first, then chars).
 */
function checkLengthBounds(expected, body) {
    const results = [];
    const lineCount = countBodyLines(body);
    const charCount = body.length;
    const isWithin = (actual, min, max) => (min === undefined || actual >= min) && (max === undefined || actual <= max);
    if (expected.minLines !== undefined || expected.maxLines !== undefined) {
        const min = expected.minLines;
        const max = expected.maxLines;
        const ok = isWithin(lineCount, min, max);
        results.push(result("structural:length:lines", ok, ok
            ? `Body has ${lineCount} line(s), within bounds.`
            : buildOutOfRangeMessage("line", lineCount, min, max), { lineCount, minLines: min, maxLines: max }));
    }
    if (expected.minChars !== undefined || expected.maxChars !== undefined) {
        const min = expected.minChars;
        const max = expected.maxChars;
        const ok = isWithin(charCount, min, max);
        results.push(result("structural:length:chars", ok, ok
            ? `Body has ${charCount} char(s), within bounds.`
            : buildOutOfRangeMessage("char", charCount, min, max), { charCount, minChars: min, maxChars: max }));
    }
    return results;
}
/** Count the lines in `body`, treating a final newline as a terminator, not as an extra empty line. */
function countBodyLines(body) {
    if (body.length === 0) {
        return 0;
    }
    const lines = body.split(/\r?\n/);
    // "a\nb\n" splits into ["a", "b", ""]; the trailing "" is not a real line.
    if (lines[lines.length - 1] === "") {
        lines.pop();
    }
    return lines.length;
}
135
/**
 * Format the failure message for a length check that is out of bounds.
 *
 * @param {string} unit Singular unit name, e.g. "line" or "char".
 * @param {number} actual Measured count.
 * @param {number|undefined} min Lower bound; shown as "0" when undefined.
 * @param {number|undefined} max Upper bound; shown as "∞" when undefined.
 * @returns {string} Message of the form "Body has N unit(s); expected lo..hi."
 */
function buildOutOfRangeMessage(unit, actual, min, max) {
    let lowerBound = "0";
    if (min !== undefined) {
        lowerBound = String(min);
    }
    let upperBound = "∞";
    if (max !== undefined) {
        upperBound = String(max);
    }
    return `Body has ${actual} ${unit}(s); expected ${lowerBound}..${upperBound}.`;
}
140
/**
 * Check that each required key is a top-level key of the parsed frontmatter.
 *
 * When the artifact has no frontmatter, or it did not parse to an object,
 * every key fails and the result records whether a block was present at all.
 *
 * @param {string[]} keys Required frontmatter keys (compared exactly).
 * @param {{hasFrontmatter: boolean, frontmatterParsed?: object}} split
 *   Output of `splitFrontmatter`.
 * @returns {object[]} One result per key, in order.
 */
function checkFrontmatterKeys(keys, split) {
    const parsed = split.hasFrontmatter ? split.frontmatterParsed : undefined;
    const outcomes = [];
    if (!parsed) {
        for (const key of keys) {
            outcomes.push(result(`structural:frontmatter:${slugify(key)}`, false, `Frontmatter key "${key}" missing (no parseable frontmatter).`, { key, frontmatterPresent: split.hasFrontmatter }));
        }
        return outcomes;
    }
    const available = new Set(Object.keys(parsed));
    for (const key of keys) {
        const ok = available.has(key);
        let message;
        if (ok) {
            message = `Frontmatter key "${key}" present.`;
        }
        else {
            message = `Frontmatter key "${key}" missing.`;
        }
        outcomes.push(result(`structural:frontmatter:${slugify(key)}`, ok, message, { key }));
    }
    return outcomes;
}
150
/**
 * Evaluate all configured structural expectations for one artifact.
 *
 * An undefined `expected` yields [] so the runner can tell "nothing was
 * declared" apart from "everything passed". Otherwise results are emitted
 * in a fixed order: required sections, forbidden patterns, length bounds,
 * required frontmatter keys. Section, pattern and length checks run against
 * the body only (frontmatter stripped).
 *
 * @param {string} artifact Full artifact text.
 * @param {object|undefined} expected Structural expectations.
 * @returns {object[]} One result per evaluated check.
 */
export function verifyStructural(artifact, expected) {
    if (!expected) {
        return [];
    }
    const split = splitFrontmatter(artifact);
    const { body } = split;
    const hasItems = (list) => Boolean(list?.length);
    // Array-literal spreads evaluate left to right, preserving check order.
    return [
        ...(hasItems(expected.requiredSections)
            ? checkRequiredSections(expected.requiredSections, body)
            : []),
        ...(hasItems(expected.forbiddenPatterns)
            ? checkForbiddenPatterns(expected.forbiddenPatterns, body)
            : []),
        // Length bounds decide for themselves whether anything is configured.
        ...checkLengthBounds(expected, body),
        ...(hasItems(expected.requiredFrontmatterKeys)
            ? checkFrontmatterKeys(expected.requiredFrontmatterKeys, split)
            : []),
    ];
}