cclaw-cli 0.48.35 → 0.51.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. package/README.md +54 -82
  2. package/dist/artifact-linter.d.ts +4 -0
  3. package/dist/artifact-linter.js +24 -3
  4. package/dist/cli.d.ts +1 -19
  5. package/dist/cli.js +49 -495
  6. package/dist/constants.d.ts +2 -13
  7. package/dist/constants.js +1 -46
  8. package/dist/content/closeout-guidance.d.ts +14 -0
  9. package/dist/content/closeout-guidance.js +42 -0
  10. package/dist/content/core-agents.js +51 -9
  11. package/dist/content/decision-protocol.d.ts +12 -0
  12. package/dist/content/decision-protocol.js +20 -0
  13. package/dist/content/diff-command.d.ts +1 -2
  14. package/dist/content/diff-command.js +8 -94
  15. package/dist/content/examples.d.ts +4 -10
  16. package/dist/content/examples.js +10 -20
  17. package/dist/content/hook-events.js +2 -2
  18. package/dist/content/hook-inline-snippets.d.ts +5 -2
  19. package/dist/content/hook-inline-snippets.js +33 -1
  20. package/dist/content/hook-manifest.d.ts +3 -4
  21. package/dist/content/hook-manifest.js +11 -12
  22. package/dist/content/hooks.js +2 -0
  23. package/dist/content/ideate-command.d.ts +2 -0
  24. package/dist/content/ideate-command.js +31 -25
  25. package/dist/content/iron-laws.d.ts +5 -5
  26. package/dist/content/iron-laws.js +5 -5
  27. package/dist/content/learnings.d.ts +3 -4
  28. package/dist/content/learnings.js +24 -50
  29. package/dist/content/meta-skill.js +31 -24
  30. package/dist/content/next-command.js +38 -38
  31. package/dist/content/node-hooks.js +17 -343
  32. package/dist/content/opencode-plugin.js +2 -100
  33. package/dist/content/research-playbooks.js +14 -14
  34. package/dist/content/review-loop.d.ts +2 -0
  35. package/dist/content/review-loop.js +8 -0
  36. package/dist/content/session-hooks.js +14 -46
  37. package/dist/content/skills.d.ts +0 -5
  38. package/dist/content/skills.js +53 -128
  39. package/dist/content/stage-common-guidance.d.ts +0 -1
  40. package/dist/content/stage-common-guidance.js +15 -14
  41. package/dist/content/stage-schema.d.ts +26 -1
  42. package/dist/content/stage-schema.js +121 -40
  43. package/dist/content/stages/_lint-metadata/index.js +9 -15
  44. package/dist/content/stages/brainstorm.js +22 -43
  45. package/dist/content/stages/design.js +37 -57
  46. package/dist/content/stages/plan.js +22 -13
  47. package/dist/content/stages/review.js +24 -27
  48. package/dist/content/stages/scope.js +34 -46
  49. package/dist/content/stages/ship.js +7 -4
  50. package/dist/content/stages/spec.js +20 -9
  51. package/dist/content/stages/tdd.js +64 -44
  52. package/dist/content/start-command.js +10 -12
  53. package/dist/content/status-command.d.ts +2 -7
  54. package/dist/content/status-command.js +19 -146
  55. package/dist/content/subagents.d.ts +0 -5
  56. package/dist/content/subagents.js +47 -28
  57. package/dist/content/templates.d.ts +1 -1
  58. package/dist/content/templates.js +126 -135
  59. package/dist/content/track-render-context.d.ts +17 -0
  60. package/dist/content/track-render-context.js +44 -0
  61. package/dist/content/tree-command.d.ts +1 -2
  62. package/dist/content/tree-command.js +4 -87
  63. package/dist/content/utility-skills.d.ts +2 -29
  64. package/dist/content/utility-skills.js +2 -1533
  65. package/dist/content/view-command.js +29 -11
  66. package/dist/delegation.d.ts +1 -1
  67. package/dist/delegation.js +5 -15
  68. package/dist/doctor-registry.js +20 -21
  69. package/dist/doctor.js +88 -408
  70. package/dist/flow-state.d.ts +3 -0
  71. package/dist/flow-state.js +2 -0
  72. package/dist/harness-adapters.d.ts +1 -1
  73. package/dist/harness-adapters.js +48 -57
  74. package/dist/install.js +128 -520
  75. package/dist/internal/advance-stage.js +3 -9
  76. package/dist/internal/compound-readiness.d.ts +1 -1
  77. package/dist/internal/compound-readiness.js +1 -1
  78. package/dist/internal/tdd-loop-status.d.ts +1 -1
  79. package/dist/internal/tdd-loop-status.js +1 -1
  80. package/dist/knowledge-store.d.ts +16 -10
  81. package/dist/knowledge-store.js +51 -15
  82. package/dist/policy.js +16 -109
  83. package/dist/run-archive.d.ts +4 -6
  84. package/dist/run-archive.js +15 -20
  85. package/dist/run-persistence.d.ts +2 -2
  86. package/dist/run-persistence.js +3 -9
  87. package/package.json +1 -2
  88. package/dist/content/archive-command.d.ts +0 -2
  89. package/dist/content/archive-command.js +0 -124
  90. package/dist/content/compound-command.d.ts +0 -5
  91. package/dist/content/compound-command.js +0 -193
  92. package/dist/content/contexts.d.ts +0 -9
  93. package/dist/content/contexts.js +0 -65
  94. package/dist/content/contracts.d.ts +0 -2
  95. package/dist/content/contracts.js +0 -51
  96. package/dist/content/doctor-references.d.ts +0 -2
  97. package/dist/content/doctor-references.js +0 -150
  98. package/dist/content/eval-scaffold.d.ts +0 -15
  99. package/dist/content/eval-scaffold.js +0 -370
  100. package/dist/content/feature-command.d.ts +0 -2
  101. package/dist/content/feature-command.js +0 -123
  102. package/dist/content/flow-map.d.ts +0 -23
  103. package/dist/content/flow-map.js +0 -134
  104. package/dist/content/harness-doc.d.ts +0 -2
  105. package/dist/content/harness-doc.js +0 -202
  106. package/dist/content/harness-playbooks.d.ts +0 -24
  107. package/dist/content/harness-playbooks.js +0 -393
  108. package/dist/content/harness-tool-refs.d.ts +0 -20
  109. package/dist/content/harness-tool-refs.js +0 -268
  110. package/dist/content/ops-command.d.ts +0 -2
  111. package/dist/content/ops-command.js +0 -71
  112. package/dist/content/protocols.d.ts +0 -7
  113. package/dist/content/protocols.js +0 -215
  114. package/dist/content/retro-command.d.ts +0 -2
  115. package/dist/content/retro-command.js +0 -165
  116. package/dist/content/rewind-command.d.ts +0 -2
  117. package/dist/content/rewind-command.js +0 -106
  118. package/dist/content/tdd-log-command.d.ts +0 -2
  119. package/dist/content/tdd-log-command.js +0 -85
  120. package/dist/eval/agents/single-shot.d.ts +0 -27
  121. package/dist/eval/agents/single-shot.js +0 -79
  122. package/dist/eval/agents/with-tools.d.ts +0 -44
  123. package/dist/eval/agents/with-tools.js +0 -261
  124. package/dist/eval/agents/workflow.d.ts +0 -31
  125. package/dist/eval/agents/workflow.js +0 -155
  126. package/dist/eval/baseline.d.ts +0 -38
  127. package/dist/eval/baseline.js +0 -282
  128. package/dist/eval/config-loader.d.ts +0 -14
  129. package/dist/eval/config-loader.js +0 -395
  130. package/dist/eval/corpus.d.ts +0 -30
  131. package/dist/eval/corpus.js +0 -330
  132. package/dist/eval/cost-guard.d.ts +0 -102
  133. package/dist/eval/cost-guard.js +0 -190
  134. package/dist/eval/diff.d.ts +0 -64
  135. package/dist/eval/diff.js +0 -323
  136. package/dist/eval/llm-client.d.ts +0 -176
  137. package/dist/eval/llm-client.js +0 -267
  138. package/dist/eval/mode.d.ts +0 -28
  139. package/dist/eval/mode.js +0 -61
  140. package/dist/eval/progress.d.ts +0 -83
  141. package/dist/eval/progress.js +0 -59
  142. package/dist/eval/report.d.ts +0 -11
  143. package/dist/eval/report.js +0 -181
  144. package/dist/eval/rubric-loader.d.ts +0 -20
  145. package/dist/eval/rubric-loader.js +0 -143
  146. package/dist/eval/runner.d.ts +0 -81
  147. package/dist/eval/runner.js +0 -746
  148. package/dist/eval/runs.d.ts +0 -41
  149. package/dist/eval/runs.js +0 -114
  150. package/dist/eval/sandbox.d.ts +0 -38
  151. package/dist/eval/sandbox.js +0 -137
  152. package/dist/eval/tools/glob.d.ts +0 -2
  153. package/dist/eval/tools/glob.js +0 -163
  154. package/dist/eval/tools/grep.d.ts +0 -2
  155. package/dist/eval/tools/grep.js +0 -152
  156. package/dist/eval/tools/index.d.ts +0 -7
  157. package/dist/eval/tools/index.js +0 -35
  158. package/dist/eval/tools/read.d.ts +0 -2
  159. package/dist/eval/tools/read.js +0 -122
  160. package/dist/eval/tools/types.d.ts +0 -49
  161. package/dist/eval/tools/types.js +0 -41
  162. package/dist/eval/tools/write.d.ts +0 -2
  163. package/dist/eval/tools/write.js +0 -92
  164. package/dist/eval/types.d.ts +0 -561
  165. package/dist/eval/types.js +0 -47
  166. package/dist/eval/verifiers/judge.d.ts +0 -40
  167. package/dist/eval/verifiers/judge.js +0 -256
  168. package/dist/eval/verifiers/rules.d.ts +0 -24
  169. package/dist/eval/verifiers/rules.js +0 -218
  170. package/dist/eval/verifiers/structural.d.ts +0 -14
  171. package/dist/eval/verifiers/structural.js +0 -171
  172. package/dist/eval/verifiers/traceability.d.ts +0 -23
  173. package/dist/eval/verifiers/traceability.js +0 -84
  174. package/dist/eval/verifiers/workflow-consistency.d.ts +0 -21
  175. package/dist/eval/verifiers/workflow-consistency.js +0 -225
  176. package/dist/eval/workflow-corpus.d.ts +0 -7
  177. package/dist/eval/workflow-corpus.js +0 -207
  178. package/dist/feature-system.d.ts +0 -42
  179. package/dist/feature-system.js +0 -432
  180. package/dist/internal/knowledge-digest.d.ts +0 -7
  181. package/dist/internal/knowledge-digest.js +0 -93
@@ -1,256 +0,0 @@
1
- /**
2
- * LLM judge verifier — Step 3.
3
- *
4
- * Given an artifact and the stage's rubric, runs N judge samples (default
5
- * median-of-3) against the configured LLM, aggregates the per-check
6
- * scores, and returns one VerifierResult per rubric check plus one
7
- * aggregate result covering the whole stage.
8
- *
9
- * Deterministic pieces (JSON parsing, aggregation, scoring) are kept pure
10
- * so unit tests inject a stub EvalLlmClient and assert on the aggregate
11
- * math without touching the network.
12
- */
13
- import { EvalLlmError } from "../llm-client.js";
14
- import { computeUsageUsd } from "../cost-guard.js";
15
- const SCALE_MIN = 1;
16
- const SCALE_MAX = 5;
17
- const SYSTEM_PREAMBLE = `You are a strict reviewer for software engineering artifacts. ` +
18
- `You will receive a rubric and an artifact. ` +
19
- `Score each rubric check on an integer 1..5 scale, where:\n` +
20
- ` 1 = does not meet the bar at all\n` +
21
- ` 2 = barely meets the bar, major gaps\n` +
22
- ` 3 = partially meets the bar, noticeable gaps\n` +
23
- ` 4 = mostly meets the bar, small gaps\n` +
24
- ` 5 = fully meets the bar\n` +
25
- `Respond with JSON only (no prose, no markdown fences). ` +
26
- `Shape: {"scores": {"<check-id>": 1..5, ...}, "rationales": {"<check-id>": "one sentence", ...}}. ` +
27
- `Include every check id in both maps. Use integer scores only.`;
28
- function median(values) {
29
- if (values.length === 0)
30
- return 0;
31
- const sorted = [...values].sort((a, b) => a - b);
32
- const mid = Math.floor(sorted.length / 2);
33
- if (sorted.length % 2 === 1)
34
- return sorted[mid];
35
- return ((sorted[mid - 1] + sorted[mid]) / 2);
36
- }
37
- function mean(values) {
38
- if (values.length === 0)
39
- return 0;
40
- return values.reduce((acc, v) => acc + v, 0) / values.length;
41
- }
42
- function clampScore(raw) {
43
- if (typeof raw !== "number" || !Number.isFinite(raw))
44
- return undefined;
45
- const clamped = Math.round(Math.min(Math.max(raw, SCALE_MIN), SCALE_MAX));
46
- return clamped;
47
- }
48
- function stripFences(raw) {
49
- const trimmed = raw.trim();
50
- if (!trimmed.startsWith("```"))
51
- return trimmed;
52
- return trimmed.replace(/^```(?:json)?\s*/i, "").replace(/```\s*$/i, "").trim();
53
- }
54
- /**
55
- * Parse one judge response into a JudgeSample. The parser is intentionally
56
- * forgiving with rationales (missing -> empty string) but strict with
57
- * scores: missing or non-numeric entries are dropped and the coverage
58
- * flag on the aggregate flips to false.
59
- */
60
- export function parseJudgeResponse(content, rubric) {
61
- let parsed;
62
- try {
63
- parsed = JSON.parse(stripFences(content));
64
- }
65
- catch (err) {
66
- throw new Error(`Judge response was not valid JSON: ${err instanceof Error ? err.message : String(err)}`);
67
- }
68
- if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
69
- throw new Error("Judge response must be a JSON object with scores/rationales maps.");
70
- }
71
- const rawScores = parsed.scores;
72
- const rawRationales = parsed.rationales;
73
- if (!rawScores || typeof rawScores !== "object" || Array.isArray(rawScores)) {
74
- throw new Error('Judge response missing "scores" object.');
75
- }
76
- const scores = {};
77
- const rationales = {};
78
- for (const check of rubric.checks) {
79
- const rawScore = rawScores[check.id];
80
- const clamped = clampScore(rawScore);
81
- if (clamped !== undefined)
82
- scores[check.id] = clamped;
83
- let rationale = "";
84
- if (rawRationales && typeof rawRationales === "object" && !Array.isArray(rawRationales)) {
85
- const raw = rawRationales[check.id];
86
- if (typeof raw === "string")
87
- rationale = raw.trim();
88
- }
89
- rationales[check.id] = rationale;
90
- }
91
- return { scores, rationales };
92
- }
93
- function aggregateSamples(rubric, samples) {
94
- return rubric.checks.map((check) => {
95
- const values = [];
96
- let covered = true;
97
- for (const sample of samples) {
98
- const value = sample.scores[check.id];
99
- if (typeof value === "number")
100
- values.push(value);
101
- else
102
- covered = false;
103
- }
104
- return {
105
- checkId: check.id,
106
- samples: values,
107
- median: median(values),
108
- mean: Number(mean(values).toFixed(4)),
109
- coverage: covered && samples.length > 0
110
- };
111
- });
112
- }
113
- function buildMessages(artifact, rubric) {
114
- const rubricLines = rubric.checks.map((check) => {
115
- const scale = check.scale ? ` (${check.scale})` : "";
116
- const critical = check.critical ? " [critical]" : "";
117
- return `- ${check.id}${critical}: ${check.prompt}${scale}`;
118
- });
119
- const userContent = [
120
- `Rubric (stage=${rubric.stage}, rubric=${rubric.id}):`,
121
- ...rubricLines,
122
- ``,
123
- `Artifact:`,
124
- `"""`,
125
- artifact,
126
- `"""`,
127
- ``,
128
- `Return JSON only.`
129
- ].join("\n");
130
- return [
131
- { role: "system", content: SYSTEM_PREAMBLE },
132
- { role: "user", content: userContent }
133
- ];
134
- }
135
- function sumUsage(usages) {
136
- let promptTokens = 0;
137
- let completionTokens = 0;
138
- let totalTokens = 0;
139
- for (const u of usages) {
140
- promptTokens += u.promptTokens;
141
- completionTokens += u.completionTokens;
142
- totalTokens += u.totalTokens;
143
- }
144
- return { promptTokens, completionTokens, totalTokens };
145
- }
146
- /** Run the judge against an artifact and return per-sample + aggregate data. */
147
- export async function runJudge(options) {
148
- const { artifact, rubric, config, client, caseHint, baseSeed } = options;
149
- const rawSamples = caseHint?.samples ?? config.judgeSamples ?? 3;
150
- if (!Number.isInteger(rawSamples) || rawSamples < 1) {
151
- throw new Error(`Invalid judge sample count: ${rawSamples}. Use a positive integer (1, 3, 5).`);
152
- }
153
- if (rawSamples % 2 === 0) {
154
- throw new Error(`Judge sample count must be odd (so a true median exists), got: ${rawSamples}.`);
155
- }
156
- const started = Date.now();
157
- const model = config.judgeModel ?? config.model;
158
- const temperature = config.judgeTemperature ?? 0;
159
- const messages = buildMessages(artifact, rubric);
160
- const samples = [];
161
- const usages = [];
162
- for (let i = 0; i < rawSamples; i += 1) {
163
- let response;
164
- try {
165
- response = await client.chat({
166
- model,
167
- messages,
168
- temperature,
169
- responseFormatJson: true,
170
- ...(baseSeed !== undefined ? { seed: baseSeed + i } : {}),
171
- timeoutMs: config.timeoutMs
172
- });
173
- }
174
- catch (err) {
175
- if (err instanceof EvalLlmError)
176
- throw err;
177
- throw err;
178
- }
179
- usages.push(response.usage);
180
- samples.push(parseJudgeResponse(response.content, rubric));
181
- }
182
- const aggregates = aggregateSamples(rubric, samples);
183
- const usage = sumUsage(usages);
184
- const usageUsd = computeUsageUsd(model, usage, { tokenPricing: config.tokenPricing });
185
- return {
186
- rubricId: rubric.id,
187
- samples,
188
- aggregates,
189
- usageUsd,
190
- durationMs: Date.now() - started
191
- };
192
- }
193
- function verifierIdFor(check) {
194
- return `judge:${check.id}`;
195
- }
196
- /**
197
- * Convert a JudgeInvocation into VerifierResult[] for the runner. One
198
- * result per rubric check (score 0..1 normalized from the 1..5 median) +
199
- * one "coverage" result that flips to `ok:false` when any sample failed
200
- * to emit a score for a check.
201
- */
202
- export function judgeResultsToVerifiers(rubric, invocation, config, caseHint) {
203
- const out = [];
204
- const failIfCriticalBelow = config.regression.failIfCriticalBelow;
205
- for (const aggregate of invocation.aggregates) {
206
- const check = rubric.checks.find((c) => c.id === aggregate.checkId);
207
- if (!check)
208
- continue;
209
- const normalized = (aggregate.median - SCALE_MIN) / (SCALE_MAX - SCALE_MIN);
210
- const caseMinimum = caseHint?.minimumScores?.[check.id];
211
- const criticalFloor = check.critical ? failIfCriticalBelow : undefined;
212
- const floors = [];
213
- if (typeof caseMinimum === "number")
214
- floors.push(caseMinimum);
215
- if (typeof criticalFloor === "number")
216
- floors.push(criticalFloor);
217
- const floor = floors.length > 0 ? Math.max(...floors) : undefined;
218
- const ok = !aggregate.coverage
219
- ? false
220
- : floor === undefined || aggregate.median >= floor;
221
- out.push({
222
- kind: "judge",
223
- id: verifierIdFor(check),
224
- ok,
225
- score: Number(Math.max(0, Math.min(1, normalized)).toFixed(4)),
226
- message: ok
227
- ? `median=${aggregate.median.toFixed(2)} across ${aggregate.samples.length} sample(s)`
228
- : aggregate.coverage
229
- ? `median=${aggregate.median.toFixed(2)} below floor=${floor?.toFixed(2) ?? "n/a"}`
230
- : `judge did not score every sample (${aggregate.samples.length}/${invocation.samples.length}); treated as failing`,
231
- details: {
232
- median: aggregate.median,
233
- mean: aggregate.mean,
234
- samples: aggregate.samples,
235
- coverage: aggregate.coverage,
236
- critical: check.critical === true,
237
- caseMinimum: caseMinimum ?? null,
238
- criticalFloor: criticalFloor ?? null
239
- }
240
- });
241
- }
242
- const required = caseHint?.requiredChecks ?? [];
243
- const covered = new Set(rubric.checks.map((c) => c.id));
244
- const missingRequired = required.filter((id) => !covered.has(id));
245
- if (missingRequired.length > 0) {
246
- out.push({
247
- kind: "judge",
248
- id: "judge:required-checks",
249
- ok: false,
250
- score: 0,
251
- message: `Rubric is missing required check id(s): ${missingRequired.join(", ")}`,
252
- details: { missing: missingRequired, rubricId: rubric.id }
253
- });
254
- }
255
- return out;
256
- }
@@ -1,24 +0,0 @@
1
- /**
2
- * Rule-based verifier: deterministic, zero-LLM checks that are richer than
3
- * structural heading/length assertions. Each rule produces exactly one
4
- * `VerifierResult` so baselines diff at the check level, and authoring a
5
- * rule sideways in YAML never silently skips.
6
- *
7
- * Semantics:
8
- *
9
- * - All substring matching is case-insensitive. Regex matching uses the
10
- * flags declared on the rule (default `"i"`).
11
- * - Rules operate on the artifact BODY (frontmatter stripped), mirroring
12
- * the structural verifier so min/max counts and length checks agree on
13
- * what "body" means.
14
- * - `uniqueBulletsInSection` scans every section (heading, case-insensitive
15
- * substring match) and flags duplicate top-level bullets ("- item"). The
16
- * search stops at the next heading of equal or lower depth.
17
- */
18
- import type { RulesExpected, VerifierResult } from "../types.js";
19
- /**
20
- * Run every configured rule check against the artifact body. Returns `[]`
21
- * when `expected` is undefined or empty so the runner can distinguish
22
- * "no rules declared" from "all rules passed".
23
- */
24
- export declare function verifyRules(artifact: string, expected: RulesExpected | undefined): VerifierResult[];
@@ -1,218 +0,0 @@
1
- import { splitFrontmatter } from "./structural.js";
2
- function slugify(input) {
3
- return (input
4
- .toLowerCase()
5
- .replace(/[^a-z0-9]+/g, "-")
6
- .replace(/(^-|-$)/g, "")
7
- .slice(0, 64) || "rule");
8
- }
9
- function result(id, ok, message, details) {
10
- return {
11
- kind: "rules",
12
- id,
13
- ok,
14
- score: ok ? 1 : 0,
15
- message,
16
- ...(details !== undefined ? { details } : {})
17
- };
18
- }
19
- function countOccurrences(haystack, needle) {
20
- if (needle.length === 0)
21
- return 0;
22
- let index = 0;
23
- let count = 0;
24
- while (true) {
25
- const at = haystack.indexOf(needle, index);
26
- if (at < 0)
27
- return count;
28
- count += 1;
29
- index = at + needle.length;
30
- }
31
- }
32
- function compileRegex(rule) {
33
- const flags = rule.flags ?? "i";
34
- try {
35
- return new RegExp(rule.pattern, flags);
36
- }
37
- catch (err) {
38
- throw new Error(`Invalid regex for rule "${rule.description ?? rule.pattern}" ` +
39
- `(pattern=${JSON.stringify(rule.pattern)}, flags=${JSON.stringify(flags)}): ` +
40
- (err instanceof Error ? err.message : String(err)));
41
- }
42
- }
43
- function ruleLabel(rule) {
44
- return rule.description?.trim() || rule.pattern;
45
- }
46
- function checkMustContain(needles, body) {
47
- const bodyLower = body.toLowerCase();
48
- return needles.map((needle) => {
49
- const found = bodyLower.includes(needle.toLowerCase());
50
- return result(`rules:contains:${slugify(needle)}`, found, found
51
- ? `Required phrase "${needle}" present.`
52
- : `Required phrase "${needle}" missing from body.`, { phrase: needle });
53
- });
54
- }
55
- function checkMustNotContain(needles, body) {
56
- const bodyLower = body.toLowerCase();
57
- return needles.map((needle) => {
58
- const lowered = needle.toLowerCase();
59
- const occurrences = countOccurrences(bodyLower, lowered);
60
- const ok = occurrences === 0;
61
- return result(`rules:not-contains:${slugify(needle)}`, ok, ok
62
- ? `Forbidden phrase "${needle}" absent (as required).`
63
- : `Forbidden phrase "${needle}" appears ${occurrences} time(s).`, { phrase: needle, occurrences });
64
- });
65
- }
66
- function checkRegexRequired(rules, body) {
67
- return rules.map((rule) => {
68
- const label = ruleLabel(rule);
69
- const regex = compileRegex(rule);
70
- const matches = body.match(new RegExp(regex.source, withGlobal(regex.flags)));
71
- const count = matches ? matches.length : 0;
72
- const ok = count > 0;
73
- return result(`rules:regex-required:${slugify(label)}`, ok, ok
74
- ? `Required pattern /${rule.pattern}/ matched ${count} time(s).`
75
- : `Required pattern /${rule.pattern}/ did not match.`, { pattern: rule.pattern, flags: rule.flags ?? "i", matches: count });
76
- });
77
- }
78
- function checkRegexForbidden(rules, body) {
79
- return rules.map((rule) => {
80
- const label = ruleLabel(rule);
81
- const regex = compileRegex(rule);
82
- const matches = body.match(new RegExp(regex.source, withGlobal(regex.flags)));
83
- const count = matches ? matches.length : 0;
84
- const ok = count === 0;
85
- return result(`rules:regex-forbidden:${slugify(label)}`, ok, ok
86
- ? `Forbidden pattern /${rule.pattern}/ absent.`
87
- : `Forbidden pattern /${rule.pattern}/ matched ${count} time(s).`, { pattern: rule.pattern, flags: rule.flags ?? "i", matches: count });
88
- });
89
- }
90
- function withGlobal(flags) {
91
- return flags.includes("g") ? flags : `${flags}g`;
92
- }
93
- function checkMinOccurrences(bounds, body) {
94
- const bodyLower = body.toLowerCase();
95
- return Object.entries(bounds).map(([needle, min]) => {
96
- const occurrences = countOccurrences(bodyLower, needle.toLowerCase());
97
- const ok = occurrences >= min;
98
- return result(`rules:min-occurrences:${slugify(needle)}`, ok, ok
99
- ? `Phrase "${needle}" appears ${occurrences} time(s) (>= ${min}).`
100
- : `Phrase "${needle}" appears ${occurrences} time(s); expected at least ${min}.`, { phrase: needle, occurrences, min });
101
- });
102
- }
103
- function checkMaxOccurrences(bounds, body) {
104
- const bodyLower = body.toLowerCase();
105
- return Object.entries(bounds).map(([needle, max]) => {
106
- const occurrences = countOccurrences(bodyLower, needle.toLowerCase());
107
- const ok = occurrences <= max;
108
- return result(`rules:max-occurrences:${slugify(needle)}`, ok, ok
109
- ? `Phrase "${needle}" appears ${occurrences} time(s) (<= ${max}).`
110
- : `Phrase "${needle}" appears ${occurrences} time(s); expected at most ${max}.`, { phrase: needle, occurrences, max });
111
- });
112
- }
113
- function sliceBySection(body) {
114
- const lines = body.split(/\r?\n/);
115
- const slices = [];
116
- let current = null;
117
- for (const rawLine of lines) {
118
- const line = rawLine.trimStart();
119
- const match = line.match(/^(#{1,6})\s+(.+?)\s*$/);
120
- if (match) {
121
- if (current) {
122
- slices.push({
123
- heading: current.heading,
124
- depth: current.depth,
125
- body: current.body.join("\n")
126
- });
127
- }
128
- current = { heading: match[2].trim(), depth: match[1].length, body: [] };
129
- }
130
- else if (current) {
131
- current.body.push(rawLine);
132
- }
133
- }
134
- if (current) {
135
- slices.push({
136
- heading: current.heading,
137
- depth: current.depth,
138
- body: current.body.join("\n")
139
- });
140
- }
141
- return slices;
142
- }
143
- function extractTopLevelBullets(sectionBody) {
144
- const bullets = [];
145
- for (const rawLine of sectionBody.split(/\r?\n/)) {
146
- const line = rawLine.replace(/\s+$/, "");
147
- const leading = line.match(/^(\s*)[-*]\s+(.+)$/);
148
- if (!leading)
149
- continue;
150
- if (leading[1].length > 0)
151
- continue;
152
- bullets.push(leading[2].trim());
153
- }
154
- return bullets;
155
- }
156
- function checkUniqueBulletsInSection(sections, body) {
157
- const slices = sliceBySection(body);
158
- return sections.map((needle) => {
159
- const lowerNeedle = needle.toLowerCase();
160
- const slice = slices.find((s) => s.heading.toLowerCase().includes(lowerNeedle));
161
- if (!slice) {
162
- return result(`rules:unique-in-section:${slugify(needle)}`, false, `Section matching "${needle}" not found; cannot check uniqueness.`, { section: needle, found: false });
163
- }
164
- const bullets = extractTopLevelBullets(slice.body);
165
- const seen = new Map();
166
- for (const bullet of bullets) {
167
- const key = bullet.toLowerCase();
168
- seen.set(key, (seen.get(key) ?? 0) + 1);
169
- }
170
- const duplicates = [...seen.entries()]
171
- .filter(([, count]) => count > 1)
172
- .map(([entry, count]) => ({ entry, count }));
173
- const ok = duplicates.length === 0;
174
- return result(`rules:unique-in-section:${slugify(needle)}`, ok, ok
175
- ? `Section "${slice.heading}" has ${bullets.length} unique bullet(s).`
176
- : `Section "${slice.heading}" has duplicate bullet(s): ${duplicates
177
- .map((d) => `"${d.entry}" x${d.count}`)
178
- .join(", ")}.`, {
179
- section: slice.heading,
180
- bullets: bullets.length,
181
- duplicates
182
- });
183
- });
184
- }
185
- /**
186
- * Run every configured rule check against the artifact body. Returns `[]`
187
- * when `expected` is undefined or empty so the runner can distinguish
188
- * "no rules declared" from "all rules passed".
189
- */
190
- export function verifyRules(artifact, expected) {
191
- if (!expected)
192
- return [];
193
- const split = splitFrontmatter(artifact);
194
- const body = split.body;
195
- const results = [];
196
- if (expected.mustContain?.length) {
197
- results.push(...checkMustContain(expected.mustContain, body));
198
- }
199
- if (expected.mustNotContain?.length) {
200
- results.push(...checkMustNotContain(expected.mustNotContain, body));
201
- }
202
- if (expected.regexRequired?.length) {
203
- results.push(...checkRegexRequired(expected.regexRequired, body));
204
- }
205
- if (expected.regexForbidden?.length) {
206
- results.push(...checkRegexForbidden(expected.regexForbidden, body));
207
- }
208
- if (expected.minOccurrences && Object.keys(expected.minOccurrences).length) {
209
- results.push(...checkMinOccurrences(expected.minOccurrences, body));
210
- }
211
- if (expected.maxOccurrences && Object.keys(expected.maxOccurrences).length) {
212
- results.push(...checkMaxOccurrences(expected.maxOccurrences, body));
213
- }
214
- if (expected.uniqueBulletsInSection?.length) {
215
- results.push(...checkUniqueBulletsInSection(expected.uniqueBulletsInSection, body));
216
- }
217
- return results;
218
- }
@@ -1,14 +0,0 @@
1
- import type { StructuralExpected, VerifierResult } from "../types.js";
2
- export interface ArtifactSplit {
3
- hasFrontmatter: boolean;
4
- frontmatterRaw: string;
5
- frontmatterParsed?: Record<string, unknown>;
6
- body: string;
7
- }
8
- export declare function splitFrontmatter(artifact: string): ArtifactSplit;
9
- /**
10
- * Run every configured structural check against the artifact text.
11
- * Returns [] when `expected` is undefined/empty so the runner can treat
12
- * "no structural expectations" as "no verifier results" rather than "pass".
13
- */
14
- export declare function verifyStructural(artifact: string, expected: StructuralExpected | undefined): VerifierResult[];
@@ -1,171 +0,0 @@
1
- /**
2
- * Structural verifier: deterministic, zero-LLM checks against a
3
- * single markdown artifact. Each structural expectation produces one
4
- * `VerifierResult` so baselines diff cleanly at the check level rather than
5
- * lumping everything into a single boolean.
6
- *
7
- * Design notes:
8
- *
9
- * - All pattern matching is case-insensitive. Authoring a check as
10
- * `"Directions"` matches `## Directions` and `### directions-suggested`.
11
- * - Frontmatter detection is permissive: it must start at byte 0 with `---\n`
12
- * and close on a subsequent `---` line. Anything else is treated as "no
13
- * frontmatter", which fails every `requiredFrontmatterKeys` entry
14
- * deterministically.
15
- * - `minLines`/`maxLines` intentionally exclude frontmatter so a rewrite that
16
- * adds metadata does not accidentally drop the body below the floor.
17
- * - Scoring: each check scores 0 or 1. The case `passed` becomes the AND of
18
- * all individual `ok` flags. This keeps the structural verifier
19
- * deterministic; the 0..1 rubric scale shows up later in the LLM judge.
20
- */
21
- import { parse as parseYaml } from "yaml";
22
// Opening delimiter: `---` at the very start of the text, followed by LF or CRLF.
- const FRONTMATTER_OPEN = /^---\r?\n/;
23
// Closing delimiter: a line break, then `---` (optionally followed by CR), ending in LF or end of input.
- const FRONTMATTER_CLOSE = /\r?\n---\r?(?:\n|$)/;
24
/**
 * Convert arbitrary text into a lowercase, hyphen-separated slug.
 *
 * Every run of characters outside `a-z0-9` collapses into a single hyphen,
 * a leading hyphen is removed, and the result is capped at 64 characters.
 * The trailing hyphen is stripped only after truncation, so a slug that is
 * cut exactly at a separator does not end in a dangling `-`.
 *
 * @param {string} input - Text to slugify.
 * @returns {string} Slug of at most 64 characters; may be empty.
 */
function slugify(input) {
    return input
        .toLowerCase()
        .replace(/[^a-z0-9]+/g, "-")
        .replace(/^-/, "")
        .slice(0, 64)
        // Trim last: truncation itself can expose a hyphen at the end.
        .replace(/-$/, "");
}
31
/**
 * Separate a leading `---` frontmatter block from the rest of an artifact.
 *
 * The artifact must begin with a `---` line and contain a later line that
 * is exactly `---`; otherwise the whole text is returned as the body with
 * `hasFrontmatter: false`. An empty block (`---` directly followed by
 * `---`) is recognised as frontmatter with an empty raw string.
 *
 * @param {string} artifact - Full artifact text.
 * @returns {{hasFrontmatter: boolean, frontmatterRaw: string, frontmatterParsed?: object, body: string}}
 *   `frontmatterParsed` is set only when the YAML parses to a plain
 *   (non-array) object.
 */
export function splitFrontmatter(artifact) {
    const openMatch = artifact.match(FRONTMATTER_OPEN);
    if (!openMatch) {
        return { hasFrontmatter: false, frontmatterRaw: "", body: artifact };
    }
    // Start the search ON the opener's trailing "\n" instead of after it.
    // The close pattern needs a preceding line break, and for an empty
    // frontmatter block the only such break belongs to the opening line.
    const fromOpenNewline = artifact.slice(openMatch[0].length - 1);
    const closeMatch = fromOpenNewline.match(FRONTMATTER_CLOSE);
    if (!closeMatch || closeMatch.index === undefined) {
        return { hasFrontmatter: false, frontmatterRaw: "", body: artifact };
    }
    // Skip the shared "\n" at position 0; slice(1, 0) yields "" for an empty block.
    const frontmatterRaw = fromOpenNewline.slice(1, closeMatch.index);
    const body = fromOpenNewline.slice(closeMatch.index + closeMatch[0].length);
    let frontmatterParsed;
    try {
        const parsed = parseYaml(frontmatterRaw);
        if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
            frontmatterParsed = parsed;
        }
    }
    catch {
        // Deliberately best-effort: unparseable YAML still counts as
        // frontmatter being present, just without a parsed form.
        frontmatterParsed = undefined;
    }
    return {
        hasFrontmatter: true,
        frontmatterRaw,
        frontmatterParsed,
        body
    };
}
59
/**
 * Collect the lines of `body` that look like ATX headings: after leading
 * whitespace is removed, one to six `#` characters, whitespace, then text.
 * Returned lines keep their trailing content but not their leading indent.
 */
function extractHeadingLines(body) {
    const headingShape = /^#{1,6}\s+\S/;
    const headings = [];
    for (const rawLine of body.split(/\r?\n/)) {
        const candidate = rawLine.trimStart();
        if (headingShape.test(candidate)) {
            headings.push(candidate);
        }
    }
    return headings;
}
65
/**
 * Build one structural verifier result. `score` mirrors `ok` (1 or 0) and
 * the `details` key is attached only when a value other than `undefined`
 * is supplied.
 */
function result(id, ok, message, details) {
    const entry = {
        kind: "structural",
        id,
        ok,
        score: ok ? 1 : 0,
        message
    };
    if (details !== undefined) {
        entry.details = details;
    }
    return entry;
}
75
/**
 * Produce one result per required section, passing when some heading line
 * contains the section name (case-insensitive substring match).
 *
 * A blank or whitespace-only section name always fails. Without that guard
 * the empty needle would match every heading, so the check would pass
 * whenever the body had any heading at all.
 *
 * @param {string[]} sections - Section names to look for.
 * @param {string} body - Artifact body (frontmatter already removed).
 * @returns {object[]} One structural result per entry in `sections`.
 */
function checkRequiredSections(sections, body) {
    const headings = extractHeadingLines(body).map((line) => line.toLowerCase());
    return sections.map((section) => {
        const needle = section.toLowerCase().trim();
        // An empty needle is a substring of everything; never treat it as a hit.
        const found = needle.length > 0 && headings.some((heading) => heading.includes(needle));
        return result(`structural:section:${slugify(section)}`, found, found
            ? `Section matching "${section}" present.`
            : `No heading contains "${section}".`, { pattern: section, searchedHeadings: headings.length });
    });
}
85
/**
 * Produce one result per forbidden pattern. A pattern passes only when it
 * does not occur anywhere in the body, compared case-insensitively.
 */
function checkForbiddenPatterns(patterns, body) {
    const haystack = body.toLowerCase();
    return patterns.map((pattern) => {
        const occurrences = countOccurrences(haystack, pattern.toLowerCase());
        const absent = occurrences === 0;
        const id = `structural:forbidden:${slugify(pattern)}`;
        let message;
        if (absent) {
            message = `Pattern "${pattern}" absent (as required).`;
        }
        else {
            message = `Pattern "${pattern}" appears ${occurrences} time(s); remove.`;
        }
        return result(id, absent, message, { pattern, occurrences });
    });
}
96
/**
 * Count non-overlapping occurrences of `needle` in `haystack`, scanning
 * left to right. An empty needle counts as zero occurrences.
 */
function countOccurrences(haystack, needle) {
    const step = needle.length;
    if (step === 0) {
        return 0;
    }
    let total = 0;
    for (let at = haystack.indexOf(needle, 0); at >= 0; at = haystack.indexOf(needle, at + step)) {
        total += 1;
    }
    return total;
}
109
/**
 * Check the body's line count and character count against the optional
 * `minLines`/`maxLines` and `minChars`/`maxChars` bounds on `expected`.
 * A result is emitted for a dimension only when at least one of its two
 * bounds is defined; an empty body counts as zero lines.
 */
function checkLengthBounds(expected, body) {
    const charCount = body.length;
    const lineCount = charCount === 0 ? 0 : body.split(/\r?\n/).length;
    // An undefined bound is treated as unbounded on that side.
    const inRange = (value, lo, hi) => (lo === undefined || value >= lo) && (hi === undefined || value <= hi);
    const results = [];
    const { minLines, maxLines } = expected;
    if (minLines !== undefined || maxLines !== undefined) {
        const ok = inRange(lineCount, minLines, maxLines);
        const message = ok
            ? `Body has ${lineCount} line(s), within bounds.`
            : buildOutOfRangeMessage("line", lineCount, minLines, maxLines);
        results.push(result("structural:length:lines", ok, message, { lineCount, minLines, maxLines }));
    }
    const { minChars, maxChars } = expected;
    if (minChars !== undefined || maxChars !== undefined) {
        const ok = inRange(charCount, minChars, maxChars);
        const message = ok
            ? `Body has ${charCount} char(s), within bounds.`
            : buildOutOfRangeMessage("char", charCount, minChars, maxChars);
        results.push(result("structural:length:chars", ok, message, { charCount, minChars, maxChars }));
    }
    return results;
}
135
/**
 * Format the failure message for a length check. A missing lower bound is
 * shown as "0" and a missing upper bound as "∞".
 */
function buildOutOfRangeMessage(unit, actual, min, max) {
    const lowerBound = min !== undefined ? String(min) : "0";
    const upperBound = max !== undefined ? String(max) : "∞";
    const range = `${lowerBound}..${upperBound}`;
    return `Body has ${actual} ${unit}(s); expected ${range}.`;
}
140
/**
 * Produce one result per required frontmatter key. Every key fails when the
 * artifact has no frontmatter or the frontmatter has no parsed form;
 * otherwise a key passes when it is a top-level key of the parsed object.
 */
function checkFrontmatterKeys(keys, split) {
    const idFor = (key) => `structural:frontmatter:${slugify(key)}`;
    const parsed = split.hasFrontmatter ? split.frontmatterParsed : undefined;
    if (!parsed) {
        return keys.map((key) => result(idFor(key), false, `Frontmatter key "${key}" missing (no parseable frontmatter).`, { key, frontmatterPresent: split.hasFrontmatter }));
    }
    const declared = new Set(Object.keys(parsed));
    return keys.map((key) => {
        const ok = declared.has(key);
        let message;
        if (ok) {
            message = `Frontmatter key "${key}" present.`;
        }
        else {
            message = `Frontmatter key "${key}" missing.`;
        }
        return result(idFor(key), ok, message, { key });
    });
}
150
- /**
151
- * Run every configured structural check against the artifact text.
152
- * Returns [] when `expected` is undefined/empty so the runner can treat
153
- * "no structural expectations" as "no verifier results" rather than "pass".
154
- */
155
- export function verifyStructural(artifact, expected) {
156
- if (!expected)
157
- return [];
158
- const split = splitFrontmatter(artifact);
159
- const results = [];
160
- if (expected.requiredSections?.length) {
161
- results.push(...checkRequiredSections(expected.requiredSections, split.body));
162
- }
163
- if (expected.forbiddenPatterns?.length) {
164
- results.push(...checkForbiddenPatterns(expected.forbiddenPatterns, split.body));
165
- }
166
- results.push(...checkLengthBounds(expected, split.body));
167
- if (expected.requiredFrontmatterKeys?.length) {
168
- results.push(...checkFrontmatterKeys(expected.requiredFrontmatterKeys, split));
169
- }
170
- return results;
171
- }