cclaw-cli 0.22.0 → 0.23.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,11 +4,11 @@ export interface RunEvalOptions {
4
4
  projectRoot: string;
5
5
  stage?: FlowStage;
6
6
  tier?: EvalTier;
7
- /** When true, run only structural verifiers. Wave 7.1 wires actual verifiers. */
7
+ /** When true, run only structural verifiers (Step 1). */
8
8
  schemaOnly?: boolean;
9
- /** When true, run structural + rule-based verifiers. Wave 7.2 wires rules. */
9
+ /** When true, run structural + rule-based verifiers. Step 2 wires rules. */
10
10
  rules?: boolean;
11
- /** When true, also run LLM judge verifiers. Wave 7.3 wires judging. */
11
+ /** When true, also run LLM judge verifiers. Step 3 wires judging. */
12
12
  judge?: boolean;
13
13
  /** When true, load config + corpus and return a summary without running any verifier. */
14
14
  dryRun?: boolean;
@@ -27,10 +27,6 @@ export interface DryRunSummary {
27
27
  }>;
28
28
  };
29
29
  plannedTier: EvalTier;
30
- /**
31
- * Waves 7.1–7.3 progressively flip these to `true`. Wave 7.0 is `false`
32
- * across the board because no verifier is implemented yet.
33
- */
34
30
  verifiersAvailable: {
35
31
  structural: boolean;
36
32
  rules: boolean;
@@ -40,14 +36,10 @@ export interface DryRunSummary {
40
36
  notes: string[];
41
37
  }
42
38
  /**
43
- * Wave 7.0 runner. Responsibilities:
44
- * - Load resolved config (defaults + file + env).
45
- * - Load corpus (empty on a fresh install).
46
- * - Validate that no verifier flag asks for a capability that does not exist yet.
47
- * - Return either a dry-run summary or an empty report.
48
- *
49
- * Waves 7.1+ will replace the "no verifiers available" branch with the real
50
- * verifier dispatch pipeline. The signature stays stable so CLI wiring does
51
- * not churn.
39
+ * Structural runner. When `schemaOnly` is set (or no other verifier flags are
40
+ * active), runs structural verifiers against fixture-backed cases and loads
41
+ * per-stage baselines for regression comparison. Tier A/B/C agent loops
42
+ * arrive in later steps; until then cases that declare no structural
43
+ * expectations are marked as skipped rather than failing (cases that declare expectations but have no readable `fixture` fail).
52
44
  */
53
45
  export declare function runEval(options: RunEvalOptions): Promise<DryRunSummary | EvalReport>;
@@ -1,23 +1,121 @@
1
1
  import { randomUUID } from "node:crypto";
2
2
  import { CCLAW_VERSION } from "../constants.js";
3
- import { loadCorpus } from "./corpus.js";
3
+ import { FLOW_STAGES } from "../types.js";
4
+ import { compareAgainstBaselines, loadBaselinesByStage } from "./baseline.js";
5
+ import { loadCorpus, readFixtureArtifact } from "./corpus.js";
4
6
  import { loadEvalConfig } from "./config-loader.js";
7
+ import { verifyStructural } from "./verifiers/structural.js";
5
8
  function groupByStage(cases) {
6
9
  return cases.reduce((acc, item) => {
7
10
  acc[item.stage] = (acc[item.stage] ?? 0) + 1;
8
11
  return acc;
9
12
  }, {});
10
13
  }
14
/**
 * Build the passing placeholder result used when a case has nothing for the
 * structural verifier to check.
 *
 * @param message human-readable explanation stored on the result.
 * @param details optional payload; the `details` key is omitted entirely when
 *   this is `undefined`.
 * @returns a structural verifier result with `ok: true` and `score: 1`.
 */
function skeletonVerifierResult(message, details) {
    const placeholder = {
        kind: "structural",
        id: "structural:no-expectations",
        ok: true,
        score: 1,
        message
    };
    return details === undefined ? placeholder : { ...placeholder, details };
}
24
/**
 * Evaluate one corpus case with the structural verifier only.
 *
 * Outcomes:
 * - no `expected.structural` (or an empty object): one passing placeholder
 *   result flagged `details.skipped`;
 * - reading the fixture throws: one failing `structural:fixture:missing`
 *   result carrying the error message;
 * - the fixture read yields `undefined` without throwing: one failing
 *   `structural:fixture:absent` result;
 * - otherwise: the results of `verifyStructural`, or a skipped placeholder
 *   when it produced no checks.
 *
 * @param projectRoot passed through to `readFixtureArtifact`.
 * @param caseEntry corpus case; `id`, `stage`, `expected` and `fixture` are read.
 * @param plannedTier tier recorded on the returned case result.
 * @returns a case result whose `passed` is true only when every verifier
 *   result is `ok`; `durationMs` is wall-clock time spent in this function.
 */
async function runCaseStructural(projectRoot, caseEntry, plannedTier) {
    const startedAt = Date.now();
    const expectations = caseEntry.expected?.structural;
    const checks = [];
    const declaresExpectations = Boolean(expectations) && Object.keys(expectations).length > 0;
    if (declaresExpectations) {
        let fixtureText;
        try {
            fixtureText = await readFixtureArtifact(projectRoot, caseEntry);
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            checks.push({
                kind: "structural",
                id: "structural:fixture:missing",
                ok: false,
                score: 0,
                message: reason,
                details: { fixture: caseEntry.fixture }
            });
        }
        if (fixtureText === undefined) {
            // Report "absent" only when the read did not already record a failure.
            if (checks.length === 0) {
                checks.push({
                    kind: "structural",
                    id: "structural:fixture:absent",
                    ok: false,
                    score: 0,
                    message: "Structural expectations declared but no fixture path provided. Add `fixture: ./<id>/fixture.md`.",
                    details: { fixtureProvided: false }
                });
            }
        }
        else {
            const produced = verifyStructural(fixtureText, expectations);
            if (produced.length > 0) {
                checks.push(...produced);
            }
            else {
                checks.push(skeletonVerifierResult("Structural expectations parsed but produced zero checks.", { skipped: true }));
            }
        }
    }
    else {
        // Nothing declared for this verifier kind: record a visible, passing
        // placeholder so report arithmetic downstream stays uniform.
        checks.push(skeletonVerifierResult("No structural expectations declared for this case; structural verifier skipped.", { skipped: true }));
    }
    const everyCheckOk = checks.every((check) => check.ok);
    return {
        caseId: caseEntry.id,
        stage: caseEntry.stage,
        tier: plannedTier,
        passed: everyCheckOk,
        durationMs: Date.now() - startedAt,
        verifierResults: checks
    };
}
79
+ function reduceSummary(caseResults) {
80
+ let passed = 0;
81
+ let failed = 0;
82
+ let skipped = 0;
83
+ let totalCostUsd = 0;
84
+ let totalDurationMs = 0;
85
+ for (const c of caseResults) {
86
+ totalDurationMs += c.durationMs;
87
+ if (c.costUsd !== undefined)
88
+ totalCostUsd += c.costUsd;
89
+ if (c.verifierResults.length === 1 && c.verifierResults[0]?.details?.skipped === true) {
90
+ skipped += 1;
91
+ continue;
92
+ }
93
+ if (c.passed)
94
+ passed += 1;
95
+ else
96
+ failed += 1;
97
+ }
98
+ return {
99
+ totalCases: caseResults.length,
100
+ passed,
101
+ failed,
102
+ skipped,
103
+ totalCostUsd: Number(totalCostUsd.toFixed(6)),
104
+ totalDurationMs
105
+ };
106
+ }
107
/**
 * List the distinct stages that appear in the case results, in the order
 * defined by `FLOW_STAGES`. Stages not listed in `FLOW_STAGES` are dropped.
 */
function stagesInResults(caseResults) {
    const seenStages = new Set();
    caseResults.forEach((entry) => seenStages.add(entry.stage));
    return FLOW_STAGES.filter((stage) => seenStages.has(stage));
}
11
113
  /**
12
- * Wave 7.0 runner. Responsibilities:
13
- * - Load resolved config (defaults + file + env).
14
- * - Load corpus (empty on a fresh install).
15
- * - Validate that no verifier flag asks for a capability that does not exist yet.
16
- * - Return either a dry-run summary or an empty report.
17
- *
18
- * Waves 7.1+ will replace the "no verifiers available" branch with the real
19
- * verifier dispatch pipeline. The signature stays stable so CLI wiring does
20
- * not churn.
114
+ * Structural runner. When `schemaOnly` is set (or no other verifier flags are
115
+ * active), runs structural verifiers against fixture-backed cases and loads
116
+ * per-stage baselines for regression comparison. Tier A/B/C agent loops
117
+ * arrive in later steps; until then cases that declare no structural
118
+ * expectations are marked as skipped rather than failing (cases that declare expectations but have no readable `fixture` fail).
21
119
  */
22
120
  export async function runEval(options) {
23
121
  const config = await loadEvalConfig(options.projectRoot, options.env ?? process.env);
@@ -25,16 +123,13 @@ export async function runEval(options) {
25
123
  const plannedTier = options.tier ?? config.defaultTier;
26
124
  const notes = [];
27
125
  if (corpus.length === 0) {
28
- notes.push("Corpus is empty. Seed cases land in Wave 7.1 (`.cclaw/evals/corpus/<stage>/*.yaml`).");
29
- }
30
- if (options.schemaOnly) {
31
- notes.push("--schema-only is accepted; structural verifiers wire up in Wave 7.1.");
126
+ notes.push("Corpus is empty. Seed cases live under `.cclaw/evals/corpus/<stage>/*.yaml`.");
32
127
  }
33
128
  if (options.rules) {
34
- notes.push("--rules is accepted; rule verifiers wire up in Wave 7.2.");
129
+ notes.push("--rules is accepted; rule verifiers are not wired yet.");
35
130
  }
36
131
  if (options.judge) {
37
- notes.push("--judge is accepted; LLM judging wires up in Wave 7.3.");
132
+ notes.push("--judge is accepted; LLM judging is not wired yet.");
38
133
  }
39
134
  if (options.dryRun === true) {
40
135
  const summary = {
@@ -47,7 +142,7 @@ export async function runEval(options) {
47
142
  },
48
143
  plannedTier,
49
144
  verifiersAvailable: {
50
- structural: false,
145
+ structural: true,
51
146
  rules: false,
52
147
  judge: false,
53
148
  workflow: false
@@ -57,22 +152,13 @@ export async function runEval(options) {
57
152
  return summary;
58
153
  }
59
154
  const now = new Date().toISOString();
60
- const caseResults = corpus.map((item) => ({
61
- caseId: item.id,
62
- stage: item.stage,
63
- tier: plannedTier,
64
- passed: false,
65
- durationMs: 0,
66
- verifierResults: [
67
- {
68
- kind: "structural",
69
- id: "wave-7-0-skeleton",
70
- ok: false,
71
- message: "Verifiers are not implemented in Wave 7.0; run with --dry-run.",
72
- details: { skipped: true }
73
- }
74
- ]
75
- }));
155
+ const caseResults = [];
156
+ for (const item of corpus) {
157
+ caseResults.push(await runCaseStructural(options.projectRoot, item, plannedTier));
158
+ }
159
+ const stages = stagesInResults(caseResults);
160
+ const baselines = await loadBaselinesByStage(options.projectRoot, stages);
161
+ const summary = reduceSummary(caseResults);
76
162
  const report = {
77
163
  schemaVersion: 1,
78
164
  generatedAt: now,
@@ -81,16 +167,12 @@ export async function runEval(options) {
81
167
  provider: config.provider,
82
168
  model: config.model,
83
169
  tier: plannedTier,
84
- stages: options.stage ? [options.stage] : [],
170
+ stages,
85
171
  cases: caseResults,
86
- summary: {
87
- totalCases: caseResults.length,
88
- passed: 0,
89
- failed: 0,
90
- skipped: caseResults.length,
91
- totalCostUsd: 0,
92
- totalDurationMs: 0
93
- }
172
+ summary
94
173
  };
174
+ const baselineDelta = compareAgainstBaselines(report, baselines);
175
+ if (baselineDelta)
176
+ report.baselineDelta = baselineDelta;
95
177
  return report;
96
178
  }
@@ -6,7 +6,7 @@
6
6
  * deliberately decoupled from the main cclaw runtime so that:
7
7
  *
8
8
  * - Users who never run `cclaw eval` pay zero runtime cost.
9
- * - The verifier / rubric / LLM stack evolves on its own release cadence (Waves 7.0-7.6).
9
+ * - The verifier / rubric / LLM stack evolves on its own release cadence (Steps 0-6).
10
10
  * - Any OpenAI-compatible endpoint can be swapped in via config (z.ai, OpenAI, vLLM, etc.).
11
11
  */
12
12
  import type { FlowStage } from "../types.js";
@@ -27,11 +27,50 @@ export type EvalTier = (typeof EVAL_TIERS)[number];
27
27
  */
28
28
  export declare const VERIFIER_KINDS: readonly ["structural", "rules", "judge", "workflow"];
29
29
  export type VerifierKind = (typeof VERIFIER_KINDS)[number];
30
/**
 * Expectations for the structural verifier: deterministic checks, made
 * without any LLM, against one text artifact. Every field is optional and an
 * omitted field produces no check. Step 1 implements all fields listed here;
 * the sibling `rules` and `judge` shapes follow in Steps 2 and 3.
 */
export interface StructuralExpected {
    /**
     * "Required sections": each entry must occur, compared case-insensitively
     * as a substring, on at least one markdown heading line (a line that
     * starts with `#`).
     */
    requiredSections?: string[];
    /**
     * Substrings that must be absent from the whole body, headings and prose
     * alike, compared case-insensitively. Typical entries: "TBD", "TODO",
     * "placeholder".
     */
    forbiddenPatterns?: string[];
    /** Lower bound (inclusive) on the body's line count; frontmatter is not counted. */
    minLines?: number;
    /** Upper bound (inclusive) on the body's line count; frontmatter is not counted. */
    maxLines?: number;
    /** Lower bound (inclusive) on the body's character count. */
    minChars?: number;
    /** Upper bound (inclusive) on the body's character count. */
    maxChars?: number;
    /**
     * Keys that the leading YAML frontmatter must contain. Frontmatter is the
     * block enclosed by a pair of `---` delimiters at the very top of the
     * file; when the artifact has none, every entry fails.
     */
    requiredFrontmatterKeys?: string[];
}
61
/**
 * Container for the expectation shapes of each verifier kind. As of Step 1
 * only `structural` is wired; `rules` and `judge` are still untyped bags.
 */
export interface ExpectedShape {
    /** Deterministic structural checks (Step 1). */
    structural?: StructuralExpected;
    /** Rule-based checks (keyword, regex, traceability), planned for Step 2. */
    rules?: Record<string, unknown>;
    /** Rubrics for the LLM judge, planned for Step 3. */
    judge?: Record<string, unknown>;
}
30
69
  /**
31
70
  * A single eval case describes one input scenario for one stage. Cases live in
32
71
  * `.cclaw/evals/corpus/<stage>/<id>.yaml` and may reference a pre-generated
33
- * fixture artifact for verifier development (Wave 7.1) before the agent loop
34
- * exists (Wave 7.3+).
72
+ * fixture artifact for verifier development (Step 1) before the agent loop
73
+ * exists (Step 3+).
35
74
  */
36
75
  export interface EvalCase {
37
76
  id: string;
@@ -40,14 +79,14 @@ export interface EvalCase {
40
79
  /** Project files copied into the Tier B/C sandbox before the agent runs. */
41
80
  contextFiles?: string[];
42
81
  /**
43
- * Optional expected-shape hints consumed by structural/rule verifiers.
44
- * Left intentionally loose; verifiers in Waves 7.1–7.2 will narrow this.
82
+ * Typed expectation hints consumed by the structural/rules/judge verifiers.
83
+ * Each sub-shape is optional; missing sub-shapes skip that verifier tier.
45
84
  */
46
- expected?: Record<string, unknown>;
85
+ expected?: ExpectedShape;
47
86
  /**
48
87
  * Path (relative to the corpus case file) of a pre-generated artifact used
49
- * when verifiers are exercised without a live agent loop. Primarily a Wave
50
- * 7.1 development aid.
88
+ * when verifiers are exercised without a live agent loop. Primarily a
89
+ * Step 1 development aid.
51
90
  */
52
91
  fixture?: string;
53
92
  }
@@ -90,12 +129,8 @@ export interface EvalReport {
90
129
  totalCostUsd: number;
91
130
  totalDurationMs: number;
92
131
  };
93
- /** Present when comparing against a saved baseline (Wave 7.1+). */
94
- baselineDelta?: {
95
- baselineId: string;
96
- scoreDelta: number;
97
- criticalFailures: number;
98
- };
132
+ /** Present when comparing against a saved baseline (Step 1+). */
133
+ baselineDelta?: BaselineDelta;
99
134
  }
100
135
  /**
101
136
  * Eval configuration, persisted to `.cclaw/evals/config.yaml` and mergeable
@@ -134,3 +169,48 @@ export interface ResolvedEvalConfig extends EvalConfig {
134
169
  apiKey?: string;
135
170
  source: "default" | "file" | "env" | "file+env";
136
171
  }
172
/**
 * Per-stage baseline that regression gating (Step 1) compares fresh runs
 * against. Baselines are committed to git and rewritten by
 * `cclaw eval --update-baseline --confirm`. The structure is deliberately
 * flat so that a plain `git diff` shows what changed between runs.
 */
export interface BaselineSnapshot {
    /** Snapshot format version; currently always `1`. */
    schemaVersion: 1;
    /** Stage this snapshot belongs to. */
    stage: FlowStage;
    /** When the snapshot was produced (presumably an ISO timestamp, like the report's `generatedAt`). */
    generatedAt: string;
    /** cclaw version recorded with the snapshot. */
    cclawVersion: string;
    /** Entries indexed by `EvalCase.id`, so a case that did not change yields no diff. */
    cases: Record<string, BaselineCaseEntry>;
}
186
/** Baseline record for one eval case. */
export interface BaselineCaseEntry {
    /** Whether the case passed when the baseline was captured. */
    passed: boolean;
    /** Recorded outcome of each verifier check for the case. */
    verifierResults: BaselineVerifierEntry[];
}
190
/** Baseline record for one verifier check. */
export interface BaselineVerifierEntry {
    /** Identifier of the check, e.g. `structural:length:lines`. */
    id: string;
    /** Verifier kind that produced the check. */
    kind: VerifierKind;
    /** Whether the check passed. */
    ok: boolean;
    /** Score of the check, when one was recorded. */
    score?: number;
}
196
/**
 * Difference between a fresh report and the stored baseline. It is filled in
 * when baselines exist on disk and the run covers cases that match them.
 */
export interface BaselineDelta {
    /** Identifier of the baseline compared against. */
    baselineId: string;
    /** Fresh score minus baseline score, kept within [-1, 1]. */
    scoreDelta: number;
    /** Number of checks that went from `ok:true` to `ok:false`. */
    criticalFailures: number;
    /** Regression details per case, consumed by the Markdown report. */
    regressions: BaselineRegression[];
}
209
/** One regression found when comparing a fresh run with the baseline. */
export interface BaselineRegression {
    /** Id of the affected eval case. */
    caseId: string;
    /** Stage the case belongs to. */
    stage: FlowStage;
    /** Id of the verifier check concerned. */
    verifierId: string;
    /** Category of the regression. */
    reason: "newly-failing" | "case-now-failing" | "score-drop";
    /** Score stored in the baseline, when available. */
    previousScore?: number;
    /** Score from the fresh run, when available. */
    currentScore?: number;
}
@@ -0,0 +1,14 @@
1
+ import type { StructuralExpected, VerifierResult } from "../types.js";
2
/** Result of separating an artifact into frontmatter and body. */
export interface ArtifactSplit {
    /** True when both an opening and a closing `---` delimiter were found. */
    hasFrontmatter: boolean;
    /** Text between the delimiters; empty string when there is no frontmatter. */
    frontmatterRaw: string;
    /** Parsed frontmatter; set only when the YAML parses to a non-array object. */
    frontmatterParsed?: Record<string, unknown>;
    /** Text after the closing delimiter, or the whole artifact when there is no frontmatter. */
    body: string;
}
8
/**
 * Separate an artifact into its leading `---`-delimited frontmatter block and
 * the remaining body.
 */
export declare function splitFrontmatter(artifact: string): ArtifactSplit;
9
/**
 * Apply every configured structural check to the artifact text.
 * Yields an empty array when `expected` is undefined or configures nothing,
 * which lets the runner read "no structural expectations" as "no verifier
 * results" instead of as a pass.
 */
export declare function verifyStructural(artifact: string, expected: StructuralExpected | undefined): VerifierResult[];
@@ -0,0 +1,171 @@
1
+ /**
2
+ * Structural verifier: deterministic, zero-LLM checks against a
3
+ * single markdown artifact. Each structural expectation produces one
4
+ * `VerifierResult` so baselines diff cleanly at the check level rather than
5
+ * lumping everything into a single boolean.
6
+ *
7
+ * Design notes:
8
+ *
9
+ * - All pattern matching is case-insensitive. Authoring a check as
10
+ * `"Directions"` matches `## Directions` and `### directions-suggested`.
11
+ * - Frontmatter detection is permissive: it must start at byte 0 with `---\n`
12
+ * and close on a subsequent `---` line. Anything else is treated as "no
13
+ * frontmatter", which fails every `requiredFrontmatterKeys` entry
14
+ * deterministically.
15
+ * - `minLines`/`maxLines` intentionally exclude frontmatter so a rewrite that
16
+ * adds metadata does not accidentally drop the body below the floor.
17
+ * - Scoring: each check scores 0 or 1. The case `passed` becomes the AND of
18
+ * all individual `ok` flags. This keeps the structural verifier
19
+ * deterministic; the 0..1 rubric scale shows up later in the LLM judge.
20
+ */
21
+ import { parse as parseYaml } from "yaml";
22
// Opening delimiter: `---` followed by a line break at the very start of the artifact.
const FRONTMATTER_OPEN = /^---\r?\n/;
// Closing delimiter: a line break, then `---`, then a line break or end of input.
const FRONTMATTER_CLOSE = /\r?\n---\r?(?:\n|$)/;
24
/**
 * Turn free text into an id-safe slug: lower-cased, with every run of
 * characters outside `a-z0-9` collapsed to a single `-`, no leading or
 * trailing `-`, and at most 64 characters.
 *
 * The trailing dash is stripped after truncation, so a slug cut at the
 * 64-character limit cannot end in `-`.
 *
 * @param input text to slugify (a pattern, section name or frontmatter key).
 * @returns the slug; an empty string when the input has no `a-z0-9` characters.
 */
function slugify(input) {
    return input
        .toLowerCase()
        .replace(/[^a-z0-9]+/g, "-")
        .replace(/^-/, "")
        .slice(0, 64)
        .replace(/-$/, "");
}
31
/**
 * Separate an artifact into its leading YAML frontmatter and the body.
 *
 * Frontmatter is recognised only when the artifact starts with a `---` line
 * and a later line consisting of `---` closes it. An empty block (the closing
 * `---` directly after the opening one) is recognised too, so its delimiters
 * do not leak into the body. Anything else is reported as "no frontmatter",
 * with the whole artifact as body.
 *
 * @param artifact full artifact text.
 * @returns the split; `frontmatterParsed` is set only when the raw block
 *   parses as YAML to a non-array object. A YAML parse error is not raised;
 *   it leaves `frontmatterParsed` undefined.
 */
export function splitFrontmatter(artifact) {
    const noFrontmatter = { hasFrontmatter: false, frontmatterRaw: "", body: artifact };
    if (!FRONTMATTER_OPEN.test(artifact)) {
        return noFrontmatter;
    }
    const afterOpen = artifact.replace(FRONTMATTER_OPEN, "");
    let frontmatterRaw;
    let body;
    // FRONTMATTER_CLOSE needs a line break before `---`, but for an empty block
    // that line break was already consumed by the opening delimiter.
    const emptyBlockClose = afterOpen.match(/^---\r?(?:\n|$)/);
    if (emptyBlockClose) {
        frontmatterRaw = "";
        body = afterOpen.slice(emptyBlockClose[0].length);
    }
    else {
        const closeMatch = afterOpen.match(FRONTMATTER_CLOSE);
        if (!closeMatch || closeMatch.index === undefined) {
            return noFrontmatter;
        }
        frontmatterRaw = afterOpen.slice(0, closeMatch.index);
        body = afterOpen.slice(closeMatch.index + closeMatch[0].length);
    }
    let frontmatterParsed;
    try {
        const parsed = parseYaml(frontmatterRaw);
        // Only a mapping is useful for key lookups; scalars, arrays and null are ignored.
        if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
            frontmatterParsed = parsed;
        }
    }
    catch {
        // Malformed YAML: keep the split but report no parsed keys.
        frontmatterParsed = undefined;
    }
    return {
        hasFrontmatter: true,
        frontmatterRaw,
        frontmatterParsed,
        body
    };
}
59
/**
 * Collect the markdown heading lines of a body.
 *
 * A heading is a line that, after leading whitespace is removed, starts with
 * one to six `#` characters followed by whitespace and some text. Lines inside
 * fenced code blocks (opened by three or more backticks or tildes) are
 * ignored, so a `# comment` in a code sample is not mistaken for a section.
 *
 * @param body artifact body, with `\n` or `\r\n` line endings.
 * @returns heading lines in document order, with leading whitespace removed.
 */
function extractHeadingLines(body) {
    const headings = [];
    // Marker of the fence currently open (e.g. "```"); undefined outside a fence.
    let openFence;
    for (const rawLine of body.split(/\r?\n/)) {
        const line = rawLine.trimStart();
        const fence = /^(`{3,}|~{3,})(.*)$/.exec(line);
        if (openFence === undefined) {
            if (fence) {
                openFence = fence[1];
            }
            else if (/^#{1,6}\s+\S/.test(line)) {
                headings.push(line);
            }
            continue;
        }
        // A fence closes on the same marker character, at least as long as the
        // opener, with nothing but whitespace after it.
        const closesFence = fence !== null &&
            fence[1][0] === openFence[0] &&
            fence[1].length >= openFence.length &&
            fence[2].trim() === "";
        if (closesFence) {
            openFence = undefined;
        }
    }
    return headings;
}
65
/**
 * Assemble one structural verifier result.
 *
 * @param id check identifier.
 * @param ok whether the check passed; the score is 1 when truthy, else 0.
 * @param message human-readable outcome.
 * @param details optional payload; the `details` key is omitted when `undefined`.
 */
function result(id, ok, message, details) {
    const outcome = {
        kind: "structural",
        id,
        ok,
        score: ok ? 1 : 0,
        message
    };
    return details === undefined ? outcome : { ...outcome, details };
}
75
/**
 * Produce one result per required section. A section is satisfied when its
 * trimmed, lower-cased text is a substring of at least one lower-cased
 * heading line of the body.
 *
 * @param sections required section names.
 * @param body artifact body (frontmatter already removed).
 */
function checkRequiredSections(sections, body) {
    const loweredHeadings = extractHeadingLines(body).map((heading) => heading.toLowerCase());
    const outcomes = [];
    for (const section of sections) {
        const wanted = section.toLowerCase().trim();
        const present = loweredHeadings.some((heading) => heading.includes(wanted));
        const message = present
            ? `Section matching "${section}" present.`
            : `No heading contains "${section}".`;
        outcomes.push(result(`structural:section:${slugify(section)}`, present, message, {
            pattern: section,
            searchedHeadings: loweredHeadings.length
        }));
    }
    return outcomes;
}
85
/**
 * Produce one result per forbidden pattern. A pattern passes when its
 * lower-cased text does not occur anywhere in the lower-cased body; the
 * number of occurrences is reported in the result details.
 *
 * @param patterns substrings that must not appear.
 * @param body artifact body (frontmatter already removed).
 */
function checkForbiddenPatterns(patterns, body) {
    const loweredBody = body.toLowerCase();
    const outcomes = [];
    for (const pattern of patterns) {
        const occurrences = countOccurrences(loweredBody, pattern.toLowerCase());
        const absent = occurrences === 0;
        const message = absent
            ? `Pattern "${pattern}" absent (as required).`
            : `Pattern "${pattern}" appears ${occurrences} time(s); remove.`;
        outcomes.push(result(`structural:forbidden:${slugify(pattern)}`, absent, message, { pattern, occurrences }));
    }
    return outcomes;
}
96
/**
 * Count non-overlapping occurrences of `needle` in `haystack`, scanning from
 * left to right. An empty needle counts as zero occurrences.
 */
function countOccurrences(haystack, needle) {
    if (needle.length === 0) {
        return 0;
    }
    let total = 0;
    for (let at = haystack.indexOf(needle, 0); at >= 0; at = haystack.indexOf(needle, at + needle.length)) {
        total += 1;
    }
    return total;
}
109
/**
 * Check the body against the configured line and character bounds.
 *
 * Emits at most two results: `structural:length:lines` when `minLines` or
 * `maxLines` is set, and `structural:length:chars` when `minChars` or
 * `maxChars` is set. Bounds are inclusive. An empty body has zero lines;
 * otherwise the line count is the number of segments between line breaks, so
 * a body ending in a line break counts one extra (empty) line.
 *
 * @param expected structural expectations; only the four bound fields are read.
 * @param body artifact body (frontmatter already removed).
 */
function checkLengthBounds(expected, body) {
    const lineCount = body.length === 0 ? 0 : body.split(/\r?\n/).length;
    const charCount = body.length;
    const inRange = (value, min, max) => (min === undefined || value >= min) && (max === undefined || value <= max);
    const { minLines, maxLines, minChars, maxChars } = expected;
    const outcomes = [];
    if (minLines !== undefined || maxLines !== undefined) {
        const ok = inRange(lineCount, minLines, maxLines);
        const message = ok
            ? `Body has ${lineCount} line(s), within bounds.`
            : buildOutOfRangeMessage("line", lineCount, minLines, maxLines);
        outcomes.push(result("structural:length:lines", ok, message, { lineCount, minLines, maxLines }));
    }
    if (minChars !== undefined || maxChars !== undefined) {
        const ok = inRange(charCount, minChars, maxChars);
        const message = ok
            ? `Body has ${charCount} char(s), within bounds.`
            : buildOutOfRangeMessage("char", charCount, minChars, maxChars);
        outcomes.push(result("structural:length:chars", ok, message, { charCount, minChars, maxChars }));
    }
    return outcomes;
}
135
/**
 * Format the failure message for a length check.
 *
 * @param unit unit label, e.g. "line" or "char".
 * @param actual measured value.
 * @param min lower bound; shown as `0` when undefined.
 * @param max upper bound; shown as `∞` when undefined.
 */
function buildOutOfRangeMessage(unit, actual, min, max) {
    let lowerLabel = "0";
    if (min !== undefined) {
        lowerLabel = String(min);
    }
    let upperLabel = "∞";
    if (max !== undefined) {
        upperLabel = String(max);
    }
    return `Body has ${actual} ${unit}(s); expected ${lowerLabel}..${upperLabel}.`;
}
140
/**
 * Produce one result per required frontmatter key.
 *
 * When the artifact has no frontmatter, or the frontmatter did not parse to
 * an object, every key fails and the details record whether a frontmatter
 * block was present at all. Otherwise a key passes when it is one of the
 * top-level keys of the parsed frontmatter.
 *
 * @param keys required frontmatter keys.
 * @param split output of `splitFrontmatter` for the artifact.
 */
function checkFrontmatterKeys(keys, split) {
    const parsed = split.hasFrontmatter ? split.frontmatterParsed : undefined;
    if (!parsed) {
        return keys.map((key) => {
            const message = `Frontmatter key "${key}" missing (no parseable frontmatter).`;
            return result(`structural:frontmatter:${slugify(key)}`, false, message, {
                key,
                frontmatterPresent: split.hasFrontmatter
            });
        });
    }
    const availableKeys = Object.keys(parsed);
    return keys.map((key) => {
        const found = availableKeys.includes(key);
        const message = found ? `Frontmatter key "${key}" present.` : `Frontmatter key "${key}" missing.`;
        return result(`structural:frontmatter:${slugify(key)}`, found, message, { key });
    });
}
150
/**
 * Apply every configured structural check to the artifact text.
 *
 * Results come in a fixed order: required sections, forbidden patterns,
 * length bounds, then frontmatter keys. An undefined `expected`, or one that
 * configures nothing, yields an empty array, which lets the runner read
 * "no structural expectations" as "no verifier results" instead of a pass.
 *
 * @param artifact full artifact text, frontmatter included.
 * @param expected structural expectations for the case, if any.
 */
export function verifyStructural(artifact, expected) {
    if (!expected) {
        return [];
    }
    const split = splitFrontmatter(artifact);
    const { requiredSections, forbiddenPatterns, requiredFrontmatterKeys } = expected;
    return [
        ...(requiredSections?.length ? checkRequiredSections(requiredSections, split.body) : []),
        ...(forbiddenPatterns?.length ? checkForbiddenPatterns(forbiddenPatterns, split.body) : []),
        ...checkLengthBounds(expected, split.body),
        ...(requiredFrontmatterKeys?.length ? checkFrontmatterKeys(requiredFrontmatterKeys, split) : [])
    ];
}
package/dist/install.js CHANGED
@@ -29,7 +29,7 @@ import { META_SKILL_NAME, usingCclawSkillMarkdown } from "./content/meta-skill.j
29
29
  import { decisionProtocolMarkdown, completionProtocolMarkdown, ethosProtocolMarkdown } from "./content/protocols.js";
30
30
  import { ARTIFACT_TEMPLATES, CURSOR_WORKFLOW_RULE_MDC, RULEBOOK_MARKDOWN, buildRulesJson } from "./content/templates.js";
31
31
  import { EVAL_BASELINES_README, EVAL_CONFIG_YAML, EVAL_CORPUS_README, EVAL_REPORTS_README, EVAL_RUBRICS_README } from "./content/eval-scaffold.js";
32
- import { TDD_WAVE_WALKTHROUGH_MARKDOWN, stageSkillFolder, stageSkillMarkdown } from "./content/skills.js";
32
+ import { TDD_BATCH_WALKTHROUGH_MARKDOWN, stageSkillFolder, stageSkillMarkdown } from "./content/skills.js";
33
33
  import { stageCommonGuidanceMarkdown } from "./content/stage-common-guidance.js";
34
34
  import { STAGE_EXAMPLES_REFERENCE_DIR, stageExamplesReferenceMarkdown } from "./content/examples.js";
35
35
  import { LANGUAGE_RULE_PACK_DIR, LANGUAGE_RULE_PACK_FILES, LANGUAGE_RULE_PACK_GENERATORS, LEGACY_LANGUAGE_RULE_PACK_FOLDERS, UTILITY_SKILL_FOLDERS, UTILITY_SKILL_MAP } from "./content/utility-skills.js";
@@ -218,11 +218,11 @@ async function writeSkills(projectRoot, config) {
218
218
  await writeFileSafe(runtimePath(projectRoot, ...referenceDir, `${stage}-examples.md`), referenceMarkdown);
219
219
  }
220
220
  }
221
- // Progressive disclosure for the TDD Wave Execution walkthrough (A.1#1).
221
+ // Progressive disclosure for the TDD Batch Execution walkthrough (A.1#1).
222
222
  // The detailed 3-task transcript lives next to stage examples so the
223
223
  // always-rendered TDD skill stays under the line-budget and the reference
224
224
  // is loaded on demand.
225
- await writeFileSafe(runtimePath(projectRoot, ...STAGE_EXAMPLES_REFERENCE_DIR.split("/"), "tdd-wave-walkthrough.md"), TDD_WAVE_WALKTHROUGH_MARKDOWN);
225
+ await writeFileSafe(runtimePath(projectRoot, ...STAGE_EXAMPLES_REFERENCE_DIR.split("/"), "tdd-batch-walkthrough.md"), TDD_BATCH_WALKTHROUGH_MARKDOWN);
226
226
  await writeFileSafe(runtimePath(projectRoot, ...STAGE_EXAMPLES_REFERENCE_DIR.split("/"), "common-guidance.md"), stageCommonGuidanceMarkdown());
227
227
  // Utility skills (not flow stages)
228
228
  await writeFileSafe(runtimePath(projectRoot, "skills", "learnings", "SKILL.md"), learnSkillMarkdown());
package/dist/policy.js CHANGED
@@ -161,7 +161,7 @@ export async function policyChecks(projectRoot, options = {}) {
161
161
  { file: runtimeFile("skills/docs/SKILL.md"), needle: "## README Guidance", name: "utility_skill:docs:readme" },
162
162
  { file: runtimeFile("skills/executing-plans/SKILL.md"), needle: "## HARD-GATE", name: "utility_skill:executing_plans:hard_gate" },
163
163
  { file: runtimeFile("skills/executing-plans/SKILL.md"), needle: "## Execution Protocol", name: "utility_skill:executing_plans:protocol" },
164
- { file: runtimeFile("skills/executing-plans/SKILL.md"), needle: "## Wave Checklist", name: "utility_skill:executing_plans:waves" },
164
+ { file: runtimeFile("skills/executing-plans/SKILL.md"), needle: "## Batch Checklist", name: "utility_skill:executing_plans:batches" },
165
165
  { file: runtimeFile("skills/verification-before-completion/SKILL.md"), needle: "## HARD-GATE", name: "utility_skill:verification_before_completion:hard_gate" },
166
166
  { file: runtimeFile("skills/verification-before-completion/SKILL.md"), needle: "## Protocol", name: "utility_skill:verification_before_completion:protocol" },
167
167
  { file: runtimeFile("skills/finishing-a-development-branch/SKILL.md"), needle: "## HARD-GATE", name: "utility_skill:finishing_branch:hard_gate" },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cclaw-cli",
3
- "version": "0.22.0",
3
+ "version": "0.23.1",
4
4
  "description": "Installer-first flow toolkit for coding agents",
5
5
  "type": "module",
6
6
  "bin": {