cclaw-cli 0.26.0 → 0.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.d.ts +4 -0
- package/dist/cli.js +78 -4
- package/dist/eval/agents/with-tools.d.ts +14 -1
- package/dist/eval/agents/with-tools.js +17 -11
- package/dist/eval/agents/workflow.d.ts +24 -0
- package/dist/eval/agents/workflow.js +133 -0
- package/dist/eval/config-loader.js +6 -2
- package/dist/eval/diff.d.ts +64 -0
- package/dist/eval/diff.js +323 -0
- package/dist/eval/report.js +35 -0
- package/dist/eval/runner.d.ts +10 -1
- package/dist/eval/runner.js +236 -19
- package/dist/eval/types.d.ts +117 -1
- package/dist/eval/types.js +21 -1
- package/dist/eval/verifiers/workflow-consistency.d.ts +21 -0
- package/dist/eval/verifiers/workflow-consistency.js +225 -0
- package/dist/eval/workflow-corpus.d.ts +7 -0
- package/dist/eval/workflow-corpus.js +207 -0
- package/package.json +1 -1
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
// Fallback phrases for the placeholder-free check, used when the rule supplies
// no phrases of its own (or an empty list). Matching is a case-insensitive
// substring search, so "TODO" also matches "todo".
const DEFAULT_PLACEHOLDERS = ["TBD", "TODO", "placeholder"];
|
|
2
|
+
/**
 * Run every cross-stage consistency rule declared in `expected` against the
 * collected stage artifacts.
 *
 * Results are concatenated in a fixed order: ids-flow rules first, then the
 * placeholder-free check, then no-contradiction rules.
 *
 * @param artifacts Map-like lookup (`has` / `get`) from stage name to that
 *   stage's artifact text.
 * @param expected Consistency expectations; may carry `idsFlow`,
 *   `placeholderFree` and `noContradictions`. A falsy value disables all checks.
 * @returns Array of verifier result objects (empty when nothing is configured).
 */
export function verifyWorkflowConsistency(artifacts, expected) {
    if (!expected) {
        return [];
    }
    const findings = [];
    for (const rule of expected.idsFlow || []) {
        findings.push(...checkIdsFlow(artifacts, rule));
    }
    const placeholderRule = expected.placeholderFree;
    if (placeholderRule) {
        const custom = placeholderRule.phrases;
        // An absent or empty phrase list falls back to the built-in defaults.
        const phrases = custom && custom.length > 0 ? custom : DEFAULT_PLACEHOLDERS;
        findings.push(...checkPlaceholderFree(artifacts, placeholderRule.stages, phrases));
    }
    for (const rule of expected.noContradictions || []) {
        findings.push(...checkNoContradiction(artifacts, rule));
    }
    return findings;
}
|
|
23
|
+
/**
 * Turn arbitrary text into a short identifier fragment: lower-cased, every run
 * of characters outside `a-z0-9` collapsed to a single dash, leading/trailing
 * dashes removed, then cut to at most 48 characters.
 *
 * The cut happens after the dash trimming, so a truncated slug can still end
 * with a dash.
 */
function slug(value) {
    const lowered = value.toLowerCase();
    const dashed = lowered.replace(/[^a-z0-9]+/g, "-");
    const trimmed = dashed.replace(/^-+|-+$/g, "");
    return trimmed.slice(0, 48);
}
|
|
30
|
+
/**
 * Build a failing verifier result when `stage` has no artifact.
 *
 * @param artifacts Map-like lookup of stage name to artifact text.
 * @param stage Stage name that must be present.
 * @param verifierId Id to stamp on the failure result.
 * @param label Short description of which check needed the stage; appended to
 *   the message in parentheses.
 * @returns `undefined` when the stage is present, otherwise the failure result.
 */
function missingStage(artifacts, stage, verifierId, label) {
    const present = artifacts.has(stage);
    return present
        ? undefined
        : {
            kind: "consistency",
            id: verifierId,
            ok: false,
            score: 0,
            message: `Workflow artifact for stage "${stage}" is missing (${label}).`,
            details: { stage, missing: true }
        };
}
|
|
42
|
+
/**
 * Collect the distinct, non-empty substrings of `text` matched by a regex.
 *
 * @param text Text to scan.
 * @param pattern Regex source.
 * @param flags Regex flags; the global flag is added when absent so the whole
 *   text is scanned rather than only the first match.
 * @returns Unique matches, sorted with `localeCompare`.
 * @throws {SyntaxError} When `pattern` / `flags` do not form a valid RegExp
 *   (thrown by the `RegExp` constructor).
 */
function extractIds(text, pattern, flags) {
    const globalFlags = flags.includes("g") ? flags : `${flags}g`;
    const regex = new RegExp(pattern, globalFlags);
    const hits = new Set();
    // matchAll steps past zero-length matches on its own, so no manual
    // lastIndex adjustment is needed.
    for (const match of text.matchAll(regex)) {
        // A zero-length match is not an id. Keeping "" would make every target
        // trivially "contain" it and hide a pattern that matched nothing real.
        if (match[0] !== "") {
            hits.add(match[0]);
        }
    }
    return [...hits].sort((a, b) => a.localeCompare(b));
}
|
|
54
|
+
/**
 * Verify that every id found in the source stage also appears in each target
 * stage.
 *
 * Ids are the distinct regex matches of `rule.idPattern` (flags from
 * `rule.idFlags`, default "g") in the `rule.from` artifact. One result is
 * produced per target in `rule.to`. A missing source stage, an invalid regex,
 * or a source with no matches short-circuits with a single failing result.
 *
 * @param artifacts Map-like lookup of stage name to artifact text.
 * @param rule Ids-flow rule with `from`, `to`, `idPattern` and optional `idFlags`.
 * @returns Array of verifier results.
 */
function checkIdsFlow(artifacts, rule) {
    const baseId = `consistency:ids-flow:${rule.from}:${slug(rule.idPattern)}`;
    const absentSource = missingStage(artifacts, rule.from, `${baseId}:source-missing`, "ids-flow source");
    if (absentSource) {
        return [absentSource];
    }
    const sourceText = artifacts.get(rule.from);
    let sourceIds;
    try {
        sourceIds = extractIds(sourceText, rule.idPattern, rule.idFlags ?? "g");
    }
    catch (err) {
        // A bad pattern is reported as a failed check rather than aborting the run.
        const reason = err instanceof Error ? err.message : String(err);
        return [
            {
                kind: "consistency",
                id: `${baseId}:regex`,
                ok: false,
                score: 0,
                message: `Invalid id regex "${rule.idPattern}": ${reason}`,
                details: { from: rule.from }
            }
        ];
    }
    if (sourceIds.length === 0) {
        return [
            {
                kind: "consistency",
                id: `${baseId}:source-empty`,
                ok: false,
                score: 0,
                message: `No ids matched "${rule.idPattern}" in stage "${rule.from}".`,
                details: { from: rule.from, pattern: rule.idPattern }
            }
        ];
    }
    const results = [];
    for (const target of rule.to) {
        const absentTarget = missingStage(artifacts, target, `${baseId}:${target}:target-missing`, "ids-flow target");
        if (absentTarget) {
            results.push(absentTarget);
            continue;
        }
        const targetText = artifacts.get(target);
        const missing = sourceIds.filter((id) => !targetText.includes(id));
        const allPresent = missing.length === 0;
        const details = { from: rule.from, to: target, ids: sourceIds };
        let message;
        if (allPresent) {
            message = `All ${sourceIds.length} id(s) from "${rule.from}" appear in "${target}".`;
        }
        else {
            // Only the first five missing ids are listed in the message; the
            // full list is kept in details.
            const overflow = missing.length > 5 ? ` (+${missing.length - 5} more)` : "";
            message = `Missing in "${target}": ${missing.slice(0, 5).join(", ")}${overflow}`;
            details.missing = missing;
        }
        results.push({
            kind: "consistency",
            id: `${baseId}:${target}`,
            ok: allPresent,
            score: allPresent ? 1 : 0,
            message,
            details
        });
    }
    return results;
}
|
|
128
|
+
/**
 * Check that none of the given placeholder phrases occur in each listed stage.
 *
 * Matching is a case-insensitive substring search. One result is produced per
 * stage; a stage with no artifact yields a failing "missing" result.
 *
 * @param artifacts Map-like lookup of stage name to artifact text.
 * @param stages Stage names to scan.
 * @param phrases Placeholder phrases to look for.
 * @returns Array of verifier results, one per stage.
 */
function checkPlaceholderFree(artifacts, stages, phrases) {
    const results = [];
    for (const stage of stages) {
        const verifierId = `consistency:placeholder-free:${stage}`;
        const absent = missingStage(artifacts, stage, verifierId, "placeholder-free");
        if (absent) {
            results.push(absent);
            continue;
        }
        const haystack = artifacts.get(stage).toLowerCase();
        const hits = phrases.filter((phrase) => haystack.includes(phrase.toLowerCase()));
        const clean = hits.length === 0;
        results.push(clean
            ? {
                kind: "consistency",
                id: verifierId,
                ok: true,
                score: 1,
                message: `No placeholder phrases found in "${stage}".`,
                details: { stage, phrases }
            }
            : {
                kind: "consistency",
                id: verifierId,
                ok: false,
                score: 0,
                message: `Placeholder phrases in "${stage}": ${hits.join(", ")}.`,
                details: { stage, phrases, hits }
            });
    }
    return results;
}
|
|
163
|
+
/**
 * Check that target stages do not contain a phrase that contradicts a
 * statement made in an anchor stage.
 *
 * The rule is active only when the anchor stage (`rule.stage`) contains
 * `rule.must` (case-insensitive substring). When active, each stage in
 * `rule.stages` fails if it contains `rule.forbid` (case-insensitive).
 *
 * @param artifacts Map-like lookup of stage name to artifact text.
 * @param rule Rule with `stage`, `must`, `forbid` and `stages`.
 * @returns Array of verifier results.
 */
function checkNoContradiction(artifacts, rule) {
    const baseId = `consistency:no-contradiction:${rule.stage}:${slug(rule.must)}-vs-${slug(rule.forbid)}`;
    const absentAnchor = missingStage(artifacts, rule.stage, `${baseId}:anchor-missing`, "no-contradiction anchor");
    if (absentAnchor) {
        return [absentAnchor];
    }
    const anchorAsserted = artifacts
        .get(rule.stage)
        .toLowerCase()
        .includes(rule.must.toLowerCase());
    if (!anchorAsserted) {
        // The declaring stage doesn't actually assert `must`, so the rule is vacuously satisfied.
        return [
            {
                kind: "consistency",
                id: `${baseId}:anchor-inactive`,
                ok: true,
                score: 1,
                message: `Anchor "${rule.must}" not present in "${rule.stage}"; contradiction check skipped.`,
                details: { stage: rule.stage, anchor: rule.must, skipped: true }
            }
        ];
    }
    const results = [];
    for (const target of rule.stages) {
        const verifierId = `${baseId}:${target}`;
        const absentTarget = missingStage(artifacts, target, `${verifierId}:target-missing`, "no-contradiction target");
        if (absentTarget) {
            results.push(absentTarget);
            continue;
        }
        const contradicts = artifacts
            .get(target)
            .toLowerCase()
            .includes(rule.forbid.toLowerCase());
        const message = contradicts
            ? `"${rule.stage}" asserts "${rule.must}" but "${target}" contains "${rule.forbid}".`
            : `"${target}" does not contradict "${rule.stage}" on "${rule.must}".`;
        results.push({
            kind: "consistency",
            id: verifierId,
            ok: !contradicts,
            score: contradicts ? 0 : 1,
            message,
            details: {
                stage: rule.stage,
                anchor: rule.must,
                forbid: rule.forbid,
                target
            }
        });
    }
    return results;
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { WorkflowCase } from "./types.js";
|
|
2
|
+
/**
|
|
3
|
+
* Load every Tier C workflow case under
|
|
4
|
+
* `.cclaw/evals/corpus/workflows/*.yaml`. Returns an empty array when the
|
|
5
|
+
* directory is missing — a fresh `cclaw init` has no Tier C corpus yet.
|
|
6
|
+
*/
|
|
7
|
+
export declare function loadWorkflowCorpus(projectRoot: string): Promise<WorkflowCase[]>;
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Workflow corpus loader for Tier C.
|
|
3
|
+
*
|
|
4
|
+
* Tier C cases live under `.cclaw/evals/corpus/workflows/<id>.yaml` and
|
|
5
|
+
* describe a multi-stage run that chains the with-tools agent across
|
|
6
|
+
* `brainstorm → scope → design → spec → plan`. Unlike single-stage
|
|
7
|
+
* cases (which are keyed by stage folder), workflow cases ship as a
|
|
8
|
+
* single YAML that embeds each stage's prompt + expectations.
|
|
9
|
+
*
|
|
10
|
+
* The loader is intentionally separate from `loadCorpus` so the
|
|
11
|
+
* structural / rules CI paths never walk the workflow directory — those
|
|
12
|
+
* paths are single-stage only.
|
|
13
|
+
*/
|
|
14
|
+
import fs from "node:fs/promises";
|
|
15
|
+
import path from "node:path";
|
|
16
|
+
import { parse } from "yaml";
|
|
17
|
+
import { EVALS_ROOT } from "../constants.js";
|
|
18
|
+
import { exists } from "../fs-utils.js";
|
|
19
|
+
import { WORKFLOW_STAGES } from "./types.js";
|
|
20
|
+
// Set built from WORKFLOW_STAGES; parseStageName uses it to test whether a
// value is a supported workflow stage name.
const WORKFLOW_STAGE_SET = new Set(WORKFLOW_STAGES);
|
|
21
|
+
/**
 * Build (not throw) the Error used for every invalid workflow case. The
 * message names the offending file and reason, followed by a second line
 * listing the supported workflow stages.
 *
 * @param filePath Path of the workflow case file being validated.
 * @param reason Human-readable description of what is wrong.
 * @returns The constructed Error; callers `throw` it themselves.
 */
function workflowCorpusError(filePath, reason) {
    const supported = WORKFLOW_STAGES.join(", ");
    const message = `Invalid workflow case at ${filePath}: ${reason}\nSupported workflow stages: ${supported}`;
    return new Error(message);
}
|
|
25
|
+
/**
 * True for any non-null object that is not an array.
 */
function isRecord(value) {
    if (value === null || Array.isArray(value)) {
        return false;
    }
    return typeof value === "object";
}
|
|
28
|
+
/**
 * Validate an optional array-of-strings field.
 *
 * @param filePath Path of the workflow case file (for error messages).
 * @param context Field name to quote in the error message.
 * @param value Raw value to validate.
 * @returns `undefined` when the field is absent, otherwise the same array.
 * @throws {Error} When `value` is present but is not an array of strings.
 */
function readStringArray(filePath, context, value) {
    if (value === undefined) {
        return undefined;
    }
    const allStrings = Array.isArray(value) && value.every((item) => typeof item === "string");
    if (!allStrings) {
        throw workflowCorpusError(filePath, `"${context}" must be an array of strings`);
    }
    return value;
}
|
|
36
|
+
/**
 * Validate that `value` is one of the supported workflow stage names.
 *
 * @param filePath Path of the workflow case file (for error messages).
 * @param context Field path to quote in the error message.
 * @param value Raw value to validate.
 * @returns The stage name unchanged.
 * @throws {Error} When `value` is not a string or not a supported stage.
 */
function parseStageName(filePath, context, value) {
    const known = typeof value === "string" && WORKFLOW_STAGE_SET.has(value);
    if (known) {
        return value;
    }
    throw workflowCorpusError(filePath, `"${context}" must be one of: ${WORKFLOW_STAGES.join(", ")}`);
}
|
|
42
|
+
/**
 * Validate a required, non-empty array of workflow stage names.
 *
 * @param filePath Path of the workflow case file (for error messages).
 * @param context Field path to quote in error messages; each element is
 *   reported as `context[index]`.
 * @param value Raw value to validate.
 * @returns A new array of validated stage names.
 * @throws {Error} When `value` is not a non-empty array or an element is not a
 *   supported stage name.
 */
function parseStageArray(filePath, context, value) {
    const usable = Array.isArray(value) && value.length > 0;
    if (!usable) {
        throw workflowCorpusError(filePath, `"${context}" must be a non-empty array of stage names`);
    }
    const validateAt = (entry, index) => parseStageName(filePath, `${context}[${index}]`, entry);
    return value.map(validateAt);
}
|
|
48
|
+
/**
 * Validate one entry of a workflow case's `stages` list and normalize it.
 *
 * Each multi-word key is accepted in snake_case or camelCase, with snake_case
 * taking precedence (`input_prompt` / `inputPrompt`, `required_checks` /
 * `requiredChecks`, `minimum_scores` / `minimumScores`). The prompt and rubric
 * are trimmed. Optional fields are only set on the result when provided.
 *
 * @param filePath Path of the workflow case file (for error messages).
 * @param index Position of this entry inside `stages` (for error messages).
 * @param raw Raw parsed value of the entry.
 * @returns Step object with `name`, `inputPrompt` and optionally `rubric`,
 *   `requiredChecks`, `minimumScores`.
 * @throws {Error} On any structural or type violation.
 */
function parseStageStep(filePath, index, raw) {
    const fail = (reason) => workflowCorpusError(filePath, reason);
    const isBlank = (text) => text.trim().length === 0;
    if (!isRecord(raw)) {
        throw fail(`stages[${index}] must be a mapping`);
    }
    const name = parseStageName(filePath, `stages[${index}].name`, raw.name);
    const prompt = raw.input_prompt ?? raw.inputPrompt;
    if (typeof prompt !== "string" || isBlank(prompt)) {
        throw fail(`stages[${index}].input_prompt must be a non-empty string`);
    }
    const step = { name, inputPrompt: prompt.trim() };
    const { rubric } = raw;
    if (rubric !== undefined) {
        if (typeof rubric !== "string" || isBlank(rubric)) {
            throw fail(`stages[${index}].rubric must be a non-empty string`);
        }
        step.rubric = rubric.trim();
    }
    const requiredChecks = readStringArray(filePath, `stages[${index}].required_checks`, raw.required_checks ?? raw.requiredChecks);
    if (requiredChecks) {
        step.requiredChecks = requiredChecks;
    }
    const rawScores = raw.minimum_scores ?? raw.minimumScores;
    if (rawScores === undefined) {
        return step;
    }
    if (!isRecord(rawScores)) {
        throw fail(`stages[${index}].minimum_scores must be a mapping of check id → number`);
    }
    const minimumScores = {};
    for (const [key, val] of Object.entries(rawScores)) {
        // Scores must be finite numbers within the inclusive range 1..5.
        const inRange = typeof val === "number" && Number.isFinite(val) && val >= 1 && val <= 5;
        if (!inRange) {
            throw fail(`stages[${index}].minimum_scores.${key} must be a number in [1,5]`);
        }
        minimumScores[key] = val;
    }
    step.minimumScores = minimumScores;
    return step;
}
|
|
83
|
+
/**
 * Validate and normalize the optional `consistency` section of a workflow case.
 *
 * Three sub-sections are recognized, each in snake_case or camelCase with
 * snake_case taking precedence: `ids_flow`, `placeholder_free`,
 * `no_contradictions`. They are validated in that order.
 *
 * @param filePath Path of the workflow case file (for error messages).
 * @param raw Raw parsed value of the `consistency` key.
 * @returns Normalized object with any of `idsFlow`, `placeholderFree`,
 *   `noContradictions`; `undefined` when `raw` is undefined or none of the
 *   sub-sections is present.
 * @throws {Error} On any structural or type violation.
 */
function parseConsistency(filePath, raw) {
    if (raw === undefined) {
        return undefined;
    }
    const fail = (reason) => workflowCorpusError(filePath, reason);
    if (!isRecord(raw)) {
        throw fail(`"consistency" must be a mapping`);
    }
    // Per-entry validator for ids_flow rules.
    const toIdsFlowRule = (entry, index) => {
        if (!isRecord(entry)) {
            throw fail(`consistency.ids_flow[${index}] must be a mapping`);
        }
        const idPattern = entry.id_pattern ?? entry.idPattern;
        if (typeof idPattern !== "string" || idPattern.length === 0) {
            throw fail(`consistency.ids_flow[${index}].id_pattern must be a non-empty regex source`);
        }
        const idFlags = entry.id_flags ?? entry.idFlags;
        if (idFlags !== undefined && typeof idFlags !== "string") {
            throw fail(`consistency.ids_flow[${index}].id_flags must be a string`);
        }
        const from = parseStageName(filePath, `consistency.ids_flow[${index}].from`, entry.from);
        const to = parseStageArray(filePath, `consistency.ids_flow[${index}].to`, entry.to);
        return idFlags !== undefined ? { idPattern, from, to, idFlags } : { idPattern, from, to };
    };
    // Per-entry validator for no_contradictions rules.
    const toContradictionRule = (entry, index) => {
        if (!isRecord(entry)) {
            throw fail(`consistency.no_contradictions[${index}] must be a mapping`);
        }
        const stage = parseStageName(filePath, `consistency.no_contradictions[${index}].stage`, entry.stage);
        const { must, forbid } = entry;
        if (typeof must !== "string" || must.length === 0) {
            throw fail(`consistency.no_contradictions[${index}].must must be a non-empty string`);
        }
        if (typeof forbid !== "string" || forbid.length === 0) {
            throw fail(`consistency.no_contradictions[${index}].forbid must be a non-empty string`);
        }
        const stages = parseStageArray(filePath, `consistency.no_contradictions[${index}].stages`, entry.stages);
        return { stage, must, forbid, stages };
    };
    const out = {};
    const idsFlowRaw = raw.ids_flow ?? raw.idsFlow;
    if (idsFlowRaw !== undefined) {
        if (!Array.isArray(idsFlowRaw)) {
            throw fail(`"consistency.ids_flow" must be an array`);
        }
        out.idsFlow = idsFlowRaw.map(toIdsFlowRule);
    }
    const placeholderRaw = raw.placeholder_free ?? raw.placeholderFree;
    if (placeholderRaw !== undefined) {
        if (!isRecord(placeholderRaw)) {
            throw fail(`"consistency.placeholder_free" must be a mapping`);
        }
        const stages = parseStageArray(filePath, "consistency.placeholder_free.stages", placeholderRaw.stages);
        const phrases = readStringArray(filePath, "consistency.placeholder_free.phrases", placeholderRaw.phrases);
        out.placeholderFree = phrases ? { stages, phrases } : { stages };
    }
    const noContradictionsRaw = raw.no_contradictions ?? raw.noContradictions;
    if (noContradictionsRaw !== undefined) {
        if (!Array.isArray(noContradictionsRaw)) {
            throw fail(`"consistency.no_contradictions" must be an array`);
        }
        out.noContradictions = noContradictionsRaw.map(toContradictionRule);
    }
    // A mapping that declares none of the known sub-sections counts as absent.
    const hasAnyRule = Object.keys(out).length > 0;
    return hasAnyRule ? out : undefined;
}
|
|
154
|
+
/**
 * Validate the parsed top-level value of a workflow case file and normalize it.
 *
 * Requires a non-blank string `id` and a non-empty `stages` array. Optional
 * `context_files` / `contextFiles`, `consistency` and `description` are set on
 * the result only when present; a non-string or blank description is dropped.
 *
 * @param filePath Path of the workflow case file (for error messages).
 * @param raw Parsed YAML document.
 * @returns Workflow case with `id`, `stages` and optionally `description`,
 *   `contextFiles`, `consistency`.
 * @throws {Error} On any structural or type violation.
 */
function validateWorkflowCase(filePath, raw) {
    if (!isRecord(raw)) {
        throw workflowCorpusError(filePath, `top-level value must be a mapping`);
    }
    const { id, stages: stagesRaw } = raw;
    if (typeof id !== "string" || id.trim().length === 0) {
        throw workflowCorpusError(filePath, `"id" must be a non-empty string`);
    }
    if (!Array.isArray(stagesRaw) || stagesRaw.length === 0) {
        throw workflowCorpusError(filePath, `"stages" must be a non-empty array`);
    }
    const workflowCase = {
        id: id.trim(),
        stages: stagesRaw.map((entry, index) => parseStageStep(filePath, index, entry))
    };
    const contextFiles = readStringArray(filePath, "context_files", raw.context_files ?? raw.contextFiles);
    const consistency = parseConsistency(filePath, raw.consistency);
    const description = typeof raw.description === "string" ? raw.description.trim() : undefined;
    if (description) {
        workflowCase.description = description;
    }
    if (contextFiles) {
        workflowCase.contextFiles = contextFiles;
    }
    if (consistency) {
        workflowCase.consistency = consistency;
    }
    return workflowCase;
}
|
|
179
|
+
/**
 * Read and validate every workflow case file (`*.yaml` / `*.yml`) directly
 * inside `<projectRoot>/<EVALS_ROOT>/corpus/workflows`.
 *
 * Files are processed one at a time in directory-listing order, so the first
 * unreadable, unparsable or invalid file aborts the load with an error naming
 * that file. Non-file entries and other extensions are ignored.
 *
 * @param projectRoot Project root directory.
 * @returns Validated workflow cases sorted by `id`; an empty array when the
 *   directory does not exist.
 * @throws {Error} When a file cannot be read, parsed or validated.
 */
export async function loadWorkflowCorpus(projectRoot) {
    const dir = path.join(projectRoot, EVALS_ROOT, "corpus", "workflows");
    const dirPresent = await exists(dir);
    if (!dirPresent) {
        return [];
    }
    const isYamlName = (name) => name.endsWith(".yaml") || name.endsWith(".yml");
    const entries = await fs.readdir(dir, { withFileTypes: true });
    const cases = [];
    for (const entry of entries) {
        if (!(entry.isFile() && isYamlName(entry.name))) {
            continue;
        }
        const filePath = path.join(dir, entry.name);
        let parsed;
        try {
            const text = await fs.readFile(filePath, "utf8");
            parsed = parse(text);
        }
        catch (err) {
            // Read and YAML-parse failures are both reported against the file path.
            const reason = err instanceof Error ? err.message : String(err);
            throw workflowCorpusError(filePath, reason);
        }
        cases.push(validateWorkflowCase(filePath, parsed));
    }
    return cases.sort((a, b) => a.id.localeCompare(b.id));
}
|