cclaw-cli 0.48.35 → 0.51.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +54 -82
- package/dist/artifact-linter.d.ts +4 -0
- package/dist/artifact-linter.js +24 -3
- package/dist/cli.d.ts +1 -19
- package/dist/cli.js +49 -495
- package/dist/constants.d.ts +2 -13
- package/dist/constants.js +1 -46
- package/dist/content/closeout-guidance.d.ts +14 -0
- package/dist/content/closeout-guidance.js +42 -0
- package/dist/content/core-agents.js +51 -9
- package/dist/content/decision-protocol.d.ts +12 -0
- package/dist/content/decision-protocol.js +20 -0
- package/dist/content/diff-command.d.ts +1 -2
- package/dist/content/diff-command.js +8 -94
- package/dist/content/examples.d.ts +4 -10
- package/dist/content/examples.js +10 -20
- package/dist/content/hook-events.js +2 -2
- package/dist/content/hook-inline-snippets.d.ts +5 -2
- package/dist/content/hook-inline-snippets.js +33 -1
- package/dist/content/hook-manifest.d.ts +3 -4
- package/dist/content/hook-manifest.js +11 -12
- package/dist/content/hooks.js +2 -0
- package/dist/content/ideate-command.d.ts +2 -0
- package/dist/content/ideate-command.js +31 -25
- package/dist/content/iron-laws.d.ts +5 -5
- package/dist/content/iron-laws.js +5 -5
- package/dist/content/learnings.d.ts +3 -4
- package/dist/content/learnings.js +24 -50
- package/dist/content/meta-skill.js +31 -24
- package/dist/content/next-command.js +38 -38
- package/dist/content/node-hooks.js +17 -343
- package/dist/content/opencode-plugin.js +2 -100
- package/dist/content/research-playbooks.js +14 -14
- package/dist/content/review-loop.d.ts +2 -0
- package/dist/content/review-loop.js +8 -0
- package/dist/content/session-hooks.js +14 -46
- package/dist/content/skills.d.ts +0 -5
- package/dist/content/skills.js +53 -128
- package/dist/content/stage-common-guidance.d.ts +0 -1
- package/dist/content/stage-common-guidance.js +15 -14
- package/dist/content/stage-schema.d.ts +26 -1
- package/dist/content/stage-schema.js +121 -40
- package/dist/content/stages/_lint-metadata/index.js +9 -15
- package/dist/content/stages/brainstorm.js +22 -43
- package/dist/content/stages/design.js +37 -57
- package/dist/content/stages/plan.js +22 -13
- package/dist/content/stages/review.js +24 -27
- package/dist/content/stages/scope.js +34 -46
- package/dist/content/stages/ship.js +7 -4
- package/dist/content/stages/spec.js +20 -9
- package/dist/content/stages/tdd.js +64 -44
- package/dist/content/start-command.js +10 -12
- package/dist/content/status-command.d.ts +2 -7
- package/dist/content/status-command.js +19 -146
- package/dist/content/subagents.d.ts +0 -5
- package/dist/content/subagents.js +47 -28
- package/dist/content/templates.d.ts +1 -1
- package/dist/content/templates.js +126 -135
- package/dist/content/track-render-context.d.ts +17 -0
- package/dist/content/track-render-context.js +44 -0
- package/dist/content/tree-command.d.ts +1 -2
- package/dist/content/tree-command.js +4 -87
- package/dist/content/utility-skills.d.ts +2 -29
- package/dist/content/utility-skills.js +2 -1533
- package/dist/content/view-command.js +29 -11
- package/dist/delegation.d.ts +1 -1
- package/dist/delegation.js +5 -15
- package/dist/doctor-registry.js +20 -21
- package/dist/doctor.js +88 -408
- package/dist/flow-state.d.ts +3 -0
- package/dist/flow-state.js +2 -0
- package/dist/harness-adapters.d.ts +1 -1
- package/dist/harness-adapters.js +48 -57
- package/dist/install.js +128 -520
- package/dist/internal/advance-stage.js +3 -9
- package/dist/internal/compound-readiness.d.ts +1 -1
- package/dist/internal/compound-readiness.js +1 -1
- package/dist/internal/tdd-loop-status.d.ts +1 -1
- package/dist/internal/tdd-loop-status.js +1 -1
- package/dist/knowledge-store.d.ts +16 -10
- package/dist/knowledge-store.js +51 -15
- package/dist/policy.js +16 -109
- package/dist/run-archive.d.ts +4 -6
- package/dist/run-archive.js +15 -20
- package/dist/run-persistence.d.ts +2 -2
- package/dist/run-persistence.js +3 -9
- package/package.json +1 -2
- package/dist/content/archive-command.d.ts +0 -2
- package/dist/content/archive-command.js +0 -124
- package/dist/content/compound-command.d.ts +0 -5
- package/dist/content/compound-command.js +0 -193
- package/dist/content/contexts.d.ts +0 -9
- package/dist/content/contexts.js +0 -65
- package/dist/content/contracts.d.ts +0 -2
- package/dist/content/contracts.js +0 -51
- package/dist/content/doctor-references.d.ts +0 -2
- package/dist/content/doctor-references.js +0 -150
- package/dist/content/eval-scaffold.d.ts +0 -15
- package/dist/content/eval-scaffold.js +0 -370
- package/dist/content/feature-command.d.ts +0 -2
- package/dist/content/feature-command.js +0 -123
- package/dist/content/flow-map.d.ts +0 -23
- package/dist/content/flow-map.js +0 -134
- package/dist/content/harness-doc.d.ts +0 -2
- package/dist/content/harness-doc.js +0 -202
- package/dist/content/harness-playbooks.d.ts +0 -24
- package/dist/content/harness-playbooks.js +0 -393
- package/dist/content/harness-tool-refs.d.ts +0 -20
- package/dist/content/harness-tool-refs.js +0 -268
- package/dist/content/ops-command.d.ts +0 -2
- package/dist/content/ops-command.js +0 -71
- package/dist/content/protocols.d.ts +0 -7
- package/dist/content/protocols.js +0 -215
- package/dist/content/retro-command.d.ts +0 -2
- package/dist/content/retro-command.js +0 -165
- package/dist/content/rewind-command.d.ts +0 -2
- package/dist/content/rewind-command.js +0 -106
- package/dist/content/tdd-log-command.d.ts +0 -2
- package/dist/content/tdd-log-command.js +0 -85
- package/dist/eval/agents/single-shot.d.ts +0 -27
- package/dist/eval/agents/single-shot.js +0 -79
- package/dist/eval/agents/with-tools.d.ts +0 -44
- package/dist/eval/agents/with-tools.js +0 -261
- package/dist/eval/agents/workflow.d.ts +0 -31
- package/dist/eval/agents/workflow.js +0 -155
- package/dist/eval/baseline.d.ts +0 -38
- package/dist/eval/baseline.js +0 -282
- package/dist/eval/config-loader.d.ts +0 -14
- package/dist/eval/config-loader.js +0 -395
- package/dist/eval/corpus.d.ts +0 -30
- package/dist/eval/corpus.js +0 -330
- package/dist/eval/cost-guard.d.ts +0 -102
- package/dist/eval/cost-guard.js +0 -190
- package/dist/eval/diff.d.ts +0 -64
- package/dist/eval/diff.js +0 -323
- package/dist/eval/llm-client.d.ts +0 -176
- package/dist/eval/llm-client.js +0 -267
- package/dist/eval/mode.d.ts +0 -28
- package/dist/eval/mode.js +0 -61
- package/dist/eval/progress.d.ts +0 -83
- package/dist/eval/progress.js +0 -59
- package/dist/eval/report.d.ts +0 -11
- package/dist/eval/report.js +0 -181
- package/dist/eval/rubric-loader.d.ts +0 -20
- package/dist/eval/rubric-loader.js +0 -143
- package/dist/eval/runner.d.ts +0 -81
- package/dist/eval/runner.js +0 -746
- package/dist/eval/runs.d.ts +0 -41
- package/dist/eval/runs.js +0 -114
- package/dist/eval/sandbox.d.ts +0 -38
- package/dist/eval/sandbox.js +0 -137
- package/dist/eval/tools/glob.d.ts +0 -2
- package/dist/eval/tools/glob.js +0 -163
- package/dist/eval/tools/grep.d.ts +0 -2
- package/dist/eval/tools/grep.js +0 -152
- package/dist/eval/tools/index.d.ts +0 -7
- package/dist/eval/tools/index.js +0 -35
- package/dist/eval/tools/read.d.ts +0 -2
- package/dist/eval/tools/read.js +0 -122
- package/dist/eval/tools/types.d.ts +0 -49
- package/dist/eval/tools/types.js +0 -41
- package/dist/eval/tools/write.d.ts +0 -2
- package/dist/eval/tools/write.js +0 -92
- package/dist/eval/types.d.ts +0 -561
- package/dist/eval/types.js +0 -47
- package/dist/eval/verifiers/judge.d.ts +0 -40
- package/dist/eval/verifiers/judge.js +0 -256
- package/dist/eval/verifiers/rules.d.ts +0 -24
- package/dist/eval/verifiers/rules.js +0 -218
- package/dist/eval/verifiers/structural.d.ts +0 -14
- package/dist/eval/verifiers/structural.js +0 -171
- package/dist/eval/verifiers/traceability.d.ts +0 -23
- package/dist/eval/verifiers/traceability.js +0 -84
- package/dist/eval/verifiers/workflow-consistency.d.ts +0 -21
- package/dist/eval/verifiers/workflow-consistency.js +0 -225
- package/dist/eval/workflow-corpus.d.ts +0 -7
- package/dist/eval/workflow-corpus.js +0 -207
- package/dist/feature-system.d.ts +0 -42
- package/dist/feature-system.js +0 -432
- package/dist/internal/knowledge-digest.d.ts +0 -7
- package/dist/internal/knowledge-digest.js +0 -93
package/dist/eval/llm-client.js
DELETED
|
@@ -1,267 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* LLM client for the cclaw eval subsystem.
|
|
3
|
-
*
|
|
4
|
-
* Thin adapter over the `openai` SDK pointed at any OpenAI-compatible
|
|
5
|
-
* `baseURL` (z.ai, OpenAI, vLLM, Ollama+openai-proxy, ...). The surface is
|
|
6
|
-
* deliberately narrow:
|
|
7
|
-
*
|
|
8
|
-
* - `chat()` — one request/response round-trip with timeout, bounded
|
|
9
|
-
* retries on transient errors, and a structured error hierarchy so
|
|
10
|
-
* callers can react policy-style (cost-guard, judge, agent-under-test).
|
|
11
|
-
* - `ChatRequest` / `ChatResponse` — wire format decoupled from the
|
|
12
|
-
* OpenAI types so swapping vendors stays a one-file change.
|
|
13
|
-
*
|
|
14
|
-
* Factories stay side-effect-free: no network calls are made until `chat()`
|
|
15
|
-
* is invoked, so CLI help and dry-run paths never need an API key.
|
|
16
|
-
*/
|
|
17
|
-
import OpenAI from "openai";
|
|
18
|
-
/** Base class so callers can `catch (err) { if (err instanceof EvalLlmError) ... }`. */
|
|
19
|
-
export class EvalLlmError extends Error {
|
|
20
|
-
retryable;
|
|
21
|
-
status;
|
|
22
|
-
constructor(message, opts) {
|
|
23
|
-
super(message);
|
|
24
|
-
this.name = "EvalLlmError";
|
|
25
|
-
this.retryable = opts.retryable;
|
|
26
|
-
if (opts.status !== undefined)
|
|
27
|
-
this.status = opts.status;
|
|
28
|
-
if (opts.cause !== undefined)
|
|
29
|
-
this.cause = opts.cause;
|
|
30
|
-
}
|
|
31
|
-
}
|
|
32
|
-
export class EvalLlmAuthError extends EvalLlmError {
|
|
33
|
-
constructor(cause) {
|
|
34
|
-
super("LLM request rejected (auth). Check CCLAW_EVAL_API_KEY and provider permissions.", {
|
|
35
|
-
retryable: false,
|
|
36
|
-
status: 401,
|
|
37
|
-
cause
|
|
38
|
-
});
|
|
39
|
-
this.name = "EvalLlmAuthError";
|
|
40
|
-
}
|
|
41
|
-
}
|
|
42
|
-
export class EvalLlmConfigError extends EvalLlmError {
|
|
43
|
-
constructor(message, cause) {
|
|
44
|
-
super(message, { retryable: false, cause });
|
|
45
|
-
this.name = "EvalLlmConfigError";
|
|
46
|
-
}
|
|
47
|
-
}
|
|
48
|
-
export class EvalLlmTimeoutError extends EvalLlmError {
|
|
49
|
-
constructor(timeoutMs) {
|
|
50
|
-
super(`LLM request timed out after ${timeoutMs}ms.`, { retryable: true });
|
|
51
|
-
this.name = "EvalLlmTimeoutError";
|
|
52
|
-
}
|
|
53
|
-
}
|
|
54
|
-
export class EvalLlmRateLimitedError extends EvalLlmError {
|
|
55
|
-
constructor(cause) {
|
|
56
|
-
super("LLM rate limit hit. Retrying with backoff.", {
|
|
57
|
-
retryable: true,
|
|
58
|
-
status: 429,
|
|
59
|
-
cause
|
|
60
|
-
});
|
|
61
|
-
this.name = "EvalLlmRateLimitedError";
|
|
62
|
-
}
|
|
63
|
-
}
|
|
64
|
-
export class EvalLlmTransportError extends EvalLlmError {
|
|
65
|
-
constructor(cause, status) {
|
|
66
|
-
super("LLM transport error.", { retryable: true, status, cause });
|
|
67
|
-
this.name = "EvalLlmTransportError";
|
|
68
|
-
}
|
|
69
|
-
}
|
|
70
|
-
export class EvalLlmInvalidResponseError extends EvalLlmError {
|
|
71
|
-
constructor(message, details) {
|
|
72
|
-
super(message, { retryable: false });
|
|
73
|
-
this.name = "EvalLlmInvalidResponseError";
|
|
74
|
-
if (details)
|
|
75
|
-
this.details = details;
|
|
76
|
-
}
|
|
77
|
-
}
|
|
78
|
-
export class EvalLlmNotConfiguredError extends EvalLlmError {
|
|
79
|
-
constructor() {
|
|
80
|
-
super(`LLM client not configured. Set CCLAW_EVAL_API_KEY (and optionally ` +
|
|
81
|
-
`CCLAW_EVAL_BASE_URL / CCLAW_EVAL_MODEL) or run with --schema-only / --rules.`, { retryable: false });
|
|
82
|
-
this.name = "EvalLlmNotConfiguredError";
|
|
83
|
-
}
|
|
84
|
-
}
|
|
85
|
-
/**
|
|
86
|
-
* Deprecated shim preserved so older wiring keeps compiling. Prefer
|
|
87
|
-
* `EvalLlmNotConfiguredError` for the "caller forgot to provide an API
|
|
88
|
-
* key" case.
|
|
89
|
-
*/
|
|
90
|
-
export class EvalLlmNotWiredError extends EvalLlmNotConfiguredError {
|
|
91
|
-
}
|
|
92
|
-
export const DEFAULT_RETRY_POLICY = {
|
|
93
|
-
maxRetries: 2,
|
|
94
|
-
initialBackoffMs: 500,
|
|
95
|
-
maxBackoffMs: 8_000
|
|
96
|
-
};
|
|
97
|
-
function isAbortError(err) {
|
|
98
|
-
if (err === null || typeof err !== "object")
|
|
99
|
-
return false;
|
|
100
|
-
const name = err.name;
|
|
101
|
-
const code = err.code;
|
|
102
|
-
return (name === "AbortError" || code === "ABORT_ERR" || code === "ERR_CANCELED");
|
|
103
|
-
}
|
|
104
|
-
function statusFromError(err) {
|
|
105
|
-
if (err === null || typeof err !== "object")
|
|
106
|
-
return undefined;
|
|
107
|
-
const status = err.status;
|
|
108
|
-
return typeof status === "number" ? status : undefined;
|
|
109
|
-
}
|
|
110
|
-
function normalizeError(err, timeoutMs) {
|
|
111
|
-
if (err instanceof EvalLlmError)
|
|
112
|
-
return err;
|
|
113
|
-
if (isAbortError(err))
|
|
114
|
-
return new EvalLlmTimeoutError(timeoutMs);
|
|
115
|
-
const status = statusFromError(err);
|
|
116
|
-
if (status === 401 || status === 403)
|
|
117
|
-
return new EvalLlmAuthError(err);
|
|
118
|
-
if (status === 429)
|
|
119
|
-
return new EvalLlmRateLimitedError(err);
|
|
120
|
-
if (status !== undefined && status >= 400 && status < 500) {
|
|
121
|
-
return new EvalLlmError(`LLM request rejected (HTTP ${status}).`, {
|
|
122
|
-
retryable: false,
|
|
123
|
-
status,
|
|
124
|
-
cause: err
|
|
125
|
-
});
|
|
126
|
-
}
|
|
127
|
-
return new EvalLlmTransportError(err, status);
|
|
128
|
-
}
|
|
129
|
-
function normalizeFinishReason(raw) {
|
|
130
|
-
switch (raw) {
|
|
131
|
-
case "length":
|
|
132
|
-
return "length";
|
|
133
|
-
case "tool_calls":
|
|
134
|
-
case "function_call":
|
|
135
|
-
return "tool_calls";
|
|
136
|
-
case "content_filter":
|
|
137
|
-
return "content_filter";
|
|
138
|
-
case "stop":
|
|
139
|
-
case null:
|
|
140
|
-
case undefined:
|
|
141
|
-
default:
|
|
142
|
-
return "stop";
|
|
143
|
-
}
|
|
144
|
-
}
|
|
145
|
-
function buildBody(request) {
|
|
146
|
-
const body = {
|
|
147
|
-
model: request.model,
|
|
148
|
-
messages: request.messages.map((m) => ({
|
|
149
|
-
role: m.role,
|
|
150
|
-
content: m.content,
|
|
151
|
-
...(m.name !== undefined ? { name: m.name } : {}),
|
|
152
|
-
...(m.toolCallId !== undefined ? { tool_call_id: m.toolCallId } : {}),
|
|
153
|
-
...(m.toolCalls && m.toolCalls.length > 0
|
|
154
|
-
? {
|
|
155
|
-
tool_calls: m.toolCalls.map((call) => ({
|
|
156
|
-
id: call.id,
|
|
157
|
-
type: "function",
|
|
158
|
-
function: { name: call.name, arguments: call.arguments }
|
|
159
|
-
}))
|
|
160
|
-
}
|
|
161
|
-
: {})
|
|
162
|
-
}))
|
|
163
|
-
};
|
|
164
|
-
if (request.maxTokens !== undefined)
|
|
165
|
-
body.max_tokens = request.maxTokens;
|
|
166
|
-
if (request.temperature !== undefined)
|
|
167
|
-
body.temperature = request.temperature;
|
|
168
|
-
if (request.seed !== undefined)
|
|
169
|
-
body.seed = request.seed;
|
|
170
|
-
if (request.tools !== undefined)
|
|
171
|
-
body.tools = request.tools;
|
|
172
|
-
if (request.toolChoice !== undefined)
|
|
173
|
-
body.tool_choice = request.toolChoice;
|
|
174
|
-
if (request.responseFormatJson === true) {
|
|
175
|
-
body.response_format = { type: "json_object" };
|
|
176
|
-
}
|
|
177
|
-
return body;
|
|
178
|
-
}
|
|
179
|
-
function defaultSleep(ms) {
|
|
180
|
-
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
181
|
-
}
|
|
182
|
-
function backoffDelay(attempt, policy) {
|
|
183
|
-
const raw = policy.initialBackoffMs * 2 ** attempt;
|
|
184
|
-
return Math.min(raw, policy.maxBackoffMs);
|
|
185
|
-
}
|
|
186
|
-
/**
|
|
187
|
-
* Build a real client pointed at the configured endpoint. Throws
|
|
188
|
-
* `EvalLlmNotConfiguredError` at call time (not construction time) when no
|
|
189
|
-
* API key is available, so CLI help and dry-run paths stay offline-safe.
|
|
190
|
-
*/
|
|
191
|
-
export function createEvalClient(config, options = {}) {
|
|
192
|
-
const retryPolicy = options.retryPolicy ?? {
|
|
193
|
-
...DEFAULT_RETRY_POLICY,
|
|
194
|
-
maxRetries: Math.max(0, config.maxRetries ?? DEFAULT_RETRY_POLICY.maxRetries)
|
|
195
|
-
};
|
|
196
|
-
const sleep = options.sleep ?? defaultSleep;
|
|
197
|
-
let cached;
|
|
198
|
-
const getClient = () => {
|
|
199
|
-
if (cached)
|
|
200
|
-
return cached;
|
|
201
|
-
if (!config.apiKey)
|
|
202
|
-
throw new EvalLlmNotConfiguredError();
|
|
203
|
-
const factory = options.openaiFactory ??
|
|
204
|
-
((opts) => new OpenAI(opts));
|
|
205
|
-
cached = factory({ apiKey: config.apiKey, baseURL: config.baseUrl });
|
|
206
|
-
return cached;
|
|
207
|
-
};
|
|
208
|
-
return {
|
|
209
|
-
async chat(request) {
|
|
210
|
-
const timeoutMs = Math.max(1_000, request.timeoutMs ?? config.timeoutMs);
|
|
211
|
-
const body = buildBody(request);
|
|
212
|
-
const client = getClient();
|
|
213
|
-
let lastError;
|
|
214
|
-
const maxAttempts = retryPolicy.maxRetries + 1;
|
|
215
|
-
for (let attempt = 0; attempt < maxAttempts; attempt += 1) {
|
|
216
|
-
const controller = new AbortController();
|
|
217
|
-
const handle = setTimeout(() => controller.abort(), timeoutMs);
|
|
218
|
-
try {
|
|
219
|
-
const raw = await client.chat.completions.create(body, {
|
|
220
|
-
signal: controller.signal
|
|
221
|
-
});
|
|
222
|
-
clearTimeout(handle);
|
|
223
|
-
const choice = raw.choices?.[0];
|
|
224
|
-
if (!choice) {
|
|
225
|
-
throw new EvalLlmInvalidResponseError("LLM response contained no choices.", { model: raw.model });
|
|
226
|
-
}
|
|
227
|
-
const content = choice.message?.content ?? "";
|
|
228
|
-
const toolCalls = choice.message?.tool_calls?.map((call) => ({
|
|
229
|
-
id: call.id,
|
|
230
|
-
name: call.function.name,
|
|
231
|
-
arguments: call.function.arguments
|
|
232
|
-
}));
|
|
233
|
-
const usage = {
|
|
234
|
-
promptTokens: raw.usage?.prompt_tokens ?? 0,
|
|
235
|
-
completionTokens: raw.usage?.completion_tokens ?? 0,
|
|
236
|
-
totalTokens: raw.usage?.total_tokens ?? 0
|
|
237
|
-
};
|
|
238
|
-
return {
|
|
239
|
-
content,
|
|
240
|
-
...(toolCalls && toolCalls.length > 0 ? { toolCalls } : {}),
|
|
241
|
-
usage,
|
|
242
|
-
finishReason: normalizeFinishReason(choice.finish_reason),
|
|
243
|
-
model: raw.model ?? request.model,
|
|
244
|
-
attempts: attempt + 1
|
|
245
|
-
};
|
|
246
|
-
}
|
|
247
|
-
catch (err) {
|
|
248
|
-
clearTimeout(handle);
|
|
249
|
-
const normalized = normalizeError(err, timeoutMs);
|
|
250
|
-
lastError = normalized;
|
|
251
|
-
const isLastAttempt = attempt === maxAttempts - 1;
|
|
252
|
-
if (!normalized.retryable || isLastAttempt)
|
|
253
|
-
throw normalized;
|
|
254
|
-
const waitMs = backoffDelay(attempt, retryPolicy);
|
|
255
|
-
options.onRetry?.({
|
|
256
|
-
attempt: attempt + 1,
|
|
257
|
-
maxAttempts,
|
|
258
|
-
waitMs,
|
|
259
|
-
error: normalized
|
|
260
|
-
});
|
|
261
|
-
await sleep(waitMs);
|
|
262
|
-
}
|
|
263
|
-
}
|
|
264
|
-
throw lastError ?? new EvalLlmTransportError(new Error("unknown"));
|
|
265
|
-
}
|
|
266
|
-
};
|
|
267
|
-
}
|
package/dist/eval/mode.d.ts
DELETED
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Helpers that translate between the legacy `Tier A/B/C` naming and the
|
|
3
|
-
* current `EvalMode` identifiers (`fixture` / `agent` / `workflow`).
|
|
4
|
-
*
|
|
5
|
-
* The names we actually carry in reports, config, CLI flags, and verifier
|
|
6
|
-
* messages are the `EvalMode` ones; legacy tier inputs are accepted with a
|
|
7
|
-
* single deprecation warning per process so existing scripts keep working
|
|
8
|
-
* through the 0.28.x line.
|
|
9
|
-
*/
|
|
10
|
-
import { type EvalMode } from "./types.js";
|
|
11
|
-
/**
|
|
12
|
-
* Reset the per-process "already warned about legacy tier" flag. Used by
|
|
13
|
-
* tests so each test file gets a deterministic warning surface.
|
|
14
|
-
*/
|
|
15
|
-
export declare function __resetLegacyWarningForTests(): void;
|
|
16
|
-
export interface LegacyTierInput {
|
|
17
|
-
source: "cli" | "env" | "config";
|
|
18
|
-
raw: string;
|
|
19
|
-
}
|
|
20
|
-
/**
|
|
21
|
-
* Normalize a raw string from the CLI / env / config into an `EvalMode`.
|
|
22
|
-
* Accepts both new (`fixture|agent|workflow`) and legacy (`A|B|C`) names.
|
|
23
|
-
* Emits a deprecation warning to stderr at most once per process when a
|
|
24
|
-
* legacy tier name is seen.
|
|
25
|
-
*/
|
|
26
|
-
export declare function parseModeInput(raw: string, input: LegacyTierInput, writeWarning?: (message: string) => void): EvalMode;
|
|
27
|
-
/** @deprecated kept for callers that still need to serialize as legacy. */
|
|
28
|
-
export declare function modeToLegacyTier(mode: EvalMode): "A" | "B" | "C";
|
package/dist/eval/mode.js
DELETED
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Helpers that translate between the legacy `Tier A/B/C` naming and the
|
|
3
|
-
* current `EvalMode` identifiers (`fixture` / `agent` / `workflow`).
|
|
4
|
-
*
|
|
5
|
-
* The names we actually carry in reports, config, CLI flags, and verifier
|
|
6
|
-
* messages are the `EvalMode` ones; legacy tier inputs are accepted with a
|
|
7
|
-
* single deprecation warning per process so existing scripts keep working
|
|
8
|
-
* through the 0.28.x line.
|
|
9
|
-
*/
|
|
10
|
-
import { EVAL_MODES } from "./types.js";
|
|
11
|
-
const LEGACY_TIER_TO_MODE = {
|
|
12
|
-
A: "fixture",
|
|
13
|
-
B: "agent",
|
|
14
|
-
C: "workflow"
|
|
15
|
-
};
|
|
16
|
-
const MODE_TO_LEGACY_TIER = {
|
|
17
|
-
fixture: "A",
|
|
18
|
-
agent: "B",
|
|
19
|
-
workflow: "C"
|
|
20
|
-
};
|
|
21
|
-
const DEPRECATED_NAMES = new Set(Object.keys(LEGACY_TIER_TO_MODE));
|
|
22
|
-
let legacyWarningEmitted = false;
|
|
23
|
-
/**
|
|
24
|
-
* Reset the per-process "already warned about legacy tier" flag. Used by
|
|
25
|
-
* tests so each test file gets a deterministic warning surface.
|
|
26
|
-
*/
|
|
27
|
-
export function __resetLegacyWarningForTests() {
|
|
28
|
-
legacyWarningEmitted = false;
|
|
29
|
-
}
|
|
30
|
-
/**
|
|
31
|
-
* Normalize a raw string from the CLI / env / config into an `EvalMode`.
|
|
32
|
-
* Accepts both new (`fixture|agent|workflow`) and legacy (`A|B|C`) names.
|
|
33
|
-
* Emits a deprecation warning to stderr at most once per process when a
|
|
34
|
-
* legacy tier name is seen.
|
|
35
|
-
*/
|
|
36
|
-
export function parseModeInput(raw, input, writeWarning = defaultWriteWarning) {
|
|
37
|
-
const trimmed = raw.trim();
|
|
38
|
-
if (trimmed.length === 0) {
|
|
39
|
-
throw new Error(`Evaluation mode must be one of: ${EVAL_MODES.join("|")} (or legacy A|B|C).`);
|
|
40
|
-
}
|
|
41
|
-
if (EVAL_MODES.includes(trimmed)) {
|
|
42
|
-
return trimmed;
|
|
43
|
-
}
|
|
44
|
-
if (DEPRECATED_NAMES.has(trimmed)) {
|
|
45
|
-
const replacement = LEGACY_TIER_TO_MODE[trimmed];
|
|
46
|
-
if (!legacyWarningEmitted) {
|
|
47
|
-
legacyWarningEmitted = true;
|
|
48
|
-
writeWarning(`[cclaw] "${input.source}: ${input.raw}" is using the legacy tier name "${trimmed}". ` +
|
|
49
|
-
`Please switch to --mode=${replacement} (legacy --tier=A|B|C will be removed in the next major release).`);
|
|
50
|
-
}
|
|
51
|
-
return replacement;
|
|
52
|
-
}
|
|
53
|
-
throw new Error(`Evaluation mode must be one of: ${EVAL_MODES.join("|")} (or legacy A|B|C), got: ${raw}`);
|
|
54
|
-
}
|
|
55
|
-
/** @deprecated kept for callers that still need to serialize as legacy. */
|
|
56
|
-
export function modeToLegacyTier(mode) {
|
|
57
|
-
return MODE_TO_LEGACY_TIER[mode];
|
|
58
|
-
}
|
|
59
|
-
function defaultWriteWarning(message) {
|
|
60
|
-
process.stderr.write(`${message}\n`);
|
|
61
|
-
}
|
package/dist/eval/progress.d.ts
DELETED
|
@@ -1,83 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Lightweight progress logger for `cclaw eval`.
|
|
3
|
-
*
|
|
4
|
-
* The runner is otherwise silent: a full workflow-mode run can easily take
|
|
5
|
-
* a few minutes and the user would see nothing until the Markdown report
|
|
6
|
-
* hits disk. We emit structured events here so the CLI can print concise
|
|
7
|
-
* one-line status updates to stderr (stdout stays reserved for the final
|
|
8
|
-
* report + `--json` output).
|
|
9
|
-
*
|
|
10
|
-
* The logger is intentionally minimal: no ANSI colors, no spinners, no
|
|
11
|
-
* carriage-return rewrites. Those do not survive `tee`, CI log viewers,
|
|
12
|
-
* or the background `runs/tail` path (which copies the stream to a log
|
|
13
|
-
* file), and users also told us "nothing is clear now, everything is
|
|
14
|
-
* long" — so we optimize for log-friendly line-by-line readability.
|
|
15
|
-
*/
|
|
16
|
-
import type { EvalMode, WorkflowStageName } from "./types.js";
|
|
17
|
-
export type ProgressEvent = {
|
|
18
|
-
kind: "run-start";
|
|
19
|
-
mode: EvalMode;
|
|
20
|
-
totalCases: number;
|
|
21
|
-
} | {
|
|
22
|
-
kind: "case-start";
|
|
23
|
-
caseId: string;
|
|
24
|
-
stage: string;
|
|
25
|
-
index: number;
|
|
26
|
-
total: number;
|
|
27
|
-
} | {
|
|
28
|
-
kind: "case-end";
|
|
29
|
-
caseId: string;
|
|
30
|
-
stage: string;
|
|
31
|
-
index: number;
|
|
32
|
-
total: number;
|
|
33
|
-
passed: boolean;
|
|
34
|
-
durationMs: number;
|
|
35
|
-
costUsd?: number;
|
|
36
|
-
} | {
|
|
37
|
-
kind: "stage-start";
|
|
38
|
-
caseId: string;
|
|
39
|
-
stage: WorkflowStageName;
|
|
40
|
-
index: number;
|
|
41
|
-
total: number;
|
|
42
|
-
} | {
|
|
43
|
-
kind: "stage-end";
|
|
44
|
-
caseId: string;
|
|
45
|
-
stage: WorkflowStageName;
|
|
46
|
-
index: number;
|
|
47
|
-
total: number;
|
|
48
|
-
passed: boolean;
|
|
49
|
-
durationMs: number;
|
|
50
|
-
costUsd?: number;
|
|
51
|
-
} | {
|
|
52
|
-
kind: "retry";
|
|
53
|
-
caseId: string;
|
|
54
|
-
stage?: string;
|
|
55
|
-
attempt: number;
|
|
56
|
-
maxAttempts: number;
|
|
57
|
-
waitMs: number;
|
|
58
|
-
reason: string;
|
|
59
|
-
} | {
|
|
60
|
-
kind: "run-end";
|
|
61
|
-
totalCases: number;
|
|
62
|
-
passed: number;
|
|
63
|
-
failed: number;
|
|
64
|
-
durationMs: number;
|
|
65
|
-
};
|
|
66
|
-
export interface ProgressLogger {
|
|
67
|
-
emit(event: ProgressEvent): void;
|
|
68
|
-
}
|
|
69
|
-
export declare function noopProgressLogger(): ProgressLogger;
|
|
70
|
-
export interface StderrProgressLoggerOptions {
|
|
71
|
-
/** Override the underlying write target; defaults to `process.stderr.write`. */
|
|
72
|
-
writer?: (message: string) => void;
|
|
73
|
-
/** Return wall-clock in ms. Injectable for tests. */
|
|
74
|
-
now?: () => number;
|
|
75
|
-
}
|
|
76
|
-
/**
|
|
77
|
-
* Emit a one-line status update per event to stderr.
|
|
78
|
-
*
|
|
79
|
-
* Format is deliberately boring: `[cclaw eval] <message>` so users can grep
|
|
80
|
-
* for the prefix in combined logs. Costs are rendered with up to 4 decimals
|
|
81
|
-
* so sub-cent runs still show a non-zero value.
|
|
82
|
-
*/
|
|
83
|
-
export declare function createStderrProgressLogger(opts?: StderrProgressLoggerOptions): ProgressLogger;
|
package/dist/eval/progress.js
DELETED
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
const NOOP_LOGGER = { emit() { } };
|
|
2
|
-
export function noopProgressLogger() {
|
|
3
|
-
return NOOP_LOGGER;
|
|
4
|
-
}
|
|
5
|
-
/**
|
|
6
|
-
* Emit a one-line status update per event to stderr.
|
|
7
|
-
*
|
|
8
|
-
* Format is deliberately boring: `[cclaw eval] <message>` so users can grep
|
|
9
|
-
* for the prefix in combined logs. Costs are rendered with up to 4 decimals
|
|
10
|
-
* so sub-cent runs still show a non-zero value.
|
|
11
|
-
*/
|
|
12
|
-
export function createStderrProgressLogger(opts = {}) {
|
|
13
|
-
const writer = opts.writer ?? ((s) => process.stderr.write(s));
|
|
14
|
-
return {
|
|
15
|
-
emit(event) {
|
|
16
|
-
writer(`[cclaw eval] ${formatEvent(event)}\n`);
|
|
17
|
-
}
|
|
18
|
-
};
|
|
19
|
-
}
|
|
20
|
-
function formatDuration(ms) {
|
|
21
|
-
if (ms < 1000)
|
|
22
|
-
return `${ms}ms`;
|
|
23
|
-
const s = ms / 1000;
|
|
24
|
-
if (s < 60)
|
|
25
|
-
return `${s.toFixed(1)}s`;
|
|
26
|
-
const m = Math.floor(s / 60);
|
|
27
|
-
const rem = Math.round(s - m * 60);
|
|
28
|
-
return `${m}m${rem.toString().padStart(2, "0")}s`;
|
|
29
|
-
}
|
|
30
|
-
function formatCost(usd) {
|
|
31
|
-
if (usd === undefined || usd <= 0)
|
|
32
|
-
return "";
|
|
33
|
-
return ` $${usd.toFixed(4)}`;
|
|
34
|
-
}
|
|
35
|
-
function formatEvent(event) {
|
|
36
|
-
switch (event.kind) {
|
|
37
|
-
case "run-start":
|
|
38
|
-
return `start mode=${event.mode} cases=${event.totalCases}`;
|
|
39
|
-
case "case-start":
|
|
40
|
-
return `[${event.index}/${event.total}] ${event.caseId} (${event.stage}) ...`;
|
|
41
|
-
case "case-end": {
|
|
42
|
-
const status = event.passed ? "PASS" : "FAIL";
|
|
43
|
-
return (`[${event.index}/${event.total}] ${event.caseId} (${event.stage}) ${status} ` +
|
|
44
|
-
`in ${formatDuration(event.durationMs)}${formatCost(event.costUsd)}`);
|
|
45
|
-
}
|
|
46
|
-
case "stage-start":
|
|
47
|
-
return ` stage ${event.stage} ...`;
|
|
48
|
-
case "stage-end": {
|
|
49
|
-
const status = event.passed ? "ok" : "fail";
|
|
50
|
-
return ` stage ${event.stage} ${status} in ${formatDuration(event.durationMs)}${formatCost(event.costUsd)}`;
|
|
51
|
-
}
|
|
52
|
-
case "retry":
|
|
53
|
-
return (` retry ${event.caseId}${event.stage ? `/${event.stage}` : ""} ` +
|
|
54
|
-
`attempt ${event.attempt}/${event.maxAttempts} in ${formatDuration(event.waitMs)} (${event.reason})`);
|
|
55
|
-
case "run-end":
|
|
56
|
-
return (`done pass=${event.passed} fail=${event.failed} total=${event.totalCases} ` +
|
|
57
|
-
`in ${formatDuration(event.durationMs)}`);
|
|
58
|
-
}
|
|
59
|
-
}
|
package/dist/eval/report.d.ts
DELETED
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
import type { EvalReport } from "./types.js";
|
|
2
|
-
export declare function reportsDir(projectRoot: string): string;
|
|
3
|
-
export declare function defaultReportBasename(report: EvalReport): string;
|
|
4
|
-
/**
|
|
5
|
-
* Format a report as a human-readable Markdown document. Keeping the layout
|
|
6
|
-
* stable matters: CI posts diffs against earlier reports, and unit tests use
|
|
7
|
-
* the output as a regression guard.
|
|
8
|
-
*/
|
|
9
|
-
export declare function formatMarkdownReport(report: EvalReport): string;
|
|
10
|
-
export declare function writeJsonReport(projectRoot: string, report: EvalReport, basename?: string): Promise<string>;
|
|
11
|
-
export declare function writeMarkdownReport(projectRoot: string, report: EvalReport, basename?: string): Promise<string>;
|