cclaw-cli 0.48.35 → 0.51.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. package/README.md +54 -82
  2. package/dist/artifact-linter.d.ts +4 -0
  3. package/dist/artifact-linter.js +24 -3
  4. package/dist/cli.d.ts +1 -19
  5. package/dist/cli.js +49 -495
  6. package/dist/constants.d.ts +2 -13
  7. package/dist/constants.js +1 -46
  8. package/dist/content/closeout-guidance.d.ts +14 -0
  9. package/dist/content/closeout-guidance.js +42 -0
  10. package/dist/content/core-agents.js +51 -9
  11. package/dist/content/decision-protocol.d.ts +12 -0
  12. package/dist/content/decision-protocol.js +20 -0
  13. package/dist/content/diff-command.d.ts +1 -2
  14. package/dist/content/diff-command.js +8 -94
  15. package/dist/content/examples.d.ts +4 -10
  16. package/dist/content/examples.js +10 -20
  17. package/dist/content/hook-events.js +2 -2
  18. package/dist/content/hook-inline-snippets.d.ts +5 -2
  19. package/dist/content/hook-inline-snippets.js +33 -1
  20. package/dist/content/hook-manifest.d.ts +3 -4
  21. package/dist/content/hook-manifest.js +11 -12
  22. package/dist/content/hooks.js +2 -0
  23. package/dist/content/ideate-command.d.ts +2 -0
  24. package/dist/content/ideate-command.js +31 -25
  25. package/dist/content/iron-laws.d.ts +5 -5
  26. package/dist/content/iron-laws.js +5 -5
  27. package/dist/content/learnings.d.ts +3 -4
  28. package/dist/content/learnings.js +24 -50
  29. package/dist/content/meta-skill.js +31 -24
  30. package/dist/content/next-command.js +38 -38
  31. package/dist/content/node-hooks.js +17 -343
  32. package/dist/content/opencode-plugin.js +2 -100
  33. package/dist/content/research-playbooks.js +14 -14
  34. package/dist/content/review-loop.d.ts +2 -0
  35. package/dist/content/review-loop.js +8 -0
  36. package/dist/content/session-hooks.js +14 -46
  37. package/dist/content/skills.d.ts +0 -5
  38. package/dist/content/skills.js +53 -128
  39. package/dist/content/stage-common-guidance.d.ts +0 -1
  40. package/dist/content/stage-common-guidance.js +15 -14
  41. package/dist/content/stage-schema.d.ts +26 -1
  42. package/dist/content/stage-schema.js +121 -40
  43. package/dist/content/stages/_lint-metadata/index.js +9 -15
  44. package/dist/content/stages/brainstorm.js +22 -43
  45. package/dist/content/stages/design.js +37 -57
  46. package/dist/content/stages/plan.js +22 -13
  47. package/dist/content/stages/review.js +24 -27
  48. package/dist/content/stages/scope.js +34 -46
  49. package/dist/content/stages/ship.js +7 -4
  50. package/dist/content/stages/spec.js +20 -9
  51. package/dist/content/stages/tdd.js +64 -44
  52. package/dist/content/start-command.js +10 -12
  53. package/dist/content/status-command.d.ts +2 -7
  54. package/dist/content/status-command.js +19 -146
  55. package/dist/content/subagents.d.ts +0 -5
  56. package/dist/content/subagents.js +47 -28
  57. package/dist/content/templates.d.ts +1 -1
  58. package/dist/content/templates.js +126 -135
  59. package/dist/content/track-render-context.d.ts +17 -0
  60. package/dist/content/track-render-context.js +44 -0
  61. package/dist/content/tree-command.d.ts +1 -2
  62. package/dist/content/tree-command.js +4 -87
  63. package/dist/content/utility-skills.d.ts +2 -29
  64. package/dist/content/utility-skills.js +2 -1533
  65. package/dist/content/view-command.js +29 -11
  66. package/dist/delegation.d.ts +1 -1
  67. package/dist/delegation.js +5 -15
  68. package/dist/doctor-registry.js +20 -21
  69. package/dist/doctor.js +88 -408
  70. package/dist/flow-state.d.ts +3 -0
  71. package/dist/flow-state.js +2 -0
  72. package/dist/harness-adapters.d.ts +1 -1
  73. package/dist/harness-adapters.js +48 -57
  74. package/dist/install.js +128 -520
  75. package/dist/internal/advance-stage.js +3 -9
  76. package/dist/internal/compound-readiness.d.ts +1 -1
  77. package/dist/internal/compound-readiness.js +1 -1
  78. package/dist/internal/tdd-loop-status.d.ts +1 -1
  79. package/dist/internal/tdd-loop-status.js +1 -1
  80. package/dist/knowledge-store.d.ts +16 -10
  81. package/dist/knowledge-store.js +51 -15
  82. package/dist/policy.js +16 -109
  83. package/dist/run-archive.d.ts +4 -6
  84. package/dist/run-archive.js +15 -20
  85. package/dist/run-persistence.d.ts +2 -2
  86. package/dist/run-persistence.js +3 -9
  87. package/package.json +1 -2
  88. package/dist/content/archive-command.d.ts +0 -2
  89. package/dist/content/archive-command.js +0 -124
  90. package/dist/content/compound-command.d.ts +0 -5
  91. package/dist/content/compound-command.js +0 -193
  92. package/dist/content/contexts.d.ts +0 -9
  93. package/dist/content/contexts.js +0 -65
  94. package/dist/content/contracts.d.ts +0 -2
  95. package/dist/content/contracts.js +0 -51
  96. package/dist/content/doctor-references.d.ts +0 -2
  97. package/dist/content/doctor-references.js +0 -150
  98. package/dist/content/eval-scaffold.d.ts +0 -15
  99. package/dist/content/eval-scaffold.js +0 -370
  100. package/dist/content/feature-command.d.ts +0 -2
  101. package/dist/content/feature-command.js +0 -123
  102. package/dist/content/flow-map.d.ts +0 -23
  103. package/dist/content/flow-map.js +0 -134
  104. package/dist/content/harness-doc.d.ts +0 -2
  105. package/dist/content/harness-doc.js +0 -202
  106. package/dist/content/harness-playbooks.d.ts +0 -24
  107. package/dist/content/harness-playbooks.js +0 -393
  108. package/dist/content/harness-tool-refs.d.ts +0 -20
  109. package/dist/content/harness-tool-refs.js +0 -268
  110. package/dist/content/ops-command.d.ts +0 -2
  111. package/dist/content/ops-command.js +0 -71
  112. package/dist/content/protocols.d.ts +0 -7
  113. package/dist/content/protocols.js +0 -215
  114. package/dist/content/retro-command.d.ts +0 -2
  115. package/dist/content/retro-command.js +0 -165
  116. package/dist/content/rewind-command.d.ts +0 -2
  117. package/dist/content/rewind-command.js +0 -106
  118. package/dist/content/tdd-log-command.d.ts +0 -2
  119. package/dist/content/tdd-log-command.js +0 -85
  120. package/dist/eval/agents/single-shot.d.ts +0 -27
  121. package/dist/eval/agents/single-shot.js +0 -79
  122. package/dist/eval/agents/with-tools.d.ts +0 -44
  123. package/dist/eval/agents/with-tools.js +0 -261
  124. package/dist/eval/agents/workflow.d.ts +0 -31
  125. package/dist/eval/agents/workflow.js +0 -155
  126. package/dist/eval/baseline.d.ts +0 -38
  127. package/dist/eval/baseline.js +0 -282
  128. package/dist/eval/config-loader.d.ts +0 -14
  129. package/dist/eval/config-loader.js +0 -395
  130. package/dist/eval/corpus.d.ts +0 -30
  131. package/dist/eval/corpus.js +0 -330
  132. package/dist/eval/cost-guard.d.ts +0 -102
  133. package/dist/eval/cost-guard.js +0 -190
  134. package/dist/eval/diff.d.ts +0 -64
  135. package/dist/eval/diff.js +0 -323
  136. package/dist/eval/llm-client.d.ts +0 -176
  137. package/dist/eval/llm-client.js +0 -267
  138. package/dist/eval/mode.d.ts +0 -28
  139. package/dist/eval/mode.js +0 -61
  140. package/dist/eval/progress.d.ts +0 -83
  141. package/dist/eval/progress.js +0 -59
  142. package/dist/eval/report.d.ts +0 -11
  143. package/dist/eval/report.js +0 -181
  144. package/dist/eval/rubric-loader.d.ts +0 -20
  145. package/dist/eval/rubric-loader.js +0 -143
  146. package/dist/eval/runner.d.ts +0 -81
  147. package/dist/eval/runner.js +0 -746
  148. package/dist/eval/runs.d.ts +0 -41
  149. package/dist/eval/runs.js +0 -114
  150. package/dist/eval/sandbox.d.ts +0 -38
  151. package/dist/eval/sandbox.js +0 -137
  152. package/dist/eval/tools/glob.d.ts +0 -2
  153. package/dist/eval/tools/glob.js +0 -163
  154. package/dist/eval/tools/grep.d.ts +0 -2
  155. package/dist/eval/tools/grep.js +0 -152
  156. package/dist/eval/tools/index.d.ts +0 -7
  157. package/dist/eval/tools/index.js +0 -35
  158. package/dist/eval/tools/read.d.ts +0 -2
  159. package/dist/eval/tools/read.js +0 -122
  160. package/dist/eval/tools/types.d.ts +0 -49
  161. package/dist/eval/tools/types.js +0 -41
  162. package/dist/eval/tools/write.d.ts +0 -2
  163. package/dist/eval/tools/write.js +0 -92
  164. package/dist/eval/types.d.ts +0 -561
  165. package/dist/eval/types.js +0 -47
  166. package/dist/eval/verifiers/judge.d.ts +0 -40
  167. package/dist/eval/verifiers/judge.js +0 -256
  168. package/dist/eval/verifiers/rules.d.ts +0 -24
  169. package/dist/eval/verifiers/rules.js +0 -218
  170. package/dist/eval/verifiers/structural.d.ts +0 -14
  171. package/dist/eval/verifiers/structural.js +0 -171
  172. package/dist/eval/verifiers/traceability.d.ts +0 -23
  173. package/dist/eval/verifiers/traceability.js +0 -84
  174. package/dist/eval/verifiers/workflow-consistency.d.ts +0 -21
  175. package/dist/eval/verifiers/workflow-consistency.js +0 -225
  176. package/dist/eval/workflow-corpus.d.ts +0 -7
  177. package/dist/eval/workflow-corpus.js +0 -207
  178. package/dist/feature-system.d.ts +0 -42
  179. package/dist/feature-system.js +0 -432
  180. package/dist/internal/knowledge-digest.d.ts +0 -7
  181. package/dist/internal/knowledge-digest.js +0 -93
@@ -1,267 +0,0 @@
1
- /**
2
- * LLM client for the cclaw eval subsystem.
3
- *
4
- * Thin adapter over the `openai` SDK pointed at any OpenAI-compatible
5
- * `baseURL` (z.ai, OpenAI, vLLM, Ollama+openai-proxy, ...). The surface is
6
- * deliberately narrow:
7
- *
8
- * - `chat()` — one request/response round-trip with timeout, bounded
9
- * retries on transient errors, and a structured error hierarchy so
10
- * callers can react policy-style (cost-guard, judge, agent-under-test).
11
- * - `ChatRequest` / `ChatResponse` — wire format decoupled from the
12
- * OpenAI types so swapping vendors stays a one-file change.
13
- *
14
- * Factories stay side-effect-free: no network calls are made until `chat()`
15
- * is invoked, so CLI help and dry-run paths never need an API key.
16
- */
17
- import OpenAI from "openai";
18
- /** Base class so callers can `catch (err) { if (err instanceof EvalLlmError) ... }`. */
19
- export class EvalLlmError extends Error {
20
- retryable;
21
- status;
22
- constructor(message, opts) {
23
- super(message);
24
- this.name = "EvalLlmError";
25
- this.retryable = opts.retryable;
26
- if (opts.status !== undefined)
27
- this.status = opts.status;
28
- if (opts.cause !== undefined)
29
- this.cause = opts.cause;
30
- }
31
- }
32
- export class EvalLlmAuthError extends EvalLlmError {
33
- constructor(cause) {
34
- super("LLM request rejected (auth). Check CCLAW_EVAL_API_KEY and provider permissions.", {
35
- retryable: false,
36
- status: 401,
37
- cause
38
- });
39
- this.name = "EvalLlmAuthError";
40
- }
41
- }
42
- export class EvalLlmConfigError extends EvalLlmError {
43
- constructor(message, cause) {
44
- super(message, { retryable: false, cause });
45
- this.name = "EvalLlmConfigError";
46
- }
47
- }
48
- export class EvalLlmTimeoutError extends EvalLlmError {
49
- constructor(timeoutMs) {
50
- super(`LLM request timed out after ${timeoutMs}ms.`, { retryable: true });
51
- this.name = "EvalLlmTimeoutError";
52
- }
53
- }
54
- export class EvalLlmRateLimitedError extends EvalLlmError {
55
- constructor(cause) {
56
- super("LLM rate limit hit. Retrying with backoff.", {
57
- retryable: true,
58
- status: 429,
59
- cause
60
- });
61
- this.name = "EvalLlmRateLimitedError";
62
- }
63
- }
64
- export class EvalLlmTransportError extends EvalLlmError {
65
- constructor(cause, status) {
66
- super("LLM transport error.", { retryable: true, status, cause });
67
- this.name = "EvalLlmTransportError";
68
- }
69
- }
70
- export class EvalLlmInvalidResponseError extends EvalLlmError {
71
- constructor(message, details) {
72
- super(message, { retryable: false });
73
- this.name = "EvalLlmInvalidResponseError";
74
- if (details)
75
- this.details = details;
76
- }
77
- }
78
- export class EvalLlmNotConfiguredError extends EvalLlmError {
79
- constructor() {
80
- super(`LLM client not configured. Set CCLAW_EVAL_API_KEY (and optionally ` +
81
- `CCLAW_EVAL_BASE_URL / CCLAW_EVAL_MODEL) or run with --schema-only / --rules.`, { retryable: false });
82
- this.name = "EvalLlmNotConfiguredError";
83
- }
84
- }
85
- /**
86
- * Deprecated shim preserved so older wiring keeps compiling. Prefer
87
- * `EvalLlmNotConfiguredError` for the "caller forgot to provide an API
88
- * key" case.
89
- */
90
- export class EvalLlmNotWiredError extends EvalLlmNotConfiguredError {
91
- }
92
- export const DEFAULT_RETRY_POLICY = {
93
- maxRetries: 2,
94
- initialBackoffMs: 500,
95
- maxBackoffMs: 8_000
96
- };
97
- function isAbortError(err) {
98
- if (err === null || typeof err !== "object")
99
- return false;
100
- const name = err.name;
101
- const code = err.code;
102
- return (name === "AbortError" || code === "ABORT_ERR" || code === "ERR_CANCELED");
103
- }
104
- function statusFromError(err) {
105
- if (err === null || typeof err !== "object")
106
- return undefined;
107
- const status = err.status;
108
- return typeof status === "number" ? status : undefined;
109
- }
110
- function normalizeError(err, timeoutMs) {
111
- if (err instanceof EvalLlmError)
112
- return err;
113
- if (isAbortError(err))
114
- return new EvalLlmTimeoutError(timeoutMs);
115
- const status = statusFromError(err);
116
- if (status === 401 || status === 403)
117
- return new EvalLlmAuthError(err);
118
- if (status === 429)
119
- return new EvalLlmRateLimitedError(err);
120
- if (status !== undefined && status >= 400 && status < 500) {
121
- return new EvalLlmError(`LLM request rejected (HTTP ${status}).`, {
122
- retryable: false,
123
- status,
124
- cause: err
125
- });
126
- }
127
- return new EvalLlmTransportError(err, status);
128
- }
129
- function normalizeFinishReason(raw) {
130
- switch (raw) {
131
- case "length":
132
- return "length";
133
- case "tool_calls":
134
- case "function_call":
135
- return "tool_calls";
136
- case "content_filter":
137
- return "content_filter";
138
- case "stop":
139
- case null:
140
- case undefined:
141
- default:
142
- return "stop";
143
- }
144
- }
145
- function buildBody(request) {
146
- const body = {
147
- model: request.model,
148
- messages: request.messages.map((m) => ({
149
- role: m.role,
150
- content: m.content,
151
- ...(m.name !== undefined ? { name: m.name } : {}),
152
- ...(m.toolCallId !== undefined ? { tool_call_id: m.toolCallId } : {}),
153
- ...(m.toolCalls && m.toolCalls.length > 0
154
- ? {
155
- tool_calls: m.toolCalls.map((call) => ({
156
- id: call.id,
157
- type: "function",
158
- function: { name: call.name, arguments: call.arguments }
159
- }))
160
- }
161
- : {})
162
- }))
163
- };
164
- if (request.maxTokens !== undefined)
165
- body.max_tokens = request.maxTokens;
166
- if (request.temperature !== undefined)
167
- body.temperature = request.temperature;
168
- if (request.seed !== undefined)
169
- body.seed = request.seed;
170
- if (request.tools !== undefined)
171
- body.tools = request.tools;
172
- if (request.toolChoice !== undefined)
173
- body.tool_choice = request.toolChoice;
174
- if (request.responseFormatJson === true) {
175
- body.response_format = { type: "json_object" };
176
- }
177
- return body;
178
- }
179
- function defaultSleep(ms) {
180
- return new Promise((resolve) => setTimeout(resolve, ms));
181
- }
182
- function backoffDelay(attempt, policy) {
183
- const raw = policy.initialBackoffMs * 2 ** attempt;
184
- return Math.min(raw, policy.maxBackoffMs);
185
- }
186
- /**
187
- * Build a real client pointed at the configured endpoint. Throws
188
- * `EvalLlmNotConfiguredError` at call time (not construction time) when no
189
- * API key is available, so CLI help and dry-run paths stay offline-safe.
190
- */
191
- export function createEvalClient(config, options = {}) {
192
- const retryPolicy = options.retryPolicy ?? {
193
- ...DEFAULT_RETRY_POLICY,
194
- maxRetries: Math.max(0, config.maxRetries ?? DEFAULT_RETRY_POLICY.maxRetries)
195
- };
196
- const sleep = options.sleep ?? defaultSleep;
197
- let cached;
198
- const getClient = () => {
199
- if (cached)
200
- return cached;
201
- if (!config.apiKey)
202
- throw new EvalLlmNotConfiguredError();
203
- const factory = options.openaiFactory ??
204
- ((opts) => new OpenAI(opts));
205
- cached = factory({ apiKey: config.apiKey, baseURL: config.baseUrl });
206
- return cached;
207
- };
208
- return {
209
- async chat(request) {
210
- const timeoutMs = Math.max(1_000, request.timeoutMs ?? config.timeoutMs);
211
- const body = buildBody(request);
212
- const client = getClient();
213
- let lastError;
214
- const maxAttempts = retryPolicy.maxRetries + 1;
215
- for (let attempt = 0; attempt < maxAttempts; attempt += 1) {
216
- const controller = new AbortController();
217
- const handle = setTimeout(() => controller.abort(), timeoutMs);
218
- try {
219
- const raw = await client.chat.completions.create(body, {
220
- signal: controller.signal
221
- });
222
- clearTimeout(handle);
223
- const choice = raw.choices?.[0];
224
- if (!choice) {
225
- throw new EvalLlmInvalidResponseError("LLM response contained no choices.", { model: raw.model });
226
- }
227
- const content = choice.message?.content ?? "";
228
- const toolCalls = choice.message?.tool_calls?.map((call) => ({
229
- id: call.id,
230
- name: call.function.name,
231
- arguments: call.function.arguments
232
- }));
233
- const usage = {
234
- promptTokens: raw.usage?.prompt_tokens ?? 0,
235
- completionTokens: raw.usage?.completion_tokens ?? 0,
236
- totalTokens: raw.usage?.total_tokens ?? 0
237
- };
238
- return {
239
- content,
240
- ...(toolCalls && toolCalls.length > 0 ? { toolCalls } : {}),
241
- usage,
242
- finishReason: normalizeFinishReason(choice.finish_reason),
243
- model: raw.model ?? request.model,
244
- attempts: attempt + 1
245
- };
246
- }
247
- catch (err) {
248
- clearTimeout(handle);
249
- const normalized = normalizeError(err, timeoutMs);
250
- lastError = normalized;
251
- const isLastAttempt = attempt === maxAttempts - 1;
252
- if (!normalized.retryable || isLastAttempt)
253
- throw normalized;
254
- const waitMs = backoffDelay(attempt, retryPolicy);
255
- options.onRetry?.({
256
- attempt: attempt + 1,
257
- maxAttempts,
258
- waitMs,
259
- error: normalized
260
- });
261
- await sleep(waitMs);
262
- }
263
- }
264
- throw lastError ?? new EvalLlmTransportError(new Error("unknown"));
265
- }
266
- };
267
- }
@@ -1,28 +0,0 @@
1
- /**
2
- * Helpers that translate between the legacy `Tier A/B/C` naming and the
3
- * current `EvalMode` identifiers (`fixture` / `agent` / `workflow`).
4
- *
5
- * The names we actually carry in reports, config, CLI flags, and verifier
6
- * messages are the `EvalMode` ones; legacy tier inputs are accepted with a
7
- * single deprecation warning per process so existing scripts keep working
8
- * through the 0.28.x line.
9
- */
10
- import { type EvalMode } from "./types.js";
11
- /**
12
- * Reset the per-process "already warned about legacy tier" flag. Used by
13
- * tests so each test file gets a deterministic warning surface.
14
- */
15
- export declare function __resetLegacyWarningForTests(): void;
16
- export interface LegacyTierInput {
17
- source: "cli" | "env" | "config";
18
- raw: string;
19
- }
20
- /**
21
- * Normalize a raw string from the CLI / env / config into an `EvalMode`.
22
- * Accepts both new (`fixture|agent|workflow`) and legacy (`A|B|C`) names.
23
- * Emits a deprecation warning to stderr at most once per process when a
24
- * legacy tier name is seen.
25
- */
26
- export declare function parseModeInput(raw: string, input: LegacyTierInput, writeWarning?: (message: string) => void): EvalMode;
27
- /** @deprecated kept for callers that still need to serialize as legacy. */
28
- export declare function modeToLegacyTier(mode: EvalMode): "A" | "B" | "C";
package/dist/eval/mode.js DELETED
@@ -1,61 +0,0 @@
1
- /**
2
- * Helpers that translate between the legacy `Tier A/B/C` naming and the
3
- * current `EvalMode` identifiers (`fixture` / `agent` / `workflow`).
4
- *
5
- * The names we actually carry in reports, config, CLI flags, and verifier
6
- * messages are the `EvalMode` ones; legacy tier inputs are accepted with a
7
- * single deprecation warning per process so existing scripts keep working
8
- * through the 0.28.x line.
9
- */
10
- import { EVAL_MODES } from "./types.js";
11
- const LEGACY_TIER_TO_MODE = {
12
- A: "fixture",
13
- B: "agent",
14
- C: "workflow"
15
- };
16
- const MODE_TO_LEGACY_TIER = {
17
- fixture: "A",
18
- agent: "B",
19
- workflow: "C"
20
- };
21
- const DEPRECATED_NAMES = new Set(Object.keys(LEGACY_TIER_TO_MODE));
22
- let legacyWarningEmitted = false;
23
- /**
24
- * Reset the per-process "already warned about legacy tier" flag. Used by
25
- * tests so each test file gets a deterministic warning surface.
26
- */
27
- export function __resetLegacyWarningForTests() {
28
- legacyWarningEmitted = false;
29
- }
30
- /**
31
- * Normalize a raw string from the CLI / env / config into an `EvalMode`.
32
- * Accepts both new (`fixture|agent|workflow`) and legacy (`A|B|C`) names.
33
- * Emits a deprecation warning to stderr at most once per process when a
34
- * legacy tier name is seen.
35
- */
36
- export function parseModeInput(raw, input, writeWarning = defaultWriteWarning) {
37
- const trimmed = raw.trim();
38
- if (trimmed.length === 0) {
39
- throw new Error(`Evaluation mode must be one of: ${EVAL_MODES.join("|")} (or legacy A|B|C).`);
40
- }
41
- if (EVAL_MODES.includes(trimmed)) {
42
- return trimmed;
43
- }
44
- if (DEPRECATED_NAMES.has(trimmed)) {
45
- const replacement = LEGACY_TIER_TO_MODE[trimmed];
46
- if (!legacyWarningEmitted) {
47
- legacyWarningEmitted = true;
48
- writeWarning(`[cclaw] "${input.source}: ${input.raw}" is using the legacy tier name "${trimmed}". ` +
49
- `Please switch to --mode=${replacement} (legacy --tier=A|B|C will be removed in the next major release).`);
50
- }
51
- return replacement;
52
- }
53
- throw new Error(`Evaluation mode must be one of: ${EVAL_MODES.join("|")} (or legacy A|B|C), got: ${raw}`);
54
- }
55
- /** @deprecated kept for callers that still need to serialize as legacy. */
56
- export function modeToLegacyTier(mode) {
57
- return MODE_TO_LEGACY_TIER[mode];
58
- }
59
- function defaultWriteWarning(message) {
60
- process.stderr.write(`${message}\n`);
61
- }
@@ -1,83 +0,0 @@
1
- /**
2
- * Lightweight progress logger for `cclaw eval`.
3
- *
4
- * The runner is otherwise silent: a full workflow-mode run can easily take
5
- * a few minutes and the user would see nothing until the Markdown report
6
- * hits disk. We emit structured events here so the CLI can print concise
7
- * one-line status updates to stderr (stdout stays reserved for the final
8
- * report + `--json` output).
9
- *
10
- * The logger is intentionally minimal: no ANSI colors, no spinners, no
11
- * carriage-return rewrites. Those do not survive `tee`, CI log viewers,
12
- * or the background `runs/tail` path (which copies the stream to a log
13
- * file), and users also told us "nothing is clear now, everything is
14
- * long" — so we optimize for log-friendly line-by-line readability.
15
- */
16
- import type { EvalMode, WorkflowStageName } from "./types.js";
17
- export type ProgressEvent = {
18
- kind: "run-start";
19
- mode: EvalMode;
20
- totalCases: number;
21
- } | {
22
- kind: "case-start";
23
- caseId: string;
24
- stage: string;
25
- index: number;
26
- total: number;
27
- } | {
28
- kind: "case-end";
29
- caseId: string;
30
- stage: string;
31
- index: number;
32
- total: number;
33
- passed: boolean;
34
- durationMs: number;
35
- costUsd?: number;
36
- } | {
37
- kind: "stage-start";
38
- caseId: string;
39
- stage: WorkflowStageName;
40
- index: number;
41
- total: number;
42
- } | {
43
- kind: "stage-end";
44
- caseId: string;
45
- stage: WorkflowStageName;
46
- index: number;
47
- total: number;
48
- passed: boolean;
49
- durationMs: number;
50
- costUsd?: number;
51
- } | {
52
- kind: "retry";
53
- caseId: string;
54
- stage?: string;
55
- attempt: number;
56
- maxAttempts: number;
57
- waitMs: number;
58
- reason: string;
59
- } | {
60
- kind: "run-end";
61
- totalCases: number;
62
- passed: number;
63
- failed: number;
64
- durationMs: number;
65
- };
66
- export interface ProgressLogger {
67
- emit(event: ProgressEvent): void;
68
- }
69
- export declare function noopProgressLogger(): ProgressLogger;
70
- export interface StderrProgressLoggerOptions {
71
- /** Override the underlying write target; defaults to `process.stderr.write`. */
72
- writer?: (message: string) => void;
73
- /** Return wall-clock in ms. Injectable for tests. */
74
- now?: () => number;
75
- }
76
- /**
77
- * Emit a one-line status update per event to stderr.
78
- *
79
- * Format is deliberately boring: `[cclaw eval] <message>` so users can grep
80
- * for the prefix in combined logs. Costs are rendered with up to 4 decimals
81
- * so sub-cent runs still show a non-zero value.
82
- */
83
- export declare function createStderrProgressLogger(opts?: StderrProgressLoggerOptions): ProgressLogger;
@@ -1,59 +0,0 @@
1
- const NOOP_LOGGER = { emit() { } };
2
- export function noopProgressLogger() {
3
- return NOOP_LOGGER;
4
- }
5
- /**
6
- * Emit a one-line status update per event to stderr.
7
- *
8
- * Format is deliberately boring: `[cclaw eval] <message>` so users can grep
9
- * for the prefix in combined logs. Costs are rendered with up to 4 decimals
10
- * so sub-cent runs still show a non-zero value.
11
- */
12
- export function createStderrProgressLogger(opts = {}) {
13
- const writer = opts.writer ?? ((s) => process.stderr.write(s));
14
- return {
15
- emit(event) {
16
- writer(`[cclaw eval] ${formatEvent(event)}\n`);
17
- }
18
- };
19
- }
20
- function formatDuration(ms) {
21
- if (ms < 1000)
22
- return `${ms}ms`;
23
- const s = ms / 1000;
24
- if (s < 60)
25
- return `${s.toFixed(1)}s`;
26
- const m = Math.floor(s / 60);
27
- const rem = Math.round(s - m * 60);
28
- return `${m}m${rem.toString().padStart(2, "0")}s`;
29
- }
30
- function formatCost(usd) {
31
- if (usd === undefined || usd <= 0)
32
- return "";
33
- return ` $${usd.toFixed(4)}`;
34
- }
35
- function formatEvent(event) {
36
- switch (event.kind) {
37
- case "run-start":
38
- return `start mode=${event.mode} cases=${event.totalCases}`;
39
- case "case-start":
40
- return `[${event.index}/${event.total}] ${event.caseId} (${event.stage}) ...`;
41
- case "case-end": {
42
- const status = event.passed ? "PASS" : "FAIL";
43
- return (`[${event.index}/${event.total}] ${event.caseId} (${event.stage}) ${status} ` +
44
- `in ${formatDuration(event.durationMs)}${formatCost(event.costUsd)}`);
45
- }
46
- case "stage-start":
47
- return ` stage ${event.stage} ...`;
48
- case "stage-end": {
49
- const status = event.passed ? "ok" : "fail";
50
- return ` stage ${event.stage} ${status} in ${formatDuration(event.durationMs)}${formatCost(event.costUsd)}`;
51
- }
52
- case "retry":
53
- return (` retry ${event.caseId}${event.stage ? `/${event.stage}` : ""} ` +
54
- `attempt ${event.attempt}/${event.maxAttempts} in ${formatDuration(event.waitMs)} (${event.reason})`);
55
- case "run-end":
56
- return (`done pass=${event.passed} fail=${event.failed} total=${event.totalCases} ` +
57
- `in ${formatDuration(event.durationMs)}`);
58
- }
59
- }
@@ -1,11 +0,0 @@
1
- import type { EvalReport } from "./types.js";
2
- export declare function reportsDir(projectRoot: string): string;
3
- export declare function defaultReportBasename(report: EvalReport): string;
4
- /**
5
- * Format a report as a human-readable Markdown document. Keeping the layout
6
- * stable matters: CI posts diffs against earlier reports, and unit tests use
7
- * the output as a regression guard.
8
- */
9
- export declare function formatMarkdownReport(report: EvalReport): string;
10
- export declare function writeJsonReport(projectRoot: string, report: EvalReport, basename?: string): Promise<string>;
11
- export declare function writeMarkdownReport(projectRoot: string, report: EvalReport, basename?: string): Promise<string>;