cclaw-cli 0.23.1 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -58,6 +58,128 @@ function parseStructural(filePath, raw) {
58
58
  structural.maxChars = maxChars;
59
59
  return structural;
60
60
  }
61
+ function parseRegexRule(filePath, context, value) {
62
+ if (typeof value === "string") {
63
+ return { pattern: value };
64
+ }
65
+ if (!isRecord(value)) {
66
+ throw corpusError(filePath, `"${context}" entries must be either a string or a mapping with "pattern"`);
67
+ }
68
+ const pattern = value.pattern;
69
+ if (typeof pattern !== "string" || pattern.length === 0) {
70
+ throw corpusError(filePath, `"${context}" mapping entry must include a non-empty "pattern" string`);
71
+ }
72
+ const flags = value.flags;
73
+ if (flags !== undefined && typeof flags !== "string") {
74
+ throw corpusError(filePath, `"${context}" flags must be a string`);
75
+ }
76
+ const description = value.description;
77
+ if (description !== undefined && typeof description !== "string") {
78
+ throw corpusError(filePath, `"${context}" description must be a string`);
79
+ }
80
+ const rule = { pattern };
81
+ if (flags !== undefined)
82
+ rule.flags = flags;
83
+ if (description !== undefined)
84
+ rule.description = description;
85
+ return rule;
86
+ }
87
+ function parseRegexRules(filePath, context, value) {
88
+ if (value === undefined)
89
+ return undefined;
90
+ if (!Array.isArray(value)) {
91
+ throw corpusError(filePath, `"${context}" must be an array`);
92
+ }
93
+ return value.map((entry, index) => parseRegexRule(filePath, `${context}[${index}]`, entry));
94
+ }
95
+ function parseOccurrenceBounds(filePath, context, value) {
96
+ if (value === undefined)
97
+ return undefined;
98
+ if (!isRecord(value)) {
99
+ throw corpusError(filePath, `"${context}" must be a mapping of phrase → integer`);
100
+ }
101
+ const out = {};
102
+ for (const [phrase, count] of Object.entries(value)) {
103
+ if (typeof count !== "number" || !Number.isFinite(count) || !Number.isInteger(count) || count < 0) {
104
+ throw corpusError(filePath, `"${context}.${phrase}" must be a non-negative integer`);
105
+ }
106
+ out[phrase] = count;
107
+ }
108
+ return out;
109
+ }
110
+ function parseRules(filePath, raw) {
111
+ if (raw === undefined)
112
+ return undefined;
113
+ if (!isRecord(raw)) {
114
+ throw corpusError(filePath, `"expected.rules" must be a mapping`);
115
+ }
116
+ const mustContain = readStringArray(filePath, "expected.rules.must_contain", raw.must_contain ?? raw.mustContain);
117
+ const mustNotContain = readStringArray(filePath, "expected.rules.must_not_contain", raw.must_not_contain ?? raw.mustNotContain);
118
+ const regexRequired = parseRegexRules(filePath, "expected.rules.regex_required", raw.regex_required ?? raw.regexRequired);
119
+ const regexForbidden = parseRegexRules(filePath, "expected.rules.regex_forbidden", raw.regex_forbidden ?? raw.regexForbidden);
120
+ const minOccurrences = parseOccurrenceBounds(filePath, "expected.rules.min_occurrences", raw.min_occurrences ?? raw.minOccurrences);
121
+ const maxOccurrences = parseOccurrenceBounds(filePath, "expected.rules.max_occurrences", raw.max_occurrences ?? raw.maxOccurrences);
122
+ const uniqueBulletsInSection = readStringArray(filePath, "expected.rules.unique_bullets_in_section", raw.unique_bullets_in_section ?? raw.uniqueBulletsInSection);
123
+ const rules = {};
124
+ if (mustContain)
125
+ rules.mustContain = mustContain;
126
+ if (mustNotContain)
127
+ rules.mustNotContain = mustNotContain;
128
+ if (regexRequired)
129
+ rules.regexRequired = regexRequired;
130
+ if (regexForbidden)
131
+ rules.regexForbidden = regexForbidden;
132
+ if (minOccurrences)
133
+ rules.minOccurrences = minOccurrences;
134
+ if (maxOccurrences)
135
+ rules.maxOccurrences = maxOccurrences;
136
+ if (uniqueBulletsInSection)
137
+ rules.uniqueBulletsInSection = uniqueBulletsInSection;
138
+ return Object.keys(rules).length === 0 ? undefined : rules;
139
+ }
140
+ function parseTraceability(filePath, raw) {
141
+ if (raw === undefined)
142
+ return undefined;
143
+ if (!isRecord(raw)) {
144
+ throw corpusError(filePath, `"expected.traceability" must be a mapping`);
145
+ }
146
+ const idPattern = raw.id_pattern ?? raw.idPattern;
147
+ if (typeof idPattern !== "string" || idPattern.length === 0) {
148
+ throw corpusError(filePath, `"expected.traceability.id_pattern" must be a non-empty regex source`);
149
+ }
150
+ const idFlags = raw.id_flags ?? raw.idFlags;
151
+ if (idFlags !== undefined && typeof idFlags !== "string") {
152
+ throw corpusError(filePath, `"expected.traceability.id_flags" must be a string`);
153
+ }
154
+ const source = raw.source;
155
+ if (typeof source !== "string" || source.length === 0) {
156
+ throw corpusError(filePath, `"expected.traceability.source" must be "self" or an extra_fixtures label`);
157
+ }
158
+ const requireInRaw = raw.require_in ?? raw.requireIn;
159
+ const requireIn = readStringArray(filePath, "expected.traceability.require_in", requireInRaw);
160
+ if (!requireIn || requireIn.length === 0) {
161
+ throw corpusError(filePath, `"expected.traceability.require_in" must be a non-empty array`);
162
+ }
163
+ const out = { idPattern, source, requireIn };
164
+ if (idFlags !== undefined)
165
+ out.idFlags = idFlags;
166
+ return out;
167
+ }
168
+ function parseExtraFixtures(filePath, raw) {
169
+ if (raw === undefined)
170
+ return undefined;
171
+ if (!isRecord(raw)) {
172
+ throw corpusError(filePath, `"extra_fixtures" must be a mapping of label → path`);
173
+ }
174
+ const out = {};
175
+ for (const [label, value] of Object.entries(raw)) {
176
+ if (typeof value !== "string" || value.length === 0) {
177
+ throw corpusError(filePath, `"extra_fixtures.${label}" must be a non-empty path string`);
178
+ }
179
+ out[label] = value;
180
+ }
181
+ return Object.keys(out).length === 0 ? undefined : out;
182
+ }
61
183
  function parseExpected(filePath, raw) {
62
184
  if (raw === undefined)
63
185
  return undefined;
@@ -68,12 +190,12 @@ function parseExpected(filePath, raw) {
68
190
  const structural = parseStructural(filePath, raw.structural);
69
191
  if (structural)
70
192
  shape.structural = structural;
71
- if (raw.rules !== undefined) {
72
- if (!isRecord(raw.rules)) {
73
- throw corpusError(filePath, `"expected.rules" must be a mapping`);
74
- }
75
- shape.rules = raw.rules;
76
- }
193
+ const rules = parseRules(filePath, raw.rules);
194
+ if (rules)
195
+ shape.rules = rules;
196
+ const traceability = parseTraceability(filePath, raw.traceability);
197
+ if (traceability)
198
+ shape.traceability = traceability;
77
199
  if (raw.judge !== undefined) {
78
200
  if (!isRecord(raw.judge)) {
79
201
  throw corpusError(filePath, `"expected.judge" must be a mapping`);
@@ -101,13 +223,15 @@ function validateCase(filePath, raw) {
101
223
  const contextFiles = readStringArray(filePath, "context_files", raw.context_files ?? raw.contextFiles);
102
224
  const expected = parseExpected(filePath, raw.expected);
103
225
  const fixture = typeof raw.fixture === "string" ? raw.fixture : undefined;
226
+ const extraFixtures = parseExtraFixtures(filePath, raw.extra_fixtures ?? raw.extraFixtures);
104
227
  return {
105
228
  id: id.trim(),
106
229
  stage: stageRaw,
107
230
  inputPrompt: inputPrompt.trim(),
108
231
  contextFiles,
109
232
  expected,
110
- fixture
233
+ fixture,
234
+ extraFixtures
111
235
  };
112
236
  }
113
237
  /**
@@ -173,3 +297,34 @@ export async function readFixtureArtifact(projectRoot, caseEntry) {
173
297
  }
174
298
  return fs.readFile(fixturePath, "utf8");
175
299
  }
300
+ /**
301
+ * Resolve an entry from `extraFixtures` to an absolute filesystem path,
302
+ * relative to the case's stage directory (same convention as `fixture`).
303
+ */
304
+ export function extraFixturePath(projectRoot, caseEntry, label) {
305
+ const value = caseEntry.extraFixtures?.[label];
306
+ if (!value)
307
+ return undefined;
308
+ return path.resolve(projectRoot, EVALS_ROOT, "corpus", caseEntry.stage, value);
309
+ }
310
/**
 * Load every extra fixture declared on a case as UTF-8 text.
 *
 * Files are read one after another in `Object.keys` order. A declared
 * fixture whose file does not exist throws instead of being skipped, so
 * authoring mistakes are reported rather than hidden.
 *
 * @param projectRoot Project root directory.
 * @param caseEntry   Case whose `extraFixtures`, `stage` and `id` are read.
 * @returns A `{ label → text }` object; empty when the case declares none.
 * @throws Error naming the stage, case id, label and path of a missing file.
 */
export async function readExtraFixtures(projectRoot, caseEntry) {
    const contents = {};
    const declared = caseEntry.extraFixtures;
    if (!declared) {
        return contents;
    }
    for (const label of Object.keys(declared)) {
        const filePath = extraFixturePath(projectRoot, caseEntry, label);
        if (filePath) {
            const present = await exists(filePath);
            if (!present) {
                throw new Error(`Extra fixture missing for ${caseEntry.stage}/${caseEntry.id} ` +
                    `(label="${label}"): ${filePath}`);
            }
            contents[label] = await fs.readFile(filePath, "utf8");
        }
    }
    return contents;
}
@@ -0,0 +1,80 @@
1
+ import type { ChatUsage } from "./llm-client.js";
2
+ import type { ResolvedEvalConfig, TokenPricing } from "./types.js";
3
/**
 * Builtin per-model pricing table, consulted when `config.tokenPricing`
 * has no entry for the model.
 *
 * Values are USD per 1K tokens. Sources are public pricing pages as of
 * 2026-04; update by editing this constant, not the guard logic.
 */
export declare const DEFAULT_TOKEN_PRICING: Readonly<Record<string, TokenPricing>>;
/**
 * Hard default when neither config nor builtins know the model, so the
 * daily cap can still do something useful for unrecognised models.
 */
export declare const UNKNOWN_MODEL_PRICING: TokenPricing;
14
/** Running spend total for one UTC day, as persisted in the ledger file. */
export interface SpendLedger {
    /** ISO date (`YYYY-MM-DD` in UTC) — also embedded in the default file name. */
    date: string;
    /** USD spent so far today across every call committed through the guard. */
    totalUsd: number;
    /** Number of committed `chat()` calls accounted for. */
    calls: number;
    /** Per-model breakdown for the report, keyed by model name. */
    byModel: Record<string, {
        /** Sum of prompt tokens committed for this model. */
        tokensIn: number;
        /** Sum of completion tokens committed for this model. */
        tokensOut: number;
        /** USD committed for this model. */
        usd: number;
    }>;
}
28
/**
 * Thrown by `CostGuard.commit` when adding a call's cost to the day's
 * ledger total would exceed the configured `dailyUsdCap`.
 */
export declare class DailyCostCapExceededError extends Error {
    /** The configured daily cap, in USD. */
    readonly capUsd: number;
    /** Ledger total plus the cost of the rejected call, in USD. */
    readonly projectedUsd: number;
    /** Ledger total before the rejected call, in USD. */
    readonly currentUsd: number;
    constructor(opts: {
        capUsd: number;
        projectedUsd: number;
        currentUsd: number;
    });
}
38
/** `YYYY-MM-DD` prefix of `now.toISOString()`; `now` defaults to the current time. */
declare function utcDate(now?: Date): string;
/**
 * Pricing schedule for `model`: `config.tokenPricing[model]` first, then
 * `DEFAULT_TOKEN_PRICING[model]`, then `UNKNOWN_MODEL_PRICING`.
 */
declare function pricingFor(model: string, config: Pick<ResolvedEvalConfig, "tokenPricing">): TokenPricing;
/**
 * Compute USD cost of a single `ChatUsage` using the given `model` pricing
 * schedule, rounded to 6 decimals. Returns 0 when `usage` is missing or
 * `usage.totalTokens` is 0 or negative (e.g. transport error before first
 * token).
 */
export declare function computeUsageUsd(model: string, usage: ChatUsage, config: Pick<ResolvedEvalConfig, "tokenPricing">): number;
/** Default ledger file: `.spend-<date>.json` inside the evals root under `projectRoot`. */
declare function ledgerPath(projectRoot: string, date: string): string;
/**
 * Read the ledger at `file`. Resolves to an empty ledger for `date` when
 * the file is missing, cannot be read or parsed, or records another date.
 */
declare function readLedger(file: string, date: string): Promise<SpendLedger>;
/** Create the parent directory if needed and write `ledger` as indented JSON. */
declare function writeLedger(file: string, ledger: SpendLedger): Promise<void>;
49
/**
 * Daily-spend guard returned by `createCostGuard`.
 *
 * When `config.dailyUsdCap` is unset, the guard is a no-op beyond computing
 * the call's cost — no file reads, no file writes, no ledger — so runs
 * without a cap never touch the filesystem.
 */
export interface CostGuard {
    /**
     * Commit the USD cost of a finished call to the ledger and resolve to
     * that call's cost. When `dailyUsdCap` is set, rejects with
     * `DailyCostCapExceededError` if the projected total would exceed the
     * cap; a rejected call is not written to the ledger.
     */
    commit(model: string, usage: ChatUsage): Promise<number>;
    /** Snapshot the current ledger (or undefined when no cap is set). */
    snapshot(): Promise<SpendLedger | undefined>;
}
65
/** Optional knobs for `createCostGuard`. */
export interface CreateCostGuardOptions {
    /** Clock injection for tests. Defaults to `() => new Date()`. */
    now?: () => Date;
    /**
     * Path of the ledger file to use instead of the default per-day file
     * under the project root.
     */
    ledgerPath?: string;
}
71
/**
 * Build a `CostGuard` for `projectRoot`.
 *
 * @param projectRoot Project root under which the default ledger file lives.
 * @param config      Supplies the optional `dailyUsdCap` and `tokenPricing`.
 * @param options     Clock and ledger-path overrides.
 */
export declare function createCostGuard(projectRoot: string, config: Pick<ResolvedEvalConfig, "dailyUsdCap" | "tokenPricing">, options?: CreateCostGuardOptions): CostGuard;
/** Module-private helpers, exposed for tests. */
export declare const __internal: {
    utcDate: typeof utcDate;
    pricingFor: typeof pricingFor;
    ledgerPath: typeof ledgerPath;
    readLedger: typeof readLedger;
    writeLedger: typeof writeLedger;
};
export {};
@@ -0,0 +1,153 @@
1
+ /**
2
+ * Cost guard for the cclaw eval subsystem.
3
+ *
4
+ * Two responsibilities:
5
+ *
6
+ * 1. Convert `ChatUsage` (prompt/completion token counts) into USD using
7
+ * a per-model `TokenPricing` schedule. Pricing comes from
8
+ * `config.tokenPricing[model]` first, then from the builtin fallback
9
+ * schedule for well-known models (z.ai GLM 5.1 at publish time).
10
+ * 2. Maintain a per-day running total persisted to
11
+ * `.cclaw/evals/.spend-YYYY-MM-DD.json` so that a long eval session
12
+ * (or a cron-run nightly) can't blow through the configured
13
+ * `dailyUsdCap`. The counter is opt-in: no cap, no writes.
14
+ *
15
+ * The guard is deliberately pessimistic — it rounds USD to 6 decimals
16
+ * and never subtracts, so a CI run that errors mid-flight still shows the
17
+ * partial spend in the next report.
18
+ */
19
+ import fs from "node:fs/promises";
20
+ import path from "node:path";
21
+ import { EVALS_ROOT } from "../constants.js";
22
+ import { exists } from "../fs-utils.js";
23
/**
 * Builtin per-model pricing table, consulted when `config.tokenPricing`
 * has no entry for the model.
 *
 * Values are USD per 1K tokens. Sources are public pricing pages as of
 * 2026-04; update by editing this constant, not the guard logic.
 */
export const DEFAULT_TOKEN_PRICING = {
    "glm-5.1": { input: 0.0005, output: 0.0015 },
    "glm-4.6": { input: 0.0005, output: 0.0015 },
    "gpt-4o-mini": { input: 0.00015, output: 0.0006 },
    "gpt-4o": { input: 0.005, output: 0.015 }
};
/**
 * Hard default when neither config nor builtins know the model, so the
 * daily cap can still do something useful for unrecognised models.
 */
export const UNKNOWN_MODEL_PRICING = { input: 0.001, output: 0.003 };
39
+ export class DailyCostCapExceededError extends Error {
40
+ capUsd;
41
+ projectedUsd;
42
+ currentUsd;
43
+ constructor(opts) {
44
+ super(`Daily cost cap would be exceeded: ` +
45
+ `current=$${opts.currentUsd.toFixed(4)}, ` +
46
+ `projected=$${opts.projectedUsd.toFixed(4)}, ` +
47
+ `cap=$${opts.capUsd.toFixed(4)}. ` +
48
+ `Unset CCLAW_EVAL_DAILY_USD_CAP or increase the cap to continue.`);
49
+ this.name = "DailyCostCapExceededError";
50
+ this.capUsd = opts.capUsd;
51
+ this.projectedUsd = opts.projectedUsd;
52
+ this.currentUsd = opts.currentUsd;
53
+ }
54
+ }
55
/**
 * Format a moment as its UTC calendar date.
 *
 * @param now Moment to format; defaults to the current time.
 * @returns The first ten characters of `now.toISOString()` (`YYYY-MM-DD`).
 */
function utcDate(now = new Date()) {
    const isoTimestamp = now.toISOString();
    return isoTimestamp.substring(0, 10);
}
58
+ function pricingFor(model, config) {
59
+ const custom = config.tokenPricing?.[model];
60
+ if (custom)
61
+ return custom;
62
+ const builtin = DEFAULT_TOKEN_PRICING[model];
63
+ if (builtin)
64
+ return builtin;
65
+ return UNKNOWN_MODEL_PRICING;
66
+ }
67
+ /**
68
+ * Compute USD cost of a single `ChatUsage` using the given `model` pricing
69
+ * schedule. Returns 0 when `usage.totalTokens` is 0 (e.g. transport error
70
+ * before first token).
71
+ */
72
+ export function computeUsageUsd(model, usage, config) {
73
+ if (!usage || usage.totalTokens <= 0)
74
+ return 0;
75
+ const schedule = pricingFor(model, config);
76
+ const cost = (usage.promptTokens * schedule.input) / 1_000 +
77
+ (usage.completionTokens * schedule.output) / 1_000;
78
+ return Math.max(0, Number(cost.toFixed(6)));
79
+ }
80
/**
 * Build a zeroed ledger for `date`: no spend, no calls, no per-model rows.
 *
 * @param date Value stored in the ledger's `date` field.
 * @returns A new ledger object with its own empty `byModel` map.
 */
function emptyLedger(date) {
    const ledger = {
        date,
        totalUsd: 0,
        calls: 0,
        byModel: {}
    };
    return ledger;
}
83
+ function ledgerPath(projectRoot, date) {
84
+ return path.join(projectRoot, EVALS_ROOT, `.spend-${date}.json`);
85
+ }
86
/**
 * Load the ledger stored at `file` for the given day.
 *
 * Anything that prevents reading a ledger for `date` — file missing, read
 * or JSON-parse failure, or a stored `date` that differs — resolves to an
 * empty ledger instead of throwing. Fields of the wrong type are replaced
 * by their zero value.
 *
 * @param file Path of the ledger file.
 * @param date Day the caller expects the ledger to describe.
 * @returns The stored ledger, or an empty one for `date`.
 */
async function readLedger(file, date) {
    const present = await exists(file);
    if (!present) {
        return emptyLedger(date);
    }
    try {
        const stored = JSON.parse(await fs.readFile(file, "utf8"));
        if (stored?.date === date) {
            const numberOrZero = (candidate) => (typeof candidate === "number" ? candidate : 0);
            return {
                date,
                totalUsd: numberOrZero(stored.totalUsd),
                calls: numberOrZero(stored.calls),
                byModel: stored.byModel && typeof stored.byModel === "object" ? stored.byModel : {}
            };
        }
    }
    catch {
        // Unreadable or malformed ledger: fall through to a fresh one.
    }
    return emptyLedger(date);
}
104
/**
 * Persist `ledger` to `file` as 2-space-indented JSON followed by a newline,
 * creating the parent directory first when it does not exist.
 *
 * @param file   Destination path.
 * @param ledger Ledger object to serialise.
 */
async function writeLedger(file, ledger) {
    const directory = path.dirname(file);
    await fs.mkdir(directory, { recursive: true });
    const payload = `${JSON.stringify(ledger, null, 2)}\n`;
    await fs.writeFile(file, payload, "utf8");
}
108
+ export function createCostGuard(projectRoot, config, options = {}) {
109
+ const now = options.now ?? (() => new Date());
110
+ const currentDate = () => utcDate(now());
111
+ const file = () => options.ledgerPath ?? ledgerPath(projectRoot, currentDate());
112
+ return {
113
+ async commit(model, usage) {
114
+ const usd = computeUsageUsd(model, usage, config);
115
+ if (config.dailyUsdCap === undefined)
116
+ return usd;
117
+ const date = currentDate();
118
+ const target = file();
119
+ const ledger = await readLedger(target, date);
120
+ const projected = Number((ledger.totalUsd + usd).toFixed(6));
121
+ if (projected > config.dailyUsdCap) {
122
+ throw new DailyCostCapExceededError({
123
+ capUsd: config.dailyUsdCap,
124
+ projectedUsd: projected,
125
+ currentUsd: ledger.totalUsd
126
+ });
127
+ }
128
+ ledger.totalUsd = projected;
129
+ ledger.calls += 1;
130
+ const byModel = ledger.byModel[model] ?? { tokensIn: 0, tokensOut: 0, usd: 0 };
131
+ byModel.tokensIn += usage.promptTokens;
132
+ byModel.tokensOut += usage.completionTokens;
133
+ byModel.usd = Number((byModel.usd + usd).toFixed(6));
134
+ ledger.byModel[model] = byModel;
135
+ await writeLedger(target, ledger);
136
+ return usd;
137
+ },
138
+ async snapshot() {
139
+ if (config.dailyUsdCap === undefined)
140
+ return undefined;
141
+ const date = currentDate();
142
+ return readLedger(file(), date);
143
+ }
144
+ };
145
+ }
146
/** Module-private helpers, re-exported as one object so tests can reach them. */
export const __internal = {
    utcDate: utcDate,
    pricingFor: pricingFor,
    ledgerPath: ledgerPath,
    readLedger: readLedger,
    writeLedger: writeLedger
};
@@ -1,18 +1,5 @@
1
- /**
2
- * LLM client skeleton for the cclaw eval subsystem.
3
- *
4
- * This module declares the shape of the client without pulling in the
5
- * `openai` runtime dependency. The real implementation lands when
6
- * single-shot (Tier A) evals and LLM judging come online. Keeping this stub
7
- * separate means users who only run structural + rule-based verifiers never
8
- * install an extra dependency or receive network egress warnings.
9
- */
1
+ import type { ClientOptions } from "openai";
10
2
  import type { ResolvedEvalConfig } from "./types.js";
11
- /**
12
- * Minimal chat interface the rest of the eval code will depend on. It is
13
- * intentionally a subset of OpenAI's Chat Completions surface so that the
14
- * real implementation is a thin adapter around `OpenAI.chat.completions.create`.
15
- */
16
3
  export interface ChatMessage {
17
4
  role: "system" | "user" | "assistant" | "tool";
18
5
  content: string;
@@ -24,7 +11,18 @@ export interface ChatRequest {
24
11
  messages: ChatMessage[];
25
12
  maxTokens?: number;
26
13
  temperature?: number;
14
+ /** Per-call timeout override. Falls back to `config.timeoutMs`. */
27
15
  timeoutMs?: number;
16
+ /**
17
+ * Ask the provider for a JSON-object response. The judge pipeline sets
18
+ * this; the agent-under-test usually leaves it unset.
19
+ */
20
+ responseFormatJson?: boolean;
21
+ /**
22
+ * Optional deterministic sampling seed. Providers that don't implement
23
+ * `seed` simply ignore it.
24
+ */
25
+ seed?: number;
28
26
  /**
29
27
  * Tool/function-calling definitions in OpenAI wire format. Populated only
30
28
  * by Tier B. Ignored by the Tier A single-shot path.
@@ -46,17 +44,112 @@ export interface ChatResponse {
46
44
  }>;
47
45
  usage: ChatUsage;
48
46
  finishReason: "stop" | "length" | "tool_calls" | "content_filter";
47
+ model: string;
48
+ attempts: number;
49
+ }
50
/**
 * Base class so callers can `catch (err) { if (err instanceof EvalLlmError) ... }`.
 * Every other `EvalLlm*Error` declared in this file extends it.
 */
export declare class EvalLlmError extends Error {
    /** NOTE(review): presumably tells the retry loop whether another attempt is worthwhile — confirm in the implementation. */
    readonly retryable: boolean;
    /** NOTE(review): presumably the HTTP status of the failed request, when there was one — confirm. */
    readonly status?: number;
    constructor(message: string, opts: {
        retryable: boolean;
        status?: number;
        cause?: unknown;
    });
}
60
/** NOTE(review): presumably raised when the provider rejects the credentials — confirm in the implementation. */
export declare class EvalLlmAuthError extends EvalLlmError {
    constructor(cause: unknown);
}
/** NOTE(review): presumably raised for invalid client configuration — confirm in the implementation. */
export declare class EvalLlmConfigError extends EvalLlmError {
    constructor(message: string, cause?: unknown);
}
/** Constructed from the timeout, in milliseconds. NOTE(review): presumably raised when a call exceeds it — confirm. */
export declare class EvalLlmTimeoutError extends EvalLlmError {
    constructor(timeoutMs: number);
}
/** NOTE(review): presumably raised when the provider rate-limits the call — confirm in the implementation. */
export declare class EvalLlmRateLimitedError extends EvalLlmError {
    constructor(cause: unknown);
}
/** NOTE(review): presumably raised for network/HTTP failures, with the status when known — confirm. */
export declare class EvalLlmTransportError extends EvalLlmError {
    constructor(cause: unknown, status?: number);
}
/** NOTE(review): presumably raised when the provider's response cannot be used — confirm in the implementation. */
export declare class EvalLlmInvalidResponseError extends EvalLlmError {
    constructor(message: string, details?: Record<string, unknown>);
}
/** Error for the "caller forgot to provide an API key" case. */
export declare class EvalLlmNotConfiguredError extends EvalLlmError {
    constructor();
}
50
81
/** Lightweight client abstraction shared across eval runners. */
export interface EvalLlmClient {
    /** Send one chat request and resolve to the provider's response. */
    chat(request: ChatRequest): Promise<ChatResponse>;
}
54
- export declare class EvalLlmNotWiredError extends Error {
55
- constructor();
85
/**
 * Deprecated shim preserved so older wiring keeps compiling.
 *
 * @deprecated Prefer `EvalLlmNotConfiguredError` for the "caller forgot to
 * provide an API key" case.
 */
export declare class EvalLlmNotWiredError extends EvalLlmNotConfiguredError {
}
92
/** `createEvalClient` options — mostly for tests to inject a fake transport. */
export interface CreateEvalClientOptions {
    /** Inject an `openai` stand-in. Used by unit tests to avoid real HTTP. */
    openaiFactory?: (opts: ClientOptions) => OpenAILike;
    /**
     * Override the default retry/backoff policy used by the internal retry
     * loop. When unset, the defaults apply.
     * NOTE(review): the defaults are presumably `DEFAULT_RETRY_POLICY` — confirm.
     */
    retryPolicy?: RetryPolicy;
    /** Deterministic sleep used by the retry loop. Defaults to `setTimeout`. */
    sleep?: (ms: number) => Promise<void>;
}
105
/** Retry/backoff settings for the client's retry loop. */
export interface RetryPolicy {
    /** Max retries *on top of* the initial attempt. 0 = single attempt. */
    maxRetries: number;
    /** Initial backoff in ms. Doubles each retry (capped at `maxBackoffMs`). */
    initialBackoffMs: number;
    /** Upper bound for a single sleep between attempts. */
    maxBackoffMs: number;
}
/** Retry policy exported as the module default; its values are defined in the implementation. */
export declare const DEFAULT_RETRY_POLICY: RetryPolicy;
114
/**
 * Minimal OpenAI-SDK surface we depend on, declared here so tests can
 * substitute a plain object without pulling the real SDK into the test
 * runtime.
 */
export interface OpenAILike {
    chat: {
        completions: {
            /**
             * Issue one chat-completion request. `options.signal` is the
             * abort signal handed to the transport.
             */
            create(body: Record<string, unknown>, options: {
                signal: AbortSignal;
            }): Promise<OpenAILikeChatResponse>;
        };
    };
}
128
/**
 * Shape of the value `OpenAILike.chat.completions.create` resolves to.
 * Apart from `choices`, and the fields of each tool call, every field is
 * optional.
 */
interface OpenAILikeChatResponse {
    model?: string;
    choices: Array<{
        message?: {
            content?: string | null;
            tool_calls?: Array<{
                id: string;
                function: {
                    name: string;
                    // Declared as a plain string. NOTE(review): presumably JSON-encoded arguments — confirm.
                    arguments: string;
                };
            }>;
        };
        finish_reason?: string | null;
    }>;
    usage?: {
        prompt_tokens?: number;
        completion_tokens?: number;
        total_tokens?: number;
    };
}
57
149
  /**
58
- * Factory stub. Throws with a clear message so accidental early usage is
59
- * easy to diagnose. The real implementation will replace this body with
60
- * `new OpenAI({ apiKey, baseURL }) ... adapter`.
150
+ * Build a real client pointed at the configured endpoint. Throws
151
+ * `EvalLlmNotConfiguredError` at call time (not construction time) when no
152
+ * API key is available, so CLI help and dry-run paths stay offline-safe.
61
153
  */
62
- export declare function createEvalClient(_config: ResolvedEvalConfig): EvalLlmClient;
154
+ export declare function createEvalClient(config: ResolvedEvalConfig, options?: CreateEvalClientOptions): EvalLlmClient;
155
+ export {};