@plune-ai/cli 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs ADDED
@@ -0,0 +1,1502 @@
1
+ "use strict";
2
// Cached built-ins shared by the module-interop helpers below.
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/** Installs every entry of `all` on `target` as an enumerable getter property. */
var __export = (target, all) => {
  for (const name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

/**
 * Mirrors the own properties of `from` onto `to` as getters that read through to `from`.
 * Keys already present on `to`, and the key named by `except`, are left alone.
 * Returns `to`.
 */
var __copyProps = (to, from, except, desc) => {
  const copyable = (from && typeof from === "object") || typeof from === "function";
  if (!copyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (key === except || __hasOwnProp.call(to, key)) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    // Enumerability follows the source descriptor; a missing descriptor counts as enumerable.
    __defProp(to, key, { get: () => from[key], enumerable: !desc || desc.enumerable });
  }
  return to;
};

/** Wraps a required module in a namespace-like object that shares the module's prototype. */
var __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  // If the importer is in node compatibility mode or this is not an ESM
  // file that has been converted to a CommonJS file using a Babel-
  // compatible transform (i.e. "__esModule" has not been set), then set
  // "default" to the CommonJS "module.exports" for node compatibility.
  const needsDefault = isNodeMode || !mod || !mod.__esModule;
  if (needsDefault) {
    __defProp(target, "default", { value: mod, enumerable: true });
  }
  return __copyProps(target, mod);
};

/** Copies `mod` onto a fresh object flagged with a non-enumerable `__esModule: true`. */
var __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
29
+
30
+ // src/index.ts
31
// Public surface of the package: `run` is exposed as a getter that resolves to handleRun.
var src_exports = {};
__export(src_exports, {
  run() {
    return handleRun;
  }
});
module.exports = __toCommonJS(src_exports);
36
+
37
+ // src/cli/commands/run.ts
38
+ var fs5 = __toESM(require("fs"), 1);
39
+ var path4 = __toESM(require("path"), 1);
40
+
41
+ // src/config/loader.ts
42
+ var fs2 = __toESM(require("fs/promises"), 1);
43
+ var import_yaml = require("yaml");
44
+
45
+ // src/config/env-overrides.ts
46
/**
 * Returns a deep copy of `config` with provider settings overridden from `env`.
 *
 * Recognized variables: PLUNE_PROVIDER, PLUNE_MODEL (copied verbatim as strings) and
 * PLUNE_TIMEOUT, PLUNE_MAX_RETRIES (parsed as numbers). A numeric variable that is
 * blank or does not parse to a finite number is ignored, leaving the configured
 * value in place. The input `config` is never mutated.
 *
 * @param {object} config - Configuration object with a `provider` section.
 * @param {Record<string, string | undefined>} env - Environment variable map.
 * @returns {object} A new configuration object with overrides applied.
 */
function applyEnvOverrides(config, env) {
  const result = structuredClone(config);
  const provider = result.provider;

  // Number("") and Number("   ") are 0, so blank values must be rejected explicitly;
  // otherwise `PLUNE_TIMEOUT=` would silently replace the configured value with 0.
  const readNumber = (name) => {
    const raw = env[name];
    if (raw === void 0 || String(raw).trim() === "") {
      return void 0;
    }
    const parsed = Number(raw);
    return Number.isFinite(parsed) ? parsed : void 0;
  };

  if (env["PLUNE_PROVIDER"] !== void 0) {
    provider.type = env["PLUNE_PROVIDER"];
  }
  if (env["PLUNE_MODEL"] !== void 0) {
    provider.model = env["PLUNE_MODEL"];
  }

  const timeout = readNumber("PLUNE_TIMEOUT");
  if (timeout !== void 0) {
    provider.timeout = timeout;
  }
  const maxRetries = readNumber("PLUNE_MAX_RETRIES");
  if (maxRetries !== void 0) {
    provider.max_retries = maxRetries;
  }
  return result;
}
64
+
65
+ // src/config/errors.ts
66
/** Error carrying code "CFG_NOT_FOUND". */
var ConfigNotFoundError = class extends Error {
  constructor(message) {
    super(message);
    this.code = "CFG_NOT_FOUND";
    this.name = "ConfigNotFoundError";
  }
};
73
/** Error carrying code "YAML_PARSE_ERROR". */
var YamlParseError = class extends Error {
  constructor(message) {
    super(message);
    this.code = "YAML_PARSE_ERROR";
    this.name = "YamlParseError";
  }
};
80
/**
 * Error carrying code "CONFIG_VALIDATION_ERROR" plus the list of validation
 * issues passed to the constructor (exposed as `issues`).
 */
var ConfigValidationError = class extends Error {
  constructor(message, issues) {
    super(message);
    this.code = "CONFIG_VALIDATION_ERROR";
    this.issues = issues;
    this.name = "ConfigValidationError";
  }
};
89
+
90
+ // src/config/discover.ts
91
+ var fs = __toESM(require("fs"), 1);
92
+ var path = __toESM(require("path"), 1);
93
var FILENAME = "plune.yaml";
// Upper bound on how many directories the upward search inspects.
var MAX_STEPS = 50;

/**
 * Locates the config file.
 *
 * When `override` is given it is returned as-is if it exists on disk; otherwise the
 * search starts at `cwd` and walks up parent directories looking for FILENAME,
 * stopping at the filesystem root or after MAX_STEPS directories.
 *
 * @param {string} cwd - Directory to start the upward search from.
 * @param {string} [override] - Explicit config path that bypasses the search.
 * @returns {string} Path of the config file.
 * @throws {ConfigNotFoundError} When the override does not exist or nothing is found.
 */
function discoverConfigPath(cwd, override) {
  if (override !== void 0) {
    if (!fs.existsSync(override)) {
      throw new ConfigNotFoundError(`Config file not found: ${override}`);
    }
    return override;
  }

  let current = cwd;
  for (let visited = 0; visited < MAX_STEPS; visited++) {
    const candidate = path.join(current, FILENAME);
    if (fs.existsSync(candidate)) {
      return candidate;
    }
    const parent = path.dirname(current);
    if (parent === current) {
      // dirname() of the root is the root itself: nowhere left to climb.
      break;
    }
    current = parent;
  }

  throw new ConfigNotFoundError(
    `No ${FILENAME} found. Run "plune init" to create one.`
  );
}
120
+
121
+ // src/config/schema.ts
122
+ var import_zod = require("zod");
123
/** Provider section: backend type, model id and optional numeric tuning fields. */
var providerConfigSchema = import_zod.z.object({
  type: import_zod.z.enum(["anthropic", "openai", "openrouter"]),
  model: import_zod.z.string().min(1),
  temperature: import_zod.z.number().min(0).max(2).optional(),
  max_tokens: import_zod.z.number().int().positive().optional(),
  concurrency: import_zod.z.number().int().positive().optional(),
  timeout: import_zod.z.number().int().positive().optional(),
  max_retries: import_zod.z.number().int().min(0).optional()
});

/** Price of one model: non-negative USD per 1k input and output tokens. */
var modelPriceSchema = import_zod.z.object({
  input_per_1k_usd: import_zod.z.number().nonnegative(),
  output_per_1k_usd: import_zod.z.number().nonnegative()
});

/** Record of model prices keyed by string. */
var pricingSchema = import_zod.z.record(modelPriceSchema);

/** One dataset example: scalar template variables plus an optional expected string. */
var datasetRowSchema = import_zod.z.object({
  vars: import_zod.z.record(
    import_zod.z.union([
      import_zod.z.string(),
      import_zod.z.number(),
      import_zod.z.boolean()
    ])
  ),
  expected: import_zod.z.string().optional()
});

/** A dataset is either a non-empty string or an inline object with at least one example. */
var datasetRefSchema = import_zod.z.union([
  import_zod.z.string().min(1),
  import_zod.z.object({
    examples: import_zod.z.array(datasetRowSchema).min(1)
  })
]);
145
// --- String-comparison assertions ---

var exactMatchAssertionSchema = import_zod.z.object({
  type: import_zod.z.literal("exact-match"),
  value: import_zod.z.string(),
  trim: import_zod.z.boolean().optional(),
  ignore_case: import_zod.z.boolean().optional()
});

var containsAssertionSchema = import_zod.z.object({
  type: import_zod.z.literal("contains"),
  value: import_zod.z.string(),
  ignore_case: import_zod.z.boolean().optional()
});

var containsAnyAssertionSchema = import_zod.z.object({
  type: import_zod.z.literal("contains-any"),
  values: import_zod.z.array(import_zod.z.string()).min(1),
  ignore_case: import_zod.z.boolean().optional()
});

var containsAllAssertionSchema = import_zod.z.object({
  type: import_zod.z.literal("contains-all"),
  values: import_zod.z.array(import_zod.z.string()).min(1),
  ignore_case: import_zod.z.boolean().optional()
});

// --- Structured-output assertion ---

var jsonSchemaAssertionSchema = import_zod.z.object({
  type: import_zod.z.literal("json-schema"),
  schema: import_zod.z.record(import_zod.z.unknown()),
  extract: import_zod.z.enum(["auto", "strict"]).optional()
});

// --- Scored assertions: each optional threshold is a number within [0, 1] ---

var llmJudgeAssertionSchema = import_zod.z.object({
  type: import_zod.z.literal("llm-judge"),
  criteria: import_zod.z.string().min(1),
  provider: providerConfigSchema.partial().optional(),
  pass_threshold: import_zod.z.number().min(0).max(1).optional()
});

var semanticSimilarityAssertionSchema = import_zod.z.object({
  type: import_zod.z.literal("semantic-similarity"),
  reference: import_zod.z.string().min(1),
  threshold: import_zod.z.number().min(0).max(1).optional()
});

var faithfulnessAssertionSchema = import_zod.z.object({
  type: import_zod.z.literal("faithfulness"),
  context: import_zod.z.string().min(1),
  threshold: import_zod.z.number().min(0).max(1).optional()
});

var answerRelevanceAssertionSchema = import_zod.z.object({
  type: import_zod.z.literal("answer-relevance"),
  question: import_zod.z.string().min(1),
  threshold: import_zod.z.number().min(0).max(1).optional()
});

var contextPrecisionAssertionSchema = import_zod.z.object({
  type: import_zod.z.literal("context-precision"),
  context: import_zod.z.string().min(1),
  question: import_zod.z.string().min(1),
  threshold: import_zod.z.number().min(0).max(1).optional()
});

/** Any assertion config, discriminated by its `type` literal. */
var assertionConfigSchema = import_zod.z.discriminatedUnion("type", [
  exactMatchAssertionSchema,
  containsAssertionSchema,
  containsAnyAssertionSchema,
  containsAllAssertionSchema,
  jsonSchemaAssertionSchema,
  llmJudgeAssertionSchema,
  semanticSimilarityAssertionSchema,
  faithfulnessAssertionSchema,
  answerRelevanceAssertionSchema,
  contextPrecisionAssertionSchema
]);
210
/**
 * One eval entry. `prompt` and `prompt_file` are both optional, but the
 * refinement rejects entries that set both.
 */
var evalConfigSchema = import_zod.z
  .object({
    id: import_zod.z.string().min(1).regex(/^[a-z0-9_-]+$/, "id must be lowercase slug"),
    description: import_zod.z.string().optional(),
    tags: import_zod.z.array(import_zod.z.string()).optional(),
    provider: providerConfigSchema.partial().optional(),
    prompt: import_zod.z.string().optional(),
    prompt_file: import_zod.z.string().optional(),
    dataset: datasetRefSchema,
    assertions: import_zod.z.array(assertionConfigSchema)
  })
  .refine(
    (data) => data.prompt === void 0 || data.prompt_file === void 0,
    { message: "prompt and prompt_file are mutually exclusive" }
  );

/** Root config document; `.strict()` rejects unknown top-level keys. */
var pluneConfigSchema = import_zod.z
  .object({
    version: import_zod.z.literal(1),
    provider: providerConfigSchema,
    defaults: import_zod.z
      .object({ assertions: import_zod.z.array(assertionConfigSchema).optional() })
      .optional(),
    pricing: pricingSchema.optional(),
    evals: import_zod.z.array(evalConfigSchema).min(1)
  })
  .strict();
230
+
231
+ // src/config/loader.ts
232
/**
 * Loads the config: locate the file, read it, parse the YAML, validate it against
 * pluneConfigSchema, then apply environment overrides.
 *
 * @param {object} [opts]
 * @param {string} [opts.configPath] - Explicit config path (skips discovery).
 * @param {string} [opts.cwd] - Directory discovery starts from; defaults to process.cwd().
 * @param {object} [opts.env] - Environment map; defaults to process.env.
 * @returns {Promise<object>} The validated config with overrides applied.
 * @throws {ConfigNotFoundError} When the file cannot be found or read.
 * @throws {YamlParseError} When the file content is not parseable YAML.
 * @throws {ConfigValidationError} When the parsed document fails schema validation.
 */
async function loadConfig(opts = {}) {
  const { configPath, cwd = process.cwd(), env = process.env } = opts;
  const resolvedPath = discoverConfigPath(cwd, configPath);

  let text;
  try {
    text = await fs2.readFile(resolvedPath, "utf8");
  } catch {
    throw new ConfigNotFoundError(`Cannot read config file: ${resolvedPath}`);
  }

  let document;
  try {
    document = (0, import_yaml.parse)(text);
  } catch (err) {
    throw new YamlParseError(
      `YAML parse error in ${resolvedPath}: ${err.message}`
    );
  }

  const validation = pluneConfigSchema.safeParse(document);
  if (validation.success) {
    return applyEnvOverrides(validation.data, env);
  }

  // Flatten each issue to "dotted.path: message" for display.
  const issues = validation.error.issues.map(
    ({ path: where, message }) => `${where.join(".")}: ${message}`
  );
  throw new ConfigValidationError(
    `Config validation failed in ${resolvedPath}`,
    issues
  );
}
261
+
262
+ // src/providers/anthropic.ts
263
+ var import_sdk = __toESM(require("@anthropic-ai/sdk"), 1);
264
+
265
+ // src/providers/errors.ts
266
// Error `code` values that classifyError treats as transient network failures.
var NETWORK_ERROR_CODES = /* @__PURE__ */ new Set(
  ["ECONNRESET", "ETIMEDOUT", "ECONNREFUSED", "EPIPE", "ENOTFOUND", "EAI_AGAIN"]
);
274
/**
 * Error carrying code "PROVIDER_AUTH" and the name of the environment
 * variable passed to the constructor (exposed as `envVar`).
 */
var AuthError = class extends Error {
  constructor(message, envVar) {
    super(message);
    this.code = "PROVIDER_AUTH";
    this.envVar = envVar;
    this.name = "AuthError";
  }
};
283
/** Provider failure whose `code` is supplied by the caller. */
var ProviderError = class extends Error {
  constructor(code, message) {
    super(message);
    this.code = code;
    this.name = "ProviderError";
  }
};
291
/** Returns `err.status` when `err` is an object with a numeric `status`; otherwise undefined. */
function statusOf(err) {
  const hasStatus = err !== null && typeof err === "object" && "status" in err;
  if (!hasStatus) {
    return void 0;
  }
  const candidate = err.status;
  return typeof candidate === "number" ? candidate : void 0;
}

/** Returns `err.code` when `err` is an object with a string `code`; otherwise undefined. */
function codeOf(err) {
  const hasCode = err !== null && typeof err === "object" && "code" in err;
  if (!hasCode) {
    return void 0;
  }
  const candidate = err.code;
  return typeof candidate === "string" ? candidate : void 0;
}

/**
 * Classifies an error as "auth", "transient" or "fatal".
 *
 * A numeric status decides on its own: 401/403 are "auth", 429 and 5xx are
 * "transient", anything else is "fatal". Without a status, a known network
 * error code or a transport-style error name means "transient".
 */
function classifyError(err) {
  const status = statusOf(err);
  if (status === void 0) {
    const code = codeOf(err);
    const networkFailure = code !== void 0 && NETWORK_ERROR_CODES.has(code);
    return networkFailure || isTransportError(err) ? "transient" : "fatal";
  }
  if (status === 401 || status === 403) {
    return "auth";
  }
  const retryable = status === 429 || (status >= 500 && status <= 599);
  return retryable ? "transient" : "fatal";
}

/** True when the error's `name` or its constructor's name looks like a connection/timeout error. */
function isTransportError(err) {
  if (err === null || typeof err !== "object") {
    return false;
  }
  const pattern = /APIConnection|ConnectionError|TimeoutError|AbortError|FetchError/;
  const labels = [err.name, err.constructor?.name];
  for (const label of labels) {
    if (typeof label === "string" && pattern.test(label)) {
      return true;
    }
  }
  return false;
}
327
/**
 * Replaces every occurrence of each non-empty secret in `message` with "[REDACTED]",
 * then masks anything that looks like a bearer token.
 */
function redactSecrets(message, ...secrets) {
  const scrubbed = secrets
    .filter(Boolean)
    .reduce((text, secret) => text.split(secret).join("[REDACTED]"), message);
  return scrubbed.replace(/Bearer\s+[A-Za-z0-9._-]+/g, "Bearer [REDACTED]");
}
335
/** Extracts a printable message from any thrown value. */
function messageOf(err) {
  if (err instanceof Error) {
    return err.message;
  }
  const carriesMessage = err !== null && typeof err === "object" && "message" in err;
  return carriesMessage ? String(err.message) : String(err);
}
342
/**
 * Converts any thrown value into an AuthError or ProviderError whose message has
 * `secret` redacted. An existing AuthError is returned untouched; errors classified
 * as "auth" become AuthError tagged with `envVar`; an existing ProviderError keeps
 * its code; everything else gets code "PROVIDER_ERROR".
 */
function normalizeProviderError(err, secret, envVar) {
  if (err instanceof AuthError) {
    return err;
  }
  const scrub = (text) => redactSecrets(text, secret);
  if (classifyError(err) === "auth") {
    return new AuthError(scrub(messageOf(err)), envVar);
  }
  return err instanceof ProviderError
    ? new ProviderError(err.code, scrub(err.message))
    : new ProviderError("PROVIDER_ERROR", scrub(messageOf(err)));
}
352
+
353
+ // src/providers/retry.ts
354
/** Resolves after `ms` milliseconds. */
var defaultSleep = (ms) =>
  new Promise((wake) => {
    setTimeout(wake, ms);
  });
355
/**
 * Reads a Retry-After hint from `err.headers` and converts it to milliseconds.
 *
 * Both forms of the header are understood: a non-negative number of seconds, and
 * an HTTP-date in the fixed "Sun, 06 Nov 1994 08:49:37 GMT" layout (converted to
 * the time remaining from now, never negative).
 *
 * @param {unknown} err - Thrown value that may carry a `headers` member.
 * @returns {number | undefined} Delay in milliseconds, or undefined when there is
 *   no usable hint.
 */
function retryAfterMs(err) {
  if (typeof err !== "object" || err === null || !("headers" in err)) return void 0;
  const raw = readHeader(err.headers, "retry-after");
  if (raw === void 0) return void 0;

  const seconds = Number(raw);
  if (Number.isFinite(seconds)) {
    return seconds >= 0 ? seconds * 1e3 : void 0;
  }

  // Only the fixed-format HTTP-date is accepted so that arbitrary text is never
  // handed to the lenient Date parser and misread as a delay.
  const httpDate = /^[A-Za-z]{3}, \d{2} [A-Za-z]{3} \d{4} \d{2}:\d{2}:\d{2} GMT$/;
  if (typeof raw !== "string" || !httpDate.test(raw.trim())) return void 0;
  const at = Date.parse(raw.trim());
  if (Number.isNaN(at)) return void 0;
  return Math.max(0, at - Date.now());
}

/**
 * Looks up header `name` on either a container exposing `get(name)` or a plain
 * record. Plain-record lookup falls back to a case-insensitive key match because
 * HTTP field names are case-insensitive.
 *
 * @returns The header value, or undefined when absent or not a string/number.
 */
function readHeader(headers, name) {
  if (headers === null || typeof headers !== "object" && typeof headers !== "function") {
    return void 0;
  }
  const getter = headers.get;
  if (typeof getter === "function") {
    const viaGetter = getter.call(headers, name);
    return viaGetter === null ? void 0 : viaGetter;
  }
  let value = headers[name];
  if (value === void 0) {
    const wanted = name.toLowerCase();
    const actualKey = Object.keys(headers).find((key) => key.toLowerCase() === wanted);
    if (actualKey !== void 0) value = headers[actualKey];
  }
  return typeof value === "string" || typeof value === "number" ? value : void 0;
}
375
/**
 * Runs `fn`, retrying transient failures with exponential backoff.
 *
 * Errors classified as "auth" are rethrown as-is; "fatal" errors are wrapped in a
 * ProviderError with code "PROVIDER_FATAL"; transient errors are retried until
 * `max_retries` retries have been used, then reported as
 * "PROVIDER_TRANSIENT_EXHAUSTED". Each wait is `base_delay_ms * 2^attempt` plus up
 * to 10% jitter, or the error's Retry-After hint when that is longer.
 *
 * @param {() => unknown} fn - Operation to attempt.
 * @param {object} [options]
 * @param {number} [options.max_retries] - Retries after the first attempt. A missing
 *   or non-numeric value means no retries.
 * @param {number} [options.base_delay_ms=500] - Backoff base in milliseconds.
 * @param {(ms: number) => Promise<void>} [options.sleep] - Wait function.
 * @param {() => number} [options.random] - Jitter source returning a value in [0, 1).
 * @returns {Promise<unknown>} Whatever `fn` resolves to.
 */
async function withRetry(fn, options = {}) {
  const { base_delay_ms = 500, random = Math.random } = options;
  // Resolved lazily: the default sleeper is only needed once a retry actually waits.
  const pause = (ms) => (options.sleep === void 0 ? defaultSleep : options.sleep)(ms);

  // Math.floor(undefined) is NaN, and `attempt >= NaN` is always false, which would
  // retry forever. Treat an unusable limit as "no retries".
  const requested = Math.floor(options.max_retries);
  const maxRetries = Number.isNaN(requested) ? 0 : Math.max(0, requested);

  let attempt = 0;
  for (; ; ) {
    try {
      return await fn();
    } catch (err) {
      const klass = classifyError(err);
      if (klass === "auth") throw err;
      if (klass === "fatal") throw new ProviderError("PROVIDER_FATAL", messageOf(err));
      if (attempt >= maxRetries) {
        throw new ProviderError("PROVIDER_TRANSIENT_EXHAUSTED", messageOf(err));
      }
      const backoff = base_delay_ms * 2 ** attempt;
      const jitter = backoff * 0.1 * random();
      const delay = Math.max(backoff + jitter, retryAfterMs(err) ?? 0);
      await pause(delay);
      attempt += 1;
    }
  }
}
397
+
398
+ // src/providers/prices.ts
399
// Built-in prices in USD per 1k tokens, keyed by model id.
var PRICE_TABLE = {
  // Anthropic
  "claude-3-5-haiku-latest": {
    input_per_1k_usd: 0.0008,
    output_per_1k_usd: 0.004
  },
  "claude-3-5-sonnet-latest": {
    input_per_1k_usd: 0.003,
    output_per_1k_usd: 0.015
  },
  "claude-3-opus-latest": {
    input_per_1k_usd: 0.015,
    output_per_1k_usd: 0.075
  },
  // OpenAI
  "gpt-4o": {
    input_per_1k_usd: 0.0025,
    output_per_1k_usd: 0.01
  },
  "gpt-4o-mini": {
    input_per_1k_usd: 0.00015,
    output_per_1k_usd: 0.0006
  },
  // OpenAI via OpenRouter (namespaced ids). OpenRouter passes OpenAI list price through for
  // openai/* routes, so these mirror the direct entries. Only a few common ids are listed —
  // OpenRouter has hundreds of models and dynamic routing, so most still need a `pricing` entry
  // in plune.yaml (or report cost_usd=0). Verify against OpenRouter's current rates.
  "openai/gpt-4o": {
    input_per_1k_usd: 0.0025,
    output_per_1k_usd: 0.01
  },
  "openai/gpt-4o-mini": {
    input_per_1k_usd: 0.00015,
    output_per_1k_usd: 0.0006
  }
};
414
+
415
+ // src/providers/cost.ts
416
/** Cost in USD of `usage` at the given per-1k-token `price`. */
function priceFrom(price, usage) {
  return usage.input_tokens / 1e3 * price.input_per_1k_usd + usage.output_tokens / 1e3 * price.output_per_1k_usd;
}

/** Default warning sink: writes the message and a newline to stderr. */
var defaultWarn = (message) => {
  process.stderr.write(message + "\n");
};

/**
 * Resolves the cost of a call, trying in order: a user pricing override for the
 * model, the cost reported alongside the response, then the built-in price table.
 * When none applies a warning is emitted and the cost is 0.
 *
 * @param {{input_tokens: number, output_tokens: number}} usage - Token counts.
 * @param {string} model - Model id used as the pricing key.
 * @param {number | undefined} reportedCostUsd - Cost reported with the response, if any.
 * @param {object | undefined} overrides - Pricing overrides keyed by model id.
 * @param {{table?: object, warn?: (message: string) => void}} [deps] - Injectable
 *   price table and warning sink.
 * @returns {number} Cost in USD.
 */
function resolveCost(usage, model, reportedCostUsd, overrides, deps = {}) {
  const table = deps.table ?? PRICE_TABLE;
  const warn = deps.warn ?? defaultWarn;

  // Own-property lookup only: a model id such as "constructor" must not resolve to
  // a member inherited from Object.prototype and produce a NaN cost.
  const ownPrice = (prices) =>
    prices != null && Object.hasOwn(prices, model) ? prices[model] : void 0;

  const override = ownPrice(overrides);
  if (override !== void 0) {
    return priceFrom(override, usage);
  }
  if (reportedCostUsd !== void 0) {
    return reportedCostUsd;
  }
  const tablePrice = ownPrice(table);
  if (tablePrice !== void 0) {
    return priceFrom(tablePrice, usage);
  }
  warn(
    `plune: no price for model "${model}" \u2014 reporting cost_usd=0. Set pricing["${model}"] in plune.yaml to track its cost.`
  );
  return 0;
}
441
+
442
+ // src/providers/anthropic.ts
443
var ENV_VAR = "ANTHROPIC_API_KEY";
var DEFAULT_MAX_RETRIES = 2;

/**
 * Builds a provider backed by the Anthropic SDK client.
 *
 * @param {object} config - Provider config (`model`, optional `timeout`, `max_retries`).
 * @param {object} env - Environment map; must contain a non-blank ANTHROPIC_API_KEY.
 * @param {object} [pricing] - Pricing overrides forwarded to resolveCost.
 * @returns {{complete: Function, estimateCost: Function}} Provider object.
 * @throws {AuthError} When the API key is missing or blank.
 */
function makeAnthropicProvider(config, env, pricing) {
  const apiKey = env[ENV_VAR];
  const keyMissing = apiKey === void 0 || apiKey.trim() === "";
  if (keyMissing) {
    throw new AuthError(
      `Missing ${ENV_VAR}. Set it in your environment to use the anthropic provider.`,
      ENV_VAR
    );
  }

  // SDK-level retries are switched off; withRetry below is the only retry layer.
  const clientOptions = { apiKey, maxRetries: 0 };
  if (config.timeout !== void 0) {
    clientOptions.timeout = config.timeout;
  }
  const client = new import_sdk.default(clientOptions);
  const maxRetries = config.max_retries ?? DEFAULT_MAX_RETRIES;

  const sendOnce = (req) =>
    client.messages.create({
      model: req.model,
      max_tokens: req.max_tokens,
      temperature: req.temperature,
      messages: [{ role: "user", content: req.prompt_resolved }]
    });
  // Non-text content blocks contribute nothing to the output.
  const textOf = (block) => (block.type === "text" ? block.text : "");

  return {
    async complete(req) {
      try {
        const res = await withRetry(() => sendOnce(req), { max_retries: maxRetries });
        return {
          output: res.content.map((block) => textOf(block)).join(""),
          usage: {
            input_tokens: res.usage.input_tokens,
            output_tokens: res.usage.output_tokens
          }
        };
      } catch (err) {
        // Ensures the API key never appears in a surfaced error message.
        throw normalizeProviderError(err, apiKey, ENV_VAR);
      }
    },
    estimateCost(usage, reportedCostUsd) {
      const cost_usd = resolveCost(usage, config.model, reportedCostUsd, pricing);
      return { cost_usd };
    }
  };
}
488
+
489
+ // src/providers/openai.ts
490
+ var import_openai = __toESM(require("openai"), 1);
491
var DEFAULT_MAX_RETRIES2 = 2;

/**
 * Builds a provider for any endpoint that speaks the OpenAI chat-completions API.
 *
 * @param {object} opts
 * @param {object} opts.config - Provider config (`type`, `model`, optional `timeout`, `max_retries`).
 * @param {object} opts.env - Environment map holding the API key.
 * @param {object} [opts.pricing] - Pricing overrides forwarded to resolveCost.
 * @param {string} opts.apiKeyEnv - Name of the environment variable holding the API key.
 * @param {string} [opts.baseURL] - Alternate API base URL.
 * @param {boolean} [opts.reportsCost] - Whether to request and read `usage.cost`.
 * @returns {{complete: Function, estimateCost: Function}} Provider object.
 * @throws {AuthError} When the API key is missing or blank.
 */
function makeOpenAiCompatibleProvider(opts) {
  const { config, env, pricing, apiKeyEnv, baseURL, reportsCost } = opts;
  const apiKey = env[apiKeyEnv];
  const keyMissing = apiKey === void 0 || apiKey.trim() === "";
  if (keyMissing) {
    throw new AuthError(
      `Missing ${apiKeyEnv}. Set it in your environment to use the ${config.type} provider.`,
      apiKeyEnv
    );
  }

  // maxRetries: 0 because our own withRetry owns retry (ADR-PRV03)
  const clientOptions = { apiKey, maxRetries: 0 };
  if (baseURL !== void 0) {
    clientOptions.baseURL = baseURL;
  }
  if (config.timeout !== void 0) {
    clientOptions.timeout = config.timeout;
  }
  const client = new import_openai.default(clientOptions);
  const maxRetries = config.max_retries ?? DEFAULT_MAX_RETRIES2;

  const sendOnce = (req) => {
    const request = {
      model: req.model,
      temperature: req.temperature,
      max_tokens: req.max_tokens,
      messages: [{ role: "user", content: req.prompt_resolved }]
    };
    if (reportsCost) {
      // OpenRouter extension: ask it to include the call's actual cost in `usage` (ADR-PRC02).
      request.usage = { include: true };
    }
    return client.chat.completions.create(request);
  };

  return {
    async complete(req) {
      try {
        const res = await withRetry(() => sendOnce(req), { max_retries: maxRetries });
        const reportedCost = reportsCost ? res.usage?.cost : void 0;
        const completion = {
          output: res.choices[0]?.message?.content ?? "",
          usage: {
            input_tokens: res.usage?.prompt_tokens ?? 0,
            output_tokens: res.usage?.completion_tokens ?? 0
          }
        };
        if (typeof reportedCost === "number") {
          completion.cost_usd = reportedCost;
        }
        return completion;
      } catch (err) {
        // Ensures the API key never appears in a surfaced error message.
        throw normalizeProviderError(err, apiKey, apiKeyEnv);
      }
    },
    estimateCost(usage, reportedCostUsd) {
      const cost_usd = resolveCost(usage, config.model, reportedCostUsd, pricing);
      return { cost_usd };
    }
  };
}
541
/** OpenAI provider: the OpenAI-compatible provider keyed by OPENAI_API_KEY, with no base URL override. */
function makeOpenAiProvider(config, env, pricing) {
  const options = { config, env, pricing, apiKeyEnv: "OPENAI_API_KEY" };
  return makeOpenAiCompatibleProvider(options);
}
544
+
545
+ // src/providers/openrouter.ts
546
var OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1";

/** OpenRouter provider: the OpenAI-compatible provider pointed at OpenRouter's base URL. */
function makeOpenRouterProvider(config, env, pricing) {
  const options = {
    config,
    env,
    pricing,
    apiKeyEnv: "OPENROUTER_API_KEY",
    baseURL: OPENROUTER_BASE_URL,
    // OpenRouter reports the call's actual cost via usage.cost (ADR-PRC02)
    reportsCost: true
  };
  return makeOpenAiCompatibleProvider(options);
}
558
+
559
+ // src/providers/registry.ts
560
/**
 * Creates an empty registry mapping provider names to factory functions.
 *
 * @returns {{register: Function, resolve: Function, has: Function}}
 *   `register(name, factory)` throws on duplicate names; `resolve(name, config, env, pricing)`
 *   invokes the factory registered under `name` or throws listing the known names;
 *   `has(name)` reports whether a name is registered.
 */
function createProviderRegistry() {
  const factories = /* @__PURE__ */ new Map();

  const has = (name) => factories.has(name);

  const register = (name, factory) => {
    if (factories.has(name)) {
      throw new Error(`Provider "${name}" is already registered \u2014 provider names must be unique.`);
    }
    factories.set(name, factory);
  };

  const resolve = (name, config, env, pricing) => {
    const factory = factories.get(name);
    if (factory !== void 0) {
      return factory(config, env, pricing);
    }
    const known = [...factories.keys()].join(", ") || "(none)";
    throw new Error(`Unknown provider type "${name}". Registered providers: ${known}.`);
  };

  return { register, resolve, has };
}
582
/** Creates a registry pre-populated with the anthropic, openai and openrouter factories. */
function createDefaultRegistry() {
  const registry = createProviderRegistry();
  const builtIns = [
    ["anthropic", makeAnthropicProvider],
    ["openai", makeOpenAiProvider],
    ["openrouter", makeOpenRouterProvider]
  ];
  for (const [name, factory] of builtIns) {
    registry.register(name, factory);
  }
  return registry;
}

/** Resolves the provider named by `config.type` from a freshly built default registry. */
function getProvider(config, env, pricing) {
  const registry = createDefaultRegistry();
  return registry.resolve(config.type, config, env, pricing);
}
592
+
593
+ // src/providers/mock.ts
594
/** Provider stub: always answers "mock response" with fixed token counts and zero cost. */
function makeMockProvider() {
  const complete = () => {
    const canned = {
      output: "mock response",
      usage: { input_tokens: 10, output_tokens: 5 }
    };
    return Promise.resolve(canned);
  };
  const estimateCost = () => {
    return { cost_usd: 0 };
  };
  return { complete, estimateCost };
}
603
+
604
+ // src/embeddings/cosine.ts
605
/**
 * Cosine similarity of two equal-length numeric vectors.
 *
 * @returns {number} dot(a, b) / (|a| * |b|), or 0 when either vector has zero magnitude.
 * @throws {Error} When the vectors differ in length.
 */
function cosine(a, b) {
  const size = a.length;
  if (size !== b.length) {
    throw new Error(`cosine: vector length mismatch (${a.length} vs ${b.length})`);
  }

  let dot = 0;
  let normA = 0;
  let normB = 0;
  let idx = 0;
  while (idx < size) {
    const x = a[idx];
    const y = b[idx];
    dot += x * y;
    normA += x * x;
    normB += y * y;
    idx += 1;
  }

  const denom = Math.sqrt(normA) * Math.sqrt(normB);
  if (denom === 0) {
    return 0;
  }
  return dot / denom;
}
622
+
623
+ // src/embeddings/embedder.ts
624
var MODEL_ID = "Xenova/all-MiniLM-L6-v2";

/** Text embedder built on a lazily loaded feature-extraction pipeline for MODEL_ID. */
var XenovaEmbedder = class {
  // Promise of the pipeline function; populated on first load() and reused afterwards.
  extractor;

  /** Embeds `texts`, returning one Float32Array per row of the pipeline output. */
  async embed(texts) {
    const runPipeline = await this.load();
    const tensor = await runPipeline(texts, { pooling: "mean", normalize: true });
    const vectors = [];
    for (const row of tensor.tolist()) {
      vectors.push(Float32Array.from(row));
    }
    return vectors;
  }

  /** Returns the cached pipeline promise, starting the import on first use. */
  load() {
    if (this.extractor === void 0 || this.extractor === null) {
      this.extractor = import("@huggingface/transformers").then((mod) => {
        const { pipeline } = mod;
        return pipeline("feature-extraction", MODEL_ID);
      });
    }
    return this.extractor;
  }
};
640
// Shared embedder instance, created on first request.
var defaultEmbedder;

/** Returns the shared XenovaEmbedder, constructing it the first time it is asked for. */
function getDefaultEmbedder() {
  if (defaultEmbedder === void 0 || defaultEmbedder === null) {
    defaultEmbedder = new XenovaEmbedder();
  }
  return defaultEmbedder;
}
644
+
645
+ // src/cache/cache.ts
646
+ var import_better_sqlite3 = __toESM(require("better-sqlite3"), 1);
647
// Version stamp stored in the meta table; a mismatch makes ensureSchema rebuild `completions`.
var SCHEMA_VERSION = 1;
var META_DDL = "CREATE TABLE IF NOT EXISTS meta (key TEXT PRIMARY KEY, value TEXT);";
var COMPLETIONS_DDL = "CREATE TABLE IF NOT EXISTS completions (cache_key TEXT PRIMARY KEY, output TEXT NOT NULL, input_tokens INTEGER NOT NULL, output_tokens INTEGER NOT NULL, created_at INTEGER NOT NULL);";

/**
 * Opens the completion cache stored at `file`.
 *
 * Opening is best-effort: if the database cannot be opened or its schema cannot be
 * prepared, a NoOpCache is returned instead of throwing.
 *
 * @param {string} file - Database file path.
 * @param {{now?: () => number}} [opts] - `now` supplies timestamps; defaults to Date.now.
 * @returns {SqliteCache | NoOpCache}
 */
function openCache(file, opts = {}) {
  const clock = opts.now ?? Date.now;
  const openDatabase = () => {
    const db = new import_better_sqlite3.default(file);
    try {
      ensureSchema(db);
      return db;
    } catch (err) {
      // Release the handle before reporting the failure upward.
      db.close();
      throw err;
    }
  };
  try {
    return new SqliteCache(openDatabase(), clock);
  } catch {
    return new NoOpCache();
  }
}

/**
 * Creates the tables if needed and, when the stored schema version differs from
 * SCHEMA_VERSION, drops and recreates `completions` and records the new version.
 */
function ensureSchema(db) {
  db.exec(META_DDL + COMPLETIONS_DDL);
  const versionRow = db.prepare("SELECT value FROM meta WHERE key = 'schema_version'").get();
  const current = String(SCHEMA_VERSION);
  if (versionRow?.value === current) {
    return;
  }
  db.exec("DROP TABLE IF EXISTS completions");
  db.exec(COMPLETIONS_DDL);
  db.prepare("INSERT OR REPLACE INTO meta (key, value) VALUES (?, ?)").run("schema_version", current);
}
675
/**
 * Completion cache backed by a database handle exposing prepare/exec/close.
 * Read, write and clear failures are swallowed so the cache stays best-effort.
 */
var SqliteCache = class {
  db;
  now;

  /**
   * @param db - Database handle.
   * @param {() => number} now - Timestamp source used for `created_at` and age checks.
   */
  constructor(db, now) {
    this.db = db;
    this.now = now;
  }

  /**
   * Looks up a cached completion.
   * @returns The cached `{ output, usage }`, or undefined on a miss, on an entry
   *   older than `opts.maxAgeMs`, or on any database error.
   */
  get(key, opts = {}) {
    try {
      const select = this.db.prepare("SELECT output, input_tokens, output_tokens, created_at FROM completions WHERE cache_key = ?");
      const row = select.get(key);
      if (row === void 0) {
        return void 0;
      }
      const { maxAgeMs } = opts;
      const expired = maxAgeMs !== void 0 && this.now() - row.created_at > maxAgeMs;
      if (expired) {
        return void 0;
      }
      const { output, input_tokens, output_tokens } = row;
      return { output, usage: { input_tokens, output_tokens } };
    } catch {
      return void 0;
    }
  }

  /** Inserts or overwrites the entry for `key`, stamping it with the current time. */
  set(key, value) {
    try {
      const upsert = this.db.prepare(
        `INSERT INTO completions (cache_key, output, input_tokens, output_tokens, created_at)
         VALUES (?, ?, ?, ?, ?)
         ON CONFLICT(cache_key) DO UPDATE SET
           output = excluded.output,
           input_tokens = excluded.input_tokens,
           output_tokens = excluded.output_tokens,
           created_at = excluded.created_at`
      );
      const { input_tokens, output_tokens } = value.usage;
      upsert.run(key, value.output, input_tokens, output_tokens, this.now());
    } catch {
      // Best-effort: a failed write only means the next lookup misses.
    }
  }

  /** Deletes every cached completion. */
  clear() {
    try {
      this.db.exec("DELETE FROM completions");
    } catch {
      // Best-effort: ignore failures while clearing.
    }
  }

  /** Closes the underlying database handle. */
  close() {
    this.db.close();
  }
};
721
/** Cache stand-in that stores nothing: every lookup misses and every write is dropped. */
var NoOpCache = class {
  get() {
    return void 0;
  }

  set() {
    return void 0;
  }

  clear() {
    return void 0;
  }

  close() {
    return void 0;
  }
};
732
+
733
+ // src/orchestrator/run.ts
734
+ var fs3 = __toESM(require("fs"), 1);
735
+ var path2 = __toESM(require("path"), 1);
736
+
737
+ // src/util/hash.ts
738
+ var import_node_crypto = require("crypto");
739
+
740
+ // src/util/canonical-json.ts
741
/**
 * Recursively converts `value` into a canonical form: object keys sorted, strings
 * NFC-normalized, and negative zero folded into zero.
 *
 * @throws {Error} On non-finite numbers, non-plain objects, and any type other than
 *   null, boolean, number, string, array or plain object.
 */
function normalizeValue(value) {
  if (value === null) return null;

  switch (typeof value) {
    case "boolean":
      return value;
    case "number":
      if (!isFinite(value)) {
        throw new Error(
          `canonicalJson: non-finite number rejected: ${String(value)}`
        );
      }
      // -0 === 0 is true, so this returns a plain 0 for negative zero.
      return value === 0 ? 0 : value;
    case "string":
      return value.normalize("NFC");
    case "object":
      break;
    default:
      throw new Error(
        `canonicalJson: unsupported value type: ${typeof value}`
      );
  }

  if (Array.isArray(value)) {
    return value.map(normalizeValue);
  }

  // Only plain objects (Object.prototype or null prototype) are accepted.
  const proto = Object.getPrototypeOf(value);
  if (proto !== null && proto !== Object.prototype) {
    const name = value.constructor?.name ?? "unknown";
    throw new Error(
      `canonicalJson: unsupported non-plain object (${name}) \u2014 only plain objects, arrays, and primitives are serializable`
    );
  }

  const sorted = {};
  const orderedKeys = Object.keys(value).sort();
  for (const key of orderedKeys) {
    sorted[key] = normalizeValue(value[key]);
  }
  return sorted;
}

/** Serializes `value` to JSON after canonicalizing it with normalizeValue. */
function canonicalJson(value) {
  const normalized = normalizeValue(value);
  return JSON.stringify(normalized);
}
780
+
781
+ // src/util/hash.ts
782
/** Hex-encoded SHA-256 digest of `input`, read as UTF-8. */
function sha256(input) {
  const { createHash } = import_node_crypto;
  const hasher = createHash("sha256");
  hasher.update(input, "utf8");
  return hasher.digest("hex");
}
785
/**
 * Cache key for a completion request: the hash of the canonical JSON of the five
 * fields provider, model, temperature, max_tokens and prompt_resolved. Any other
 * properties on `inputs` are ignored.
 */
function cacheKey(inputs) {
  const fingerprint = {
    provider: inputs.provider,
    model: inputs.model,
    temperature: inputs.temperature,
    max_tokens: inputs.max_tokens,
    prompt_resolved: inputs.prompt_resolved
  };
  return sha256(canonicalJson(fingerprint));
}

/** Hash of the canonical JSON of `config`. */
function configHash(config) {
  const serialized = canonicalJson(config);
  return sha256(serialized);
}
792
+
793
+ // src/assertions/interpolate.ts
794
// Matches {{ name }} placeholders; the name may contain word characters and dots.
var PLACEHOLDER = /\{\{\s*([\w.]+)\s*\}\}/g;

/**
 * Substitutes placeholders in `template`.
 *
 * `{{expected}}` becomes `ctx.row.expected` (empty string when absent) and
 * `{{vars.NAME}}` becomes the stringified value of `ctx.vars[NAME]`. Unknown
 * placeholders and variables that are not defined are left in the text unchanged.
 *
 * @param {string} template - Text containing placeholders.
 * @param {{row: {expected?: string}, vars: object}} ctx - Values to substitute.
 * @returns {string} The template with known placeholders replaced.
 */
function interpolate(template, ctx) {
  return template.replace(PLACEHOLDER, (match, key) => {
    if (key === "expected") {
      return ctx.row.expected ?? "";
    }
    if (!key.startsWith("vars.")) {
      return match;
    }
    const name = key.slice("vars.".length);
    // Own properties only: `{{vars.constructor}}` or `{{vars.toString}}` must not
    // resolve to members inherited from Object.prototype.
    if (!Object.hasOwn(ctx.vars, name)) {
      return match;
    }
    const value = ctx.vars[name];
    return value === void 0 ? match : String(value);
  });
}
808
+
809
+ // src/assertions/exact-match.ts
810
/**
 * exact-match assertion: passes when the output equals the interpolated
 * `params.value`, optionally after trimming and/or lower-casing both sides.
 */
var exactMatch = {
  run(ctx) {
    const { trim, ignore_case } = ctx.params;
    const expected = interpolate(ctx.params.value, ctx);

    // Applies the same optional normalization to both sides of the comparison.
    const prepare = (text) => {
      const trimmed = trim ? text.trim() : text;
      return ignore_case ? trimmed.toLowerCase() : trimmed;
    };

    if (prepare(ctx.output) === prepare(expected)) {
      return Promise.resolve({ passed: true });
    }
    return Promise.resolve({ passed: false, reason: `expected exact match: "${expected}"` });
  }
};
827
+
828
+ // src/assertions/contains.ts
829
/** Lower-cases `s` when `ignoreCase` is truthy; otherwise returns it unchanged. */
function norm(s, ignoreCase) {
  if (ignoreCase) {
    return s.toLowerCase();
  }
  return s;
}

/** contains assertion: the output must include the interpolated `params.value`. */
var contains = {
  run(ctx) {
    const { ignore_case } = ctx.params;
    const value = interpolate(ctx.params.value, ctx);
    const haystack = norm(ctx.output, ignore_case);
    if (haystack.includes(norm(value, ignore_case))) {
      return Promise.resolve({ passed: true });
    }
    return Promise.resolve({ passed: false, reason: `expected output to contain: "${value}"` });
  }
};

/** contains-any assertion: the output must include at least one interpolated value. */
var containsAny = {
  run(ctx) {
    const { ignore_case } = ctx.params;
    const haystack = norm(ctx.output, ignore_case);
    const values = ctx.params.values.map((raw) => interpolate(raw, ctx));
    const matched = values.some((needle) => haystack.includes(norm(needle, ignore_case)));
    if (matched) {
      return Promise.resolve({ passed: true });
    }
    return Promise.resolve({
      passed: false,
      reason: `expected output to contain any of: ${JSON.stringify(values)}`
    });
  }
};

/** contains-all assertion: the output must include every interpolated value. */
var containsAll = {
  run(ctx) {
    const { ignore_case } = ctx.params;
    const haystack = norm(ctx.output, ignore_case);
    const values = ctx.params.values.map((raw) => interpolate(raw, ctx));
    const missing = values.filter((needle) => !haystack.includes(norm(needle, ignore_case)));
    if (missing.length === 0) {
      return Promise.resolve({ passed: true });
    }
    return Promise.resolve({
      passed: false,
      reason: `output is missing required substrings: ${JSON.stringify(missing)}`
    });
  }
};
863
+
864
+ // src/assertions/json-schema.ts
865
+ var import_ajv = require("ajv");
866
+
867
+ // src/assertions/json-extract.ts
868
/**
 * Pulls a JSON value out of model output.
 *
 * In "strict" mode the whole trimmed output must parse. In any other mode the
 * first fenced code block is tried first, then each balanced `{...}` / `[...]`
 * run in order of appearance; the first candidate that parses wins.
 *
 * @param {string} output - Raw text to search.
 * @param {string} mode - "strict" or anything else (treated as lenient).
 * @returns {{ok: true, value: *} | {ok: false}}
 */
function extractJson(output, mode) {
  if (mode === "strict") {
    return tryParse(output.trim());
  }

  const fenced = extractFencedBlock(output);
  const fromFence = fenced === null ? { ok: false } : tryParse(fenced);
  if (fromFence.ok) {
    return fromFence;
  }

  for (const candidate of balancedRuns(output)) {
    const parsed = tryParse(candidate);
    if (parsed.ok) {
      return parsed;
    }
  }
  return { ok: false };
}
883
/**
 * JSON.parse wrapper that reports failure as a value instead of throwing.
 *
 * @param {string} s - Candidate JSON text.
 * @returns {{ok: true, value: *} | {ok: false}}
 */
function tryParse(s) {
  let value;
  try {
    value = JSON.parse(s);
  } catch {
    return { ok: false };
  }
  return { ok: true, value };
}
890
/**
 * Returns the trimmed body of the first triple-backtick fence in `s`
 * (an optional `json` tag after the opening fence is skipped), or null when
 * the text contains no complete fence.
 *
 * @param {string} s - Text to search.
 * @returns {string | null}
 */
function extractFencedBlock(s) {
  const fence = s.match(/```(?:json)?\s*\n?([\s\S]*?)```/i);
  if (fence === null) {
    return null;
  }
  const body = fence[1];
  return body === undefined || body === null ? null : body.trim();
}
894
/**
 * Lazily yields every bracket-balanced `{...}` / `[...]` substring of `s`,
 * left to right and without overlap.
 *
 * An opener that never balances is skipped one character at a time; after a
 * balanced run is yielded, scanning resumes right behind it.
 *
 * The opener search runs on `s` itself from a moving cursor (via the regex's
 * `lastIndex`) rather than on `s.slice(i)`, so the remaining input is not
 * re-sliced on every iteration.
 *
 * @param {string} s - Text to scan.
 * @yields {string} Each balanced run, in order of appearance.
 */
function* balancedRuns(s) {
  // A fresh global regex per generator keeps `lastIndex` private to this scan.
  const opener = /[{[]/g;
  let cursor = 0;
  while (cursor < s.length) {
    opener.lastIndex = cursor;
    const hit = opener.exec(s);
    if (hit === null) {
      return;
    }
    const start = hit.index;
    const run = balancedFrom(s, start);
    if (run === null) {
      // Unbalanced opener: step past it and keep looking.
      cursor = start + 1;
    } else {
      yield run;
      cursor = start + run.length;
    }
  }
}
909
/**
 * Returns the substring that starts at `start` and ends at the bracket which
 * balances the opener found there, or null if the text ends first.
 *
 * Only the opener's own bracket kind is counted, and brackets inside
 * double-quoted strings (with backslash escapes honoured) are ignored.
 *
 * @param {string} s - Text being scanned.
 * @param {number} start - Index of the opening bracket.
 * @returns {string | null}
 */
function balancedFrom(s, start) {
  const opener = s[start];
  const closer = opener === "{" ? "}" : "]";
  let depth = 0;
  let inString = false;
  let escaped = false;

  for (let pos = start; pos < s.length; pos++) {
    const ch = s[pos];

    if (inString) {
      if (escaped) {
        escaped = false;
      } else if (ch === "\\") {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }

    switch (ch) {
      case '"':
        inString = true;
        break;
      case opener:
        depth += 1;
        break;
      case closer:
        depth -= 1;
        if (depth === 0) {
          return s.slice(start, pos + 1);
        }
        break;
      default:
        break;
    }
  }
  return null;
}
934
+
935
+ // src/assertions/json-schema.ts
936
var jsonSchema = {
  /**
   * Extracts JSON from the output and validates it against `params.schema`.
   *
   * Fails (never throws) when no JSON can be extracted, when the schema itself
   * does not compile, or when validation reports errors. A new Ajv instance is
   * created on every call.
   *
   * @param {object} ctx - Assertion context (`output`, `params.schema`, optional `params.extract`).
   * @returns {Promise<{passed: boolean, reason?: string}>}
   */
  run(ctx) {
    const fail = (reason) => Promise.resolve({ passed: false, reason });

    const mode = ctx.params.extract ?? "auto";
    const extracted = extractJson(ctx.output, mode);
    if (!extracted.ok) {
      return fail(`output is not valid JSON (extract mode: ${mode})`);
    }

    const ajv = new import_ajv.Ajv({ allErrors: true });
    let validate;
    try {
      validate = ajv.compile(ctx.params.schema);
    } catch (err) {
      return fail(`invalid json-schema: ${err.message}`);
    }

    if (validate(extracted.value)) {
      return Promise.resolve({ passed: true });
    }
    const details = ajv.errorsText(validate.errors, { separator: "; " });
    return fail(`json-schema validation failed: ${details}`);
  }
};
965
+
966
+ // src/assertions/semantic-similarity.ts
967
// Similarity required to pass when the assertion sets no `threshold`.
var DEFAULT_THRESHOLD = 0.8;
var semanticSimilarity = {
  /**
   * Embeds the output and the interpolated `params.reference` in one call and
   * passes when their cosine score reaches the threshold.
   *
   * @param {object} ctx - Assertion context; must carry an `embedder`.
   * @returns {Promise<{passed: boolean, score: number, reason?: string}>}
   * @throws {Error} When no embedder is present or it returns fewer than two vectors.
   */
  async run(ctx) {
    const { embedder } = ctx;
    if (embedder === void 0) {
      throw new Error("semantic-similarity requires an embedder in the AssertionContext");
    }

    const reference = interpolate(ctx.params.reference, ctx);
    const threshold = ctx.params.threshold ?? DEFAULT_THRESHOLD;

    const vecs = await embedder.embed([ctx.output, reference]);
    const [outVec, refVec] = vecs;
    const incomplete = outVec === void 0 || refVec === void 0;
    if (incomplete) {
      throw new Error(`semantic-similarity: embedder returned ${vecs.length} vector(s), expected 2`);
    }

    const score = cosine(outVec, refVec);
    if (score >= threshold) {
      return { passed: true, score };
    }
    return {
      passed: false,
      score,
      reason: `similarity ${score.toFixed(3)} < threshold ${threshold}`
    };
  }
};
988
+
989
+ // src/assertions/judge-helpers.ts
990
/**
 * Sends a prompt to the judge and returns the JSON value found in its reply.
 *
 * @param {{ask: function(string): Promise<string>}} judge - Judge to query.
 * @param {string} prompt5 - Prompt text.
 * @returns {Promise<*>} The parsed JSON value.
 * @throws {Error} When the reply has no parseable JSON (message quotes the first 120 characters).
 */
async function askJson(judge, prompt5) {
  const reply = await judge.ask(prompt5);
  const parsed = extractJson(reply, "auto");
  if (parsed.ok) {
    return parsed.value;
  }
  throw new Error(`judge returned no parseable JSON (got: ${reply.slice(0, 120)})`);
}
998
/**
 * Asserts that a parsed judge response is a JSON object and returns it.
 *
 * Arrays are rejected as well: `typeof [] === "object"`, but an array is not
 * the keyed object the callers index into, so it gets the same clear error
 * instead of a misleading "missing field" one further down.
 *
 * @param {*} judgment - Parsed JSON value returned by the judge.
 * @returns {object} The same value, once verified.
 * @throws {Error} When the value is a primitive, null or an array.
 */
function asObject(judgment) {
  const isPlainJsonObject =
    typeof judgment === "object" && judgment !== null && !Array.isArray(judgment);
  if (!isPlainJsonObject) {
    throw new Error("judge response is not a JSON object");
  }
  return judgment;
}
1004
/**
 * Reads a `{score, reason?}` judgment.
 *
 * The score must be a finite number and is clamped into [0, 1]; `reason` is
 * carried over only when it is a string.
 *
 * @param {*} judgment - Parsed judge response.
 * @returns {{score: number, reason?: string}}
 * @throws {Error} When the response is not an object or lacks a finite numeric score.
 */
function parseScored(judgment) {
  const obj = asObject(judgment);

  const raw = obj["score"];
  const usable = typeof raw === "number" && Number.isFinite(raw);
  if (!usable) {
    throw new Error('judge response missing a finite numeric "score"');
  }

  const parsed = { score: Math.max(0, Math.min(1, raw)) };
  const reason = obj["reason"];
  if (typeof reason === "string") {
    parsed.reason = reason;
  }
  return parsed;
}
1014
/**
 * Reduces a `{statements: [...]}` judgment to one boolean per statement:
 * true only for object entries whose `faithful` field is exactly `true`.
 *
 * @param {*} judgment - Parsed judge response.
 * @returns {boolean[]}
 * @throws {Error} When the response is not an object or `statements` is not an array.
 */
function parseStatements(judgment) {
  const statements = asObject(judgment)["statements"];
  if (!Array.isArray(statements)) {
    throw new Error('judge response missing "statements" array');
  }
  const isFaithful = (entry) =>
    entry !== null && typeof entry === "object" && entry["faithful"] === true;
  return statements.map((entry) => isFaithful(entry));
}
1023
/**
 * Returns the string entries of a `{questions: [...]}` judgment, dropping
 * anything that is not a string.
 *
 * @param {*} judgment - Parsed judge response.
 * @returns {string[]}
 * @throws {Error} When the response is not an object or `questions` is not an array.
 */
function parseQuestions(judgment) {
  const questions = asObject(judgment)["questions"];
  if (Array.isArray(questions)) {
    return questions.filter((entry) => typeof entry === "string");
  }
  throw new Error('judge response missing "questions" array');
}
1030
+
1031
+ // src/assertions/llm-judge.ts
1032
// Score an llm-judge assertion must reach when it sets no `pass_threshold`.
var DEFAULT_PASS_THRESHOLD = 0.5;
/**
 * Builds the llm-judge prompt: scoring instructions, then the criteria, then
 * the output under evaluation.
 *
 * @param {string} criteria - What the output is judged against.
 * @param {string} output - Text being judged.
 * @returns {string}
 */
function prompt(criteria, output) {
  const instructions = 'You are a strict evaluator. Score from 0 to 1 how well the OUTPUT meets the CRITERIA. Respond ONLY with JSON: {"score": <0..1>, "reason": "<short explanation>"}.';
  const lines = [
    instructions,
    "",
    `CRITERIA: ${criteria}`,
    "",
    "OUTPUT:",
    `${output}`
  ];
  return lines.join("\n");
}
1041
var llmJudge = {
  /**
   * Asks the judge to score the output against the interpolated
   * `params.criteria` and passes when the score reaches the threshold.
   * The judge's own reason is reported when it gave one; otherwise a
   * score-vs-threshold summary is used.
   *
   * @param {object} ctx - Assertion context; must carry a `judge`.
   * @returns {Promise<{passed: boolean, score: number, reason: string}>}
   * @throws {Error} When no judge is present or its reply cannot be parsed.
   */
  async run(ctx) {
    const { judge } = ctx;
    if (judge === void 0) {
      throw new Error("llm-judge requires a judge in the AssertionContext");
    }

    const criteria = interpolate(ctx.params.criteria, ctx);
    const threshold = ctx.params.pass_threshold ?? DEFAULT_PASS_THRESHOLD;

    const judgment = await askJson(judge, prompt(criteria, ctx.output));
    const verdict = parseScored(judgment);
    const score = verdict.score;

    return {
      passed: score >= threshold,
      score,
      reason: verdict.reason ?? `score ${score.toFixed(3)} vs threshold ${threshold}`
    };
  }
};
1057
+
1058
+ // src/assertions/faithfulness.ts
1059
// Share of supported claims a faithfulness assertion needs when it sets no `threshold`.
var DEFAULT_THRESHOLD2 = 0.7;
/**
 * Builds the faithfulness prompt: instructions, then the context, then the
 * output whose claims are to be checked.
 *
 * @param {string} context - Reference material the claims are checked against.
 * @param {string} output - Text whose claims are extracted.
 * @returns {string}
 */
function prompt2(context, output) {
  const instructions = 'Extract the atomic factual claims from the OUTPUT and judge whether each is supported by the CONTEXT. Respond ONLY with JSON: {"statements":[{"claim":"<text>","faithful":<true|false>}]}.';
  const lines = [
    instructions,
    "",
    "CONTEXT:",
    `${context}`,
    "",
    "OUTPUT:",
    `${output}`
  ];
  return lines.join("\n");
}
1069
var faithfulness = {
  /**
   * Has the judge split the output into claims and mark each as supported by
   * the interpolated `params.context` or not. The score is the supported
   * share; an empty claim list scores 1.
   *
   * @param {object} ctx - Assertion context; must carry a `judge`.
   * @returns {Promise<{passed: boolean, score: number, reason?: string}>}
   * @throws {Error} When no judge is present or its reply cannot be parsed.
   */
  async run(ctx) {
    const { judge } = ctx;
    if (judge === void 0) {
      throw new Error("faithfulness requires a judge in the AssertionContext");
    }

    const context = interpolate(ctx.params.context, ctx);
    const threshold = ctx.params.threshold ?? DEFAULT_THRESHOLD2;

    const judgment = await askJson(judge, prompt2(context, ctx.output));
    const verdicts = parseStatements(judgment);

    let score = 1;
    if (verdicts.length !== 0) {
      const supported = verdicts.filter((verdict) => verdict).length;
      score = supported / verdicts.length;
    }

    if (score >= threshold) {
      return { passed: true, score };
    }
    return {
      passed: false,
      score,
      reason: `faithfulness ${score.toFixed(3)} < threshold ${threshold}`
    };
  }
};
1082
+
1083
+ // src/assertions/answer-relevance.ts
1084
// Mean similarity an answer-relevance assertion needs when it sets no `threshold`.
var DEFAULT_THRESHOLD3 = 0.7;
/**
 * Builds the answer-relevance prompt, asking which questions the output answers.
 *
 * @param {string} output - Text to derive questions from.
 * @returns {string}
 */
function prompt3(output) {
  const instructions = 'Generate the questions that the OUTPUT directly and fully answers. Respond ONLY with JSON: {"questions": ["<question>", ...]}.';
  return [instructions, "", "OUTPUT:", `${output}`].join("\n");
}
1091
var answerRelevance = {
  /**
   * Has the judge list the questions the output answers, embeds them together
   * with the interpolated `params.question`, and scores the output by the mean
   * cosine between the real question and each generated one (floored at 0).
   *
   * @param {object} ctx - Assertion context; must carry both a `judge` and an `embedder`.
   * @returns {Promise<{passed: boolean, score: number, reason?: string}>}
   * @throws {Error} When the judge or embedder is missing, or the embedder returns too few vectors.
   */
  async run(ctx) {
    const { judge, embedder } = ctx;
    if (judge === void 0) {
      throw new Error("answer-relevance requires a judge in the AssertionContext");
    }
    if (embedder === void 0) {
      throw new Error("answer-relevance requires an embedder in the AssertionContext");
    }

    const question = interpolate(ctx.params.question, ctx);
    const threshold = ctx.params.threshold ?? DEFAULT_THRESHOLD3;

    const generated = parseQuestions(await askJson(judge, prompt3(ctx.output)));
    if (generated.length === 0) {
      return { passed: false, score: 0, reason: "judge generated no questions from the output" };
    }

    // Vector 0 belongs to the real question; vectors 1..n to the generated ones.
    const vecs = await embedder.embed([question, ...generated]);
    const expected = generated.length + 1;
    if (vecs.length < expected) {
      throw new Error(
        `answer-relevance: embedder returned ${vecs.length} vectors, expected ${generated.length + 1}`
      );
    }

    const total = generated.reduce(
      (acc, _question, index) => acc + cosine(vecs[0], vecs[index + 1]),
      0
    );
    const score = Math.max(0, total / generated.length);

    if (score >= threshold) {
      return { passed: true, score };
    }
    return {
      passed: false,
      score,
      reason: `answer-relevance ${score.toFixed(3)} < threshold ${threshold}`
    };
  }
};
1120
+
1121
+ // src/assertions/context-precision.ts
1122
// Score a context-precision assertion needs when it sets no `threshold`.
var DEFAULT_THRESHOLD4 = 0.7;
/**
 * Builds the context-precision prompt: instructions, then the question, then
 * the context being rated.
 *
 * @param {string} context - Retrieved context to rate.
 * @param {string} question - Question the context should help answer.
 * @returns {string}
 */
function prompt4(context, question) {
  const instructions = 'Judge from 0 to 1 how well the CONTEXT provides the information needed to answer the QUESTION. Respond ONLY with JSON: {"score": <0..1>, "reason": "<short explanation>"}.';
  const lines = [
    instructions,
    "",
    `QUESTION: ${question}`,
    "",
    "CONTEXT:",
    `${context}`
  ];
  return lines.join("\n");
}
1131
var contextPrecision = {
  /**
   * Asks the judge how well the interpolated `params.context` covers the
   * interpolated `params.question` and passes when the score reaches the
   * threshold. The judge's reason is reported when present.
   *
   * @param {object} ctx - Assertion context; must carry a `judge`.
   * @returns {Promise<{passed: boolean, score: number, reason: string}>}
   * @throws {Error} When no judge is present or its reply cannot be parsed.
   */
  async run(ctx) {
    const { judge } = ctx;
    if (judge === void 0) {
      throw new Error("context-precision requires a judge in the AssertionContext");
    }

    const context = interpolate(ctx.params.context, ctx);
    const question = interpolate(ctx.params.question, ctx);
    const threshold = ctx.params.threshold ?? DEFAULT_THRESHOLD4;

    const judgment = await askJson(judge, prompt4(context, question));
    const verdict = parseScored(judgment);
    const score = verdict.score;

    return {
      passed: score >= threshold,
      score,
      reason: verdict.reason ?? `context-precision ${score.toFixed(3)} vs threshold ${threshold}`
    };
  }
};
1148
+
1149
+ // src/assertions/registry.ts
1150
/**
 * Creates an empty registry mapping assertion type names to implementations.
 *
 * @returns {{register: function(string, object): void, resolve: function(string): object, has: function(string): boolean}}
 *   `register` throws on a duplicate type; `resolve` throws on an unknown type
 *   and lists the registered ones in the message.
 */
function createAssertionRegistry() {
  const impls = /* @__PURE__ */ new Map();

  const register = (type, impl) => {
    if (impls.has(type)) {
      throw new Error(`Assertion "${type}" is already registered \u2014 assertion types must be unique.`);
    }
    impls.set(type, impl);
  };

  const resolve = (type) => {
    const impl = impls.get(type);
    if (impl !== void 0) {
      return impl;
    }
    const known = [...impls.keys()].join(", ") || "(none)";
    throw new Error(`Unknown assertion type "${type}". Registered: ${known}.`);
  };

  const has = (type) => impls.has(type);

  return { register, resolve, has };
}
1172
/**
 * Builds a registry pre-loaded with the ten built-in assertion types.
 *
 * @returns {object} A registry as produced by `createAssertionRegistry`.
 */
function createDefaultRegistry2() {
  const registry = createAssertionRegistry();
  const builtins = [
    ["exact-match", exactMatch],
    ["contains", contains],
    ["contains-any", containsAny],
    ["contains-all", containsAll],
    ["json-schema", jsonSchema],
    ["semantic-similarity", semanticSimilarity],
    ["llm-judge", llmJudge],
    ["faithfulness", faithfulness],
    ["answer-relevance", answerRelevance],
    ["context-precision", contextPrecision]
  ];
  for (const [type, impl] of builtins) {
    registry.register(type, impl);
  }
  return registry;
}
1186
// Shared registry of built-ins; created on the first lookup.
var defaultRegistry;
/**
 * Looks up a built-in assertion implementation by type name.
 *
 * @param {string} type - Assertion type, e.g. "contains".
 * @returns {object} The registered implementation.
 * @throws {Error} When the type is not registered.
 */
function getAssertion(type) {
  if (defaultRegistry === void 0 || defaultRegistry === null) {
    defaultRegistry = createDefaultRegistry2();
  }
  return defaultRegistry.resolve(type);
}
1191
+
1192
+ // src/orchestrator/judge.ts
1193
// Completion budget for judge calls when the provider config sets no `max_tokens`.
var DEFAULT_MAX_TOKENS = 1024;
/**
 * Wraps a provider as a judge: every `ask` is a temperature-0 completion whose
 * usage is passed to `onUsage` before the text is returned.
 *
 * @param {{complete: function(object): Promise<{output: string, usage: object}>}} provider
 * @param {{type: string, model: string, max_tokens?: number}} cfg - Provider settings used for judge calls.
 * @param {function(object): void} onUsage - Called with the usage of each completion.
 * @returns {{ask: function(string): Promise<string>}}
 */
function buildJudge(provider, cfg, onUsage) {
  return {
    async ask(prompt5) {
      const request = {
        provider: cfg.type,
        model: cfg.model,
        temperature: 0,
        max_tokens: cfg.max_tokens ?? DEFAULT_MAX_TOKENS,
        prompt_resolved: prompt5
      };
      const completion = await provider.complete(request);
      onUsage(completion.usage);
      return completion.output;
    }
  };
}
1209
+
1210
+ // src/orchestrator/concurrency.ts
1211
/**
 * Maps `fn` over `items` with at most `limit` calls in flight, returning the
 * results in input order.
 *
 * A limit that is not a finite number >= 1 falls back to 1; fractional limits
 * are floored. Workers share one cursor and each takes the next unclaimed
 * index as soon as its previous call settles.
 *
 * @param {Array} items - Inputs.
 * @param {number} limit - Maximum concurrent calls.
 * @param {function(*, number): Promise<*>} fn - Called with (item, index).
 * @returns {Promise<Array>}
 */
async function mapLimit(items, limit, fn) {
  const results = new Array(items.length);
  let cursor = 0;

  const drain = async () => {
    for (;;) {
      if (cursor >= items.length) {
        return;
      }
      const index = cursor;
      cursor = index + 1;
      results[index] = await fn(items[index], index);
    }
  };

  const usableLimit = Number.isFinite(limit) && limit >= 1 ? Math.floor(limit) : 1;
  const workerCount = Math.min(usableLimit, items.length);

  const workers = [];
  for (let w = 0; w < workerCount; w++) {
    workers.push(drain());
  }
  await Promise.all(workers);
  return results;
}
1226
+
1227
+ // src/orchestrator/run.ts
1228
// Completion budget for eval rows when the provider config sets no `max_tokens`.
var DEFAULT_MAX_TOKENS2 = 1024;
// Version string stamped into run results.
// NOTE(review): the registry header lists this package as 0.2.0 — confirm this constant is meant to differ.
var PLUNE_VERSION = "0.1.0";
/**
 * Error for problems in the run configuration (unknown prompt variables,
 * missing prompt sources). Instances carry `code === "CONFIG_ERROR"`.
 */
var RunConfigError = class extends Error {
  /**
   * @param {string} message - Human-readable description of the problem.
   */
  constructor(message) {
    super(message);
    this.code = "CONFIG_ERROR";
    this.name = "RunConfigError";
  }
};
1237
/**
 * Substitutes `{{ name }}` placeholders in a prompt template.
 *
 * Only the row's OWN variables count. The previous `key in vars` test also
 * matched inherited members, so `{{toString}}` or `{{constructor}}` were
 * silently replaced by a stringified built-in instead of being reported.
 * A missing `vars` is treated as "no variables", so a placeholder in such a
 * row yields the same configuration error rather than a raw TypeError.
 *
 * @param {string} template - Prompt text containing `{{ name }}` placeholders.
 * @param {object} [vars] - Variable values for the row.
 * @returns {string} The template with every placeholder replaced by `String(value)`.
 * @throws {RunConfigError} When a placeholder names a variable the row does not define.
 */
function resolvePrompt(template, vars) {
  const known = vars ?? {};
  return template.replace(/\{\{\s*(\w+)\s*\}\}/g, (_match, key) => {
    if (!Object.prototype.hasOwnProperty.call(known, key)) {
      throw new RunConfigError(`Unknown variable "${key}" referenced in prompt`);
    }
    return String(known[key]);
  });
}
1245
/**
 * Converts a thrown value into the `{code, message}` record stored on a row.
 *
 * ProviderError and AuthError keep their own `code`; everything else is
 * reported as "ERROR", using the Error message or the stringified value.
 *
 * @param {*} err - Whatever was thrown.
 * @returns {{code: string, message: string}}
 */
function toRunError(err) {
  const carriesCode = err instanceof ProviderError || err instanceof AuthError;
  if (carriesCode) {
    return { code: err.code, message: err.message };
  }
  const message = err instanceof Error ? err.message : String(err);
  return { code: "ERROR", message };
}
1251
/**
 * Estimates token usage without calling a provider: one input token per four
 * prompt characters (rounded up), and the full `maxTokens` as output.
 *
 * @param {string} prompt5 - Resolved prompt text.
 * @param {number} maxTokens - Completion budget.
 * @returns {{input_tokens: number, output_tokens: number}}
 */
function estimateUsage(prompt5, maxTokens) {
  const inputTokens = Math.ceil(prompt5.length / 4);
  return {
    input_tokens: inputTokens,
    output_tokens: maxTokens
  };
}
1254
/**
 * Runs one dataset row: resolve the prompt, obtain the output (cache hit or
 * provider call), then run the eval's assertions against it.
 *
 * - Dry run: no provider call; returns an estimated usage and no assertions.
 * - Cache hit: reuses the stored output with `cost_usd` 0.
 * - Provider failure: returns a row with `error` and no assertions.
 * - Assertion failure by exception: stops at the first throwing assertion and
 *   records it as the row's `error`; records gathered so far are kept.
 * Judge calls made by assertions add their estimated cost to the row's usage.
 *
 * @param {object} p - Row parameters: `template`, `assertions`, `row`,
 *   `providerConfig`, `provider`, `embedder`, `cache`, `now`, `dryRun`, `noCache`.
 * @returns {Promise<object>} The row result record.
 */
async function runRow(p) {
  const startedAt = p.now();
  const prompt5 = resolvePrompt(p.template, p.row.vars);
  const temperature = p.providerConfig.temperature ?? 0;
  const maxTokens = p.providerConfig.max_tokens ?? DEFAULT_MAX_TOKENS2;

  if (p.dryRun) {
    const estimate = estimateUsage(prompt5, maxTokens);
    const estimatedUsage = { ...estimate, cost_usd: p.provider.estimateCost(estimate).cost_usd };
    return { vars: p.row.vars, output: null, cached: false, usage: estimatedUsage, assertions: [] };
  }

  // The same field set identifies the cache entry and forms the provider request;
  // a fresh object is built for each use.
  const buildRequest = () => ({
    provider: p.providerConfig.type,
    model: p.providerConfig.model,
    temperature,
    max_tokens: maxTokens,
    prompt_resolved: prompt5
  });

  const key = cacheKey(buildRequest());
  const hit = p.noCache ? void 0 : p.cache.get(key);
  const cached = hit !== void 0;

  let output;
  let usage;
  if (cached) {
    output = hit.output;
    usage = {
      input_tokens: hit.usage.input_tokens,
      output_tokens: hit.usage.output_tokens,
      cost_usd: 0
    };
  } else {
    try {
      const res = await p.provider.complete(buildRequest());
      output = res.output;
      usage = {
        input_tokens: res.usage.input_tokens,
        output_tokens: res.usage.output_tokens,
        // Prefer the provider-reported actual cost (res.cost_usd) when present; resolveCost still
        // lets a config pricing override win, else falls back to the table estimate (ADR-PRC01).
        cost_usd: p.provider.estimateCost(res.usage, res.cost_usd).cost_usd
      };
      if (!p.noCache) {
        p.cache.set(key, res);
      }
    } catch (err) {
      return {
        vars: p.row.vars,
        output: null,
        cached: false,
        latency_ms: p.now() - startedAt,
        error: toRunError(err),
        assertions: []
      };
    }
  }

  // Judge-backed assertions reuse the row's provider; their cost is tallied separately.
  let judgeCost = 0;
  const judge = buildJudge(p.provider, p.providerConfig, (judgeUsage) => {
    judgeCost += p.provider.estimateCost(judgeUsage).cost_usd;
  });

  const records = [];
  let assertionError;
  for (const assertion of p.assertions) {
    let result;
    try {
      result = await getAssertion(assertion.type).run({
        output,
        vars: p.row.vars,
        row: p.row,
        params: assertion,
        embedder: p.embedder,
        judge
      });
    } catch (err) {
      assertionError = toRunError(err);
      break;
    }
    const record = { type: assertion.type, passed: result.passed };
    if (result.score !== void 0) {
      record.score = result.score;
    }
    if (result.reason !== void 0) {
      record.reason = result.reason;
    }
    records.push(record);
  }

  const rowResult = {
    vars: p.row.vars,
    output,
    cached,
    usage: { ...usage, cost_usd: usage.cost_usd + judgeCost },
    latency_ms: p.now() - startedAt
  };
  if (assertionError !== void 0) {
    rowResult.error = assertionError;
  }
  rowResult.assertions = records;
  return rowResult;
}
1350
/**
 * Narrows the eval list to those picked by `only`.
 *
 * Each selector is either `tag:<name>` (matches evals whose `tags` include the
 * name) or an eval id. With no selectors the original array is returned as is.
 *
 * @param {Array<{id: string, tags?: string[]}>} evals - All configured evals.
 * @param {string[]} [only] - Selectors.
 * @returns {Array} The matching evals, in their original order.
 */
function selectEvals(evals, only) {
  if (only === void 0 || only.length === 0) {
    return evals;
  }

  const matches = (ev, selector) => {
    if (selector.startsWith("tag:")) {
      const tags = ev.tags ?? [];
      return tags.includes(selector.slice(4));
    }
    return ev.id === selector;
  };

  return evals.filter((ev) => only.some((selector) => matches(ev, selector)));
}
1358
/**
 * Returns the prompt template for an eval: the inline `prompt` when present,
 * otherwise the contents of `prompt_file` resolved against `baseDir`.
 *
 * Read failures are no longer all reported as "not found": only ENOENT keeps
 * that message, while any other failure (permissions, path is a directory, ...)
 * says the file could not be read and why. The underlying error is attached as
 * `cause` in both cases.
 *
 * @param {{id: string, prompt?: string, prompt_file?: string}} ev - Eval definition.
 * @param {string} baseDir - Directory that `prompt_file` is relative to.
 * @returns {string} The template text.
 * @throws {RunConfigError} When neither source is set or the file cannot be read.
 */
function resolveTemplate(ev, baseDir) {
  if (ev.prompt !== void 0) return ev.prompt;
  if (ev.prompt_file === void 0) {
    throw new RunConfigError(`Eval "${ev.id}" has neither prompt nor prompt_file`);
  }
  try {
    return fs3.readFileSync(path2.resolve(baseDir, ev.prompt_file), "utf8");
  } catch (err) {
    let detail;
    if (err !== null && typeof err === "object" && err.code === "ENOENT") {
      detail = `prompt_file not found: ${ev.prompt_file}`;
    } else {
      const why = err instanceof Error ? err.message : String(err);
      detail = `prompt_file could not be read: ${ev.prompt_file} (${why})`;
    }
    const wrapped = new RunConfigError(`Eval "${ev.id}": ${detail}`);
    wrapped.cause = err;
    throw wrapped;
  }
}
1369
/**
 * Buckets a row result: "errored" when it carries an `error`, "failed" when
 * any assertion did not pass, otherwise "passed".
 *
 * @param {{error?: object, assertions: Array<{passed: boolean}>}} row
 * @returns {"errored" | "failed" | "passed"}
 */
function classify(row) {
  if (row.error !== void 0) {
    return "errored";
  }
  const anyFailed = row.assertions.some((assertion) => !assertion.passed);
  return anyFailed ? "failed" : "passed";
}
1374
/**
 * Runs the selected evals and assembles the run report.
 *
 * All selected evals are prepared first (template loaded, dataset loaded,
 * every row's placeholders checked), so configuration errors surface before
 * any provider call. Evals then run one after another, each fanning its rows
 * out through `mapLimit`. With `options.bail`, the first eval that does not
 * fully pass ends the run.
 *
 * @param {object} config - Loaded configuration (`evals`, `provider`, ...).
 * @param {object} options - `dryRun`, `noCache`, `only`, `concurrency`, `bail` (all optional).
 * @param {object} deps - `now`, `baseDir`, `loadDataset`, `resolveProvider`, `embedder`, `cache`.
 * @returns {Promise<object>} Report with `schema`, `plune_version`, timestamps,
 *   `config_hash`, `summary` and per-eval results.
 */
async function runOrchestration(config, options, deps) {
  const started = deps.now();
  const dryRun = options.dryRun ?? false;
  const noCache = options.noCache ?? false;

  // Stage 1: load and validate everything up front.
  const prepared = selectEvals(config.evals, options.only).map((ev) => {
    const template = resolveTemplate(ev, deps.baseDir);
    const rows = deps.loadDataset(ev.dataset, deps.baseDir);
    for (const row of rows) {
      // Result discarded: this call only checks that every placeholder resolves.
      resolvePrompt(template, row.vars);
    }
    return { ev, template, rows };
  });

  // Stage 2: execute the evals sequentially.
  const evalResults = [];
  for (const { ev, template, rows } of prepared) {
    const providerConfig = { ...config.provider, ...ev.provider };
    const provider = deps.resolveProvider(providerConfig);
    const limit = options.concurrency ?? providerConfig.concurrency ?? 4;

    const executeRow = (row) =>
      runRow({
        template,
        assertions: ev.assertions,
        row,
        providerConfig,
        provider,
        embedder: deps.embedder,
        cache: deps.cache,
        now: deps.now,
        dryRun,
        noCache
      });
    const rowResults = await mapLimit(rows, limit, executeRow);

    const passed = rowResults.every((rowResult) => classify(rowResult) === "passed");
    evalResults.push({ id: ev.id, tags: ev.tags ?? [], rows: rowResults, passed });
    if (options.bail === true && !passed) {
      break;
    }
  }

  // Stage 3: aggregate.
  const finished = deps.now();
  const summary = {
    total: 0,
    passed: 0,
    failed: 0,
    errored: 0,
    cost_usd: 0,
    duration_ms: finished - started
  };
  for (const row of evalResults.flatMap((evalResult) => evalResult.rows)) {
    summary.total += 1;
    summary[classify(row)] += 1;
    summary.cost_usd += row.usage?.cost_usd ?? 0;
  }

  return {
    schema: 1,
    plune_version: PLUNE_VERSION,
    started_at: new Date(started).toISOString(),
    finished_at: new Date(finished).toISOString(),
    config_hash: configHash(config),
    summary,
    evals: evalResults
  };
}
1437
+
1438
+ // src/orchestrator/dataset.ts
1439
+ var fs4 = __toESM(require("fs"), 1);
1440
+ var path3 = __toESM(require("path"), 1);
1441
/**
 * Loads the rows of a dataset.
 *
 * A non-string reference is an inline dataset and its `examples` are returned
 * directly. A string is a path (relative to `baseDir`) to a JSON Lines file:
 * one JSON value per line, blank lines ignored.
 *
 * A malformed line used to surface as a bare `JSON.parse` SyntaxError with no
 * hint of where it came from; it is now re-thrown as a SyntaxError naming the
 * dataset and the 1-based line number, with the original error as `cause`.
 *
 * @param {string | {examples: Array}} ref - Dataset path or inline dataset.
 * @param {string} baseDir - Directory that a path reference is relative to.
 * @returns {Array} The dataset rows.
 * @throws {SyntaxError} When a non-blank line is not valid JSON.
 */
function loadDataset(ref, baseDir) {
  if (typeof ref !== "string") {
    return ref.examples;
  }
  const content = fs4.readFileSync(path3.resolve(baseDir, ref), "utf8");
  const rows = [];
  content.split(/\r?\n/).forEach((raw, index) => {
    const line = raw.trim();
    if (line.length === 0) return;
    try {
      rows.push(JSON.parse(line));
    } catch (err) {
      const why = err instanceof Error ? err.message : String(err);
      const wrapped = new SyntaxError(`Invalid JSON in dataset "${ref}" at line ${index + 1}: ${why}`);
      wrapped.cause = err;
      throw wrapped;
    }
  });
  return rows;
}
1448
+
1449
+ // src/cli/commands/run.ts
1450
// Cache stand-in that stores nothing: every lookup misses and all other operations do nothing.
var NOOP_CACHE = {
  get: () => void 0,
  set: () => {},
  clear: () => {},
  close: () => {}
};
1454
/**
 * Tells whether the mock provider is requested, i.e. whether
 * `PLUNE_MOCK_PROVIDER` is exactly the string "1".
 *
 * @param {object} env - Environment variables.
 * @returns {boolean}
 */
function isMockMode(env) {
  const flag = env["PLUNE_MOCK_PROVIDER"];
  return flag === "1";
}
1457
/**
 * Assembles the production dependencies for a run.
 *
 * Ensures `<baseDir>/.plune` exists, picks the mock or real provider resolver
 * based on the current environment, and opens the cache file there unless
 * this is a dry run (which gets the no-op cache).
 *
 * @param {object} config - Loaded configuration; `config.pricing` is forwarded to real providers.
 * @param {string} baseDir - Project directory.
 * @param {boolean} [dryRun] - When truthy no cache file is opened.
 * @returns {object} `{resolveProvider, embedder, cache, now, loadDataset, baseDir}`.
 */
function buildRealDeps(config, baseDir, dryRun) {
  const stateDir = path4.join(baseDir, ".plune");
  fs5.mkdirSync(stateDir, { recursive: true });

  const resolveProvider = isMockMode(process.env)
    ? () => makeMockProvider()
    : (cfg) => getProvider(cfg, process.env, config.pricing);
  const embedder = getDefaultEmbedder();
  const cache = dryRun ? NOOP_CACHE : openCache(path4.join(stateDir, "cache.db"));

  return {
    resolveProvider,
    embedder,
    cache,
    now: Date.now,
    loadDataset,
    baseDir
  };
}
1469
/**
 * Writes the run report to `<baseDir>/.plune/last-run.json` as
 * 2-space-indented JSON, creating the directory if needed.
 *
 * @param {object} result - Run report to store.
 * @param {string} baseDir - Project directory.
 * @returns {void}
 */
function persist(result, baseDir) {
  const stateDir = path4.join(baseDir, ".plune");
  fs5.mkdirSync(stateDir, { recursive: true });
  const target = path4.join(stateDir, "last-run.json");
  const serialized = JSON.stringify(result, null, 2);
  fs5.writeFileSync(target, serialized);
}
1474
/**
 * Entry point of the `run` command: load the configuration, build the
 * dependencies, execute the run, store the report and return it.
 *
 * Relative paths resolve against the config file's directory when
 * `options.configPath` is given, otherwise against the current working
 * directory. The cache is closed whether or not the run succeeds.
 *
 * @param {object} options - `configPath`, `dryRun`, `only`, `concurrency`, `noCache`, `bail`.
 * @param {function(object, string): object} [depsFactory] - Overrides dependency construction.
 * @returns {Promise<object>} The run report.
 */
async function handleRun(options, depsFactory) {
  const hasConfigPath = options.configPath !== void 0;
  const config = await loadConfig(hasConfigPath ? { configPath: options.configPath } : {});
  const baseDir = hasConfigPath
    ? path4.dirname(path4.resolve(options.configPath))
    : process.cwd();

  const makeDeps = depsFactory ?? ((c, d) => buildRealDeps(c, d, options.dryRun));
  const deps = makeDeps(config, baseDir);

  try {
    // `dryRun` is always forwarded; the other flags only when the caller set them.
    const runOptions = { dryRun: options.dryRun };
    for (const name of ["only", "concurrency", "noCache", "bail"]) {
      if (options[name] !== void 0) {
        runOptions[name] = options[name];
      }
    }
    const result = await runOrchestration(config, runOptions, deps);
    persist(result, baseDir);
    return result;
  } finally {
    deps.cache.close();
  }
}
1499
+ // Annotate the CommonJS export names for ESM import in node:
1500
+ 0 && (module.exports = {
1501
+ run
1502
+ });