@plune-ai/cli 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +139 -0
- package/dist/cli.cjs +2721 -0
- package/dist/index.cjs +1502 -0
- package/dist/index.d.cts +215 -0
- package/dist/index.d.ts +215 -0
- package/dist/index.js +1465 -0
- package/package.json +100 -0
package/dist/cli.cjs
ADDED
|
@@ -0,0 +1,2721 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
"use strict";
|
|
3
|
+
var __create = Object.create;
|
|
4
|
+
var __defProp = Object.defineProperty;
|
|
5
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
6
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
7
|
+
var __getProtoOf = Object.getPrototypeOf;
|
|
8
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
9
|
+
var __esm = (fn, res) => function __init() {
|
|
10
|
+
return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
|
|
11
|
+
};
|
|
12
|
+
var __export = (target, all) => {
|
|
13
|
+
for (var name in all)
|
|
14
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
15
|
+
};
|
|
16
|
+
var __copyProps = (to, from, except, desc) => {
|
|
17
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
18
|
+
for (let key of __getOwnPropNames(from))
|
|
19
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
20
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
21
|
+
}
|
|
22
|
+
return to;
|
|
23
|
+
};
|
|
24
|
+
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
25
|
+
// If the importer is in node compatibility mode or this is not an ESM
|
|
26
|
+
// file that has been converted to a CommonJS file using a Babel-
|
|
27
|
+
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
28
|
+
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
29
|
+
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
30
|
+
mod
|
|
31
|
+
));
|
|
32
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
33
|
+
|
|
34
|
+
// node_modules/.pnpm/tsup@8.5.1_postcss@8.5.15_t_55a87a10d51c31d069b35a93bf4e9619/node_modules/tsup/assets/cjs_shims.js
|
|
35
|
+
var getImportMetaUrl, importMetaUrl;
|
|
36
|
+
var init_cjs_shims = __esm({
|
|
37
|
+
"node_modules/.pnpm/tsup@8.5.1_postcss@8.5.15_t_55a87a10d51c31d069b35a93bf4e9619/node_modules/tsup/assets/cjs_shims.js"() {
|
|
38
|
+
"use strict";
|
|
39
|
+
getImportMetaUrl = () => typeof document === "undefined" ? new URL(`file:${__filename}`).href : document.currentScript && document.currentScript.tagName.toUpperCase() === "SCRIPT" ? document.currentScript.src : new URL("main.js", document.baseURI).href;
|
|
40
|
+
importMetaUrl = /* @__PURE__ */ getImportMetaUrl();
|
|
41
|
+
}
|
|
42
|
+
});
|
|
43
|
+
|
|
44
|
+
// src/cli/env.ts
|
|
45
|
+
var env_exports = {};
|
|
46
|
+
__export(env_exports, {
|
|
47
|
+
loadEnv: () => loadEnv
|
|
48
|
+
});
|
|
49
|
+
/**
 * Hand the `.env` file that sits in `configDir` to dotenv.
 *
 * `override: false` is passed through, so dotenv is asked not to replace
 * variables that already have a value.
 *
 * @param {string} configDir - Directory expected to contain the `.env` file.
 * @returns {void}
 */
function loadEnv(configDir) {
  const envFile = path.join(configDir, ".env");
  const dotenvOptions = { path: envFile, override: false };
  (0, import_dotenv.config)(dotenvOptions);
}
|
|
52
|
+
var import_dotenv, path;
|
|
53
|
+
var init_env = __esm({
|
|
54
|
+
"src/cli/env.ts"() {
|
|
55
|
+
"use strict";
|
|
56
|
+
init_cjs_shims();
|
|
57
|
+
import_dotenv = require("dotenv");
|
|
58
|
+
path = __toESM(require("path"), 1);
|
|
59
|
+
}
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
// src/config/env-overrides.ts
|
|
63
|
+
/**
 * Return a deep copy of `config` whose provider settings are overridden by
 * `PLUNE_*` environment variables. The input object is never mutated.
 *
 * Recognised variables:
 * - `PLUNE_PROVIDER`    -> `provider.type`
 * - `PLUNE_MODEL`       -> `provider.model`
 * - `PLUNE_TIMEOUT`     -> `provider.timeout` (positive integer)
 * - `PLUNE_MAX_RETRIES` -> `provider.max_retries` (integer >= 0)
 *
 * Overrides are applied after the config has been schema-validated, so this
 * function enforces the same numeric constraints the schema declares for
 * `timeout` and `max_retries`. Blank or invalid values are ignored and the
 * value from the config file is kept.
 *
 * @param {object} config - Validated config; must contain a `provider` object.
 * @param {Record<string, string | undefined>} env - Environment variables.
 * @returns {object} A structured clone of `config` with overrides applied.
 */
function applyEnvOverrides(config, env) {
  const result = structuredClone(config);

  // `FOO=` (blank) means "no override"; without this, Number("") === 0 would
  // silently replace a valid timeout with 0.
  const readText = (name) => {
    const raw = env[name];
    if (typeof raw !== "string") return void 0;
    const trimmed = raw.trim();
    return trimmed === "" ? void 0 : trimmed;
  };

  // Accept only integers >= `min`; this also rejects NaN, fractions and Infinity.
  const readInteger = (name, min) => {
    const text = readText(name);
    if (text === void 0) return void 0;
    const value = Number(text);
    return Number.isInteger(value) && value >= min ? value : void 0;
  };

  const providerType = readText("PLUNE_PROVIDER");
  if (providerType !== void 0) result.provider.type = providerType;

  const model = readText("PLUNE_MODEL");
  if (model !== void 0) result.provider.model = model;

  const timeout = readInteger("PLUNE_TIMEOUT", 1);
  if (timeout !== void 0) result.provider.timeout = timeout;

  const maxRetries = readInteger("PLUNE_MAX_RETRIES", 0);
  if (maxRetries !== void 0) result.provider.max_retries = maxRetries;

  return result;
}
|
|
81
|
+
var init_env_overrides = __esm({
|
|
82
|
+
"src/config/env-overrides.ts"() {
|
|
83
|
+
"use strict";
|
|
84
|
+
init_cjs_shims();
|
|
85
|
+
}
|
|
86
|
+
});
|
|
87
|
+
|
|
88
|
+
// src/config/errors.ts
|
|
89
|
+
var errors_exports = {};
|
|
90
|
+
__export(errors_exports, {
|
|
91
|
+
ConfigNotFoundError: () => ConfigNotFoundError,
|
|
92
|
+
ConfigValidationError: () => ConfigValidationError,
|
|
93
|
+
NonTtyError: () => NonTtyError,
|
|
94
|
+
YamlParseError: () => YamlParseError
|
|
95
|
+
});
|
|
96
|
+
var ConfigNotFoundError, YamlParseError, ConfigValidationError, NonTtyError;
|
|
97
|
+
var init_errors = __esm({
|
|
98
|
+
"src/config/errors.ts"() {
|
|
99
|
+
"use strict";
|
|
100
|
+
init_cjs_shims();
|
|
101
|
+
ConfigNotFoundError = class extends Error {
|
|
102
|
+
code = "CFG_NOT_FOUND";
|
|
103
|
+
constructor(message) {
|
|
104
|
+
super(message);
|
|
105
|
+
this.name = "ConfigNotFoundError";
|
|
106
|
+
}
|
|
107
|
+
};
|
|
108
|
+
YamlParseError = class extends Error {
|
|
109
|
+
code = "YAML_PARSE_ERROR";
|
|
110
|
+
constructor(message) {
|
|
111
|
+
super(message);
|
|
112
|
+
this.name = "YamlParseError";
|
|
113
|
+
}
|
|
114
|
+
};
|
|
115
|
+
ConfigValidationError = class extends Error {
|
|
116
|
+
code = "CONFIG_VALIDATION_ERROR";
|
|
117
|
+
issues;
|
|
118
|
+
constructor(message, issues) {
|
|
119
|
+
super(message);
|
|
120
|
+
this.name = "ConfigValidationError";
|
|
121
|
+
this.issues = issues;
|
|
122
|
+
}
|
|
123
|
+
};
|
|
124
|
+
NonTtyError = class extends Error {
|
|
125
|
+
constructor() {
|
|
126
|
+
super("runInitWizard requires an interactive terminal (TTY)");
|
|
127
|
+
this.name = "NonTtyError";
|
|
128
|
+
}
|
|
129
|
+
};
|
|
130
|
+
}
|
|
131
|
+
});
|
|
132
|
+
|
|
133
|
+
// src/config/discover.ts
|
|
134
|
+
/**
 * Locate the config file to load.
 *
 * When `override` is given it is used verbatim and must exist. Otherwise the
 * search starts in `cwd` and walks towards the filesystem root, checking each
 * directory for `FILENAME`, and gives up after `MAX_STEPS` directories.
 *
 * @param {string} cwd - Directory where the upward search starts.
 * @param {string} [override] - Explicit config path; disables the search.
 * @returns {string} Path of the config file that was found.
 * @throws {ConfigNotFoundError} If `override` does not exist or no config
 *   file is found during the upward walk.
 */
function discoverConfigPath(cwd, override) {
  if (override !== void 0) {
    if (!fs.existsSync(override)) {
      throw new ConfigNotFoundError(`Config file not found: ${override}`);
    }
    return override;
  }

  let current = cwd;
  for (let hops = 0; hops < MAX_STEPS; hops += 1) {
    const configFile = path2.join(current, FILENAME);
    if (fs.existsSync(configFile)) return configFile;

    // dirname() of the root is the root itself: nothing left to climb.
    const above = path2.dirname(current);
    if (above === current) break;
    current = above;
  }

  throw new ConfigNotFoundError(`No ${FILENAME} found. Run "plune init" to create one.`);
}
|
|
159
|
+
var fs, path2, FILENAME, MAX_STEPS;
|
|
160
|
+
var init_discover = __esm({
|
|
161
|
+
"src/config/discover.ts"() {
|
|
162
|
+
"use strict";
|
|
163
|
+
init_cjs_shims();
|
|
164
|
+
fs = __toESM(require("fs"), 1);
|
|
165
|
+
path2 = __toESM(require("path"), 1);
|
|
166
|
+
init_errors();
|
|
167
|
+
FILENAME = "plune.yaml";
|
|
168
|
+
MAX_STEPS = 50;
|
|
169
|
+
}
|
|
170
|
+
});
|
|
171
|
+
|
|
172
|
+
// src/config/schema.ts
|
|
173
|
+
var import_zod, providerConfigSchema, modelPriceSchema, pricingSchema, datasetRowSchema, datasetRefSchema, exactMatchAssertionSchema, containsAssertionSchema, containsAnyAssertionSchema, containsAllAssertionSchema, jsonSchemaAssertionSchema, llmJudgeAssertionSchema, semanticSimilarityAssertionSchema, faithfulnessAssertionSchema, answerRelevanceAssertionSchema, contextPrecisionAssertionSchema, assertionConfigSchema, evalConfigSchema, pluneConfigSchema;
|
|
174
|
+
var init_schema = __esm({
|
|
175
|
+
"src/config/schema.ts"() {
|
|
176
|
+
"use strict";
|
|
177
|
+
init_cjs_shims();
|
|
178
|
+
import_zod = require("zod");
|
|
179
|
+
providerConfigSchema = import_zod.z.object({
|
|
180
|
+
type: import_zod.z.enum(["anthropic", "openai", "openrouter"]),
|
|
181
|
+
model: import_zod.z.string().min(1),
|
|
182
|
+
temperature: import_zod.z.number().min(0).max(2).optional(),
|
|
183
|
+
max_tokens: import_zod.z.number().int().positive().optional(),
|
|
184
|
+
concurrency: import_zod.z.number().int().positive().optional(),
|
|
185
|
+
timeout: import_zod.z.number().int().positive().optional(),
|
|
186
|
+
max_retries: import_zod.z.number().int().min(0).optional()
|
|
187
|
+
});
|
|
188
|
+
modelPriceSchema = import_zod.z.object({
|
|
189
|
+
input_per_1k_usd: import_zod.z.number().nonnegative(),
|
|
190
|
+
output_per_1k_usd: import_zod.z.number().nonnegative()
|
|
191
|
+
});
|
|
192
|
+
pricingSchema = import_zod.z.record(modelPriceSchema);
|
|
193
|
+
datasetRowSchema = import_zod.z.object({
|
|
194
|
+
vars: import_zod.z.record(import_zod.z.union([import_zod.z.string(), import_zod.z.number(), import_zod.z.boolean()])),
|
|
195
|
+
expected: import_zod.z.string().optional()
|
|
196
|
+
});
|
|
197
|
+
datasetRefSchema = import_zod.z.union([
|
|
198
|
+
import_zod.z.string().min(1),
|
|
199
|
+
import_zod.z.object({ examples: import_zod.z.array(datasetRowSchema).min(1) })
|
|
200
|
+
]);
|
|
201
|
+
exactMatchAssertionSchema = import_zod.z.object({
|
|
202
|
+
type: import_zod.z.literal("exact-match"),
|
|
203
|
+
value: import_zod.z.string(),
|
|
204
|
+
trim: import_zod.z.boolean().optional(),
|
|
205
|
+
ignore_case: import_zod.z.boolean().optional()
|
|
206
|
+
});
|
|
207
|
+
containsAssertionSchema = import_zod.z.object({
|
|
208
|
+
type: import_zod.z.literal("contains"),
|
|
209
|
+
value: import_zod.z.string(),
|
|
210
|
+
ignore_case: import_zod.z.boolean().optional()
|
|
211
|
+
});
|
|
212
|
+
containsAnyAssertionSchema = import_zod.z.object({
|
|
213
|
+
type: import_zod.z.literal("contains-any"),
|
|
214
|
+
values: import_zod.z.array(import_zod.z.string()).min(1),
|
|
215
|
+
ignore_case: import_zod.z.boolean().optional()
|
|
216
|
+
});
|
|
217
|
+
containsAllAssertionSchema = import_zod.z.object({
|
|
218
|
+
type: import_zod.z.literal("contains-all"),
|
|
219
|
+
values: import_zod.z.array(import_zod.z.string()).min(1),
|
|
220
|
+
ignore_case: import_zod.z.boolean().optional()
|
|
221
|
+
});
|
|
222
|
+
jsonSchemaAssertionSchema = import_zod.z.object({
|
|
223
|
+
type: import_zod.z.literal("json-schema"),
|
|
224
|
+
schema: import_zod.z.record(import_zod.z.unknown()),
|
|
225
|
+
extract: import_zod.z.enum(["auto", "strict"]).optional()
|
|
226
|
+
});
|
|
227
|
+
llmJudgeAssertionSchema = import_zod.z.object({
|
|
228
|
+
type: import_zod.z.literal("llm-judge"),
|
|
229
|
+
criteria: import_zod.z.string().min(1),
|
|
230
|
+
provider: providerConfigSchema.partial().optional(),
|
|
231
|
+
pass_threshold: import_zod.z.number().min(0).max(1).optional()
|
|
232
|
+
});
|
|
233
|
+
semanticSimilarityAssertionSchema = import_zod.z.object({
|
|
234
|
+
type: import_zod.z.literal("semantic-similarity"),
|
|
235
|
+
reference: import_zod.z.string().min(1),
|
|
236
|
+
threshold: import_zod.z.number().min(0).max(1).optional()
|
|
237
|
+
});
|
|
238
|
+
faithfulnessAssertionSchema = import_zod.z.object({
|
|
239
|
+
type: import_zod.z.literal("faithfulness"),
|
|
240
|
+
context: import_zod.z.string().min(1),
|
|
241
|
+
threshold: import_zod.z.number().min(0).max(1).optional()
|
|
242
|
+
});
|
|
243
|
+
answerRelevanceAssertionSchema = import_zod.z.object({
|
|
244
|
+
type: import_zod.z.literal("answer-relevance"),
|
|
245
|
+
question: import_zod.z.string().min(1),
|
|
246
|
+
threshold: import_zod.z.number().min(0).max(1).optional()
|
|
247
|
+
});
|
|
248
|
+
contextPrecisionAssertionSchema = import_zod.z.object({
|
|
249
|
+
type: import_zod.z.literal("context-precision"),
|
|
250
|
+
context: import_zod.z.string().min(1),
|
|
251
|
+
question: import_zod.z.string().min(1),
|
|
252
|
+
threshold: import_zod.z.number().min(0).max(1).optional()
|
|
253
|
+
});
|
|
254
|
+
assertionConfigSchema = import_zod.z.discriminatedUnion("type", [
|
|
255
|
+
exactMatchAssertionSchema,
|
|
256
|
+
containsAssertionSchema,
|
|
257
|
+
containsAnyAssertionSchema,
|
|
258
|
+
containsAllAssertionSchema,
|
|
259
|
+
jsonSchemaAssertionSchema,
|
|
260
|
+
llmJudgeAssertionSchema,
|
|
261
|
+
semanticSimilarityAssertionSchema,
|
|
262
|
+
faithfulnessAssertionSchema,
|
|
263
|
+
answerRelevanceAssertionSchema,
|
|
264
|
+
contextPrecisionAssertionSchema
|
|
265
|
+
]);
|
|
266
|
+
evalConfigSchema = import_zod.z.object({
|
|
267
|
+
id: import_zod.z.string().min(1).regex(/^[a-z0-9_-]+$/, "id must be lowercase slug"),
|
|
268
|
+
description: import_zod.z.string().optional(),
|
|
269
|
+
tags: import_zod.z.array(import_zod.z.string()).optional(),
|
|
270
|
+
provider: providerConfigSchema.partial().optional(),
|
|
271
|
+
prompt: import_zod.z.string().optional(),
|
|
272
|
+
prompt_file: import_zod.z.string().optional(),
|
|
273
|
+
dataset: datasetRefSchema,
|
|
274
|
+
assertions: import_zod.z.array(assertionConfigSchema)
|
|
275
|
+
}).refine(
|
|
276
|
+
(data) => !(data.prompt !== void 0 && data.prompt_file !== void 0),
|
|
277
|
+
{ message: "prompt and prompt_file are mutually exclusive" }
|
|
278
|
+
);
|
|
279
|
+
pluneConfigSchema = import_zod.z.object({
|
|
280
|
+
version: import_zod.z.literal(1),
|
|
281
|
+
provider: providerConfigSchema,
|
|
282
|
+
defaults: import_zod.z.object({ assertions: import_zod.z.array(assertionConfigSchema).optional() }).optional(),
|
|
283
|
+
pricing: pricingSchema.optional(),
|
|
284
|
+
evals: import_zod.z.array(evalConfigSchema).min(1)
|
|
285
|
+
}).strict();
|
|
286
|
+
}
|
|
287
|
+
});
|
|
288
|
+
|
|
289
|
+
// src/config/loader.ts
|
|
290
|
+
/**
 * Discover, read, parse and validate the plune config, then apply
 * environment-variable overrides.
 *
 * @param {object} [opts]
 * @param {string} [opts.configPath] - Explicit config file path.
 * @param {string} [opts.cwd=process.cwd()] - Where config discovery starts.
 * @param {Record<string, string | undefined>} [opts.env=process.env] -
 *   Environment used for overrides.
 * @returns {Promise<object>} The validated config with overrides applied.
 * @throws {ConfigNotFoundError} If no config is found or it cannot be read;
 *   for read failures the underlying I/O error is available as `cause`.
 * @throws {YamlParseError} If the file is not valid YAML (`cause` holds the
 *   parser's error).
 * @throws {ConfigValidationError} If the parsed document fails schema
 *   validation; `issues` lists one "path: message" string per problem.
 */
async function loadConfig(opts = {}) {
  const { configPath, cwd = process.cwd(), env = process.env } = opts;
  const resolvedPath = discoverConfigPath(cwd, configPath);

  let raw;
  try {
    raw = await fs2.readFile(resolvedPath, "utf8");
  } catch (err) {
    // Keep the original I/O error (EACCES, EISDIR, ...) reachable so a
    // permission problem can be told apart from a missing file.
    const unreadable = new ConfigNotFoundError(`Cannot read config file: ${resolvedPath}`);
    unreadable.cause = err;
    throw unreadable;
  }

  let parsed;
  try {
    parsed = (0, import_yaml.parse)(raw);
  } catch (err) {
    // Anything can be thrown in JavaScript; avoid printing "undefined".
    const reason = err instanceof Error ? err.message : String(err);
    const invalidYaml = new YamlParseError(`YAML parse error in ${resolvedPath}: ${reason}`);
    invalidYaml.cause = err;
    throw invalidYaml;
  }

  const result = pluneConfigSchema.safeParse(parsed);
  if (!result.success) {
    const issues = result.error.issues.map(
      (issue) => `${issue.path.join(".")}: ${issue.message}`
    );
    throw new ConfigValidationError(`Config validation failed in ${resolvedPath}`, issues);
  }

  return applyEnvOverrides(result.data, env);
}
|
|
319
|
+
var fs2, import_yaml;
|
|
320
|
+
var init_loader = __esm({
|
|
321
|
+
"src/config/loader.ts"() {
|
|
322
|
+
"use strict";
|
|
323
|
+
init_cjs_shims();
|
|
324
|
+
fs2 = __toESM(require("fs/promises"), 1);
|
|
325
|
+
import_yaml = require("yaml");
|
|
326
|
+
init_env_overrides();
|
|
327
|
+
init_errors();
|
|
328
|
+
init_discover();
|
|
329
|
+
init_schema();
|
|
330
|
+
}
|
|
331
|
+
});
|
|
332
|
+
|
|
333
|
+
// src/providers/errors.ts
|
|
334
|
+
var errors_exports2 = {};
|
|
335
|
+
__export(errors_exports2, {
|
|
336
|
+
AuthError: () => AuthError,
|
|
337
|
+
ProviderError: () => ProviderError,
|
|
338
|
+
classifyError: () => classifyError,
|
|
339
|
+
messageOf: () => messageOf,
|
|
340
|
+
normalizeProviderError: () => normalizeProviderError,
|
|
341
|
+
redactSecrets: () => redactSecrets
|
|
342
|
+
});
|
|
343
|
+
/**
 * Read a numeric `status` property from an unknown thrown value.
 *
 * @param {unknown} err - Any thrown value.
 * @returns {number | undefined} `err.status` when `err` is an object carrying
 *   a numeric `status`; otherwise `undefined`.
 */
function statusOf(err) {
  if (err === null || typeof err !== "object") return void 0;
  if (!("status" in err)) return void 0;
  const { status } = err;
  return typeof status === "number" ? status : void 0;
}
|
|
350
|
+
/**
 * Read a string `code` property from an unknown thrown value.
 *
 * @param {unknown} err - Any thrown value.
 * @returns {string | undefined} `err.code` when `err` is an object carrying a
 *   string `code`; otherwise `undefined`.
 */
function codeOf(err) {
  if (err === null || typeof err !== "object") return void 0;
  if (!("code" in err)) return void 0;
  const { code } = err;
  return typeof code === "string" ? code : void 0;
}
|
|
357
|
+
/**
 * Sort a thrown value into one of three retry classes.
 *
 * With a numeric `status`: 401/403 are "auth", 429 and 5xx are "transient",
 * everything else is "fatal". Without a status, a known network error code or
 * a transport-style error name makes it "transient"; otherwise "fatal".
 *
 * @param {unknown} err - Any thrown value.
 * @returns {"auth" | "transient" | "fatal"} The error class.
 */
function classifyError(err) {
  const status = statusOf(err);

  if (status === void 0) {
    // No HTTP status: fall back to socket-level codes, then error names.
    const code = codeOf(err);
    const hasNetworkCode = code !== void 0 && NETWORK_ERROR_CODES.has(code);
    return hasNetworkCode || isTransportError(err) ? "transient" : "fatal";
  }

  if (status === 401 || status === 403) return "auth";
  const isServerError = status >= 500 && status <= 599;
  return status === 429 || isServerError ? "transient" : "fatal";
}
|
|
369
|
+
/**
 * Tell whether a thrown value looks like a connection/timeout failure, judged
 * by its `name` or its constructor's name.
 *
 * @param {unknown} err - Any thrown value.
 * @returns {boolean} `true` when either name contains one of: APIConnection,
 *   ConnectionError, TimeoutError, AbortError, FetchError.
 */
function isTransportError(err) {
  if (err === null || typeof err !== "object") return false;

  const transportName = /APIConnection|ConnectionError|TimeoutError|AbortError|FetchError/;
  for (const candidate of [err.name, err.constructor?.name]) {
    if (typeof candidate === "string" && transportName.test(candidate)) return true;
  }
  return false;
}
|
|
379
|
+
/**
 * Remove credentials from a message before it is shown or logged.
 *
 * Two passes are made:
 * 1. every occurrence of each given secret is replaced with `[REDACTED]`;
 * 2. anything that looks like an HTTP `Bearer <token>` credential is replaced
 *    with `Bearer [REDACTED]`, even if the token was not passed in.
 *
 * @param {string} message - Text that may contain credentials.
 * @param {...(string | undefined | null)} secrets - Known secret values;
 *   empty/nullish entries are ignored.
 * @returns {string} The message with credentials masked.
 */
function redactSecrets(message, ...secrets) {
  // Longest first: if one secret is a prefix of another, replacing the short
  // one first would leave the tail of the long one visible.
  const known = secrets
    .filter(Boolean)
    .map(String)
    .sort((a, b) => b.length - a.length);

  let out = message;
  for (const secret of known) {
    out = out.split(secret).join("[REDACTED]");
  }

  // Token charset follows the RFC 6750 b64token grammar (letters, digits,
  // "-._~+/" plus "=" padding); the scheme name is matched case-insensitively
  // and its original casing is kept.
  return out.replace(/(Bearer)\s+[A-Za-z0-9\-._~+\/]+=*/gi, "$1 [REDACTED]");
}
|
|
387
|
+
/**
 * Extract a human-readable message from an unknown thrown value.
 *
 * @param {unknown} err - Any thrown value.
 * @returns {string} `err.message` for Error instances; the stringified
 *   `message` property for other objects that have one; otherwise
 *   `String(err)`.
 */
function messageOf(err) {
  if (err instanceof Error) return err.message;
  const carriesMessage = typeof err === "object" && err !== null && "message" in err;
  return String(carriesMessage ? err.message : err);
}
|
|
394
|
+
/**
 * Convert any error raised while talking to a provider into an `AuthError` or
 * a `ProviderError` whose message has the API key redacted.
 *
 * - An existing `AuthError` is returned unchanged.
 * - Errors classified as "auth" become a new `AuthError` tagged with `envVar`.
 * - An existing `ProviderError` keeps its `code`; only its message is scrubbed.
 * - Everything else becomes `ProviderError("PROVIDER_ERROR", ...)`.
 *
 * @param {unknown} err - The error to normalize.
 * @param {string} secret - API key to strip from the message.
 * @param {string} envVar - Name of the environment variable holding the key.
 * @returns {Error} An `AuthError` or `ProviderError` instance.
 */
function normalizeProviderError(err, secret, envVar) {
  if (err instanceof AuthError) return err;

  const scrub = (text) => redactSecrets(text, secret);

  if (classifyError(err) === "auth") {
    return new AuthError(scrub(messageOf(err)), envVar);
  }

  return err instanceof ProviderError
    ? new ProviderError(err.code, scrub(err.message))
    : new ProviderError("PROVIDER_ERROR", scrub(messageOf(err)));
}
|
|
404
|
+
var NETWORK_ERROR_CODES, AuthError, ProviderError;
|
|
405
|
+
var init_errors2 = __esm({
|
|
406
|
+
"src/providers/errors.ts"() {
|
|
407
|
+
"use strict";
|
|
408
|
+
init_cjs_shims();
|
|
409
|
+
NETWORK_ERROR_CODES = /* @__PURE__ */ new Set([
|
|
410
|
+
"ECONNRESET",
|
|
411
|
+
"ETIMEDOUT",
|
|
412
|
+
"ECONNREFUSED",
|
|
413
|
+
"EPIPE",
|
|
414
|
+
"ENOTFOUND",
|
|
415
|
+
"EAI_AGAIN"
|
|
416
|
+
]);
|
|
417
|
+
AuthError = class extends Error {
|
|
418
|
+
code = "PROVIDER_AUTH";
|
|
419
|
+
envVar;
|
|
420
|
+
constructor(message, envVar) {
|
|
421
|
+
super(message);
|
|
422
|
+
this.name = "AuthError";
|
|
423
|
+
this.envVar = envVar;
|
|
424
|
+
}
|
|
425
|
+
};
|
|
426
|
+
ProviderError = class extends Error {
|
|
427
|
+
code;
|
|
428
|
+
constructor(code, message) {
|
|
429
|
+
super(message);
|
|
430
|
+
this.name = "ProviderError";
|
|
431
|
+
this.code = code;
|
|
432
|
+
}
|
|
433
|
+
};
|
|
434
|
+
}
|
|
435
|
+
});
|
|
436
|
+
|
|
437
|
+
// src/providers/retry.ts
|
|
438
|
+
/**
 * Work out how long the server asked us to wait, from the `Retry-After`
 * header attached to an error.
 *
 * Both forms of the header are understood: a non-negative number of seconds
 * and an HTTP-date. A date in the past yields 0.
 *
 * @param {unknown} err - Thrown value; only objects with a `headers` property
 *   are inspected.
 * @param {number} [now=Date.now()] - Current time in epoch milliseconds, used
 *   to turn an HTTP-date into a delay (injectable for tests).
 * @returns {number | undefined} Delay in milliseconds, or `undefined` when the
 *   header is absent, blank, negative or unparseable.
 */
function retryAfterMs(err, now = Date.now()) {
  if (typeof err !== "object" || err === null || !("headers" in err)) return void 0;

  const raw = readHeader(err.headers, "retry-after");
  if (raw === void 0) return void 0;

  const text = String(raw).trim();
  if (text === "") return void 0;

  // Form 1: delay-seconds.
  const seconds = Number(text);
  if (Number.isFinite(seconds)) {
    return seconds >= 0 ? seconds * 1e3 : void 0;
  }

  // Form 2: HTTP-date, converted to a delay relative to `now`.
  const retryAt = Date.parse(text);
  if (Number.isNaN(retryAt)) return void 0;
  return Math.max(0, retryAt - now);
}

/**
 * Read one header from either a `Headers`-like object (anything with a
 * `get(name)` method) or a plain object of header values.
 *
 * For plain objects the exact key is tried first, then a case-insensitive
 * match over the object's own keys, because header names are not
 * case-sensitive.
 *
 * @param {unknown} headers - Header container.
 * @param {string} name - Header name to look up.
 * @returns {string | number | undefined} The header value, or `undefined`
 *   when missing or not a string/number (for plain objects).
 */
function readHeader(headers, name) {
  if (headers === null || (typeof headers !== "object" && typeof headers !== "function")) {
    return void 0;
  }

  const getter = headers.get;
  if (typeof getter === "function") {
    const found = getter.call(headers, name);
    return found === null ? void 0 : found;
  }

  let value = headers[name];
  if (value === void 0) {
    const wanted = name.toLowerCase();
    const key = Object.keys(headers).find((candidate) => candidate.toLowerCase() === wanted);
    if (key !== void 0) value = headers[key];
  }
  return typeof value === "string" || typeof value === "number" ? value : void 0;
}
|
|
458
|
+
/**
 * Run `fn`, retrying transient failures with exponential backoff.
 *
 * Error handling per attempt:
 * - "auth" errors are rethrown untouched (retrying cannot help);
 * - "fatal" errors are rethrown as `ProviderError("PROVIDER_FATAL")`;
 * - "transient" errors are retried up to `max_retries` times, then rethrown
 *   as `ProviderError("PROVIDER_TRANSIENT_EXHAUSTED")`.
 *
 * The wait before retry `k` (0-based) is `base_delay_ms * 2^k` plus up to 10%
 * jitter, or the server's Retry-After delay if that is longer.
 *
 * @param {() => Promise<T>} fn - Operation to attempt.
 * @param {object} [options]
 * @param {number} [options.max_retries] - Retries after the first attempt.
 *   Negative values count as 0; a missing or non-numeric value also counts as
 *   0 so the loop is always bounded.
 * @param {number} [options.base_delay_ms=500] - Backoff base in milliseconds.
 * @param {(ms: number) => Promise<void>} [options.sleep] - Injectable delay.
 * @param {() => number} [options.random=Math.random] - Injectable jitter source.
 * @returns {Promise<T>} Whatever `fn` resolves to.
 * @template T
 */
async function withRetry(fn, options = {}) {
  const { base_delay_ms = 500, sleep = defaultSleep, random = Math.random } = options;

  // Math.max(0, NaN) is NaN and `attempt >= NaN` is always false, which would
  // retry forever; treat an unusable max_retries as "no retries".
  const requested = Math.floor(Number(options.max_retries));
  const maxRetries = Number.isNaN(requested) ? 0 : Math.max(0, requested);

  for (let attempt = 0; ; attempt += 1) {
    try {
      return await fn();
    } catch (err) {
      const klass = classifyError(err);
      if (klass === "auth") throw err;
      if (klass === "fatal") throw new ProviderError("PROVIDER_FATAL", messageOf(err));
      if (attempt >= maxRetries) {
        throw new ProviderError("PROVIDER_TRANSIENT_EXHAUSTED", messageOf(err));
      }

      const backoff = base_delay_ms * 2 ** attempt;
      const jitter = backoff * 0.1 * random();
      // Never wait less than the server explicitly asked for.
      const delay = Math.max(backoff + jitter, retryAfterMs(err) ?? 0);
      await sleep(delay);
    }
  }
}
|
|
480
|
+
var defaultSleep;
|
|
481
|
+
var init_retry = __esm({
|
|
482
|
+
"src/providers/retry.ts"() {
|
|
483
|
+
"use strict";
|
|
484
|
+
init_cjs_shims();
|
|
485
|
+
init_errors2();
|
|
486
|
+
defaultSleep = (ms) => new Promise((resolve5) => setTimeout(resolve5, ms));
|
|
487
|
+
}
|
|
488
|
+
});
|
|
489
|
+
|
|
490
|
+
// src/providers/prices.ts
|
|
491
|
+
var PRICE_TABLE;
|
|
492
|
+
var init_prices = __esm({
|
|
493
|
+
"src/providers/prices.ts"() {
|
|
494
|
+
"use strict";
|
|
495
|
+
init_cjs_shims();
|
|
496
|
+
PRICE_TABLE = {
|
|
497
|
+
// Anthropic
|
|
498
|
+
"claude-3-5-haiku-latest": { input_per_1k_usd: 8e-4, output_per_1k_usd: 4e-3 },
|
|
499
|
+
"claude-3-5-sonnet-latest": { input_per_1k_usd: 3e-3, output_per_1k_usd: 0.015 },
|
|
500
|
+
"claude-3-opus-latest": { input_per_1k_usd: 0.015, output_per_1k_usd: 0.075 },
|
|
501
|
+
// OpenAI
|
|
502
|
+
"gpt-4o": { input_per_1k_usd: 25e-4, output_per_1k_usd: 0.01 },
|
|
503
|
+
"gpt-4o-mini": { input_per_1k_usd: 15e-5, output_per_1k_usd: 6e-4 },
|
|
504
|
+
// OpenAI via OpenRouter (namespaced ids). OpenRouter passes OpenAI list price through for
|
|
505
|
+
// openai/* routes, so these mirror the direct entries. Only a few common ids are listed —
|
|
506
|
+
// OpenRouter has hundreds of models and dynamic routing, so most still need a `pricing` entry
|
|
507
|
+
// in plune.yaml (or report cost_usd=0). Verify against OpenRouter's current rates.
|
|
508
|
+
"openai/gpt-4o": { input_per_1k_usd: 25e-4, output_per_1k_usd: 0.01 },
|
|
509
|
+
"openai/gpt-4o-mini": { input_per_1k_usd: 15e-5, output_per_1k_usd: 6e-4 }
|
|
510
|
+
};
|
|
511
|
+
}
|
|
512
|
+
});
|
|
513
|
+
|
|
514
|
+
// src/providers/cost.ts
|
|
515
|
+
/**
 * Compute the USD cost of one call from per-1k-token prices.
 *
 * @param {{input_per_1k_usd: number, output_per_1k_usd: number}} price -
 *   Price per 1,000 input and output tokens.
 * @param {{input_tokens: number, output_tokens: number}} usage - Token counts.
 * @returns {number} Input cost plus output cost, in USD.
 */
function priceFrom(price, usage) {
  const inputCost = usage.input_tokens / 1e3 * price.input_per_1k_usd;
  const outputCost = usage.output_tokens / 1e3 * price.output_per_1k_usd;
  return inputCost + outputCost;
}
|
|
518
|
+
/**
 * Decide the USD cost of a call. Sources are tried in this order:
 *
 * 1. a user-supplied price for `model` in `overrides`;
 * 2. the cost reported by the provider (`reportedCostUsd`);
 * 3. the built-in price table;
 * 4. otherwise a warning is emitted and 0 is returned.
 *
 * Price lookups consider only a map's OWN keys, so a model id that happens to
 * match an inherited property name (e.g. "constructor") is not mistaken for a
 * price entry.
 *
 * @param {{input_tokens: number, output_tokens: number}} usage - Token counts.
 * @param {string} model - Model id used as the pricing key.
 * @param {number} [reportedCostUsd] - Cost reported by the provider, if any.
 * @param {Record<string, object>} [overrides] - User pricing keyed by model.
 * @param {object} [deps] - Injection points for tests.
 * @param {Record<string, object>} [deps.table] - Replaces the built-in table.
 * @param {(message: string) => void} [deps.warn] - Replaces the stderr warning.
 * @returns {number} Cost in USD (0 when no price is known).
 */
function resolveCost(usage, model, reportedCostUsd, overrides, deps = {}) {
  const table = deps.table ?? PRICE_TABLE;
  const warn = deps.warn ?? defaultWarn;

  // Own-property lookup only: `prices[model]` alone would also return
  // Object.prototype members such as `constructor` or `toString`.
  const ownPrice = (prices) =>
    prices != null && Object.hasOwn(prices, model) ? prices[model] : void 0;

  const override = ownPrice(overrides);
  if (override !== void 0) {
    return priceFrom(override, usage);
  }

  if (reportedCostUsd !== void 0) {
    return reportedCostUsd;
  }

  const tablePrice = ownPrice(table);
  if (tablePrice !== void 0) {
    return priceFrom(tablePrice, usage);
  }

  warn(
    `plune: no price for model "${model}" \u2014 reporting cost_usd=0. Set pricing["${model}"] in plune.yaml to track its cost.`
  );
  return 0;
}
|
|
537
|
+
var defaultWarn;
|
|
538
|
+
var init_cost = __esm({
|
|
539
|
+
"src/providers/cost.ts"() {
|
|
540
|
+
"use strict";
|
|
541
|
+
init_cjs_shims();
|
|
542
|
+
init_prices();
|
|
543
|
+
defaultWarn = (message) => {
|
|
544
|
+
process.stderr.write(message + "\n");
|
|
545
|
+
};
|
|
546
|
+
}
|
|
547
|
+
});
|
|
548
|
+
|
|
549
|
+
// src/providers/anthropic.ts
|
|
550
|
+
/**
 * Build the Anthropic provider.
 *
 * @param {object} config - Provider config; `timeout`, `max_retries` and
 *   `model` are read here.
 * @param {Record<string, string | undefined>} env - Environment holding the
 *   API key under `ENV_VAR`.
 * @param {Record<string, object>} [pricing] - User price overrides passed to
 *   cost resolution.
 * @returns {{complete: Function, estimateCost: Function}} Provider object.
 * @throws {AuthError} If the API key is missing or blank.
 */
function makeAnthropicProvider(config, env, pricing) {
  // Trim before validating AND before use: a key copied with a trailing
  // newline or space would otherwise pass the blank check and then be sent
  // as-is in the request.
  const apiKey = env[ENV_VAR]?.trim();
  if (apiKey === void 0 || apiKey === "") {
    throw new AuthError(
      `Missing ${ENV_VAR}. Set it in your environment to use the anthropic provider.`,
      ENV_VAR
    );
  }

  const client = new import_sdk.default({
    apiKey,
    // SDK-level retries are disabled; withRetry below does the retrying.
    maxRetries: 0,
    ...(config.timeout !== void 0 ? { timeout: config.timeout } : {})
  });
  const maxRetries = config.max_retries ?? DEFAULT_MAX_RETRIES;

  return {
    /**
     * Send `req.prompt_resolved` as a single user message and return the
     * concatenated text blocks plus token usage.
     */
    async complete(req) {
      try {
        const res = await withRetry(
          () => client.messages.create({
            model: req.model,
            max_tokens: req.max_tokens,
            temperature: req.temperature,
            messages: [{ role: "user", content: req.prompt_resolved }]
          }),
          { max_retries: maxRetries }
        );
        // Non-text content blocks contribute nothing to the output string.
        const output = res.content.map((block) => block.type === "text" ? block.text : "").join("");
        return {
          output,
          usage: {
            input_tokens: res.usage.input_tokens,
            output_tokens: res.usage.output_tokens
          }
        };
      } catch (err) {
        // Always surface AuthError/ProviderError with the key redacted.
        throw normalizeProviderError(err, apiKey, ENV_VAR);
      }
    },
    /** Price a call using the configured model as the pricing key. */
    estimateCost(usage, reportedCostUsd) {
      return { cost_usd: resolveCost(usage, config.model, reportedCostUsd, pricing) };
    }
  };
}
|
|
593
|
+
var import_sdk, ENV_VAR, DEFAULT_MAX_RETRIES;
|
|
594
|
+
var init_anthropic = __esm({
|
|
595
|
+
"src/providers/anthropic.ts"() {
|
|
596
|
+
"use strict";
|
|
597
|
+
init_cjs_shims();
|
|
598
|
+
import_sdk = __toESM(require("@anthropic-ai/sdk"), 1);
|
|
599
|
+
init_retry();
|
|
600
|
+
init_cost();
|
|
601
|
+
init_errors2();
|
|
602
|
+
ENV_VAR = "ANTHROPIC_API_KEY";
|
|
603
|
+
DEFAULT_MAX_RETRIES = 2;
|
|
604
|
+
}
|
|
605
|
+
});
|
|
606
|
+
|
|
607
|
+
// src/providers/openai.ts
|
|
608
|
+
/**
 * Build a provider for any OpenAI-compatible chat-completions endpoint.
 *
 * @param {object} opts
 * @param {object} opts.config - Provider config; `type`, `timeout`,
 *   `max_retries` and `model` are read here.
 * @param {Record<string, string | undefined>} opts.env - Environment holding
 *   the API key.
 * @param {Record<string, object>} [opts.pricing] - User price overrides.
 * @param {string} opts.apiKeyEnv - Name of the env var with the API key.
 * @param {string} [opts.baseURL] - Alternative API base URL.
 * @param {boolean} [opts.reportsCost] - When truthy, ask the endpoint to
 *   include the call's cost in `usage` and pass it on as `cost_usd`.
 * @returns {{complete: Function, estimateCost: Function}} Provider object.
 * @throws {AuthError} If the API key is missing or blank.
 */
function makeOpenAiCompatibleProvider(opts) {
  const { config, env, pricing, apiKeyEnv, baseURL, reportsCost } = opts;

  // Trim before validating AND before use: a key copied with a trailing
  // newline or space would otherwise pass the blank check and then be sent
  // as-is in the request.
  const apiKey = env[apiKeyEnv]?.trim();
  if (apiKey === void 0 || apiKey === "") {
    throw new AuthError(
      `Missing ${apiKeyEnv}. Set it in your environment to use the ${config.type} provider.`,
      apiKeyEnv
    );
  }

  const client = new import_openai.default({
    apiKey,
    maxRetries: 0,
    // our own withRetry owns retry (ADR-PRV03)
    ...(baseURL !== void 0 ? { baseURL } : {}),
    ...(config.timeout !== void 0 ? { timeout: config.timeout } : {})
  });
  const maxRetries = config.max_retries ?? DEFAULT_MAX_RETRIES2;

  return {
    /**
     * Send `req.prompt_resolved` as a single user message and return the first
     * choice's text plus token usage (and `cost_usd` when reported).
     */
    async complete(req) {
      try {
        const res = await withRetry(
          () => client.chat.completions.create({
            model: req.model,
            temperature: req.temperature,
            max_tokens: req.max_tokens,
            messages: [{ role: "user", content: req.prompt_resolved }],
            // OpenRouter extension: ask it to include the call's actual cost in `usage` (ADR-PRC02).
            ...(reportsCost ? { usage: { include: true } } : {})
          }),
          { max_retries: maxRetries }
        );
        const reportedCost = reportsCost ? res.usage?.cost : void 0;
        return {
          // Missing choice/content degrades to an empty string.
          output: res.choices[0]?.message?.content ?? "",
          usage: {
            input_tokens: res.usage?.prompt_tokens ?? 0,
            output_tokens: res.usage?.completion_tokens ?? 0
          },
          // Only forward a cost the endpoint actually returned as a number.
          ...(typeof reportedCost === "number" ? { cost_usd: reportedCost } : {})
        };
      } catch (err) {
        // Always surface AuthError/ProviderError with the key redacted.
        throw normalizeProviderError(err, apiKey, apiKeyEnv);
      }
    },
    /** Price a call using the configured model as the pricing key. */
    estimateCost(usage, reportedCostUsd) {
      return { cost_usd: resolveCost(usage, config.model, reportedCostUsd, pricing) };
    }
  };
}
|
|
657
|
+
/**
 * Build the OpenAI provider: the OpenAI-compatible provider authenticated via
 * `OPENAI_API_KEY`, with no custom base URL and no cost reporting.
 *
 * @param {object} config - Provider config.
 * @param {Record<string, string | undefined>} env - Environment variables.
 * @param {Record<string, object>} [pricing] - User price overrides.
 * @returns {object} Provider object.
 */
function makeOpenAiProvider(config, env, pricing) {
  const options = { config, env, pricing, apiKeyEnv: "OPENAI_API_KEY" };
  return makeOpenAiCompatibleProvider(options);
}
|
|
660
|
+
var import_openai, DEFAULT_MAX_RETRIES2;
|
|
661
|
+
var init_openai = __esm({
|
|
662
|
+
"src/providers/openai.ts"() {
|
|
663
|
+
"use strict";
|
|
664
|
+
init_cjs_shims();
|
|
665
|
+
import_openai = __toESM(require("openai"), 1);
|
|
666
|
+
init_retry();
|
|
667
|
+
init_cost();
|
|
668
|
+
init_errors2();
|
|
669
|
+
DEFAULT_MAX_RETRIES2 = 2;
|
|
670
|
+
}
|
|
671
|
+
});
|
|
672
|
+
|
|
673
|
+
// src/providers/openrouter.ts
|
|
674
|
+
/**
 * Build the OpenRouter provider: the OpenAI-compatible provider pointed at
 * `OPENROUTER_BASE_URL` and authenticated via `OPENROUTER_API_KEY`.
 *
 * @param {object} config - Provider config.
 * @param {Record<string, string | undefined>} env - Environment variables.
 * @param {Record<string, object>} [pricing] - User price overrides.
 * @returns {object} Provider object.
 */
function makeOpenRouterProvider(config, env, pricing) {
  const options = {
    config,
    env,
    pricing,
    apiKeyEnv: "OPENROUTER_API_KEY",
    baseURL: OPENROUTER_BASE_URL,
    // OpenRouter reports the call's actual cost via usage.cost (ADR-PRC02)
    reportsCost: true
  };
  return makeOpenAiCompatibleProvider(options);
}
|
|
685
|
+
var OPENROUTER_BASE_URL;
|
|
686
|
+
var init_openrouter = __esm({
|
|
687
|
+
"src/providers/openrouter.ts"() {
|
|
688
|
+
"use strict";
|
|
689
|
+
init_cjs_shims();
|
|
690
|
+
init_openai();
|
|
691
|
+
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1";
|
|
692
|
+
}
|
|
693
|
+
});
|
|
694
|
+
|
|
695
|
+
// src/providers/registry.ts
|
|
696
|
+
/**
 * Create an empty registry that maps provider names to factory functions.
 *
 * The returned object exposes:
 * - `register(name, factory)`: add a factory; throws if `name` is taken.
 * - `resolve(name, config, env, pricing)`: call the factory registered under
 *   `name`; throws, listing the known names, if there is none.
 * - `has(name)`: whether a factory is registered under `name`.
 *
 * @returns {{register: Function, resolve: Function, has: Function}} Registry.
 */
function createProviderRegistry() {
  const factories = /* @__PURE__ */ new Map();

  const register = (name, factory) => {
    if (factories.has(name)) {
      throw new Error(`Provider "${name}" is already registered \u2014 provider names must be unique.`);
    }
    factories.set(name, factory);
  };

  const resolve = (name, config, env, pricing) => {
    const factory = factories.get(name);
    if (factory !== void 0) {
      return factory(config, env, pricing);
    }
    const known = [...factories.keys()].join(", ") || "(none)";
    throw new Error(`Unknown provider type "${name}". Registered providers: ${known}.`);
  };

  const has = (name) => factories.has(name);

  return { register, resolve, has };
}
|
|
718
|
+
/**
 * Create a registry pre-populated with the built-in providers:
 * "anthropic", "openai" and "openrouter".
 *
 * @returns {object} A fresh provider registry.
 */
function createDefaultRegistry() {
  const registry = createProviderRegistry();
  const builtins = [
    ["anthropic", makeAnthropicProvider],
    ["openai", makeOpenAiProvider],
    ["openrouter", makeOpenRouterProvider]
  ];
  for (const [name, factory] of builtins) {
    registry.register(name, factory);
  }
  return registry;
}
|
|
725
|
+
/**
 * Instantiate the provider named by `config.type` using a freshly built
 * default registry.
 *
 * @param {object} config - Provider config; `type` selects the factory.
 * @param {Record<string, string | undefined>} env - Environment variables.
 * @param {Record<string, object>} [pricing] - User price overrides.
 * @returns {object} The provider created by the matching factory.
 */
function getProvider(config, env, pricing) {
  const registry = createDefaultRegistry();
  const { type } = config;
  return registry.resolve(type, config, env, pricing);
}
|
|
728
|
+
var init_registry = __esm({
|
|
729
|
+
"src/providers/registry.ts"() {
|
|
730
|
+
"use strict";
|
|
731
|
+
init_cjs_shims();
|
|
732
|
+
init_anthropic();
|
|
733
|
+
init_openai();
|
|
734
|
+
init_openrouter();
|
|
735
|
+
}
|
|
736
|
+
});
|
|
737
|
+
|
|
738
|
+
// src/providers/index.ts
|
|
739
|
+
var init_providers = __esm({
|
|
740
|
+
"src/providers/index.ts"() {
|
|
741
|
+
"use strict";
|
|
742
|
+
init_cjs_shims();
|
|
743
|
+
init_registry();
|
|
744
|
+
init_errors2();
|
|
745
|
+
init_cost();
|
|
746
|
+
init_prices();
|
|
747
|
+
}
|
|
748
|
+
});
|
|
749
|
+
|
|
750
|
+
// src/providers/mock.ts
|
|
751
|
+
/**
 * Creates a provider stub that never performs I/O.
 *
 * @returns {{complete: Function, estimateCost: Function}} `complete()` resolves to a
 *   fixed response (10 input / 5 output tokens); `estimateCost()` always reports zero cost.
 */
function makeMockProvider() {
  const complete = () => {
    // A new object per call, so callers can never share or mutate a common result.
    const canned = {
      output: "mock response",
      usage: { input_tokens: 10, output_tokens: 5 }
    };
    return Promise.resolve(canned);
  };
  const estimateCost = () => {
    return { cost_usd: 0 };
  };
  return { complete, estimateCost };
}
|
|
760
|
+
var init_mock = __esm({
|
|
761
|
+
"src/providers/mock.ts"() {
|
|
762
|
+
"use strict";
|
|
763
|
+
init_cjs_shims();
|
|
764
|
+
}
|
|
765
|
+
});
|
|
766
|
+
|
|
767
|
+
// src/embeddings/cosine.ts
|
|
768
|
+
/**
 * Cosine similarity of two equal-length numeric vectors.
 *
 * @param {ArrayLike<number>} a - first vector.
 * @param {ArrayLike<number>} b - second vector.
 * @returns {number} dot(a, b) / (|a| * |b|), or 0 when either magnitude is 0.
 * @throws {Error} when the vectors differ in length.
 */
function cosine(a, b) {
  if (a.length !== b.length) {
    throw new Error(`cosine: vector length mismatch (${a.length} vs ${b.length})`);
  }
  const sums = { dot: 0, aa: 0, bb: 0 };
  let idx = 0;
  while (idx < a.length) {
    const left = a[idx];
    const right = b[idx];
    sums.dot += left * right;
    sums.aa += left * left;
    sums.bb += right * right;
    idx += 1;
  }
  const magnitude = Math.sqrt(sums.aa) * Math.sqrt(sums.bb);
  if (magnitude === 0) {
    // A zero vector has no direction; report "no similarity" rather than NaN.
    return 0;
  }
  return sums.dot / magnitude;
}
|
|
785
|
+
var init_cosine = __esm({
|
|
786
|
+
"src/embeddings/cosine.ts"() {
|
|
787
|
+
"use strict";
|
|
788
|
+
init_cjs_shims();
|
|
789
|
+
}
|
|
790
|
+
});
|
|
791
|
+
|
|
792
|
+
// src/embeddings/embedder.ts
|
|
793
|
+
/**
 * Returns the shared embedder, creating a `XenovaEmbedder` on first use.
 *
 * @returns {object} the module-level embedder instance.
 */
function getDefaultEmbedder() {
  if (defaultEmbedder === null || defaultEmbedder === void 0) {
    defaultEmbedder = new XenovaEmbedder();
  }
  return defaultEmbedder;
}
|
|
796
|
+
var MODEL_ID, XenovaEmbedder, defaultEmbedder;
|
|
797
|
+
var init_embedder = __esm({
  "src/embeddings/embedder.ts"() {
    "use strict";
    init_cjs_shims();
    MODEL_ID = "Xenova/all-MiniLM-L6-v2";
    /**
     * Embedder backed by the `@huggingface/transformers` feature-extraction
     * pipeline for MODEL_ID. The pipeline is loaded lazily on first use.
     */
    XenovaEmbedder = class {
      // Promise for the loaded pipeline; undefined until the first load() call.
      extractor;
      /**
       * Embeds each text with mean pooling and normalization.
       *
       * @param {string[]} texts - inputs to embed.
       * @returns {Promise<Float32Array[]>} one vector per row of the pipeline output.
       */
      async embed(texts) {
        const extract = await this.load();
        const output = await extract(texts, { pooling: "mean", normalize: true });
        const rows = output.tolist();
        return rows.map((row) => Float32Array.from(row));
      }
      /**
       * Starts (or reuses) the pipeline load.
       *
       * A failed load is not kept: the cached promise is cleared on rejection so
       * a later call can retry instead of replaying the same failure forever.
       *
       * @returns {Promise<Function>} promise for the feature-extraction pipeline.
       */
      load() {
        if (this.extractor == null) {
          const pending = import("@huggingface/transformers").then(
            ({ pipeline }) => pipeline("feature-extraction", MODEL_ID)
          );
          this.extractor = pending;
          // Side branch only: callers still receive `pending` and observe its rejection.
          pending.catch(() => {
            if (this.extractor === pending) {
              this.extractor = void 0;
            }
          });
        }
        return this.extractor;
      }
    };
  }
});
|
|
819
|
+
|
|
820
|
+
// src/embeddings/index.ts
|
|
821
|
+
var init_embeddings = __esm({
|
|
822
|
+
"src/embeddings/index.ts"() {
|
|
823
|
+
"use strict";
|
|
824
|
+
init_cjs_shims();
|
|
825
|
+
init_cosine();
|
|
826
|
+
init_embedder();
|
|
827
|
+
}
|
|
828
|
+
});
|
|
829
|
+
|
|
830
|
+
// src/cache/cache.ts
|
|
831
|
+
/**
 * Opens the SQLite-backed completion cache, degrading to a no-op cache on any failure.
 *
 * @param {string} file - database path handed to better-sqlite3.
 * @param {{now?: () => number}} [opts] - `now` overrides the clock (defaults to `Date.now`).
 * @returns {object} a `SqliteCache`, or a `NoOpCache` when the database cannot be
 *   opened or its schema cannot be prepared.
 */
function openCache(file, opts = {}) {
  const clock = opts.now ?? Date.now;
  const connect = () => {
    const handle = new import_better_sqlite3.default(file);
    let schemaReady = false;
    try {
      ensureSchema(handle);
      schemaReady = true;
    } finally {
      // Do not leak the handle when schema setup fails.
      if (!schemaReady) {
        handle.close();
      }
    }
    return new SqliteCache(handle, clock);
  };
  try {
    return connect();
  } catch {
    // The cache is an optimisation only; never let it break a run.
    return new NoOpCache();
  }
}
|
|
846
|
+
/**
 * Creates the `meta` and `completions` tables if missing and, when the stored
 * schema version differs from SCHEMA_VERSION, drops and recreates `completions`
 * before recording the current version.
 *
 * @param {object} db - open better-sqlite3 database handle.
 */
function ensureSchema(db) {
  db.exec(META_DDL + COMPLETIONS_DDL);
  const versionRow = db.prepare("SELECT value FROM meta WHERE key = 'schema_version'").get();
  const expected = String(SCHEMA_VERSION);
  if (versionRow?.value === expected) {
    return;
  }
  // Cached rows from another schema version cannot be trusted: start over.
  db.exec("DROP TABLE IF EXISTS completions");
  db.exec(COMPLETIONS_DDL);
  const recordVersion = db.prepare("INSERT OR REPLACE INTO meta (key, value) VALUES (?, ?)");
  recordVersion.run("schema_version", expected);
}
|
|
856
|
+
var import_better_sqlite3, SCHEMA_VERSION, META_DDL, COMPLETIONS_DDL, SqliteCache, NoOpCache;
|
|
857
|
+
var init_cache = __esm({
  "src/cache/cache.ts"() {
    "use strict";
    init_cjs_shims();
    import_better_sqlite3 = __toESM(require("better-sqlite3"), 1);
    SCHEMA_VERSION = 1;
    META_DDL = "CREATE TABLE IF NOT EXISTS meta (key TEXT PRIMARY KEY, value TEXT);";
    COMPLETIONS_DDL = "CREATE TABLE IF NOT EXISTS completions (cache_key TEXT PRIMARY KEY, output TEXT NOT NULL, input_tokens INTEGER NOT NULL, output_tokens INTEGER NOT NULL, created_at INTEGER NOT NULL);";
    /**
     * Completion cache stored in the `completions` table. Reads, writes and
     * clears are best-effort: database errors are swallowed, never thrown.
     */
    SqliteCache = class {
      constructor(db, now) {
        this.db = db;
        this.now = now;
      }
      db;
      now;
      /**
       * Looks up a cached completion.
       *
       * @param {string} key - cache key.
       * @param {{maxAgeMs?: number}} [opts] - entries older than `maxAgeMs` count as misses.
       * @returns {{output: string, usage: object}|undefined} the hit, or undefined on
       *   miss, expiry, or any database error.
       */
      get(key, opts = {}) {
        try {
          const hit = this.db.prepare("SELECT output, input_tokens, output_tokens, created_at FROM completions WHERE cache_key = ?").get(key);
          if (hit === void 0) {
            return void 0;
          }
          const { maxAgeMs } = opts;
          const expired = maxAgeMs !== void 0 && this.now() - hit.created_at > maxAgeMs;
          if (expired) {
            return void 0;
          }
          const { output, input_tokens, output_tokens } = hit;
          return { output, usage: { input_tokens, output_tokens } };
        } catch {
          return void 0;
        }
      }
      /**
       * Inserts or overwrites the entry for `key`, stamping it with the current clock value.
       *
       * @param {string} key - cache key.
       * @param {{output: string, usage: {input_tokens: number, output_tokens: number}}} value
       */
      set(key, value) {
        const upsert = `INSERT INTO completions (cache_key, output, input_tokens, output_tokens, created_at)
VALUES (?, ?, ?, ?, ?)
ON CONFLICT(cache_key) DO UPDATE SET
output = excluded.output,
input_tokens = excluded.input_tokens,
output_tokens = excluded.output_tokens,
created_at = excluded.created_at`;
        try {
          const statement = this.db.prepare(upsert);
          const { input_tokens, output_tokens } = value.usage;
          statement.run(key, value.output, input_tokens, output_tokens, this.now());
        } catch {
          // Best-effort: a failed cache write must never fail the run.
        }
      }
      /** Deletes every cached completion; errors are ignored. */
      clear() {
        try {
          this.db.exec("DELETE FROM completions");
        } catch {
          // Best-effort: ignore database errors.
        }
      }
      /** Closes the underlying database handle. */
      close() {
        this.db.close();
      }
    };
    /** Cache that stores nothing: every lookup misses and every mutation is a no-op. */
    NoOpCache = class {
      get() {
        return void 0;
      }
      set() {
        return void 0;
      }
      clear() {
        return void 0;
      }
      close() {
        return void 0;
      }
    };
  }
});
|
|
924
|
+
|
|
925
|
+
// src/cache/index.ts
|
|
926
|
+
var init_cache2 = __esm({
|
|
927
|
+
"src/cache/index.ts"() {
|
|
928
|
+
"use strict";
|
|
929
|
+
init_cjs_shims();
|
|
930
|
+
init_cache();
|
|
931
|
+
}
|
|
932
|
+
});
|
|
933
|
+
|
|
934
|
+
// src/util/canonical-json.ts
|
|
935
|
+
/**
 * Recursively converts a JSON-like value into its canonical form: object keys
 * sorted, strings NFC-normalized, -0 collapsed to 0.
 *
 * @param {*} value - null, boolean, finite number, string, array, or plain object.
 * @returns {*} the normalized value, ready for `JSON.stringify`.
 * @throws {Error} for non-finite numbers, non-plain objects (Date, Map, class
 *   instances, ...) and any other type (undefined, function, symbol, bigint).
 */
function normalizeValue(value) {
  if (value === null || typeof value === "boolean") {
    return value;
  }
  if (typeof value === "number") {
    if (!Number.isFinite(value)) {
      throw new Error(
        `canonicalJson: non-finite number rejected: ${String(value)}`
      );
    }
    // -0 === 0 is true, so this maps -0 to +0 and leaves other numbers untouched.
    return value === 0 ? 0 : value;
  }
  if (typeof value === "string") {
    return value.normalize("NFC");
  }
  if (Array.isArray(value)) {
    return value.map(normalizeValue);
  }
  if (typeof value === "object") {
    const proto = Object.getPrototypeOf(value);
    if (proto !== null && proto !== Object.prototype) {
      const name = value.constructor?.name ?? "unknown";
      throw new Error(
        `canonicalJson: unsupported non-plain object (${name}) \u2014 only plain objects, arrays, and primitives are serializable`
      );
    }
    const sorted = {};
    for (const key of Object.keys(value).sort()) {
      // defineProperty instead of `sorted[key] = ...`: assigning to an own
      // "__proto__" key (as produced by JSON.parse) would set the prototype and
      // silently drop the key from the canonical output.
      Object.defineProperty(sorted, key, {
        value: normalizeValue(value[key]),
        enumerable: true,
        writable: true,
        configurable: true
      });
    }
    return sorted;
  }
  throw new Error(
    `canonicalJson: unsupported value type: ${typeof value}`
  );
}
|
|
971
|
+
/**
 * Serializes a value to JSON after passing it through `normalizeValue`.
 *
 * @param {*} value - value to serialize.
 * @returns {string} JSON text of the normalized value.
 */
function canonicalJson(value) {
  const normalized = normalizeValue(value);
  return JSON.stringify(normalized);
}
|
|
974
|
+
var init_canonical_json = __esm({
|
|
975
|
+
"src/util/canonical-json.ts"() {
|
|
976
|
+
"use strict";
|
|
977
|
+
init_cjs_shims();
|
|
978
|
+
}
|
|
979
|
+
});
|
|
980
|
+
|
|
981
|
+
// src/util/hash.ts
|
|
982
|
+
/**
 * SHA-256 digest of a string.
 *
 * @param {string} input - text to hash, fed to the hash as UTF-8.
 * @returns {string} hex-encoded digest.
 */
function sha256(input) {
  const hasher = (0, import_node_crypto.createHash)("sha256");
  hasher.update(input, "utf8");
  return hasher.digest("hex");
}
|
|
985
|
+
/**
 * Derives the completion-cache key from the request fields that determine the response.
 *
 * @param {{provider: *, model: *, temperature: *, max_tokens: *, prompt_resolved: *}} inputs
 *   request description; any other properties are ignored.
 * @returns {string} SHA-256 hex digest of the canonical JSON of those five fields.
 */
function cacheKey(inputs) {
  const fingerprint = {
    provider: inputs.provider,
    model: inputs.model,
    temperature: inputs.temperature,
    max_tokens: inputs.max_tokens,
    prompt_resolved: inputs.prompt_resolved
  };
  const canonical = canonicalJson(fingerprint);
  return sha256(canonical);
}
|
|
989
|
+
/**
 * Hash of a configuration object that does not depend on key order.
 *
 * @param {*} config - JSON-like configuration value.
 * @returns {string} SHA-256 hex digest of its canonical JSON.
 */
function configHash(config) {
  const canonical = canonicalJson(config);
  return sha256(canonical);
}
|
|
992
|
+
var import_node_crypto;
|
|
993
|
+
var init_hash = __esm({
|
|
994
|
+
"src/util/hash.ts"() {
|
|
995
|
+
"use strict";
|
|
996
|
+
init_cjs_shims();
|
|
997
|
+
import_node_crypto = require("crypto");
|
|
998
|
+
init_canonical_json();
|
|
999
|
+
}
|
|
1000
|
+
});
|
|
1001
|
+
|
|
1002
|
+
// src/assertions/interpolate.ts
|
|
1003
|
+
/**
 * Expands `{{ expected }}` and `{{ vars.<name> }}` placeholders in an assertion template.
 *
 * @param {string} template - text containing placeholders matched by PLACEHOLDER.
 * @param {{row: {expected?: *}, vars: object}} ctx - assertion context.
 * @returns {string} the template with known placeholders substituted. Unknown
 *   placeholders, and `vars.<name>` entries that are missing or undefined, are
 *   left verbatim; a missing `expected` becomes the empty string.
 */
function interpolate(template, ctx) {
  return template.replace(PLACEHOLDER, (match, key) => {
    if (key === "expected") {
      return ctx.row.expected ?? "";
    }
    if (key.startsWith("vars.")) {
      const name = key.slice("vars.".length);
      // Own properties only: `{{vars.constructor}}` or `{{vars.toString}}` must
      // stay literal rather than expand to an inherited Object.prototype member.
      if (!Object.prototype.hasOwnProperty.call(ctx.vars, name)) {
        return match;
      }
      const value = ctx.vars[name];
      return value === void 0 ? match : String(value);
    }
    return match;
  });
}
|
|
1016
|
+
var PLACEHOLDER;
|
|
1017
|
+
var init_interpolate = __esm({
|
|
1018
|
+
"src/assertions/interpolate.ts"() {
|
|
1019
|
+
"use strict";
|
|
1020
|
+
init_cjs_shims();
|
|
1021
|
+
PLACEHOLDER = /\{\{\s*([\w.]+)\s*\}\}/g;
|
|
1022
|
+
}
|
|
1023
|
+
});
|
|
1024
|
+
|
|
1025
|
+
// src/assertions/exact-match.ts
|
|
1026
|
+
var exactMatch;
|
|
1027
|
+
var init_exact_match = __esm({
  "src/assertions/exact-match.ts"() {
    "use strict";
    init_cjs_shims();
    init_interpolate();
    /**
     * "exact-match" assertion: passes when the output equals the interpolated
     * `params.value`, optionally after trimming (`params.trim`) and/or
     * lower-casing (`params.ignore_case`) both sides.
     */
    exactMatch = {
      run(ctx) {
        const expected = interpolate(ctx.params.value, ctx);
        // Apply the same optional normalisation to both sides of the comparison.
        const normalize = (text) => {
          const trimmed = ctx.params.trim ? text.trim() : text;
          return ctx.params.ignore_case ? trimmed.toLowerCase() : trimmed;
        };
        if (normalize(ctx.output) === normalize(expected)) {
          return Promise.resolve({ passed: true });
        }
        // The reason quotes the un-normalised expected text.
        return Promise.resolve({ passed: false, reason: `expected exact match: "${expected}"` });
      }
    };
  }
});
|
|
1051
|
+
|
|
1052
|
+
// src/assertions/contains.ts
|
|
1053
|
+
/**
 * Lower-cases `s` when case is to be ignored; otherwise returns it unchanged.
 *
 * @param {string} s - text to normalize.
 * @param {*} ignoreCase - truthy to fold case.
 * @returns {string} the text to compare.
 */
function norm(s, ignoreCase) {
  if (ignoreCase) {
    return s.toLowerCase();
  }
  return s;
}
|
|
1056
|
+
var contains, containsAny, containsAll;
|
|
1057
|
+
var init_contains = __esm({
  "src/assertions/contains.ts"() {
    "use strict";
    init_cjs_shims();
    init_interpolate();
    /** "contains": the output must include the interpolated `params.value`. */
    contains = {
      run(ctx) {
        const needle = interpolate(ctx.params.value, ctx);
        const haystack = norm(ctx.output, ctx.params.ignore_case);
        if (haystack.includes(norm(needle, ctx.params.ignore_case))) {
          return Promise.resolve({ passed: true });
        }
        return Promise.resolve({ passed: false, reason: `expected output to contain: "${needle}"` });
      }
    };
    /** "contains-any": the output must include at least one of `params.values`. */
    containsAny = {
      run(ctx) {
        const haystack = norm(ctx.output, ctx.params.ignore_case);
        const candidates = ctx.params.values.map((v) => interpolate(v, ctx));
        for (const candidate of candidates) {
          if (haystack.includes(norm(candidate, ctx.params.ignore_case))) {
            return Promise.resolve({ passed: true });
          }
        }
        return Promise.resolve({
          passed: false,
          reason: `expected output to contain any of: ${JSON.stringify(candidates)}`
        });
      }
    };
    /** "contains-all": the output must include every one of `params.values`. */
    containsAll = {
      run(ctx) {
        const haystack = norm(ctx.output, ctx.params.ignore_case);
        const required = ctx.params.values.map((v) => interpolate(v, ctx));
        const missing = [];
        for (const item of required) {
          if (!haystack.includes(norm(item, ctx.params.ignore_case))) {
            missing.push(item);
          }
        }
        if (missing.length === 0) {
          return Promise.resolve({ passed: true });
        }
        return Promise.resolve({
          passed: false,
          reason: `output is missing required substrings: ${JSON.stringify(missing)}`
        });
      }
    };
  }
});
|
|
1095
|
+
|
|
1096
|
+
// src/assertions/json-extract.ts
|
|
1097
|
+
/**
 * Extracts a JSON value from model output.
 *
 * In "strict" mode the whole trimmed output must parse. In any other mode the
 * first fenced code block is tried first, then each balanced `{...}` / `[...]`
 * run in order of appearance; the first candidate that parses wins.
 *
 * @param {string} output - raw model output.
 * @param {string} mode - "strict" or anything else for lenient extraction.
 * @returns {{ok: true, value: *}|{ok: false}} parse result.
 */
function extractJson(output, mode) {
  if (mode === "strict") {
    return tryParse(output.trim());
  }
  // Lazy candidate stream: nothing past the first successful parse is scanned.
  const candidates = function* () {
    const fenced = extractFencedBlock(output);
    if (fenced !== null) {
      yield fenced;
    }
    yield* balancedRuns(output);
  };
  for (const candidate of candidates()) {
    const parsed = tryParse(candidate);
    if (parsed.ok) {
      return parsed;
    }
  }
  return { ok: false };
}
|
|
1112
|
+
/**
 * `JSON.parse` that reports failure as a value instead of throwing.
 *
 * @param {string} s - candidate JSON text.
 * @returns {{ok: true, value: *}|{ok: false}} parse result.
 */
function tryParse(s) {
  let value;
  try {
    value = JSON.parse(s);
  } catch {
    return { ok: false };
  }
  return { ok: true, value };
}
|
|
1119
|
+
/**
 * Returns the trimmed body of the first triple-backtick fence (optionally
 * tagged `json`, case-insensitive), or null when the text has no fence.
 *
 * @param {string} s - text to search.
 * @returns {string|null} fence body, possibly empty, or null.
 */
function extractFencedBlock(s) {
  const fence = /```(?:json)?\s*\n?([\s\S]*?)```/i;
  const found = s.match(fence);
  if (found === null || found === void 0) {
    return null;
  }
  const inner = found[1];
  if (inner === null || inner === void 0) {
    return null;
  }
  return inner.trim();
}
|
|
1123
|
+
/**
 * Lazily yields each bracket-balanced `{...}` / `[...]` substring of `s`, left
 * to right. After yielding a run the scan resumes just past it; an opener with
 * no matching closer is skipped by one character.
 *
 * @param {string} s - text to scan.
 * @yields {string} candidate JSON substrings (not yet validated).
 */
function* balancedRuns(s) {
  const opener = /[{[]/g;
  let cursor = 0;
  for (; ; ) {
    if (cursor >= s.length) {
      return;
    }
    opener.lastIndex = cursor;
    const hit = opener.exec(s);
    if (hit === null) {
      return;
    }
    const begin = hit.index;
    const candidate = balancedFrom(s, begin);
    if (candidate === null) {
      cursor = begin + 1;
      continue;
    }
    yield candidate;
    cursor = begin + candidate.length;
  }
}
|
|
1138
|
+
/**
 * Starting at the bracket at `s[start]`, returns the substring up to and
 * including its matching closer. Only brackets of the opener's own kind are
 * counted, and anything inside double-quoted strings (with backslash escapes)
 * is ignored.
 *
 * @param {string} s - text being scanned.
 * @param {number} start - index of the opening `{` or `[`.
 * @returns {string|null} the balanced run, or null if the text ends first.
 */
function balancedFrom(s, start) {
  const opener = s[start];
  const closer = opener === "{" ? "}" : "]";
  let nesting = 0;
  let quoted = false;
  let skipNext = false;
  let pos = start;
  while (pos < s.length) {
    const c = s[pos];
    pos += 1;
    if (quoted) {
      if (skipNext) skipNext = false;
      else if (c === "\\") skipNext = true;
      else if (c === '"') quoted = false;
      continue;
    }
    if (c === '"') {
      quoted = true;
      continue;
    }
    if (c === opener) {
      nesting += 1;
      continue;
    }
    if (c !== closer) {
      continue;
    }
    nesting -= 1;
    if (nesting === 0) {
      // `pos` already points one past the closer.
      return s.slice(start, pos);
    }
  }
  return null;
}
|
|
1163
|
+
var init_json_extract = __esm({
|
|
1164
|
+
"src/assertions/json-extract.ts"() {
|
|
1165
|
+
"use strict";
|
|
1166
|
+
init_cjs_shims();
|
|
1167
|
+
}
|
|
1168
|
+
});
|
|
1169
|
+
|
|
1170
|
+
// src/assertions/json-schema.ts
|
|
1171
|
+
var import_ajv, jsonSchema;
|
|
1172
|
+
var init_json_schema = __esm({
  "src/assertions/json-schema.ts"() {
    "use strict";
    init_cjs_shims();
    import_ajv = require("ajv");
    init_json_extract();
    /**
     * "json-schema" assertion: extracts JSON from the output (mode from
     * `params.extract`, default "auto") and validates it against
     * `params.schema` with Ajv, reporting all validation errors.
     */
    jsonSchema = {
      run(ctx) {
        const fail = (reason) => Promise.resolve({ passed: false, reason });
        const mode = ctx.params.extract ?? "auto";
        const extracted = extractJson(ctx.output, mode);
        if (!extracted.ok) {
          return fail(`output is not valid JSON (extract mode: ${mode})`);
        }
        const ajv = new import_ajv.Ajv({ allErrors: true });
        let validate;
        try {
          validate = ajv.compile(ctx.params.schema);
        } catch (err) {
          // A broken schema fails the assertion with an explanation instead of throwing.
          return fail(`invalid json-schema: ${err.message}`);
        }
        if (!validate(extracted.value)) {
          return fail(`json-schema validation failed: ${ajv.errorsText(validate.errors, { separator: "; " })}`);
        }
        return Promise.resolve({ passed: true });
      }
    };
  }
});
|
|
1209
|
+
|
|
1210
|
+
// src/assertions/semantic-similarity.ts
|
|
1211
|
+
var DEFAULT_THRESHOLD, semanticSimilarity;
|
|
1212
|
+
var init_semantic_similarity = __esm({
  "src/assertions/semantic-similarity.ts"() {
    "use strict";
    init_cjs_shims();
    init_interpolate();
    init_cosine();
    DEFAULT_THRESHOLD = 0.8;
    /**
     * "semantic-similarity" assertion: embeds the output and the interpolated
     * `params.reference`, and passes when their cosine similarity reaches
     * `params.threshold` (default DEFAULT_THRESHOLD).
     */
    semanticSimilarity = {
      async run(ctx) {
        const { embedder } = ctx;
        if (embedder === void 0) {
          throw new Error("semantic-similarity requires an embedder in the AssertionContext");
        }
        const reference = interpolate(ctx.params.reference, ctx);
        const threshold = ctx.params.threshold ?? DEFAULT_THRESHOLD;
        const vecs = await embedder.embed([ctx.output, reference]);
        const [outVec, refVec] = vecs;
        const incomplete = outVec === void 0 || refVec === void 0;
        if (incomplete) {
          throw new Error(`semantic-similarity: embedder returned ${vecs.length} vector(s), expected 2`);
        }
        const score = cosine(outVec, refVec);
        if (score >= threshold) {
          return { passed: true, score };
        }
        return {
          passed: false,
          score,
          reason: `similarity ${score.toFixed(3)} < threshold ${threshold}`
        };
      }
    };
  }
});
|
|
1241
|
+
|
|
1242
|
+
// src/assertions/judge-helpers.ts
|
|
1243
|
+
/**
 * Sends a prompt to the judge and parses the JSON found in its reply.
 *
 * @param {{ask: (prompt: string) => Promise<string>}} judge - judge to query.
 * @param {string} prompt5 - prompt text.
 * @returns {Promise<*>} the JSON value extracted in "auto" mode.
 * @throws {Error} when no parseable JSON is found; the message quotes the first
 *   120 characters of the reply.
 */
async function askJson(judge, prompt5) {
  const reply = await judge.ask(prompt5);
  const parsed = extractJson(reply, "auto");
  if (parsed.ok) {
    return parsed.value;
  }
  throw new Error(`judge returned no parseable JSON (got: ${reply.slice(0, 120)})`);
}
|
|
1251
|
+
/**
 * Asserts that a parsed judge response is a non-null object (arrays included).
 *
 * @param {*} judgment - parsed JSON value.
 * @returns {object} the same value, unchanged.
 * @throws {Error} when the value is null or not of type "object".
 */
function asObject(judgment) {
  const isObject = judgment !== null && typeof judgment === "object";
  if (isObject) {
    return judgment;
  }
  throw new Error("judge response is not a JSON object");
}
|
|
1257
|
+
/**
 * Reads a `{score, reason}` judgment.
 *
 * @param {*} judgment - parsed judge response.
 * @returns {{score: number, reason?: string}} score clamped to [0, 1]; `reason`
 *   is present only when the judge supplied a string.
 * @throws {Error} when the response is not an object or `score` is not a finite number.
 */
function parseScored(judgment) {
  const fields = asObject(judgment);
  const rawScore = fields["score"];
  const usable = typeof rawScore === "number" && Number.isFinite(rawScore);
  if (!usable) {
    throw new Error('judge response missing a finite numeric "score"');
  }
  // Judges occasionally stray outside 0..1; clamp rather than reject.
  const score = Math.min(1, Math.max(0, rawScore));
  const reason = fields["reason"];
  if (typeof reason === "string") {
    return { score, reason };
  }
  return { score };
}
|
|
1267
|
+
/**
 * Reads the `statements` array of a faithfulness judgment.
 *
 * @param {*} judgment - parsed judge response.
 * @returns {boolean[]} one flag per statement: true only when the entry is an
 *   object whose `faithful` property is exactly `true`.
 * @throws {Error} when the response is not an object or has no `statements` array.
 */
function parseStatements(judgment) {
  const statements = asObject(judgment)["statements"];
  if (Array.isArray(statements)) {
    return statements.map((entry) => {
      if (entry === null || typeof entry !== "object") {
        return false;
      }
      return entry["faithful"] === true;
    });
  }
  throw new Error('judge response missing "statements" array');
}
|
|
1276
|
+
/**
 * Reads the `questions` array of an answer-relevance judgment.
 *
 * @param {*} judgment - parsed judge response.
 * @returns {string[]} the string entries of `questions`; other entries are dropped.
 * @throws {Error} when the response is not an object or has no `questions` array.
 */
function parseQuestions(judgment) {
  const questions = asObject(judgment)["questions"];
  if (Array.isArray(questions)) {
    const onlyStrings = [];
    for (const entry of questions) {
      if (typeof entry === "string") {
        onlyStrings.push(entry);
      }
    }
    return onlyStrings;
  }
  throw new Error('judge response missing "questions" array');
}
|
|
1283
|
+
var init_judge_helpers = __esm({
|
|
1284
|
+
"src/assertions/judge-helpers.ts"() {
|
|
1285
|
+
"use strict";
|
|
1286
|
+
init_cjs_shims();
|
|
1287
|
+
init_json_extract();
|
|
1288
|
+
}
|
|
1289
|
+
});
|
|
1290
|
+
|
|
1291
|
+
// src/assertions/llm-judge.ts
|
|
1292
|
+
/**
 * Builds the llm-judge prompt asking for a 0..1 score of OUTPUT against CRITERIA.
 *
 * @param {*} criteria - evaluation criteria, stringified into the prompt.
 * @param {*} output - model output under evaluation, stringified into the prompt.
 * @returns {string} the complete judge prompt.
 */
function prompt(criteria, output) {
  const instructions = 'You are a strict evaluator. Score from 0 to 1 how well the OUTPUT meets the CRITERIA. Respond ONLY with JSON: {"score": <0..1>, "reason": "<short explanation>"}.';
  const sections = [
    instructions,
    "",
    `CRITERIA: ${criteria}`,
    "",
    "OUTPUT:",
    `${output}`
  ];
  return sections.join("\n");
}
|
|
1300
|
+
var DEFAULT_PASS_THRESHOLD, llmJudge;
|
|
1301
|
+
var init_llm_judge = __esm({
  "src/assertions/llm-judge.ts"() {
    "use strict";
    init_cjs_shims();
    init_interpolate();
    init_judge_helpers();
    DEFAULT_PASS_THRESHOLD = 0.5;
    /**
     * "llm-judge" assertion: asks the judge to score the output against the
     * interpolated `params.criteria`; passes when the score reaches
     * `params.pass_threshold` (default DEFAULT_PASS_THRESHOLD).
     */
    llmJudge = {
      async run(ctx) {
        const { judge } = ctx;
        if (judge === void 0) {
          throw new Error("llm-judge requires a judge in the AssertionContext");
        }
        const criteria = interpolate(ctx.params.criteria, ctx);
        const threshold = ctx.params.pass_threshold ?? DEFAULT_PASS_THRESHOLD;
        const reply = await askJson(judge, prompt(criteria, ctx.output));
        const verdict = parseScored(reply);
        const { score } = verdict;
        // Prefer the judge's own explanation; otherwise fall back to the numbers.
        const reason = verdict.reason ?? `score ${score.toFixed(3)} vs threshold ${threshold}`;
        return { passed: score >= threshold, score, reason };
      }
    };
  }
});
|
|
1326
|
+
|
|
1327
|
+
// src/assertions/faithfulness.ts
|
|
1328
|
+
/**
 * Builds the faithfulness prompt: extract the claims in OUTPUT and judge each against CONTEXT.
 *
 * @param {*} context - grounding context, stringified into the prompt.
 * @param {*} output - model output under evaluation, stringified into the prompt.
 * @returns {string} the complete judge prompt.
 */
function prompt2(context, output) {
  const instructions = 'Extract the atomic factual claims from the OUTPUT and judge whether each is supported by the CONTEXT. Respond ONLY with JSON: {"statements":[{"claim":"<text>","faithful":<true|false>}]}.';
  const sections = [
    instructions,
    "",
    "CONTEXT:",
    `${context}`,
    "",
    "OUTPUT:",
    `${output}`
  ];
  return sections.join("\n");
}
|
|
1337
|
+
var DEFAULT_THRESHOLD2, faithfulness;
|
|
1338
|
+
var init_faithfulness = __esm({
  "src/assertions/faithfulness.ts"() {
    "use strict";
    init_cjs_shims();
    init_interpolate();
    init_judge_helpers();
    DEFAULT_THRESHOLD2 = 0.7;
    /**
     * "faithfulness" assertion: the judge splits the output into claims and
     * marks each as supported by the interpolated `params.context`; the score
     * is the supported fraction (1 when there are no claims) and must reach
     * `params.threshold` (default DEFAULT_THRESHOLD2).
     */
    faithfulness = {
      async run(ctx) {
        const { judge } = ctx;
        if (judge === void 0) {
          throw new Error("faithfulness requires a judge in the AssertionContext");
        }
        const context = interpolate(ctx.params.context, ctx);
        const threshold = ctx.params.threshold ?? DEFAULT_THRESHOLD2;
        const reply = await askJson(judge, prompt2(context, ctx.output));
        const flags = parseStatements(reply);
        let supported = 0;
        for (const flag of flags) {
          if (flag) {
            supported += 1;
          }
        }
        const score = flags.length === 0 ? 1 : supported / flags.length;
        if (score >= threshold) {
          return { passed: true, score };
        }
        return { passed: false, score, reason: `faithfulness ${score.toFixed(3)} < threshold ${threshold}` };
      }
    };
  }
});
|
|
1360
|
+
|
|
1361
|
+
// src/assertions/answer-relevance.ts
|
|
1362
|
+
/**
 * Builds the answer-relevance prompt asking which questions OUTPUT answers.
 *
 * @param {*} output - model output under evaluation, stringified into the prompt.
 * @returns {string} the complete judge prompt.
 */
function prompt3(output) {
  const instructions = 'Generate the questions that the OUTPUT directly and fully answers. Respond ONLY with JSON: {"questions": ["<question>", ...]}.';
  const sections = [instructions, "", "OUTPUT:", `${output}`];
  return sections.join("\n");
}
|
|
1368
|
+
var DEFAULT_THRESHOLD3, answerRelevance;
|
|
1369
|
+
var init_answer_relevance = __esm({
  "src/assertions/answer-relevance.ts"() {
    "use strict";
    init_cjs_shims();
    init_interpolate();
    init_judge_helpers();
    init_cosine();
    DEFAULT_THRESHOLD3 = 0.7;
    /**
     * "answer-relevance" assertion: the judge lists the questions the output
     * answers; the score is the mean cosine similarity between the interpolated
     * `params.question` and each generated question (floored at 0) and must
     * reach `params.threshold` (default DEFAULT_THRESHOLD3).
     */
    answerRelevance = {
      async run(ctx) {
        const { judge, embedder } = ctx;
        if (judge === void 0) {
          throw new Error("answer-relevance requires a judge in the AssertionContext");
        }
        if (embedder === void 0) {
          throw new Error("answer-relevance requires an embedder in the AssertionContext");
        }
        const question = interpolate(ctx.params.question, ctx);
        const threshold = ctx.params.threshold ?? DEFAULT_THRESHOLD3;
        const generated = parseQuestions(await askJson(judge, prompt3(ctx.output)));
        const count = generated.length;
        if (count === 0) {
          return { passed: false, score: 0, reason: "judge generated no questions from the output" };
        }
        // Vector 0 is the asked question; vectors 1..count are the generated ones.
        const vecs = await embedder.embed([question, ...generated]);
        if (vecs.length < count + 1) {
          throw new Error(
            `answer-relevance: embedder returned ${vecs.length} vectors, expected ${count + 1}`
          );
        }
        let total = 0;
        for (const candidate of vecs.slice(1, count + 1)) {
          total += cosine(vecs[0], candidate);
        }
        const score = Math.max(0, total / count);
        if (score >= threshold) {
          return { passed: true, score };
        }
        return { passed: false, score, reason: `answer-relevance ${score.toFixed(3)} < threshold ${threshold}` };
      }
    };
  }
});
|
|
1408
|
+
|
|
1409
|
+
// src/assertions/context-precision.ts
|
|
1410
|
+
/**
 * Builds the context-precision prompt asking how well CONTEXT supports answering QUESTION.
 *
 * @param {*} context - retrieved context, stringified into the prompt.
 * @param {*} question - question being asked, stringified into the prompt.
 * @returns {string} the complete judge prompt.
 */
function prompt4(context, question) {
  const instructions = 'Judge from 0 to 1 how well the CONTEXT provides the information needed to answer the QUESTION. Respond ONLY with JSON: {"score": <0..1>, "reason": "<short explanation>"}.';
  const sections = [
    instructions,
    "",
    `QUESTION: ${question}`,
    "",
    "CONTEXT:",
    `${context}`
  ];
  return sections.join("\n");
}
|
|
1418
|
+
var DEFAULT_THRESHOLD4, contextPrecision;
|
|
1419
|
+
var init_context_precision = __esm({
  "src/assertions/context-precision.ts"() {
    "use strict";
    init_cjs_shims();
    init_interpolate();
    init_judge_helpers();
    DEFAULT_THRESHOLD4 = 0.7;
    /**
     * "context-precision" assertion: asks the judge how well the interpolated
     * `params.context` supports answering the interpolated `params.question`;
     * passes when the score reaches `params.threshold` (default DEFAULT_THRESHOLD4).
     */
    contextPrecision = {
      async run(ctx) {
        const { judge } = ctx;
        if (judge === void 0) {
          throw new Error("context-precision requires a judge in the AssertionContext");
        }
        const context = interpolate(ctx.params.context, ctx);
        const question = interpolate(ctx.params.question, ctx);
        const threshold = ctx.params.threshold ?? DEFAULT_THRESHOLD4;
        const reply = await askJson(judge, prompt4(context, question));
        const verdict = parseScored(reply);
        const { score } = verdict;
        // Prefer the judge's own explanation; otherwise fall back to the numbers.
        const reason = verdict.reason ?? `context-precision ${score.toFixed(3)} vs threshold ${threshold}`;
        return { passed: score >= threshold, score, reason };
      }
    };
  }
});
|
|
1445
|
+
|
|
1446
|
+
// src/assertions/registry.ts
|
|
1447
|
+
/**
 * Creates an empty assertion registry backed by a private Map.
 *
 * @returns {{register: Function, resolve: Function, has: Function}}
 *   `register(type, impl)` stores an implementation (throws on duplicates),
 *   `resolve(type)` returns it (throws for unknown types) and `has(type)`
 *   reports whether a type is registered.
 */
function createAssertionRegistry() {
  const impls = /* @__PURE__ */ new Map();

  const register = (type, impl) => {
    if (!impls.has(type)) {
      impls.set(type, impl);
      return;
    }
    throw new Error(`Assertion "${type}" is already registered \u2014 assertion types must be unique.`);
  };

  const resolve = (type) => {
    const impl = impls.get(type);
    if (impl !== void 0) {
      return impl;
    }
    // List what IS registered so a config typo is easy to spot.
    const known = [...impls.keys()].join(", ") || "(none)";
    throw new Error(`Unknown assertion type "${type}". Registered: ${known}.`);
  };

  const has = (type) => impls.has(type);

  return { register, resolve, has };
}
|
|
1469
|
+
/**
 * Builds a fresh assertion registry with the ten built-in assertion types
 * registered in a fixed order.
 *
 * @returns {object} the populated registry.
 */
function createDefaultRegistry2() {
  const registry = createAssertionRegistry();
  const builtins = [
    ["exact-match", exactMatch],
    ["contains", contains],
    ["contains-any", containsAny],
    ["contains-all", containsAll],
    ["json-schema", jsonSchema],
    ["semantic-similarity", semanticSimilarity],
    ["llm-judge", llmJudge],
    ["faithfulness", faithfulness],
    ["answer-relevance", answerRelevance],
    ["context-precision", contextPrecision]
  ];
  for (const [type, impl] of builtins) {
    registry.register(type, impl);
  }
  return registry;
}
|
|
1483
|
+
/**
 * Looks up an assertion implementation in the shared default registry, which
 * is built on the first call and reused afterwards.
 *
 * @param {string} type - assertion type name.
 * @returns {object} the registered implementation.
 * @throws {Error} when the type is not registered.
 */
function getAssertion(type) {
  if (defaultRegistry === null || defaultRegistry === void 0) {
    defaultRegistry = createDefaultRegistry2();
  }
  return defaultRegistry.resolve(type);
}
|
|
1487
|
+
var defaultRegistry;
|
|
1488
|
+
var init_registry2 = __esm({
|
|
1489
|
+
"src/assertions/registry.ts"() {
|
|
1490
|
+
"use strict";
|
|
1491
|
+
init_cjs_shims();
|
|
1492
|
+
init_exact_match();
|
|
1493
|
+
init_contains();
|
|
1494
|
+
init_json_schema();
|
|
1495
|
+
init_semantic_similarity();
|
|
1496
|
+
init_llm_judge();
|
|
1497
|
+
init_faithfulness();
|
|
1498
|
+
init_answer_relevance();
|
|
1499
|
+
init_context_precision();
|
|
1500
|
+
}
|
|
1501
|
+
});
|
|
1502
|
+
|
|
1503
|
+
// src/assertions/index.ts
|
|
1504
|
+
var init_assertions = __esm({
|
|
1505
|
+
"src/assertions/index.ts"() {
|
|
1506
|
+
"use strict";
|
|
1507
|
+
init_cjs_shims();
|
|
1508
|
+
init_registry2();
|
|
1509
|
+
}
|
|
1510
|
+
});
|
|
1511
|
+
|
|
1512
|
+
// src/orchestrator/judge.ts
|
|
1513
|
+
/**
 * Wraps a provider as a judge with a single `ask(prompt)` method.
 *
 * @param {{complete: Function}} provider - provider that performs the completion.
 * @param {{type: *, model: *, max_tokens?: number}} cfg - judge configuration;
 *   `max_tokens` falls back to DEFAULT_MAX_TOKENS.
 * @param {(usage: object) => void} onUsage - called with the usage of every completion.
 * @returns {{ask: (prompt: string) => Promise<string>}} the judge.
 */
function buildJudge(provider, cfg, onUsage) {
  const ask = async (prompt5) => {
    const request = {
      provider: cfg.type,
      model: cfg.model,
      // Temperature 0 keeps judgments as repeatable as the provider allows.
      temperature: 0,
      max_tokens: cfg.max_tokens ?? DEFAULT_MAX_TOKENS,
      prompt_resolved: prompt5
    };
    const reply = await provider.complete(request);
    onUsage(reply.usage);
    return reply.output;
  };
  return { ask };
}
|
|
1528
|
+
var DEFAULT_MAX_TOKENS;
|
|
1529
|
+
var init_judge = __esm({
|
|
1530
|
+
"src/orchestrator/judge.ts"() {
|
|
1531
|
+
"use strict";
|
|
1532
|
+
init_cjs_shims();
|
|
1533
|
+
DEFAULT_MAX_TOKENS = 1024;
|
|
1534
|
+
}
|
|
1535
|
+
});
|
|
1536
|
+
|
|
1537
|
+
// src/orchestrator/concurrency.ts
|
|
1538
|
+
/**
 * Maps `fn` over `items` with at most `limit` calls in flight, preserving input order.
 *
 * @param {Array} items - inputs to process.
 * @param {number} limit - maximum concurrency; non-finite values or values below 1
 *   are treated as 1, fractional values are floored.
 * @param {(item: *, index: number) => *} fn - mapper; may return a promise.
 * @returns {Promise<Array>} results in the same order as `items`.
 * @throws rejects with the first error raised by `fn`. Once a call has failed
 *   no further items are started; calls already in flight are left to settle.
 */
async function mapLimit(items, limit, fn) {
  const results = new Array(items.length);
  let next = 0;
  let failed = false;
  const worker = async () => {
    // Stop pulling work after any failure: the overall result is already a
    // rejection, so starting more (possibly costly) calls would be wasted.
    while (!failed && next < items.length) {
      const i = next;
      next += 1;
      try {
        results[i] = await fn(items[i], i);
      } catch (err) {
        failed = true;
        throw err;
      }
    }
  };
  const safeLimit = Number.isFinite(limit) && limit >= 1 ? Math.floor(limit) : 1;
  const workerCount = Math.min(safeLimit, items.length);
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
|
|
1553
|
+
var init_concurrency = __esm({
|
|
1554
|
+
"src/orchestrator/concurrency.ts"() {
|
|
1555
|
+
"use strict";
|
|
1556
|
+
init_cjs_shims();
|
|
1557
|
+
}
|
|
1558
|
+
});
|
|
1559
|
+
|
|
1560
|
+
// src/orchestrator/run.ts
|
|
1561
|
+
/**
 * Substitutes `{{ name }}` placeholders in a prompt template with row variables.
 *
 * @param {string} template - prompt text with `{{ word }}` placeholders.
 * @param {object} vars - variables for the current row.
 * @returns {string} the prompt with every placeholder replaced by `String(vars[name])`.
 * @throws {RunConfigError} when a placeholder names a variable that `vars` does
 *   not define as its own property.
 */
function resolvePrompt(template, vars) {
  return template.replace(/\{\{\s*(\w+)\s*\}\}/g, (_match, key) => {
    // Own properties only: `key in vars` would also accept inherited members,
    // letting `{{constructor}}` or `{{toString}}` inject function source into
    // the prompt instead of being reported as an unknown variable.
    if (!Object.prototype.hasOwnProperty.call(vars, key)) {
      throw new RunConfigError(`Unknown variable "${key}" referenced in prompt`);
    }
    return String(vars[key]);
  });
}
|
|
1569
|
+
/**
 * Converts a thrown value into the `{code, message}` shape stored on a run result.
 *
 * @param {*} err - the caught value.
 * @returns {{code: *, message: string}} the error's own `code` for
 *   ProviderError/AuthError; otherwise code "ERROR" with the Error message or
 *   the stringified value.
 */
function toRunError(err) {
  const typed = err instanceof ProviderError || err instanceof AuthError;
  if (typed) {
    return { code: err.code, message: err.message };
  }
  const message = err instanceof Error ? err.message : String(err);
  return { code: "ERROR", message };
}
|
|
1575
|
+
/**
 * Produces a rough token estimate without calling the provider.
 *
 * @param {string} prompt5 - Resolved prompt text.
 * @param {number} maxTokens - Output token budget.
 * @returns {{input_tokens: number, output_tokens: number}} Input is counted as one
 *   token per four characters (rounded up); output is the full `maxTokens` budget.
 */
function estimateUsage(prompt5, maxTokens) {
  const CHARS_PER_TOKEN = 4;
  const inputTokens = Math.ceil(prompt5.length / CHARS_PER_TOKEN);
  return { input_tokens: inputTokens, output_tokens: maxTokens };
}
|
|
1578
|
+
/**
 * Runs one dataset row: resolves the prompt, obtains a completion (cache or
 * provider), evaluates the assertions and returns the row record.
 *
 * @param {object} p - Row context. Fields read here: `now`, `template`, `row`,
 *   `providerConfig`, `provider`, `dryRun`, `noCache`, `cache`, `assertions`, `embedder`.
 * @returns {Promise<object>} Record with `vars`, `output`, `cached` and `assertions`;
 *   `usage`, `latency_ms` and `error` are present depending on the path taken.
 *   Provider and assertion failures are captured in `error` rather than thrown.
 *   resolvePrompt is called outside any try block, so whatever it throws propagates.
 */
async function runRow(p) {
  const t0 = p.now();
  const prompt5 = resolvePrompt(p.template, p.row.vars);
  const temperature = p.providerConfig.temperature ?? 0;
  const maxTokens = p.providerConfig.max_tokens ?? DEFAULT_MAX_TOKENS2;
  // Dry run: estimate tokens and cost only. No provider call, no cache access,
  // no assertions, and the record carries no latency_ms.
  if (p.dryRun) {
    const est = estimateUsage(prompt5, maxTokens);
    const usage2 = { ...est, cost_usd: p.provider.estimateCost(est).cost_usd };
    return { vars: p.row.vars, output: null, cached: false, usage: usage2, assertions: [] };
  }
  // The cache key is built from the same five fields that are sent to the provider below.
  const key = cacheKey({
    provider: p.providerConfig.type,
    model: p.providerConfig.model,
    temperature,
    max_tokens: maxTokens,
    prompt_resolved: prompt5
  });
  let output;
  let usage;
  let cached = false;
  // With noCache the lookup is skipped entirely (and the write below as well).
  const hit = p.noCache ? void 0 : p.cache.get(key);
  if (hit !== void 0) {
    output = hit.output;
    cached = true;
    // A cache hit keeps the stored token counts but is billed at zero cost.
    usage = {
      input_tokens: hit.usage.input_tokens,
      output_tokens: hit.usage.output_tokens,
      cost_usd: 0
    };
  } else {
    try {
      const res = await p.provider.complete({
        provider: p.providerConfig.type,
        model: p.providerConfig.model,
        temperature,
        max_tokens: maxTokens,
        prompt_resolved: prompt5
      });
      output = res.output;
      usage = {
        input_tokens: res.usage.input_tokens,
        output_tokens: res.usage.output_tokens,
        // Prefer the provider-reported actual cost (res.cost_usd) when present; resolveCost still
        // lets a config pricing override win, else falls back to the table estimate (ADR-PRC01).
        cost_usd: p.provider.estimateCost(res.usage, res.cost_usd).cost_usd
      };
      // NOTE(review): the cache write sits inside this try, so a failing
      // cache.set is reported as a row error even though the completion succeeded.
      if (!p.noCache) p.cache.set(key, res);
    } catch (err) {
      // Completion failed: return an errored row with no usage and no assertions.
      return {
        vars: p.row.vars,
        output: null,
        cached: false,
        latency_ms: p.now() - t0,
        error: toRunError(err),
        assertions: []
      };
    }
  }
  // Cost reported through the judge's usage callback is accumulated here and
  // added to the row's cost at the end.
  let judgeCost = 0;
  const judge = buildJudge(p.provider, p.providerConfig, (u) => {
    judgeCost += p.provider.estimateCost(u).cost_usd;
  });
  const records = [];
  let assertionError;
  for (const assertion of p.assertions) {
    try {
      const result = await getAssertion(assertion.type).run({
        output,
        vars: p.row.vars,
        row: p.row,
        params: assertion,
        embedder: p.embedder,
        judge
      });
      // `score` and `reason` are copied only when the assertion supplied them.
      records.push({
        type: assertion.type,
        passed: result.passed,
        ...result.score !== void 0 ? { score: result.score } : {},
        ...result.reason !== void 0 ? { reason: result.reason } : {}
      });
    } catch (err) {
      // The first assertion that throws stops evaluation; earlier records are kept.
      assertionError = toRunError(err);
      break;
    }
  }
  const finalUsage = { ...usage, cost_usd: usage.cost_usd + judgeCost };
  return {
    vars: p.row.vars,
    output,
    cached,
    usage: finalUsage,
    latency_ms: p.now() - t0,
    ...assertionError !== void 0 ? { error: assertionError } : {},
    assertions: records
  };
}
|
|
1674
|
+
/**
 * Filters evals by a list of selectors.
 *
 * @param {Array<object>} evals - Configured evals (`id`, optional `tags`).
 * @param {string[]|undefined} only - Selectors: `tag:<name>` matches evals carrying
 *   that tag, anything else must equal the eval id.
 * @returns {Array<object>} `evals` itself when `only` is undefined or empty,
 *   otherwise the evals matched by at least one selector.
 */
function selectEvals(evals, only) {
  if (only === undefined || only.length === 0) {
    return evals;
  }
  const TAG_PREFIX = "tag:";
  const matches = (ev, selector) => {
    if (selector.startsWith(TAG_PREFIX)) {
      const tags = ev.tags ?? [];
      return tags.includes(selector.slice(TAG_PREFIX.length));
    }
    return ev.id === selector;
  };
  return evals.filter((ev) => only.some((selector) => matches(ev, selector)));
}
|
|
1682
|
+
/**
 * Returns the prompt template of an eval, inline or loaded from `prompt_file`.
 *
 * @param {object} ev - Eval config (`id`, `prompt` and/or `prompt_file`).
 * @param {string} baseDir - Directory that `prompt_file` is resolved against.
 * @returns {string} The template text; an inline `prompt` wins over `prompt_file`.
 * @throws {RunConfigError} When the file cannot be read, or neither field is set.
 */
function resolveTemplate(ev, baseDir) {
  if (ev.prompt !== undefined) {
    return ev.prompt;
  }
  if (ev.prompt_file === undefined) {
    throw new RunConfigError(`Eval "${ev.id}" has neither prompt nor prompt_file`);
  }
  let text;
  try {
    const absolutePath = path3.resolve(baseDir, ev.prompt_file);
    text = fs3.readFileSync(absolutePath, "utf8");
  } catch {
    // Every read failure is reported as "not found".
    throw new RunConfigError(`Eval "${ev.id}": prompt_file not found: ${ev.prompt_file}`);
  }
  return text;
}
|
|
1693
|
+
/**
 * Classifies a row result.
 *
 * @param {object} row - Row record with optional `error` and an `assertions` array.
 * @returns {"errored"|"failed"|"passed"} "errored" when `error` is set (assertions
 *   are not inspected), "failed" when any assertion did not pass, else "passed".
 */
function classify(row) {
  const hasError = row.error !== undefined;
  if (hasError) {
    return "errored";
  }
  const everyAssertionPassed = row.assertions.every((a) => a.passed);
  return everyAssertionPassed ? "passed" : "failed";
}
|
|
1698
|
+
/**
 * Runs the selected evals and assembles the run result.
 *
 * All templates and datasets are loaded, and every row's placeholders resolved,
 * before the first row is executed, so configuration errors surface up front.
 *
 * @param {object} config - Loaded config (`provider`, `evals`).
 * @param {object} options - `dryRun`, `noCache`, `only`, `concurrency`, `bail`.
 * @param {object} deps - `now`, `baseDir`, `loadDataset`, `resolveProvider`,
 *   `embedder`, `cache`.
 * @returns {Promise<object>} Run result with `schema`, `plune_version`, timestamps,
 *   `config_hash`, `summary` and `evals`.
 */
async function runOrchestration(config, options, deps) {
  const started = deps.now();
  const dryRun = options.dryRun ?? false;
  const noCache = options.noCache ?? false;

  // Phase 1: load and validate everything.
  const prepared = [];
  for (const ev of selectEvals(config.evals, options.only)) {
    const template = resolveTemplate(ev, deps.baseDir);
    const rows = deps.loadDataset(ev.dataset, deps.baseDir);
    for (const row of rows) {
      resolvePrompt(template, row.vars);
    }
    prepared.push({ ev, template, rows });
  }

  // Phase 2: execute eval by eval, rows in parallel up to the concurrency limit.
  const evalResults = [];
  for (const entry of prepared) {
    const { ev, template, rows } = entry;
    // Per-eval provider settings override the global ones.
    const providerConfig = { ...config.provider, ...ev.provider };
    const provider = deps.resolveProvider(providerConfig);
    const limit = options.concurrency ?? providerConfig.concurrency ?? 4;
    const runOne = (row) => runRow({
      template,
      assertions: ev.assertions,
      row,
      providerConfig,
      provider,
      embedder: deps.embedder,
      cache: deps.cache,
      now: deps.now,
      dryRun,
      noCache
    });
    const rowResults = await mapLimit(rows, limit, runOne);
    const passed = rowResults.every((r) => classify(r) === "passed");
    evalResults.push({ id: ev.id, tags: ev.tags ?? [], rows: rowResults, passed });
    if (options.bail === true && !passed) {
      break;
    }
  }

  // Phase 3: aggregate counts and cost over every executed row.
  const finished = deps.now();
  const summary = {
    total: 0,
    passed: 0,
    failed: 0,
    errored: 0,
    cost_usd: 0,
    duration_ms: finished - started
  };
  for (const row of evalResults.flatMap((ev) => ev.rows)) {
    summary.total += 1;
    summary[classify(row)] += 1;
    summary.cost_usd += row.usage?.cost_usd ?? 0;
  }

  return {
    schema: 1,
    plune_version: PLUNE_VERSION,
    started_at: new Date(started).toISOString(),
    finished_at: new Date(finished).toISOString(),
    config_hash: configHash(config),
    summary,
    evals: evalResults
  };
}
|
|
1761
|
+
// Module-level bindings of src/orchestrator/run.ts; assigned when init_run() first runs.
var fs3, path3, DEFAULT_MAX_TOKENS2, PLUNE_VERSION, RunConfigError;
var init_run = __esm({
  "src/orchestrator/run.ts"() {
    "use strict";
    init_cjs_shims();
    fs3 = __toESM(require("fs"), 1);
    path3 = __toESM(require("path"), 1);
    init_hash();
    init_assertions();
    init_judge();
    init_concurrency();
    init_errors2();
    // Output-token budget used when the provider config sets no max_tokens.
    DEFAULT_MAX_TOKENS2 = 1024;
    // Emitted as `plune_version` in every run result. It was still "0.1.0" in
    // the published 0.2.0 package; keep it in sync with package.json "version".
    PLUNE_VERSION = "0.2.0";
    // Configuration problem detected while preparing or running evals
    // (unknown prompt variable, missing prompt or prompt_file).
    RunConfigError = class extends Error {
      code = "CONFIG_ERROR";
      constructor(message) {
        super(message);
        this.name = "RunConfigError";
      }
    };
  }
});
|
|
1784
|
+
|
|
1785
|
+
// src/orchestrator/exit-code.ts
|
|
1786
|
+
/**
 * Maps a run result to the process exit code.
 *
 * @param {object} result - Run result whose `summary` has `failed` and `errored` counts.
 * @returns {0|1|2} 1 when any row failed (failures take precedence), 2 when rows
 *   only errored, 0 otherwise.
 */
function exitCodeFor(result) {
  const counts = result.summary;
  if (counts.failed > 0) {
    return 1;
  }
  return counts.errored > 0 ? 2 : 0;
}
|
|
1792
|
+
var init_exit_code = __esm({
|
|
1793
|
+
"src/orchestrator/exit-code.ts"() {
|
|
1794
|
+
"use strict";
|
|
1795
|
+
init_cjs_shims();
|
|
1796
|
+
}
|
|
1797
|
+
});
|
|
1798
|
+
|
|
1799
|
+
// src/orchestrator/dataset.ts
|
|
1800
|
+
/**
 * Loads the rows of a dataset.
 *
 * @param {string|{examples: Array}} ref - Path to a JSONL file (resolved against
 *   `baseDir`), or an inline dataset whose `examples` array is returned as is.
 * @param {string} baseDir - Directory that a file reference is resolved against.
 * @returns {Array} One entry per non-blank line of the file, or `ref.examples`.
 * @throws {SyntaxError} When a line is not valid JSON; the message names the
 *   dataset and the 1-based line number, the original error is kept as `cause`.
 * @throws {Error} Whatever `readFileSync` throws when the file cannot be read.
 */
function loadDataset(ref, baseDir) {
  if (typeof ref !== "string") {
    return ref.examples;
  }
  const content = fs4.readFileSync(path4.resolve(baseDir, ref), "utf8");
  const rows = [];
  const lines = content.split(/\r?\n/);
  for (let index = 0; index < lines.length; index += 1) {
    const line = lines[index].trim();
    if (line.length === 0) {
      continue; // blank lines are allowed anywhere in the file
    }
    try {
      rows.push(JSON.parse(line));
    } catch (cause) {
      // A bare JSON.parse error gives no hint which file or line is broken.
      const detail = cause instanceof Error ? cause.message : String(cause);
      const err = new SyntaxError(`Invalid JSON in dataset "${ref}" at line ${index + 1}: ${detail}`);
      err.cause = cause;
      throw err;
    }
  }
  return rows;
}
|
|
1807
|
+
var fs4, path4;
|
|
1808
|
+
var init_dataset = __esm({
|
|
1809
|
+
"src/orchestrator/dataset.ts"() {
|
|
1810
|
+
"use strict";
|
|
1811
|
+
init_cjs_shims();
|
|
1812
|
+
fs4 = __toESM(require("fs"), 1);
|
|
1813
|
+
path4 = __toESM(require("path"), 1);
|
|
1814
|
+
}
|
|
1815
|
+
});
|
|
1816
|
+
|
|
1817
|
+
// src/orchestrator/index.ts
|
|
1818
|
+
var orchestrator_exports = {};
|
|
1819
|
+
__export(orchestrator_exports, {
|
|
1820
|
+
RunConfigError: () => RunConfigError,
|
|
1821
|
+
buildJudge: () => buildJudge,
|
|
1822
|
+
exitCodeFor: () => exitCodeFor,
|
|
1823
|
+
loadDataset: () => loadDataset,
|
|
1824
|
+
mapLimit: () => mapLimit,
|
|
1825
|
+
resolvePrompt: () => resolvePrompt,
|
|
1826
|
+
runOrchestration: () => runOrchestration,
|
|
1827
|
+
runRow: () => runRow
|
|
1828
|
+
});
|
|
1829
|
+
var init_orchestrator = __esm({
|
|
1830
|
+
"src/orchestrator/index.ts"() {
|
|
1831
|
+
"use strict";
|
|
1832
|
+
init_cjs_shims();
|
|
1833
|
+
init_run();
|
|
1834
|
+
init_exit_code();
|
|
1835
|
+
init_dataset();
|
|
1836
|
+
init_concurrency();
|
|
1837
|
+
init_judge();
|
|
1838
|
+
}
|
|
1839
|
+
});
|
|
1840
|
+
|
|
1841
|
+
// src/cli/commands/run.ts
|
|
1842
|
+
var run_exports = {};
|
|
1843
|
+
__export(run_exports, {
|
|
1844
|
+
handleRun: () => handleRun,
|
|
1845
|
+
isMockMode: () => isMockMode
|
|
1846
|
+
});
|
|
1847
|
+
/**
 * Tells whether the mock provider is requested through the environment.
 *
 * @param {object} env - Environment variables.
 * @returns {boolean} True only when `PLUNE_MOCK_PROVIDER` is exactly "1".
 */
function isMockMode(env) {
  const flag = env["PLUNE_MOCK_PROVIDER"];
  return flag === "1";
}
|
|
1850
|
+
/**
 * Builds the production dependencies handed to runOrchestration.
 *
 * Creates `<baseDir>/.plune` if needed (also for dry runs).
 *
 * @param {object} config - Loaded config; `config.pricing` is passed to getProvider.
 * @param {string} baseDir - Project directory.
 * @param {boolean} dryRun - When truthy a no-op cache is used instead of opening cache.db.
 * @returns {object} `{ resolveProvider, embedder, cache, now, loadDataset, baseDir }`.
 */
function buildRealDeps(config, baseDir, dryRun) {
  const stateDir = path5.join(baseDir, ".plune");
  fs5.mkdirSync(stateDir, { recursive: true });

  let resolveProvider;
  if (isMockMode(process.env)) {
    resolveProvider = () => makeMockProvider();
  } else {
    resolveProvider = (cfg) => getProvider(cfg, process.env, config.pricing);
  }
  const embedder = getDefaultEmbedder();
  const cache = dryRun ? NOOP_CACHE : openCache(path5.join(stateDir, "cache.db"));

  return {
    resolveProvider,
    embedder,
    cache,
    now: Date.now,
    loadDataset,
    baseDir
  };
}
|
|
1862
|
+
/**
 * Saves a run result as `<baseDir>/.plune/last-run.json` (2-space indented JSON),
 * creating the directory when missing.
 *
 * @param {object} result - Run result to store.
 * @param {string} baseDir - Project directory.
 */
function persist(result, baseDir) {
  const stateDir = path5.join(baseDir, ".plune");
  const target = path5.join(stateDir, "last-run.json");
  const payload = JSON.stringify(result, null, 2);
  fs5.mkdirSync(stateDir, { recursive: true });
  fs5.writeFileSync(target, payload);
}
|
|
1867
|
+
/**
 * Implements `plune run`: loads the config, runs the orchestration, stores the
 * result as last-run.json and returns it. The cache is closed in all cases.
 *
 * @param {object} options - CLI options: `configPath`, `dryRun`, `only`,
 *   `concurrency`, `noCache`, `bail`.
 * @param {Function} [depsFactory] - `(config, baseDir) => deps`; defaults to buildRealDeps.
 * @returns {Promise<object>} The run result.
 */
async function handleRun(options, depsFactory) {
  const hasConfigPath = options.configPath !== undefined;
  const config = await loadConfig(hasConfigPath ? { configPath: options.configPath } : {});
  // Relative paths in the config are resolved against the config file's directory.
  const baseDir = hasConfigPath ? path5.dirname(path5.resolve(options.configPath)) : process.cwd();
  const factory = depsFactory ?? ((c, d) => buildRealDeps(c, d, options.dryRun));
  const deps = factory(config, baseDir);
  try {
    // Forward only the flags that were actually provided.
    const runOptions = { dryRun: options.dryRun };
    for (const flag of ["only", "concurrency", "noCache", "bail"]) {
      if (options[flag] !== undefined) {
        runOptions[flag] = options[flag];
      }
    }
    const result = await runOrchestration(config, runOptions, deps);
    persist(result, baseDir);
    return result;
  } finally {
    deps.cache.close();
  }
}
|
|
1892
|
+
var fs5, path5, NOOP_CACHE;
|
|
1893
|
+
var init_run2 = __esm({
|
|
1894
|
+
"src/cli/commands/run.ts"() {
|
|
1895
|
+
"use strict";
|
|
1896
|
+
init_cjs_shims();
|
|
1897
|
+
fs5 = __toESM(require("fs"), 1);
|
|
1898
|
+
path5 = __toESM(require("path"), 1);
|
|
1899
|
+
init_loader();
|
|
1900
|
+
init_providers();
|
|
1901
|
+
init_mock();
|
|
1902
|
+
init_embeddings();
|
|
1903
|
+
init_cache2();
|
|
1904
|
+
init_orchestrator();
|
|
1905
|
+
NOOP_CACHE = { get: () => void 0, set: () => {
|
|
1906
|
+
}, clear: () => {
|
|
1907
|
+
}, close: () => {
|
|
1908
|
+
} };
|
|
1909
|
+
}
|
|
1910
|
+
});
|
|
1911
|
+
|
|
1912
|
+
// src/reporters/style.ts
|
|
1913
|
+
/**
 * Wraps text in an ANSI SGR sequence followed by a reset.
 *
 * @param {number} code - SGR code (e.g. 32).
 * @param {string} s - Text to style.
 * @returns {string} `ESC[<code>m` + text + `ESC[0m`.
 */
function wrap(code, s) {
  const start = `\x1B[${code}m`;
  const reset = "\x1B[0m";
  return `${start}${s}${reset}`;
}
|
|
1916
|
+
/**
 * Creates the set of text stylers used by the reporters.
 *
 * @param {boolean} color - When falsy every styler returns its input unchanged.
 * @returns {{green: Function, red: Function, yellow: Function, dim: Function, bold: Function}}
 *   Functions that wrap a string in the matching ANSI code from CODE.
 */
function createStyler(color) {
  const styles = ["green", "red", "yellow", "dim", "bold"];
  if (!color) {
    const passthrough = (s) => s;
    return Object.fromEntries(styles.map((style) => [style, passthrough]));
  }
  return Object.fromEntries(
    styles.map((style) => [style, (s) => wrap(CODE[style], s)])
  );
}
|
|
1929
|
+
var CODE;
|
|
1930
|
+
var init_style = __esm({
|
|
1931
|
+
"src/reporters/style.ts"() {
|
|
1932
|
+
"use strict";
|
|
1933
|
+
init_cjs_shims();
|
|
1934
|
+
CODE = { green: 32, red: 31, yellow: 33, dim: 2, bold: 1 };
|
|
1935
|
+
}
|
|
1936
|
+
});
|
|
1937
|
+
|
|
1938
|
+
// src/reporters/helpers.ts
|
|
1939
|
+
/**
 * Shortens a string for display and marks it as truncated.
 *
 * @param {string} s - Text to shorten.
 * @param {number} maxChars - Maximum number of UTF-16 code units to keep.
 * @returns {string} `s` unchanged when it fits, otherwise its first `maxChars`
 *   units followed by "… (truncated)". A surrogate pair straddling the cut is
 *   dropped whole, and a negative limit keeps nothing.
 */
function truncate(s, maxChars) {
  if (s.length <= maxChars) return s;
  // A negative end would make slice() count from the end of the string.
  let end = Math.max(0, maxChars);
  if (end > 0) {
    const before = s.charCodeAt(end - 1);
    const after = s.charCodeAt(end);
    const splitsPair = before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    // Never leave a lone high surrogate (half an emoji) at the end of the output.
    if (splitsPair) end -= 1;
  }
  return s.slice(0, end) + "\u2026 (truncated)";
}
|
|
1943
|
+
var init_helpers = __esm({
|
|
1944
|
+
"src/reporters/helpers.ts"() {
|
|
1945
|
+
"use strict";
|
|
1946
|
+
init_cjs_shims();
|
|
1947
|
+
}
|
|
1948
|
+
});
|
|
1949
|
+
|
|
1950
|
+
// src/reporters/console.ts
|
|
1951
|
+
/**
 * Renders a run result as console text.
 *
 * Output: a title, a one-line summary, then one section per eval. Errored and
 * failed rows are listed in detail; passing rows are only counted.
 *
 * @param {object} result - Run result (`summary`, `evals`).
 * @param {object} [opts] - `color` (default false) and `maxOutputChars`
 *   (default DEFAULT_MAX_OUTPUT) for truncating model output.
 * @returns {string} The report, lines joined with "\n".
 */
function renderConsole(result, opts = {}) {
  const paint = createStyler(opts.color ?? false);
  const outputLimit = opts.maxOutputChars ?? DEFAULT_MAX_OUTPUT;
  const { summary } = result;

  // Failed and errored counts are only coloured when non-zero.
  const tintIfNonZero = (count, label, tint) => {
    const text = `${count} ${label}`;
    return count > 0 ? tint(text) : text;
  };
  const headline = [
    paint.green(`${summary.passed} passed`),
    tintIfNonZero(summary.failed, "failed", (t) => paint.red(t)),
    tintIfNonZero(summary.errored, "errored", (t) => paint.yellow(t)),
    `${summary.total} total`,
    `$${summary.cost_usd.toFixed(4)}`,
    `${summary.duration_ms}ms`
  ].join(" \xB7 ");
  const lines = [paint.bold("Plune run"), headline, ""];

  for (const ev of result.evals) {
    const verdict = ev.passed ? paint.green("PASS") : paint.red("FAIL");
    const tagSuffix = ev.tags.length > 0 ? paint.dim(` (${ev.tags.join(", ")})`) : "";
    lines.push(`${verdict} ${paint.bold(ev.id)}${tagSuffix}`);

    let cleanRows = 0;
    for (const row of ev.rows) {
      if (row.error !== undefined) {
        lines.push(
          ` ${paint.dim("vars:")} ${JSON.stringify(row.vars)}`,
          ` ${paint.red("error:")} ${row.error.message}`
        );
        continue;
      }
      const failing = row.assertions.filter((a) => !a.passed);
      if (failing.length === 0) {
        cleanRows += 1;
        continue;
      }
      lines.push(` ${paint.dim("vars:")} ${JSON.stringify(row.vars)}`);
      if (row.output !== null) {
        lines.push(` ${paint.dim("output:")} ${truncate(row.output, outputLimit)}`);
      }
      failing.forEach((a) => {
        const scoreTxt = a.score !== undefined ? paint.dim(` (score ${a.score})`) : "";
        const reasonTxt = a.reason !== undefined ? `: ${a.reason}` : "";
        lines.push(` ${paint.red("x")} ${a.type}${reasonTxt}${scoreTxt}`);
      });
    }
    if (cleanRows > 0) {
      lines.push(paint.dim(` ${cleanRows} row(s) passed`));
    }
  }
  return lines.join("\n");
}
|
|
1996
|
+
var DEFAULT_MAX_OUTPUT;
|
|
1997
|
+
var init_console = __esm({
|
|
1998
|
+
"src/reporters/console.ts"() {
|
|
1999
|
+
"use strict";
|
|
2000
|
+
init_cjs_shims();
|
|
2001
|
+
init_style();
|
|
2002
|
+
init_helpers();
|
|
2003
|
+
DEFAULT_MAX_OUTPUT = 500;
|
|
2004
|
+
}
|
|
2005
|
+
});
|
|
2006
|
+
|
|
2007
|
+
// src/reporters/json.ts
|
|
2008
|
+
/**
 * Renders a run result as pretty-printed JSON.
 *
 * @param {object} result - Run result.
 * @returns {string} JSON text indented with two spaces.
 */
function renderJson(result) {
  const INDENT = 2;
  return JSON.stringify(result, null, INDENT);
}
|
|
2011
|
+
var init_json = __esm({
|
|
2012
|
+
"src/reporters/json.ts"() {
|
|
2013
|
+
"use strict";
|
|
2014
|
+
init_cjs_shims();
|
|
2015
|
+
}
|
|
2016
|
+
});
|
|
2017
|
+
|
|
2018
|
+
// src/reporters/markdown.ts
|
|
2019
|
+
/**
 * Classifies a row for the Markdown report.
 *
 * @param {object} row - Row record with optional `error` and an `assertions` array.
 * @returns {"errored"|"failed"|"passed"} "errored" wins over "failed".
 */
function classify2(row) {
  if (row.error !== undefined) {
    return "errored";
  }
  for (const assertion of row.assertions) {
    if (!assertion.passed) {
      return "failed";
    }
  }
  return "passed";
}
|
|
2024
|
+
/**
 * Wraps content in a Markdown code fence that the content cannot close early.
 *
 * @param {string} content - Text to fence.
 * @returns {string} Fence line, content, fence line. The fence is one TICK longer
 *   than the longest TICK run in the content, and at least three.
 */
function fence(content) {
  let longestRun = 0;
  for (const run of content.match(new RegExp(TICK + "+", "g")) ?? []) {
    if (run.length > longestRun) {
      longestRun = run.length;
    }
  }
  const bar = TICK.repeat(Math.max(3, longestRun + 1));
  return [bar, content, bar].join("\n");
}
|
|
2032
|
+
/**
 * Renders a run result as a Markdown report.
 *
 * Output: a metrics table followed by either "All evals passed." or a
 * "Failures" section with one entry per failed or errored row.
 *
 * @param {object} result - Run result (`summary`, `evals`).
 * @param {object} [opts] - `maxOutputChars` (default DEFAULT_MAX_OUTPUT2) for
 *   truncating model output.
 * @returns {string} Markdown text, lines joined with "\n".
 */
function renderMarkdown(result, opts = {}) {
  const outputLimit = opts.maxOutputChars ?? DEFAULT_MAX_OUTPUT2;
  const { summary } = result;
  const doc = [
    "# Plune run",
    "",
    "| Metric | Value |",
    "| --- | --- |",
    `| Total | ${summary.total} |`,
    `| Passed | ${summary.passed} |`,
    `| Failed | ${summary.failed} |`,
    `| Errored | ${summary.errored} |`,
    `| Cost (USD) | ${summary.cost_usd.toFixed(4)} |`,
    `| Duration (ms) | ${summary.duration_ms} |`,
    ""
  ];

  // Collect every row that did not pass, in eval and row order.
  const failures = result.evals.flatMap((ev) =>
    ev.rows
      .map((row) => ({ id: ev.id, row, state: classify2(row) }))
      .filter((entry) => entry.state !== "passed")
  );
  if (failures.length === 0) {
    doc.push("All evals passed.");
    return doc.join("\n");
  }

  doc.push("## Failures", "");
  for (const { id, row, state } of failures) {
    doc.push(`### ${id} \u2014 ${state}`, `- vars: ${JSON.stringify(row.vars)}`);
    if (row.error !== undefined) {
      doc.push(`- error: ${row.error.message}`);
    } else {
      for (const a of row.assertions.filter((item) => !item.passed)) {
        const reasonTxt = a.reason !== undefined ? ` \u2014 ${a.reason}` : "";
        const scoreTxt = a.score !== undefined ? ` (score ${a.score})` : "";
        doc.push(`- ${a.type}${reasonTxt}${scoreTxt}`);
      }
      if (row.output !== null) {
        doc.push("", fence(truncate(row.output, outputLimit)));
      }
    }
    doc.push("");
  }
  return doc.join("\n");
}
|
|
2081
|
+
var DEFAULT_MAX_OUTPUT2, TICK;
|
|
2082
|
+
var init_markdown = __esm({
|
|
2083
|
+
"src/reporters/markdown.ts"() {
|
|
2084
|
+
"use strict";
|
|
2085
|
+
init_cjs_shims();
|
|
2086
|
+
init_helpers();
|
|
2087
|
+
DEFAULT_MAX_OUTPUT2 = 500;
|
|
2088
|
+
TICK = String.fromCharCode(96);
|
|
2089
|
+
}
|
|
2090
|
+
});
|
|
2091
|
+
|
|
2092
|
+
// src/reporters/index.ts
|
|
2093
|
+
var reporters_exports = {};
|
|
2094
|
+
__export(reporters_exports, {
|
|
2095
|
+
renderConsole: () => renderConsole,
|
|
2096
|
+
renderJson: () => renderJson,
|
|
2097
|
+
renderMarkdown: () => renderMarkdown,
|
|
2098
|
+
renderReport: () => renderReport
|
|
2099
|
+
});
|
|
2100
|
+
/**
 * Renders a run result in the requested format.
 *
 * @param {object} result - Run result.
 * @param {string} format - "json" or "markdown"; anything else yields the console report.
 * @param {object} [opts] - Options passed to the Markdown and console renderers.
 * @returns {string} The rendered report.
 */
function renderReport(result, format, opts = {}) {
  switch (format) {
    case "json":
      return renderJson(result);
    case "markdown":
      return renderMarkdown(result, opts);
    default:
      return renderConsole(result, opts);
  }
}
|
|
2105
|
+
var init_reporters = __esm({
|
|
2106
|
+
"src/reporters/index.ts"() {
|
|
2107
|
+
"use strict";
|
|
2108
|
+
init_cjs_shims();
|
|
2109
|
+
init_console();
|
|
2110
|
+
init_json();
|
|
2111
|
+
init_markdown();
|
|
2112
|
+
init_console();
|
|
2113
|
+
init_json();
|
|
2114
|
+
init_markdown();
|
|
2115
|
+
}
|
|
2116
|
+
});
|
|
2117
|
+
|
|
2118
|
+
// src/cli/commands/report.ts
|
|
2119
|
+
var report_exports = {};
|
|
2120
|
+
__export(report_exports, {
|
|
2121
|
+
ReportNotFoundError: () => ReportNotFoundError,
|
|
2122
|
+
handleReport: () => handleReport
|
|
2123
|
+
});
|
|
2124
|
+
/**
 * Shallow shape check for a saved run result.
 *
 * @param {*} v - Parsed JSON value.
 * @returns {boolean} True when `v` is a non-null object with an object `summary`
 *   and an array `evals`.
 */
function isRunResult(v) {
  if (v === null || typeof v !== "object") {
    return false;
  }
  const summary = v["summary"];
  const hasSummary = typeof summary === "object" && summary !== null;
  return hasSummary && Array.isArray(v["evals"]);
}
|
|
2129
|
+
/**
 * Implements `plune report`: loads the saved run from `<cwd>/.plune/last-run.json`.
 *
 * @param {object} [opts] - `cwd` overrides the working directory.
 * @returns {object} The parsed run result.
 * @throws {ReportNotFoundError} When the file cannot be read, is not valid JSON,
 *   or does not have the run-result shape.
 */
function handleReport(opts = {}) {
  const workingDir = opts.cwd ?? process.cwd();
  const savedRunPath = path6.join(workingDir, ".plune", "last-run.json");

  const readSavedRun = () => {
    try {
      return fs6.readFileSync(savedRunPath, "utf8");
    } catch {
      throw new ReportNotFoundError('No saved run found. Run "plune run" first.');
    }
  };
  const decode = (text) => {
    try {
      return JSON.parse(text);
    } catch {
      throw new ReportNotFoundError('Saved run is unreadable. Run "plune run" again.');
    }
  };

  const parsed = decode(readSavedRun());
  if (isRunResult(parsed)) {
    return parsed;
  }
  throw new ReportNotFoundError('Saved run is malformed. Run "plune run" again.');
}
|
|
2149
|
+
var fs6, path6, ReportNotFoundError;
|
|
2150
|
+
var init_report = __esm({
|
|
2151
|
+
"src/cli/commands/report.ts"() {
|
|
2152
|
+
"use strict";
|
|
2153
|
+
init_cjs_shims();
|
|
2154
|
+
fs6 = __toESM(require("fs"), 1);
|
|
2155
|
+
path6 = __toESM(require("path"), 1);
|
|
2156
|
+
ReportNotFoundError = class extends Error {
|
|
2157
|
+
code = "NO_SAVED_RUN";
|
|
2158
|
+
constructor(message) {
|
|
2159
|
+
super(message);
|
|
2160
|
+
this.name = "ReportNotFoundError";
|
|
2161
|
+
}
|
|
2162
|
+
};
|
|
2163
|
+
}
|
|
2164
|
+
});
|
|
2165
|
+
|
|
2166
|
+
// src/diff/diff.ts
|
|
2167
|
+
/**
 * Status of a single row for diffing.
 *
 * @param {object} r - Row record with optional `error` and an `assertions` array.
 * @returns {"errored"|"failed"|"passed"} "errored" wins over "failed".
 */
function rowStatus(r) {
  if (r.error !== undefined) {
    return "errored";
  }
  const firstFailure = r.assertions.findIndex((a) => !a.passed);
  return firstFailure === -1 ? "passed" : "failed";
}
|
|
2172
|
+
/**
 * Collapses the row statuses of an eval into one status.
 *
 * @param {object} e - Eval result with a `rows` array.
 * @returns {"failed"|"errored"|"passed"} "failed" if any row failed, else
 *   "errored" if any row errored, else "passed" (also for an eval without rows).
 */
function evalStatus(e) {
  const seen = new Set(e.rows.map(rowStatus));
  // Checked in precedence order: a failure outranks an error.
  for (const status of ["failed", "errored"]) {
    if (seen.has(status)) {
      return status;
    }
  }
  return "passed";
}
|
|
2178
|
+
/**
 * Classifies how an eval changed between two runs.
 *
 * @param {"passed"|"failed"|"errored"|"absent"} baseline - Status in the baseline run.
 * @param {"passed"|"failed"|"errored"|"absent"} current - Status in the current run.
 * @returns {string} One of "errored", "new-pass", "new-fail", "removed",
 *   "stable-pass", "regression", "improvement", "pre-existing-fail". A current
 *   "errored" status wins over everything else.
 */
function classify3(baseline, current) {
  if (current === "errored") {
    return "errored";
  }
  const passesNow = current === "passed";
  if (baseline === "absent") {
    return passesNow ? "new-pass" : "new-fail";
  }
  if (current === "absent") {
    return "removed";
  }
  // [label when passing now, label when not passing now]
  const labels = baseline === "passed"
    ? ["stable-pass", "regression"]
    : ["improvement", "pre-existing-fail"];
  return passesNow ? labels[0] : labels[1];
}
|
|
2185
|
+
/**
 * Compares two run results eval by eval (matched on `id`).
 *
 * @param {object} baseline - Earlier run result.
 * @param {object} current - Later run result.
 * @returns {{evals: Array<object>, summary: object}} One entry per eval id seen in
 *   either run (baseline ids first), plus per-category counts and `hasRegression`.
 */
function diffRuns(baseline, current) {
  const statusById = (run) => new Map(run.evals.map((e) => [e.id, evalStatus(e)]));
  const baseMap = statusById(baseline);
  const curMap = statusById(current);

  const evals = [];
  for (const id of new Set([...baseMap.keys(), ...curMap.keys()])) {
    const before = baseMap.get(id) ?? "absent";
    const after = curMap.get(id) ?? "absent";
    evals.push({ id, status: classify3(before, after), baseline: before, current: after });
  }

  // Count each change category once.
  const tally = new Map();
  for (const entry of evals) {
    tally.set(entry.status, (tally.get(entry.status) ?? 0) + 1);
  }
  const count = (status) => tally.get(status) ?? 0;
  const regressions = count("regression");

  const summary = {
    regressions,
    improvements: count("improvement"),
    preExistingFails: count("pre-existing-fail"),
    newFails: count("new-fail"),
    newPasses: count("new-pass"),
    stablePasses: count("stable-pass"),
    errored: count("errored"),
    removed: count("removed"),
    hasRegression: regressions > 0
  };
  return { evals, summary };
}
|
|
2208
|
+
var init_diff = __esm({
|
|
2209
|
+
"src/diff/diff.ts"() {
|
|
2210
|
+
"use strict";
|
|
2211
|
+
init_cjs_shims();
|
|
2212
|
+
}
|
|
2213
|
+
});
|
|
2214
|
+
|
|
2215
|
+
// src/cli/commands/diff.ts
|
|
2216
|
+
var diff_exports = {};
|
|
2217
|
+
__export(diff_exports, {
|
|
2218
|
+
DiffInputError: () => DiffInputError,
|
|
2219
|
+
handleDiff: () => handleDiff
|
|
2220
|
+
});
|
|
2221
|
+
/**
 * Shallow shape check for a run-result file given to `plune diff`.
 *
 * @param {*} v - Parsed JSON value.
 * @returns {boolean} True when `v` is a non-null object with an array `evals`
 *   and an object `summary`.
 */
function isRunResult2(v) {
  if (v === null || typeof v !== "object") {
    return false;
  }
  if (!Array.isArray(v["evals"])) {
    return false;
  }
  const summary = v["summary"];
  return typeof summary === "object" && summary !== null;
}
|
|
2226
|
+
/**
 * Reads and validates one run-result file for `plune diff`.
 *
 * @param {string} file - Path of the JSON file.
 * @param {string} label - Role of the file ("baseline" or "current"), used in messages.
 * @returns {object} The parsed run result.
 * @throws {DiffInputError} When the file cannot be read, is not valid JSON, or
 *   does not have the run-result shape.
 */
function loadRunResult(file, label) {
  const readText = () => {
    try {
      return fs7.readFileSync(file, "utf8");
    } catch {
      throw new DiffInputError(`Cannot read ${label} run file: ${file}`);
    }
  };
  const decode = (text) => {
    try {
      return JSON.parse(text);
    } catch {
      throw new DiffInputError(`${label} run file is not valid JSON: ${file}`);
    }
  };

  const parsed = decode(readText());
  if (isRunResult2(parsed)) {
    return parsed;
  }
  throw new DiffInputError(`${label} run file is not a Plune RunResult: ${file}`);
}
|
|
2244
|
+
/**
 * Implements `plune diff`: loads both run files and compares them.
 *
 * @param {{baselinePath: string, currentPath: string}} opts - Files to compare.
 * @returns {object} The result of diffRuns for the two runs.
 */
function handleDiff(opts) {
  const { baselinePath, currentPath } = opts;
  return diffRuns(
    loadRunResult(baselinePath, "baseline"),
    loadRunResult(currentPath, "current")
  );
}
|
|
2249
|
+
var fs7, DiffInputError;
|
|
2250
|
+
var init_diff2 = __esm({
|
|
2251
|
+
"src/cli/commands/diff.ts"() {
|
|
2252
|
+
"use strict";
|
|
2253
|
+
init_cjs_shims();
|
|
2254
|
+
fs7 = __toESM(require("fs"), 1);
|
|
2255
|
+
init_diff();
|
|
2256
|
+
DiffInputError = class extends Error {
|
|
2257
|
+
code = "DIFF_INPUT";
|
|
2258
|
+
constructor(message) {
|
|
2259
|
+
super(message);
|
|
2260
|
+
this.name = "DiffInputError";
|
|
2261
|
+
}
|
|
2262
|
+
};
|
|
2263
|
+
}
|
|
2264
|
+
});
|
|
2265
|
+
|
|
2266
|
+
// src/reporters/diff.ts
|
|
2267
|
+
var diff_exports2 = {};
|
|
2268
|
+
__export(diff_exports2, {
|
|
2269
|
+
STICKY_MARKER: () => STICKY_MARKER,
|
|
2270
|
+
renderDiff: () => renderDiff,
|
|
2271
|
+
renderDiffMarkdown: () => renderDiffMarkdown
|
|
2272
|
+
});
|
|
2273
|
+
/**
 * Builds the one-line footer of a diff report.
 *
 * @param {{summary: object}} d - Diff result as produced by diffRuns.
 * @returns {string} Every change category with its count, joined by " · ".
 */
function summaryLine(d) {
  const s = d.summary;
  return [
    `${s.regressions} regression(s)`,
    `${s.improvements} improvement(s)`,
    `${s.newFails} new-fail`,
    // new-pass used to be left out, so the counts did not add up to the number
    // of evals; `?? 0` tolerates summaries that lack the field.
    `${s.newPasses ?? 0} new-pass`,
    `${s.preExistingFails} pre-existing-fail`,
    `${s.errored} errored`,
    `${s.removed} removed`,
    `${s.stablePasses} stable`
  ].join(" \xB7 ");
}
|
|
2285
|
+
/**
 * Renders a diff as a Markdown comment body.
 *
 * Output: the sticky marker, a heading, a regression banner, a table of every
 * eval that is not a stable pass, and the summary footer in italics.
 *
 * @param {{evals: Array<object>, summary: object}} d - Diff result.
 * @returns {string} Markdown text, lines joined with "\n".
 */
function renderDiffMarkdown(d) {
  // Eval ids come from user config: an unescaped "|" or a line break would
  // split the table row, so escape pipes and flatten newlines.
  const cell = (text) => String(text).replace(/\|/g, "\\|").replace(/\r?\n/g, " ");
  const out = [STICKY_MARKER, "## Plune eval diff", ""];
  out.push(
    d.summary.hasRegression ? `### \u274C ${d.summary.regressions} regression(s)` : "### \u2705 No regressions"
  );
  out.push("");
  const notable = d.evals.filter((e) => e.status !== "stable-pass");
  if (notable.length > 0) {
    out.push("| Eval | Baseline \u2192 Current | Change |");
    out.push("| --- | --- | --- |");
    for (const e of notable) {
      out.push(`| ${cell(e.id)} | ${e.baseline} \u2192 ${e.current} | ${BADGE[e.status]} |`);
    }
    out.push("");
  }
  out.push(`_${summaryLine(d)}_`);
  return out.join("\n");
}
|
|
2303
|
+
/**
 * Renders a diff as plain console text.
 *
 * @param {{evals: Array<object>, summary: object}} d - Diff result.
 * @returns {string} A title line, one line per eval that is not a stable pass,
 *   and the summary footer, joined with "\n".
 */
function renderDiffConsole(d) {
  const { hasRegression, regressions } = d.summary;
  const title = hasRegression
    ? `Plune eval diff: ${regressions} regression(s)`
    : "Plune eval diff: no regressions";
  const changes = d.evals
    .filter((e) => e.status !== "stable-pass")
    .map((e) => ` ${e.id}: ${e.baseline} -> ${e.current} [${e.status}]`);
  return [title, ...changes, summaryLine(d)].join("\n");
}
|
|
2314
|
+
/**
 * Renders a diff as pretty-printed JSON.
 *
 * @param {object} d - Diff result.
 * @returns {string} JSON text indented with two spaces.
 */
function renderDiffJson(d) {
  const INDENT = 2;
  return JSON.stringify(d, null, INDENT);
}
|
|
2317
|
+
/**
 * Renders a diff in the requested format.
 *
 * @param {object} d - Diff result.
 * @param {string} [format="console"] - "json" or "markdown"; anything else
 *   yields the console rendering.
 * @returns {string} The rendered diff.
 */
function renderDiff(d, format = "console") {
  switch (format) {
    case "json":
      return renderDiffJson(d);
    case "markdown":
      return renderDiffMarkdown(d);
    default:
      return renderDiffConsole(d);
  }
}
|
|
2322
|
+
var STICKY_MARKER, BADGE;
|
|
2323
|
+
var init_diff3 = __esm({
|
|
2324
|
+
"src/reporters/diff.ts"() {
|
|
2325
|
+
"use strict";
|
|
2326
|
+
init_cjs_shims();
|
|
2327
|
+
STICKY_MARKER = "<!-- plune-eval-diff -->";
|
|
2328
|
+
BADGE = {
|
|
2329
|
+
regression: "\u{1F534} regression",
|
|
2330
|
+
improvement: "\u{1F7E2} improvement",
|
|
2331
|
+
"pre-existing-fail": "\u26AA pre-existing fail",
|
|
2332
|
+
"new-fail": "\u{1F195} new fail",
|
|
2333
|
+
"new-pass": "\u{1F7E2} new pass",
|
|
2334
|
+
"stable-pass": "\u2705 stable",
|
|
2335
|
+
errored: "\u26A0\uFE0F errored",
|
|
2336
|
+
removed: "\u2796 removed"
|
|
2337
|
+
};
|
|
2338
|
+
}
|
|
2339
|
+
});
|
|
2340
|
+
|
|
2341
|
+
// src/config/init/wizard.ts
|
|
2342
|
+
/**
 * Interactive `plune init` wizard: asks for provider, model and dataset path,
 * then writes a commented starter `plune.yaml` into `cwd`.
 *
 * Cancelling any prompt prints a notice and exits the process with code 1.
 * Declining to overwrite an existing plune.yaml leaves the file untouched.
 *
 * @param {string} cwd - Directory in which plune.yaml is created.
 * @returns {Promise<void>}
 * @throws {NonTtyError} When stdin is not a TTY, since the prompts cannot run.
 * @throws {Error} Whatever `writeFileSync` throws; the spinner is stopped first.
 */
async function runInitWizard(cwd) {
  if (!process.stdin.isTTY) {
    throw new NonTtyError();
  }
  // Shared cancel handling. The `return true` is only reached when
  // process.exit is stubbed, and makes the wizard stop in that case too.
  const abortIfCancelled = (answer) => {
    if (!clack.isCancel(answer)) return false;
    clack.cancel("Operation cancelled.");
    process.exit(1);
    return true;
  };
  clack.intro("Plune \u2014 init");
  const provider = await clack.select({
    message: "Choose your provider:",
    options: [
      { value: "anthropic", label: "Anthropic (Claude)" },
      { value: "openai", label: "OpenAI (GPT)" }
    ]
  });
  if (abortIfCancelled(provider)) return;
  const defaultModel = provider === "anthropic" ? "claude-3-opus" : "gpt-4o";
  const model = await clack.text({
    message: "Model name:",
    defaultValue: defaultModel,
    placeholder: defaultModel
  });
  if (abortIfCancelled(model)) return;
  // The placeholder is display-only. Without a defaultValue, pressing Enter
  // produced an empty dataset and therefore an unusable config.
  const defaultDataset = "data/evals.jsonl";
  const dataset = await clack.text({
    message: "Dataset path (JSONL):",
    defaultValue: defaultDataset,
    placeholder: defaultDataset
  });
  if (abortIfCancelled(dataset)) return;
  const outputPath = path7.join(cwd, "plune.yaml");
  if (fs8.existsSync(outputPath)) {
    const overwrite = await clack.confirm({
      message: "plune.yaml already exists. Overwrite?"
    });
    if (abortIfCancelled(overwrite)) return;
    if (!overwrite) {
      clack.outro("No changes made.");
      return;
    }
  }
  const config = {
    version: 1,
    provider: {
      type: provider,
      model
    },
    evals: [
      {
        id: "example-eval",
        prompt: "Replace this with your actual prompt",
        dataset,
        assertions: [{ type: "contains", value: "expected output" }]
      }
    ]
  };
  const header = `# Plune configuration \u2014 generated by \`plune init\`.
#
# version \u2014 config schema version (always 1 in v0.1).
# provider \u2014 which LLM backend to call: 'type' + 'model'. The API key is read from .env
# (see .env.example), never stored here.
# evals \u2014 your test cases. Each has a prompt, a dataset of variables, and assertions
# that must hold for the model's output.
`;
  const spin = clack.spinner();
  spin.start("Writing plune.yaml\u2026");
  try {
    fs8.writeFileSync(outputPath, header + "\n" + (0, import_yaml2.stringify)(config), "utf8");
  } catch (err) {
    // Stop the spinner before propagating so the terminal is not left animating.
    spin.stop("Could not write plune.yaml");
    throw err;
  }
  spin.stop("plune.yaml created!");
  clack.outro("Done! Run `plune run` to execute your evaluations.");
}
|
|
2423
|
+
// Module-level bindings used by runInitWizard. They stay undefined until
// init_wizard() has run.
var clack;
var fs8;
var path7;
var import_yaml2;

// Lazy initializer for src/config/init/wizard.ts: the __esm wrapper runs the
// body on the first call and returns the cached result afterwards.
var init_wizard = __esm({
  "src/config/init/wizard.ts"() {
    "use strict";
    init_cjs_shims();
    clack = __toESM(require("@clack/prompts"), 1);
    fs8 = __toESM(require("fs"), 1);
    path7 = __toESM(require("path"), 1);
    import_yaml2 = require("yaml");
    init_errors();
  }
});
|
|
2435
|
+
|
|
2436
|
+
// src/cli/templates/index.ts
|
|
2437
|
+
/**
 * Writes one scaffold file below `cwd`, creating parent directories as needed.
 *
 * @param {string} cwd - Base directory.
 * @param {string} rel - Path of the file relative to `cwd`.
 * @param {string} content - Text written as UTF-8.
 * @param {boolean} force - When truthy, an existing file is overwritten.
 * @returns {"skipped"|"written"} "skipped" if the file exists and `force` is
 *   falsy, otherwise "written".
 */
function writeTemplateFile(cwd, rel, content, force) {
  const destination = path8.join(cwd, rel);
  // Existence is only checked when we are not forcing the write.
  const mayWrite = force || !fs9.existsSync(destination);
  if (mayWrite) {
    fs9.mkdirSync(path8.dirname(destination), { recursive: true });
    fs9.writeFileSync(destination, content, "utf8");
    return "written";
  }
  return "skipped";
}
|
|
2446
|
+
// Module-level bindings for src/cli/templates/index.ts; populated by
// init_templates().
var fs9, path8, ENV_EXAMPLE_TPL, EXAMPLE_JSONL_TPL, PLUNE_YAML_TPL;

// Lazy initializer: loads fs/path and defines the scaffold templates written
// by `plune init`.
var init_templates = __esm({
  "src/cli/templates/index.ts"() {
    "use strict";
    init_cjs_shims();
    fs9 = __toESM(require("fs"), 1);
    path8 = __toESM(require("path"), 1);
    // Contents of the generated .env.example file.
    ENV_EXAMPLE_TPL = `# Copy this file to .env and replace the placeholders with your real keys.
# .env is gitignored; .env.example is safe to commit.
# Plune reads whichever key matches your configured provider.
ANTHROPIC_API_KEY=YOUR_ANTHROPIC_API_KEY_HERE
OPENAI_API_KEY=YOUR_OPENAI_API_KEY_HERE
OPENROUTER_API_KEY=YOUR_OPENROUTER_API_KEY_HERE
`;
    // Contents of the example dataset: one JSON object per line.
    EXAMPLE_JSONL_TPL = `{"vars":{"question":"What is 2 + 2?"},"expected":"4"}
{"vars":{"question":"What is the capital of France?"},"expected":"Paris"}
{"vars":{"question":"Reply with the single word: hello"},"expected":"hello"}
`;
    // Default plune.yaml. The nesting must mirror the config object the
    // interactive wizard serialises: `type`/`model` under `provider`, and each
    // eval's fields (including its `assertions` list) under its `- id:` item.
    PLUNE_YAML_TPL = `# Plune configuration \u2014 generated by \`plune init --yes\`.
#
# version \u2014 config schema version (always 1 in v0.1).
# provider \u2014 which LLM backend to call: 'type' (anthropic | openai | openrouter) + 'model'.
# The API key is read from .env (see .env.example), never stored here.
# evals \u2014 your test cases: a prompt (with {{vars}}), a dataset, and assertions to check.
version: 1
provider:
  type: anthropic
  model: claude-3-5-sonnet-latest
evals:
  - id: example
    prompt: "Answer concisely. {{question}}"
    dataset: datasets/example.jsonl
    assertions:
      - type: contains
        value: "the"
`;
  }
});
|
|
2484
|
+
|
|
2485
|
+
// src/cli/commands/init.ts
|
|
2486
|
+
// Export table for src/cli/commands/init.ts. __export installs each entry as
// an enumerable getter, so `initCommand` is looked up when the property is read.
var init_exports = {};
__export(init_exports, {
  initCommand: () => initCommand
});
|
|
2490
|
+
/**
 * Writes a scaffold file via writeTemplateFile and prints a one-line status
 * to stdout saying whether it was created or skipped.
 *
 * @param {string} cwd - Base directory.
 * @param {string} rel - File path relative to `cwd` (also shown in the message).
 * @param {string} content - File contents.
 * @param {boolean} force - Overwrite an existing file when truthy.
 */
function reportWrite(cwd, rel, content, force) {
  const outcome = writeTemplateFile(cwd, rel, content, force);
  let line;
  if (outcome === "skipped") {
    line = `Skipped existing ${rel} (use --force to overwrite).\n`;
  } else {
    line = `Created ${rel}.\n`;
  }
  process.stdout.write(line);
}
|
|
2498
|
+
/**
 * Implements `plune init`.
 *
 * With `yes` set, the default plune.yaml template is written without
 * prompting; otherwise the interactive wizard produces plune.yaml. In both
 * cases every entry of TEMPLATES is written afterwards.
 *
 * @param {{cwd: string, force: boolean, yes?: boolean}} options
 * @returns {Promise<void>}
 */
async function initCommand({ cwd, force, yes = false }) {
  if (!yes) {
    await runInitWizard(cwd);
  } else {
    reportWrite(cwd, "plune.yaml", PLUNE_YAML_TPL, force);
  }
  TEMPLATES.forEach(([rel, content]) => {
    reportWrite(cwd, rel, content, force);
  });
}
|
|
2508
|
+
// [relative path, contents] pairs written by initCommand in addition to
// plune.yaml; assigned by init_init().
var TEMPLATES;

// Lazy initializer for src/cli/commands/init.ts: initialises the wizard and
// template modules first, then builds the TEMPLATES list from their values.
var init_init = __esm({
  "src/cli/commands/init.ts"() {
    "use strict";
    init_cjs_shims();
    init_wizard();
    init_templates();
    TEMPLATES = [
      ["datasets/example.jsonl", EXAMPLE_JSONL_TPL],
      [".env.example", ENV_EXAMPLE_TPL]
    ];
  }
});
|
|
2521
|
+
|
|
2522
|
+
// src/cli.ts
|
|
2523
|
+
// Public surface of the CLI entry module: only `createProgram` is exported,
// through a getter installed by __export.
var cli_exports = {};
__export(cli_exports, {
  createProgram: () => createProgram
});
module.exports = __toCommonJS(cli_exports);

// Run the shim initializer before the entry module's own requires below.
init_cjs_shims();
|
|
2529
|
+
var import_commander = require("commander");
|
|
2530
|
+
var import_node_fs = require("fs");
|
|
2531
|
+
var import_node_path = require("path");
|
|
2532
|
+
var import_node_url = require("url");
|
|
2533
|
+
// Directory containing this bundled file, derived from the shimmed module URL.
var __dirname = (0, import_node_path.dirname)(
  (0, import_node_url.fileURLToPath)(importMetaUrl)
);

// Parsed package.json from the parent directory; createProgram reads
// `pkg.version` from it.
var pkg = JSON.parse(
  (0, import_node_fs.readFileSync)(
    (0, import_node_path.join)(__dirname, "..", "package.json"),
    "utf-8"
  )
);
|
|
2535
|
+
/**
 * Normalises a `--format` value.
 *
 * @param {*} s - Raw option value.
 * @returns {"json"|"markdown"|"console"} `s` itself when it is exactly
 *   "json" or "markdown"; "console" for anything else.
 */
function toFormat(s) {
  switch (s) {
    case "json":
    case "markdown":
      return s;
    default:
      return "console";
  }
}
|
|
2538
|
+
/**
 * Decides whether report output may contain colour.
 *
 * Colour is used only when the report goes to stdout (no output path),
 * stdout is a TTY, and NO_COLOR is not in effect. Following the NO_COLOR
 * convention, the variable disables colour only when it is present and
 * non-empty; an empty `NO_COLOR=` is treated the same as unset.
 *
 * @param {string|undefined} outputPath - File the report is written to, or
 *   undefined when writing to stdout.
 * @returns {boolean} true when coloured output is appropriate.
 */
function shouldColor(outputPath) {
  if (outputPath !== undefined) {
    return false;
  }
  if (process.stdout.isTTY !== true) {
    return false;
  }
  const noColor = process.env.NO_COLOR;
  return noColor === undefined || noColor === "";
}
|
|
2541
|
+
/**
 * Emits rendered report text.
 *
 * @param {string} text2 - Rendered report.
 * @param {string|undefined} outputPath - When defined, the text is written to
 *   this file as-is; when undefined, the text plus a trailing newline goes to
 *   stdout.
 */
function writeReport(text2, outputPath) {
  if (outputPath === undefined) {
    process.stdout.write(text2 + "\n");
    return;
  }
  (0, import_node_fs.writeFileSync)(outputPath, text2);
}
|
|
2548
|
+
/**
 * Prints an error's stack trace to stderr, but only in verbose mode and only
 * for Error instances whose `stack` is defined.
 *
 * @param {*} err - The caught value.
 * @param {boolean} verbose - Whether verbose output was requested.
 */
function maybeStack(err, verbose) {
  if (!verbose) {
    return;
  }
  if (!(err instanceof Error)) {
    return;
  }
  if (err.stack === undefined) {
    return;
  }
  process.stderr.write(err.stack + "\n");
}
|
|
2553
|
+
/**
 * Reports an unexpected failure on stderr and exits with status 1.
 *
 * For Error instances the stack is printed in verbose mode (when defined),
 * otherwise the message; any other thrown value is stringified.
 *
 * @param {*} err - The caught value.
 * @param {boolean} verbose - Whether verbose output was requested.
 */
function failUnexpected(err, verbose) {
  let text;
  if (!(err instanceof Error)) {
    text = String(err);
  } else if (verbose && err.stack !== undefined) {
    text = err.stack;
  } else {
    text = err.message;
  }
  process.stderr.write(text + "\n");
  process.exit(1);
}
|
|
2561
|
+
/**
 * Builds the `plune` commander program: the global options plus the `run`,
 * `report`, `diff` and `init` subcommands. Command implementations are
 * loaded lazily inside each action. Nothing is parsed here; the caller
 * invokes `parseAsync` on the returned program.
 *
 * Exit codes used by the actions: known configuration/input errors exit with
 * 2, a non-TTY `init` exits with 1, and anything else goes through
 * failUnexpected (exit 1).
 *
 * @returns {object} The configured commander `Command` instance.
 */
function createProgram() {
  // Base directory for a command: the folder of the config file when a config
  // path was supplied, otherwise the current working directory.
  const baseDirFor = (configPath) => {
    if (configPath === undefined) {
      return process.cwd();
    }
    return (0, import_node_path.dirname)((0, import_node_path.resolve)(configPath));
  };
  // Colour requires that --no-color was not given and that shouldColor agrees.
  const colorFor = (globals, outputPath) => globals.color !== false && shouldColor(outputPath);
  // Option parsers handed to commander.
  const appendSelector = (val, prev) => [...prev, val];
  const parseBase10 = (v) => parseInt(v, 10);

  const program = new import_commander.Command();
  program
    .name("plune")
    .description("Plune \u2014 AI-powered assertion testing CLI")
    .version(pkg.version);
  program
    .option("-c, --config <path>", "Path to plune.yaml config file (applies to all commands)")
    .option("-v, --verbose", "Verbose output, including stack traces on unexpected errors", false)
    .option("--no-color", "Disable colored output regardless of TTY");

  // ---- run ---------------------------------------------------------------
  const runCommand = program.command("run").description("Run assertions against a dataset");
  runCommand
    .option("--dry-run", "Estimate cost/tokens; do not call the model", false)
    .option("--config <path>", "Path to plune.yaml config file")
    .option("--only <selector>", "Run a subset by eval id or tag (repeatable)", appendSelector, [])
    .option("--concurrency <n>", "Override provider concurrency", parseBase10)
    .option("--no-cache", "Bypass the local cache for this run")
    .option("--bail", "Stop after the first failing eval", false)
    .option("--format <fmt>", "console | json | markdown", "console")
    .option("-o, --output <path>", "Write the report to a file instead of stdout");
  runCommand.action(async (options, command) => {
    const globals = command.optsWithGlobals();
    const configPath = globals.config;
    const [
      { loadEnv: loadEnvFrom },
      { handleRun: executeRun },
      { exitCodeFor: exitCodeOf, RunConfigError: RunConfigErr },
      { renderReport: render },
      { ConfigNotFoundError: ConfigNotFoundErr, ConfigValidationError: ConfigValidationErr, YamlParseError: YamlParseErr },
      { AuthError: AuthErr }
    ] = await Promise.all([
      Promise.resolve().then(() => (init_env(), env_exports)),
      Promise.resolve().then(() => (init_run2(), run_exports)),
      Promise.resolve().then(() => (init_orchestrator(), orchestrator_exports)),
      Promise.resolve().then(() => (init_reporters(), reporters_exports)),
      Promise.resolve().then(() => (init_errors(), errors_exports)),
      Promise.resolve().then(() => (init_errors2(), errors_exports2))
    ]);
    loadEnvFrom(baseDirFor(configPath));
    try {
      // Optional keys are added only when they carry a usable value; key
      // order matches dryRun, configPath, only, concurrency, noCache, bail.
      const runArgs = { dryRun: options.dryRun };
      if (configPath !== undefined) {
        runArgs.configPath = configPath;
      }
      if (options.only.length > 0) {
        runArgs.only = options.only;
      }
      if (typeof options.concurrency === "number" && Number.isFinite(options.concurrency)) {
        runArgs.concurrency = options.concurrency;
      }
      runArgs.noCache = options.cache === false;
      runArgs.bail = options.bail;

      const result = await executeRun(runArgs);
      if (options.only.length > 0 && result.evals.length === 0) {
        process.stderr.write("No evals matched the selector.\n");
      }
      const text2 = render(result, toFormat(options.format), {
        color: colorFor(globals, options.output)
      });
      writeReport(text2, options.output);
      process.exitCode = exitCodeOf(result);
    } catch (err) {
      const verbose = globals.verbose === true;
      const isValidation = err instanceof ConfigValidationErr;
      const isKnown = isValidation || err instanceof ConfigNotFoundErr || err instanceof YamlParseErr || err instanceof AuthErr || err instanceof RunConfigErr;
      if (!isKnown) {
        failUnexpected(err, verbose);
        return;
      }
      process.stderr.write(err.message + "\n");
      if (isValidation) {
        // Validation errors additionally list each individual issue.
        for (const issue of err.issues) {
          process.stderr.write(` - ${issue}\n`);
        }
      }
      maybeStack(err, verbose);
      process.exit(2);
    }
  });

  // ---- report ------------------------------------------------------------
  const reportCommand = program.command("report").description("Render the most recent run in a chosen format");
  reportCommand
    .option("--format <fmt>", "console | json | markdown", "console")
    .option("-o, --output <path>", "Write the report to a file instead of stdout");
  reportCommand.action(async (options, command) => {
    const globals = command.optsWithGlobals();
    const [
      { loadEnv: loadEnvFrom },
      { handleReport: loadReport, ReportNotFoundError: ReportNotFoundErr },
      { renderReport: render }
    ] = await Promise.all([
      Promise.resolve().then(() => (init_env(), env_exports)),
      Promise.resolve().then(() => (init_report(), report_exports)),
      Promise.resolve().then(() => (init_reporters(), reporters_exports))
    ]);
    const cwd = baseDirFor(globals.config);
    loadEnvFrom(cwd);
    try {
      const result = loadReport({ cwd });
      const text2 = render(result, toFormat(options.format), {
        color: colorFor(globals, options.output)
      });
      writeReport(text2, options.output);
    } catch (err) {
      if (!(err instanceof ReportNotFoundErr)) {
        failUnexpected(err, globals.verbose === true);
        return;
      }
      process.stderr.write(err.message + "\n");
      process.exit(2);
    }
  });

  // ---- diff --------------------------------------------------------------
  const diffCommand = program.command("diff").description("Compare two `plune run --format json` outputs and report regressions");
  diffCommand
    .argument("<baseline>", "Path to the baseline run JSON (e.g. from main)")
    .argument("<current>", "Path to the current run JSON (e.g. from the PR)")
    .option("--format <fmt>", "console | json | markdown", "console")
    .option("--fail-on-regression", "Exit 1 when a pass\u2192fail regression is detected", false)
    .option("-o, --output <path>", "Write the diff to a file instead of stdout");
  diffCommand.action(async (baseline, current, options, command) => {
    const verbose = command.optsWithGlobals().verbose === true;
    const [
      { handleDiff: computeDiff, DiffInputError: DiffInputErr },
      { renderDiff: formatDiff }
    ] = await Promise.all([
      Promise.resolve().then(() => (init_diff2(), diff_exports)),
      Promise.resolve().then(() => (init_diff3(), diff_exports2))
    ]);
    try {
      const diff = computeDiff({ baselinePath: baseline, currentPath: current });
      writeReport(formatDiff(diff, toFormat(options.format)), options.output);
      process.exitCode = options.failOnRegression && diff.summary.hasRegression ? 1 : 0;
    } catch (err) {
      if (!(err instanceof DiffInputErr)) {
        failUnexpected(err, verbose);
        return;
      }
      process.stderr.write(err.message + "\n");
      maybeStack(err, verbose);
      process.exit(2);
    }
  });

  // ---- init --------------------------------------------------------------
  const initCmd = program.command("init").description("Scaffold plune.yaml (interactive), an example dataset, and .env.example");
  initCmd
    .option("--force", "Overwrite existing files instead of skipping them", false)
    .option("--yes", "Non-interactive: scaffold defaults without prompts (for CI)", false);
  initCmd.action(async (options, command) => {
    const verbose = command.optsWithGlobals().verbose === true;
    const [{ initCommand: scaffold }, { NonTtyError: NonTtyErr }] = await Promise.all([
      Promise.resolve().then(() => (init_init(), init_exports)),
      Promise.resolve().then(() => (init_errors(), errors_exports))
    ]);
    try {
      await scaffold({ cwd: process.cwd(), force: options.force, yes: options.yes });
    } catch (err) {
      if (!(err instanceof NonTtyErr)) {
        failUnexpected(err, verbose);
        return;
      }
      process.stderr.write(err.message + "\n");
      process.exit(1);
    }
  });

  // Listener for commander's "command:*" event: report the unrecognised
  // operands and exit with status 2.
  program.on("command:*", (operands) => {
    process.stderr.write(`Unknown command: ${operands.join(" ")}\n`);
    process.exit(2);
  });

  return program;
}
|
|
2711
|
+
// True when this file is the script Node was asked to run. process.argv[1]
// may name a symlink to this file (for example a package-manager bin link),
// so besides the direct comparison the real paths are compared as well;
// otherwise the CLI would silently do nothing when launched through a link.
var isMain = (() => {
  const entry = process.argv[1];
  if (entry === void 0) {
    return false;
  }
  const self = (0, import_node_url.fileURLToPath)(importMetaUrl);
  if (entry === self) {
    return true;
  }
  try {
    return (0, import_node_fs.realpathSync)(entry) === (0, import_node_fs.realpathSync)(self);
  } catch (_err) {
    // A path that cannot be resolved cannot be this file: treat as "not main".
    return false;
  }
})();
if (isMain) {
  // Last-resort handler for rejections that escape the command actions.
  createProgram().parseAsync(process.argv).catch((err) => {
    process.stderr.write(String(err) + "\n");
    process.exit(1);
  });
}
// Annotate the CommonJS export names for ESM import in node:
0 && (module.exports = {
  createProgram
});
|