@evalguardai/cli 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +87 -0
- package/bin/evalguard.js +2 -0
- package/dist/commands/compare.d.ts +6 -0
- package/dist/commands/compare.d.ts.map +1 -0
- package/dist/commands/compare.js +109 -0
- package/dist/commands/compare.js.map +1 -0
- package/dist/commands/compliance-check.d.ts +18 -0
- package/dist/commands/compliance-check.d.ts.map +1 -0
- package/dist/commands/compliance-check.js +474 -0
- package/dist/commands/compliance-check.js.map +1 -0
- package/dist/commands/debug.d.ts +6 -0
- package/dist/commands/debug.d.ts.map +1 -0
- package/dist/commands/debug.js +151 -0
- package/dist/commands/debug.js.map +1 -0
- package/dist/commands/delete.d.ts +6 -0
- package/dist/commands/delete.d.ts.map +1 -0
- package/dist/commands/delete.js +105 -0
- package/dist/commands/delete.js.map +1 -0
- package/dist/commands/eval-local.d.ts +7 -0
- package/dist/commands/eval-local.d.ts.map +1 -0
- package/dist/commands/eval-local.js +376 -0
- package/dist/commands/eval-local.js.map +1 -0
- package/dist/commands/export.d.ts +6 -0
- package/dist/commands/export.d.ts.map +1 -0
- package/dist/commands/export.js +135 -0
- package/dist/commands/export.js.map +1 -0
- package/dist/commands/firewall.d.ts +6 -0
- package/dist/commands/firewall.d.ts.map +1 -0
- package/dist/commands/firewall.js +56 -0
- package/dist/commands/firewall.js.map +1 -0
- package/dist/commands/gate.d.ts +14 -0
- package/dist/commands/gate.d.ts.map +1 -0
- package/dist/commands/gate.js +232 -0
- package/dist/commands/gate.js.map +1 -0
- package/dist/commands/generate.d.ts +7 -0
- package/dist/commands/generate.d.ts.map +1 -0
- package/dist/commands/generate.js +182 -0
- package/dist/commands/generate.js.map +1 -0
- package/dist/commands/history.d.ts +7 -0
- package/dist/commands/history.d.ts.map +1 -0
- package/dist/commands/history.js +59 -0
- package/dist/commands/history.js.map +1 -0
- package/dist/commands/import-promptfoo.d.ts +7 -0
- package/dist/commands/import-promptfoo.d.ts.map +1 -0
- package/dist/commands/import-promptfoo.js +218 -0
- package/dist/commands/import-promptfoo.js.map +1 -0
- package/dist/commands/index.d.ts +21 -0
- package/dist/commands/index.d.ts.map +1 -0
- package/dist/commands/index.js +21 -0
- package/dist/commands/index.js.map +1 -0
- package/dist/commands/init.d.ts +7 -0
- package/dist/commands/init.d.ts.map +1 -0
- package/dist/commands/init.js +509 -0
- package/dist/commands/init.js.map +1 -0
- package/dist/commands/list.d.ts +10 -0
- package/dist/commands/list.d.ts.map +1 -0
- package/dist/commands/list.js +165 -0
- package/dist/commands/list.js.map +1 -0
- package/dist/commands/logs.d.ts +6 -0
- package/dist/commands/logs.d.ts.map +1 -0
- package/dist/commands/logs.js +153 -0
- package/dist/commands/logs.js.map +1 -0
- package/dist/commands/model-scan.d.ts +7 -0
- package/dist/commands/model-scan.d.ts.map +1 -0
- package/dist/commands/model-scan.js +276 -0
- package/dist/commands/model-scan.js.map +1 -0
- package/dist/commands/retry.d.ts +6 -0
- package/dist/commands/retry.d.ts.map +1 -0
- package/dist/commands/retry.js +83 -0
- package/dist/commands/retry.js.map +1 -0
- package/dist/commands/scan-local.d.ts +6 -0
- package/dist/commands/scan-local.d.ts.map +1 -0
- package/dist/commands/scan-local.js +138 -0
- package/dist/commands/scan-local.js.map +1 -0
- package/dist/commands/share.d.ts +6 -0
- package/dist/commands/share.d.ts.map +1 -0
- package/dist/commands/share.js +74 -0
- package/dist/commands/share.js.map +1 -0
- package/dist/commands/store.d.ts +23 -0
- package/dist/commands/store.d.ts.map +1 -0
- package/dist/commands/store.js +54 -0
- package/dist/commands/store.js.map +1 -0
- package/dist/commands/validate.d.ts +6 -0
- package/dist/commands/validate.d.ts.map +1 -0
- package/dist/commands/validate.js +171 -0
- package/dist/commands/validate.js.map +1 -0
- package/dist/commands/watch.d.ts +6 -0
- package/dist/commands/watch.d.ts.map +1 -0
- package/dist/commands/watch.js +92 -0
- package/dist/commands/watch.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +342 -0
- package/dist/index.js.map +1 -0
- package/package.json +73 -0
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
import chalk from "chalk";
|
|
2
|
+
import * as fs from "fs";
|
|
3
|
+
import * as path from "path";
|
|
4
|
+
/**
 * Registers the `validate` sub-command on the given program.
 *
 * The command reads a JSON config file, decides whether it describes an
 * eval (has `scorers`/`cases`) and/or a scan (has `attackTypes`/`plugins`),
 * runs the matching validators, prints errors and warnings, and exits with
 * status 1 when at least one error was found (0 otherwise).
 *
 * @param program - object exposing a chainable `.command()` builder API.
 */
export function registerValidate(program) {
    program
        .command("validate")
        .description("Validate an eval or scan config file")
        .argument("<file>", "Path to config JSON file")
        .action(async (file) => {
            const { BUILT_IN_SCORERS, ATTACK_TYPES, ALL_PLUGINS, ALL_STRATEGIES, ALL_GRADERS } = await import("@evalguard/core");
            const filePath = path.resolve(file);
            if (!fs.existsSync(filePath)) {
                console.error(chalk.red(`File not found: ${filePath}`));
                process.exit(1);
            }
            let config;
            try {
                config = JSON.parse(fs.readFileSync(filePath, "utf-8"));
            }
            catch (err) {
                console.error(chalk.red(`Invalid JSON: ${err instanceof Error ? err.message : String(err)}`));
                process.exit(1);
            }
            // Valid JSON can still be `null`, a number, a string or an array.
            // The `in` operator below throws a TypeError on null/primitives, so
            // reject anything that is not a plain object up front.
            if (config === null || typeof config !== "object" || Array.isArray(config)) {
                console.error(chalk.red("Invalid config: top-level JSON value must be an object."));
                process.exit(1);
            }
            const errors = [];
            const warnings = [];
            // Detect config type
            const isEval = "scorers" in config || "cases" in config;
            const isScan = "attackTypes" in config || "plugins" in config;
            if (!isEval && !isScan) {
                errors.push("Cannot determine config type. Expected 'scorers'/'cases' (eval) or 'attackTypes'/'plugins' (scan).");
            }
            if (isEval) {
                validateEvalConfig(config, errors, warnings, BUILT_IN_SCORERS);
            }
            if (isScan) {
                validateScanConfig(config, errors, warnings, ATTACK_TYPES, ALL_PLUGINS, ALL_STRATEGIES, ALL_GRADERS);
            }
            // Common checks
            if (!config.model && !config.provider) {
                warnings.push("No 'model' specified. Will need --model flag at runtime.");
            }
            // The scan validator already reports a missing prompt, so only add the
            // eval-side error when the scan validator did not run; otherwise the
            // same problem would be counted twice.
            if (!config.prompt && isEval && !isScan) {
                errors.push("Missing 'prompt' field.");
            }
            // Display results
            const hasErrors = errors.length > 0;
            console.log();
            if (!hasErrors) {
                console.log(`  ${chalk.green("✓")} Config is valid (${isEval ? "eval" : "scan"})`);
            }
            else {
                console.log(`  ${chalk.red("✗")} Config has ${errors.length} error(s)`);
            }
            for (const err of errors) {
                console.log(`  ${chalk.red("ERROR")} ${err}`);
            }
            for (const warn of warnings) {
                console.log(`  ${chalk.yellow("WARN")} ${warn}`);
            }
            if (!hasErrors) {
                // Show summary
                if (isEval) {
                    const cases = config.cases;
                    const scorers = config.scorers;
                    console.log(chalk.dim(`  ${cases?.length ?? 0} test cases, ${scorers?.length ?? 0} scorers`));
                }
                if (isScan) {
                    const attacks = config.attackTypes;
                    const plugins = config.plugins;
                    console.log(chalk.dim(`  ${attacks?.length ?? 0} attack types, ${plugins?.length ?? 0} plugins`));
                }
            }
            console.log();
            process.exit(hasErrors ? 1 : 0);
        });
}
|
|
76
|
+
/**
 * Validates the eval-specific parts of a config object.
 *
 * Problems are appended to the caller-supplied `errors` and `warnings`
 * arrays; nothing is returned and nothing is thrown for malformed input.
 *
 * @param config - parsed config object (reads `scorers`, `cases`, `prompt`).
 * @param errors - array that receives error messages.
 * @param warnings - array that receives warning messages.
 * @param BUILT_IN_SCORERS - object whose own keys are the known scorer names.
 */
function validateEvalConfig(config, errors, warnings, BUILT_IN_SCORERS) {
    // Scorers
    if (!config.scorers) {
        errors.push("Missing 'scorers' field.");
    }
    else if (!Array.isArray(config.scorers)) {
        errors.push("'scorers' must be an array of strings.");
    }
    else {
        const availableScorers = Object.keys(BUILT_IN_SCORERS);
        const knownScorers = new Set(availableScorers);
        for (const scorer of config.scorers) {
            if (!knownScorers.has(scorer)) {
                errors.push(`Unknown scorer: '${scorer}'. Available: ${availableScorers.join(", ")}`);
            }
        }
    }
    // Cases
    if (!config.cases) {
        errors.push("Missing 'cases' field.");
    }
    else if (!Array.isArray(config.cases)) {
        errors.push("'cases' must be an array.");
    }
    else {
        config.cases.forEach((testCase, i) => {
            // A `null` (or otherwise non-object) entry is valid JSON; report it
            // instead of crashing on the property access below.
            if (testCase === null || typeof testCase !== "object") {
                errors.push(`cases[${i}]: must be an object.`);
            }
            else if (!testCase.input) {
                errors.push(`cases[${i}]: missing 'input' field.`);
            }
        });
        if (config.cases.length === 0) {
            warnings.push("'cases' is empty. Add test cases.");
        }
    }
    // Prompt
    if (config.prompt && typeof config.prompt === "string" && !config.prompt.includes("{{input}}")) {
        warnings.push("Prompt doesn't contain '{{input}}' placeholder. Inputs won't be substituted.");
    }
}
|
|
121
|
+
/**
 * Validates the scan-specific parts of a config object.
 *
 * Each of `attackTypes`, `plugins`, `strategies` and `graders` is optional;
 * when present it must be an array whose entries all appear in the matching
 * catalogue. Problems are appended to `errors` / `warnings`.
 *
 * @param config - parsed config object.
 * @param errors - array that receives error messages.
 * @param warnings - array that receives warning messages.
 * @param ATTACK_TYPES - array of objects; the `type` property of each is a valid attack type.
 * @param ALL_PLUGINS - array of objects; the `id` property of each is a valid plugin.
 * @param ALL_STRATEGIES - array of objects; the `id` property of each is a valid strategy.
 * @param ALL_GRADERS - array of objects; the `id` property of each is a valid grader.
 */
function validateScanConfig(config, errors, warnings, ATTACK_TYPES, ALL_PLUGINS, ALL_STRATEGIES, ALL_GRADERS) {
    const validAttackTypes = ATTACK_TYPES.map((a) => a.type);
    const validPlugins = ALL_PLUGINS.map((p) => p.id);
    const validStrategies = ALL_STRATEGIES.map((s) => s.id);
    const validGraders = ALL_GRADERS.map((g) => g.id);
    // Shared check for every "list of known identifiers" field. Applying the
    // array guard uniformly keeps a string value from being iterated character
    // by character and a plain object from throwing "not iterable".
    const checkList = (field, label, validIds) => {
        const value = config[field];
        if (!value) {
            return;
        }
        if (!Array.isArray(value)) {
            errors.push(`'${field}' must be an array.`);
            return;
        }
        for (const entry of value) {
            if (!validIds.includes(entry)) {
                errors.push(`Unknown ${label}: '${entry}'. Available: ${validIds.join(", ")}`);
            }
        }
    };
    checkList("attackTypes", "attack type", validAttackTypes);
    checkList("plugins", "plugin", validPlugins);
    checkList("strategies", "strategy", validStrategies);
    checkList("graders", "grader", validGraders);
    if (!config.prompt) {
        errors.push("Missing 'prompt' field (system prompt to test).");
    }
    if (!config.attackTypes && !config.plugins) {
        warnings.push("Neither 'attackTypes' nor 'plugins' specified. Will use all legacy attack types.");
    }
}
|
|
171
|
+
//# sourceMappingURL=validate.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"validate.js","sourceRoot":"","sources":["../../src/commands/validate.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AACzB,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAE7B,MAAM,UAAU,gBAAgB,CAAC,OAAgB;IAC/C,OAAO;SACJ,OAAO,CAAC,UAAU,CAAC;SACnB,WAAW,CAAC,sCAAsC,CAAC;SACnD,QAAQ,CAAC,QAAQ,EAAE,0BAA0B,CAAC;SAC9C,MAAM,CAAC,KAAK,EAAE,IAAY,EAAE,EAAE;QAC7B,MAAM,EAAE,gBAAgB,EAAE,YAAY,EAAE,WAAW,EAAE,cAAc,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAQ,CAAC;QAE5H,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAEpC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC7B,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,mBAAmB,QAAQ,EAAE,CAAC,CAAC,CAAC;YACxD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,IAAI,MAA+B,CAAC;QACpC,IAAI,CAAC;YACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC;QAC1D,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,iBAAiB,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;YAC9F,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,MAAM,QAAQ,GAAa,EAAE,CAAC;QAE9B,qBAAqB;QACrB,MAAM,MAAM,GAAG,SAAS,IAAI,MAAM,IAAI,OAAO,IAAI,MAAM,CAAC;QACxD,MAAM,MAAM,GAAG,aAAa,IAAI,MAAM,IAAI,SAAS,IAAI,MAAM,CAAC;QAE9D,IAAI,CAAC,MAAM,IAAI,CAAC,MAAM,EAAE,CAAC;YACvB,MAAM,CAAC,IAAI,CAAC,oGAAoG,CAAC,CAAC;QACpH,CAAC;QAED,IAAI,MAAM,EAAE,CAAC;YACX,kBAAkB,CAAC,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,gBAAgB,CAAC,CAAC;QACjE,CAAC;QAED,IAAI,MAAM,EAAE,CAAC;YACX,kBAAkB,CAAC,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,YAAY,EAAE,WAAW,EAAE,cAAc,EAAE,WAAW,CAAC,CAAC;QACvG,CAAC;QAED,gBAAgB;QAChB,IAAI,CAAC,MAAM,CAAC,KAAK,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC;YACtC,QAAQ,CAAC,IAAI,CAAC,0DAA0D,CAAC,CAAC;QAC5E,CAAC;QAED,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,MAAM,EAAE,CAAC;YAC7B,MAAM,CAAC,IAAI,CAAC,yBAAyB,CAAC,CAAC;QACzC,CAAC;QAED,kBAAkB;QAClB,OAAO,CAAC,GAAG,EAAE,CAAC;QACd,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxB,OAAO,CAAC,GAAG,CAAC,KAAK,KAAK,CAAC,KAAK,
CAAC,GAAG,CAAC,qBAAqB,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;QACrF,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,GAAG,CAAC,KAAK,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,eAAe,MAAM,CAAC,MAAM,WAAW,CAAC,CAAC;QAC1E,CAAC;QAED,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;YACzB,OAAO,CAAC,GAAG,CAAC,KAAK,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,GAAG,EAAE,CAAC,CAAC;QAChD,CAAC;QACD,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;YAC5B,OAAO,CAAC,GAAG,CAAC,KAAK,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC;QACnD,CAAC;QAED,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxB,eAAe;YACf,IAAI,MAAM,EAAE,CAAC;gBACX,MAAM,KAAK,GAAG,MAAM,CAAC,KAAkB,CAAC;gBACxC,MAAM,OAAO,GAAG,MAAM,CAAC,OAAmB,CAAC;gBAC3C,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,KAAK,EAAE,MAAM,IAAI,CAAC,gBAAgB,OAAO,EAAE,MAAM,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC;YAChG,CAAC;YACD,IAAI,MAAM,EAAE,CAAC;gBACX,MAAM,OAAO,GAAG,MAAM,CAAC,WAAuB,CAAC;gBAC/C,MAAM,OAAO,GAAG,MAAM,CAAC,OAAmB,CAAC;gBAC3C,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,OAAO,EAAE,MAAM,IAAI,CAAC,kBAAkB,OAAO,EAAE,MAAM,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC;YACpG,CAAC;QACH,CAAC;QAED,OAAO,CAAC,GAAG,EAAE,CAAC;QACd,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC1C,CAAC,CAAC,CAAC;AACP,CAAC;AAED,SAAS,kBAAkB,CAAC,MAA+B,EAAE,MAAgB,EAAE,QAAkB,EAAE,gBAAqB;IACtH,UAAU;IACV,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;QACnB,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC;YACnC,MAAM,CAAC,IAAI,CAAC,wCAAwC,CAAC,CAAC;QACxD,CAAC;aAAM,CAAC;YACN,MAAM,gBAAgB,GAAG,MAAM,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;YACvD,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,OAAmB,EAAE,CAAC;gBAC3C,IAAI,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;oBAClC,MAAM,CAAC,IAAI,CAAC,oBAAoB,CAAC,iBAAiB,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACnF,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;SAAM,CAAC;QACN,MAAM,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC;IAC1C,CAAC;IAED,QAAQ;IACR,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;QACjB,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC;YACjC,MAAM,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;QAC3C,CAAC;aAAM,CAAC;YACN,KAAK
,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAI,MAAM,CAAC,KAAmB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC5D,MAAM,CAAC,GAAI,MAAM,CAAC,KAAmC,CAAC,CAAC,CAAC,CAAC;gBACzD,IAAI,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC;oBACb,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,2BAA2B,CAAC,CAAC;gBACrD,CAAC;YACH,CAAC;YACD,IAAK,MAAM,CAAC,KAAmB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC7C,QAAQ,CAAC,IAAI,CAAC,mCAAmC,CAAC,CAAC;YACrD,CAAC;QACH,CAAC;IACH,CAAC;SAAM,CAAC;QACN,MAAM,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;IACxC,CAAC;IAED,SAAS;IACT,IAAI,MAAM,CAAC,MAAM,IAAI,OAAO,MAAM,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;QACvD,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;YACzC,QAAQ,CAAC,IAAI,CAAC,8EAA8E,CAAC,CAAC;QAChG,CAAC;IACH,CAAC;AACH,CAAC;AAED,SAAS,kBAAkB,CAAC,MAA+B,EAAE,MAAgB,EAAE,QAAkB,EAAE,YAAiB,EAAE,WAAgB,EAAE,cAAmB,EAAE,WAAgB;IAC3K,MAAM,gBAAgB,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAC9D,MAAM,YAAY,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IACvD,MAAM,eAAe,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IAC7D,MAAM,YAAY,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IAEvD,IAAI,MAAM,CAAC,WAAW,EAAE,CAAC;QACvB,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,WAAW,CAAC,EAAE,CAAC;YACvC,MAAM,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;QACjD,CAAC;aAAM,CAAC;YACN,KAAK,MAAM,EAAE,IAAI,MAAM,CAAC,WAAuB,EAAE,CAAC;gBAChD,IAAI,CAAC,gBAAgB,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAE,CAAC;oBACnC,MAAM,CAAC,IAAI,CAAC,yBAAyB,EAAE,iBAAiB,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACzF,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;QACnB,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC;YACnC,MAAM,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;QAC7C,CAAC;aAAM,CAAC;YACN,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,OAAmB,EAAE,CAAC;gBAC3C,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;oBAC9B,MAAM,CAAC,IAAI,CAAC,oBAAoB,CAAC,iBAAiB,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBAC/E,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;QACtB,KAAK,M
AAM,CAAC,IAAI,MAAM,CAAC,UAAsB,EAAE,CAAC;YAC9C,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;gBACjC,MAAM,CAAC,IAAI,CAAC,sBAAsB,CAAC,iBAAiB,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACpF,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;QACnB,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,OAAmB,EAAE,CAAC;YAC3C,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC9B,MAAM,CAAC,IAAI,CAAC,oBAAoB,CAAC,iBAAiB,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAC/E,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;QACnB,MAAM,CAAC,IAAI,CAAC,iDAAiD,CAAC,CAAC;IACjE,CAAC;IAED,IAAI,CAAC,MAAM,CAAC,WAAW,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QAC3C,QAAQ,CAAC,IAAI,CAAC,kFAAkF,CAAC,CAAC;IACpG,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"watch.d.ts","sourceRoot":"","sources":["../../src/commands/watch.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAKpC,wBAAgB,aAAa,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAmGpD"}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import chalk from "chalk";
|
|
2
|
+
import * as fs from "fs";
|
|
3
|
+
import * as path from "path";
|
|
4
|
+
/**
 * Registers the `watch` sub-command on the given program.
 *
 * The command runs the eval described by a JSON config file once, then
 * re-runs it (debounced) whenever the config file changes or another
 * `.json` file in the same directory changes. It keeps running until the
 * process receives SIGINT.
 *
 * @param program - object exposing a chainable `.command()` builder API.
 */
export function registerWatch(program) {
    program
        .command("watch")
        .description("Watch eval config and re-run on changes")
        .argument("<file>", "Path to eval config JSON file")
        .option("--model <model>", "Override model")
        .option("--provider <provider>", "Override provider")
        .option("--debounce <ms>", "Debounce interval in ms", "1000")
        .action(async (file, opts) => {
            const filePath = path.resolve(file);
            if (!fs.existsSync(filePath)) {
                console.error(chalk.red(`File not found: ${filePath}`));
                process.exit(1);
            }
            const debounceMs = Number.parseInt(opts.debounce, 10);
            // A non-numeric value parses to NaN, which setTimeout would silently
            // treat as a near-zero delay; reject it instead of disabling debouncing.
            if (!Number.isFinite(debounceMs) || debounceMs < 0) {
                console.error(chalk.red(`Invalid --debounce value: ${opts.debounce}. Expected a non-negative integer (milliseconds).`));
                process.exit(1);
            }
            let timer = null;
            let runCount = 0;
            console.log();
            console.log(chalk.bold(`  Watching ${path.basename(file)} for changes...`));
            console.log(chalk.dim("  Press Ctrl+C to stop"));
            console.log();
            // Reads the config from disk and runs one evaluation. Every failure is
            // caught and printed so a bad edit does not terminate the watcher.
            const runEval = async () => {
                runCount++;
                console.log(chalk.dim(`  ─── Run #${runCount} at ${new Date().toLocaleTimeString()} ───`));
                try {
                    // Loaded lazily on first use. ES modules are cached by the
                    // runtime, so later runs reuse the same module instance.
                    const { runEvaluation, createProvider } = await import("@evalguard/core");
                    const raw = fs.readFileSync(filePath, "utf-8");
                    const config = JSON.parse(raw);
                    const model = opts.model ?? config.model;
                    const providerName = opts.provider ?? config.provider ?? "openai";
                    const envMap = {
                        openai: "OPENAI_API_KEY", anthropic: "ANTHROPIC_API_KEY", gemini: "GEMINI_API_KEY",
                    };
                    // Providers without an explicit mapping use <PROVIDER>_API_KEY.
                    const envKey = envMap[providerName] ?? `${providerName.toUpperCase()}_API_KEY`;
                    const apiKey = process.env[envKey] ?? "";
                    const provider = createProvider(providerName, apiKey);
                    const callLLM = async (prompt) => {
                        const r = await provider.chat([{ role: "user", content: prompt }], { model });
                        return r.content;
                    };
                    const result = await runEvaluation({
                        model,
                        prompt: config.prompt,
                        cases: config.cases,
                        scorers: config.scorers,
                        callLLM,
                        scorerOptions: config.scorerOptions,
                    });
                    const passed = result.cases.filter((c) => c.passed).length;
                    const failed = result.cases.length - passed;
                    const passColor = result.passRate >= 0.8 ? chalk.green : result.passRate >= 0.5 ? chalk.yellow : chalk.red;
                    console.log(`  ${passColor("●")} ${chalk.green(`${passed} passed`)}, ${chalk.red(`${failed} failed`)} (${(result.passRate * 100).toFixed(1)}%) ${chalk.dim(`${result.totalLatency}ms`)}`);
                }
                catch (err) {
                    console.log(`  ${chalk.red("✗")} Error: ${err instanceof Error ? err.message : String(err)}`);
                }
                console.log();
            };
            // Restarts the debounce window; only the last change in a burst runs.
            const scheduleRun = () => {
                if (timer) {
                    clearTimeout(timer);
                }
                timer = setTimeout(runEval, debounceMs);
            };
            // Initial run
            await runEval();
            // Watch for changes
            const watcher = fs.watch(filePath, scheduleRun);
            // Also watch directory for related files. Any other .json file in the
            // directory triggers a re-run; the config is not inspected to see
            // whether it actually references that file.
            const dir = path.dirname(filePath);
            const dirWatcher = fs.watch(dir, (event, filename) => {
                if (filename && filename.endsWith(".json") && filename !== path.basename(filePath)) {
                    scheduleRun();
                }
            });
            // Clean shutdown on Ctrl+C
            process.on("SIGINT", () => {
                watcher.close();
                dirWatcher.close();
                console.log(chalk.dim("\n  Watch stopped."));
                process.exit(0);
            });
            // Never resolves: keeps the action (and the process) alive
            await new Promise(() => { });
        });
}
|
|
92
|
+
//# sourceMappingURL=watch.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"watch.js","sourceRoot":"","sources":["../../src/commands/watch.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AACzB,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAE7B,MAAM,UAAU,aAAa,CAAC,OAAgB;IAC5C,OAAO;SACJ,OAAO,CAAC,OAAO,CAAC;SAChB,WAAW,CAAC,yCAAyC,CAAC;SACtD,QAAQ,CAAC,QAAQ,EAAE,+BAA+B,CAAC;SACnD,MAAM,CAAC,iBAAiB,EAAE,gBAAgB,CAAC;SAC3C,MAAM,CAAC,uBAAuB,EAAE,mBAAmB,CAAC;SACpD,MAAM,CAAC,iBAAiB,EAAE,yBAAyB,EAAE,MAAM,CAAC;SAC5D,MAAM,CAAC,KAAK,EAAE,IAAY,EAAE,IAA6D,EAAE,EAAE;QAC5F,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QACpC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC7B,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,mBAAmB,QAAQ,EAAE,CAAC,CAAC,CAAC;YACxD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,MAAM,UAAU,GAAG,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QAC/C,IAAI,KAAK,GAAyC,IAAI,CAAC;QACvD,IAAI,QAAQ,GAAG,CAAC,CAAC;QAEjB,OAAO,CAAC,GAAG,EAAE,CAAC;QACd,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,cAAc,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC,CAAC;QAC5E,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC,CAAC;QACjD,OAAO,CAAC,GAAG,EAAE,CAAC;QAEd,MAAM,OAAO,GAAG,KAAK,IAAI,EAAE;YACzB,QAAQ,EAAE,CAAC;YACX,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,cAAc,QAAQ,OAAO,IAAI,IAAI,EAAE,CAAC,kBAAkB,EAAE,MAAM,CAAC,CAAC,CAAC;YAE3F,IAAI,CAAC;gBACH,2CAA2C;gBAC3C,MAAM,EAAE,aAAa,EAAE,gBAAgB,EAAE,cAAc,EAAE,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAQ,CAAC;gBAEnG,MAAM,GAAG,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;gBAC/C,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;gBAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,MAAM,CAAC,KAAK,CAAC;gBACzC,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,IAAI,MAAM,CAAC,QAAQ,IAAI,QAAQ,CAAC;gBAElE,MAAM,MAAM,GAA2B;oBACrC,MAAM,EAAE,gBAAgB,EAAE,SAAS,EAAE,mBAAmB,EAAE,MAAM,EAAE,gBAAgB;iBACnF,CAAC;gBACF,MAAM,MAAM,GAAG,MAAM,CAAC,YAAY,CAAC,IAAI,GAAG,YAAY,CAAC,WAAW,EAAE,UAAU,CAAC;gBAC/E,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;gBACzC,MAAM,QAAQ,GAAG,cAAc,CAAC,YAAmB,EAAE,MAAM,CAAC,CAAC;gBAE7D,MAAM,OAAO,G
AAG,KAAK,EAAE,MAAc,EAAE,EAAE;oBACvC,MAAM,CAAC,GAAG,MAAM,QAAQ,CAAC,IAAI,CAAC,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC;oBAC9E,OAAO,CAAC,CAAC,OAAO,CAAC;gBACnB,CAAC,CAAC;gBAEF,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC;oBACjC,KAAK;oBACL,MAAM,EAAE,MAAM,CAAC,MAAM;oBACrB,KAAK,EAAE,MAAM,CAAC,KAAK;oBACnB,OAAO,EAAE,MAAM,CAAC,OAAO;oBACvB,OAAO;oBACP,aAAa,EAAE,MAAM,CAAC,aAAa;iBACpC,CAAC,CAAC;gBAEH,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;gBAChE,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,MAAM,CAAC;gBAC5C,MAAM,SAAS,GAAG,MAAM,CAAC,QAAQ,IAAI,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,IAAI,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC;gBAE3G,OAAO,CAAC,GAAG,CAAC,KAAK,SAAS,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,KAAK,CAAC,GAAG,MAAM,SAAS,CAAC,KAAK,KAAK,CAAC,GAAG,CAAC,GAAG,MAAM,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,KAAK,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,YAAY,IAAI,CAAC,EAAE,CAAC,CAAC;YAC5L,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,OAAO,CAAC,GAAG,CAAC,KAAK,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,WAAW,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAChG,CAAC;YACD,OAAO,CAAC,GAAG,EAAE,CAAC;QAChB,CAAC,CAAC;QAEF,cAAc;QACd,MAAM,OAAO,EAAE,CAAC;QAEhB,oBAAoB;QACpB,MAAM,OAAO,GAAG,EAAE,CAAC,KAAK,CAAC,QAAQ,EAAE,GAAG,EAAE;YACtC,IAAI,KAAK;gBAAE,YAAY,CAAC,KAAK,CAAC,CAAC;YAC/B,KAAK,GAAG,UAAU,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC;QAC1C,CAAC,CAAC,CAAC;QAEH,yCAAyC;QACzC,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QACnC,MAAM,UAAU,GAAG,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,KAAK,EAAE,QAAQ,EAAE,EAAE;YACnD,IAAI,QAAQ,IAAI,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,QAAQ,KAAK,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;gBACnF,2CAA2C;gBAC3C,IAAI,KAAK;oBAAE,YAAY,CAAC,KAAK,CAAC,CAAC;gBAC/B,KAAK,GAAG,UAAU,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC;YAC1C,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,aAAa;QACb,OAAO,CAAC,EAAE,CAAC,QAAQ,EAAE,GAAG,EAAE;YACxB,OAAO,CAAC,KA
AK,EAAE,CAAC;YAChB,UAAU,CAAC,KAAK,EAAE,CAAC;YACnB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC,CAAC;YAC7C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC,CAAC,CAAC;QAEH,4BAA4B;QAC5B,MAAM,IAAI,OAAO,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;IAC9B,CAAC,CAAC,CAAC;AACP,CAAC"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":""}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { Command } from "commander";
|
|
3
|
+
import chalk from "chalk";
|
|
4
|
+
import ora from "ora";
|
|
5
|
+
import * as fs from "fs";
|
|
6
|
+
import * as path from "path";
|
|
7
|
+
import * as os from "os";
|
|
8
|
+
import { createRequire } from "module";
|
|
9
|
+
import { registerInit, registerEvalLocal, registerScanLocal, registerGenerate, registerValidate, registerCompare, registerList, registerFirewall, registerWatch, registerGate, registerHistory, registerComplianceCheck, registerImportPromptfoo, registerShare, registerExport, registerRetry, registerDebug, registerLogs, registerDelete, registerModelScan, } from "./commands/index.js";
|
|
10
|
+
const require = createRequire(import.meta.url);
|
|
11
|
+
const pkg = require("../package.json");
|
|
12
|
+
const CONFIG_DIR = path.join(os.homedir(), ".evalguard");
|
|
13
|
+
const CONFIG_FILE = path.join(CONFIG_DIR, "config.json");
|
|
14
|
+
// SECURITY WARNING: API key is stored in plaintext at ~/.evalguard/config.json
|
|
15
|
+
// (mode 0o600). For production use, consider integrating an OS keyring
|
|
16
|
+
// (e.g., keytar, @aspect/credentials) or using environment variables instead.
|
|
17
|
+
/**
|
|
18
|
+
* Validates that a resolved file path is within the current working directory.
|
|
19
|
+
* Prevents path traversal attacks (e.g., ../../etc/passwd).
|
|
20
|
+
*/
|
|
21
|
+
/**
 * Resolves `filePath` and verifies that it lies inside the current working
 * directory (or is the working directory itself).
 *
 * Symlinks are resolved before the comparison. When the target does not
 * exist yet (e.g. an output path), the deepest existing ancestor is
 * resolved instead and the missing segments are appended to it, so a
 * symlinked parent directory cannot be used to escape the working directory.
 *
 * NOTE: a dangling symlink as the final existing component is not detected,
 * because it cannot be resolved.
 *
 * @param filePath - absolute or cwd-relative path supplied by the user.
 * @returns the canonical absolute path.
 * @throws {Error} when the canonical path is outside the working directory.
 */
function assertPathWithinCwd(filePath) {
    const resolved = path.resolve(filePath);
    // Walk upwards until a component exists and can be canonicalised.
    const missingSegments = [];
    let probe = resolved;
    let canonical;
    for (;;) {
        try {
            canonical = path.join(fs.realpathSync(probe), ...missingSegments);
            break;
        }
        catch {
            const parent = path.dirname(probe);
            if (parent === probe) {
                // Reached the filesystem root without resolving anything.
                canonical = resolved;
                break;
            }
            missingSegments.unshift(path.basename(probe));
            probe = parent;
        }
    }
    const cwd = fs.realpathSync(process.cwd());
    // Compare via the relative path rather than a string prefix: this also
    // works when cwd is the filesystem root and does not misclassify sibling
    // names that merely start with "..".
    const relative = path.relative(cwd, canonical);
    const escapesCwd = relative === ".." ||
        relative.startsWith(`..${path.sep}`) ||
        path.isAbsolute(relative);
    if (escapesCwd) {
        throw new Error(`Security error: Path "${filePath}" resolves outside the current working directory.`);
    }
    return canonical;
}
|
|
37
|
+
/**
 * Loads the CLI configuration from CONFIG_FILE.
 *
 * Always returns an object: an empty one when the file is missing, cannot
 * be read or parsed, or does not contain a JSON object. Problems are
 * reported as warnings rather than thrown.
 *
 * @returns the parsed config object, or `{}`.
 */
function loadConfig() {
    try {
        if (fs.existsSync(CONFIG_FILE)) {
            const parsed = JSON.parse(fs.readFileSync(CONFIG_FILE, "utf-8"));
            // `null`, arrays and primitives are valid JSON but would break
            // callers that read or assign properties on the result.
            if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
                return parsed;
            }
            console.warn(`Warning: Ignoring config at ${CONFIG_FILE}: expected a JSON object.`);
        }
    }
    catch (err) {
        console.warn(`Warning: Failed to load config from ${CONFIG_FILE}: ${err instanceof Error ? err.message : String(err)}`);
    }
    return {};
}
|
|
48
|
+
/**
 * Writes the CLI configuration to CONFIG_FILE as pretty-printed JSON,
 * creating CONFIG_DIR first when needed.
 *
 * @param config - JSON-serialisable config object to persist.
 */
function saveConfig(config) {
    if (!fs.existsSync(CONFIG_DIR)) {
        fs.mkdirSync(CONFIG_DIR, { recursive: true, mode: 0o700 });
    }
    fs.writeFileSync(CONFIG_FILE, JSON.stringify(config, null, 2), { mode: 0o600 });
    // The `mode` option above only takes effect when the file is created.
    // Re-apply it so an already existing, more permissive file is tightened.
    fs.chmodSync(CONFIG_FILE, 0o600);
}
|
|
54
|
+
/**
 * Builds a minimal API client from the stored configuration.
 *
 * Prints an error and exits with status 1 when no API key is stored.
 *
 * @returns an object with `apiKey`, `baseUrl`, `projectId` and an async
 *   `request(urlPath, method, body)` method. `request` resolves with the
 *   parsed JSON response and rejects with an `Error` on a non-2xx status.
 */
function getClient() {
    const config = loadConfig();
    if (!config.apiKey) {
        console.error(chalk.red("Not authenticated. Run `evalguard login` first."));
        process.exit(1);
    }
    // Dynamic import not needed — use fetch directly
    const baseUrl = config.baseUrl ?? "https://evalguard.ai/api/v1";
    return {
        apiKey: config.apiKey,
        baseUrl,
        projectId: config.projectId,
        async request(urlPath, method, body) {
            const res = await fetch(`${baseUrl}${urlPath}`, {
                method,
                headers: {
                    "Content-Type": "application/json",
                    Authorization: `Bearer ${config.apiKey}`,
                },
                body: body ? JSON.stringify(body) : undefined,
            });
            // Fall back to the HTTP status text when the body is not JSON.
            const data = await res.json().catch(() => ({ message: res.statusText }));
            if (!res.ok) {
                // The body may legitimately parse to `null`; guard the access.
                throw new Error(`API error ${res.status}: ${data?.message ?? "Unknown error"}`);
            }
            return data;
        },
    };
}
|
|
83
|
+
// ─── Program ───
|
|
84
|
+
// Root command object; sub-commands are attached to it further down.
const program = new Command();
program.name("evalguard");
program.description(`${chalk.bold("EvalGuard CLI")} — The Operating System for AI Quality`);
program.version(pkg.version);
|
|
90
|
+
// ─── login ───
|
|
91
|
+
program
    .command("login")
    .description("Authenticate with your EvalGuard API key")
    .option("--key <apiKey>", "API key (or set EVALGUARD_API_KEY env var)")
    .option("--url <baseUrl>", "Custom API base URL")
    .action((opts) => {
        // Stores the API key (and optional base URL) in the local config file.
        const apiKey = opts.key ?? process.env.EVALGUARD_API_KEY;
        if (!apiKey) {
            console.error(chalk.red("Provide an API key via --key or EVALGUARD_API_KEY environment variable."));
            console.log(chalk.dim("  Get your API key at https://evalguard.ai/dashboard/settings"));
            process.exit(1);
        }
        const config = loadConfig();
        config.apiKey = apiKey;
        if (opts.url) {
            let parsed;
            try {
                parsed = new URL(opts.url);
            }
            catch {
                console.error('Error: Invalid URL format');
                process.exit(1);
            }
            // Plain HTTP is tolerated only for loopback hosts. Compare the parsed
            // hostname: a substring test on the raw URL would also accept e.g.
            // "http://evil.example/?localhost" and send the API key in clear text.
            const loopbackHosts = ["localhost", "127.0.0.1", "[::1]"];
            const isLocalHttp = parsed.protocol === "http:" && loopbackHosts.includes(parsed.hostname);
            if (parsed.protocol !== "https:" && !isLocalHttp) {
                console.error('Error: Base URL must use HTTPS for security. Use --url https://...');
                process.exit(1);
            }
            config.baseUrl = opts.url;
        }
        saveConfig(config);
        console.log(chalk.green("✓") + " Authenticated successfully.");
        console.log(chalk.dim(`  Config saved to ${CONFIG_FILE}`));
    });
|
|
123
|
+
// ─── logout ───
|
|
124
|
+
// `logout`: delete the stored config file (if there is one) and confirm.
program
    .command("logout")
    .description("Remove stored credentials")
    .action(() => {
        const hasStoredConfig = fs.existsSync(CONFIG_FILE);
        if (hasStoredConfig) {
            fs.unlinkSync(CONFIG_FILE);
        }
        // The confirmation is printed whether or not a file existed.
        console.log(`${chalk.green("✓")} Logged out.`);
    });
|
|
133
|
+
// ─── init (registered from commands/init.ts) ───
|
|
134
|
+
// Hand the root program to registerInit, which is defined outside this section;
// presumably it attaches the `init` subcommand — confirm against its module.
registerInit(program);
|
|
135
|
+
// ─── eval ───
|
|
136
|
+
// `eval`: run an evaluation described by a config file.
//   [file]      Config path. When omitted, the first existing file among
//               evalguard.yaml, evalguard.yml and evalguard.config.json in the
//               current directory is used.
//   --project   Overrides the project ID from the config file / stored config.
//   --model     Overrides the model from the config file.
//   --wait      Poll the created eval until it reaches a terminal status.
//   --local     Delegate to the `eval:local` subcommand.
// YAML files are always delegated to `eval:local`; JSON files are POSTed to /evals.
// Exits with status 1 on missing files, invalid config, missing project ID or API errors.
program
    .command("eval")
    .description("Run an evaluation from a config file")
    .argument("[file]", "Path to eval config JSON/YAML file (default: evalguard.yaml)")
    .option("--project <projectId>", "Override project ID")
    .option("--model <model>", "Override model")
    .option("--wait", "Wait for completion and show results", false)
    .option("--local", "Run locally without API key (uses eval:local)", false)
    .action(async (fileArg, opts) => {
        // Polling budget: 60 attempts, 2 seconds apart.
        const maxPollAttempts = 60;
        const pollIntervalMs = 2000;
        let file = fileArg ?? "";
        if (!file) {
            // Auto-detect a config file; order determines precedence.
            const detected = ["evalguard.yaml", "evalguard.yml", "evalguard.config.json"]
                .map((name) => path.join(process.cwd(), name))
                .find((candidate) => fs.existsSync(candidate));
            if (!detected) {
                console.log(chalk.red("No evalguard.yaml found in current directory."));
                console.log(chalk.dim(" Run `npx evalguard init` to create one."));
                process.exit(1);
            }
            file = detected;
            console.log(chalk.dim(` Using ${path.basename(file)}`));
        }
        // Delegate to eval:local for --local flag or YAML files
        if (opts.local || file.endsWith(".yaml") || file.endsWith(".yml")) {
            const evalLocalArgs = [file];
            if (opts.model) {
                evalLocalArgs.push("--model", opts.model);
            }
            // Re-dispatch through commander so eval:local does its own option parsing.
            await program.parseAsync(["node", "evalguard", "eval:local", ...evalLocalArgs]);
            return;
        }
        const spinner = ora("Reading eval config...").start();
        try {
            const filePath = assertPathWithinCwd(file);
            if (!fs.existsSync(filePath)) {
                spinner.fail(`File not found: ${filePath}`);
                process.exit(1);
            }
            const config = JSON.parse(fs.readFileSync(filePath, "utf-8"));
            if (config === null || typeof config !== "object") {
                spinner.fail("Invalid eval config: expected a JSON object.");
                process.exit(1);
            }
            const client = getClient();
            const projectId = opts.project ?? config.projectId ?? client.projectId;
            if (!projectId) {
                spinner.fail("No project ID. Pass --project or set in config file.");
                process.exit(1);
            }
            // Without this check a missing `cases` key surfaces as an opaque
            // "Cannot read properties of undefined" TypeError.
            if (!Array.isArray(config.cases)) {
                spinner.fail("Invalid eval config: `cases` must be an array.");
                process.exit(1);
            }
            spinner.text = `Running eval "${config.name}" with ${config.cases.length} cases...`;
            const result = await client.request("/evals", "POST", {
                projectId,
                name: config.name,
                model: opts.model ?? config.model,
                prompt: config.prompt,
                scorers: config.scorers,
                cases: config.cases,
            });
            // The payload may or may not be wrapped in a `data` envelope.
            const evalData = result.data ?? result;
            spinner.succeed(`Eval created: ${chalk.cyan(evalData.id)}`);
            console.log(chalk.dim(` Status: ${evalData.status}`));
            if (opts.wait && evalData.id) {
                const pollSpinner = ora("Waiting for results...").start();
                try {
                    let finished = false;
                    for (let attempt = 0; attempt < maxPollAttempts && !finished; attempt++) {
                        await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
                        const poll = await client.request(`/evals/${evalData.id}`, "GET");
                        const pollData = poll.data ?? poll;
                        const status = pollData.status;
                        if (status === "passed" || status === "failed" || status === "error") {
                            pollSpinner.stop();
                            const scoreStr = pollData.score != null
                                ? `${pollData.score}/${pollData.max_score}`
                                : "N/A";
                            const color = status === "passed" ? chalk.green : chalk.red;
                            console.log(`\n ${color("●")} ${chalk.bold(status.toUpperCase())} Score: ${chalk.bold(scoreStr)}`);
                            finished = true;
                        }
                    }
                    if (!finished) {
                        pollSpinner.warn("Timed out waiting for results.");
                    }
                }
                catch (pollErr) {
                    // Stop the poll spinner before the outer handler reports the failure,
                    // otherwise it keeps animating over the error output.
                    pollSpinner.stop();
                    throw pollErr;
                }
            }
        }
        catch (err) {
            spinner.fail(`Eval failed: ${err instanceof Error ? err.message : String(err)}`);
            process.exit(1);
        }
    });
|
|
234
|
+
// ─── scan ───
|
|
235
|
+
// `scan`: run a security scan described by a JSON config file.
//   <file>      Path to the scan config (JSON).
//   --project   Overrides the project ID from the config file / stored config.
//   --model     Overrides the model from the config file.
//   --wait      Poll the created scan until it reaches a terminal status.
// The scan is created via POST /security and polled via GET /security/<id>.
// Exits with status 1 on missing files, invalid config, missing project ID or API errors.
program
    .command("scan")
    .description("Run a security scan from a config file")
    .argument("<file>", "Path to scan config JSON file")
    .option("--project <projectId>", "Override project ID")
    .option("--model <model>", "Override model")
    .option("--wait", "Wait for completion and show results", false)
    .action(async (file, opts) => {
        // Polling budget: 60 attempts, 2 seconds apart.
        const maxPollAttempts = 60;
        const pollIntervalMs = 2000;
        const spinner = ora("Reading scan config...").start();
        try {
            const filePath = assertPathWithinCwd(file);
            if (!fs.existsSync(filePath)) {
                spinner.fail(`File not found: ${filePath}`);
                process.exit(1);
            }
            const config = JSON.parse(fs.readFileSync(filePath, "utf-8"));
            if (config === null || typeof config !== "object") {
                spinner.fail("Invalid scan config: expected a JSON object.");
                process.exit(1);
            }
            const client = getClient();
            const projectId = opts.project ?? config.projectId ?? client.projectId;
            if (!projectId) {
                spinner.fail("No project ID. Pass --project or set in config file.");
                process.exit(1);
            }
            // Without this check a missing `attackTypes` key surfaces as an opaque
            // "Cannot read properties of undefined" TypeError.
            if (!Array.isArray(config.attackTypes)) {
                spinner.fail("Invalid scan config: `attackTypes` must be an array.");
                process.exit(1);
            }
            const model = opts.model ?? config.model;
            spinner.text = `Scanning ${config.attackTypes.length} attack types against ${model}...`;
            const result = await client.request("/security", "POST", {
                projectId,
                model,
                prompt: config.prompt,
                attackTypes: config.attackTypes,
            });
            // The payload may or may not be wrapped in a `data` envelope.
            const scanData = result.data ?? result;
            spinner.succeed(`Scan created: ${chalk.cyan(scanData.id)}`);
            console.log(chalk.dim(` Status: ${scanData.status}`));
            if (opts.wait && scanData.id) {
                const pollSpinner = ora("Waiting for results...").start();
                try {
                    let finished = false;
                    for (let attempt = 0; attempt < maxPollAttempts && !finished; attempt++) {
                        await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
                        const poll = await client.request(`/security/${scanData.id}`, "GET");
                        const pollData = poll.data ?? poll;
                        const status = pollData.status;
                        if (status === "passed" || status === "failed" || status === "error") {
                            pollSpinner.stop();
                            const color = status === "passed" ? chalk.green : chalk.red;
                            console.log(`\n ${color("●")} ${chalk.bold(status.toUpperCase())}`);
                            if (pollData.security_score != null) {
                                console.log(` Security Score: ${chalk.bold(String(pollData.security_score))}%`);
                            }
                            finished = true;
                        }
                    }
                    if (!finished) {
                        pollSpinner.warn("Timed out waiting for results.");
                    }
                }
                catch (pollErr) {
                    // Stop the poll spinner before the outer handler reports the failure,
                    // otherwise it keeps animating over the error output.
                    pollSpinner.stop();
                    throw pollErr;
                }
            }
        }
        catch (err) {
            spinner.fail(`Scan failed: ${err instanceof Error ? err.message : String(err)}`);
            process.exit(1);
        }
    });
|
|
299
|
+
// ─── whoami ───
|
|
300
|
+
// `whoami`: report whether an API key is stored, showing a masked key, the
// effective base URL and the project ID (when set). Makes no network request.
program
    .command("whoami")
    .description("Show current authentication status")
    .action(() => {
        const config = loadConfig();
        if (!config.apiKey) {
            console.log(chalk.yellow("Not authenticated."));
            console.log(chalk.dim(" Run `evalguard login --key <your-api-key>`"));
            return;
        }
        const apiKey = String(config.apiKey);
        const visiblePrefix = 7;
        const visibleSuffix = 4;
        // Showing prefix + suffix of a key that is no longer than the two combined
        // would print the whole secret, so such keys are fully redacted.
        const masked = apiKey.length > visiblePrefix + visibleSuffix
            ? `${apiKey.slice(0, visiblePrefix)}...${apiKey.slice(-visibleSuffix)}`
            : "***";
        console.log(`${chalk.green("✓")} Authenticated`);
        console.log(chalk.dim(` API Key: ${masked}`));
        console.log(chalk.dim(` Base URL: ${config.baseUrl ?? "https://evalguard.ai/api/v1"}`));
        if (config.projectId) {
            console.log(chalk.dim(` Project: ${config.projectId}`));
        }
    });
|
|
321
|
+
// ─── Phase 6: New Commands ───
|
|
322
|
+
// Each registrar receives the root program; they are invoked in this exact order.
for (const register of [
    registerEvalLocal,
    registerScanLocal,
    registerGenerate,
    registerValidate,
    registerCompare,
    registerList,
    registerFirewall,
    registerWatch,
    registerGate,
    registerHistory,
    registerComplianceCheck,
    registerImportPromptfoo,
    registerShare,
    registerExport,
    registerRetry,
    registerDebug,
    registerLogs,
    registerDelete,
    registerModelScan,
]) {
    register(program);
}
|
|
341
|
+
// Several action handlers (e.g. `eval`, `scan`) are async, so parse with
// parseAsync and report any rejection as a normal CLI error instead of letting
// it surface as an unhandled promise rejection.
program.parseAsync().catch((err) => {
    console.error(chalk.red(err instanceof Error ? err.message : String(err)));
    process.exit(1);
});
|
|
342
|
+
//# sourceMappingURL=index.js.map
|