trickle-cli 0.1.189 → 0.1.191
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/dist/commands/eval.d.ts +1 -0
- package/dist/commands/eval.js +17 -1
- package/dist/commands/security.d.ts +1 -6
- package/dist/commands/security.js +95 -1
- package/dist/index.js +1 -0
- package/package.json +1 -1
- package/src/commands/eval.ts +19 -2
- package/src/commands/security.ts +101 -3
- package/src/index.ts +1 -0
package/dist/commands/eval.d.ts
CHANGED
package/dist/commands/eval.js
CHANGED
|
@@ -76,7 +76,15 @@ function evalCommand(opts) {
|
|
|
76
76
|
}
|
|
77
77
|
const result = scoreRun(agentEvents, llmCalls, errors, mcpCalls);
|
|
78
78
|
if (opts.json) {
|
|
79
|
-
|
|
79
|
+
const threshold = opts.failUnder ? parseInt(opts.failUnder, 10) : undefined;
|
|
80
|
+
const output = {
|
|
81
|
+
...result,
|
|
82
|
+
...(threshold !== undefined ? { threshold, passed: result.overallScore >= threshold } : {}),
|
|
83
|
+
};
|
|
84
|
+
console.log(JSON.stringify(output, null, 2));
|
|
85
|
+
if (threshold !== undefined && result.overallScore < threshold) {
|
|
86
|
+
process.exit(1);
|
|
87
|
+
}
|
|
80
88
|
return;
|
|
81
89
|
}
|
|
82
90
|
// Pretty print
|
|
@@ -104,6 +112,14 @@ function evalCommand(opts) {
|
|
|
104
112
|
}
|
|
105
113
|
}
|
|
106
114
|
console.log('');
|
|
115
|
+
// CI mode: exit with non-zero if score below threshold
|
|
116
|
+
if (opts.failUnder) {
|
|
117
|
+
const threshold = parseInt(opts.failUnder, 10);
|
|
118
|
+
if (!isNaN(threshold) && result.overallScore < threshold) {
|
|
119
|
+
console.log(chalk_1.default.red(` FAIL: Score ${result.overallScore} is below threshold ${threshold}`));
|
|
120
|
+
process.exit(1);
|
|
121
|
+
}
|
|
122
|
+
}
|
|
107
123
|
}
|
|
108
124
|
function printDimension(name, dim) {
|
|
109
125
|
const bar = renderBar(dim.score);
|
|
@@ -21,12 +21,7 @@ interface SecurityFinding {
|
|
|
21
21
|
}
|
|
22
22
|
export interface SecurityResult {
|
|
23
23
|
findings: SecurityFinding[];
|
|
24
|
-
scanned:
|
|
25
|
-
variables: number;
|
|
26
|
-
queries: number;
|
|
27
|
-
logs: number;
|
|
28
|
-
observations: number;
|
|
29
|
-
};
|
|
24
|
+
scanned: Record<string, number>;
|
|
30
25
|
summary: {
|
|
31
26
|
critical: number;
|
|
32
27
|
warning: number;
|
|
@@ -154,6 +154,90 @@ function runSecurityScan(opts) {
|
|
|
154
154
|
if (o.sampleOutput)
|
|
155
155
|
findings.push(...scanValue(o.sampleOutput, 'function_output', `${o.module}.${o.functionName}`));
|
|
156
156
|
}
|
|
157
|
+
// ── Agent Security: The "Lethal Trifecta" ──
|
|
158
|
+
// Scan LLM calls for prompt injection and data exfiltration
|
|
159
|
+
const llmCalls = readJsonl(path.join(trickleDir, 'llm.jsonl'));
|
|
160
|
+
for (const c of llmCalls) {
|
|
161
|
+
// Prompt injection patterns in LLM inputs
|
|
162
|
+
const input = String(c.inputPreview || '').toLowerCase();
|
|
163
|
+
const INJECTION_PATTERNS = [
|
|
164
|
+
{ pattern: /ignore\s+(all\s+)?previous\s+instructions/i, name: 'Instruction override' },
|
|
165
|
+
{ pattern: /you\s+are\s+now\s+a\s+/i, name: 'Role hijacking' },
|
|
166
|
+
{ pattern: /system\s*:\s*you\s+are/i, name: 'System prompt injection' },
|
|
167
|
+
{ pattern: /\bdo\s+not\s+follow\s+(any|the)\s+(previous|above)/i, name: 'Instruction bypass' },
|
|
168
|
+
{ pattern: /forget\s+(all|everything|your)\s+(previous|prior|instructions)/i, name: 'Memory wipe attempt' },
|
|
169
|
+
{ pattern: /pretend\s+you\s+(are|have)\s+(no|unrestricted)/i, name: 'Jailbreak attempt' },
|
|
170
|
+
];
|
|
171
|
+
for (const inj of INJECTION_PATTERNS) {
|
|
172
|
+
if (inj.pattern.test(c.inputPreview || '') || inj.pattern.test(c.systemPrompt || '')) {
|
|
173
|
+
findings.push({
|
|
174
|
+
severity: 'critical', category: 'prompt_injection',
|
|
175
|
+
message: `${inj.name} detected in LLM input`,
|
|
176
|
+
source: 'llm_call', location: c.model || 'unknown',
|
|
177
|
+
evidence: (c.inputPreview || '').substring(0, 100),
|
|
178
|
+
});
|
|
179
|
+
break;
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
// Secrets in LLM outputs (data exfiltration)
|
|
183
|
+
const output = String(c.outputPreview || '');
|
|
184
|
+
if (output) {
|
|
185
|
+
const outputFindings = scanValue(output, 'llm_output', `${c.provider}/${c.model}`);
|
|
186
|
+
for (const f of outputFindings) {
|
|
187
|
+
f.category = 'data_exfiltration';
|
|
188
|
+
f.message = `LLM output contains ${f.message.toLowerCase()}`;
|
|
189
|
+
findings.push(f);
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
// Secrets in LLM inputs
|
|
193
|
+
const inputStr = String(c.inputPreview || '');
|
|
194
|
+
if (inputStr) {
|
|
195
|
+
const inputFindings = scanValue(inputStr, 'llm_input', `${c.provider}/${c.model}`);
|
|
196
|
+
for (const f of inputFindings) {
|
|
197
|
+
f.message = `Secret passed to LLM: ${f.message}`;
|
|
198
|
+
findings.push(f);
|
|
199
|
+
}
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
// Scan agent events for unauthorized tool calls
|
|
203
|
+
const agentEvents = readJsonl(path.join(trickleDir, 'agents.jsonl'));
|
|
204
|
+
const toolErrors = agentEvents.filter(e => e.event === 'tool_error');
|
|
205
|
+
const toolStarts = agentEvents.filter(e => e.event === 'tool_start');
|
|
206
|
+
// Detect privilege escalation: agent calling dangerous tools
|
|
207
|
+
const DANGEROUS_TOOLS = ['Bash', 'bash', 'shell', 'exec', 'eval', 'rm', 'sudo', 'chmod', 'kill'];
|
|
208
|
+
for (const t of toolStarts) {
|
|
209
|
+
const toolName = String(t.tool || '');
|
|
210
|
+
if (DANGEROUS_TOOLS.some(d => toolName.toLowerCase().includes(d.toLowerCase()))) {
|
|
211
|
+
// Check if tool input contains dangerous commands
|
|
212
|
+
const toolInput = String(t.toolInput || '').toLowerCase();
|
|
213
|
+
if (toolInput.includes('rm -rf') || toolInput.includes('sudo') || toolInput.includes('chmod 777') ||
|
|
214
|
+
toolInput.includes('curl') && toolInput.includes('|') || toolInput.includes('wget') && toolInput.includes('|')) {
|
|
215
|
+
findings.push({
|
|
216
|
+
severity: 'critical', category: 'privilege_escalation',
|
|
217
|
+
message: `Agent executed dangerous command via ${toolName}`,
|
|
218
|
+
source: 'agent_tool', location: t.framework || 'agent',
|
|
219
|
+
evidence: (t.toolInput || '').substring(0, 100),
|
|
220
|
+
});
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
// Scan MCP tool calls for secrets in args/responses
|
|
225
|
+
const mcpCalls = readJsonl(path.join(trickleDir, 'mcp.jsonl'));
|
|
226
|
+
for (const m of mcpCalls) {
|
|
227
|
+
if (m.args) {
|
|
228
|
+
const argsStr = typeof m.args === 'string' ? m.args : JSON.stringify(m.args);
|
|
229
|
+
const argsFindings = scanValue(argsStr, 'mcp_tool_args', `MCP: ${m.tool}`);
|
|
230
|
+
findings.push(...argsFindings);
|
|
231
|
+
}
|
|
232
|
+
if (m.resultPreview) {
|
|
233
|
+
const resultFindings = scanValue(m.resultPreview, 'mcp_tool_result', `MCP: ${m.tool}`);
|
|
234
|
+
for (const f of resultFindings) {
|
|
235
|
+
f.category = 'data_exfiltration';
|
|
236
|
+
f.message = `MCP tool response contains ${f.message.toLowerCase()}`;
|
|
237
|
+
findings.push(f);
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
}
|
|
157
241
|
// Deduplicate
|
|
158
242
|
const seen = new Set();
|
|
159
243
|
const deduped = findings.filter(f => {
|
|
@@ -168,6 +252,9 @@ function runSecurityScan(opts) {
|
|
|
168
252
|
warning: deduped.filter(f => f.severity === 'warning').length,
|
|
169
253
|
info: deduped.filter(f => f.severity === 'info').length,
|
|
170
254
|
};
|
|
255
|
+
scanned.llmCalls = llmCalls.length;
|
|
256
|
+
scanned.agentEvents = agentEvents.length;
|
|
257
|
+
scanned.mcpCalls = mcpCalls.length;
|
|
171
258
|
const result = { findings: deduped, scanned, summary };
|
|
172
259
|
if (opts?.json) {
|
|
173
260
|
console.log(JSON.stringify(result, null, 2));
|
|
@@ -177,7 +264,14 @@ function runSecurityScan(opts) {
|
|
|
177
264
|
console.log('');
|
|
178
265
|
console.log(chalk_1.default.bold(' trickle security'));
|
|
179
266
|
console.log(chalk_1.default.gray(' ' + '─'.repeat(50)));
|
|
180
|
-
|
|
267
|
+
const scanParts = [`${scanned.variables} vars`, `${scanned.queries} queries`, `${scanned.logs} logs`, `${scanned.observations} functions`];
|
|
268
|
+
if (scanned.llmCalls)
|
|
269
|
+
scanParts.push(`${scanned.llmCalls} LLM calls`);
|
|
270
|
+
if (scanned.agentEvents)
|
|
271
|
+
scanParts.push(`${scanned.agentEvents} agent events`);
|
|
272
|
+
if (scanned.mcpCalls)
|
|
273
|
+
scanParts.push(`${scanned.mcpCalls} MCP calls`);
|
|
274
|
+
console.log(chalk_1.default.gray(` Scanned: ${scanParts.join(', ')}`));
|
|
181
275
|
if (deduped.length === 0) {
|
|
182
276
|
console.log(chalk_1.default.green(' No security issues found. ✓'));
|
|
183
277
|
}
|
package/dist/index.js
CHANGED
|
@@ -918,6 +918,7 @@ program
|
|
|
918
918
|
.command("eval")
|
|
919
919
|
.description("Score agent runs on reliability — completion, errors, cost efficiency, tool reliability, latency")
|
|
920
920
|
.option("--json", "Output raw JSON for CI integration")
|
|
921
|
+
.option("--fail-under <score>", "Exit with code 1 if overall score is below this threshold (0-100, for CI)")
|
|
921
922
|
.action(async (opts) => {
|
|
922
923
|
const { evalCommand } = await Promise.resolve().then(() => __importStar(require("./commands/eval")));
|
|
923
924
|
evalCommand(opts);
|
package/package.json
CHANGED
package/src/commands/eval.ts
CHANGED
|
@@ -36,7 +36,7 @@ function readJsonl(fp: string): any[] {
|
|
|
36
36
|
.map(l => { try { return JSON.parse(l); } catch { return null; } }).filter(Boolean);
|
|
37
37
|
}
|
|
38
38
|
|
|
39
|
-
export function evalCommand(opts: { json?: boolean }): void {
|
|
39
|
+
export function evalCommand(opts: { json?: boolean; failUnder?: string }): void {
|
|
40
40
|
const dir = process.env.TRICKLE_LOCAL_DIR || path.join(process.cwd(), '.trickle');
|
|
41
41
|
const agentEvents = readJsonl(path.join(dir, 'agents.jsonl'));
|
|
42
42
|
const llmCalls = readJsonl(path.join(dir, 'llm.jsonl'));
|
|
@@ -51,7 +51,15 @@ export function evalCommand(opts: { json?: boolean }): void {
|
|
|
51
51
|
const result = scoreRun(agentEvents, llmCalls, errors, mcpCalls);
|
|
52
52
|
|
|
53
53
|
if (opts.json) {
|
|
54
|
-
|
|
54
|
+
const threshold = opts.failUnder ? parseInt(opts.failUnder, 10) : undefined;
|
|
55
|
+
const output = {
|
|
56
|
+
...result,
|
|
57
|
+
...(threshold !== undefined ? { threshold, passed: result.overallScore >= threshold } : {}),
|
|
58
|
+
};
|
|
59
|
+
console.log(JSON.stringify(output, null, 2));
|
|
60
|
+
if (threshold !== undefined && result.overallScore < threshold) {
|
|
61
|
+
process.exit(1);
|
|
62
|
+
}
|
|
55
63
|
return;
|
|
56
64
|
}
|
|
57
65
|
|
|
@@ -85,6 +93,15 @@ export function evalCommand(opts: { json?: boolean }): void {
|
|
|
85
93
|
}
|
|
86
94
|
|
|
87
95
|
console.log('');
|
|
96
|
+
|
|
97
|
+
// CI mode: exit with non-zero if score below threshold
|
|
98
|
+
if (opts.failUnder) {
|
|
99
|
+
const threshold = parseInt(opts.failUnder, 10);
|
|
100
|
+
if (!isNaN(threshold) && result.overallScore < threshold) {
|
|
101
|
+
console.log(chalk.red(` FAIL: Score ${result.overallScore} is below threshold ${threshold}`));
|
|
102
|
+
process.exit(1);
|
|
103
|
+
}
|
|
104
|
+
}
|
|
88
105
|
}
|
|
89
106
|
|
|
90
107
|
function printDimension(name: string, dim: { score: number; detail: string }): void {
|
package/src/commands/security.ts
CHANGED
|
@@ -82,14 +82,14 @@ function scanValue(value: unknown, source: string, location: string): SecurityFi
|
|
|
82
82
|
|
|
83
83
|
export interface SecurityResult {
|
|
84
84
|
findings: SecurityFinding[];
|
|
85
|
-
scanned:
|
|
85
|
+
scanned: Record<string, number>;
|
|
86
86
|
summary: { critical: number; warning: number; info: number };
|
|
87
87
|
}
|
|
88
88
|
|
|
89
89
|
export function runSecurityScan(opts?: { dir?: string; json?: boolean }): SecurityResult {
|
|
90
90
|
const trickleDir = opts?.dir || process.env.TRICKLE_LOCAL_DIR || path.join(process.cwd(), '.trickle');
|
|
91
91
|
const findings: SecurityFinding[] = [];
|
|
92
|
-
const scanned = { variables: 0, queries: 0, logs: 0, observations: 0 };
|
|
92
|
+
const scanned: Record<string, number> = { variables: 0, queries: 0, logs: 0, observations: 0 };
|
|
93
93
|
|
|
94
94
|
// Scan variables
|
|
95
95
|
const variables = readJsonl(path.join(trickleDir, 'variables.jsonl'));
|
|
@@ -133,6 +133,97 @@ export function runSecurityScan(opts?: { dir?: string; json?: boolean }): Securi
|
|
|
133
133
|
if (o.sampleOutput) findings.push(...scanValue(o.sampleOutput, 'function_output', `${o.module}.${o.functionName}`));
|
|
134
134
|
}
|
|
135
135
|
|
|
136
|
+
// ── Agent Security: The "Lethal Trifecta" ──
|
|
137
|
+
|
|
138
|
+
// Scan LLM calls for prompt injection and data exfiltration
|
|
139
|
+
const llmCalls = readJsonl(path.join(trickleDir, 'llm.jsonl'));
|
|
140
|
+
for (const c of llmCalls) {
|
|
141
|
+
// Prompt injection patterns in LLM inputs
|
|
142
|
+
const input = String(c.inputPreview || '').toLowerCase();
|
|
143
|
+
const INJECTION_PATTERNS = [
|
|
144
|
+
{ pattern: /ignore\s+(all\s+)?previous\s+instructions/i, name: 'Instruction override' },
|
|
145
|
+
{ pattern: /you\s+are\s+now\s+a\s+/i, name: 'Role hijacking' },
|
|
146
|
+
{ pattern: /system\s*:\s*you\s+are/i, name: 'System prompt injection' },
|
|
147
|
+
{ pattern: /\bdo\s+not\s+follow\s+(any|the)\s+(previous|above)/i, name: 'Instruction bypass' },
|
|
148
|
+
{ pattern: /forget\s+(all|everything|your)\s+(previous|prior|instructions)/i, name: 'Memory wipe attempt' },
|
|
149
|
+
{ pattern: /pretend\s+you\s+(are|have)\s+(no|unrestricted)/i, name: 'Jailbreak attempt' },
|
|
150
|
+
];
|
|
151
|
+
for (const inj of INJECTION_PATTERNS) {
|
|
152
|
+
if (inj.pattern.test(c.inputPreview || '') || inj.pattern.test(c.systemPrompt || '')) {
|
|
153
|
+
findings.push({
|
|
154
|
+
severity: 'critical', category: 'prompt_injection',
|
|
155
|
+
message: `${inj.name} detected in LLM input`,
|
|
156
|
+
source: 'llm_call', location: c.model || 'unknown',
|
|
157
|
+
evidence: (c.inputPreview || '').substring(0, 100),
|
|
158
|
+
});
|
|
159
|
+
break;
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
// Secrets in LLM outputs (data exfiltration)
|
|
164
|
+
const output = String(c.outputPreview || '');
|
|
165
|
+
if (output) {
|
|
166
|
+
const outputFindings = scanValue(output, 'llm_output', `${c.provider}/${c.model}`);
|
|
167
|
+
for (const f of outputFindings) {
|
|
168
|
+
f.category = 'data_exfiltration';
|
|
169
|
+
f.message = `LLM output contains ${f.message.toLowerCase()}`;
|
|
170
|
+
findings.push(f);
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
// Secrets in LLM inputs
|
|
175
|
+
const inputStr = String(c.inputPreview || '');
|
|
176
|
+
if (inputStr) {
|
|
177
|
+
const inputFindings = scanValue(inputStr, 'llm_input', `${c.provider}/${c.model}`);
|
|
178
|
+
for (const f of inputFindings) {
|
|
179
|
+
f.message = `Secret passed to LLM: ${f.message}`;
|
|
180
|
+
findings.push(f);
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
// Scan agent events for unauthorized tool calls
|
|
186
|
+
const agentEvents = readJsonl(path.join(trickleDir, 'agents.jsonl'));
|
|
187
|
+
const toolErrors = agentEvents.filter(e => e.event === 'tool_error');
|
|
188
|
+
const toolStarts = agentEvents.filter(e => e.event === 'tool_start');
|
|
189
|
+
|
|
190
|
+
// Detect privilege escalation: agent calling dangerous tools
|
|
191
|
+
const DANGEROUS_TOOLS = ['Bash', 'bash', 'shell', 'exec', 'eval', 'rm', 'sudo', 'chmod', 'kill'];
|
|
192
|
+
for (const t of toolStarts) {
|
|
193
|
+
const toolName = String(t.tool || '');
|
|
194
|
+
if (DANGEROUS_TOOLS.some(d => toolName.toLowerCase().includes(d.toLowerCase()))) {
|
|
195
|
+
// Check if tool input contains dangerous commands
|
|
196
|
+
const toolInput = String(t.toolInput || '').toLowerCase();
|
|
197
|
+
if (toolInput.includes('rm -rf') || toolInput.includes('sudo') || toolInput.includes('chmod 777') ||
|
|
198
|
+
toolInput.includes('curl') && toolInput.includes('|') || toolInput.includes('wget') && toolInput.includes('|')) {
|
|
199
|
+
findings.push({
|
|
200
|
+
severity: 'critical', category: 'privilege_escalation',
|
|
201
|
+
message: `Agent executed dangerous command via ${toolName}`,
|
|
202
|
+
source: 'agent_tool', location: t.framework || 'agent',
|
|
203
|
+
evidence: (t.toolInput || '').substring(0, 100),
|
|
204
|
+
});
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
// Scan MCP tool calls for secrets in args/responses
|
|
210
|
+
const mcpCalls = readJsonl(path.join(trickleDir, 'mcp.jsonl'));
|
|
211
|
+
for (const m of mcpCalls) {
|
|
212
|
+
if (m.args) {
|
|
213
|
+
const argsStr = typeof m.args === 'string' ? m.args : JSON.stringify(m.args);
|
|
214
|
+
const argsFindings = scanValue(argsStr, 'mcp_tool_args', `MCP: ${m.tool}`);
|
|
215
|
+
findings.push(...argsFindings);
|
|
216
|
+
}
|
|
217
|
+
if (m.resultPreview) {
|
|
218
|
+
const resultFindings = scanValue(m.resultPreview, 'mcp_tool_result', `MCP: ${m.tool}`);
|
|
219
|
+
for (const f of resultFindings) {
|
|
220
|
+
f.category = 'data_exfiltration';
|
|
221
|
+
f.message = `MCP tool response contains ${f.message.toLowerCase()}`;
|
|
222
|
+
findings.push(f);
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
|
|
136
227
|
// Deduplicate
|
|
137
228
|
const seen = new Set<string>();
|
|
138
229
|
const deduped = findings.filter(f => {
|
|
@@ -148,6 +239,9 @@ export function runSecurityScan(opts?: { dir?: string; json?: boolean }): Securi
|
|
|
148
239
|
info: deduped.filter(f => f.severity === 'info').length,
|
|
149
240
|
};
|
|
150
241
|
|
|
242
|
+
scanned.llmCalls = llmCalls.length;
|
|
243
|
+
scanned.agentEvents = agentEvents.length;
|
|
244
|
+
scanned.mcpCalls = mcpCalls.length;
|
|
151
245
|
const result: SecurityResult = { findings: deduped, scanned, summary };
|
|
152
246
|
|
|
153
247
|
if (opts?.json) {
|
|
@@ -159,7 +253,11 @@ export function runSecurityScan(opts?: { dir?: string; json?: boolean }): Securi
|
|
|
159
253
|
console.log('');
|
|
160
254
|
console.log(chalk.bold(' trickle security'));
|
|
161
255
|
console.log(chalk.gray(' ' + '─'.repeat(50)));
|
|
162
|
-
|
|
256
|
+
const scanParts = [`${scanned.variables} vars`, `${scanned.queries} queries`, `${scanned.logs} logs`, `${scanned.observations} functions`];
|
|
257
|
+
if (scanned.llmCalls) scanParts.push(`${scanned.llmCalls} LLM calls`);
|
|
258
|
+
if (scanned.agentEvents) scanParts.push(`${scanned.agentEvents} agent events`);
|
|
259
|
+
if (scanned.mcpCalls) scanParts.push(`${scanned.mcpCalls} MCP calls`);
|
|
260
|
+
console.log(chalk.gray(` Scanned: ${scanParts.join(', ')}`));
|
|
163
261
|
|
|
164
262
|
if (deduped.length === 0) {
|
|
165
263
|
console.log(chalk.green(' No security issues found. ✓'));
|
package/src/index.ts
CHANGED
|
@@ -951,6 +951,7 @@ program
|
|
|
951
951
|
.command("eval")
|
|
952
952
|
.description("Score agent runs on reliability — completion, errors, cost efficiency, tool reliability, latency")
|
|
953
953
|
.option("--json", "Output raw JSON for CI integration")
|
|
954
|
+
.option("--fail-under <score>", "Exit with code 1 if overall score is below this threshold (0-100, for CI)")
|
|
954
955
|
.action(async (opts) => {
|
|
955
956
|
const { evalCommand } = await import("./commands/eval");
|
|
956
957
|
evalCommand(opts);
|