trickle-cli 0.1.189 → 0.1.190
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/eval.d.ts +1 -0
- package/dist/commands/eval.js +17 -1
- package/dist/index.js +1 -0
- package/package.json +1 -1
- package/src/commands/eval.ts +19 -2
- package/src/index.ts +1 -0
package/dist/commands/eval.d.ts
CHANGED
package/dist/commands/eval.js
CHANGED
|
@@ -76,7 +76,15 @@ function evalCommand(opts) {
|
|
|
76
76
|
}
|
|
77
77
|
const result = scoreRun(agentEvents, llmCalls, errors, mcpCalls);
|
|
78
78
|
if (opts.json) {
|
|
79
|
-
|
|
79
|
+
const threshold = opts.failUnder ? parseInt(opts.failUnder, 10) : undefined;
|
|
80
|
+
const output = {
|
|
81
|
+
...result,
|
|
82
|
+
...(threshold !== undefined ? { threshold, passed: result.overallScore >= threshold } : {}),
|
|
83
|
+
};
|
|
84
|
+
console.log(JSON.stringify(output, null, 2));
|
|
85
|
+
if (threshold !== undefined && result.overallScore < threshold) {
|
|
86
|
+
process.exit(1);
|
|
87
|
+
}
|
|
80
88
|
return;
|
|
81
89
|
}
|
|
82
90
|
// Pretty print
|
|
@@ -104,6 +112,14 @@ function evalCommand(opts) {
|
|
|
104
112
|
}
|
|
105
113
|
}
|
|
106
114
|
console.log('');
|
|
115
|
+
// CI mode: exit with non-zero if score below threshold
|
|
116
|
+
if (opts.failUnder) {
|
|
117
|
+
const threshold = parseInt(opts.failUnder, 10);
|
|
118
|
+
if (!isNaN(threshold) && result.overallScore < threshold) {
|
|
119
|
+
console.log(chalk_1.default.red(` FAIL: Score ${result.overallScore} is below threshold ${threshold}`));
|
|
120
|
+
process.exit(1);
|
|
121
|
+
}
|
|
122
|
+
}
|
|
107
123
|
}
|
|
108
124
|
function printDimension(name, dim) {
|
|
109
125
|
const bar = renderBar(dim.score);
|
package/dist/index.js
CHANGED
|
@@ -918,6 +918,7 @@ program
|
|
|
918
918
|
.command("eval")
|
|
919
919
|
.description("Score agent runs on reliability — completion, errors, cost efficiency, tool reliability, latency")
|
|
920
920
|
.option("--json", "Output raw JSON for CI integration")
|
|
921
|
+
.option("--fail-under <score>", "Exit with code 1 if overall score is below this threshold (0-100, for CI)")
|
|
921
922
|
.action(async (opts) => {
|
|
922
923
|
const { evalCommand } = await Promise.resolve().then(() => __importStar(require("./commands/eval")));
|
|
923
924
|
evalCommand(opts);
|
package/package.json
CHANGED
package/src/commands/eval.ts
CHANGED
|
@@ -36,7 +36,7 @@ function readJsonl(fp: string): any[] {
|
|
|
36
36
|
.map(l => { try { return JSON.parse(l); } catch { return null; } }).filter(Boolean);
|
|
37
37
|
}
|
|
38
38
|
|
|
39
|
-
export function evalCommand(opts: { json?: boolean }): void {
|
|
39
|
+
export function evalCommand(opts: { json?: boolean; failUnder?: string }): void {
|
|
40
40
|
const dir = process.env.TRICKLE_LOCAL_DIR || path.join(process.cwd(), '.trickle');
|
|
41
41
|
const agentEvents = readJsonl(path.join(dir, 'agents.jsonl'));
|
|
42
42
|
const llmCalls = readJsonl(path.join(dir, 'llm.jsonl'));
|
|
@@ -51,7 +51,15 @@ export function evalCommand(opts: { json?: boolean }): void {
|
|
|
51
51
|
const result = scoreRun(agentEvents, llmCalls, errors, mcpCalls);
|
|
52
52
|
|
|
53
53
|
if (opts.json) {
|
|
54
|
-
|
|
54
|
+
const threshold = opts.failUnder ? parseInt(opts.failUnder, 10) : undefined;
|
|
55
|
+
const output = {
|
|
56
|
+
...result,
|
|
57
|
+
...(threshold !== undefined ? { threshold, passed: result.overallScore >= threshold } : {}),
|
|
58
|
+
};
|
|
59
|
+
console.log(JSON.stringify(output, null, 2));
|
|
60
|
+
if (threshold !== undefined && result.overallScore < threshold) {
|
|
61
|
+
process.exit(1);
|
|
62
|
+
}
|
|
55
63
|
return;
|
|
56
64
|
}
|
|
57
65
|
|
|
@@ -85,6 +93,15 @@ export function evalCommand(opts: { json?: boolean }): void {
|
|
|
85
93
|
}
|
|
86
94
|
|
|
87
95
|
console.log('');
|
|
96
|
+
|
|
97
|
+
// CI mode: exit with non-zero if score below threshold
|
|
98
|
+
if (opts.failUnder) {
|
|
99
|
+
const threshold = parseInt(opts.failUnder, 10);
|
|
100
|
+
if (!isNaN(threshold) && result.overallScore < threshold) {
|
|
101
|
+
console.log(chalk.red(` FAIL: Score ${result.overallScore} is below threshold ${threshold}`));
|
|
102
|
+
process.exit(1);
|
|
103
|
+
}
|
|
104
|
+
}
|
|
88
105
|
}
|
|
89
106
|
|
|
90
107
|
function printDimension(name: string, dim: { score: number; detail: string }): void {
|
package/src/index.ts
CHANGED
|
@@ -951,6 +951,7 @@ program
|
|
|
951
951
|
.command("eval")
|
|
952
952
|
.description("Score agent runs on reliability — completion, errors, cost efficiency, tool reliability, latency")
|
|
953
953
|
.option("--json", "Output raw JSON for CI integration")
|
|
954
|
+
.option("--fail-under <score>", "Exit with code 1 if overall score is below this threshold (0-100, for CI)")
|
|
954
955
|
.action(async (opts) => {
|
|
955
956
|
const { evalCommand } = await import("./commands/eval");
|
|
956
957
|
evalCommand(opts);
|