@wayai/cli 0.2.38 → 0.2.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/eval-results.d.ts +10 -0
- package/dist/commands/eval-results.js +158 -0
- package/dist/commands/eval-results.js.map +1 -0
- package/dist/commands/evals.d.ts +9 -0
- package/dist/commands/evals.js +40 -0
- package/dist/commands/evals.js.map +1 -0
- package/dist/commands/run-eval.d.ts +11 -0
- package/dist/commands/run-eval.js +85 -0
- package/dist/commands/run-eval.js.map +1 -0
- package/dist/index.js +25 -0
- package/dist/index.js.map +1 -1
- package/dist/lib/api-client.d.ts +127 -0
- package/dist/lib/api-client.js +35 -0
- package/dist/lib/api-client.js.map +1 -1
- package/dist/lib/eval-format.d.ts +27 -0
- package/dist/lib/eval-format.js +32 -0
- package/dist/lib/eval-format.js.map +1 -0
- package/package.json +1 -1
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* wayai eval-results — view results from a completed eval session
|
|
3
|
+
*
|
|
4
|
+
* Usage:
|
|
5
|
+
* wayai eval-results --session <id> # show aggregate results
|
|
6
|
+
* wayai eval-results --session <id> --runs # show individual run details
|
|
7
|
+
* wayai eval-results --session <id> --json # raw JSON output
|
|
8
|
+
* wayai eval-results --eval "Test Greeting" # latest session results for an eval
|
|
9
|
+
*/
|
|
10
|
+
export declare function evalResultsCommand(args: string[]): Promise<void>;
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* wayai eval-results — view results from a completed eval session
|
|
3
|
+
*
|
|
4
|
+
* Usage:
|
|
5
|
+
* wayai eval-results --session <id> # show aggregate results
|
|
6
|
+
* wayai eval-results --session <id> --runs # show individual run details
|
|
7
|
+
* wayai eval-results --session <id> --json # raw JSON output
|
|
8
|
+
* wayai eval-results --eval "Test Greeting" # latest session results for an eval
|
|
9
|
+
*/
|
|
10
|
+
import { requireAuth } from '../lib/auth.js';
|
|
11
|
+
import { requireRepoConfig } from '../lib/repo-config.js';
|
|
12
|
+
import { ApiClient } from '../lib/api-client.js';
|
|
13
|
+
import { printResultsTable } from '../lib/eval-format.js';
|
|
14
|
+
export async function evalResultsCommand(args) {
|
|
15
|
+
const { config, accessToken } = await requireAuth();
|
|
16
|
+
const repoConfig = requireRepoConfig();
|
|
17
|
+
// Parse args
|
|
18
|
+
let sessionId;
|
|
19
|
+
let evalName;
|
|
20
|
+
let showRuns = false;
|
|
21
|
+
let jsonOutput = false;
|
|
22
|
+
for (let i = 0; i < args.length; i++) {
|
|
23
|
+
const arg = args[i];
|
|
24
|
+
if (arg === '--session') {
|
|
25
|
+
if (i + 1 >= args.length) {
|
|
26
|
+
console.error('--session requires a value');
|
|
27
|
+
process.exit(1);
|
|
28
|
+
}
|
|
29
|
+
sessionId = args[++i];
|
|
30
|
+
}
|
|
31
|
+
else if (arg === '--eval') {
|
|
32
|
+
if (i + 1 >= args.length) {
|
|
33
|
+
console.error('--eval requires a value');
|
|
34
|
+
process.exit(1);
|
|
35
|
+
}
|
|
36
|
+
evalName = args[++i];
|
|
37
|
+
}
|
|
38
|
+
else if (arg === '--runs') {
|
|
39
|
+
showRuns = true;
|
|
40
|
+
}
|
|
41
|
+
else if (arg === '--json') {
|
|
42
|
+
jsonOutput = true;
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
if (!sessionId && !evalName) {
|
|
46
|
+
console.error('Usage: wayai eval-results --session <id> [--runs] [--json]');
|
|
47
|
+
console.error(' wayai eval-results --eval <name> [--runs] [--json]');
|
|
48
|
+
process.exit(1);
|
|
49
|
+
}
|
|
50
|
+
const client = new ApiClient({ apiUrl: config.api_url, accessToken });
|
|
51
|
+
// If --eval, find the latest completed session containing that eval
|
|
52
|
+
if (evalName && !sessionId) {
|
|
53
|
+
sessionId = await findLatestSessionForEval(client, repoConfig.hub_id, evalName);
|
|
54
|
+
}
|
|
55
|
+
if (!sessionId) {
|
|
56
|
+
// findLatestSessionForEval already printed the error
|
|
57
|
+
return;
|
|
58
|
+
}
|
|
59
|
+
if (showRuns) {
|
|
60
|
+
await showRunDetails(client, sessionId, evalName, jsonOutput);
|
|
61
|
+
}
|
|
62
|
+
else {
|
|
63
|
+
await showAggregateResults(client, sessionId, jsonOutput);
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
async function findLatestSessionForEval(client, hubId, evalName) {
|
|
67
|
+
// Fetch evals and completed sessions in parallel
|
|
68
|
+
const [evalsResult, sessionsResult] = await Promise.all([
|
|
69
|
+
client.getEvals(hubId),
|
|
70
|
+
client.getEvalSessions(hubId, { status: 'completed' }),
|
|
71
|
+
]);
|
|
72
|
+
const matchingEval = evalsResult.data.evals.find((e) => e.eval_name.toLowerCase() === evalName.toLowerCase());
|
|
73
|
+
if (!matchingEval) {
|
|
74
|
+
console.error(`Eval "${evalName}" not found in this hub.`);
|
|
75
|
+
process.exit(1);
|
|
76
|
+
}
|
|
77
|
+
const completedSessions = sessionsResult.data.sessions;
|
|
78
|
+
if (completedSessions.length === 0) {
|
|
79
|
+
console.error('No completed sessions found for this hub.');
|
|
80
|
+
process.exit(1);
|
|
81
|
+
}
|
|
82
|
+
// Sessions are ordered by created_at desc, so first match is the latest
|
|
83
|
+
for (const session of completedSessions) {
|
|
84
|
+
const details = await client.getEvalSessionDetails(session.eval_session_id);
|
|
85
|
+
const hasEval = details.data.results.some((r) => r.eval_fk === matchingEval.eval_id);
|
|
86
|
+
if (hasEval) {
|
|
87
|
+
return session.eval_session_id;
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
console.error(`No completed session found containing eval "${evalName}".`);
|
|
91
|
+
process.exit(1);
|
|
92
|
+
}
|
|
93
|
+
async function showAggregateResults(client, sessionId, jsonOutput) {
|
|
94
|
+
const details = await client.getEvalSessionDetails(sessionId);
|
|
95
|
+
if (jsonOutput) {
|
|
96
|
+
console.log(JSON.stringify(details.data, null, 2));
|
|
97
|
+
return;
|
|
98
|
+
}
|
|
99
|
+
const session = details.data.session;
|
|
100
|
+
const results = details.data.results;
|
|
101
|
+
console.log(`Session: ${session.session_name} (${session.session_status})`);
|
|
102
|
+
if (session.started_at) {
|
|
103
|
+
console.log(`Started: ${new Date(session.started_at).toLocaleString()}`);
|
|
104
|
+
}
|
|
105
|
+
if (session.completed_at && session.started_at) {
|
|
106
|
+
const durationMs = new Date(session.completed_at).getTime() - new Date(session.started_at).getTime();
|
|
107
|
+
console.log(`Duration: ${(durationMs / 1000).toFixed(0)}s`);
|
|
108
|
+
}
|
|
109
|
+
console.log('');
|
|
110
|
+
printResultsTable(results, 'detailed');
|
|
111
|
+
}
|
|
112
|
+
async function showRunDetails(client, sessionId, evalNameFilter, jsonOutput) {
|
|
113
|
+
// Fetch session details (for header + eval name resolution) once
|
|
114
|
+
const details = await client.getEvalSessionDetails(sessionId);
|
|
115
|
+
// If filtering by eval name, resolve eval_id from the already-fetched details
|
|
116
|
+
let evalId;
|
|
117
|
+
if (evalNameFilter) {
|
|
118
|
+
const matchingResult = details.data.results.find((r) => r.eval?.eval_name?.toLowerCase() === evalNameFilter.toLowerCase());
|
|
119
|
+
if (matchingResult) {
|
|
120
|
+
evalId = matchingResult.eval_fk;
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
const runsResult = await client.getEvalSessionRuns(sessionId, { evalId });
|
|
124
|
+
if (jsonOutput) {
|
|
125
|
+
console.log(JSON.stringify(runsResult.data, null, 2));
|
|
126
|
+
return;
|
|
127
|
+
}
|
|
128
|
+
console.log(`Session: ${details.data.session.session_name} (${details.data.session.session_status})\n`);
|
|
129
|
+
if (runsResult.data.runs.length === 0) {
|
|
130
|
+
console.log('No runs found.');
|
|
131
|
+
return;
|
|
132
|
+
}
|
|
133
|
+
for (const run of runsResult.data.runs) {
|
|
134
|
+
const evalName = run.eval?.eval_name || run.eval_fk.slice(0, 8);
|
|
135
|
+
const status = run.response_match === true ? 'PASS' : run.response_match === false ? 'FAIL' : 'PENDING';
|
|
136
|
+
const time = run.execution_time_ms != null ? `(${(run.execution_time_ms / 1000).toFixed(1)}s)` : '';
|
|
137
|
+
console.log(`${evalName} — Run #${run.run_number} [${status}] ${time}`);
|
|
138
|
+
if (run.eval_response?.content) {
|
|
139
|
+
console.log(` Response: "${truncate(run.eval_response.content, 80)}"`);
|
|
140
|
+
}
|
|
141
|
+
if (run.response_comment) {
|
|
142
|
+
console.log(` Comment: ${run.response_comment}`);
|
|
143
|
+
}
|
|
144
|
+
if (run.scores && Object.keys(run.scores).length > 0) {
|
|
145
|
+
const scoresStr = Object.entries(run.scores)
|
|
146
|
+
.map(([key, val]) => `${key}: ${Math.round(val * 100)}%`)
|
|
147
|
+
.join(', ');
|
|
148
|
+
console.log(` Scores: ${scoresStr}`);
|
|
149
|
+
}
|
|
150
|
+
console.log('');
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
/**
 * Shorten `str` so that it is at most `maxLen` characters long.
 *
 * Strings that already fit are returned unchanged. Longer strings are cut and
 * end in '...' so the cut is visible. When `maxLen` is below 3 there is no
 * room for the ellipsis, so the string is simply cut to `maxLen` characters;
 * a negative slice end there would otherwise yield a result longer than the
 * limit.
 *
 * @param str Text to shorten.
 * @param maxLen Maximum length of the returned string.
 * @returns `str` itself, or a shortened copy no longer than `maxLen`.
 */
function truncate(str, maxLen) {
    if (str.length <= maxLen) {
        return str;
    }
    const limit = Math.max(0, maxLen);
    if (limit < 3) {
        return str.slice(0, limit);
    }
    return str.slice(0, limit - 3) + '...';
}
|
|
158
|
+
//# sourceMappingURL=eval-results.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eval-results.js","sourceRoot":"","sources":["../../src/commands/eval-results.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAC7C,OAAO,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAC;AAC1D,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AACjD,OAAO,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAC;AAE1D,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,IAAc;IACrD,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,WAAW,EAAE,CAAC;IACpD,MAAM,UAAU,GAAG,iBAAiB,EAAE,CAAC;IAEvC,aAAa;IACb,IAAI,SAA6B,CAAC;IAClC,IAAI,QAA4B,CAAC;IACjC,IAAI,QAAQ,GAAG,KAAK,CAAC;IACrB,IAAI,UAAU,GAAG,KAAK,CAAC;IAEvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,IAAI,GAAG,KAAK,WAAW,EAAE,CAAC;YACxB,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;gBAAC,OAAO,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAC;gBAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAAC,CAAC;YAC3F,SAAS,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;QACxB,CAAC;aAAM,IAAI,GAAG,KAAK,QAAQ,EAAE,CAAC;YAC5B,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;gBAAC,OAAO,CAAC,KAAK,CAAC,yBAAyB,CAAC,CAAC;gBAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAAC,CAAC;YACxF,QAAQ,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;QACvB,CAAC;aAAM,IAAI,GAAG,KAAK,QAAQ,EAAE,CAAC;YAC5B,QAAQ,GAAG,IAAI,CAAC;QAClB,CAAC;aAAM,IAAI,GAAG,KAAK,QAAQ,EAAE,CAAC;YAC5B,UAAU,GAAG,IAAI,CAAC;QACpB,CAAC;IACH,CAAC;IAED,IAAI,CAAC,SAAS,IAAI,CAAC,QAAQ,EAAE,CAAC;QAC5B,OAAO,CAAC,KAAK,CAAC,4DAA4D,CAAC,CAAC;QAC5E,OAAO,CAAC,KAAK,CAAC,2DAA2D,CAAC,CAAC;QAC3E,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,OAAO,EAAE,WAAW,EAAE,CAAC,CAAC;IAEtE,oEAAoE;IACpE,IAAI,QAAQ,IAAI,CAAC,SAAS,EAAE,CAAC;QAC3B,SAAS,GAAG,MAAM,wBAAwB,CAAC,MAAM,EAAE,UAAU,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;IAClF,CAAC;IAED,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,qDAAqD;QACrD,OAAO;IACT,CAAC;IAED,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,cAAc,CAAC,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,UAAU,CAAC,CAAC;IAChE,CAAC;SAAM,CAAC;QACN,MAAM,oBAAoB,CAAC,MAAM,EAAE,SAAS,EAAE,UAAU,CAAC,CAAC;IAC5D,
CAAC;AACH,CAAC;AAED,KAAK,UAAU,wBAAwB,CACrC,MAAiB,EACjB,KAAa,EACb,QAAgB;IAEhB,iDAAiD;IACjD,MAAM,CAAC,WAAW,EAAE,cAAc,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;QACtD,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC;QACtB,MAAM,CAAC,eAAe,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC;KACvD,CAAC,CAAC;IAEH,MAAM,YAAY,GAAG,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAC9C,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,WAAW,EAAE,KAAK,QAAQ,CAAC,WAAW,EAAE,CAC5D,CAAC;IAEF,IAAI,CAAC,YAAY,EAAE,CAAC;QAClB,OAAO,CAAC,KAAK,CAAC,SAAS,QAAQ,0BAA0B,CAAC,CAAC;QAC3D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,iBAAiB,GAAG,cAAc,CAAC,IAAI,CAAC,QAAQ,CAAC;IAEvD,IAAI,iBAAiB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACnC,OAAO,CAAC,KAAK,CAAC,2CAA2C,CAAC,CAAC;QAC3D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,wEAAwE;IACxE,KAAK,MAAM,OAAO,IAAI,iBAAiB,EAAE,CAAC;QACxC,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,qBAAqB,CAAC,OAAO,CAAC,eAAe,CAAC,CAAC;QAC5E,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,YAAY,CAAC,OAAO,CAAC,CAAC;QACrF,IAAI,OAAO,EAAE,CAAC;YACZ,OAAO,OAAO,CAAC,eAAe,CAAC;QACjC,CAAC;IACH,CAAC;IAED,OAAO,CAAC,KAAK,CAAC,+CAA+C,QAAQ,IAAI,CAAC,CAAC;IAC3E,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,KAAK,UAAU,oBAAoB,CACjC,MAAiB,EACjB,SAAiB,EACjB,UAAmB;IAEnB,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,qBAAqB,CAAC,SAAS,CAAC,CAAC;IAE9D,IAAI,UAAU,EAAE,CAAC;QACf,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QACnD,OAAO;IACT,CAAC;IAED,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC;IACrC,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC;IAErC,OAAO,CAAC,GAAG,CAAC,YAAY,OAAO,CAAC,YAAY,KAAK,OAAO,CAAC,cAAc,GAAG,CAAC,CAAC;IAC5E,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;QACvB,OAAO,CAAC,GAAG,CAAC,YAAY,IAAI,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,cAAc,EAAE,EAAE,CAAC,CAAC;IAC3E,CAAC;IACD,IAAI,OAAO,CAAC,YAAY,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;QAC/C,MAAM,UAAU,GAAG,IAAI,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC,OAAO,EAAE,GAAG,IAAI,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,OAAO,EAAE,CAAC;QACrG,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,
UAAU,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IAC9D,CAAC;IACD,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAEhB,iBAAiB,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC;AACzC,CAAC;AAED,KAAK,UAAU,cAAc,CAC3B,MAAiB,EACjB,SAAiB,EACjB,cAAkC,EAClC,UAAmB;IAEnB,iEAAiE;IACjE,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,qBAAqB,CAAC,SAAS,CAAC,CAAC;IAE9D,8EAA8E;IAC9E,IAAI,MAA0B,CAAC;IAC/B,IAAI,cAAc,EAAE,CAAC;QACnB,MAAM,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAC9C,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,KAAK,cAAc,CAAC,WAAW,EAAE,CACzE,CAAC;QACF,IAAI,cAAc,EAAE,CAAC;YACnB,MAAM,GAAG,cAAc,CAAC,OAAO,CAAC;QAClC,CAAC;IACH,CAAC;IAED,MAAM,UAAU,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,SAAS,EAAE,EAAE,MAAM,EAAE,CAAC,CAAC;IAE1E,IAAI,UAAU,EAAE,CAAC;QACf,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QACtD,OAAO;IACT,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,YAAY,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,YAAY,KAAK,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,cAAc,KAAK,CAAC,CAAC;IAExG,IAAI,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;QAC9B,OAAO;IACT,CAAC;IAED,KAAK,MAAM,GAAG,IAAI,UAAU,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QACvC,MAAM,QAAQ,GAAG,GAAG,CAAC,IAAI,EAAE,SAAS,IAAI,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QAChE,MAAM,MAAM,GAAG,GAAG,CAAC,cAAc,KAAK,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,cAAc,KAAK,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC;QACxG,MAAM,IAAI,GAAG,GAAG,CAAC,iBAAiB,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,iBAAiB,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;QAEpG,OAAO,CAAC,GAAG,CAAC,GAAG,QAAQ,WAAW,GAAG,CAAC,UAAU,KAAK,MAAM,KAAK,IAAI,EAAE,CAAC,CAAC;QAExE,IAAI,GAAG,CAAC,aAAa,EAAE,OAAO,EAAE,CAAC;YAC/B,OAAO,CAAC,GAAG,CAAC,gBAAgB,QAAQ,CAAC,GAAG,CAAC,aAAa,CAAC,OAAO,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC;QAC1E,CAAC;QAED,IAAI,GAAG,CAAC,gBAAgB,EAAE,CAAC;YACzB,OAAO,CAAC,GAAG,CAAC,cAAc,GAAG,CAAC,gBAAgB,EAAE,CAAC,CAAC;QACpD,CAAC;QAED,IAAI,GAAG,CAAC,MAAM,IAAI,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,MA
AM,GAAG,CAAC,EAAE,CAAC;YACrD,MAAM,SAAS,GAAG,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC;iBACzC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,KAAK,IAAI,CAAC,KAAK,CAAC,GAAG,GAAG,GAAG,CAAC,GAAG,CAAC;iBACxD,IAAI,CAAC,IAAI,CAAC,CAAC;YACd,OAAO,CAAC,GAAG,CAAC,aAAa,SAAS,EAAE,CAAC,CAAC;QACxC,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED,SAAS,QAAQ,CAAC,GAAW,EAAE,MAAc;IAC3C,IAAI,GAAG,CAAC,MAAM,IAAI,MAAM;QAAE,OAAO,GAAG,CAAC;IACrC,OAAO,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,GAAG,CAAC,CAAC,GAAG,KAAK,CAAC;AAC1C,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* wayai evals — list evaluation scenarios for the current hub
|
|
3
|
+
*
|
|
4
|
+
* Usage:
|
|
5
|
+
* wayai evals # list all evals
|
|
6
|
+
* wayai evals --enabled # show only enabled evals
|
|
7
|
+
* wayai evals --disabled # show only disabled evals
|
|
8
|
+
*/
|
|
9
|
+
export declare function evalsCommand(args: string[]): Promise<void>;
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* wayai evals — list evaluation scenarios for the current hub
|
|
3
|
+
*
|
|
4
|
+
* Usage:
|
|
5
|
+
* wayai evals # list all evals
|
|
6
|
+
* wayai evals --enabled # show only enabled evals
|
|
7
|
+
* wayai evals --disabled # show only disabled evals
|
|
8
|
+
*/
|
|
9
|
+
import { requireAuth } from '../lib/auth.js';
|
|
10
|
+
import { requireRepoConfig } from '../lib/repo-config.js';
|
|
11
|
+
import { ApiClient } from '../lib/api-client.js';
|
|
12
|
+
export async function evalsCommand(args) {
|
|
13
|
+
const { config, accessToken } = await requireAuth();
|
|
14
|
+
const repoConfig = requireRepoConfig();
|
|
15
|
+
// Parse args
|
|
16
|
+
let enabledFilter;
|
|
17
|
+
for (let i = 0; i < args.length; i++) {
|
|
18
|
+
const arg = args[i];
|
|
19
|
+
if (arg === '--enabled') {
|
|
20
|
+
enabledFilter = 'true';
|
|
21
|
+
}
|
|
22
|
+
else if (arg === '--disabled') {
|
|
23
|
+
enabledFilter = 'false';
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
const client = new ApiClient({ apiUrl: config.api_url, accessToken });
|
|
27
|
+
const result = await client.getEvals(repoConfig.hub_id, { enabled: enabledFilter });
|
|
28
|
+
if (result.data.evals.length === 0) {
|
|
29
|
+
console.log('No evals found.');
|
|
30
|
+
return;
|
|
31
|
+
}
|
|
32
|
+
console.log(`Evals (${result.data.evals.length}):\n`);
|
|
33
|
+
for (const eval_ of result.data.evals) {
|
|
34
|
+
const status = eval_.enabled ? 'enabled' : 'disabled';
|
|
35
|
+
const runs = `${eval_.number_of_runs} run${eval_.number_of_runs === 1 ? '' : 's'}`;
|
|
36
|
+
const agentName = eval_.agent?.agent_name || 'unknown';
|
|
37
|
+
console.log(` ${eval_.eval_name.padEnd(20)} ${eval_.eval_id.slice(0, 8)} ${status.padEnd(9)} ${runs.padEnd(7)} Agent: ${agentName}`);
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
//# sourceMappingURL=evals.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evals.js","sourceRoot":"","sources":["../../src/commands/evals.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAC7C,OAAO,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAC;AAC1D,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AAEjD,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,IAAc;IAC/C,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,WAAW,EAAE,CAAC;IACpD,MAAM,UAAU,GAAG,iBAAiB,EAAE,CAAC;IAEvC,aAAa;IACb,IAAI,aAAiC,CAAC;IAEtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,IAAI,GAAG,KAAK,WAAW,EAAE,CAAC;YACxB,aAAa,GAAG,MAAM,CAAC;QACzB,CAAC;aAAM,IAAI,GAAG,KAAK,YAAY,EAAE,CAAC;YAChC,aAAa,GAAG,OAAO,CAAC;QAC1B,CAAC;IACH,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,OAAO,EAAE,WAAW,EAAE,CAAC,CAAC;IACtE,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,UAAU,CAAC,MAAM,EAAE,EAAE,OAAO,EAAE,aAAa,EAAE,CAAC,CAAC;IAEpF,IAAI,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACnC,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAC;QAC/B,OAAO;IACT,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,UAAU,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,MAAM,CAAC,CAAC;IAEtD,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;QACtC,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,UAAU,CAAC;QACtD,MAAM,IAAI,GAAG,GAAG,KAAK,CAAC,cAAc,OAAO,KAAK,CAAC,cAAc,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;QACnF,MAAM,SAAS,GAAG,KAAK,CAAC,KAAK,EAAE,UAAU,IAAI,SAAS,CAAC;QACvD,OAAO,CAAC,GAAG,CAAC,KAAK,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,KAAK,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,KAAK,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,YAAY,SAAS,EAAE,CAAC,CAAC;IAC5I,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* wayai run-eval — run all enabled evals and show results
|
|
3
|
+
*
|
|
4
|
+
* Creates a session, runs it (all enabled evals), polls until completion, prints summary.
|
|
5
|
+
*
|
|
6
|
+
* Usage:
|
|
7
|
+
* wayai run-eval # run all enabled evals and wait for results
|
|
8
|
+
* wayai run-eval --no-wait # start session and exit (prints session ID)
|
|
9
|
+
* wayai run-eval --timeout 120 # wait max 2 minutes
|
|
10
|
+
*/
|
|
11
|
+
export declare function runEvalCommand(args: string[]): Promise<void>;
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* wayai run-eval — run all enabled evals and show results
|
|
3
|
+
*
|
|
4
|
+
* Creates a session, runs it (all enabled evals), polls until completion, prints summary.
|
|
5
|
+
*
|
|
6
|
+
* Usage:
|
|
7
|
+
* wayai run-eval # run all enabled evals and wait for results
|
|
8
|
+
* wayai run-eval --no-wait # start session and exit (prints session ID)
|
|
9
|
+
* wayai run-eval --timeout 120 # wait max 2 minutes
|
|
10
|
+
*/
|
|
11
|
+
import { requireAuth } from '../lib/auth.js';
|
|
12
|
+
import { requireRepoConfig } from '../lib/repo-config.js';
|
|
13
|
+
import { ApiClient } from '../lib/api-client.js';
|
|
14
|
+
import { printResultsTable } from '../lib/eval-format.js';
|
|
15
|
+
// Session statuses at which runEvalCommand stops polling and prints the results table.
const TERMINAL_STATUSES = new Set(['completed', 'failed', 'cancelled', 'awaiting_review']);
|
|
16
|
+
// Pause between two consecutive session-status polls, in milliseconds.
const POLL_INTERVAL_MS = 3000;
|
|
17
|
+
// Number of seconds to wait for a session when --timeout is not given.
const DEFAULT_TIMEOUT_S = 600;
|
|
18
|
+
export async function runEvalCommand(args) {
|
|
19
|
+
const { config, accessToken } = await requireAuth();
|
|
20
|
+
const repoConfig = requireRepoConfig();
|
|
21
|
+
// Parse args
|
|
22
|
+
let noWait = false;
|
|
23
|
+
let timeoutSeconds = DEFAULT_TIMEOUT_S;
|
|
24
|
+
for (let i = 0; i < args.length; i++) {
|
|
25
|
+
const arg = args[i];
|
|
26
|
+
if (arg === '--no-wait') {
|
|
27
|
+
noWait = true;
|
|
28
|
+
}
|
|
29
|
+
else if (arg === '--timeout') {
|
|
30
|
+
timeoutSeconds = parseInt(args[++i], 10);
|
|
31
|
+
if (isNaN(timeoutSeconds) || timeoutSeconds <= 0) {
|
|
32
|
+
console.error('--timeout must be a positive number (seconds)');
|
|
33
|
+
process.exit(1);
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
const client = new ApiClient({ apiUrl: config.api_url, accessToken });
|
|
38
|
+
// 1. Verify hub has enabled evals
|
|
39
|
+
const evalsResult = await client.getEvals(repoConfig.hub_id, { enabled: 'true' });
|
|
40
|
+
const enabledCount = evalsResult.data.evals.length;
|
|
41
|
+
if (enabledCount === 0) {
|
|
42
|
+
console.error('No enabled evals found for this hub. Enable evals in the platform UI first.');
|
|
43
|
+
process.exit(1);
|
|
44
|
+
}
|
|
45
|
+
console.log(`Found ${enabledCount} enabled eval${enabledCount === 1 ? '' : 's'}`);
|
|
46
|
+
// 2. Create session
|
|
47
|
+
const now = new Date();
|
|
48
|
+
const sessionName = `CLI run ${now.toISOString().slice(0, 16).replace('T', ' ')}`;
|
|
49
|
+
const createResult = await client.createEvalSession(repoConfig.hub_id, sessionName);
|
|
50
|
+
const sessionId = createResult.data.session.eval_session_id;
|
|
51
|
+
console.log(`Created session: ${sessionName} (${sessionId})`);
|
|
52
|
+
// 3. Run session
|
|
53
|
+
await client.runEvalSession(sessionId);
|
|
54
|
+
console.log('Session started');
|
|
55
|
+
// 4. If --no-wait, print session ID and exit
|
|
56
|
+
if (noWait) {
|
|
57
|
+
console.log(`\nSession ID: ${sessionId}`);
|
|
58
|
+
console.log('Use `wayai eval-results --session ' + sessionId + '` to check results.');
|
|
59
|
+
return;
|
|
60
|
+
}
|
|
61
|
+
// 5. Poll until terminal status
|
|
62
|
+
const deadline = Date.now() + timeoutSeconds * 1000;
|
|
63
|
+
while (Date.now() < deadline) {
|
|
64
|
+
await sleep(POLL_INTERVAL_MS);
|
|
65
|
+
const details = await client.getEvalSessionDetails(sessionId);
|
|
66
|
+
const session = details.data.session;
|
|
67
|
+
const results = details.data.results;
|
|
68
|
+
// Calculate progress
|
|
69
|
+
const totalRuns = results.reduce((sum, r) => sum + r.total_runs, 0);
|
|
70
|
+
const completedRuns = results.reduce((sum, r) => sum + r.successful_runs + r.failed_runs, 0);
|
|
71
|
+
if (TERMINAL_STATUSES.has(session.session_status)) {
|
|
72
|
+
console.log('');
|
|
73
|
+
console.log(`Session ${session.session_status}: ${session.session_name} (${session.eval_session_id})\n`);
|
|
74
|
+
printResultsTable(results, 'compact');
|
|
75
|
+
return;
|
|
76
|
+
}
|
|
77
|
+
process.stdout.write(`\rRunning... ${completedRuns}/${totalRuns} completed`);
|
|
78
|
+
}
|
|
79
|
+
console.log(`\n\nTimeout after ${timeoutSeconds}s. Session is still running.`);
|
|
80
|
+
console.log(`Check results: wayai eval-results --session ${sessionId}`);
|
|
81
|
+
}
|
|
82
|
+
/**
 * Return a promise that resolves once a timer of `ms` milliseconds has fired.
 *
 * @param ms Delay passed to setTimeout, in milliseconds.
 */
function sleep(ms) {
    return new Promise((done) => {
        setTimeout(done, ms);
    });
}
|
|
85
|
+
//# sourceMappingURL=run-eval.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"run-eval.js","sourceRoot":"","sources":["../../src/commands/run-eval.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAC7C,OAAO,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAC;AAC1D,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AACjD,OAAO,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAC;AAE1D,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC,CAAC,WAAW,EAAE,QAAQ,EAAE,WAAW,EAAE,iBAAiB,CAAC,CAAC,CAAC;AAC3F,MAAM,gBAAgB,GAAG,IAAI,CAAC;AAC9B,MAAM,iBAAiB,GAAG,GAAG,CAAC;AAE9B,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,IAAc;IACjD,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,WAAW,EAAE,CAAC;IACpD,MAAM,UAAU,GAAG,iBAAiB,EAAE,CAAC;IAEvC,aAAa;IACb,IAAI,MAAM,GAAG,KAAK,CAAC;IACnB,IAAI,cAAc,GAAG,iBAAiB,CAAC;IAEvC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,IAAI,GAAG,KAAK,WAAW,EAAE,CAAC;YACxB,MAAM,GAAG,IAAI,CAAC;QAChB,CAAC;aAAM,IAAI,GAAG,KAAK,WAAW,EAAE,CAAC;YAC/B,cAAc,GAAG,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACzC,IAAI,KAAK,CAAC,cAAc,CAAC,IAAI,cAAc,IAAI,CAAC,EAAE,CAAC;gBACjD,OAAO,CAAC,KAAK,CAAC,+CAA+C,CAAC,CAAC;gBAC/D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,OAAO,EAAE,WAAW,EAAE,CAAC,CAAC;IAEtE,kCAAkC;IAClC,MAAM,WAAW,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,UAAU,CAAC,MAAM,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;IAClF,MAAM,YAAY,GAAG,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC;IAEnD,IAAI,YAAY,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,KAAK,CAAC,6EAA6E,CAAC,CAAC;QAC7F,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,SAAS,YAAY,gBAAgB,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;IAElF,oBAAoB;IACpB,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC;IACvB,MAAM,WAAW,GAAG,WAAW,GAAG,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC;IAClF,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAC,UAAU,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC;IACpF,MAAM,SAAS,GAAG,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,eAA
e,CAAC;IAE5D,OAAO,CAAC,GAAG,CAAC,oBAAoB,WAAW,KAAK,SAAS,GAAG,CAAC,CAAC;IAE9D,iBAAiB;IACjB,MAAM,MAAM,CAAC,cAAc,CAAC,SAAS,CAAC,CAAC;IACvC,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAC;IAE/B,6CAA6C;IAC7C,IAAI,MAAM,EAAE,CAAC;QACX,OAAO,CAAC,GAAG,CAAC,iBAAiB,SAAS,EAAE,CAAC,CAAC;QAC1C,OAAO,CAAC,GAAG,CAAC,oCAAoC,GAAG,SAAS,GAAG,qBAAqB,CAAC,CAAC;QACtF,OAAO;IACT,CAAC;IAED,gCAAgC;IAChC,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,cAAc,GAAG,IAAI,CAAC;IAEpD,OAAO,IAAI,CAAC,GAAG,EAAE,GAAG,QAAQ,EAAE,CAAC;QAC7B,MAAM,KAAK,CAAC,gBAAgB,CAAC,CAAC;QAE9B,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,qBAAqB,CAAC,SAAS,CAAC,CAAC;QAC9D,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC;QACrC,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC;QAErC,qBAAqB;QACrB,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;QACpE,MAAM,aAAa,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,eAAe,GAAG,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;QAE7F,IAAI,iBAAiB,CAAC,GAAG,CAAC,OAAO,CAAC,cAAc,CAAC,EAAE,CAAC;YAClD,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAChB,OAAO,CAAC,GAAG,CAAC,WAAW,OAAO,CAAC,cAAc,KAAK,OAAO,CAAC,YAAY,KAAK,OAAO,CAAC,eAAe,KAAK,CAAC,CAAC;YACzG,iBAAiB,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;YACtC,OAAO;QACT,CAAC;QAED,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,gBAAgB,aAAa,IAAI,SAAS,YAAY,CAAC,CAAC;IAC/E,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,qBAAqB,cAAc,8BAA8B,CAAC,CAAC;IAC/E,OAAO,CAAC,GAAG,CAAC,+CAA+C,SAAS,EAAE,CAAC,CAAC;AAC1E,CAAC;AAED,SAAS,KAAK,CAAC,EAAU;IACvB,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,CAAC;AAC3D,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -79,6 +79,21 @@ async function main() {
|
|
|
79
79
|
await syncSkillsCommand(args);
|
|
80
80
|
break;
|
|
81
81
|
}
|
|
82
|
+
case 'evals': {
|
|
83
|
+
const { evalsCommand } = await import('./commands/evals.js');
|
|
84
|
+
await evalsCommand(args);
|
|
85
|
+
break;
|
|
86
|
+
}
|
|
87
|
+
case 'run-eval': {
|
|
88
|
+
const { runEvalCommand } = await import('./commands/run-eval.js');
|
|
89
|
+
await runEvalCommand(args);
|
|
90
|
+
break;
|
|
91
|
+
}
|
|
92
|
+
case 'eval-results': {
|
|
93
|
+
const { evalResultsCommand } = await import('./commands/eval-results.js');
|
|
94
|
+
await evalResultsCommand(args);
|
|
95
|
+
break;
|
|
96
|
+
}
|
|
82
97
|
case 'list': {
|
|
83
98
|
const { listCommand } = await import('./commands/list.js');
|
|
84
99
|
await listCommand(args);
|
|
@@ -123,6 +138,9 @@ Commands:
|
|
|
123
138
|
send-message Send a test message to a preview hub
|
|
124
139
|
conversations List or inspect conversations
|
|
125
140
|
delete-history Delete all your conversations on a preview hub
|
|
141
|
+
evals List evaluation scenarios for the current hub
|
|
142
|
+
run-eval Run all enabled evals and show results
|
|
143
|
+
eval-results View results from a completed eval session
|
|
126
144
|
sync-skills Sync skills to Anthropic provider connections
|
|
127
145
|
list List organizations, projects, and hubs
|
|
128
146
|
update Update CLI to the latest version
|
|
@@ -132,6 +150,13 @@ Flags:
|
|
|
132
150
|
--hub <uuid> Set hub directly in init (for agents/scripting)
|
|
133
151
|
--connection-id <uuid> Limit sync-skills to a specific connection
|
|
134
152
|
--org <uuid> Filter list to a specific organization
|
|
153
|
+
--enabled/--disabled Filter evals by status (evals)
|
|
154
|
+
--no-wait Don't wait for eval session to complete (run-eval)
|
|
155
|
+
--timeout <seconds> Max wait time for eval session (default: 600)
|
|
156
|
+
--session <uuid> Specify eval session ID (eval-results)
|
|
157
|
+
--eval <name> Filter by eval name (eval-results)
|
|
158
|
+
--runs Show individual run details (eval-results)
|
|
159
|
+
--json Output raw JSON (eval-results)
|
|
135
160
|
--version, -v Show CLI version
|
|
136
161
|
|
|
137
162
|
Hub-scoped mode:
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,EAAE,YAAY,EAAE,aAAa,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAC7E,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACzC,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,UAAU,EAAE,gBAAgB,EAAE,oBAAoB,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAClG,OAAO,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAEhD,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAC1D,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,EAAE,cAAc,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC;AAErF,MAAM,CAAC,EAAC,EAAE,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;AAE3C,oDAAoD;AACpD,UAAU,CAAC,OAAO,CAAC,CAAC;AAEpB,KAAK,UAAU,IAAI;IACjB,QAAQ,OAAO,EAAE,CAAC;QAChB,KAAK,OAAO,CAAC,CAAC,CAAC;YACb,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,qBAAqB,CAAC,CAAC;YAC7D,MAAM,YAAY,CAAC,IAAI,CAAC,CAAC;YACzB,MAAM;QACR,CAAC;QACD,KAAK,QAAQ,CAAC,CAAC,CAAC;YACd,MAAM,EAAE,aAAa,EAAE,GAAG,MAAM,MAAM,CAAC,sBAAsB,CAAC,CAAC;YAC/D,MAAM,aAAa,EAAE,CAAC;YACtB,MAAM;QACR,CAAC;QACD,KAAK,QAAQ,CAAC,CAAC,CAAC;YACd,MAAM,EAAE,aAAa,EAAE,GAAG,MAAM,MAAM,CAAC,sBAAsB,CAAC,CAAC;YAC/D,MAAM,aAAa,EAAE,CAAC;YACtB,MAAM;QACR,CAAC;QACD,KAAK,MAAM,CAAC,CAAC,CAAC;YACZ,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;YAC3D,MAAM,WAAW,CAAC,IAAI,CAAC,CAAC;YACxB,MAAM;QACR,CAAC;QACD,KAAK,MAAM,CAAC,CAAC,CAAC;YACZ,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;YAC3D,MAAM,WAAW,CAAC,IAAI,CAAC,CAAC;YACxB,MAAM;QACR,CAAC;QACD,KAAK,MAAM,CAAC,CAAC,CAAC;YACZ,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;YAC3D,MAAM,WAAW,CAAC,IAAI,CAAC,CAAC;YACxB,MAAM;QACR,CAAC;QACD,KAAK,cAAc,CAAC,CAAC,CAAC;YACpB,MAAM,EAAE,kBAAkB,EAAE,GAAG,MAAM,MAAM,CAAC,4BAA4B,CAAC,CAAC;YAC1E,MAAM,kBAAkB,CAAC,IAAI,CAAC,CAAC;YAC/B,MAAM;QACR,CAAC;QACD,KAAK,eAAe,CAAC,CAAC,CAAC;YACrB,MAAM,EAAE,oBAAoB,EAAE,GAAG,MAAM,MAAM,CAAC,6BAA6B,CAAC,CAAC;YAC7E,MAAM,oBAAoB,CAA
C,IAAI,CAAC,CAAC;YACjC,MAAM;QACR,CAAC;QACD,KAAK,gBAAgB,CAAC,CAAC,CAAC;YACtB,MAAM,EAAE,oBAAoB,EAAE,GAAG,MAAM,MAAM,CAAC,8BAA8B,CAAC,CAAC;YAC9E,MAAM,oBAAoB,CAAC,IAAI,CAAC,CAAC;YACjC,MAAM;QACR,CAAC;QACD,KAAK,aAAa,CAAC,CAAC,CAAC;YACnB,MAAM,EAAE,iBAAiB,EAAE,GAAG,MAAM,MAAM,CAAC,2BAA2B,CAAC,CAAC;YACxE,MAAM,iBAAiB,CAAC,IAAI,CAAC,CAAC;YAC9B,MAAM;QACR,CAAC;QACD,KAAK,MAAM,CAAC,CAAC,CAAC;YACZ,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;YAC3D,MAAM,WAAW,CAAC,IAAI,CAAC,CAAC;YACxB,MAAM;QACR,CAAC;QACD,KAAK,QAAQ,CAAC,CAAC,CAAC;YACd,MAAM,EAAE,aAAa,EAAE,GAAG,MAAM,MAAM,CAAC,sBAAsB,CAAC,CAAC;YAC/D,MAAM,aAAa,CAAC,GAAG,CAAC,CAAC;YACzB,MAAM;QACR,CAAC;QACD,KAAK,MAAM,CAAC;QACZ,KAAK,QAAQ,CAAC;QACd,KAAK,IAAI,CAAC;QACV,KAAK,SAAS;YACZ,SAAS,EAAE,CAAC;YACZ,MAAM;QACR,KAAK,WAAW,CAAC;QACjB,KAAK,IAAI;YACP,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;YACzB,MAAM;QACR;YACE,OAAO,CAAC,KAAK,CAAC,oBAAoB,OAAO,EAAE,CAAC,CAAC;YAC7C,SAAS,EAAE,CAAC;YACZ,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;AACH,CAAC;AAED,SAAS,SAAS;IAChB,OAAO,CAAC,GAAG,CAAC
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,EAAE,YAAY,EAAE,aAAa,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAC7E,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACzC,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,UAAU,EAAE,gBAAgB,EAAE,oBAAoB,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAClG,OAAO,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAEhD,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAC1D,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,EAAE,cAAc,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC;AAErF,MAAM,CAAC,EAAC,EAAE,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;AAE3C,oDAAoD;AACpD,UAAU,CAAC,OAAO,CAAC,CAAC;AAEpB,KAAK,UAAU,IAAI;IACjB,QAAQ,OAAO,EAAE,CAAC;QAChB,KAAK,OAAO,CAAC,CAAC,CAAC;YACb,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,qBAAqB,CAAC,CAAC;YAC7D,MAAM,YAAY,CAAC,IAAI,CAAC,CAAC;YACzB,MAAM;QACR,CAAC;QACD,KAAK,QAAQ,CAAC,CAAC,CAAC;YACd,MAAM,EAAE,aAAa,EAAE,GAAG,MAAM,MAAM,CAAC,sBAAsB,CAAC,CAAC;YAC/D,MAAM,aAAa,EAAE,CAAC;YACtB,MAAM;QACR,CAAC;QACD,KAAK,QAAQ,CAAC,CAAC,CAAC;YACd,MAAM,EAAE,aAAa,EAAE,GAAG,MAAM,MAAM,CAAC,sBAAsB,CAAC,CAAC;YAC/D,MAAM,aAAa,EAAE,CAAC;YACtB,MAAM;QACR,CAAC;QACD,KAAK,MAAM,CAAC,CAAC,CAAC;YACZ,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;YAC3D,MAAM,WAAW,CAAC,IAAI,CAAC,CAAC;YACxB,MAAM;QACR,CAAC;QACD,KAAK,MAAM,CAAC,CAAC,CAAC;YACZ,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;YAC3D,MAAM,WAAW,CAAC,IAAI,CAAC,CAAC;YACxB,MAAM;QACR,CAAC;QACD,KAAK,MAAM,CAAC,CAAC,CAAC;YACZ,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;YAC3D,MAAM,WAAW,CAAC,IAAI,CAAC,CAAC;YACxB,MAAM;QACR,CAAC;QACD,KAAK,cAAc,CAAC,CAAC,CAAC;YACpB,MAAM,EAAE,kBAAkB,EAAE,GAAG,MAAM,MAAM,CAAC,4BAA4B,CAAC,CAAC;YAC1E,MAAM,kBAAkB,CAAC,IAAI,CAAC,CAAC;YAC/B,MAAM;QACR,CAAC;QACD,KAAK,eAAe,CAAC,CAAC,CAAC;YACrB,MAAM,EAAE,oBAAoB,EAAE,GAAG,MAAM,MAAM,CAAC,6BAA6B,CAAC,CAAC;YAC7E,MAAM,oBAAoB,CAA
C,IAAI,CAAC,CAAC;YACjC,MAAM;QACR,CAAC;QACD,KAAK,gBAAgB,CAAC,CAAC,CAAC;YACtB,MAAM,EAAE,oBAAoB,EAAE,GAAG,MAAM,MAAM,CAAC,8BAA8B,CAAC,CAAC;YAC9E,MAAM,oBAAoB,CAAC,IAAI,CAAC,CAAC;YACjC,MAAM;QACR,CAAC;QACD,KAAK,aAAa,CAAC,CAAC,CAAC;YACnB,MAAM,EAAE,iBAAiB,EAAE,GAAG,MAAM,MAAM,CAAC,2BAA2B,CAAC,CAAC;YACxE,MAAM,iBAAiB,CAAC,IAAI,CAAC,CAAC;YAC9B,MAAM;QACR,CAAC;QACD,KAAK,OAAO,CAAC,CAAC,CAAC;YACb,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,qBAAqB,CAAC,CAAC;YAC7D,MAAM,YAAY,CAAC,IAAI,CAAC,CAAC;YACzB,MAAM;QACR,CAAC;QACD,KAAK,UAAU,CAAC,CAAC,CAAC;YAChB,MAAM,EAAE,cAAc,EAAE,GAAG,MAAM,MAAM,CAAC,wBAAwB,CAAC,CAAC;YAClE,MAAM,cAAc,CAAC,IAAI,CAAC,CAAC;YAC3B,MAAM;QACR,CAAC;QACD,KAAK,cAAc,CAAC,CAAC,CAAC;YACpB,MAAM,EAAE,kBAAkB,EAAE,GAAG,MAAM,MAAM,CAAC,4BAA4B,CAAC,CAAC;YAC1E,MAAM,kBAAkB,CAAC,IAAI,CAAC,CAAC;YAC/B,MAAM;QACR,CAAC;QACD,KAAK,MAAM,CAAC,CAAC,CAAC;YACZ,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;YAC3D,MAAM,WAAW,CAAC,IAAI,CAAC,CAAC;YACxB,MAAM;QACR,CAAC;QACD,KAAK,QAAQ,CAAC,CAAC,CAAC;YACd,MAAM,EAAE,aAAa,EAAE,GAAG,MAAM,MAAM,CAAC,sBAAsB,CAAC,CAAC;YAC/D,MAAM,aAAa,CAAC,GAAG,CAAC,CAAC;YACzB,MAAM;QACR,CAAC;QACD,KAAK,MAAM,CAAC;QACZ,KAAK,QAAQ,CAAC;QACd,KAAK,IAAI,CAAC;QACV,KAAK,SAAS;YACZ,SAAS,EAAE,CAAC;YACZ,MAAM;QACR,KAAK,WAAW,CAAC;QACjB,KAAK,IAAI;YACP,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;YACzB,MAAM;QACR;YACE,OAAO,CAAC,KAAK,CAAC,oBAAoB,OAAO,EAAE,CAAC,CAAC;YAC7C,SAAS,EAAE,CAAC;YACZ,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;AACH,CAAC;AAED,SAAS,SAAS;IAChB,OAAO,CAAC,GAAG,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAoDX,CAAC,IAAI,EAAE,CAAC,CAAC;AACZ,CAAC;AAED,MAAM,iBAAiB,GAAG,CAAC,WAAW,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;AAChF,MAAM,wBAAwB,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC,CAAC,WAAW;AAEjE,SAAS,eAAe;IACtB,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,EAAE,EAAE,QAAQ,CAAC,CAAC;QAC3C,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,EAAE,mBAAmB,CAAC,CAAC;QAEtD,IAAI,KAAK,GAAiD,IAAI,CAAC;QAC/D,IAAI,CAAC;YACH,IAAI,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;gBAC1B,KAAK,GAAG,IAAI,CA
AC,KAAK,CAAC,YAAY,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC;YACvD,CAAC;QACH,CAAC;QAAC,MAAM,CAAC,CAAC,8CAA8C,CAAC,CAAC;QAE1D,IAAI,KAAK,IAAI,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC,SAAS,GAAG,wBAAwB,EAAE,CAAC;YACrE,IAAI,KAAK,CAAC,MAAM,IAAI,cAAc,CAAC,KAAK,CAAC,MAAM,EAAE,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;gBAC9D,OAAO,CAAC,KAAK,CACX,uBAAuB,GAAG,CAAC,OAAO,MAAM,KAAK,CAAC,MAAM,mCAAmC,CACxF,CAAC;YACJ,CAAC;YACD,OAAO;QACT,CAAC;QAED,8EAA8E;QAC9E,IAAI,CAAC;YACH,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC;gBAAE,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YACpE,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC;QACrG,CAAC;QAAC,MAAM,CAAC,CAAC,mDAAmD,CAAC,CAAC;QAE/D,8DAA8D;QAC9D,QAAQ,CACN,KAAK,EACL,CAAC,MAAM,EAAE,YAAY,EAAE,SAAS,CAAC,EACjC,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,EAC9B,CAAC,GAAG,EAAE,MAAM,EAAE,EAAE;YACd,IAAI,GAAG;gBAAE,OAAO;YAChB,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,EAAE,CAAC;YAC7B,IAAI,CAAC;gBACH,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC;YAC9E,CAAC;YAAC,MAAM,CAAC,CAAC,iCAAiC,CAAC,CAAC;YAC7C,IAAI,MAAM,IAAI,cAAc,CAAC,MAAM,EAAE,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;gBAClD,OAAO,CAAC,KAAK,CACX,uBAAuB,GAAG,CAAC,OAAO,MAAM,MAAM,mCAAmC,CAClF,CAAC;YACJ,CAAC;QACH,CAAC,CACF,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC,CAAC,kDAAkD,CAAC,CAAC;AAChE,CAAC;AAED,IAAI,EAAE;KACH,IAAI,CAAC,KAAK,IAAI,EAAE;IACf,IAAI,OAAO,IAAI,CAAC,iBAAiB,CAAC,QAAQ,CAAC,OAAO,CAAC;QAAE,eAAe,EAAE,CAAC;IACvE,MAAM,WAAW,EAAE,CAAC;AACtB,CAAC,CAAC;KACD,KAAK,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE;IACnB,IAAI,oBAAoB,CAAC,GAAG,CAAC,EAAE,CAAC;QAC9B,gBAAgB,CAAC,GAAG,CAAC,CAAC;IACxB,CAAC;IACD,MAAM,WAAW,EAAE,CAAC;IACpB,OAAO,CAAC,KAAK,CAAC,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;IAChE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
|
package/dist/lib/api-client.d.ts
CHANGED
|
@@ -65,6 +65,133 @@ export declare class ApiClient {
|
|
|
65
65
|
downloadScaffoldZip(): Promise<{
|
|
66
66
|
download_url: string;
|
|
67
67
|
}>;
|
|
68
|
+
getEvals(hubId: string, opts?: {
|
|
69
|
+
enabled?: string;
|
|
70
|
+
}): Promise<{
|
|
71
|
+
success: true;
|
|
72
|
+
data: {
|
|
73
|
+
evals: Array<{
|
|
74
|
+
eval_id: string;
|
|
75
|
+
eval_name: string;
|
|
76
|
+
enabled: boolean;
|
|
77
|
+
number_of_runs: number;
|
|
78
|
+
responder_agent_fk: string;
|
|
79
|
+
hub_fk: string;
|
|
80
|
+
agent?: {
|
|
81
|
+
agent_id: string;
|
|
82
|
+
agent_name: string;
|
|
83
|
+
agent_role: string;
|
|
84
|
+
} | null;
|
|
85
|
+
}>;
|
|
86
|
+
total_count: number;
|
|
87
|
+
};
|
|
88
|
+
}>;
|
|
89
|
+
getEvalSessions(hubId: string, opts?: {
|
|
90
|
+
status?: string;
|
|
91
|
+
}): Promise<{
|
|
92
|
+
success: true;
|
|
93
|
+
data: {
|
|
94
|
+
sessions: Array<{
|
|
95
|
+
eval_session_id: string;
|
|
96
|
+
session_name: string;
|
|
97
|
+
session_status: string;
|
|
98
|
+
hub_fk: string;
|
|
99
|
+
total_evals: number;
|
|
100
|
+
total_runs: number;
|
|
101
|
+
successful_runs: number;
|
|
102
|
+
failed_runs: number;
|
|
103
|
+
started_at?: string;
|
|
104
|
+
completed_at?: string;
|
|
105
|
+
created_at: string;
|
|
106
|
+
}>;
|
|
107
|
+
total_count: number;
|
|
108
|
+
};
|
|
109
|
+
}>;
|
|
110
|
+
createEvalSession(hubId: string, sessionName: string): Promise<{
|
|
111
|
+
success: true;
|
|
112
|
+
data: {
|
|
113
|
+
session: {
|
|
114
|
+
eval_session_id: string;
|
|
115
|
+
session_name: string;
|
|
116
|
+
session_status: string;
|
|
117
|
+
hub_fk: string;
|
|
118
|
+
};
|
|
119
|
+
};
|
|
120
|
+
message: string;
|
|
121
|
+
}>;
|
|
122
|
+
runEvalSession(sessionId: string): Promise<{
|
|
123
|
+
success: true;
|
|
124
|
+
data: {
|
|
125
|
+
session: {
|
|
126
|
+
eval_session_id: string;
|
|
127
|
+
session_status: string;
|
|
128
|
+
};
|
|
129
|
+
};
|
|
130
|
+
message: string;
|
|
131
|
+
}>;
|
|
132
|
+
getEvalSessionDetails(sessionId: string): Promise<{
|
|
133
|
+
success: true;
|
|
134
|
+
data: {
|
|
135
|
+
session: {
|
|
136
|
+
eval_session_id: string;
|
|
137
|
+
session_name: string;
|
|
138
|
+
session_status: string;
|
|
139
|
+
started_at?: string;
|
|
140
|
+
completed_at?: string;
|
|
141
|
+
};
|
|
142
|
+
results: Array<{
|
|
143
|
+
eval_session_result_id: string;
|
|
144
|
+
eval_fk: string;
|
|
145
|
+
total_runs: number;
|
|
146
|
+
successful_runs: number;
|
|
147
|
+
failed_runs: number;
|
|
148
|
+
pending_runs: number;
|
|
149
|
+
running_runs: number;
|
|
150
|
+
success_rate: number;
|
|
151
|
+
avg_execution_time_ms?: number;
|
|
152
|
+
aggregated_scores?: Record<string, {
|
|
153
|
+
avg: number;
|
|
154
|
+
min: number;
|
|
155
|
+
max: number;
|
|
156
|
+
}>;
|
|
157
|
+
eval_status: string;
|
|
158
|
+
eval?: {
|
|
159
|
+
eval_id: string;
|
|
160
|
+
eval_name: string;
|
|
161
|
+
responder_agent_fk: string;
|
|
162
|
+
} | null;
|
|
163
|
+
}>;
|
|
164
|
+
};
|
|
165
|
+
}>;
|
|
166
|
+
getEvalSessionRuns(sessionId: string, opts?: {
|
|
167
|
+
evalId?: string;
|
|
168
|
+
limit?: number;
|
|
169
|
+
}): Promise<{
|
|
170
|
+
success: true;
|
|
171
|
+
data: {
|
|
172
|
+
runs: Array<{
|
|
173
|
+
eval_session_run_id: string;
|
|
174
|
+
eval_fk: string;
|
|
175
|
+
run_number: number;
|
|
176
|
+
eval_response: {
|
|
177
|
+
role: string;
|
|
178
|
+
content: string;
|
|
179
|
+
} | null;
|
|
180
|
+
response_match: boolean | null;
|
|
181
|
+
response_comment?: string;
|
|
182
|
+
execution_time_ms?: number;
|
|
183
|
+
run_status: string;
|
|
184
|
+
scores?: Record<string, number>;
|
|
185
|
+
eval?: {
|
|
186
|
+
eval_id: string;
|
|
187
|
+
eval_name: string;
|
|
188
|
+
responder_agent_fk: string;
|
|
189
|
+
} | null;
|
|
190
|
+
}>;
|
|
191
|
+
total_count: number;
|
|
192
|
+
session_id: string;
|
|
193
|
+
};
|
|
194
|
+
}>;
|
|
68
195
|
getConversation(hubId: string, conversationId: string, opts?: {
|
|
69
196
|
limit?: number;
|
|
70
197
|
offset?: number;
|
package/dist/lib/api-client.js
CHANGED
|
@@ -96,6 +96,41 @@ export class ApiClient {
|
|
|
96
96
|
async downloadScaffoldZip() {
|
|
97
97
|
return this.request('POST', '/api/files/download', { type: 'scaffold' });
|
|
98
98
|
}
|
|
99
|
+
// ---------------------------------------------------------------------------
|
|
100
|
+
// Evals
|
|
101
|
+
// ---------------------------------------------------------------------------
|
|
102
|
+
async getEvals(hubId, opts) {
|
|
103
|
+
const params = new URLSearchParams({ hub_id: hubId });
|
|
104
|
+
if (opts?.enabled)
|
|
105
|
+
params.set('enabled', opts.enabled);
|
|
106
|
+
return this.request('GET', `/api/evals?${params.toString()}`);
|
|
107
|
+
}
|
|
108
|
+
async getEvalSessions(hubId, opts) {
|
|
109
|
+
const params = new URLSearchParams({ hub_id: hubId });
|
|
110
|
+
if (opts?.status)
|
|
111
|
+
params.set('status', opts.status);
|
|
112
|
+
return this.request('GET', `/api/evals/sessions?${params.toString()}`);
|
|
113
|
+
}
|
|
114
|
+
async createEvalSession(hubId, sessionName) {
|
|
115
|
+
return this.request('POST', '/api/evals/sessions', {
|
|
116
|
+
session: { hub_fk: hubId, session_name: sessionName },
|
|
117
|
+
});
|
|
118
|
+
}
|
|
119
|
+
async runEvalSession(sessionId) {
|
|
120
|
+
return this.request('POST', `/api/evals/sessions/${sessionId}/run`);
|
|
121
|
+
}
|
|
122
|
+
async getEvalSessionDetails(sessionId) {
|
|
123
|
+
return this.request('GET', `/api/evals/sessions/${sessionId}`);
|
|
124
|
+
}
|
|
125
|
+
async getEvalSessionRuns(sessionId, opts) {
|
|
126
|
+
const params = new URLSearchParams();
|
|
127
|
+
if (opts?.evalId)
|
|
128
|
+
params.set('eval_id', opts.evalId);
|
|
129
|
+
if (opts?.limit)
|
|
130
|
+
params.set('limit', String(opts.limit));
|
|
131
|
+
const qs = params.toString();
|
|
132
|
+
return this.request('GET', `/api/evals/sessions/${sessionId}/runs${qs ? `?${qs}` : ''}`);
|
|
133
|
+
}
|
|
99
134
|
async getConversation(hubId, conversationId, opts) {
|
|
100
135
|
const params = new URLSearchParams({ hub_id: hubId });
|
|
101
136
|
if (opts?.limit)
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"api-client.js","sourceRoot":"","sources":["../../src/lib/api-client.ts"],"names":[],"mappings":"AAAA;;GAEG;AAWH,OAAO,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAE/C,MAAM,OAAO,QAAS,SAAQ,KAAK;IACxB,MAAM,CAAS;IACf,IAAI,CAAS;IAEtB,YAAY,MAAc,EAAE,IAAY,EAAE,MAAc,EAAE,IAAY;QACpE,KAAK,CAAC,uBAAuB,MAAM,IAAI,IAAI,KAAK,MAAM,MAAM,IAAI,EAAE,CAAC,CAAC;QACpE,IAAI,CAAC,IAAI,GAAG,UAAU,CAAC;QACvB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;IACnB,CAAC;IAED,mGAAmG;IACnG,IAAI,UAAU;QACZ,OAAO,IAAI,CAAC,MAAM,IAAI,GAAG,IAAI,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC;IACjD,CAAC;CACF;AAOD,MAAM,OAAO,SAAS;IACZ,MAAM,CAAS;IACf,WAAW,CAAS;IAE5B,YAAY,OAAyB;QACnC,IAAI,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;QAC7B,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;IACzC,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,KAAa,EAAE,cAAuB;QAC/C,MAAM,EAAE,GAAG,cAAc,CAAC,CAAC,CAAC,IAAI,IAAI,eAAe,CAAC,EAAE,eAAe,EAAE,cAAc,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAChG,OAAO,IAAI,CAAC,OAAO,CAAmB,KAAK,EAAE,gBAAgB,KAAK,GAAG,EAAE,EAAE,CAAC,CAAC;IAC7E,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,KAAa,EAAE,MAAwB,EAAE,YAA6B,MAAM,EAAE,cAAuB;QAC9G,OAAO,IAAI,CAAC,OAAO,CAAiB,MAAM,EAAE,cAAc,EAAE;YAC1D,MAAM,EAAE,KAAK;YACb,MAAM;YACN,SAAS;YACT,GAAG,CAAC,cAAc,IAAI,EAAE,eAAe,EAAE,cAAc,EAAE,CAAC;SAC3D,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,KAAa,EAAE,MAAwB,EAAE,cAAuB;QACzE,OAAO,IAAI,CAAC,OAAO,CAAiB,MAAM,EAAE,cAAc,EAAE;YAC1D,MAAM,EAAE,KAAK;YACb,MAAM;YACN,GAAG,CAAC,cAAc,IAAI,EAAE,eAAe,EAAE,cAAc,EAAE,CAAC;SAC3D,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,KAAa,EAAE,UAAkB,EAAE,MAAwB,EAAE,SAAkB;QACxF,OAAO,IAAI,CAAC,OAAO,CAAiB,MAAM,EAAE,cAAc,EAAE;YAC1D,MAAM,EAAE,KAAK;YACb,WAAW,EAAE,UAAU;YACvB,MAAM;YACN,UAAU,EAAE,SAAS;SACtB,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,IAAY,EAAE,IAAsD;QAC/E,MAAM,MAAM,GAAG,IAAI,eAAe,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC;QAC7C,IAAI,IAAI,EAAE,cAAc;YAAE,MAAM,CAAC,GAAG,CAAC,iBAAiB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;QAC7E,IAAI,IAAI,EAAE,SAAS;YAAE,MAAM,CAAC,GAAG,CAAC,YAAY,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;QAC9D,OAAO,IAAI,CAAC,OAAO,CAAmB,KAAK,EAAE,kBAAk
B,MAAM,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;IACtF,CAAC;IAED,KAAK,CAAC,aAAa;QACjB,OAAO,IAAI,CAAC,OAAO,CAA0B,KAAK,EAAE,uBAAuB,CAAC,CAAC;IAC/E,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,KAAa,EAAE,YAAqB;QACnD,OAAO,IAAI,CAAC,OAAO,CAAqB,MAAM,EAAE,kCAAkC,EAAE;YAClF,MAAM,EAAE,KAAK;YACb,GAAG,CAAC,YAAY,IAAI,EAAE,aAAa,EAAE,YAAY,EAAE,CAAC;SACrD,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,cAAsB,EAAE,SAAkB;QACvD,MAAM,MAAM,GAAG,IAAI,eAAe,CAAC,EAAE,eAAe,EAAE,cAAc,EAAE,CAAC,CAAC;QACxE,IAAI,SAAS;YAAE,MAAM,CAAC,GAAG,CAAC,YAAY,EAAE,SAAS,CAAC,CAAC;QACnD,OAAO,IAAI,CAAC,OAAO,CAAoB,KAAK,EAAE,gBAAgB,MAAM,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;IACrF,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,KAAa,EAAE,OAAe,EAAE,IAAkC;QAQlF,OAAO,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,iCAAiC,EAAE;YAC7D,MAAM,EAAE,KAAK;YACb,OAAO;YACP,GAAG,CAAC,IAAI,EAAE,cAAc,IAAI,EAAE,eAAe,EAAE,IAAI,CAAC,cAAc,EAAE,CAAC;SACtE,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,aAAa,CAAC,KAAa;QAC/B,OAAO,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,wCAAwC,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;IAC3F,CAAC;IAED,KAAK,CAAC,gBAAgB,CAAC,KAAa,EAAE,IAA2D;QAa/F,MAAM,MAAM,GAAG,IAAI,eAAe,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;QACtD,IAAI,IAAI,EAAE,MAAM;YAAE,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;QACpD,IAAI,IAAI,EAAE,KAAK;YAAE,MAAM,CAAC,GAAG,CAAC,OAAO,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;QACzD,IAAI,IAAI,EAAE,MAAM;YAAE,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;QAC5D,OAAO,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,sBAAsB,MAAM,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;IACxE,CAAC;IAED,KAAK,CAAC,mBAAmB;QACvB,OAAO,IAAI,CAAC,OAAO,CAA2B,MAAM,EAAE,qBAAqB,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC,CAAC;IACrG,CAAC;IAED,KAAK,CAAC,eAAe,CAAC,KAAa,EAAE,cAAsB,EAAE,IAA0C;QAoBrG,MAAM,MAAM,GAAG,IAAI,eAAe,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;QACtD,IAAI,IAAI,EAAE,KAAK;YAAE,MAAM,CAAC,GAAG,CAAC,OAAO,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;QACzD,IAAI,IAAI,EAAE,MAAM;YAAE,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;QAC5D,OAAO,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,sBAAsB,cAAc,WAAW,MAAM,CAAC,QAAQ,EAAE,EAAE
,CAAC,CAAC;IACjG,CAAC;IAEO,KAAK,CAAC,OAAO,CAAI,MAAc,EAAE,IAAY,EAAE,IAAc;QACnE,gBAAgB,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;QAE/B,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,MAAM,GAAG,IAAI,EAAE,CAAC;QACpC,MAAM,OAAO,GAA2B;YACtC,aAAa,EAAE,UAAU,IAAI,CAAC,WAAW,EAAE;YAC3C,cAAc,EAAE,kBAAkB;SACnC,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAChC,MAAM;YACN,OAAO;YACP,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS;SAC9C,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACxC,MAAM,IAAI,QAAQ,CAAC,MAAM,EAAE,IAAI,EAAE,QAAQ,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;QAC/D,CAAC;QAED,OAAO,QAAQ,CAAC,IAAI,EAAgB,CAAC;IACvC,CAAC;CACF"}
|
|
1
|
+
{"version":3,"file":"api-client.js","sourceRoot":"","sources":["../../src/lib/api-client.ts"],"names":[],"mappings":"AAAA;;GAEG;AAWH,OAAO,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAE/C,MAAM,OAAO,QAAS,SAAQ,KAAK;IACxB,MAAM,CAAS;IACf,IAAI,CAAS;IAEtB,YAAY,MAAc,EAAE,IAAY,EAAE,MAAc,EAAE,IAAY;QACpE,KAAK,CAAC,uBAAuB,MAAM,IAAI,IAAI,KAAK,MAAM,MAAM,IAAI,EAAE,CAAC,CAAC;QACpE,IAAI,CAAC,IAAI,GAAG,UAAU,CAAC;QACvB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;IACnB,CAAC;IAED,mGAAmG;IACnG,IAAI,UAAU;QACZ,OAAO,IAAI,CAAC,MAAM,IAAI,GAAG,IAAI,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC;IACjD,CAAC;CACF;AAOD,MAAM,OAAO,SAAS;IACZ,MAAM,CAAS;IACf,WAAW,CAAS;IAE5B,YAAY,OAAyB;QACnC,IAAI,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;QAC7B,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;IACzC,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,KAAa,EAAE,cAAuB;QAC/C,MAAM,EAAE,GAAG,cAAc,CAAC,CAAC,CAAC,IAAI,IAAI,eAAe,CAAC,EAAE,eAAe,EAAE,cAAc,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAChG,OAAO,IAAI,CAAC,OAAO,CAAmB,KAAK,EAAE,gBAAgB,KAAK,GAAG,EAAE,EAAE,CAAC,CAAC;IAC7E,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,KAAa,EAAE,MAAwB,EAAE,YAA6B,MAAM,EAAE,cAAuB;QAC9G,OAAO,IAAI,CAAC,OAAO,CAAiB,MAAM,EAAE,cAAc,EAAE;YAC1D,MAAM,EAAE,KAAK;YACb,MAAM;YACN,SAAS;YACT,GAAG,CAAC,cAAc,IAAI,EAAE,eAAe,EAAE,cAAc,EAAE,CAAC;SAC3D,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,KAAa,EAAE,MAAwB,EAAE,cAAuB;QACzE,OAAO,IAAI,CAAC,OAAO,CAAiB,MAAM,EAAE,cAAc,EAAE;YAC1D,MAAM,EAAE,KAAK;YACb,MAAM;YACN,GAAG,CAAC,cAAc,IAAI,EAAE,eAAe,EAAE,cAAc,EAAE,CAAC;SAC3D,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,KAAa,EAAE,UAAkB,EAAE,MAAwB,EAAE,SAAkB;QACxF,OAAO,IAAI,CAAC,OAAO,CAAiB,MAAM,EAAE,cAAc,EAAE;YAC1D,MAAM,EAAE,KAAK;YACb,WAAW,EAAE,UAAU;YACvB,MAAM;YACN,UAAU,EAAE,SAAS;SACtB,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,IAAY,EAAE,IAAsD;QAC/E,MAAM,MAAM,GAAG,IAAI,eAAe,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC;QAC7C,IAAI,IAAI,EAAE,cAAc;YAAE,MAAM,CAAC,GAAG,CAAC,iBAAiB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;QAC7E,IAAI,IAAI,EAAE,SAAS;YAAE,MAAM,CAAC,GAAG,CAAC,YAAY,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;QAC9D,OAAO,IAAI,CAAC,OAAO,CAAmB,KAAK,EAAE,kBAAk
B,MAAM,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;IACtF,CAAC;IAED,KAAK,CAAC,aAAa;QACjB,OAAO,IAAI,CAAC,OAAO,CAA0B,KAAK,EAAE,uBAAuB,CAAC,CAAC;IAC/E,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,KAAa,EAAE,YAAqB;QACnD,OAAO,IAAI,CAAC,OAAO,CAAqB,MAAM,EAAE,kCAAkC,EAAE;YAClF,MAAM,EAAE,KAAK;YACb,GAAG,CAAC,YAAY,IAAI,EAAE,aAAa,EAAE,YAAY,EAAE,CAAC;SACrD,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,cAAsB,EAAE,SAAkB;QACvD,MAAM,MAAM,GAAG,IAAI,eAAe,CAAC,EAAE,eAAe,EAAE,cAAc,EAAE,CAAC,CAAC;QACxE,IAAI,SAAS;YAAE,MAAM,CAAC,GAAG,CAAC,YAAY,EAAE,SAAS,CAAC,CAAC;QACnD,OAAO,IAAI,CAAC,OAAO,CAAoB,KAAK,EAAE,gBAAgB,MAAM,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;IACrF,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,KAAa,EAAE,OAAe,EAAE,IAAkC;QAQlF,OAAO,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,iCAAiC,EAAE;YAC7D,MAAM,EAAE,KAAK;YACb,OAAO;YACP,GAAG,CAAC,IAAI,EAAE,cAAc,IAAI,EAAE,eAAe,EAAE,IAAI,CAAC,cAAc,EAAE,CAAC;SACtE,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,aAAa,CAAC,KAAa;QAC/B,OAAO,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,wCAAwC,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;IAC3F,CAAC;IAED,KAAK,CAAC,gBAAgB,CAAC,KAAa,EAAE,IAA2D;QAa/F,MAAM,MAAM,GAAG,IAAI,eAAe,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;QACtD,IAAI,IAAI,EAAE,MAAM;YAAE,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;QACpD,IAAI,IAAI,EAAE,KAAK;YAAE,MAAM,CAAC,GAAG,CAAC,OAAO,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;QACzD,IAAI,IAAI,EAAE,MAAM;YAAE,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;QAC5D,OAAO,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,sBAAsB,MAAM,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;IACxE,CAAC;IAED,KAAK,CAAC,mBAAmB;QACvB,OAAO,IAAI,CAAC,OAAO,CAA2B,MAAM,EAAE,qBAAqB,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC,CAAC;IACrG,CAAC;IAED,8EAA8E;IAC9E,QAAQ;IACR,8EAA8E;IAE9E,KAAK,CAAC,QAAQ,CAAC,KAAa,EAAE,IAA2B;QAevD,MAAM,MAAM,GAAG,IAAI,eAAe,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;QACtD,IAAI,IAAI,EAAE,OAAO;YAAE,MAAM,CAAC,GAAG,CAAC,SAAS,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;QACvD,OAAO,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,cAAc,MAAM,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;IAChE,CAAC;IAED,KAAK,CAAC,eAAe,CAAC,KAAa,EAAE,IAA0B;QAmB7D,MAAM,MAAM,GAAG,IAAI,eAAe,CAAC,EAAE
,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;QACtD,IAAI,IAAI,EAAE,MAAM;YAAE,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;QACpD,OAAO,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,uBAAuB,MAAM,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;IACzE,CAAC;IAED,KAAK,CAAC,iBAAiB,CAAC,KAAa,EAAE,WAAmB;QAYxD,OAAO,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,qBAAqB,EAAE;YACjD,OAAO,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,YAAY,EAAE,WAAW,EAAE;SACtD,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,cAAc,CAAC,SAAiB;QAKpC,OAAO,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,uBAAuB,SAAS,MAAM,CAAC,CAAC;IACtE,CAAC;IAED,KAAK,CAAC,qBAAqB,CAAC,SAAiB;QA0B3C,OAAO,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,uBAAuB,SAAS,EAAE,CAAC,CAAC;IACjE,CAAC;IAED,KAAK,CAAC,kBAAkB,CAAC,SAAiB,EAAE,IAA0C;QAmBpF,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,IAAI,IAAI,EAAE,MAAM;YAAE,MAAM,CAAC,GAAG,CAAC,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;QACrD,IAAI,IAAI,EAAE,KAAK;YAAE,MAAM,CAAC,GAAG,CAAC,OAAO,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;QACzD,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAC;QAC7B,OAAO,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,uBAAuB,SAAS,QAAQ,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAC3F,CAAC;IAED,KAAK,CAAC,eAAe,CAAC,KAAa,EAAE,cAAsB,EAAE,IAA0C;QAoBrG,MAAM,MAAM,GAAG,IAAI,eAAe,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;QACtD,IAAI,IAAI,EAAE,KAAK;YAAE,MAAM,CAAC,GAAG,CAAC,OAAO,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;QACzD,IAAI,IAAI,EAAE,MAAM;YAAE,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;QAC5D,OAAO,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,sBAAsB,cAAc,WAAW,MAAM,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;IACjG,CAAC;IAEO,KAAK,CAAC,OAAO,CAAI,MAAc,EAAE,IAAY,EAAE,IAAc;QACnE,gBAAgB,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;QAE/B,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,MAAM,GAAG,IAAI,EAAE,CAAC;QACpC,MAAM,OAAO,GAA2B;YACtC,aAAa,EAAE,UAAU,IAAI,CAAC,WAAW,EAAE;YAC3C,cAAc,EAAE,kBAAkB;SACnC,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAChC,MAAM;YACN,OAAO;YACP,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS;SAC9C,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,I
AAI,EAAE,CAAC;YACxC,MAAM,IAAI,QAAQ,CAAC,MAAM,EAAE,IAAI,EAAE,QAAQ,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;QAC/D,CAAC;QAED,OAAO,QAAQ,CAAC,IAAI,EAAgB,CAAC;IACvC,CAAC;CACF"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared formatting helpers for eval results display.
|
|
3
|
+
*/
|
|
4
|
+
interface ResultRow {
|
|
5
|
+
eval_fk: string;
|
|
6
|
+
total_runs: number;
|
|
7
|
+
successful_runs: number;
|
|
8
|
+
failed_runs: number;
|
|
9
|
+
avg_execution_time_ms?: number;
|
|
10
|
+
aggregated_scores?: Record<string, {
|
|
11
|
+
avg: number;
|
|
12
|
+
min: number;
|
|
13
|
+
max: number;
|
|
14
|
+
}>;
|
|
15
|
+
eval?: {
|
|
16
|
+
eval_id: string;
|
|
17
|
+
eval_name: string;
|
|
18
|
+
} | null;
|
|
19
|
+
}
|
|
20
|
+
/**
|
|
21
|
+
* Print a results table and overall summary.
|
|
22
|
+
*
|
|
23
|
+
* @param mode - 'compact': scores show avg only (used by run-eval summary)
|
|
24
|
+
* 'detailed': scores show avg/min/max (used by eval-results)
|
|
25
|
+
*/
|
|
26
|
+
export declare function printResultsTable(results: ResultRow[], mode: 'compact' | 'detailed'): void;
|
|
27
|
+
export {};
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared formatting helpers for eval results display.
|
|
3
|
+
*/
|
|
4
|
+
/**
|
|
5
|
+
* Print a results table and overall summary.
|
|
6
|
+
*
|
|
7
|
+
* @param mode - 'compact': scores show avg only (used by run-eval summary)
|
|
8
|
+
* 'detailed': scores show avg/min/max (used by eval-results)
|
|
9
|
+
*/
|
|
10
|
+
export function printResultsTable(results, mode) {
|
|
11
|
+
let totalPassed = 0;
|
|
12
|
+
let totalRuns = 0;
|
|
13
|
+
for (const r of results) {
|
|
14
|
+
const evalName = r.eval?.eval_name || r.eval_fk.slice(0, 8);
|
|
15
|
+
const passed = `${r.successful_runs}/${r.total_runs} passed`;
|
|
16
|
+
const avgTime = r.avg_execution_time_ms != null ? `avg ${(r.avg_execution_time_ms / 1000).toFixed(1)}s` : '';
|
|
17
|
+
let scoresStr = '';
|
|
18
|
+
if (r.aggregated_scores) {
|
|
19
|
+
const parts = Object.entries(r.aggregated_scores).map(([key, val]) => mode === 'compact'
|
|
20
|
+
? `${key}: ${Math.round(val.avg * 100)}%`
|
|
21
|
+
: `${key}: avg ${Math.round(val.avg * 100)}%, min ${Math.round(val.min * 100)}%, max ${Math.round(val.max * 100)}%`);
|
|
22
|
+
if (parts.length > 0)
|
|
23
|
+
scoresStr = ' ' + parts.join(' ');
|
|
24
|
+
}
|
|
25
|
+
console.log(` ${evalName.padEnd(20)} ${passed.padEnd(14)} ${avgTime}${scoresStr}`);
|
|
26
|
+
totalPassed += r.successful_runs;
|
|
27
|
+
totalRuns += r.total_runs;
|
|
28
|
+
}
|
|
29
|
+
const pct = totalRuns > 0 ? ((totalPassed / totalRuns) * 100).toFixed(1) : '0.0';
|
|
30
|
+
console.log(`\nOverall: ${totalPassed}/${totalRuns} passed (${pct}%)`);
|
|
31
|
+
}
|
|
32
|
+
//# sourceMappingURL=eval-format.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eval-format.js","sourceRoot":"","sources":["../../src/lib/eval-format.ts"],"names":[],"mappings":"AAAA;;GAEG;AAYH;;;;;GAKG;AACH,MAAM,UAAU,iBAAiB,CAC/B,OAAoB,EACpB,IAA4B;IAE5B,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,MAAM,QAAQ,GAAG,CAAC,CAAC,IAAI,EAAE,SAAS,IAAI,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QAC5D,MAAM,MAAM,GAAG,GAAG,CAAC,CAAC,eAAe,IAAI,CAAC,CAAC,UAAU,SAAS,CAAC;QAC7D,MAAM,OAAO,GAAG,CAAC,CAAC,qBAAqB,IAAI,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,qBAAqB,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QAE7G,IAAI,SAAS,GAAG,EAAE,CAAC;QACnB,IAAI,CAAC,CAAC,iBAAiB,EAAE,CAAC;YACxB,MAAM,KAAK,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,iBAAiB,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,EAAE,CACnE,IAAI,KAAK,SAAS;gBAChB,CAAC,CAAC,GAAG,GAAG,KAAK,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,GAAG,GAAG,CAAC,GAAG;gBACzC,CAAC,CAAC,GAAG,GAAG,SAAS,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,GAAG,GAAG,CAAC,UAAU,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,GAAG,GAAG,CAAC,UAAU,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,GAAG,GAAG,CAAC,GAAG,CACtH,CAAC;YACF,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;gBAAE,SAAS,GAAG,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC5D,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,KAAK,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,KAAK,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,KAAK,OAAO,GAAG,SAAS,EAAE,CAAC,CAAC;QAEtF,WAAW,IAAI,CAAC,CAAC,eAAe,CAAC;QACjC,SAAS,IAAI,CAAC,CAAC,UAAU,CAAC;IAC5B,CAAC;IAED,MAAM,GAAG,GAAG,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,GAAG,SAAS,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;IACjF,OAAO,CAAC,GAAG,CAAC,cAAc,WAAW,IAAI,SAAS,YAAY,GAAG,IAAI,CAAC,CAAC;AACzE,CAAC"}
|