katt 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -1
- package/dist/index.js +155 -177
- package/dist/katt.js +1 -1
- package/dist/runCli-C7uxWavX.js +312 -0
- package/package.json +1 -1
- package/dist/runCli-B3oIBxOl.js +0 -317
package/README.md
CHANGED
|
@@ -9,6 +9,7 @@ Katt is a lightweight testing framework for running AI Evals, inspired by [Jest]
|
|
|
9
9
|
|
|
10
10
|
- [Overview](#overview)
|
|
11
11
|
- [API Documentation](#api-documentation)
|
|
12
|
+
- [Articles](#articles)
|
|
12
13
|
- [Hello World - Example](#hello-world---example)
|
|
13
14
|
- [Main Features](#main-features)
|
|
14
15
|
- [Usage](#usage)
|
|
@@ -34,6 +35,10 @@ Katt is designed to evaluate and validate the behavior of AI agents like **Claud
|
|
|
34
35
|
|
|
35
36
|
For a complete list of features and usage examples, see [docs/api-documentation.md](https://github.com/raphaelpor/katt/blob/main/docs/api-documentation.md).
|
|
36
37
|
|
|
38
|
+
## Articles
|
|
39
|
+
|
|
40
|
+
- [Introducing Katt](https://github.com/raphaelpor/katt/blob/main/docs/articles/introduction-to-katt.md)
|
|
41
|
+
|
|
37
42
|
## Hello World - Example
|
|
38
43
|
|
|
39
44
|
```typescript
|
|
@@ -86,7 +91,7 @@ expect(result).toContain("hello world");
|
|
|
86
91
|
2. Run Katt from your project directory:
|
|
87
92
|
|
|
88
93
|
```bash
|
|
89
|
-
katt
|
|
94
|
+
npx katt
|
|
90
95
|
```
|
|
91
96
|
|
|
92
97
|
### Using promptFile
|
package/dist/index.js
CHANGED
|
@@ -1,56 +1,56 @@
|
|
|
1
|
-
import { r as
|
|
2
|
-
import { t as
|
|
3
|
-
import { CopilotClient as
|
|
4
|
-
import { readFile as
|
|
5
|
-
import { resolve as
|
|
6
|
-
import { readFileSync as
|
|
7
|
-
function
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
const n =
|
|
1
|
+
import { r as E, c as N, a as O, p as I, b as m, l as d, d as w, g as S, e as U, f as C, s as z, h as R, i as A, j as B, k as W, m as K, n as H, o as Y, q } from "./runCli-C7uxWavX.js";
|
|
2
|
+
import { t as Et } from "./runCli-C7uxWavX.js";
|
|
3
|
+
import { CopilotClient as J } from "@github/copilot-sdk";
|
|
4
|
+
import { readFile as j } from "node:fs/promises";
|
|
5
|
+
import { resolve as P, dirname as v, isAbsolute as G, basename as Q, join as V } from "node:path";
|
|
6
|
+
import { readFileSync as X, writeFileSync as F, mkdirSync as Z } from "node:fs";
|
|
7
|
+
function Tt(t, e) {
|
|
8
|
+
E(() => {
|
|
9
|
+
O(), I(t);
|
|
10
|
+
const n = S(), o = Date.now(), i = () => S() === n, r = () => Date.now() - o;
|
|
11
11
|
try {
|
|
12
|
-
const
|
|
13
|
-
if (
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
12
|
+
const s = e();
|
|
13
|
+
if (s && typeof s.then == "function") {
|
|
14
|
+
m(
|
|
15
|
+
s.then(() => {
|
|
16
|
+
d(!0, r());
|
|
17
17
|
}).catch((a) => {
|
|
18
|
-
throw
|
|
18
|
+
throw d(!1, r()), a;
|
|
19
19
|
}).finally(() => {
|
|
20
|
-
|
|
20
|
+
w();
|
|
21
21
|
})
|
|
22
22
|
);
|
|
23
23
|
return;
|
|
24
24
|
}
|
|
25
|
-
} catch (
|
|
26
|
-
throw
|
|
25
|
+
} catch (s) {
|
|
26
|
+
throw d(!1, r()), w(), s;
|
|
27
27
|
}
|
|
28
|
-
|
|
29
|
-
},
|
|
28
|
+
d(i(), r()), w();
|
|
29
|
+
}, N());
|
|
30
30
|
}
|
|
31
|
-
function
|
|
32
|
-
|
|
33
|
-
|
|
31
|
+
function St(t, e) {
|
|
32
|
+
E(() => {
|
|
33
|
+
U(t);
|
|
34
34
|
try {
|
|
35
35
|
const n = e();
|
|
36
36
|
if (n && typeof n.then == "function") {
|
|
37
|
-
|
|
37
|
+
m(
|
|
38
38
|
n.finally(() => {
|
|
39
|
-
|
|
39
|
+
C();
|
|
40
40
|
})
|
|
41
41
|
);
|
|
42
42
|
return;
|
|
43
43
|
}
|
|
44
44
|
} catch (n) {
|
|
45
|
-
throw
|
|
45
|
+
throw C(), n;
|
|
46
46
|
}
|
|
47
|
-
|
|
48
|
-
},
|
|
47
|
+
C();
|
|
48
|
+
}, N());
|
|
49
49
|
}
|
|
50
|
-
function
|
|
50
|
+
function tt(t, e) {
|
|
51
51
|
return typeof t == "object" && t !== null && "code" in t && t.code === e;
|
|
52
52
|
}
|
|
53
|
-
function
|
|
53
|
+
function et(t) {
|
|
54
54
|
try {
|
|
55
55
|
const e = JSON.parse(t);
|
|
56
56
|
return typeof e == "object" && e !== null ? e : void 0;
|
|
@@ -59,19 +59,19 @@ function J(t) {
|
|
|
59
59
|
return;
|
|
60
60
|
}
|
|
61
61
|
}
|
|
62
|
-
async function
|
|
63
|
-
const t =
|
|
62
|
+
async function nt() {
|
|
63
|
+
const t = P(process.cwd(), "katt.json");
|
|
64
64
|
try {
|
|
65
|
-
const e = await
|
|
66
|
-
return
|
|
65
|
+
const e = await j(t, "utf8");
|
|
66
|
+
return et(e);
|
|
67
67
|
} catch (e) {
|
|
68
|
-
if (
|
|
68
|
+
if (tt(e, "ENOENT"))
|
|
69
69
|
return;
|
|
70
70
|
console.warn(`Failed to read katt.json: ${String(e)}`);
|
|
71
71
|
return;
|
|
72
72
|
}
|
|
73
73
|
}
|
|
74
|
-
function
|
|
74
|
+
function ot(t) {
|
|
75
75
|
const e = t?.copilot;
|
|
76
76
|
if (typeof e != "object" || e === null || Array.isArray(e))
|
|
77
77
|
return;
|
|
@@ -80,134 +80,112 @@ function Q(t) {
|
|
|
80
80
|
}, o = n.model;
|
|
81
81
|
return (typeof o != "string" || o.length === 0) && delete n.model, Object.keys(n).length > 0 ? n : void 0;
|
|
82
82
|
}
|
|
83
|
-
function
|
|
83
|
+
function rt(t) {
|
|
84
84
|
if (!(typeof t != "number" || !Number.isFinite(t)) && !(t <= 0))
|
|
85
85
|
return Math.floor(t);
|
|
86
86
|
}
|
|
87
|
-
function
|
|
87
|
+
function it(t) {
|
|
88
88
|
const e = t?.prompt;
|
|
89
89
|
if (!(typeof e != "object" || e === null || Array.isArray(e)))
|
|
90
|
-
return
|
|
90
|
+
return rt(e.timeoutMs);
|
|
91
91
|
}
|
|
92
|
-
async function
|
|
93
|
-
const t = await
|
|
92
|
+
async function st() {
|
|
93
|
+
const t = await nt();
|
|
94
94
|
return {
|
|
95
|
-
copilot:
|
|
96
|
-
promptTimeoutMs:
|
|
95
|
+
copilot: ot(t),
|
|
96
|
+
promptTimeoutMs: it(t)
|
|
97
97
|
};
|
|
98
98
|
}
|
|
99
|
-
const
|
|
100
|
-
function
|
|
99
|
+
const at = 6e5;
|
|
100
|
+
function _(t) {
|
|
101
101
|
return typeof t == "string" && t.length > 0 ? t : void 0;
|
|
102
102
|
}
|
|
103
|
-
function
|
|
103
|
+
function b(t) {
|
|
104
104
|
if (!t)
|
|
105
105
|
return;
|
|
106
106
|
const e = { ...t };
|
|
107
107
|
if (e.model !== void 0) {
|
|
108
|
-
const n =
|
|
108
|
+
const n = _(e.model);
|
|
109
109
|
n ? e.model = n : delete e.model;
|
|
110
110
|
}
|
|
111
111
|
return Object.keys(e).length > 0 ? e : void 0;
|
|
112
112
|
}
|
|
113
|
-
function
|
|
113
|
+
function M(t) {
|
|
114
114
|
if (!(typeof t != "number" || !Number.isFinite(t)) && !(t <= 0))
|
|
115
115
|
return Math.floor(t);
|
|
116
116
|
}
|
|
117
|
-
function
|
|
117
|
+
function h(t) {
|
|
118
118
|
return !Number.isFinite(t) || (t ?? 0) <= 0 ? 0 : Math.floor(t ?? 0);
|
|
119
119
|
}
|
|
120
|
-
function
|
|
121
|
-
return
|
|
120
|
+
function ct(t) {
|
|
121
|
+
return h(t.inputTokens) + h(t.outputTokens) + h(t.cacheReadTokens) + h(t.cacheWriteTokens);
|
|
122
122
|
}
|
|
123
|
-
function
|
|
124
|
-
const { timeoutMs: n, ...o } =
|
|
123
|
+
async function $(t, e = {}) {
|
|
124
|
+
const { timeoutMs: n, ...o } = e, i = await st(), r = b(i.copilot), s = b(
|
|
125
125
|
o
|
|
126
|
-
),
|
|
126
|
+
), a = b({
|
|
127
127
|
...r ?? {},
|
|
128
128
|
...s ?? {}
|
|
129
|
-
}),
|
|
130
|
-
|
|
131
|
-
sessionOptions: i,
|
|
132
|
-
model: M(i?.model),
|
|
133
|
-
timeoutMs: D
|
|
134
|
-
};
|
|
135
|
-
}
|
|
136
|
-
async function ot(t, e) {
|
|
137
|
-
const n = [];
|
|
138
|
-
if (e.unsubscribeUsage?.(), e.usedTokens > 0 && j(e.usedTokens), e.session)
|
|
139
|
-
try {
|
|
140
|
-
await e.session.destroy();
|
|
141
|
-
} catch (o) {
|
|
142
|
-
n.push(o);
|
|
143
|
-
}
|
|
144
|
-
try {
|
|
145
|
-
const o = await t.stop();
|
|
146
|
-
n.push(...o);
|
|
147
|
-
} catch (o) {
|
|
148
|
-
n.push(o);
|
|
149
|
-
}
|
|
150
|
-
n.length > 0 && console.error(
|
|
151
|
-
`Copilot cleanup encountered ${n.length} error(s).`
|
|
152
|
-
);
|
|
153
|
-
}
|
|
154
|
-
async function y(t, e = {}) {
|
|
155
|
-
const n = await Z(), o = nt(e, n), r = new z({ useLoggedInUser: !0 }), s = {
|
|
156
|
-
session: void 0,
|
|
157
|
-
unsubscribeUsage: void 0,
|
|
158
|
-
usedTokens: 0
|
|
159
|
-
};
|
|
129
|
+
}), c = M(i.promptTimeoutMs), L = M(n) ?? c ?? at, x = _(a?.model), g = new J({ useLoggedInUser: !0 });
|
|
130
|
+
let p, T, y = 0;
|
|
160
131
|
try {
|
|
161
|
-
await
|
|
162
|
-
|
|
163
|
-
)
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
s.usedTokens += et(a.data);
|
|
167
|
-
}
|
|
168
|
-
);
|
|
169
|
-
const i = await s.session.sendAndWait(
|
|
170
|
-
{ prompt: t },
|
|
171
|
-
o.timeoutMs
|
|
172
|
-
);
|
|
173
|
-
if (!i?.data?.content)
|
|
132
|
+
await g.start(), p = await g.createSession(a), T = p.on("assistant.usage", (f) => {
|
|
133
|
+
y += ct(f.data);
|
|
134
|
+
});
|
|
135
|
+
const l = await p.sendAndWait({ prompt: t }, L);
|
|
136
|
+
if (!l?.data?.content)
|
|
174
137
|
throw new Error("Copilot did not return a response.");
|
|
175
|
-
return
|
|
138
|
+
return x && z(x), l.data.content;
|
|
176
139
|
} finally {
|
|
177
|
-
|
|
140
|
+
const l = [];
|
|
141
|
+
if (T?.(), y > 0 && R(y), p)
|
|
142
|
+
try {
|
|
143
|
+
await p.destroy();
|
|
144
|
+
} catch (f) {
|
|
145
|
+
l.push(f);
|
|
146
|
+
}
|
|
147
|
+
try {
|
|
148
|
+
const f = await g.stop();
|
|
149
|
+
l.push(...f);
|
|
150
|
+
} catch (f) {
|
|
151
|
+
l.push(f);
|
|
152
|
+
}
|
|
153
|
+
l.length > 0 && console.error(
|
|
154
|
+
`Copilot cleanup encountered ${l.length} error(s).`
|
|
155
|
+
);
|
|
178
156
|
}
|
|
179
157
|
}
|
|
180
|
-
async function
|
|
181
|
-
const n =
|
|
182
|
-
return
|
|
158
|
+
async function Ft(t, e = {}) {
|
|
159
|
+
const n = A.getStore(), o = n?.evalFile ? v(n.evalFile) : process.cwd(), i = G(t) ? t : P(o, t), r = await j(i, "utf8");
|
|
160
|
+
return $(r, e);
|
|
183
161
|
}
|
|
184
|
-
function
|
|
185
|
-
|
|
186
|
-
describePath:
|
|
187
|
-
itPath:
|
|
162
|
+
function u(t) {
|
|
163
|
+
B({
|
|
164
|
+
describePath: K(),
|
|
165
|
+
itPath: W(),
|
|
188
166
|
message: t
|
|
189
167
|
});
|
|
190
168
|
}
|
|
191
|
-
async function
|
|
192
|
-
const n = Date.now(), o = `expected '${t}' to satisfy '${e}'`,
|
|
169
|
+
async function ut(t, e) {
|
|
170
|
+
const n = Date.now(), o = `expected '${t}' to satisfy '${e}'`, i = $(`Evaluate if the expectation is fulfiled in by the input.
|
|
193
171
|
Expectation: "${e}".
|
|
194
172
|
Input:
|
|
195
173
|
---
|
|
196
174
|
${t}
|
|
197
175
|
---
|
|
198
176
|
Important: Answer with "Yes" or "No" only, without any additional text.
|
|
199
|
-
`).then((
|
|
200
|
-
|
|
177
|
+
`).then((r) => {
|
|
178
|
+
r.includes("Yes") ? d(!0, Date.now() - n, "promptCheck") : r.includes("No") ? u(o) : u(`failed to evaluate expectation '${e}'`);
|
|
201
179
|
});
|
|
202
|
-
return
|
|
180
|
+
return m(i), i;
|
|
203
181
|
}
|
|
204
|
-
const
|
|
205
|
-
function
|
|
182
|
+
const lt = 3;
|
|
183
|
+
function ft(t) {
|
|
206
184
|
const e = t.match(/\b([1-5])\b/);
|
|
207
185
|
return !e || !e[1] ? null : Number(e[1]);
|
|
208
186
|
}
|
|
209
|
-
async function
|
|
210
|
-
const o = Date.now(),
|
|
187
|
+
async function dt(t, e, n) {
|
|
188
|
+
const o = Date.now(), i = n?.threshold ?? lt, r = $(
|
|
211
189
|
`Classify the input by how "${e}" it is on a scale of 1 to 5.
|
|
212
190
|
1 means "not ${e}" and 5 means "very ${e}".
|
|
213
191
|
Return only a single number: 1, 2, 3, 4, or 5.
|
|
@@ -217,141 +195,141 @@ Input:
|
|
|
217
195
|
${t}
|
|
218
196
|
---`,
|
|
219
197
|
n?.model ? { model: n.model } : void 0
|
|
220
|
-
).then((
|
|
221
|
-
const a =
|
|
198
|
+
).then((s) => {
|
|
199
|
+
const a = ft(s);
|
|
222
200
|
if (a === null) {
|
|
223
|
-
|
|
224
|
-
`failed to classify as '${e}'. Evaluator returned '${
|
|
201
|
+
u(
|
|
202
|
+
`failed to classify as '${e}'. Evaluator returned '${s}'`
|
|
225
203
|
);
|
|
226
204
|
return;
|
|
227
205
|
}
|
|
228
|
-
const
|
|
229
|
-
if (a <
|
|
230
|
-
c
|
|
206
|
+
const c = `expected response to be classified as '${e}' with score >= ${i}, got ${a}`;
|
|
207
|
+
if (a < i) {
|
|
208
|
+
u(c);
|
|
231
209
|
return;
|
|
232
210
|
}
|
|
233
|
-
|
|
211
|
+
d(
|
|
234
212
|
!0,
|
|
235
213
|
Date.now() - o,
|
|
236
214
|
"toBeClassifiedAs"
|
|
237
215
|
);
|
|
238
216
|
});
|
|
239
|
-
return
|
|
217
|
+
return m(r), r;
|
|
240
218
|
}
|
|
241
|
-
function
|
|
219
|
+
function pt(t, e) {
|
|
242
220
|
const n = `expected '${t}' to include '${e}'`;
|
|
243
|
-
t.includes(e) ||
|
|
221
|
+
t.includes(e) || u(n);
|
|
244
222
|
}
|
|
245
|
-
function
|
|
223
|
+
function k(t) {
|
|
246
224
|
const e = t.trim().replace(/[<>:"/\\|?*\x00-\x1f]/g, "_").replace(/\s+/g, "_");
|
|
247
225
|
return e.length > 0 ? e : "unnamed";
|
|
248
226
|
}
|
|
249
|
-
function
|
|
250
|
-
const t =
|
|
251
|
-
(o) =>
|
|
252
|
-
), e =
|
|
253
|
-
(o) =>
|
|
227
|
+
function ht() {
|
|
228
|
+
const t = Y().map(
|
|
229
|
+
(o) => k(o.description)
|
|
230
|
+
), e = q().map(
|
|
231
|
+
(o) => k(o.description)
|
|
254
232
|
), n = [...t, ...e];
|
|
255
233
|
return n.length === 0 ? "root" : n.join("__");
|
|
256
234
|
}
|
|
257
|
-
function
|
|
258
|
-
const n =
|
|
259
|
-
return
|
|
260
|
-
|
|
235
|
+
function mt(t) {
|
|
236
|
+
const n = Q(t).replace(/\.eval\.[^./\\]+$/, ""), o = ht();
|
|
237
|
+
return V(
|
|
238
|
+
v(t),
|
|
261
239
|
"__snapshots__",
|
|
262
240
|
`${n}__${o}.snap.md`
|
|
263
241
|
);
|
|
264
242
|
}
|
|
265
|
-
function
|
|
243
|
+
function D(t) {
|
|
266
244
|
return t.split(/\r?\n/);
|
|
267
245
|
}
|
|
268
|
-
function
|
|
246
|
+
function gt(t, e) {
|
|
269
247
|
if (t === e)
|
|
270
248
|
return " (no diff)";
|
|
271
|
-
const n =
|
|
272
|
-
for (let
|
|
273
|
-
const a = n[
|
|
274
|
-
if (a !==
|
|
275
|
-
if (a === void 0 &&
|
|
276
|
-
|
|
249
|
+
const n = D(t), o = D(e), i = Math.max(n.length, o.length), r = [];
|
|
250
|
+
for (let s = 0; s < i; s += 1) {
|
|
251
|
+
const a = n[s], c = o[s];
|
|
252
|
+
if (a !== c) {
|
|
253
|
+
if (a === void 0 && c !== void 0) {
|
|
254
|
+
r.push(`+ ${c}`);
|
|
277
255
|
continue;
|
|
278
256
|
}
|
|
279
|
-
if (a !== void 0 &&
|
|
280
|
-
|
|
257
|
+
if (a !== void 0 && c === void 0) {
|
|
258
|
+
r.push(`- ${a}`);
|
|
281
259
|
continue;
|
|
282
260
|
}
|
|
283
|
-
|
|
261
|
+
r.push(`- ${a ?? ""}`), r.push(`+ ${c ?? ""}`);
|
|
284
262
|
}
|
|
285
263
|
}
|
|
286
|
-
return
|
|
264
|
+
return r.join(`
|
|
287
265
|
`);
|
|
288
266
|
}
|
|
289
|
-
function
|
|
290
|
-
const e =
|
|
267
|
+
function yt(t) {
|
|
268
|
+
const e = A.getStore()?.evalFile;
|
|
291
269
|
if (!e) {
|
|
292
|
-
|
|
270
|
+
u(
|
|
293
271
|
"toMatchSnapshot can only be used while running an eval file."
|
|
294
272
|
);
|
|
295
273
|
return;
|
|
296
274
|
}
|
|
297
|
-
const n =
|
|
275
|
+
const n = mt(e);
|
|
298
276
|
try {
|
|
299
|
-
const o =
|
|
277
|
+
const o = X(n, "utf8");
|
|
300
278
|
if (o === t)
|
|
301
279
|
return;
|
|
302
|
-
if (
|
|
303
|
-
|
|
280
|
+
if (H()) {
|
|
281
|
+
F(n, t, "utf8");
|
|
304
282
|
return;
|
|
305
283
|
}
|
|
306
|
-
const
|
|
307
|
-
|
|
284
|
+
const i = gt(o, t);
|
|
285
|
+
u(
|
|
308
286
|
[
|
|
309
287
|
`Snapshot mismatch at ${n}`,
|
|
310
288
|
"",
|
|
311
289
|
"Diff:",
|
|
312
|
-
|
|
290
|
+
i,
|
|
313
291
|
"",
|
|
314
|
-
"Run katt
|
|
292
|
+
"Run 'npx katt --update-snapshots' (or -u) to accept this change."
|
|
315
293
|
].join(`
|
|
316
294
|
`)
|
|
317
295
|
);
|
|
318
296
|
} catch (o) {
|
|
319
297
|
if (o.code !== "ENOENT") {
|
|
320
|
-
|
|
298
|
+
u(
|
|
321
299
|
`Failed to read snapshot at ${n}: ${String(o)}`
|
|
322
300
|
);
|
|
323
301
|
return;
|
|
324
302
|
}
|
|
325
303
|
try {
|
|
326
|
-
|
|
327
|
-
} catch (
|
|
328
|
-
|
|
329
|
-
`Failed to write snapshot at ${n}: ${String(
|
|
304
|
+
Z(v(n), { recursive: !0 }), F(n, t, "utf8");
|
|
305
|
+
} catch (r) {
|
|
306
|
+
u(
|
|
307
|
+
`Failed to write snapshot at ${n}: ${String(r)}`
|
|
330
308
|
);
|
|
331
309
|
}
|
|
332
310
|
}
|
|
333
311
|
}
|
|
334
|
-
function
|
|
312
|
+
function Mt(t) {
|
|
335
313
|
return {
|
|
336
314
|
toContain: (e) => {
|
|
337
|
-
|
|
315
|
+
pt(t, e);
|
|
338
316
|
},
|
|
339
317
|
toMatchSnapshot: () => {
|
|
340
|
-
|
|
318
|
+
yt(t);
|
|
341
319
|
},
|
|
342
320
|
promptCheck: async (e) => {
|
|
343
|
-
await
|
|
321
|
+
await ut(t, e);
|
|
344
322
|
},
|
|
345
323
|
toBeClassifiedAs: async (e, n) => {
|
|
346
|
-
await
|
|
324
|
+
await dt(t, e, n);
|
|
347
325
|
}
|
|
348
326
|
};
|
|
349
327
|
}
|
|
350
328
|
export {
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
329
|
+
St as describe,
|
|
330
|
+
Mt as expect,
|
|
331
|
+
Tt as it,
|
|
332
|
+
$ as prompt,
|
|
333
|
+
Ft as promptFile,
|
|
334
|
+
Et as runCli
|
|
357
335
|
};
|
package/dist/katt.js
CHANGED
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
import { fileURLToPath as T, pathToFileURL as X } from "node:url";
|
|
2
|
+
import { readdir as M } from "node:fs/promises";
|
|
3
|
+
import { resolve as R } from "node:path";
|
|
4
|
+
import { AsyncLocalStorage as v } from "node:async_hooks";
|
|
5
|
+
import { readFileSync as U } from "node:fs";
|
|
6
|
+
const W = /\.eval\.(js|ts)$/, G = /* @__PURE__ */ new Set([".git", "node_modules"]);
|
|
7
|
+
async function J(t) {
|
|
8
|
+
const e = await M(t, { withFileTypes: !0 }), n = [];
|
|
9
|
+
return await Promise.all(
|
|
10
|
+
e.map(async (o) => {
|
|
11
|
+
const a = R(t, o.name);
|
|
12
|
+
if (o.isDirectory()) {
|
|
13
|
+
if (G.has(o.name))
|
|
14
|
+
return;
|
|
15
|
+
n.push(...await J(a));
|
|
16
|
+
return;
|
|
17
|
+
}
|
|
18
|
+
o.isFile() && W.test(o.name) && n.push(a);
|
|
19
|
+
})
|
|
20
|
+
), n;
|
|
21
|
+
}
|
|
22
|
+
const B = new v(), O = {
|
|
23
|
+
describeStack: [],
|
|
24
|
+
itStack: [],
|
|
25
|
+
tokenUsageStack: [],
|
|
26
|
+
modelStack: []
|
|
27
|
+
};
|
|
28
|
+
let L = 0, $ = 0;
|
|
29
|
+
const f = [], h = [];
|
|
30
|
+
let k = 0;
|
|
31
|
+
function i() {
|
|
32
|
+
return B.getStore() ?? O;
|
|
33
|
+
}
|
|
34
|
+
function Z(t) {
|
|
35
|
+
return {
|
|
36
|
+
describeStack: [...t.describeStack],
|
|
37
|
+
itStack: [...t.itStack],
|
|
38
|
+
tokenUsageStack: [...t.tokenUsageStack],
|
|
39
|
+
modelStack: [...t.modelStack]
|
|
40
|
+
};
|
|
41
|
+
}
|
|
42
|
+
function Y() {
|
|
43
|
+
return L += 1, `d${L}`;
|
|
44
|
+
}
|
|
45
|
+
function Q() {
|
|
46
|
+
return $ += 1, `i${$}`;
|
|
47
|
+
}
|
|
48
|
+
function St(t, e) {
|
|
49
|
+
const n = e ?? Z(i());
|
|
50
|
+
return B.run(n, t);
|
|
51
|
+
}
|
|
52
|
+
function At() {
|
|
53
|
+
return Z(i());
|
|
54
|
+
}
|
|
55
|
+
function kt(t) {
|
|
56
|
+
i().describeStack.push({ id: Y(), description: t });
|
|
57
|
+
}
|
|
58
|
+
function bt() {
|
|
59
|
+
i().describeStack.pop();
|
|
60
|
+
}
|
|
61
|
+
function V() {
|
|
62
|
+
return i().describeStack.map((t) => t.description).join(" > ");
|
|
63
|
+
}
|
|
64
|
+
function Lt() {
|
|
65
|
+
return [...i().describeStack];
|
|
66
|
+
}
|
|
67
|
+
function $t(t) {
|
|
68
|
+
i().itStack.push({ id: Q(), description: t }), i().tokenUsageStack.push(0), i().modelStack.push(void 0);
|
|
69
|
+
}
|
|
70
|
+
function wt() {
|
|
71
|
+
i().itStack.pop(), i().tokenUsageStack.pop(), i().modelStack.pop();
|
|
72
|
+
}
|
|
73
|
+
function K() {
|
|
74
|
+
return i().itStack.map((t) => t.description).join(" > ");
|
|
75
|
+
}
|
|
76
|
+
function jt() {
|
|
77
|
+
return [...i().itStack];
|
|
78
|
+
}
|
|
79
|
+
function vt(t) {
|
|
80
|
+
if (!Number.isFinite(t) || t <= 0)
|
|
81
|
+
return;
|
|
82
|
+
const e = i(), n = e.tokenUsageStack.length - 1;
|
|
83
|
+
n < 0 || (e.tokenUsageStack[n] += t);
|
|
84
|
+
}
|
|
85
|
+
function P() {
|
|
86
|
+
const t = i(), e = t.tokenUsageStack.length - 1;
|
|
87
|
+
return e < 0 ? 0 : t.tokenUsageStack[e] ?? 0;
|
|
88
|
+
}
|
|
89
|
+
function Jt(t) {
|
|
90
|
+
if (t.length === 0)
|
|
91
|
+
return;
|
|
92
|
+
const e = i(), n = e.modelStack.length - 1;
|
|
93
|
+
n < 0 || (e.modelStack[n] = t);
|
|
94
|
+
}
|
|
95
|
+
function z() {
|
|
96
|
+
const t = i(), e = t.modelStack.length - 1;
|
|
97
|
+
if (!(e < 0))
|
|
98
|
+
return t.modelStack[e];
|
|
99
|
+
}
|
|
100
|
+
function Bt(t) {
|
|
101
|
+
f.push(t);
|
|
102
|
+
}
|
|
103
|
+
function Zt() {
|
|
104
|
+
k += 1;
|
|
105
|
+
}
|
|
106
|
+
function E() {
|
|
107
|
+
return k;
|
|
108
|
+
}
|
|
109
|
+
function D() {
|
|
110
|
+
k = 0;
|
|
111
|
+
}
|
|
112
|
+
function xt(t) {
|
|
113
|
+
h.push(t);
|
|
114
|
+
}
|
|
115
|
+
function _() {
|
|
116
|
+
return [...h];
|
|
117
|
+
}
|
|
118
|
+
function Ht() {
|
|
119
|
+
return h.length;
|
|
120
|
+
}
|
|
121
|
+
function q() {
|
|
122
|
+
h.length = 0;
|
|
123
|
+
}
|
|
124
|
+
async function tt() {
|
|
125
|
+
const t = [];
|
|
126
|
+
for (; f.length > 0; ) {
|
|
127
|
+
const e = f.splice(0, f.length), n = await Promise.allSettled(e);
|
|
128
|
+
t.push(...n);
|
|
129
|
+
}
|
|
130
|
+
return t;
|
|
131
|
+
}
|
|
132
|
+
const et = "\x1B[1;36m", nt = "\x1B[33m", ot = "\x1B[38;5;208m", it = "\x1B[1;38;5;208m", m = "\x1B[0m";
|
|
133
|
+
function r(t) {
|
|
134
|
+
return `${et}${t}${m}`;
|
|
135
|
+
}
|
|
136
|
+
function p(t) {
|
|
137
|
+
return `${nt}${t}${m}`;
|
|
138
|
+
}
|
|
139
|
+
function w(t) {
|
|
140
|
+
return `${ot}${t}${m}`;
|
|
141
|
+
}
|
|
142
|
+
function st(t) {
|
|
143
|
+
return `${it}${t}${m}`;
|
|
144
|
+
}
|
|
145
|
+
let A = "";
|
|
146
|
+
function at() {
|
|
147
|
+
A = "";
|
|
148
|
+
}
|
|
149
|
+
function ct({
|
|
150
|
+
suitePath: t,
|
|
151
|
+
casePath: e,
|
|
152
|
+
didPass: n,
|
|
153
|
+
durationMs: o,
|
|
154
|
+
model: a,
|
|
155
|
+
tokenUsage: I
|
|
156
|
+
}) {
|
|
157
|
+
const c = t.length > 0 ? t : "(root)", C = e.length > 0 ? e : "(root)";
|
|
158
|
+
A !== c && (console.log(`Suite "${r(c)}"`), A = c);
|
|
159
|
+
const g = n ? "✅ Passed in" : "❌ Failed in", l = [
|
|
160
|
+
`Test "${r(C)}"`,
|
|
161
|
+
`- ${g} ${r(`${o}ms`)}`
|
|
162
|
+
];
|
|
163
|
+
a && l.push(`- Model ${r(a)}`), (I ?? 0) > 0 && l.push(`- Tokens used ${r(String(I))}`), l.push("---"), console.log(l.join(`
|
|
164
|
+
`));
|
|
165
|
+
}
|
|
166
|
+
function yt(t, e, n = "(root)") {
|
|
167
|
+
const o = K();
|
|
168
|
+
ct({
|
|
169
|
+
suitePath: V(),
|
|
170
|
+
casePath: o.length > 0 ? o : n,
|
|
171
|
+
didPass: t,
|
|
172
|
+
durationMs: e,
|
|
173
|
+
model: z(),
|
|
174
|
+
tokenUsage: P()
|
|
175
|
+
});
|
|
176
|
+
}
|
|
177
|
+
const rt = new v(), S = new URL("data:application/json;base64,ewogICJuYW1lIjogImthdHQiLAogICJ2ZXJzaW9uIjogIjAuMC42IiwKICAiZGVzY3JpcHRpb24iOiAiQ0xJIHRvb2wgdGhhdCB0ZXN0cyB0aGUgb3V0cHV0IG9mIGFnZW50aWMgQUkgdG9vbHMiLAogICJrZXl3b3JkcyI6IFsKICAgICJjbGkiLAogICAgImFpIiwKICAgICJhZ2VudGljLWFpIiwKICAgICJ0ZXN0aW5nIiwKICAgICJldmFsdWF0aW9uIgogIF0sCiAgImF1dGhvciI6ICJSYXBoYWVsIFBvcnRvIChodHRwczovL2dpdGh1Yi5jb20vcmFwaGFlbHBvcikiLAogICJsaWNlbnNlIjogIk1JVCIsCiAgInR5cGUiOiAibW9kdWxlIiwKICAibWFpbiI6ICJkaXN0L2luZGV4LmpzIiwKICAiZXhwb3J0cyI6IHsKICAgICIuIjogewogICAgICAidHlwZXMiOiAiLi9kaXN0L2luZGV4LmQudHMiLAogICAgICAiaW1wb3J0IjogIi4vZGlzdC9pbmRleC5qcyIKICAgIH0KICB9LAogICJiaW4iOiB7CiAgICAia2F0dCI6ICJkaXN0L2thdHQuanMiCiAgfSwKICAic2NyaXB0cyI6IHsKICAgICJidWlsZCI6ICJ2aXRlIGJ1aWxkIiwKICAgICJkZXYiOiAidHN4IHNyYy9pbmRleC50cyIsCiAgICAibGludCI6ICJiaW9tZSBsaW50IC4vc3JjIiwKICAgICJmb3JtYXQiOiAiYmlvbWUgZm9ybWF0IC0td3JpdGUgLi9zcmMiLAogICAgInRlc3QiOiAidml0ZXN0IiwKICAgICJ0eXBlY2hlY2siOiAidHNjIC1wIHRzY29uZmlnLmpzb24gLS1ub0VtaXQiLAogICAgInRlc3Q6YnVpbGQiOiAibm9kZSAuL2Rpc3Qva2F0dC5qcyIKICB9LAogICJ0eXBlcyI6ICJkaXN0L2luZGV4LmQudHMiLAogICJkZXZEZXBlbmRlbmNpZXMiOiB7CiAgICAiQGJpb21lanMvYmlvbWUiOiAiMS45LjQiLAogICAgIkB0eXBlcy9ub2RlIjogIjI1LjIuMCIsCiAgICAidHN4IjogIjQuMjEuMCIsCiAgICAidHlwZXNjcmlwdCI6ICI1LjguMiIsCiAgICAidml0ZSI6ICI3LjMuMSIsCiAgICAidml0ZS1wbHVnaW4tZHRzIjogIjQuNS40IiwKICAgICJ2aXRlc3QiOiAiMy4yLjQiLAogICAgInZzY29kZS1qc29ucnBjIjogIl44LjIuMSIKICB9LAogICJkZXBlbmRlbmNpZXMiOiB7CiAgICAiQGdpdGh1Yi9jb3BpbG90LXNkayI6ICJeMC4xLjIxIgogIH0sCiAgImJ1Z3MiOiB7CiAgICAidXJsIjogImh0dHBzOi8vZ2l0aHViLmNvbS9yYXBoYWVscG9yL2thdHQvaXNzdWVzIgogIH0sCiAgImhvbWVwYWdlIjogImh0dHBzOi8vZ2l0aHViLmNvbS9yYXBoYWVscG9yL2thdHQiCn0K", import.meta.url);
|
|
178
|
+
let d;
|
|
179
|
+
function lt() {
|
|
180
|
+
if (d !== void 0)
|
|
181
|
+
return d;
|
|
182
|
+
try {
|
|
183
|
+
const t = S.protocol === "data:" ? ut(S) : U(T(S), "utf8"), e = JSON.parse(t);
|
|
184
|
+
d = typeof e.version == "string" ? e.version : "unknown";
|
|
185
|
+
} catch {
|
|
186
|
+
d = "unknown";
|
|
187
|
+
}
|
|
188
|
+
return d;
|
|
189
|
+
}
|
|
190
|
+
function ut(t) {
|
|
191
|
+
const e = t.pathname.indexOf(",");
|
|
192
|
+
if (e < 0)
|
|
193
|
+
throw new Error("Invalid data URL.");
|
|
194
|
+
const n = t.pathname.slice(0, e), o = t.pathname.slice(e + 1);
|
|
195
|
+
return n.includes(";base64") ? Buffer.from(o, "base64").toString("utf8") : decodeURIComponent(o);
|
|
196
|
+
}
|
|
197
|
+
function j() {
|
|
198
|
+
const t = " ██╗ ██╗ █████╗ ████████╗████████╗", e = " ██║ ██╔╝██╔══██╗╚══██╔══╝╚══██╔══╝", n = " █████╔╝ ███████║ ██║ ██║", o = " ██╔═██╗ ██╔══██║ ██║ ██║", a = " ██║ ██╗██║ ██║ ██║ ██║", I = " ╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ ╚═╝", c = `v${lt()}`, C = Math.max(
|
|
199
|
+
0,
|
|
200
|
+
Math.floor((t.length - c.length) / 2)
|
|
201
|
+
), g = `${" ".repeat(C)}${c}`;
|
|
202
|
+
console.log(`
|
|
203
|
+
${p(t)}
|
|
204
|
+
${p(e)}
|
|
205
|
+
${p(n)}
|
|
206
|
+
${w(o)}
|
|
207
|
+
${w(a)}
|
|
208
|
+
${st(I)}
|
|
209
|
+
${p(g)}
|
|
210
|
+
`);
|
|
211
|
+
}
|
|
212
|
+
let x = !1;
|
|
213
|
+
function gt(t) {
|
|
214
|
+
x = t;
|
|
215
|
+
}
|
|
216
|
+
function Ft() {
|
|
217
|
+
return x;
|
|
218
|
+
}
|
|
219
|
+
function It(t) {
|
|
220
|
+
const e = String(t.getHours()).padStart(2, "0"), n = String(t.getMinutes()).padStart(2, "0"), o = String(t.getSeconds()).padStart(2, "0");
|
|
221
|
+
return `${e}:${n}:${o}`;
|
|
222
|
+
}
|
|
223
|
+
function dt() {
|
|
224
|
+
console.log(
|
|
225
|
+
[
|
|
226
|
+
"Usage:",
|
|
227
|
+
" katt [options]",
|
|
228
|
+
"",
|
|
229
|
+
"Options:",
|
|
230
|
+
" -h, --help Show CLI usage information",
|
|
231
|
+
" -u, --update-snapshots Update snapshot files on mismatch"
|
|
232
|
+
].join(`
|
|
233
|
+
`)
|
|
234
|
+
);
|
|
235
|
+
}
|
|
236
|
+
async function Nt() {
|
|
237
|
+
const t = process.argv.slice(2);
|
|
238
|
+
if (t.includes("--help") || t.includes("-h"))
|
|
239
|
+
return j(), dt(), 0;
|
|
240
|
+
const n = t.includes("--update-snapshots") || t.includes("-u");
|
|
241
|
+
gt(n), j();
|
|
242
|
+
const o = /* @__PURE__ */ new Date();
|
|
243
|
+
at(), q(), D();
|
|
244
|
+
const a = await J(process.cwd());
|
|
245
|
+
if (a.length === 0)
|
|
246
|
+
return console.log("No .eval.js or .eval.ts files found."), 1;
|
|
247
|
+
const c = (await Promise.allSettled(
|
|
248
|
+
a.map(
|
|
249
|
+
(s) => rt.run(
|
|
250
|
+
{ evalFile: s },
|
|
251
|
+
() => import(X(s).href)
|
|
252
|
+
)
|
|
253
|
+
)
|
|
254
|
+
)).map((s, u) => ({ result: s, file: a[u] })).filter(({ result: s }) => s.status === "rejected");
|
|
255
|
+
if (c.length > 0) {
|
|
256
|
+
for (const s of c) {
|
|
257
|
+
const u = s.result.status === "rejected" ? s.result.reason : void 0;
|
|
258
|
+
console.error(`Error executing ${s.file}: ${String(u)}`);
|
|
259
|
+
}
|
|
260
|
+
return 1;
|
|
261
|
+
}
|
|
262
|
+
const g = (await tt()).filter(
|
|
263
|
+
(s) => s.status === "rejected"
|
|
264
|
+
);
|
|
265
|
+
if (g.length > 0) {
|
|
266
|
+
for (const s of g)
|
|
267
|
+
s.status === "rejected" && console.error(`Error executing async test: ${String(s.reason)}`);
|
|
268
|
+
return 1;
|
|
269
|
+
}
|
|
270
|
+
const l = _();
|
|
271
|
+
if (l.length > 0) {
|
|
272
|
+
console.error("❌ Failed tests:");
|
|
273
|
+
for (const [s, u] of l.entries()) {
|
|
274
|
+
const b = [u.describePath, u.itPath].filter((N) => N.length > 0).join(" > "), F = b.length > 0 ? `${b}: ` : "";
|
|
275
|
+
console.error(`${s + 1}. ${F}${u.message}`);
|
|
276
|
+
}
|
|
277
|
+
return 1;
|
|
278
|
+
}
|
|
279
|
+
const H = E(), y = Date.now() - o.getTime();
|
|
280
|
+
return console.log(
|
|
281
|
+
[
|
|
282
|
+
"---",
|
|
283
|
+
`${r("Files")} ${a.length} passed`,
|
|
284
|
+
`${r("Evals")} ${H} passed`,
|
|
285
|
+
`${r("Start at")} ${It(o)}`,
|
|
286
|
+
`${r("Duration")} ${y}ms`
|
|
287
|
+
].join(`
|
|
288
|
+
`)
|
|
289
|
+
), 0;
|
|
290
|
+
}
|
|
291
|
+
export {
|
|
292
|
+
Zt as a,
|
|
293
|
+
Bt as b,
|
|
294
|
+
At as c,
|
|
295
|
+
wt as d,
|
|
296
|
+
kt as e,
|
|
297
|
+
bt as f,
|
|
298
|
+
Ht as g,
|
|
299
|
+
vt as h,
|
|
300
|
+
rt as i,
|
|
301
|
+
xt as j,
|
|
302
|
+
K as k,
|
|
303
|
+
yt as l,
|
|
304
|
+
V as m,
|
|
305
|
+
Ft as n,
|
|
306
|
+
Lt as o,
|
|
307
|
+
$t as p,
|
|
308
|
+
jt as q,
|
|
309
|
+
St as r,
|
|
310
|
+
Jt as s,
|
|
311
|
+
Nt as t
|
|
312
|
+
};
|
package/package.json
CHANGED
package/dist/runCli-B3oIBxOl.js
DELETED
|
@@ -1,317 +0,0 @@
|
|
|
1
|
-
import { fileURLToPath as Z, pathToFileURL as y } from "node:url";
|
|
2
|
-
import { readdir as N } from "node:fs/promises";
|
|
3
|
-
import { resolve as M } from "node:path";
|
|
4
|
-
import { AsyncLocalStorage as v } from "node:async_hooks";
|
|
5
|
-
import { readFileSync as X } from "node:fs";
|
|
6
|
-
const H = /\.eval\.(js|ts)$/, R = /* @__PURE__ */ new Set([".git", "node_modules"]);
|
|
7
|
-
async function w(t) {
|
|
8
|
-
const e = await N(t, { withFileTypes: !0 }), n = [];
|
|
9
|
-
return await Promise.all(
|
|
10
|
-
e.map(async (i) => {
|
|
11
|
-
const s = M(t, i.name);
|
|
12
|
-
if (i.isDirectory()) {
|
|
13
|
-
if (R.has(i.name))
|
|
14
|
-
return;
|
|
15
|
-
n.push(...await w(s));
|
|
16
|
-
return;
|
|
17
|
-
}
|
|
18
|
-
i.isFile() && H.test(i.name) && n.push(s);
|
|
19
|
-
})
|
|
20
|
-
), n;
|
|
21
|
-
}
|
|
22
|
-
const j = new v(), W = {
|
|
23
|
-
describeStack: [],
|
|
24
|
-
itStack: [],
|
|
25
|
-
tokenUsageStack: [],
|
|
26
|
-
modelStack: []
|
|
27
|
-
};
|
|
28
|
-
let b = 0, L = 0;
|
|
29
|
-
const p = [], f = [];
|
|
30
|
-
let k = 0;
|
|
31
|
-
function o() {
|
|
32
|
-
return j.getStore() ?? W;
|
|
33
|
-
}
|
|
34
|
-
function J(t) {
|
|
35
|
-
return {
|
|
36
|
-
describeStack: [...t.describeStack],
|
|
37
|
-
itStack: [...t.itStack],
|
|
38
|
-
tokenUsageStack: [...t.tokenUsageStack],
|
|
39
|
-
modelStack: [...t.modelStack]
|
|
40
|
-
};
|
|
41
|
-
}
|
|
42
|
-
function U() {
|
|
43
|
-
return b += 1, `d${b}`;
|
|
44
|
-
}
|
|
45
|
-
function G() {
|
|
46
|
-
return L += 1, `i${L}`;
|
|
47
|
-
}
|
|
48
|
-
function B(t) {
|
|
49
|
-
return t.tokenUsageStack.length - 1;
|
|
50
|
-
}
|
|
51
|
-
function F(t) {
|
|
52
|
-
return t.modelStack.length - 1;
|
|
53
|
-
}
|
|
54
|
-
function bt(t, e) {
|
|
55
|
-
const n = e ?? J(o());
|
|
56
|
-
return j.run(n, t);
|
|
57
|
-
}
|
|
58
|
-
function Lt() {
|
|
59
|
-
return J(o());
|
|
60
|
-
}
|
|
61
|
-
function $t(t) {
|
|
62
|
-
o().describeStack.push({ id: U(), description: t });
|
|
63
|
-
}
|
|
64
|
-
function vt() {
|
|
65
|
-
o().describeStack.pop();
|
|
66
|
-
}
|
|
67
|
-
function O() {
|
|
68
|
-
return o().describeStack.map((t) => t.description).join(" > ");
|
|
69
|
-
}
|
|
70
|
-
function wt() {
|
|
71
|
-
return [...o().describeStack];
|
|
72
|
-
}
|
|
73
|
-
function jt(t) {
|
|
74
|
-
const e = o();
|
|
75
|
-
e.itStack.push({ id: G(), description: t }), e.tokenUsageStack.push(0), e.modelStack.push(void 0);
|
|
76
|
-
}
|
|
77
|
-
function Jt() {
|
|
78
|
-
const t = o();
|
|
79
|
-
t.itStack.pop(), t.tokenUsageStack.pop(), t.modelStack.pop();
|
|
80
|
-
}
|
|
81
|
-
function Y() {
|
|
82
|
-
return o().itStack.map((t) => t.description).join(" > ");
|
|
83
|
-
}
|
|
84
|
-
function Bt() {
|
|
85
|
-
return [...o().itStack];
|
|
86
|
-
}
|
|
87
|
-
function Ft(t) {
|
|
88
|
-
if (!Number.isFinite(t) || t <= 0)
|
|
89
|
-
return;
|
|
90
|
-
const e = o(), n = B(e);
|
|
91
|
-
n < 0 || (e.tokenUsageStack[n] += t);
|
|
92
|
-
}
|
|
93
|
-
function Q() {
|
|
94
|
-
const t = o(), e = B(t);
|
|
95
|
-
return e < 0 ? 0 : t.tokenUsageStack[e] ?? 0;
|
|
96
|
-
}
|
|
97
|
-
function xt(t) {
|
|
98
|
-
if (t.length === 0)
|
|
99
|
-
return;
|
|
100
|
-
const e = o(), n = F(e);
|
|
101
|
-
n < 0 || (e.modelStack[n] = t);
|
|
102
|
-
}
|
|
103
|
-
/**
 * Reads the value stored on the innermost model slot.
 * @returns {string | undefined} The slot value; undefined when the stack is empty.
 */
function V() {
  const state = o();
  const top = F(state);
  return top < 0 ? undefined : state.modelStack[top];
}
|
|
108
|
-
/**
 * Appends `t` to the module-level list `p`.
 * `D()` later drains that list through `Promise.allSettled`, so entries are
 * expected to be promises.
 * @param {Promise<unknown>} t - Pending work to track.
 * @returns {void}
 */
function Tt(t) {
  p.push(t);
}
|
|
111
|
-
/**
 * Adds one to the module-level counter `k`.
 * The CLI summary reports this counter as the number of passed evals.
 * @returns {void}
 */
function Zt() {
  k = k + 1;
}
|
|
114
|
-
/**
 * Reads the module-level counter `k`.
 * @returns {number} Current counter value.
 */
function K() {
  const current = k;
  return current;
}
|
|
117
|
-
/**
 * Resets the module-level counter `k` to zero.
 * @returns {void}
 */
function P() {
  k = 0;
}
|
|
120
|
-
/**
 * Appends `t` to the module-level list `f`.
 * The CLI prints the entries of `f` under "Failed tests", reading
 * `describePath`, `itPath` and `message` from each one.
 * @param {object} t - Failure record to store.
 * @returns {void}
 */
function yt(t) {
  f.push(t);
}
|
|
123
|
-
/**
 * Returns a shallow copy of the module-level list `f`.
 * @returns {object[]} New array with the same entries.
 */
function E() {
  return Array.from(f);
}
|
|
126
|
-
/**
 * Reports how many entries the module-level list `f` holds.
 * @returns {number} Length of `f`.
 */
function Nt() {
  const { length } = f;
  return length;
}
|
|
129
|
-
/**
 * Empties the module-level list `f` in place, keeping the same array object.
 * @returns {void}
 */
function z() {
  f.splice(0, f.length);
}
|
|
132
|
-
/**
 * Drains the module-level list `p` until it stays empty.
 * Each pass removes everything currently queued and waits for it to settle;
 * the loop repeats because more entries may be queued while a batch is awaited.
 * @returns {Promise<PromiseSettledResult<unknown>[]>} Settled results in drain order.
 */
async function D() {
  const settled = [];
  while (p.length > 0) {
    const batch = p.splice(0, p.length);
    const outcomes = await Promise.allSettled(batch);
    for (const outcome of outcomes) {
      settled.push(outcome);
    }
  }
  return settled;
}
|
|
140
|
-
// ANSI SGR escape sequences used by the colouring helpers.
// SGR 1;36 (bold + cyan).
const _ = "\x1B[1;36m";
// SGR 33 (yellow).
const q = "\x1B[33m";
// SGR 38;5;208 (256-colour palette index 208).
const tt = "\x1B[38;5;208m";
// SGR 1;38;5;208 (bold + palette index 208).
const et = "\x1B[1;38;5;208m";
// SGR 0 (reset all attributes).
const m = "\x1B[0m";
|
|
141
|
-
/**
 * Wraps `t` between the `_` escape sequence and the `m` reset sequence.
 * @param {*} t - Value to colour; converted to a string.
 * @returns {string} The wrapped text.
 */
function r(t) {
  return _.concat(t, m);
}
|
|
144
|
-
/**
 * Wraps `t` between the `q` escape sequence and the `m` reset sequence.
 * @param {*} t - Value to colour; converted to a string.
 * @returns {string} The wrapped text.
 */
function C(t) {
  return q.concat(t, m);
}
|
|
147
|
-
/**
 * Wraps `t` between the `tt` escape sequence and the `m` reset sequence.
 * @param {*} t - Value to colour; converted to a string.
 * @returns {string} The wrapped text.
 */
function $(t) {
  return tt.concat(t, m);
}
|
|
150
|
-
/**
 * Wraps `t` between the `et` escape sequence and the `m` reset sequence.
 * @param {*} t - Value to colour; converted to a string.
 * @returns {string} The wrapped text.
 */
function nt(t) {
  return et.concat(t, m);
}
|
|
153
|
-
// Label of the suite whose `Suite "..."` header was printed most recently.
// "" means no header has been printed since the last reset.
let A = "";
|
|
154
|
-
/**
 * Clears the remembered suite label `A`, so the next reported test prints
 * its suite header again.
 * @returns {void}
 */
function it() {
  A = "";
}
|
|
157
|
-
/**
 * Prints the report for one finished test to stdout.
 * A `Suite "..."` header is printed only when the suite label differs from
 * the last one printed (tracked in `A`). Empty paths are shown as "(root)".
 * @param {object} result
 * @param {string} result.suitePath - Joined describe path.
 * @param {string} result.casePath - Joined it path.
 * @param {boolean} result.didPass - Selects the passed/failed wording.
 * @param {number} result.durationMs - Elapsed time shown with an "ms" suffix.
 * @param {string} [result.model] - Printed only when truthy.
 * @param {number} [result.tokenUsage] - Printed only when greater than 0.
 * @returns {void}
 */
function ot({
  suitePath,
  casePath,
  didPass,
  durationMs,
  model,
  tokenUsage
}) {
  const suiteLabel = suitePath.length > 0 ? suitePath : "(root)";
  const caseLabel = casePath.length > 0 ? casePath : "(root)";
  if (A !== suiteLabel) {
    console.log(`Suite "${r(suiteLabel)}"`);
    A = suiteLabel;
  }
  const verdict = didPass ? "✅ Passed in" : "❌ Failed in";
  const report = [
    `Test "${r(caseLabel)}"`,
    `- ${verdict} ${r(`${durationMs}ms`)}`
  ];
  if (model) {
    report.push(`- Model ${r(model)}`);
  }
  if ((tokenUsage ?? 0) > 0) {
    report.push(`- Tokens used ${r(String(tokenUsage))}`);
  }
  report.push("---");
  console.log(report.join("\n"));
}
|
|
174
|
-
/**
 * Reports the outcome of the test currently on the it stack.
 * Gathers the suite path, case path, model and token usage from the active
 * context and passes them to `ot`.
 * @param {boolean} t - Whether the test passed.
 * @param {number} e - Duration in milliseconds.
 * @param {string} [n="(root)"] - Case label used when the it path is empty.
 * @returns {void}
 */
function Mt(t, e, n = "(root)") {
  const itPath = Y();
  const suitePath = O();
  const casePath = itPath.length > 0 ? itPath : n;
  const model = V();
  const tokenUsage = Q();
  ot({
    suitePath,
    casePath,
    didPass: t,
    durationMs: e,
    model,
    tokenUsage
  });
}
|
|
185
|
-
// `st`: instance of `v` (imported outside this chunk; presumably an async-local
// storage class - confirm). The CLI uses `st.run({ evalFile }, ...)` per eval file.
// `h`: data: URL holding an inlined, base64-encoded JSON document; its first
// fields decode to name "katt" and version "0.0.5". `at()` reads `version` from it.
// NOTE(review): looks like a build-time copy of package.json - confirm the
// embedded version matches the published package version.
const st = new v(), h = new URL("data:application/json;base64,ewogICJuYW1lIjogImthdHQiLAogICJ2ZXJzaW9uIjogIjAuMC41IiwKICAiZGVzY3JpcHRpb24iOiAiQ0xJIHRvb2wgdGhhdCB0ZXN0cyB0aGUgb3V0cHV0IG9mIGFnZW50aWMgQUkgdG9vbHMiLAogICJrZXl3b3JkcyI6IFsKICAgICJjbGkiLAogICAgImFpIiwKICAgICJhZ2VudGljLWFpIiwKICAgICJ0ZXN0aW5nIiwKICAgICJldmFsdWF0aW9uIgogIF0sCiAgImF1dGhvciI6ICJSYXBoYWVsIFBvcnRvIChodHRwczovL2dpdGh1Yi5jb20vcmFwaGFlbHBvcikiLAogICJsaWNlbnNlIjogIk1JVCIsCiAgInR5cGUiOiAibW9kdWxlIiwKICAibWFpbiI6ICJkaXN0L2luZGV4LmpzIiwKICAiZXhwb3J0cyI6IHsKICAgICIuIjogewogICAgICAidHlwZXMiOiAiLi9kaXN0L2luZGV4LmQudHMiLAogICAgICAiaW1wb3J0IjogIi4vZGlzdC9pbmRleC5qcyIKICAgIH0KICB9LAogICJiaW4iOiB7CiAgICAia2F0dCI6ICJkaXN0L2thdHQuanMiCiAgfSwKICAic2NyaXB0cyI6IHsKICAgICJidWlsZCI6ICJ2aXRlIGJ1aWxkIiwKICAgICJkZXYiOiAidHN4IHNyYy9pbmRleC50cyIsCiAgICAibGludCI6ICJiaW9tZSBsaW50IC4vc3JjIiwKICAgICJmb3JtYXQiOiAiYmlvbWUgZm9ybWF0IC0td3JpdGUgLi9zcmMiLAogICAgInRlc3QiOiAidml0ZXN0IiwKICAgICJ0eXBlY2hlY2siOiAidHNjIC1wIHRzY29uZmlnLmpzb24gLS1ub0VtaXQiLAogICAgInRlc3Q6YnVpbGQiOiAibm9kZSAuL2Rpc3Qva2F0dC5qcyIKICB9LAogICJ0eXBlcyI6ICJkaXN0L2luZGV4LmQudHMiLAogICJkZXZEZXBlbmRlbmNpZXMiOiB7CiAgICAiQGJpb21lanMvYmlvbWUiOiAiMS45LjQiLAogICAgIkB0eXBlcy9ub2RlIjogIjI1LjIuMCIsCiAgICAidHN4IjogIjQuMjEuMCIsCiAgICAidHlwZXNjcmlwdCI6ICI1LjguMiIsCiAgICAidml0ZSI6ICI3LjMuMSIsCiAgICAidml0ZS1wbHVnaW4tZHRzIjogIjQuNS40IiwKICAgICJ2aXRlc3QiOiAiMy4yLjQiLAogICAgInZzY29kZS1qc29ucnBjIjogIl44LjIuMSIKICB9LAogICJkZXBlbmRlbmNpZXMiOiB7CiAgICAiQGdpdGh1Yi9jb3BpbG90LXNkayI6ICJeMC4xLjIxIgogIH0sCiAgImJ1Z3MiOiB7CiAgICAidXJsIjogImh0dHBzOi8vZ2l0aHViLmNvbS9yYXBoYWVscG9yL2thdHQvaXNzdWVzIgogIH0sCiAgImhvbWVwYWdlIjogImh0dHBzOi8vZ2l0aHViLmNvbS9yYXBoYWVscG9yL2thdHQiCn0K", import.meta.url);
|
|
186
|
-
// Cached result of at(); stays undefined until the version has been resolved once.
let d;
|
|
187
|
-
/**
 * Resolves the version string shown in the banner, caching it in `d`.
 * Reads the JSON behind `h`: decoded in memory for a data: URL, otherwise
 * loaded via `X(Z(h), "utf8")`.
 * NOTE(review): `X` and `Z` are imported outside this chunk; presumably
 * readFileSync and fileURLToPath - confirm.
 * Any read/parse error, or a non-string `version`, yields "unknown" on purpose
 * so the banner never fails.
 * @returns {string} The version, or "unknown".
 */
function at() {
  if (d === undefined) {
    try {
      const raw = h.protocol === "data:" ? rt(h) : X(Z(h), "utf8");
      const manifest = JSON.parse(raw);
      d = typeof manifest.version === "string" ? manifest.version : "unknown";
    } catch {
      d = "unknown";
    }
  }
  return d;
}
|
|
198
|
-
/**
 * Decodes the payload of a data: URL into a UTF-8 string.
 *
 * @param {URL} t - Parsed data: URL.
 * @returns {string} Decoded payload: base64-decoded when the header carries a
 *   `;base64` marker (matched case-insensitively), percent-decoded otherwise.
 * @throws {Error} "Invalid data URL." when no comma separates header and payload.
 * @throws {URIError} When a non-base64 payload contains a malformed percent escape.
 */
function rt(t) {
  // The URL parser moves everything after the first "?" into `search`, but for
  // a data: URL that text is still part of the body, so it is stitched back on.
  const body = `${t.pathname}${t.search}`;
  const commaIndex = body.indexOf(",");
  if (commaIndex < 0) {
    throw new Error("Invalid data URL.");
  }
  const header = body.slice(0, commaIndex);
  const payload = body.slice(commaIndex + 1);
  // The base64 marker is case-insensitive, so ";BASE64" must be honoured too.
  const isBase64 = header.toLowerCase().includes(";base64");
  return isBase64
    ? Buffer.from(payload, "base64").toString("utf8")
    : decodeURIComponent(payload);
}
|
|
205
|
-
/**
 * Prints the startup banner to stdout: six rows of block-character art
 * followed by a `v<version>` line centred under the first row.
 * Rows 1-3 and the version line use `C`, rows 4-5 use `$`, row 6 uses `nt`.
 * @returns {void}
 */
function ct() {
  const row1 = " ██╗ ██╗ █████╗ ████████╗████████╗";
  const row2 = " ██║ ██╔╝██╔══██╗╚══██╔══╝╚══██╔══╝";
  const row3 = " █████╔╝ ███████║ ██║ ██║";
  const row4 = " ██╔═██╗ ██╔══██║ ██║ ██║";
  const row5 = " ██║ ██╗██║ ██║ ██║ ██║";
  const row6 = " ╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ ╚═╝";
  const versionLabel = `v${at()}`;
  // Left padding that centres the label relative to the first art row;
  // clamped at 0 in case the label is wider than the art.
  const padding = Math.max(
    0,
    Math.floor((row1.length - versionLabel.length) / 2)
  );
  const versionLine = `${" ".repeat(padding)}${versionLabel}`;
  // Leading and trailing "" give the banner a blank line before and after.
  const banner = [
    "",
    C(row1),
    C(row2),
    C(row3),
    $(row4),
    $(row5),
    nt(row6),
    C(versionLine),
    ""
  ];
  console.log(banner.join("\n"));
}
|
|
220
|
-
// Module-level flag written by lt() and read by Xt(); the CLI sets it from
// the --update-snapshots / -u arguments. Starts off disabled.
let x = false;
|
|
221
|
-
/**
 * Stores `t` in the module-level flag `x`.
 * @param {boolean} t - New flag value.
 * @returns {void}
 */
function lt(t) {
  x = t;
}
|
|
224
|
-
/**
 * Reads the module-level flag `x`.
 * @returns {boolean} Current flag value.
 */
function Xt() {
  const flag = x;
  return flag;
}
|
|
227
|
-
/**
 * Formats the local time of a Date as zero-padded `HH:MM:SS`.
 * @param {Date} t - Date to format.
 * @returns {string} Time such as "03:04:05".
 */
function ut(t) {
  const twoDigits = (value) => String(value).padStart(2, "0");
  const parts = [t.getHours(), t.getMinutes(), t.getSeconds()];
  return parts.map((part) => twoDigits(part)).join(":");
}
|
|
231
|
-
/**
 * Tells whether the argument list contains `--update-snapshots` or `-u`.
 * @param {string[]} t - Command-line arguments.
 * @returns {boolean} True when either flag is present.
 */
function gt(t) {
  const flags = ["--update-snapshots", "-u"];
  return flags.some((flag) => t.includes(flag));
}
|
|
234
|
-
/**
 * Joins a failure's describe path and it path with " > ", leaving out
 * whichever part is empty.
 * @param {{ describePath: string, itPath: string }} t - Failure record.
 * @returns {string} Combined location; "" when both parts are empty.
 */
function It(t) {
  const segments = [];
  for (const part of [t.describePath, t.itPath]) {
    if (part.length > 0) {
      segments.push(part);
    }
  }
  return segments.join(" > ");
}
|
|
237
|
-
/**
 * Writes one stderr line per eval file that failed to execute.
 * @param {Iterable<{ file: string, reason: unknown }>} t - Failed files with their rejection reasons.
 * @returns {void}
 */
function dt(t) {
  for (const { file, reason } of t) {
    const detail = String(reason);
    console.error(`Error executing ${file}: ${detail}`);
  }
}
|
|
241
|
-
/**
 * Writes one stderr line per rejected async test.
 * @param {Iterable<{ reason: unknown }>} t - Rejected settle results.
 * @returns {void}
 */
function Ct(t) {
  for (const { reason } of t) {
    const detail = String(reason);
    console.error(`Error executing async test: ${detail}`);
  }
}
|
|
245
|
-
/**
 * Writes the numbered list of failed tests to stderr, under a heading line.
 * Each line is `<n>. <location>: <message>`; the `<location>: ` prefix is
 * dropped when `It` returns an empty location.
 * @param {Array<{ describePath: string, itPath: string, message: string }>} t - Failure records.
 * @returns {void}
 */
function pt(t) {
  console.error("❌ Failed tests:");
  for (const [index, failure] of t.entries()) {
    const location = It(failure);
    let prefix = "";
    if (location.length > 0) {
      prefix = `${location}: `;
    }
    const ordinal = index + 1;
    console.error(`${ordinal}. ${prefix}${failure.message}`);
  }
}
|
|
252
|
-
/**
 * Builds the multi-line summary printed after a fully successful run.
 * @param {number} t - Number of eval files.
 * @param {number} e - Number of passed evals.
 * @param {Date} n - When the run started; shown as HH:MM:SS.
 * @param {number} i - Total duration in milliseconds.
 * @returns {string} Summary lines joined by newlines, starting with "---".
 */
function ft(t, e, n, i) {
  const rows = ["---"];
  rows.push(`${r("Files")} ${t} passed`);
  rows.push(`${r("Evals")} ${e} passed`);
  rows.push(`${r("Start at")} ${ut(n)}`);
  rows.push(`${r("Duration")} ${i}ms`);
  return rows.join("\n");
}
|
|
262
|
-
/**
 * CLI entry point: finds the eval files, imports them and reports the outcome.
 *
 * Steps, in order:
 *  1. Read the snapshot-update flag from argv, store it, print the banner.
 *  2. Reset per-run state (last suite label, failure list, pass counter).
 *  3. Find eval files under the current working directory via `w`.
 *  4. Import every file inside `st.run({ evalFile }, ...)`, all at once.
 *  5. Wait for the async work queued during the imports (`D`).
 *  6. Print either the first category of errors found or the summary.
 *
 * NOTE(review): `w` and `y` are imported outside this chunk; presumably a
 * file-discovery helper and pathToFileURL - confirm.
 *
 * @returns {Promise<number>} 0 on success. 1 when no eval files are found,
 *   a file import rejects, a queued promise rejects, or a failure was recorded.
 */
async function Ht() {
  const cliArgs = process.argv.slice(2);
  const shouldUpdateSnapshots = gt(cliArgs);
  lt(shouldUpdateSnapshots);
  ct();

  const startedAt = /* @__PURE__ */ new Date();
  it();
  z();
  P();

  const evalFiles = await w(process.cwd());
  if (evalFiles.length === 0) {
    console.log("No .eval.js or .eval.ts files found.");
    return 1;
  }

  const importOutcomes = await Promise.allSettled(
    evalFiles.map(
      (file) => st.run(
        { evalFile: file },
        () => import(y(file).href)
      )
    )
  );

  // Pair each rejected import with the file it belongs to (same index).
  const fileFailures = [];
  importOutcomes.forEach((outcome, index) => {
    if (outcome.status === "rejected") {
      fileFailures.push({ file: evalFiles[index], reason: outcome.reason });
    }
  });
  if (fileFailures.length > 0) {
    dt(fileFailures);
    return 1;
  }

  const queuedOutcomes = await D();
  const asyncFailures = queuedOutcomes.filter(
    (outcome) => outcome.status === "rejected"
  );
  if (asyncFailures.length > 0) {
    Ct(asyncFailures);
    return 1;
  }

  const recordedFailures = E();
  if (recordedFailures.length > 0) {
    pt(recordedFailures);
    return 1;
  }

  const passedCount = K();
  const elapsedMs = Date.now() - startedAt.getTime();
  console.log(
    ft(evalFiles.length, passedCount, startedAt, elapsedMs)
  );
  return 0;
}
|
|
296
|
-
export {
|
|
297
|
-
Tt as a,
|
|
298
|
-
Jt as b,
|
|
299
|
-
Lt as c,
|
|
300
|
-
Zt as d,
|
|
301
|
-
$t as e,
|
|
302
|
-
vt as f,
|
|
303
|
-
Nt as g,
|
|
304
|
-
st as h,
|
|
305
|
-
Ft as i,
|
|
306
|
-
yt as j,
|
|
307
|
-
Y as k,
|
|
308
|
-
Mt as l,
|
|
309
|
-
O as m,
|
|
310
|
-
Xt as n,
|
|
311
|
-
wt as o,
|
|
312
|
-
jt as p,
|
|
313
|
-
Bt as q,
|
|
314
|
-
bt as r,
|
|
315
|
-
xt as s,
|
|
316
|
-
Ht as t
|
|
317
|
-
};
|