katt 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3):
  1. package/README.md +6 -1
  2. package/dist/index.js +272 -249
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Katt is a lightweight testing framework for running AI Evals, inspired by [Jest](https://github.com/jestjs/jest).
4
4
 
5
- <img src="docs/logo.png" alt="Katt logo" width="250" />
5
+ <img src="https://raw.githubusercontent.com/raphaelpor/katt/main/docs/logo.png" alt="Katt logo" width="250" />
6
6
 
7
7
  ## Overview
8
8
 
@@ -37,6 +37,7 @@ describe("Greeting agent", () => {
37
37
  - **Classification Matcher**: Built-in `toBeClassifiedAs()` matcher to grade a response against a target label on a 1-5 scale
38
38
  - **Concurrent Execution**: Runs eval files concurrently for faster test execution
39
39
  - **Model Selection**: Support for specifying custom AI models
40
+ - **Configurable Timeouts**: Override prompt wait time per test or via `katt.json`
40
41
 
41
42
  ## Usage
42
43
 
@@ -89,6 +90,9 @@ You can also set a default model for the project by adding a `katt.json` file in
89
90
  {
90
91
  "copilot": {
91
92
  "model": "gpt-5-mini"
93
+ },
94
+ "prompt": {
95
+ "timeoutMs": 240000
92
96
  }
93
97
  }
94
98
  ```
@@ -97,6 +101,7 @@ When this file exists:
97
101
 
98
102
  - `prompt("...")` and `promptFile("...")` use `copilot.model` by default
99
103
  - `prompt("...", { model: "..." })` still overrides the config value
104
+ - `prompt.timeoutMs` sets the default wait timeout for long-running prompts
100
105
 
101
106
  ## Development
102
107
 
package/dist/index.js CHANGED
@@ -1,23 +1,23 @@
1
1
  #!/usr/bin/env node
2
- import { AsyncLocalStorage as E } from "node:async_hooks";
3
- import { CopilotClient as et } from "@github/copilot-sdk";
4
- import { readFile as M, readdir as nt } from "node:fs/promises";
5
- import { resolve as x, dirname as T, isAbsolute as ot, basename as it, join as st } from "node:path";
6
- import { readFileSync as Y, writeFileSync as Z, mkdirSync as rt } from "node:fs";
7
- import { fileURLToPath as ct, pathToFileURL as at } from "node:url";
8
- const D = new E(), lt = {
2
+ import { AsyncLocalStorage as z } from "node:async_hooks";
3
+ import { CopilotClient as it } from "@github/copilot-sdk";
4
+ import { readFile as X, readdir as st } from "node:fs/promises";
5
+ import { resolve as J, dirname as R, isAbsolute as rt, basename as ct, join as at } from "node:path";
6
+ import { readFileSync as V, writeFileSync as W, mkdirSync as lt } from "node:fs";
7
+ import { fileURLToPath as ut, pathToFileURL as dt } from "node:url";
8
+ const Y = new z(), ft = {
9
9
  describeStack: [],
10
10
  itStack: [],
11
11
  tokenUsageStack: [],
12
12
  modelStack: []
13
13
  };
14
- let U = 0, W = 0;
15
- const w = [], A = [];
16
- let J = 0;
14
+ let Z = 0, E = 0;
15
+ const w = [], y = [];
16
+ let U = 0;
17
17
  function u() {
18
- return D.getStore() ?? lt;
18
+ return Y.getStore() ?? ft;
19
19
  }
20
- function H(t) {
20
+ function Q(t) {
21
21
  return {
22
22
  describeStack: [...t.describeStack],
23
23
  itStack: [...t.itStack],
@@ -25,83 +25,83 @@ function H(t) {
25
25
  modelStack: [...t.modelStack]
26
26
  };
27
27
  }
28
- function ut() {
29
- return U += 1, `d${U}`;
28
+ function gt() {
29
+ return Z += 1, `d${Z}`;
30
30
  }
31
- function dt() {
32
- return W += 1, `i${W}`;
31
+ function pt() {
32
+ return E += 1, `i${E}`;
33
33
  }
34
- function X(t, e) {
35
- const n = e ?? H(u());
36
- return D.run(n, t);
34
+ function K(t, e) {
35
+ const n = e ?? Q(u());
36
+ return Y.run(n, t);
37
37
  }
38
- function z() {
39
- return H(u());
38
+ function _() {
39
+ return Q(u());
40
40
  }
41
- function gt(t) {
42
- u().describeStack.push({ id: ut(), description: t });
41
+ function ht(t) {
42
+ u().describeStack.push({ id: gt(), description: t });
43
43
  }
44
- function b() {
44
+ function x() {
45
45
  u().describeStack.pop();
46
46
  }
47
- function P() {
47
+ function q() {
48
48
  return u().describeStack.map((t) => t.description).join(" > ");
49
49
  }
50
- function ft(t) {
51
- u().itStack.push({ id: dt(), description: t }), u().tokenUsageStack.push(0), u().modelStack.push(void 0);
50
+ function It(t) {
51
+ u().itStack.push({ id: pt(), description: t }), u().tokenUsageStack.push(0), u().modelStack.push(void 0);
52
52
  }
53
- function v() {
53
+ function F() {
54
54
  u().itStack.pop(), u().tokenUsageStack.pop(), u().modelStack.pop();
55
55
  }
56
- function V() {
56
+ function tt() {
57
57
  return u().itStack.map((t) => t.description).join(" > ");
58
58
  }
59
- function pt(t) {
59
+ function mt(t) {
60
60
  if (!Number.isFinite(t) || t <= 0)
61
61
  return;
62
62
  const e = u(), n = e.tokenUsageStack.length - 1;
63
63
  n < 0 || (e.tokenUsageStack[n] += t);
64
64
  }
65
- function ht() {
65
+ function Ct() {
66
66
  const t = u(), e = t.tokenUsageStack.length - 1;
67
67
  return e < 0 ? 0 : t.tokenUsageStack[e] ?? 0;
68
68
  }
69
- function It(t) {
69
+ function St(t) {
70
70
  if (t.length === 0)
71
71
  return;
72
72
  const e = u(), n = e.modelStack.length - 1;
73
73
  n < 0 || (e.modelStack[n] = t);
74
74
  }
75
- function Ct() {
75
+ function At() {
76
76
  const t = u(), e = t.modelStack.length - 1;
77
77
  if (!(e < 0))
78
78
  return t.modelStack[e];
79
79
  }
80
- function $(t) {
80
+ function v(t) {
81
81
  w.push(t);
82
82
  }
83
- function mt() {
84
- J += 1;
83
+ function bt() {
84
+ U += 1;
85
85
  }
86
- function St() {
87
- return J;
86
+ function $t() {
87
+ return U;
88
88
  }
89
- function wt() {
90
- J = 0;
89
+ function kt() {
90
+ U = 0;
91
91
  }
92
- function At(t) {
93
- A.push(t);
92
+ function wt(t) {
93
+ y.push(t);
94
94
  }
95
- function $t() {
96
- return [...A];
95
+ function yt() {
96
+ return [...y];
97
97
  }
98
- function B() {
99
- return A.length;
98
+ function O() {
99
+ return y.length;
100
100
  }
101
- function kt() {
102
- A.length = 0;
101
+ function vt() {
102
+ y.length = 0;
103
103
  }
104
- async function yt() {
104
+ async function Lt() {
105
105
  const t = [];
106
106
  for (; w.length > 0; ) {
107
107
  const e = w.splice(0, w.length), n = await Promise.allSettled(e);
@@ -109,41 +109,41 @@ async function yt() {
109
109
  }
110
110
  return t;
111
111
  }
112
- function bt(t, e) {
113
- X(() => {
114
- gt(t);
112
+ function jt(t, e) {
113
+ K(() => {
114
+ ht(t);
115
115
  try {
116
116
  const n = e();
117
117
  if (n && typeof n.then == "function") {
118
- $(
118
+ v(
119
119
  n.finally(() => {
120
- b();
120
+ x();
121
121
  })
122
122
  );
123
123
  return;
124
124
  }
125
125
  } catch (n) {
126
- throw b(), n;
126
+ throw x(), n;
127
127
  }
128
- b();
129
- }, z());
128
+ x();
129
+ }, _());
130
130
  }
131
- const vt = "\x1B[1;36m", Lt = "\x1B[33m", jt = "\x1B[38;5;208m", Ft = "\x1B[1;38;5;208m", k = "\x1B[0m";
131
+ const Tt = "\x1B[1;36m", xt = "\x1B[33m", Ft = "\x1B[38;5;208m", Mt = "\x1B[1;38;5;208m", L = "\x1B[0m";
132
132
  function f(t) {
133
- return `${vt}${t}${k}`;
133
+ return `${Tt}${t}${L}`;
134
134
  }
135
- function m(t) {
136
- return `${Lt}${t}${k}`;
135
+ function $(t) {
136
+ return `${xt}${t}${L}`;
137
137
  }
138
- function G(t) {
139
- return `${jt}${t}${k}`;
138
+ function D(t) {
139
+ return `${Ft}${t}${L}`;
140
140
  }
141
- function xt(t) {
142
- return `${Ft}${t}${k}`;
141
+ function Nt(t) {
142
+ return `${Mt}${t}${L}`;
143
143
  }
144
- let F = "";
145
- function Tt() {
146
- F = "";
144
+ let B = "";
145
+ function Bt() {
146
+ B = "";
147
147
  }
148
148
  function Jt({
149
149
  suitePath: t,
@@ -153,31 +153,31 @@ function Jt({
153
153
  model: s,
154
154
  tokenUsage: i
155
155
  }) {
156
- const r = t.length > 0 ? t : "(root)", a = e.length > 0 ? e : "(root)";
157
- F !== r && (console.log(`Suite "${f(r)}"`), F = r);
158
- const l = n ? "✅ Passed in" : "❌ Failed in", d = [
159
- `Test "${f(a)}"`,
156
+ const r = t.length > 0 ? t : "(root)", c = e.length > 0 ? e : "(root)";
157
+ B !== r && (console.log(`Suite "${f(r)}"`), B = r);
158
+ const l = n ? "✅ Passed in" : "❌ Failed in", I = [
159
+ `Test "${f(c)}"`,
160
160
  `- ${l} ${f(`${o}ms`)}`
161
161
  ];
162
- s && d.push(`- Model ${f(s)}`), (i ?? 0) > 0 && d.push(`- Tokens used ${f(String(i))}`), d.push("---"), console.log(d.join(`
162
+ s && I.push(`- Model ${f(s)}`), (i ?? 0) > 0 && I.push(`- Tokens used ${f(String(i))}`), I.push("---"), console.log(I.join(`
163
163
  `));
164
164
  }
165
- function I(t, e, n = "(root)") {
166
- const o = V();
165
+ function C(t, e, n = "(root)") {
166
+ const o = tt();
167
167
  Jt({
168
- suitePath: P(),
168
+ suitePath: q(),
169
169
  casePath: o.length > 0 ? o : n,
170
170
  didPass: t,
171
171
  durationMs: e,
172
- model: Ct(),
173
- tokenUsage: ht()
172
+ model: At(),
173
+ tokenUsage: Ct()
174
174
  });
175
175
  }
176
- const N = new E();
177
- function Nt(t, e) {
176
+ const G = new z();
177
+ function Rt(t, e) {
178
178
  return typeof t == "object" && t !== null && "code" in t && t.code === e;
179
179
  }
180
- function Rt(t) {
180
+ function Ut(t) {
181
181
  try {
182
182
  const e = JSON.parse(t);
183
183
  return typeof e == "object" && e !== null ? e : void 0;
@@ -186,7 +186,19 @@ function Rt(t) {
186
186
  return;
187
187
  }
188
188
  }
189
- function Zt(t) {
189
+ async function Gt() {
190
+ const t = J(process.cwd(), "katt.json");
191
+ try {
192
+ const e = await X(t, "utf8");
193
+ return Ut(e);
194
+ } catch (e) {
195
+ if (Rt(e, "ENOENT"))
196
+ return;
197
+ console.warn(`Failed to read katt.json: ${String(e)}`);
198
+ return;
199
+ }
200
+ }
201
+ function Wt(t) {
190
202
  const e = t?.copilot;
191
203
  if (typeof e != "object" || e === null || Array.isArray(e))
192
204
  return;
@@ -195,83 +207,94 @@ function Zt(t) {
195
207
  }, o = n.model;
196
208
  return (typeof o != "string" || o.length === 0) && delete n.model, Object.keys(n).length > 0 ? n : void 0;
197
209
  }
198
- async function Ut() {
199
- const t = x(process.cwd(), "katt.json");
200
- try {
201
- const e = await M(t, "utf8");
202
- return Zt(Rt(e));
203
- } catch (e) {
204
- if (Nt(e, "ENOENT"))
205
- return;
206
- console.warn(`Failed to read katt.json: ${String(e)}`);
207
- return;
208
- }
210
+ function Zt(t) {
211
+ if (!(typeof t != "number" || !Number.isFinite(t)) && !(t <= 0))
212
+ return Math.floor(t);
213
+ }
214
+ function Et(t) {
215
+ const e = t?.prompt;
216
+ if (!(typeof e != "object" || e === null || Array.isArray(e)))
217
+ return Zt(e.timeoutMs);
218
+ }
219
+ async function Ot() {
220
+ const t = await Gt();
221
+ return {
222
+ copilot: Wt(t),
223
+ promptTimeoutMs: Et(t)
224
+ };
209
225
  }
210
- function K(t) {
226
+ const Dt = 6e5;
227
+ function et(t) {
211
228
  return typeof t == "string" && t.length > 0 ? t : void 0;
212
229
  }
213
- function L(t) {
230
+ function M(t) {
214
231
  if (!t)
215
232
  return;
216
233
  const e = { ...t };
217
234
  if (e.model !== void 0) {
218
- const n = K(e.model);
235
+ const n = et(e.model);
219
236
  n ? e.model = n : delete e.model;
220
237
  }
221
238
  return Object.keys(e).length > 0 ? e : void 0;
222
239
  }
223
- function S(t) {
240
+ function H(t) {
241
+ if (!(typeof t != "number" || !Number.isFinite(t)) && !(t <= 0))
242
+ return Math.floor(t);
243
+ }
244
+ function k(t) {
224
245
  return !Number.isFinite(t) || (t ?? 0) <= 0 ? 0 : Math.floor(t ?? 0);
225
246
  }
226
- function Wt(t) {
227
- return S(t.inputTokens) + S(t.outputTokens) + S(t.cacheReadTokens) + S(t.cacheWriteTokens);
228
- }
229
- async function y(t, e = {}) {
230
- const n = L(await Ut()), o = L(e), s = L({
231
- ...n ?? {},
232
- ...o ?? {}
233
- }), i = K(s?.model), r = new et({ useLoggedInUser: !0 });
234
- let a, l, d = 0;
247
+ function Ht(t) {
248
+ return k(t.inputTokens) + k(t.outputTokens) + k(t.cacheReadTokens) + k(t.cacheWriteTokens);
249
+ }
250
+ async function j(t, e = {}) {
251
+ const { timeoutMs: n, ...o } = e, s = await Ot(), i = M(s.copilot), r = M(
252
+ o
253
+ ), c = M({
254
+ ...i ?? {},
255
+ ...r ?? {}
256
+ }), l = H(s.promptTimeoutMs), T = H(n) ?? l ?? Dt, a = et(c?.model), d = new it({ useLoggedInUser: !0 });
257
+ let p, b, S = 0;
235
258
  try {
236
- await r.start(), a = await r.createSession(s), l = a.on("assistant.usage", (c) => {
237
- d += Wt(c.data);
259
+ await d.start(), p = await d.createSession(c), b = p.on("assistant.usage", (m) => {
260
+ S += Ht(m.data);
238
261
  });
239
- const g = await a.sendAndWait({ prompt: t });
240
- if (!g?.data?.content)
262
+ const h = await p.sendAndWait({ prompt: t }, T);
263
+ if (!h?.data?.content)
241
264
  throw new Error("Copilot did not return a response.");
242
- return i && It(i), g.data.content;
265
+ return a && St(a), h.data.content;
243
266
  } finally {
244
- const g = [];
245
- if (l?.(), d > 0 && pt(d), a)
267
+ const h = [];
268
+ if (b?.(), S > 0 && mt(S), p)
246
269
  try {
247
- await a.destroy();
248
- } catch (c) {
249
- g.push(c);
270
+ await p.destroy();
271
+ } catch (m) {
272
+ h.push(m);
250
273
  }
251
274
  try {
252
- const c = await r.stop();
253
- g.push(...c);
254
- } catch (c) {
255
- g.push(c);
275
+ const m = await d.stop();
276
+ h.push(...m);
277
+ } catch (m) {
278
+ h.push(m);
256
279
  }
257
- g.length > 0 && console.error(
258
- `Copilot cleanup encountered ${g.length} error(s).`
280
+ h.length > 0 && console.error(
281
+ `Copilot cleanup encountered ${h.length} error(s).`
259
282
  );
260
283
  }
261
284
  }
262
- async function Bt(t, e = {}) {
263
- const n = N.getStore(), o = n?.evalFile ? T(n.evalFile) : process.cwd(), s = ot(t) ? t : x(o, t), i = await M(s, "utf8");
264
- return y(i, e);
285
+ async function Pt(t, e = {}) {
286
+ const n = G.getStore(), o = n?.evalFile ? R(n.evalFile) : process.cwd(), s = rt(t) ? t : J(o, t), i = await X(s, "utf8");
287
+ return j(i, e);
265
288
  }
266
- function p(t) {
267
- At({
268
- describePath: P(),
269
- itPath: V(),
289
+ function g(t) {
290
+ wt({
291
+ describePath: q(),
292
+ itPath: tt(),
270
293
  message: t
271
294
  });
272
295
  }
273
- async function Gt(t, e) {
274
- const n = Date.now(), o = `expected '${t}' to satisfy '${e}'`, s = y(`Evaluate if the expectation is fulfiled in by the input.
296
+ async function zt(t, e) {
297
+ const n = Date.now(), o = `expected '${t}' to satisfy '${e}'`, s = j(`Evaluate if the expectation is fulfiled in by the input.
275
298
  Expectation: "${e}".
276
299
  Input:
277
300
  ---
@@ -279,17 +302,17 @@ async function Gt(t, e) {
279
302
  ---
280
303
  Important: Answer with "Yes" or "No" only, without any additional text.
281
304
  `).then((i) => {
282
- i.includes("Yes") ? I(!0, Date.now() - n, "promptCheck") : i.includes("No") ? p(o) : p(`failed to evaluate expectation '${e}'`);
305
+ i.includes("Yes") ? C(!0, Date.now() - n, "promptCheck") : i.includes("No") ? g(o) : g(`failed to evaluate expectation '${e}'`);
283
306
  });
284
- return $(s), s;
307
+ return v(s), s;
285
308
  }
286
- const Ot = 3;
287
- function Et(t) {
309
+ const Xt = 3;
310
+ function Vt(t) {
288
311
  const e = t.match(/\b([1-5])\b/);
289
312
  return !e || !e[1] ? null : Number(e[1]);
290
313
  }
291
- async function Mt(t, e, n) {
292
- const o = Date.now(), s = n?.threshold ?? Ot, i = y(
314
+ async function Yt(t, e, n) {
315
+ const o = Date.now(), s = n?.threshold ?? Xt, i = j(
293
316
  `Classify the input by how "${e}" it is on a scale of 1 to 5.
294
317
  1 means "not ${e}" and 5 means "very ${e}".
295
318
  Return only a single number: 1, 2, 3, 4, or 5.
@@ -300,88 +323,88 @@ ${t}
300
323
  ---`,
301
324
  n?.model ? { model: n.model } : void 0
302
325
  ).then((r) => {
303
- const a = Et(r);
304
- if (a === null) {
305
- p(
326
+ const c = Vt(r);
327
+ if (c === null) {
328
+ g(
306
329
  `failed to classify as '${e}'. Evaluator returned '${r}'`
307
330
  );
308
331
  return;
309
332
  }
310
- const l = `expected response to be classified as '${e}' with score >= ${s}, got ${a}`;
311
- if (a < s) {
312
- p(l);
333
+ const l = `expected response to be classified as '${e}' with score >= ${s}, got ${c}`;
334
+ if (c < s) {
335
+ g(l);
313
336
  return;
314
337
  }
315
- I(
338
+ C(
316
339
  !0,
317
340
  Date.now() - o,
318
341
  "toBeClassifiedAs"
319
342
  );
320
343
  });
321
- return $(i), i;
344
+ return v(i), i;
322
345
  }
323
- function Yt(t, e) {
346
+ function Qt(t, e) {
324
347
  const n = `expected '${t}' to include '${e}'`;
325
- t.includes(e) || p(n);
348
+ t.includes(e) || g(n);
326
349
  }
327
- let _ = !1;
328
- function Dt(t) {
329
- _ = t;
350
+ let nt = !1;
351
+ function Kt(t) {
352
+ nt = t;
330
353
  }
331
- function Ht() {
332
- return _;
354
+ function _t() {
355
+ return nt;
333
356
  }
334
- function Xt(t) {
335
- const n = it(t).replace(/\.eval\.[^./\\]+$/, "");
336
- return st(
337
- T(t),
357
+ function qt(t) {
358
+ const n = ct(t).replace(/\.eval\.[^./\\]+$/, "");
359
+ return at(
360
+ R(t),
338
361
  "__snapshots__",
339
362
  `${n}.snap.md`
340
363
  );
341
364
  }
342
- function O(t) {
365
+ function P(t) {
343
366
  return t.split(/\r?\n/);
344
367
  }
345
- function zt(t, e) {
368
+ function te(t, e) {
346
369
  if (t === e)
347
370
  return " (no diff)";
348
- const n = O(t), o = O(e), s = Math.max(n.length, o.length), i = [];
371
+ const n = P(t), o = P(e), s = Math.max(n.length, o.length), i = [];
349
372
  for (let r = 0; r < s; r += 1) {
350
- const a = n[r], l = o[r];
351
- if (a !== l) {
352
- if (a === void 0 && l !== void 0) {
373
+ const c = n[r], l = o[r];
374
+ if (c !== l) {
375
+ if (c === void 0 && l !== void 0) {
353
376
  i.push(`+ ${l}`);
354
377
  continue;
355
378
  }
356
- if (a !== void 0 && l === void 0) {
357
- i.push(`- ${a}`);
379
+ if (c !== void 0 && l === void 0) {
380
+ i.push(`- ${c}`);
358
381
  continue;
359
382
  }
360
- i.push(`- ${a ?? ""}`), i.push(`+ ${l ?? ""}`);
383
+ i.push(`- ${c ?? ""}`), i.push(`+ ${l ?? ""}`);
361
384
  }
362
385
  }
363
386
  return i.join(`
364
387
  `);
365
388
  }
366
- function Pt(t) {
367
- const e = N.getStore()?.evalFile;
389
+ function ee(t) {
390
+ const e = G.getStore()?.evalFile;
368
391
  if (!e) {
369
- p(
392
+ g(
370
393
  "toMatchSnapshot can only be used while running an eval file."
371
394
  );
372
395
  return;
373
396
  }
374
- const n = Xt(e);
397
+ const n = qt(e);
375
398
  try {
376
- const o = Y(n, "utf8");
399
+ const o = V(n, "utf8");
377
400
  if (o === t)
378
401
  return;
379
- if (Ht()) {
380
- Z(n, t, "utf8");
402
+ if (_t()) {
403
+ W(n, t, "utf8");
381
404
  return;
382
405
  }
383
- const s = zt(o, t);
384
- p(
406
+ const s = te(o, t);
407
+ g(
385
408
  [
386
409
  `Snapshot mismatch at ${n}`,
387
410
  "",
@@ -394,173 +417,173 @@ function Pt(t) {
394
417
  );
395
418
  } catch (o) {
396
419
  if (o.code !== "ENOENT") {
397
- p(
420
+ g(
398
421
  `Failed to read snapshot at ${n}: ${String(o)}`
399
422
  );
400
423
  return;
401
424
  }
402
425
  try {
403
- rt(T(n), { recursive: !0 }), Z(n, t, "utf8");
426
+ lt(R(n), { recursive: !0 }), W(n, t, "utf8");
404
427
  } catch (i) {
405
- p(
428
+ g(
406
429
  `Failed to write snapshot at ${n}: ${String(i)}`
407
430
  );
408
431
  }
409
432
  }
410
433
  }
411
- function Vt(t) {
434
+ function ne(t) {
412
435
  return {
413
436
  toContain: (e) => {
414
- Yt(t, e);
437
+ Qt(t, e);
415
438
  },
416
439
  toMatchSnapshot: () => {
417
- Pt(t);
440
+ ee(t);
418
441
  },
419
442
  promptCheck: async (e) => {
420
- await Gt(t, e);
443
+ await zt(t, e);
421
444
  },
422
445
  toBeClassifiedAs: async (e, n) => {
423
- await Mt(t, e, n);
446
+ await Yt(t, e, n);
424
447
  }
425
448
  };
426
449
  }
427
- function Kt(t, e) {
428
- X(() => {
429
- mt(), ft(t);
430
- const n = B(), o = Date.now(), s = () => B() === n, i = () => Date.now() - o;
450
+ function oe(t, e) {
451
+ K(() => {
452
+ bt(), It(t);
453
+ const n = O(), o = Date.now(), s = () => O() === n, i = () => Date.now() - o;
431
454
  try {
432
455
  const r = e();
433
456
  if (r && typeof r.then == "function") {
434
- $(
457
+ v(
435
458
  r.then(() => {
436
- I(!0, i());
437
- }).catch((a) => {
438
- throw I(!1, i()), a;
459
+ C(!0, i());
460
+ }).catch((c) => {
461
+ throw C(!1, i()), c;
439
462
  }).finally(() => {
440
- v();
463
+ F();
441
464
  })
442
465
  );
443
466
  return;
444
467
  }
445
468
  } catch (r) {
446
- throw I(!1, i()), v(), r;
469
+ throw C(!1, i()), F(), r;
447
470
  }
448
- I(s(), i()), v();
449
- }, z());
471
+ C(s(), i()), F();
472
+ }, _());
450
473
  }
451
- const _t = /\.eval\.(js|ts)$/, Qt = /* @__PURE__ */ new Set([".git", "node_modules"]);
452
- async function Q(t) {
453
- const e = await nt(t, { withFileTypes: !0 }), n = [];
474
+ const ie = /\.eval\.(js|ts)$/, se = /* @__PURE__ */ new Set([".git", "node_modules"]);
475
+ async function ot(t) {
476
+ const e = await st(t, { withFileTypes: !0 }), n = [];
454
477
  return await Promise.all(
455
478
  e.map(async (o) => {
456
- const s = x(t, o.name);
479
+ const s = J(t, o.name);
457
480
  if (o.isDirectory()) {
458
- if (Qt.has(o.name))
481
+ if (se.has(o.name))
459
482
  return;
460
- n.push(...await Q(s));
483
+ n.push(...await ot(s));
461
484
  return;
462
485
  }
463
- o.isFile() && _t.test(o.name) && n.push(s);
486
+ o.isFile() && ie.test(o.name) && n.push(s);
464
487
  })
465
488
  ), n;
466
489
  }
467
- const j = new URL("data:application/json;base64,ewogICJuYW1lIjogImthdHQiLAogICJ2ZXJzaW9uIjogIjAuMS4wIiwKICAiZGVzY3JpcHRpb24iOiAiQ0xJIHRvb2wgdGhhdCB0ZXN0cyB0aGUgb3V0cHV0IG9mIGFnZW50aWMgQUkgdG9vbHMiLAogICJrZXl3b3JkcyI6IFsKICAgICJjbGkiLAogICAgImFpIiwKICAgICJhZ2VudGljLWFpIiwKICAgICJ0ZXN0aW5nIiwKICAgICJldmFsdWF0aW9uIgogIF0sCiAgImF1dGhvciI6ICJSYXBoYWVsIFBvcnRvIChodHRwczovL2dpdGh1Yi5jb20vcmFwaGFlbHBvcikiLAogICJsaWNlbnNlIjogIk1JVCIsCiAgInByaXZhdGUiOiB0cnVlLAogICJ0eXBlIjogIm1vZHVsZSIsCiAgIm1haW4iOiAiZGlzdC9pbmRleC5qcyIsCiAgImJpbiI6IHsKICAgICJrYXR0IjogImRpc3QvaW5kZXguanMiCiAgfSwKICAic2NyaXB0cyI6IHsKICAgICJidWlsZCI6ICJ2aXRlIGJ1aWxkIiwKICAgICJkZXYiOiAidHN4IHNyYy9pbmRleC50cyIsCiAgICAibGludCI6ICJiaW9tZSBsaW50IC4vc3JjIiwKICAgICJmb3JtYXQiOiAiYmlvbWUgZm9ybWF0IC0td3JpdGUgLi9zcmMiLAogICAgInRlc3QiOiAidml0ZXN0IiwKICAgICJ0eXBlY2hlY2siOiAidHNjIC1wIHRzY29uZmlnLmpzb24gLS1ub0VtaXQiLAogICAgInRlc3Q6YnVpbGQiOiAibm9kZSAuL2Rpc3QvaW5kZXguanMiCiAgfSwKICAidHlwZXMiOiAiZGlzdC9pbmRleC5kLnRzIiwKICAiZGV2RGVwZW5kZW5jaWVzIjogewogICAgIkBiaW9tZWpzL2Jpb21lIjogIjEuOS40IiwKICAgICJAdHlwZXMvbm9kZSI6ICIyNS4yLjAiLAogICAgInRzeCI6ICI0LjIxLjAiLAogICAgInR5cGVzY3JpcHQiOiAiNS44LjIiLAogICAgInZpdGUiOiAiNy4zLjEiLAogICAgInZpdGUtcGx1Z2luLWR0cyI6ICI0LjUuNCIsCiAgICAidml0ZXN0IjogIjMuMi40IiwKICAgICJ2c2NvZGUtanNvbnJwYyI6ICJeOC4yLjEiCiAgfSwKICAiZGVwZW5kZW5jaWVzIjogewogICAgIkBnaXRodWIvY29waWxvdC1zZGsiOiAiXjAuMS4yMSIKICB9LAogICJidWdzIjogewogICAgInVybCI6ICJodHRwczovL2dpdGh1Yi5jb20vcmFwaGFlbHBvci9rYXR0L2lzc3VlcyIKICB9LAogICJob21lcGFnZSI6ICJodHRwczovL2dpdGh1Yi5jb20vcmFwaGFlbHBvci9rYXR0Igp9Cg==", import.meta.url);
468
- let C;
469
- function qt() {
470
- if (C !== void 0)
471
- return C;
490
+ const N = new URL("data:application/json;base64,ewogICJuYW1lIjogImthdHQiLAogICJ2ZXJzaW9uIjogIjAuMC4zIiwKICAiZGVzY3JpcHRpb24iOiAiQ0xJIHRvb2wgdGhhdCB0ZXN0cyB0aGUgb3V0cHV0IG9mIGFnZW50aWMgQUkgdG9vbHMiLAogICJrZXl3b3JkcyI6IFsKICAgICJjbGkiLAogICAgImFpIiwKICAgICJhZ2VudGljLWFpIiwKICAgICJ0ZXN0aW5nIiwKICAgICJldmFsdWF0aW9uIgogIF0sCiAgImF1dGhvciI6ICJSYXBoYWVsIFBvcnRvIChodHRwczovL2dpdGh1Yi5jb20vcmFwaGFlbHBvcikiLAogICJsaWNlbnNlIjogIk1JVCIsCiAgInR5cGUiOiAibW9kdWxlIiwKICAibWFpbiI6ICJkaXN0L2luZGV4LmpzIiwKICAiYmluIjogewogICAgImthdHQiOiAiZGlzdC9pbmRleC5qcyIKICB9LAogICJzY3JpcHRzIjogewogICAgImJ1aWxkIjogInZpdGUgYnVpbGQiLAogICAgImRldiI6ICJ0c3ggc3JjL2luZGV4LnRzIiwKICAgICJsaW50IjogImJpb21lIGxpbnQgLi9zcmMiLAogICAgImZvcm1hdCI6ICJiaW9tZSBmb3JtYXQgLS13cml0ZSAuL3NyYyIsCiAgICAidGVzdCI6ICJ2aXRlc3QiLAogICAgInR5cGVjaGVjayI6ICJ0c2MgLXAgdHNjb25maWcuanNvbiAtLW5vRW1pdCIsCiAgICAidGVzdDpidWlsZCI6ICJub2RlIC4vZGlzdC9pbmRleC5qcyIKICB9LAogICJ0eXBlcyI6ICJkaXN0L2luZGV4LmQudHMiLAogICJkZXZEZXBlbmRlbmNpZXMiOiB7CiAgICAiQGJpb21lanMvYmlvbWUiOiAiMS45LjQiLAogICAgIkB0eXBlcy9ub2RlIjogIjI1LjIuMCIsCiAgICAidHN4IjogIjQuMjEuMCIsCiAgICAidHlwZXNjcmlwdCI6ICI1LjguMiIsCiAgICAidml0ZSI6ICI3LjMuMSIsCiAgICAidml0ZS1wbHVnaW4tZHRzIjogIjQuNS40IiwKICAgICJ2aXRlc3QiOiAiMy4yLjQiLAogICAgInZzY29kZS1qc29ucnBjIjogIl44LjIuMSIKICB9LAogICJkZXBlbmRlbmNpZXMiOiB7CiAgICAiQGdpdGh1Yi9jb3BpbG90LXNkayI6ICJeMC4xLjIxIgogIH0sCiAgImJ1Z3MiOiB7CiAgICAidXJsIjogImh0dHBzOi8vZ2l0aHViLmNvbS9yYXBoYWVscG9yL2thdHQvaXNzdWVzIgogIH0sCiAgImhvbWVwYWdlIjogImh0dHBzOi8vZ2l0aHViLmNvbS9yYXBoYWVscG9yL2thdHQiCn0K", import.meta.url);
491
+ let A;
492
+ function re() {
493
+ if (A !== void 0)
494
+ return A;
472
495
  try {
473
- const t = j.protocol === "data:" ? te(j) : Y(ct(j), "utf8"), e = JSON.parse(t);
474
- C = typeof e.version == "string" ? e.version : "unknown";
496
+ const t = N.protocol === "data:" ? ce(N) : V(ut(N), "utf8"), e = JSON.parse(t);
497
+ A = typeof e.version == "string" ? e.version : "unknown";
475
498
  } catch {
476
- C = "unknown";
499
+ A = "unknown";
477
500
  }
478
- return C;
501
+ return A;
479
502
  }
480
- function te(t) {
503
+ function ce(t) {
481
504
  const e = t.pathname.indexOf(",");
482
505
  if (e < 0)
483
506
  throw new Error("Invalid data URL.");
484
507
  const n = t.pathname.slice(0, e), o = t.pathname.slice(e + 1);
485
508
  return n.includes(";base64") ? Buffer.from(o, "base64").toString("utf8") : decodeURIComponent(o);
486
509
  }
487
- function ee() {
488
- const t = " ██╗ ██╗ █████╗ ████████╗████████╗", e = " ██║ ██╔╝██╔══██╗╚══██╔══╝╚══██╔══╝", n = " █████╔╝ ███████║ ██║ ██║", o = " ██╔═██╗ ██╔══██║ ██║ ██║", s = " ██║ ██╗██║ ██║ ██║ ██║", i = " ╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ ╚═╝", r = `v${qt()}`, a = Math.max(
510
+ function ae() {
511
+ const t = " ██╗ ██╗ █████╗ ████████╗████████╗", e = " ██║ ██╔╝██╔══██╗╚══██╔══╝╚══██╔══╝", n = " █████╔╝ ███████║ ██║ ██║", o = " ██╔═██╗ ██╔══██║ ██║ ██║", s = " ██║ ██╗██║ ██║ ██║ ██║", i = " ╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ ╚═╝", r = `v${re()}`, c = Math.max(
489
512
  0,
490
513
  Math.floor((t.length - r.length) / 2)
491
- ), l = `${" ".repeat(a)}${r}`;
514
+ ), l = `${" ".repeat(c)}${r}`;
492
515
  console.log(`
493
- ${m(t)}
494
- ${m(e)}
495
- ${m(n)}
496
- ${G(o)}
497
- ${G(s)}
498
- ${xt(i)}
499
- ${m(l)}
516
+ ${$(t)}
517
+ ${$(e)}
518
+ ${$(n)}
519
+ ${D(o)}
520
+ ${D(s)}
521
+ ${Nt(i)}
522
+ ${$(l)}
500
523
  `);
501
524
  }
502
- function ne(t) {
525
+ function le(t) {
503
526
  const e = String(t.getHours()).padStart(2, "0"), n = String(t.getMinutes()).padStart(2, "0"), o = String(t.getSeconds()).padStart(2, "0");
504
527
  return `${e}:${n}:${o}`;
505
528
  }
506
- async function oe() {
529
+ async function ue() {
507
530
  const t = process.argv.slice(2), e = t.includes("--update-snapshots") || t.includes("-u");
508
- Dt(e), ee();
531
+ Kt(e), ae();
509
532
  const n = /* @__PURE__ */ new Date();
510
- Tt(), kt(), wt();
511
- const o = await Q(process.cwd());
533
+ Bt(), vt(), kt();
534
+ const o = await ot(process.cwd());
512
535
  if (o.length === 0)
513
536
  return console.log("No .eval.js or .eval.ts files found."), 1;
514
537
  const i = (await Promise.allSettled(
515
538
  o.map(
516
- (c) => N.run(
517
- { evalFile: c },
518
- () => import(at(c).href)
539
+ (a) => G.run(
540
+ { evalFile: a },
541
+ () => import(dt(a).href)
519
542
  )
520
543
  )
521
- )).map((c, h) => ({ result: c, file: o[h] })).filter(({ result: c }) => c.status === "rejected");
544
+ )).map((a, d) => ({ result: a, file: o[d] })).filter(({ result: a }) => a.status === "rejected");
522
545
  if (i.length > 0) {
523
- for (const c of i) {
524
- const h = c.result.status === "rejected" ? c.result.reason : void 0;
525
- console.error(`Error executing ${c.file}: ${String(h)}`);
546
+ for (const a of i) {
547
+ const d = a.result.status === "rejected" ? a.result.reason : void 0;
548
+ console.error(`Error executing ${a.file}: ${String(d)}`);
526
549
  }
527
550
  return 1;
528
551
  }
529
- const a = (await yt()).filter(
530
- (c) => c.status === "rejected"
552
+ const c = (await Lt()).filter(
553
+ (a) => a.status === "rejected"
531
554
  );
532
- if (a.length > 0) {
533
- for (const c of a)
534
- c.status === "rejected" && console.error(`Error executing async test: ${String(c.reason)}`);
555
+ if (c.length > 0) {
556
+ for (const a of c)
557
+ a.status === "rejected" && console.error(`Error executing async test: ${String(a.reason)}`);
535
558
  return 1;
536
559
  }
537
- const l = $t();
560
+ const l = yt();
538
561
  if (l.length > 0) {
539
562
  console.error("❌ Failed tests:");
540
- for (const [c, h] of l.entries()) {
541
- const R = [h.describePath, h.itPath].filter((tt) => tt.length > 0).join(" > "), q = R.length > 0 ? `${R}: ` : "";
542
- console.error(`${c + 1}. ${q}${h.message}`);
563
+ for (const [a, d] of l.entries()) {
564
+ const p = [d.describePath, d.itPath].filter((S) => S.length > 0).join(" > "), b = p.length > 0 ? `${p}: ` : "";
565
+ console.error(`${a + 1}. ${b}${d.message}`);
543
566
  }
544
567
  return 1;
545
568
  }
546
- const d = St(), g = Date.now() - n.getTime();
569
+ const I = $t(), T = Date.now() - n.getTime();
547
570
  return console.log(
548
571
  [
549
572
  "---",
550
573
  `${f("Files")} ${o.length} passed`,
551
- `${f("Evals")} ${d} passed`,
552
- `${f("Start at")} ${ne(n)}`,
553
- `${f("Duration")} ${g}ms`
574
+ `${f("Evals")} ${I} passed`,
575
+ `${f("Start at")} ${le(n)}`,
576
+ `${f("Duration")} ${T}ms`
554
577
  ].join(`
555
578
  `)
556
579
  ), 0;
557
580
  }
558
- Object.assign(globalThis, { describe: bt, it: Kt, expect: Vt, prompt: y, promptFile: Bt });
559
- oe().then((t) => {
581
+ Object.assign(globalThis, { describe: jt, it: oe, expect: ne, prompt: j, promptFile: Pt });
582
+ ue().then((t) => {
560
583
  process.exit(t);
561
584
  }).catch((t) => {
562
585
  console.error(`Unexpected error: ${String(t)}`), process.exit(1);
563
586
  });
564
587
  export {
565
- oe as runCli
588
+ ue as runCli
566
589
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "katt",
3
- "version": "0.0.2",
3
+ "version": "0.0.4",
4
4
  "description": "CLI tool that tests the output of agentic AI tools",
5
5
  "keywords": [
6
6
  "cli",