katt 0.0.5 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -9,6 +9,7 @@ Katt is a lightweight testing framework for running AI Evals, inspired by [Jest]
9
9
 
10
10
  - [Overview](#overview)
11
11
  - [API Documentation](#api-documentation)
12
+ - [Articles](#articles)
12
13
  - [Hello World - Example](#hello-world---example)
13
14
  - [Main Features](#main-features)
14
15
  - [Usage](#usage)
@@ -34,6 +35,10 @@ Katt is designed to evaluate and validate the behavior of AI agents like **Claud
34
35
 
35
36
  For a complete list of features and usage examples, see [docs/api-documentation.md](https://github.com/raphaelpor/katt/blob/main/docs/api-documentation.md).
36
37
 
38
+ ## Articles
39
+
40
+ - [Introducing Katt](https://github.com/raphaelpor/katt/blob/main/docs/articles/introduction-to-katt.md)
41
+
37
42
  ## Hello World - Example
38
43
 
39
44
  ```typescript
@@ -86,7 +91,7 @@ expect(result).toContain("hello world");
86
91
  2. Run Katt from your project directory:
87
92
 
88
93
  ```bash
89
- katt
94
+ npx katt
90
95
  ```
91
96
 
92
97
  ### Using promptFile
package/dist/index.js CHANGED
@@ -1,21 +1,21 @@
1
- import { r as E, c as N, a as O, p as I, b as m, l as d, d as w, g as F, e as U, f as C, s as _, h as R, i as D, j as z, k as B, m as W, n as K } from "./runCli-CDRmZ5hw.js";
2
- import { o as St } from "./runCli-CDRmZ5hw.js";
3
- import { CopilotClient as H } from "@github/copilot-sdk";
4
- import { readFile as A } from "node:fs/promises";
5
- import { resolve as j, dirname as T, isAbsolute as Y, basename as J, join as q } from "node:path";
6
- import { readFileSync as G, writeFileSync as S, mkdirSync as Q } from "node:fs";
7
- function vt(t, e) {
1
+ import { r as E, c as N, a as O, p as I, b as m, l as d, d as w, g as S, e as U, f as C, s as z, h as R, i as A, j as B, k as W, m as K, n as H, o as Y, q } from "./runCli-C7uxWavX.js";
2
+ import { t as Et } from "./runCli-C7uxWavX.js";
3
+ import { CopilotClient as J } from "@github/copilot-sdk";
4
+ import { readFile as j } from "node:fs/promises";
5
+ import { resolve as P, dirname as v, isAbsolute as G, basename as Q, join as V } from "node:path";
6
+ import { readFileSync as X, writeFileSync as F, mkdirSync as Z } from "node:fs";
7
+ function Tt(t, e) {
8
8
  E(() => {
9
9
  O(), I(t);
10
- const n = F(), r = Date.now(), i = () => F() === n, o = () => Date.now() - r;
10
+ const n = S(), o = Date.now(), i = () => S() === n, r = () => Date.now() - o;
11
11
  try {
12
12
  const s = e();
13
13
  if (s && typeof s.then == "function") {
14
14
  m(
15
15
  s.then(() => {
16
- d(!0, o());
16
+ d(!0, r());
17
17
  }).catch((a) => {
18
- throw d(!1, o()), a;
18
+ throw d(!1, r()), a;
19
19
  }).finally(() => {
20
20
  w();
21
21
  })
@@ -23,12 +23,12 @@ function vt(t, e) {
23
23
  return;
24
24
  }
25
25
  } catch (s) {
26
- throw d(!1, o()), w(), s;
26
+ throw d(!1, r()), w(), s;
27
27
  }
28
- d(i(), o()), w();
28
+ d(i(), r()), w();
29
29
  }, N());
30
30
  }
31
- function Tt(t, e) {
31
+ function St(t, e) {
32
32
  E(() => {
33
33
  U(t);
34
34
  try {
@@ -47,10 +47,10 @@ function Tt(t, e) {
47
47
  C();
48
48
  }, N());
49
49
  }
50
- function V(t, e) {
50
+ function tt(t, e) {
51
51
  return typeof t == "object" && t !== null && "code" in t && t.code === e;
52
52
  }
53
- function X(t) {
53
+ function et(t) {
54
54
  try {
55
55
  const e = JSON.parse(t);
56
56
  return typeof e == "object" && e !== null ? e : void 0;
@@ -59,53 +59,53 @@ function X(t) {
59
59
  return;
60
60
  }
61
61
  }
62
- async function Z() {
63
- const t = j(process.cwd(), "katt.json");
62
+ async function nt() {
63
+ const t = P(process.cwd(), "katt.json");
64
64
  try {
65
- const e = await A(t, "utf8");
66
- return X(e);
65
+ const e = await j(t, "utf8");
66
+ return et(e);
67
67
  } catch (e) {
68
- if (V(e, "ENOENT"))
68
+ if (tt(e, "ENOENT"))
69
69
  return;
70
70
  console.warn(`Failed to read katt.json: ${String(e)}`);
71
71
  return;
72
72
  }
73
73
  }
74
- function tt(t) {
74
+ function ot(t) {
75
75
  const e = t?.copilot;
76
76
  if (typeof e != "object" || e === null || Array.isArray(e))
77
77
  return;
78
78
  const n = {
79
79
  ...e
80
- }, r = n.model;
81
- return (typeof r != "string" || r.length === 0) && delete n.model, Object.keys(n).length > 0 ? n : void 0;
80
+ }, o = n.model;
81
+ return (typeof o != "string" || o.length === 0) && delete n.model, Object.keys(n).length > 0 ? n : void 0;
82
82
  }
83
- function et(t) {
83
+ function rt(t) {
84
84
  if (!(typeof t != "number" || !Number.isFinite(t)) && !(t <= 0))
85
85
  return Math.floor(t);
86
86
  }
87
- function nt(t) {
87
+ function it(t) {
88
88
  const e = t?.prompt;
89
89
  if (!(typeof e != "object" || e === null || Array.isArray(e)))
90
- return et(e.timeoutMs);
90
+ return rt(e.timeoutMs);
91
91
  }
92
- async function ot() {
93
- const t = await Z();
92
+ async function st() {
93
+ const t = await nt();
94
94
  return {
95
- copilot: tt(t),
96
- promptTimeoutMs: nt(t)
95
+ copilot: ot(t),
96
+ promptTimeoutMs: it(t)
97
97
  };
98
98
  }
99
- const rt = 6e5;
100
- function P(t) {
99
+ const at = 6e5;
100
+ function _(t) {
101
101
  return typeof t == "string" && t.length > 0 ? t : void 0;
102
102
  }
103
- function v(t) {
103
+ function b(t) {
104
104
  if (!t)
105
105
  return;
106
106
  const e = { ...t };
107
107
  if (e.model !== void 0) {
108
- const n = P(e.model);
108
+ const n = _(e.model);
109
109
  n ? e.model = n : delete e.model;
110
110
  }
111
111
  return Object.keys(e).length > 0 ? e : void 0;
@@ -117,28 +117,28 @@ function M(t) {
117
117
  function h(t) {
118
118
  return !Number.isFinite(t) || (t ?? 0) <= 0 ? 0 : Math.floor(t ?? 0);
119
119
  }
120
- function it(t) {
120
+ function ct(t) {
121
121
  return h(t.inputTokens) + h(t.outputTokens) + h(t.cacheReadTokens) + h(t.cacheWriteTokens);
122
122
  }
123
123
  async function $(t, e = {}) {
124
- const { timeoutMs: n, ...r } = e, i = await ot(), o = v(i.copilot), s = v(
125
- r
126
- ), a = v({
127
- ...o ?? {},
124
+ const { timeoutMs: n, ...o } = e, i = await st(), r = b(i.copilot), s = b(
125
+ o
126
+ ), a = b({
127
+ ...r ?? {},
128
128
  ...s ?? {}
129
- }), c = M(i.promptTimeoutMs), L = M(n) ?? c ?? rt, b = P(a?.model), g = new H({ useLoggedInUser: !0 });
130
- let p, x, y = 0;
129
+ }), c = M(i.promptTimeoutMs), L = M(n) ?? c ?? at, x = _(a?.model), g = new J({ useLoggedInUser: !0 });
130
+ let p, T, y = 0;
131
131
  try {
132
- await g.start(), p = await g.createSession(a), x = p.on("assistant.usage", (f) => {
133
- y += it(f.data);
132
+ await g.start(), p = await g.createSession(a), T = p.on("assistant.usage", (f) => {
133
+ y += ct(f.data);
134
134
  });
135
135
  const l = await p.sendAndWait({ prompt: t }, L);
136
136
  if (!l?.data?.content)
137
137
  throw new Error("Copilot did not return a response.");
138
- return b && _(b), l.data.content;
138
+ return x && z(x), l.data.content;
139
139
  } finally {
140
140
  const l = [];
141
- if (x?.(), y > 0 && R(y), p)
141
+ if (T?.(), y > 0 && R(y), p)
142
142
  try {
143
143
  await p.destroy();
144
144
  } catch (f) {
@@ -155,37 +155,37 @@ async function $(t, e = {}) {
155
155
  );
156
156
  }
157
157
  }
158
- async function $t(t, e = {}) {
159
- const n = D.getStore(), r = n?.evalFile ? T(n.evalFile) : process.cwd(), i = Y(t) ? t : j(r, t), o = await A(i, "utf8");
160
- return $(o, e);
158
+ async function Ft(t, e = {}) {
159
+ const n = A.getStore(), o = n?.evalFile ? v(n.evalFile) : process.cwd(), i = G(t) ? t : P(o, t), r = await j(i, "utf8");
160
+ return $(r, e);
161
161
  }
162
162
  function u(t) {
163
- z({
164
- describePath: W(),
165
- itPath: B(),
163
+ B({
164
+ describePath: K(),
165
+ itPath: W(),
166
166
  message: t
167
167
  });
168
168
  }
169
- async function st(t, e) {
170
- const n = Date.now(), r = `expected '${t}' to satisfy '${e}'`, i = $(`Evaluate if the expectation is fulfiled in by the input.
169
+ async function ut(t, e) {
170
+ const n = Date.now(), o = `expected '${t}' to satisfy '${e}'`, i = $(`Evaluate if the expectation is fulfiled in by the input.
171
171
  Expectation: "${e}".
172
172
  Input:
173
173
  ---
174
174
  ${t}
175
175
  ---
176
176
  Important: Answer with "Yes" or "No" only, without any additional text.
177
- `).then((o) => {
178
- o.includes("Yes") ? d(!0, Date.now() - n, "promptCheck") : o.includes("No") ? u(r) : u(`failed to evaluate expectation '${e}'`);
177
+ `).then((r) => {
178
+ r.includes("Yes") ? d(!0, Date.now() - n, "promptCheck") : r.includes("No") ? u(o) : u(`failed to evaluate expectation '${e}'`);
179
179
  });
180
180
  return m(i), i;
181
181
  }
182
- const at = 3;
183
- function ct(t) {
182
+ const lt = 3;
183
+ function ft(t) {
184
184
  const e = t.match(/\b([1-5])\b/);
185
185
  return !e || !e[1] ? null : Number(e[1]);
186
186
  }
187
- async function ut(t, e, n) {
188
- const r = Date.now(), i = n?.threshold ?? at, o = $(
187
+ async function dt(t, e, n) {
188
+ const o = Date.now(), i = n?.threshold ?? lt, r = $(
189
189
  `Classify the input by how "${e}" it is on a scale of 1 to 5.
190
190
  1 means "not ${e}" and 5 means "very ${e}".
191
191
  Return only a single number: 1, 2, 3, 4, or 5.
@@ -196,7 +196,7 @@ ${t}
196
196
  ---`,
197
197
  n?.model ? { model: n.model } : void 0
198
198
  ).then((s) => {
199
- const a = ct(s);
199
+ const a = ft(s);
200
200
  if (a === null) {
201
201
  u(
202
202
  `failed to classify as '${e}'. Evaluator returned '${s}'`
@@ -210,66 +210,78 @@ ${t}
210
210
  }
211
211
  d(
212
212
  !0,
213
- Date.now() - r,
213
+ Date.now() - o,
214
214
  "toBeClassifiedAs"
215
215
  );
216
216
  });
217
- return m(o), o;
217
+ return m(r), r;
218
218
  }
219
- function lt(t, e) {
219
+ function pt(t, e) {
220
220
  const n = `expected '${t}' to include '${e}'`;
221
221
  t.includes(e) || u(n);
222
222
  }
223
- function ft(t) {
224
- const n = J(t).replace(/\.eval\.[^./\\]+$/, "");
225
- return q(
226
- T(t),
223
+ function k(t) {
224
+ const e = t.trim().replace(/[<>:"/\\|?*\x00-\x1f]/g, "_").replace(/\s+/g, "_");
225
+ return e.length > 0 ? e : "unnamed";
226
+ }
227
+ function ht() {
228
+ const t = Y().map(
229
+ (o) => k(o.description)
230
+ ), e = q().map(
231
+ (o) => k(o.description)
232
+ ), n = [...t, ...e];
233
+ return n.length === 0 ? "root" : n.join("__");
234
+ }
235
+ function mt(t) {
236
+ const n = Q(t).replace(/\.eval\.[^./\\]+$/, ""), o = ht();
237
+ return V(
238
+ v(t),
227
239
  "__snapshots__",
228
- `${n}.snap.md`
240
+ `${n}__${o}.snap.md`
229
241
  );
230
242
  }
231
- function k(t) {
243
+ function D(t) {
232
244
  return t.split(/\r?\n/);
233
245
  }
234
- function dt(t, e) {
246
+ function gt(t, e) {
235
247
  if (t === e)
236
248
  return " (no diff)";
237
- const n = k(t), r = k(e), i = Math.max(n.length, r.length), o = [];
249
+ const n = D(t), o = D(e), i = Math.max(n.length, o.length), r = [];
238
250
  for (let s = 0; s < i; s += 1) {
239
- const a = n[s], c = r[s];
251
+ const a = n[s], c = o[s];
240
252
  if (a !== c) {
241
253
  if (a === void 0 && c !== void 0) {
242
- o.push(`+ ${c}`);
254
+ r.push(`+ ${c}`);
243
255
  continue;
244
256
  }
245
257
  if (a !== void 0 && c === void 0) {
246
- o.push(`- ${a}`);
258
+ r.push(`- ${a}`);
247
259
  continue;
248
260
  }
249
- o.push(`- ${a ?? ""}`), o.push(`+ ${c ?? ""}`);
261
+ r.push(`- ${a ?? ""}`), r.push(`+ ${c ?? ""}`);
250
262
  }
251
263
  }
252
- return o.join(`
264
+ return r.join(`
253
265
  `);
254
266
  }
255
- function pt(t) {
256
- const e = D.getStore()?.evalFile;
267
+ function yt(t) {
268
+ const e = A.getStore()?.evalFile;
257
269
  if (!e) {
258
270
  u(
259
271
  "toMatchSnapshot can only be used while running an eval file."
260
272
  );
261
273
  return;
262
274
  }
263
- const n = ft(e);
275
+ const n = mt(e);
264
276
  try {
265
- const r = G(n, "utf8");
266
- if (r === t)
277
+ const o = X(n, "utf8");
278
+ if (o === t)
267
279
  return;
268
- if (K()) {
269
- S(n, t, "utf8");
280
+ if (H()) {
281
+ F(n, t, "utf8");
270
282
  return;
271
283
  }
272
- const i = dt(r, t);
284
+ const i = gt(o, t);
273
285
  u(
274
286
  [
275
287
  `Snapshot mismatch at ${n}`,
@@ -277,47 +289,47 @@ function pt(t) {
277
289
  "Diff:",
278
290
  i,
279
291
  "",
280
- "Run katt with --update-snapshots (or -u) to accept this change."
292
+ "Run 'npx katt --update-snapshots' (or -u) to accept this change."
281
293
  ].join(`
282
294
  `)
283
295
  );
284
- } catch (r) {
285
- if (r.code !== "ENOENT") {
296
+ } catch (o) {
297
+ if (o.code !== "ENOENT") {
286
298
  u(
287
- `Failed to read snapshot at ${n}: ${String(r)}`
299
+ `Failed to read snapshot at ${n}: ${String(o)}`
288
300
  );
289
301
  return;
290
302
  }
291
303
  try {
292
- Q(T(n), { recursive: !0 }), S(n, t, "utf8");
293
- } catch (o) {
304
+ Z(v(n), { recursive: !0 }), F(n, t, "utf8");
305
+ } catch (r) {
294
306
  u(
295
- `Failed to write snapshot at ${n}: ${String(o)}`
307
+ `Failed to write snapshot at ${n}: ${String(r)}`
296
308
  );
297
309
  }
298
310
  }
299
311
  }
300
- function bt(t) {
312
+ function Mt(t) {
301
313
  return {
302
314
  toContain: (e) => {
303
- lt(t, e);
315
+ pt(t, e);
304
316
  },
305
317
  toMatchSnapshot: () => {
306
- pt(t);
318
+ yt(t);
307
319
  },
308
320
  promptCheck: async (e) => {
309
- await st(t, e);
321
+ await ut(t, e);
310
322
  },
311
323
  toBeClassifiedAs: async (e, n) => {
312
- await ut(t, e, n);
324
+ await dt(t, e, n);
313
325
  }
314
326
  };
315
327
  }
316
328
  export {
317
- Tt as describe,
318
- bt as expect,
319
- vt as it,
329
+ St as describe,
330
+ Mt as expect,
331
+ Tt as it,
320
332
  $ as prompt,
321
- $t as promptFile,
322
- St as runCli
333
+ Ft as promptFile,
334
+ Et as runCli
323
335
  };
package/dist/katt.js CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { o as r } from "./runCli-CDRmZ5hw.js";
2
+ import { t as r } from "./runCli-C7uxWavX.js";
3
3
  r().then((e) => {
4
4
  process.exit(e);
5
5
  }).catch((e) => {
@@ -0,0 +1,312 @@
1
+ import { fileURLToPath as T, pathToFileURL as X } from "node:url";
2
+ import { readdir as M } from "node:fs/promises";
3
+ import { resolve as R } from "node:path";
4
+ import { AsyncLocalStorage as v } from "node:async_hooks";
5
+ import { readFileSync as U } from "node:fs";
6
+ const W = /\.eval\.(js|ts)$/, G = /* @__PURE__ */ new Set([".git", "node_modules"]);
7
+ async function J(t) {
8
+ const e = await M(t, { withFileTypes: !0 }), n = [];
9
+ return await Promise.all(
10
+ e.map(async (o) => {
11
+ const a = R(t, o.name);
12
+ if (o.isDirectory()) {
13
+ if (G.has(o.name))
14
+ return;
15
+ n.push(...await J(a));
16
+ return;
17
+ }
18
+ o.isFile() && W.test(o.name) && n.push(a);
19
+ })
20
+ ), n;
21
+ }
22
+ const B = new v(), O = {
23
+ describeStack: [],
24
+ itStack: [],
25
+ tokenUsageStack: [],
26
+ modelStack: []
27
+ };
28
+ let L = 0, $ = 0;
29
+ const f = [], h = [];
30
+ let k = 0;
31
+ function i() {
32
+ return B.getStore() ?? O;
33
+ }
34
+ function Z(t) {
35
+ return {
36
+ describeStack: [...t.describeStack],
37
+ itStack: [...t.itStack],
38
+ tokenUsageStack: [...t.tokenUsageStack],
39
+ modelStack: [...t.modelStack]
40
+ };
41
+ }
42
+ function Y() {
43
+ return L += 1, `d${L}`;
44
+ }
45
+ function Q() {
46
+ return $ += 1, `i${$}`;
47
+ }
48
+ function St(t, e) {
49
+ const n = e ?? Z(i());
50
+ return B.run(n, t);
51
+ }
52
+ function At() {
53
+ return Z(i());
54
+ }
55
+ function kt(t) {
56
+ i().describeStack.push({ id: Y(), description: t });
57
+ }
58
+ function bt() {
59
+ i().describeStack.pop();
60
+ }
61
+ function V() {
62
+ return i().describeStack.map((t) => t.description).join(" > ");
63
+ }
64
+ function Lt() {
65
+ return [...i().describeStack];
66
+ }
67
+ function $t(t) {
68
+ i().itStack.push({ id: Q(), description: t }), i().tokenUsageStack.push(0), i().modelStack.push(void 0);
69
+ }
70
+ function wt() {
71
+ i().itStack.pop(), i().tokenUsageStack.pop(), i().modelStack.pop();
72
+ }
73
+ function K() {
74
+ return i().itStack.map((t) => t.description).join(" > ");
75
+ }
76
+ function jt() {
77
+ return [...i().itStack];
78
+ }
79
+ function vt(t) {
80
+ if (!Number.isFinite(t) || t <= 0)
81
+ return;
82
+ const e = i(), n = e.tokenUsageStack.length - 1;
83
+ n < 0 || (e.tokenUsageStack[n] += t);
84
+ }
85
+ function P() {
86
+ const t = i(), e = t.tokenUsageStack.length - 1;
87
+ return e < 0 ? 0 : t.tokenUsageStack[e] ?? 0;
88
+ }
89
+ function Jt(t) {
90
+ if (t.length === 0)
91
+ return;
92
+ const e = i(), n = e.modelStack.length - 1;
93
+ n < 0 || (e.modelStack[n] = t);
94
+ }
95
+ function z() {
96
+ const t = i(), e = t.modelStack.length - 1;
97
+ if (!(e < 0))
98
+ return t.modelStack[e];
99
+ }
100
+ function Bt(t) {
101
+ f.push(t);
102
+ }
103
+ function Zt() {
104
+ k += 1;
105
+ }
106
+ function E() {
107
+ return k;
108
+ }
109
+ function D() {
110
+ k = 0;
111
+ }
112
+ function xt(t) {
113
+ h.push(t);
114
+ }
115
+ function _() {
116
+ return [...h];
117
+ }
118
+ function Ht() {
119
+ return h.length;
120
+ }
121
+ function q() {
122
+ h.length = 0;
123
+ }
124
+ async function tt() {
125
+ const t = [];
126
+ for (; f.length > 0; ) {
127
+ const e = f.splice(0, f.length), n = await Promise.allSettled(e);
128
+ t.push(...n);
129
+ }
130
+ return t;
131
+ }
132
+ const et = "\x1B[1;36m", nt = "\x1B[33m", ot = "\x1B[38;5;208m", it = "\x1B[1;38;5;208m", m = "\x1B[0m";
133
+ function r(t) {
134
+ return `${et}${t}${m}`;
135
+ }
136
+ function p(t) {
137
+ return `${nt}${t}${m}`;
138
+ }
139
+ function w(t) {
140
+ return `${ot}${t}${m}`;
141
+ }
142
+ function st(t) {
143
+ return `${it}${t}${m}`;
144
+ }
145
+ let A = "";
146
+ function at() {
147
+ A = "";
148
+ }
149
+ function ct({
150
+ suitePath: t,
151
+ casePath: e,
152
+ didPass: n,
153
+ durationMs: o,
154
+ model: a,
155
+ tokenUsage: I
156
+ }) {
157
+ const c = t.length > 0 ? t : "(root)", C = e.length > 0 ? e : "(root)";
158
+ A !== c && (console.log(`Suite "${r(c)}"`), A = c);
159
+ const g = n ? "✅ Passed in" : "❌ Failed in", l = [
160
+ `Test "${r(C)}"`,
161
+ `- ${g} ${r(`${o}ms`)}`
162
+ ];
163
+ a && l.push(`- Model ${r(a)}`), (I ?? 0) > 0 && l.push(`- Tokens used ${r(String(I))}`), l.push("---"), console.log(l.join(`
164
+ `));
165
+ }
166
+ function yt(t, e, n = "(root)") {
167
+ const o = K();
168
+ ct({
169
+ suitePath: V(),
170
+ casePath: o.length > 0 ? o : n,
171
+ didPass: t,
172
+ durationMs: e,
173
+ model: z(),
174
+ tokenUsage: P()
175
+ });
176
+ }
177
+ const rt = new v(), S = new URL("data:application/json;base64,ewogICJuYW1lIjogImthdHQiLAogICJ2ZXJzaW9uIjogIjAuMC42IiwKICAiZGVzY3JpcHRpb24iOiAiQ0xJIHRvb2wgdGhhdCB0ZXN0cyB0aGUgb3V0cHV0IG9mIGFnZW50aWMgQUkgdG9vbHMiLAogICJrZXl3b3JkcyI6IFsKICAgICJjbGkiLAogICAgImFpIiwKICAgICJhZ2VudGljLWFpIiwKICAgICJ0ZXN0aW5nIiwKICAgICJldmFsdWF0aW9uIgogIF0sCiAgImF1dGhvciI6ICJSYXBoYWVsIFBvcnRvIChodHRwczovL2dpdGh1Yi5jb20vcmFwaGFlbHBvcikiLAogICJsaWNlbnNlIjogIk1JVCIsCiAgInR5cGUiOiAibW9kdWxlIiwKICAibWFpbiI6ICJkaXN0L2luZGV4LmpzIiwKICAiZXhwb3J0cyI6IHsKICAgICIuIjogewogICAgICAidHlwZXMiOiAiLi9kaXN0L2luZGV4LmQudHMiLAogICAgICAiaW1wb3J0IjogIi4vZGlzdC9pbmRleC5qcyIKICAgIH0KICB9LAogICJiaW4iOiB7CiAgICAia2F0dCI6ICJkaXN0L2thdHQuanMiCiAgfSwKICAic2NyaXB0cyI6IHsKICAgICJidWlsZCI6ICJ2aXRlIGJ1aWxkIiwKICAgICJkZXYiOiAidHN4IHNyYy9pbmRleC50cyIsCiAgICAibGludCI6ICJiaW9tZSBsaW50IC4vc3JjIiwKICAgICJmb3JtYXQiOiAiYmlvbWUgZm9ybWF0IC0td3JpdGUgLi9zcmMiLAogICAgInRlc3QiOiAidml0ZXN0IiwKICAgICJ0eXBlY2hlY2siOiAidHNjIC1wIHRzY29uZmlnLmpzb24gLS1ub0VtaXQiLAogICAgInRlc3Q6YnVpbGQiOiAibm9kZSAuL2Rpc3Qva2F0dC5qcyIKICB9LAogICJ0eXBlcyI6ICJkaXN0L2luZGV4LmQudHMiLAogICJkZXZEZXBlbmRlbmNpZXMiOiB7CiAgICAiQGJpb21lanMvYmlvbWUiOiAiMS45LjQiLAogICAgIkB0eXBlcy9ub2RlIjogIjI1LjIuMCIsCiAgICAidHN4IjogIjQuMjEuMCIsCiAgICAidHlwZXNjcmlwdCI6ICI1LjguMiIsCiAgICAidml0ZSI6ICI3LjMuMSIsCiAgICAidml0ZS1wbHVnaW4tZHRzIjogIjQuNS40IiwKICAgICJ2aXRlc3QiOiAiMy4yLjQiLAogICAgInZzY29kZS1qc29ucnBjIjogIl44LjIuMSIKICB9LAogICJkZXBlbmRlbmNpZXMiOiB7CiAgICAiQGdpdGh1Yi9jb3BpbG90LXNkayI6ICJeMC4xLjIxIgogIH0sCiAgImJ1Z3MiOiB7CiAgICAidXJsIjogImh0dHBzOi8vZ2l0aHViLmNvbS9yYXBoYWVscG9yL2thdHQvaXNzdWVzIgogIH0sCiAgImhvbWVwYWdlIjogImh0dHBzOi8vZ2l0aHViLmNvbS9yYXBoYWVscG9yL2thdHQiCn0K", import.meta.url);
178
+ let d;
179
+ function lt() {
180
+ if (d !== void 0)
181
+ return d;
182
+ try {
183
+ const t = S.protocol === "data:" ? ut(S) : U(T(S), "utf8"), e = JSON.parse(t);
184
+ d = typeof e.version == "string" ? e.version : "unknown";
185
+ } catch {
186
+ d = "unknown";
187
+ }
188
+ return d;
189
+ }
190
+ function ut(t) {
191
+ const e = t.pathname.indexOf(",");
192
+ if (e < 0)
193
+ throw new Error("Invalid data URL.");
194
+ const n = t.pathname.slice(0, e), o = t.pathname.slice(e + 1);
195
+ return n.includes(";base64") ? Buffer.from(o, "base64").toString("utf8") : decodeURIComponent(o);
196
+ }
197
+ function j() {
198
+ const t = " ██╗ ██╗ █████╗ ████████╗████████╗", e = " ██║ ██╔╝██╔══██╗╚══██╔══╝╚══██╔══╝", n = " █████╔╝ ███████║ ██║ ██║", o = " ██╔═██╗ ██╔══██║ ██║ ██║", a = " ██║ ██╗██║ ██║ ██║ ██║", I = " ╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ ╚═╝", c = `v${lt()}`, C = Math.max(
199
+ 0,
200
+ Math.floor((t.length - c.length) / 2)
201
+ ), g = `${" ".repeat(C)}${c}`;
202
+ console.log(`
203
+ ${p(t)}
204
+ ${p(e)}
205
+ ${p(n)}
206
+ ${w(o)}
207
+ ${w(a)}
208
+ ${st(I)}
209
+ ${p(g)}
210
+ `);
211
+ }
212
+ let x = !1;
213
+ function gt(t) {
214
+ x = t;
215
+ }
216
+ function Ft() {
217
+ return x;
218
+ }
219
+ function It(t) {
220
+ const e = String(t.getHours()).padStart(2, "0"), n = String(t.getMinutes()).padStart(2, "0"), o = String(t.getSeconds()).padStart(2, "0");
221
+ return `${e}:${n}:${o}`;
222
+ }
223
+ function dt() {
224
+ console.log(
225
+ [
226
+ "Usage:",
227
+ " katt [options]",
228
+ "",
229
+ "Options:",
230
+ " -h, --help Show CLI usage information",
231
+ " -u, --update-snapshots Update snapshot files on mismatch"
232
+ ].join(`
233
+ `)
234
+ );
235
+ }
236
+ async function Nt() {
237
+ const t = process.argv.slice(2);
238
+ if (t.includes("--help") || t.includes("-h"))
239
+ return j(), dt(), 0;
240
+ const n = t.includes("--update-snapshots") || t.includes("-u");
241
+ gt(n), j();
242
+ const o = /* @__PURE__ */ new Date();
243
+ at(), q(), D();
244
+ const a = await J(process.cwd());
245
+ if (a.length === 0)
246
+ return console.log("No .eval.js or .eval.ts files found."), 1;
247
+ const c = (await Promise.allSettled(
248
+ a.map(
249
+ (s) => rt.run(
250
+ { evalFile: s },
251
+ () => import(X(s).href)
252
+ )
253
+ )
254
+ )).map((s, u) => ({ result: s, file: a[u] })).filter(({ result: s }) => s.status === "rejected");
255
+ if (c.length > 0) {
256
+ for (const s of c) {
257
+ const u = s.result.status === "rejected" ? s.result.reason : void 0;
258
+ console.error(`Error executing ${s.file}: ${String(u)}`);
259
+ }
260
+ return 1;
261
+ }
262
+ const g = (await tt()).filter(
263
+ (s) => s.status === "rejected"
264
+ );
265
+ if (g.length > 0) {
266
+ for (const s of g)
267
+ s.status === "rejected" && console.error(`Error executing async test: ${String(s.reason)}`);
268
+ return 1;
269
+ }
270
+ const l = _();
271
+ if (l.length > 0) {
272
+ console.error("❌ Failed tests:");
273
+ for (const [s, u] of l.entries()) {
274
+ const b = [u.describePath, u.itPath].filter((N) => N.length > 0).join(" > "), F = b.length > 0 ? `${b}: ` : "";
275
+ console.error(`${s + 1}. ${F}${u.message}`);
276
+ }
277
+ return 1;
278
+ }
279
+ const H = E(), y = Date.now() - o.getTime();
280
+ return console.log(
281
+ [
282
+ "---",
283
+ `${r("Files")} ${a.length} passed`,
284
+ `${r("Evals")} ${H} passed`,
285
+ `${r("Start at")} ${It(o)}`,
286
+ `${r("Duration")} ${y}ms`
287
+ ].join(`
288
+ `)
289
+ ), 0;
290
+ }
291
+ export {
292
+ Zt as a,
293
+ Bt as b,
294
+ At as c,
295
+ wt as d,
296
+ kt as e,
297
+ bt as f,
298
+ Ht as g,
299
+ vt as h,
300
+ rt as i,
301
+ xt as j,
302
+ K as k,
303
+ yt as l,
304
+ V as m,
305
+ Ft as n,
306
+ Lt as o,
307
+ $t as p,
308
+ jt as q,
309
+ St as r,
310
+ Jt as s,
311
+ Nt as t
312
+ };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "katt",
3
- "version": "0.0.5",
3
+ "version": "0.0.7",
4
4
  "description": "CLI tool that tests the output of agentic AI tools",
5
5
  "keywords": [
6
6
  "cli",
@@ -1,288 +0,0 @@
1
- import { fileURLToPath as x, pathToFileURL as y } from "node:url";
2
- import { readdir as X } from "node:fs/promises";
3
- import { resolve as H } from "node:path";
4
- import { AsyncLocalStorage as j } from "node:async_hooks";
5
- import { readFileSync as M } from "node:fs";
6
- const R = /\.eval\.(js|ts)$/, W = /* @__PURE__ */ new Set([".git", "node_modules"]);
7
- async function v(t) {
8
- const e = await X(t, { withFileTypes: !0 }), n = [];
9
- return await Promise.all(
10
- e.map(async (o) => {
11
- const a = H(t, o.name);
12
- if (o.isDirectory()) {
13
- if (W.has(o.name))
14
- return;
15
- n.push(...await v(a));
16
- return;
17
- }
18
- o.isFile() && R.test(o.name) && n.push(a);
19
- })
20
- ), n;
21
- }
22
- const J = new j(), G = {
23
- describeStack: [],
24
- itStack: [],
25
- tokenUsageStack: [],
26
- modelStack: []
27
- };
28
- let L = 0, $ = 0;
29
- const p = [], m = [];
30
- let k = 0;
31
- function s() {
32
- return J.getStore() ?? G;
33
- }
34
- function B(t) {
35
- return {
36
- describeStack: [...t.describeStack],
37
- itStack: [...t.itStack],
38
- tokenUsageStack: [...t.tokenUsageStack],
39
- modelStack: [...t.modelStack]
40
- };
41
- }
42
- function U() {
43
- return L += 1, `d${L}`;
44
- }
45
- function O() {
46
- return $ += 1, `i${$}`;
47
- }
48
- function mt(t, e) {
49
- const n = e ?? B(s());
50
- return J.run(n, t);
51
- }
52
- function ht() {
53
- return B(s());
54
- }
55
- function St(t) {
56
- s().describeStack.push({ id: U(), description: t });
57
- }
58
- function At() {
59
- s().describeStack.pop();
60
- }
61
- function Y() {
62
- return s().describeStack.map((t) => t.description).join(" > ");
63
- }
64
- function kt(t) {
65
- s().itStack.push({ id: O(), description: t }), s().tokenUsageStack.push(0), s().modelStack.push(void 0);
66
- }
67
- function bt() {
68
- s().itStack.pop(), s().tokenUsageStack.pop(), s().modelStack.pop();
69
- }
70
- function Q() {
71
- return s().itStack.map((t) => t.description).join(" > ");
72
- }
73
- function Lt(t) {
74
- if (!Number.isFinite(t) || t <= 0)
75
- return;
76
- const e = s(), n = e.tokenUsageStack.length - 1;
77
- n < 0 || (e.tokenUsageStack[n] += t);
78
- }
79
- function V() {
80
- const t = s(), e = t.tokenUsageStack.length - 1;
81
- return e < 0 ? 0 : t.tokenUsageStack[e] ?? 0;
82
- }
83
- function $t(t) {
84
- if (t.length === 0)
85
- return;
86
- const e = s(), n = e.modelStack.length - 1;
87
- n < 0 || (e.modelStack[n] = t);
88
- }
89
- function K() {
90
- const t = s(), e = t.modelStack.length - 1;
91
- if (!(e < 0))
92
- return t.modelStack[e];
93
- }
94
- function wt(t) {
95
- p.push(t);
96
- }
97
- function jt() {
98
- k += 1;
99
- }
100
- function P() {
101
- return k;
102
- }
103
- function z() {
104
- k = 0;
105
- }
106
- function vt(t) {
107
- m.push(t);
108
- }
109
- function E() {
110
- return [...m];
111
- }
112
- function Jt() {
113
- return m.length;
114
- }
115
- function D() {
116
- m.length = 0;
117
- }
118
- async function _() {
119
- const t = [];
120
- for (; p.length > 0; ) {
121
- const e = p.splice(0, p.length), n = await Promise.allSettled(e);
122
- t.push(...n);
123
- }
124
- return t;
125
- }
126
- const q = "\x1B[1;36m", tt = "\x1B[33m", et = "\x1B[38;5;208m", nt = "\x1B[1;38;5;208m", h = "\x1B[0m";
127
- function c(t) {
128
- return `${q}${t}${h}`;
129
- }
130
- function f(t) {
131
- return `${tt}${t}${h}`;
132
- }
133
- function w(t) {
134
- return `${et}${t}${h}`;
135
- }
136
- function ot(t) {
137
- return `${nt}${t}${h}`;
138
- }
139
- let A = "";
140
- function it() {
141
- A = "";
142
- }
143
- function st({
144
- suitePath: t,
145
- casePath: e,
146
- didPass: n,
147
- durationMs: o,
148
- model: a,
149
- tokenUsage: r
150
- }) {
151
- const l = t.length > 0 ? t : "(root)", g = e.length > 0 ? e : "(root)";
152
- A !== l && (console.log(`Suite "${c(l)}"`), A = l);
153
- const I = n ? "✅ Passed in" : "❌ Failed in", d = [
154
- `Test "${c(g)}"`,
155
- `- ${I} ${c(`${o}ms`)}`
156
- ];
157
- a && d.push(`- Model ${c(a)}`), (r ?? 0) > 0 && d.push(`- Tokens used ${c(String(r))}`), d.push("---"), console.log(d.join(`
158
- `));
159
- }
160
- function Bt(t, e, n = "(root)") {
161
- const o = Q();
162
- st({
163
- suitePath: Y(),
164
- casePath: o.length > 0 ? o : n,
165
- didPass: t,
166
- durationMs: e,
167
- model: K(),
168
- tokenUsage: V()
169
- });
170
- }
171
- const at = new j(), S = new URL("data:application/json;base64,ewogICJuYW1lIjogImthdHQiLAogICJ2ZXJzaW9uIjogIjAuMC40IiwKICAiZGVzY3JpcHRpb24iOiAiQ0xJIHRvb2wgdGhhdCB0ZXN0cyB0aGUgb3V0cHV0IG9mIGFnZW50aWMgQUkgdG9vbHMiLAogICJrZXl3b3JkcyI6IFsKICAgICJjbGkiLAogICAgImFpIiwKICAgICJhZ2VudGljLWFpIiwKICAgICJ0ZXN0aW5nIiwKICAgICJldmFsdWF0aW9uIgogIF0sCiAgImF1dGhvciI6ICJSYXBoYWVsIFBvcnRvIChodHRwczovL2dpdGh1Yi5jb20vcmFwaGFlbHBvcikiLAogICJsaWNlbnNlIjogIk1JVCIsCiAgInR5cGUiOiAibW9kdWxlIiwKICAibWFpbiI6ICJkaXN0L2luZGV4LmpzIiwKICAiZXhwb3J0cyI6IHsKICAgICIuIjogewogICAgICAidHlwZXMiOiAiLi9kaXN0L2luZGV4LmQudHMiLAogICAgICAiaW1wb3J0IjogIi4vZGlzdC9pbmRleC5qcyIKICAgIH0KICB9LAogICJiaW4iOiB7CiAgICAia2F0dCI6ICJkaXN0L2thdHQuanMiCiAgfSwKICAic2NyaXB0cyI6IHsKICAgICJidWlsZCI6ICJ2aXRlIGJ1aWxkIiwKICAgICJkZXYiOiAidHN4IHNyYy9pbmRleC50cyIsCiAgICAibGludCI6ICJiaW9tZSBsaW50IC4vc3JjIiwKICAgICJmb3JtYXQiOiAiYmlvbWUgZm9ybWF0IC0td3JpdGUgLi9zcmMiLAogICAgInRlc3QiOiAidml0ZXN0IiwKICAgICJ0eXBlY2hlY2siOiAidHNjIC1wIHRzY29uZmlnLmpzb24gLS1ub0VtaXQiLAogICAgInRlc3Q6YnVpbGQiOiAibm9kZSAuL2Rpc3Qva2F0dC5qcyIKICB9LAogICJ0eXBlcyI6ICJkaXN0L2luZGV4LmQudHMiLAogICJkZXZEZXBlbmRlbmNpZXMiOiB7CiAgICAiQGJpb21lanMvYmlvbWUiOiAiMS45LjQiLAogICAgIkB0eXBlcy9ub2RlIjogIjI1LjIuMCIsCiAgICAidHN4IjogIjQuMjEuMCIsCiAgICAidHlwZXNjcmlwdCI6ICI1LjguMiIsCiAgICAidml0ZSI6ICI3LjMuMSIsCiAgICAidml0ZS1wbHVnaW4tZHRzIjogIjQuNS40IiwKICAgICJ2aXRlc3QiOiAiMy4yLjQiLAogICAgInZzY29kZS1qc29ucnBjIjogIl44LjIuMSIKICB9LAogICJkZXBlbmRlbmNpZXMiOiB7CiAgICAiQGdpdGh1Yi9jb3BpbG90LXNkayI6ICJeMC4xLjIxIgogIH0sCiAgImJ1Z3MiOiB7CiAgICAidXJsIjogImh0dHBzOi8vZ2l0aHViLmNvbS9yYXBoYWVscG9yL2thdHQvaXNzdWVzIgogIH0sCiAgImhvbWVwYWdlIjogImh0dHBzOi8vZ2l0aHViLmNvbS9yYXBoYWVscG9yL2thdHQiCn0K", import.meta.url);
172
- let C;
173
- function ct() {
174
- if (C !== void 0)
175
- return C;
176
- try {
177
- const t = S.protocol === "data:" ? rt(S) : M(x(S), "utf8"), e = JSON.parse(t);
178
- C = typeof e.version == "string" ? e.version : "unknown";
179
- } catch {
180
- C = "unknown";
181
- }
182
- return C;
183
- }
184
- function rt(t) {
185
- const e = t.pathname.indexOf(",");
186
- if (e < 0)
187
- throw new Error("Invalid data URL.");
188
- const n = t.pathname.slice(0, e), o = t.pathname.slice(e + 1);
189
- return n.includes(";base64") ? Buffer.from(o, "base64").toString("utf8") : decodeURIComponent(o);
190
- }
191
- function lt() {
192
- const t = " ██╗ ██╗ █████╗ ████████╗████████╗", e = " ██║ ██╔╝██╔══██╗╚══██╔══╝╚══██╔══╝", n = " █████╔╝ ███████║ ██║ ██║", o = " ██╔═██╗ ██╔══██║ ██║ ██║", a = " ██║ ██╗██║ ██║ ██║ ██║", r = " ╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ ╚═╝", l = `v${ct()}`, g = Math.max(
193
- 0,
194
- Math.floor((t.length - l.length) / 2)
195
- ), I = `${" ".repeat(g)}${l}`;
196
- console.log(`
197
- ${f(t)}
198
- ${f(e)}
199
- ${f(n)}
200
- ${w(o)}
201
- ${w(a)}
202
- ${ot(r)}
203
- ${f(I)}
204
- `);
205
- }
206
- let Z = !1;
207
- function ut(t) {
208
- Z = t;
209
- }
210
- function Zt() {
211
- return Z;
212
- }
213
- function gt(t) {
214
- const e = String(t.getHours()).padStart(2, "0"), n = String(t.getMinutes()).padStart(2, "0"), o = String(t.getSeconds()).padStart(2, "0");
215
- return `${e}:${n}:${o}`;
216
- }
217
- async function Ft() {
218
- const t = process.argv.slice(2), e = t.includes("--update-snapshots") || t.includes("-u");
219
- ut(e), lt();
220
- const n = /* @__PURE__ */ new Date();
221
- it(), D(), z();
222
- const o = await v(process.cwd());
223
- if (o.length === 0)
224
- return console.log("No .eval.js or .eval.ts files found."), 1;
225
- const r = (await Promise.allSettled(
226
- o.map(
227
- (i) => at.run(
228
- { evalFile: i },
229
- () => import(y(i).href)
230
- )
231
- )
232
- )).map((i, u) => ({ result: i, file: o[u] })).filter(({ result: i }) => i.status === "rejected");
233
- if (r.length > 0) {
234
- for (const i of r) {
235
- const u = i.result.status === "rejected" ? i.result.reason : void 0;
236
- console.error(`Error executing ${i.file}: ${String(u)}`);
237
- }
238
- return 1;
239
- }
240
- const g = (await _()).filter(
241
- (i) => i.status === "rejected"
242
- );
243
- if (g.length > 0) {
244
- for (const i of g)
245
- i.status === "rejected" && console.error(`Error executing async test: ${String(i.reason)}`);
246
- return 1;
247
- }
248
- const I = E();
249
- if (I.length > 0) {
250
- console.error("❌ Failed tests:");
251
- for (const [i, u] of I.entries()) {
252
- const b = [u.describePath, u.itPath].filter((T) => T.length > 0).join(" > "), N = b.length > 0 ? `${b}: ` : "";
253
- console.error(`${i + 1}. ${N}${u.message}`);
254
- }
255
- return 1;
256
- }
257
- const d = P(), F = Date.now() - n.getTime();
258
- return console.log(
259
- [
260
- "---",
261
- `${c("Files")} ${o.length} passed`,
262
- `${c("Evals")} ${d} passed`,
263
- `${c("Start at")} ${gt(n)}`,
264
- `${c("Duration")} ${F}ms`
265
- ].join(`
266
- `)
267
- ), 0;
268
- }
269
- export {
270
- jt as a,
271
- wt as b,
272
- ht as c,
273
- bt as d,
274
- St as e,
275
- At as f,
276
- Jt as g,
277
- Lt as h,
278
- at as i,
279
- vt as j,
280
- Q as k,
281
- Bt as l,
282
- Y as m,
283
- Zt as n,
284
- Ft as o,
285
- kt as p,
286
- mt as r,
287
- $t as s
288
- };