katt 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -4
- package/dist/index.js +161 -427
- package/dist/katt.js +7 -0
- package/dist/runCli-CDRmZ5hw.js +288 -0
- package/package.json +9 -3
package/README.md
CHANGED
|
@@ -1,20 +1,44 @@
|
|
|
1
1
|
# Katt
|
|
2
|
+
[License](https://github.com/raphaelpor/katt/blob/main/LICENSE) [npm package](https://www.npmjs.com/package/katt)
|
|
2
3
|
|
|
3
4
|
Katt is a lightweight testing framework for running AI Evals, inspired by [Jest](https://github.com/jestjs/jest).
|
|
4
5
|
|
|
5
6
|
<img src="https://raw.githubusercontent.com/raphaelpor/katt/main/docs/logo.png" alt="Katt logo" width="250" />
|
|
6
7
|
|
|
8
|
+
## Table of Contents
|
|
9
|
+
|
|
10
|
+
- [Overview](#overview)
|
|
11
|
+
- [API Documentation](#api-documentation)
|
|
12
|
+
- [Hello World - Example](#hello-world---example)
|
|
13
|
+
- [Main Features](#main-features)
|
|
14
|
+
- [Usage](#usage)
|
|
15
|
+
- [Installation](#installation)
|
|
16
|
+
- [Basic Usage](#basic-usage)
|
|
17
|
+
- [Using promptFile](#using-promptfile)
|
|
18
|
+
- [Specifying AI Models](#specifying-ai-models)
|
|
19
|
+
- [Development](#development)
|
|
20
|
+
- [Setup](#setup)
|
|
21
|
+
- [Available Scripts](#available-scripts)
|
|
22
|
+
- [Verification Process](#verification-process)
|
|
23
|
+
- [Project Structure](#project-structure)
|
|
24
|
+
- [How It Works](#how-it-works)
|
|
25
|
+
- [Requirements](#requirements)
|
|
26
|
+
- [License](#license)
|
|
27
|
+
- [Contributing](#contributing)
|
|
28
|
+
|
|
7
29
|
## Overview
|
|
8
30
|
|
|
9
31
|
Katt is designed to evaluate and validate the behavior of AI agents like **Claude Code**, **GitHub Copilot**, **OpenAI Codex** and more. It provides a simple, intuitive API for writing tests that interact with AI models and assert their responses.
|
|
10
32
|
|
|
11
33
|
## API Documentation
|
|
12
34
|
|
|
13
|
-
For a complete list of features and usage examples, see [docs/api-documentation.md](docs/api-documentation.md).
|
|
35
|
+
For a complete list of features and usage examples, see [docs/api-documentation.md](https://github.com/raphaelpor/katt/blob/main/docs/api-documentation.md).
|
|
14
36
|
|
|
15
37
|
## Hello World - Example
|
|
16
38
|
|
|
17
39
|
```typescript
|
|
40
|
+
import { expect, prompt } from "katt";
|
|
41
|
+
|
|
18
42
|
const result = await prompt("If you read this just say 'hello world'");
|
|
19
43
|
expect(result).toContain("hello world");
|
|
20
44
|
```
|
|
@@ -22,6 +46,8 @@ expect(result).toContain("hello world");
|
|
|
22
46
|
It also supports the familiar `describe` and `it` syntax for organizing tests:
|
|
23
47
|
|
|
24
48
|
```typescript
|
|
49
|
+
import { describe, expect, it, prompt } from "katt";
|
|
50
|
+
|
|
25
51
|
describe("Greeting agent", () => {
|
|
26
52
|
it("should say hello world", async () => {
|
|
27
53
|
const result = await prompt("If you read this just say 'hello world'");
|
|
@@ -41,10 +67,18 @@ describe("Greeting agent", () => {
|
|
|
41
67
|
|
|
42
68
|
## Usage
|
|
43
69
|
|
|
70
|
+
### Installation
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
npm install -g katt
|
|
74
|
+
```
|
|
75
|
+
|
|
44
76
|
### Basic Usage
|
|
45
77
|
|
|
46
78
|
1. Create a file with the `.eval.ts` or `.eval.js` extension and write your tests.
|
|
47
79
|
```typescript
|
|
80
|
+
import { expect, prompt } from "katt";
|
|
81
|
+
|
|
48
82
|
const result = await prompt("If you read this just say 'hello world'");
|
|
49
83
|
expect(result).toContain("hello world");
|
|
50
84
|
```
|
|
@@ -52,7 +86,7 @@ expect(result).toContain("hello world");
|
|
|
52
86
|
2. Run Katt from your project directory:
|
|
53
87
|
|
|
54
88
|
```bash
|
|
55
|
-
|
|
89
|
+
katt
|
|
56
90
|
```
|
|
57
91
|
|
|
58
92
|
### Using promptFile
|
|
@@ -61,6 +95,8 @@ Load prompts from external files:
|
|
|
61
95
|
|
|
62
96
|
```javascript
|
|
63
97
|
// test.eval.js
|
|
98
|
+
import { describe, expect, it, promptFile } from "katt";
|
|
99
|
+
|
|
64
100
|
describe("Working with files", () => {
|
|
65
101
|
it("should load the file and respond", async () => {
|
|
66
102
|
const result = await promptFile("./myPrompt.md");
|
|
@@ -74,6 +110,8 @@ describe("Working with files", () => {
|
|
|
74
110
|
You can specify a custom model for your prompts:
|
|
75
111
|
|
|
76
112
|
```javascript
|
|
113
|
+
import { describe, expect, it, prompt } from "katt";
|
|
114
|
+
|
|
77
115
|
describe("Model selection", () => {
|
|
78
116
|
it("should use a specific model", async () => {
|
|
79
117
|
const promptString = "You are a helpful agent. Say hi and ask what you could help the user with.";
|
|
@@ -167,7 +205,7 @@ MIT
|
|
|
167
205
|
|
|
168
206
|
## Contributing
|
|
169
207
|
|
|
170
|
-
We welcome contributions from the community! Please see our [CONTRIBUTING.md](CONTRIBUTING.md) guide for detailed information on how to contribute to Katt.
|
|
208
|
+
We welcome contributions from the community! Please see our [CONTRIBUTING.md](https://github.com/raphaelpor/katt/blob/main/CONTRIBUTING.md) guide for detailed information on how to contribute to Katt.
|
|
171
209
|
|
|
172
210
|
Quick start:
|
|
173
211
|
1. Fork the repository
|
|
@@ -176,4 +214,4 @@ Quick start:
|
|
|
176
214
|
4. Run the verification process
|
|
177
215
|
5. Submit a pull request
|
|
178
216
|
|
|
179
|
-
For detailed guidelines, development setup, coding standards, and more, check out our [contribution guide](CONTRIBUTING.md).
|
|
217
|
+
For detailed guidelines, development setup, coding standards, and more, check out our [contribution guide](https://github.com/raphaelpor/katt/blob/main/CONTRIBUTING.md).
|
package/dist/index.js
CHANGED
|
@@ -1,183 +1,56 @@
|
|
|
1
|
-
|
|
2
|
-
import {
|
|
3
|
-
import { CopilotClient as
|
|
4
|
-
import { readFile as
|
|
5
|
-
import { resolve as
|
|
6
|
-
import { readFileSync as
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
}
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
}
|
|
31
|
-
function pt() {
|
|
32
|
-
return E += 1, `i${E}`;
|
|
33
|
-
}
|
|
34
|
-
function K(t, e) {
|
|
35
|
-
const n = e ?? Q(u());
|
|
36
|
-
return Y.run(n, t);
|
|
37
|
-
}
|
|
38
|
-
function _() {
|
|
39
|
-
return Q(u());
|
|
40
|
-
}
|
|
41
|
-
function ht(t) {
|
|
42
|
-
u().describeStack.push({ id: gt(), description: t });
|
|
43
|
-
}
|
|
44
|
-
function x() {
|
|
45
|
-
u().describeStack.pop();
|
|
46
|
-
}
|
|
47
|
-
function q() {
|
|
48
|
-
return u().describeStack.map((t) => t.description).join(" > ");
|
|
49
|
-
}
|
|
50
|
-
function It(t) {
|
|
51
|
-
u().itStack.push({ id: pt(), description: t }), u().tokenUsageStack.push(0), u().modelStack.push(void 0);
|
|
52
|
-
}
|
|
53
|
-
function F() {
|
|
54
|
-
u().itStack.pop(), u().tokenUsageStack.pop(), u().modelStack.pop();
|
|
55
|
-
}
|
|
56
|
-
function tt() {
|
|
57
|
-
return u().itStack.map((t) => t.description).join(" > ");
|
|
58
|
-
}
|
|
59
|
-
function mt(t) {
|
|
60
|
-
if (!Number.isFinite(t) || t <= 0)
|
|
61
|
-
return;
|
|
62
|
-
const e = u(), n = e.tokenUsageStack.length - 1;
|
|
63
|
-
n < 0 || (e.tokenUsageStack[n] += t);
|
|
64
|
-
}
|
|
65
|
-
function Ct() {
|
|
66
|
-
const t = u(), e = t.tokenUsageStack.length - 1;
|
|
67
|
-
return e < 0 ? 0 : t.tokenUsageStack[e] ?? 0;
|
|
68
|
-
}
|
|
69
|
-
function St(t) {
|
|
70
|
-
if (t.length === 0)
|
|
71
|
-
return;
|
|
72
|
-
const e = u(), n = e.modelStack.length - 1;
|
|
73
|
-
n < 0 || (e.modelStack[n] = t);
|
|
74
|
-
}
|
|
75
|
-
function At() {
|
|
76
|
-
const t = u(), e = t.modelStack.length - 1;
|
|
77
|
-
if (!(e < 0))
|
|
78
|
-
return t.modelStack[e];
|
|
79
|
-
}
|
|
80
|
-
function v(t) {
|
|
81
|
-
w.push(t);
|
|
82
|
-
}
|
|
83
|
-
function bt() {
|
|
84
|
-
U += 1;
|
|
85
|
-
}
|
|
86
|
-
function $t() {
|
|
87
|
-
return U;
|
|
88
|
-
}
|
|
89
|
-
function kt() {
|
|
90
|
-
U = 0;
|
|
91
|
-
}
|
|
92
|
-
function wt(t) {
|
|
93
|
-
y.push(t);
|
|
94
|
-
}
|
|
95
|
-
function yt() {
|
|
96
|
-
return [...y];
|
|
97
|
-
}
|
|
98
|
-
function O() {
|
|
99
|
-
return y.length;
|
|
100
|
-
}
|
|
101
|
-
function vt() {
|
|
102
|
-
y.length = 0;
|
|
103
|
-
}
|
|
104
|
-
async function Lt() {
|
|
105
|
-
const t = [];
|
|
106
|
-
for (; w.length > 0; ) {
|
|
107
|
-
const e = w.splice(0, w.length), n = await Promise.allSettled(e);
|
|
108
|
-
t.push(...n);
|
|
109
|
-
}
|
|
110
|
-
return t;
|
|
1
|
+
import { r as E, c as N, a as O, p as I, b as m, l as d, d as w, g as F, e as U, f as C, s as _, h as R, i as D, j as z, k as B, m as W, n as K } from "./runCli-CDRmZ5hw.js";
|
|
2
|
+
import { o as St } from "./runCli-CDRmZ5hw.js";
|
|
3
|
+
import { CopilotClient as H } from "@github/copilot-sdk";
|
|
4
|
+
import { readFile as A } from "node:fs/promises";
|
|
5
|
+
import { resolve as j, dirname as T, isAbsolute as Y, basename as J, join as q } from "node:path";
|
|
6
|
+
import { readFileSync as G, writeFileSync as S, mkdirSync as Q } from "node:fs";
|
|
7
|
+
function vt(t, e) {
|
|
8
|
+
E(() => {
|
|
9
|
+
O(), I(t);
|
|
10
|
+
const n = F(), r = Date.now(), i = () => F() === n, o = () => Date.now() - r;
|
|
11
|
+
try {
|
|
12
|
+
const s = e();
|
|
13
|
+
if (s && typeof s.then == "function") {
|
|
14
|
+
m(
|
|
15
|
+
s.then(() => {
|
|
16
|
+
d(!0, o());
|
|
17
|
+
}).catch((a) => {
|
|
18
|
+
throw d(!1, o()), a;
|
|
19
|
+
}).finally(() => {
|
|
20
|
+
w();
|
|
21
|
+
})
|
|
22
|
+
);
|
|
23
|
+
return;
|
|
24
|
+
}
|
|
25
|
+
} catch (s) {
|
|
26
|
+
throw d(!1, o()), w(), s;
|
|
27
|
+
}
|
|
28
|
+
d(i(), o()), w();
|
|
29
|
+
}, N());
|
|
111
30
|
}
|
|
112
|
-
function
|
|
113
|
-
|
|
114
|
-
|
|
31
|
+
function Tt(t, e) {
|
|
32
|
+
E(() => {
|
|
33
|
+
U(t);
|
|
115
34
|
try {
|
|
116
35
|
const n = e();
|
|
117
36
|
if (n && typeof n.then == "function") {
|
|
118
|
-
|
|
37
|
+
m(
|
|
119
38
|
n.finally(() => {
|
|
120
|
-
|
|
39
|
+
C();
|
|
121
40
|
})
|
|
122
41
|
);
|
|
123
42
|
return;
|
|
124
43
|
}
|
|
125
44
|
} catch (n) {
|
|
126
|
-
throw
|
|
45
|
+
throw C(), n;
|
|
127
46
|
}
|
|
128
|
-
|
|
129
|
-
},
|
|
130
|
-
}
|
|
131
|
-
const Tt = "\x1B[1;36m", xt = "\x1B[33m", Ft = "\x1B[38;5;208m", Mt = "\x1B[1;38;5;208m", L = "\x1B[0m";
|
|
132
|
-
function f(t) {
|
|
133
|
-
return `${Tt}${t}${L}`;
|
|
134
|
-
}
|
|
135
|
-
function $(t) {
|
|
136
|
-
return `${xt}${t}${L}`;
|
|
137
|
-
}
|
|
138
|
-
function D(t) {
|
|
139
|
-
return `${Ft}${t}${L}`;
|
|
140
|
-
}
|
|
141
|
-
function Nt(t) {
|
|
142
|
-
return `${Mt}${t}${L}`;
|
|
47
|
+
C();
|
|
48
|
+
}, N());
|
|
143
49
|
}
|
|
144
|
-
|
|
145
|
-
function Bt() {
|
|
146
|
-
B = "";
|
|
147
|
-
}
|
|
148
|
-
function Jt({
|
|
149
|
-
suitePath: t,
|
|
150
|
-
casePath: e,
|
|
151
|
-
didPass: n,
|
|
152
|
-
durationMs: o,
|
|
153
|
-
model: s,
|
|
154
|
-
tokenUsage: i
|
|
155
|
-
}) {
|
|
156
|
-
const r = t.length > 0 ? t : "(root)", c = e.length > 0 ? e : "(root)";
|
|
157
|
-
B !== r && (console.log(`Suite "${f(r)}"`), B = r);
|
|
158
|
-
const l = n ? "✅ Passed in" : "❌ Failed in", I = [
|
|
159
|
-
`Test "${f(c)}"`,
|
|
160
|
-
`- ${l} ${f(`${o}ms`)}`
|
|
161
|
-
];
|
|
162
|
-
s && I.push(`- Model ${f(s)}`), (i ?? 0) > 0 && I.push(`- Tokens used ${f(String(i))}`), I.push("---"), console.log(I.join(`
|
|
163
|
-
`));
|
|
164
|
-
}
|
|
165
|
-
function C(t, e, n = "(root)") {
|
|
166
|
-
const o = tt();
|
|
167
|
-
Jt({
|
|
168
|
-
suitePath: q(),
|
|
169
|
-
casePath: o.length > 0 ? o : n,
|
|
170
|
-
didPass: t,
|
|
171
|
-
durationMs: e,
|
|
172
|
-
model: At(),
|
|
173
|
-
tokenUsage: Ct()
|
|
174
|
-
});
|
|
175
|
-
}
|
|
176
|
-
const G = new z();
|
|
177
|
-
function Rt(t, e) {
|
|
50
|
+
function V(t, e) {
|
|
178
51
|
return typeof t == "object" && t !== null && "code" in t && t.code === e;
|
|
179
52
|
}
|
|
180
|
-
function
|
|
53
|
+
function X(t) {
|
|
181
54
|
try {
|
|
182
55
|
const e = JSON.parse(t);
|
|
183
56
|
return typeof e == "object" && e !== null ? e : void 0;
|
|
@@ -186,133 +59,133 @@ function Ut(t) {
|
|
|
186
59
|
return;
|
|
187
60
|
}
|
|
188
61
|
}
|
|
189
|
-
async function
|
|
190
|
-
const t =
|
|
62
|
+
async function Z() {
|
|
63
|
+
const t = j(process.cwd(), "katt.json");
|
|
191
64
|
try {
|
|
192
|
-
const e = await
|
|
193
|
-
return
|
|
65
|
+
const e = await A(t, "utf8");
|
|
66
|
+
return X(e);
|
|
194
67
|
} catch (e) {
|
|
195
|
-
if (
|
|
68
|
+
if (V(e, "ENOENT"))
|
|
196
69
|
return;
|
|
197
70
|
console.warn(`Failed to read katt.json: ${String(e)}`);
|
|
198
71
|
return;
|
|
199
72
|
}
|
|
200
73
|
}
|
|
201
|
-
function
|
|
74
|
+
function tt(t) {
|
|
202
75
|
const e = t?.copilot;
|
|
203
76
|
if (typeof e != "object" || e === null || Array.isArray(e))
|
|
204
77
|
return;
|
|
205
78
|
const n = {
|
|
206
79
|
...e
|
|
207
|
-
},
|
|
208
|
-
return (typeof
|
|
80
|
+
}, r = n.model;
|
|
81
|
+
return (typeof r != "string" || r.length === 0) && delete n.model, Object.keys(n).length > 0 ? n : void 0;
|
|
209
82
|
}
|
|
210
|
-
function
|
|
83
|
+
function et(t) {
|
|
211
84
|
if (!(typeof t != "number" || !Number.isFinite(t)) && !(t <= 0))
|
|
212
85
|
return Math.floor(t);
|
|
213
86
|
}
|
|
214
|
-
function
|
|
87
|
+
function nt(t) {
|
|
215
88
|
const e = t?.prompt;
|
|
216
89
|
if (!(typeof e != "object" || e === null || Array.isArray(e)))
|
|
217
|
-
return
|
|
90
|
+
return et(e.timeoutMs);
|
|
218
91
|
}
|
|
219
|
-
async function
|
|
220
|
-
const t = await
|
|
92
|
+
async function ot() {
|
|
93
|
+
const t = await Z();
|
|
221
94
|
return {
|
|
222
|
-
copilot:
|
|
223
|
-
promptTimeoutMs:
|
|
95
|
+
copilot: tt(t),
|
|
96
|
+
promptTimeoutMs: nt(t)
|
|
224
97
|
};
|
|
225
98
|
}
|
|
226
|
-
const
|
|
227
|
-
function
|
|
99
|
+
const rt = 6e5;
|
|
100
|
+
function P(t) {
|
|
228
101
|
return typeof t == "string" && t.length > 0 ? t : void 0;
|
|
229
102
|
}
|
|
230
|
-
function
|
|
103
|
+
function v(t) {
|
|
231
104
|
if (!t)
|
|
232
105
|
return;
|
|
233
106
|
const e = { ...t };
|
|
234
107
|
if (e.model !== void 0) {
|
|
235
|
-
const n =
|
|
108
|
+
const n = P(e.model);
|
|
236
109
|
n ? e.model = n : delete e.model;
|
|
237
110
|
}
|
|
238
111
|
return Object.keys(e).length > 0 ? e : void 0;
|
|
239
112
|
}
|
|
240
|
-
function
|
|
113
|
+
function M(t) {
|
|
241
114
|
if (!(typeof t != "number" || !Number.isFinite(t)) && !(t <= 0))
|
|
242
115
|
return Math.floor(t);
|
|
243
116
|
}
|
|
244
|
-
function
|
|
117
|
+
function h(t) {
|
|
245
118
|
return !Number.isFinite(t) || (t ?? 0) <= 0 ? 0 : Math.floor(t ?? 0);
|
|
246
119
|
}
|
|
247
|
-
function
|
|
248
|
-
return
|
|
249
|
-
}
|
|
250
|
-
async function
|
|
251
|
-
const { timeoutMs: n, ...
|
|
252
|
-
|
|
253
|
-
),
|
|
254
|
-
...
|
|
255
|
-
...
|
|
256
|
-
}),
|
|
257
|
-
let p,
|
|
120
|
+
function it(t) {
|
|
121
|
+
return h(t.inputTokens) + h(t.outputTokens) + h(t.cacheReadTokens) + h(t.cacheWriteTokens);
|
|
122
|
+
}
|
|
123
|
+
async function $(t, e = {}) {
|
|
124
|
+
const { timeoutMs: n, ...r } = e, i = await ot(), o = v(i.copilot), s = v(
|
|
125
|
+
r
|
|
126
|
+
), a = v({
|
|
127
|
+
...o ?? {},
|
|
128
|
+
...s ?? {}
|
|
129
|
+
}), c = M(i.promptTimeoutMs), L = M(n) ?? c ?? rt, b = P(a?.model), g = new H({ useLoggedInUser: !0 });
|
|
130
|
+
let p, x, y = 0;
|
|
258
131
|
try {
|
|
259
|
-
await
|
|
260
|
-
|
|
132
|
+
await g.start(), p = await g.createSession(a), x = p.on("assistant.usage", (f) => {
|
|
133
|
+
y += it(f.data);
|
|
261
134
|
});
|
|
262
|
-
const
|
|
263
|
-
if (!
|
|
135
|
+
const l = await p.sendAndWait({ prompt: t }, L);
|
|
136
|
+
if (!l?.data?.content)
|
|
264
137
|
throw new Error("Copilot did not return a response.");
|
|
265
|
-
return
|
|
138
|
+
return b && _(b), l.data.content;
|
|
266
139
|
} finally {
|
|
267
|
-
const
|
|
268
|
-
if (
|
|
140
|
+
const l = [];
|
|
141
|
+
if (x?.(), y > 0 && R(y), p)
|
|
269
142
|
try {
|
|
270
143
|
await p.destroy();
|
|
271
|
-
} catch (
|
|
272
|
-
|
|
144
|
+
} catch (f) {
|
|
145
|
+
l.push(f);
|
|
273
146
|
}
|
|
274
147
|
try {
|
|
275
|
-
const
|
|
276
|
-
|
|
277
|
-
} catch (
|
|
278
|
-
|
|
148
|
+
const f = await g.stop();
|
|
149
|
+
l.push(...f);
|
|
150
|
+
} catch (f) {
|
|
151
|
+
l.push(f);
|
|
279
152
|
}
|
|
280
|
-
|
|
281
|
-
`Copilot cleanup encountered ${
|
|
153
|
+
l.length > 0 && console.error(
|
|
154
|
+
`Copilot cleanup encountered ${l.length} error(s).`
|
|
282
155
|
);
|
|
283
156
|
}
|
|
284
157
|
}
|
|
285
|
-
async function
|
|
286
|
-
const n =
|
|
287
|
-
return
|
|
158
|
+
async function $t(t, e = {}) {
|
|
159
|
+
const n = D.getStore(), r = n?.evalFile ? T(n.evalFile) : process.cwd(), i = Y(t) ? t : j(r, t), o = await A(i, "utf8");
|
|
160
|
+
return $(o, e);
|
|
288
161
|
}
|
|
289
|
-
function
|
|
290
|
-
|
|
291
|
-
describePath:
|
|
292
|
-
itPath:
|
|
162
|
+
function u(t) {
|
|
163
|
+
z({
|
|
164
|
+
describePath: W(),
|
|
165
|
+
itPath: B(),
|
|
293
166
|
message: t
|
|
294
167
|
});
|
|
295
168
|
}
|
|
296
|
-
async function
|
|
297
|
-
const n = Date.now(),
|
|
169
|
+
async function st(t, e) {
|
|
170
|
+
const n = Date.now(), r = `expected '${t}' to satisfy '${e}'`, i = $(`Evaluate if the expectation is fulfiled in by the input.
|
|
298
171
|
Expectation: "${e}".
|
|
299
172
|
Input:
|
|
300
173
|
---
|
|
301
174
|
${t}
|
|
302
175
|
---
|
|
303
176
|
Important: Answer with "Yes" or "No" only, without any additional text.
|
|
304
|
-
`).then((
|
|
305
|
-
|
|
177
|
+
`).then((o) => {
|
|
178
|
+
o.includes("Yes") ? d(!0, Date.now() - n, "promptCheck") : o.includes("No") ? u(r) : u(`failed to evaluate expectation '${e}'`);
|
|
306
179
|
});
|
|
307
|
-
return
|
|
180
|
+
return m(i), i;
|
|
308
181
|
}
|
|
309
|
-
const
|
|
310
|
-
function
|
|
182
|
+
const at = 3;
|
|
183
|
+
function ct(t) {
|
|
311
184
|
const e = t.match(/\b([1-5])\b/);
|
|
312
185
|
return !e || !e[1] ? null : Number(e[1]);
|
|
313
186
|
}
|
|
314
|
-
async function
|
|
315
|
-
const
|
|
187
|
+
async function ut(t, e, n) {
|
|
188
|
+
const r = Date.now(), i = n?.threshold ?? at, o = $(
|
|
316
189
|
`Classify the input by how "${e}" it is on a scale of 1 to 5.
|
|
317
190
|
1 means "not ${e}" and 5 means "very ${e}".
|
|
318
191
|
Return only a single number: 1, 2, 3, 4, or 5.
|
|
@@ -322,268 +195,129 @@ Input:
|
|
|
322
195
|
${t}
|
|
323
196
|
---`,
|
|
324
197
|
n?.model ? { model: n.model } : void 0
|
|
325
|
-
).then((
|
|
326
|
-
const
|
|
327
|
-
if (
|
|
328
|
-
|
|
329
|
-
`failed to classify as '${e}'. Evaluator returned '${
|
|
198
|
+
).then((s) => {
|
|
199
|
+
const a = ct(s);
|
|
200
|
+
if (a === null) {
|
|
201
|
+
u(
|
|
202
|
+
`failed to classify as '${e}'. Evaluator returned '${s}'`
|
|
330
203
|
);
|
|
331
204
|
return;
|
|
332
205
|
}
|
|
333
|
-
const
|
|
334
|
-
if (
|
|
335
|
-
|
|
206
|
+
const c = `expected response to be classified as '${e}' with score >= ${i}, got ${a}`;
|
|
207
|
+
if (a < i) {
|
|
208
|
+
u(c);
|
|
336
209
|
return;
|
|
337
210
|
}
|
|
338
|
-
|
|
211
|
+
d(
|
|
339
212
|
!0,
|
|
340
|
-
Date.now() -
|
|
213
|
+
Date.now() - r,
|
|
341
214
|
"toBeClassifiedAs"
|
|
342
215
|
);
|
|
343
216
|
});
|
|
344
|
-
return
|
|
217
|
+
return m(o), o;
|
|
345
218
|
}
|
|
346
|
-
function
|
|
219
|
+
function lt(t, e) {
|
|
347
220
|
const n = `expected '${t}' to include '${e}'`;
|
|
348
|
-
t.includes(e) ||
|
|
349
|
-
}
|
|
350
|
-
let nt = !1;
|
|
351
|
-
function Kt(t) {
|
|
352
|
-
nt = t;
|
|
353
|
-
}
|
|
354
|
-
function _t() {
|
|
355
|
-
return nt;
|
|
221
|
+
t.includes(e) || u(n);
|
|
356
222
|
}
|
|
357
|
-
function
|
|
358
|
-
const n =
|
|
359
|
-
return
|
|
360
|
-
|
|
223
|
+
function ft(t) {
|
|
224
|
+
const n = J(t).replace(/\.eval\.[^./\\]+$/, "");
|
|
225
|
+
return q(
|
|
226
|
+
T(t),
|
|
361
227
|
"__snapshots__",
|
|
362
228
|
`${n}.snap.md`
|
|
363
229
|
);
|
|
364
230
|
}
|
|
365
|
-
function
|
|
231
|
+
function k(t) {
|
|
366
232
|
return t.split(/\r?\n/);
|
|
367
233
|
}
|
|
368
|
-
function
|
|
234
|
+
function dt(t, e) {
|
|
369
235
|
if (t === e)
|
|
370
236
|
return " (no diff)";
|
|
371
|
-
const n =
|
|
372
|
-
for (let
|
|
373
|
-
const
|
|
374
|
-
if (
|
|
375
|
-
if (
|
|
376
|
-
|
|
237
|
+
const n = k(t), r = k(e), i = Math.max(n.length, r.length), o = [];
|
|
238
|
+
for (let s = 0; s < i; s += 1) {
|
|
239
|
+
const a = n[s], c = r[s];
|
|
240
|
+
if (a !== c) {
|
|
241
|
+
if (a === void 0 && c !== void 0) {
|
|
242
|
+
o.push(`+ ${c}`);
|
|
377
243
|
continue;
|
|
378
244
|
}
|
|
379
|
-
if (
|
|
380
|
-
|
|
245
|
+
if (a !== void 0 && c === void 0) {
|
|
246
|
+
o.push(`- ${a}`);
|
|
381
247
|
continue;
|
|
382
248
|
}
|
|
383
|
-
|
|
249
|
+
o.push(`- ${a ?? ""}`), o.push(`+ ${c ?? ""}`);
|
|
384
250
|
}
|
|
385
251
|
}
|
|
386
|
-
return
|
|
252
|
+
return o.join(`
|
|
387
253
|
`);
|
|
388
254
|
}
|
|
389
|
-
function
|
|
390
|
-
const e =
|
|
255
|
+
function pt(t) {
|
|
256
|
+
const e = D.getStore()?.evalFile;
|
|
391
257
|
if (!e) {
|
|
392
|
-
|
|
258
|
+
u(
|
|
393
259
|
"toMatchSnapshot can only be used while running an eval file."
|
|
394
260
|
);
|
|
395
261
|
return;
|
|
396
262
|
}
|
|
397
|
-
const n =
|
|
263
|
+
const n = ft(e);
|
|
398
264
|
try {
|
|
399
|
-
const
|
|
400
|
-
if (
|
|
265
|
+
const r = G(n, "utf8");
|
|
266
|
+
if (r === t)
|
|
401
267
|
return;
|
|
402
|
-
if (
|
|
403
|
-
|
|
268
|
+
if (K()) {
|
|
269
|
+
S(n, t, "utf8");
|
|
404
270
|
return;
|
|
405
271
|
}
|
|
406
|
-
const
|
|
407
|
-
|
|
272
|
+
const i = dt(r, t);
|
|
273
|
+
u(
|
|
408
274
|
[
|
|
409
275
|
`Snapshot mismatch at ${n}`,
|
|
410
276
|
"",
|
|
411
277
|
"Diff:",
|
|
412
|
-
|
|
278
|
+
i,
|
|
413
279
|
"",
|
|
414
280
|
"Run katt with --update-snapshots (or -u) to accept this change."
|
|
415
281
|
].join(`
|
|
416
282
|
`)
|
|
417
283
|
);
|
|
418
|
-
} catch (
|
|
419
|
-
if (
|
|
420
|
-
|
|
421
|
-
`Failed to read snapshot at ${n}: ${String(
|
|
284
|
+
} catch (r) {
|
|
285
|
+
if (r.code !== "ENOENT") {
|
|
286
|
+
u(
|
|
287
|
+
`Failed to read snapshot at ${n}: ${String(r)}`
|
|
422
288
|
);
|
|
423
289
|
return;
|
|
424
290
|
}
|
|
425
291
|
try {
|
|
426
|
-
|
|
427
|
-
} catch (
|
|
428
|
-
|
|
429
|
-
`Failed to write snapshot at ${n}: ${String(
|
|
292
|
+
Q(T(n), { recursive: !0 }), S(n, t, "utf8");
|
|
293
|
+
} catch (o) {
|
|
294
|
+
u(
|
|
295
|
+
`Failed to write snapshot at ${n}: ${String(o)}`
|
|
430
296
|
);
|
|
431
297
|
}
|
|
432
298
|
}
|
|
433
299
|
}
|
|
434
|
-
function
|
|
300
|
+
function bt(t) {
|
|
435
301
|
return {
|
|
436
302
|
toContain: (e) => {
|
|
437
|
-
|
|
303
|
+
lt(t, e);
|
|
438
304
|
},
|
|
439
305
|
toMatchSnapshot: () => {
|
|
440
|
-
|
|
306
|
+
pt(t);
|
|
441
307
|
},
|
|
442
308
|
promptCheck: async (e) => {
|
|
443
|
-
await
|
|
309
|
+
await st(t, e);
|
|
444
310
|
},
|
|
445
311
|
toBeClassifiedAs: async (e, n) => {
|
|
446
|
-
await
|
|
312
|
+
await ut(t, e, n);
|
|
447
313
|
}
|
|
448
314
|
};
|
|
449
315
|
}
|
|
450
|
-
function oe(t, e) {
|
|
451
|
-
K(() => {
|
|
452
|
-
bt(), It(t);
|
|
453
|
-
const n = O(), o = Date.now(), s = () => O() === n, i = () => Date.now() - o;
|
|
454
|
-
try {
|
|
455
|
-
const r = e();
|
|
456
|
-
if (r && typeof r.then == "function") {
|
|
457
|
-
v(
|
|
458
|
-
r.then(() => {
|
|
459
|
-
C(!0, i());
|
|
460
|
-
}).catch((c) => {
|
|
461
|
-
throw C(!1, i()), c;
|
|
462
|
-
}).finally(() => {
|
|
463
|
-
F();
|
|
464
|
-
})
|
|
465
|
-
);
|
|
466
|
-
return;
|
|
467
|
-
}
|
|
468
|
-
} catch (r) {
|
|
469
|
-
throw C(!1, i()), F(), r;
|
|
470
|
-
}
|
|
471
|
-
C(s(), i()), F();
|
|
472
|
-
}, _());
|
|
473
|
-
}
|
|
474
|
-
const ie = /\.eval\.(js|ts)$/, se = /* @__PURE__ */ new Set([".git", "node_modules"]);
|
|
475
|
-
async function ot(t) {
|
|
476
|
-
const e = await st(t, { withFileTypes: !0 }), n = [];
|
|
477
|
-
return await Promise.all(
|
|
478
|
-
e.map(async (o) => {
|
|
479
|
-
const s = J(t, o.name);
|
|
480
|
-
if (o.isDirectory()) {
|
|
481
|
-
if (se.has(o.name))
|
|
482
|
-
return;
|
|
483
|
-
n.push(...await ot(s));
|
|
484
|
-
return;
|
|
485
|
-
}
|
|
486
|
-
o.isFile() && ie.test(o.name) && n.push(s);
|
|
487
|
-
})
|
|
488
|
-
), n;
|
|
489
|
-
}
|
|
490
|
-
const N = new URL("data:application/json;base64,ewogICJuYW1lIjogImthdHQiLAogICJ2ZXJzaW9uIjogIjAuMC4zIiwKICAiZGVzY3JpcHRpb24iOiAiQ0xJIHRvb2wgdGhhdCB0ZXN0cyB0aGUgb3V0cHV0IG9mIGFnZW50aWMgQUkgdG9vbHMiLAogICJrZXl3b3JkcyI6IFsKICAgICJjbGkiLAogICAgImFpIiwKICAgICJhZ2VudGljLWFpIiwKICAgICJ0ZXN0aW5nIiwKICAgICJldmFsdWF0aW9uIgogIF0sCiAgImF1dGhvciI6ICJSYXBoYWVsIFBvcnRvIChodHRwczovL2dpdGh1Yi5jb20vcmFwaGFlbHBvcikiLAogICJsaWNlbnNlIjogIk1JVCIsCiAgInR5cGUiOiAibW9kdWxlIiwKICAibWFpbiI6ICJkaXN0L2luZGV4LmpzIiwKICAiYmluIjogewogICAgImthdHQiOiAiZGlzdC9pbmRleC5qcyIKICB9LAogICJzY3JpcHRzIjogewogICAgImJ1aWxkIjogInZpdGUgYnVpbGQiLAogICAgImRldiI6ICJ0c3ggc3JjL2luZGV4LnRzIiwKICAgICJsaW50IjogImJpb21lIGxpbnQgLi9zcmMiLAogICAgImZvcm1hdCI6ICJiaW9tZSBmb3JtYXQgLS13cml0ZSAuL3NyYyIsCiAgICAidGVzdCI6ICJ2aXRlc3QiLAogICAgInR5cGVjaGVjayI6ICJ0c2MgLXAgdHNjb25maWcuanNvbiAtLW5vRW1pdCIsCiAgICAidGVzdDpidWlsZCI6ICJub2RlIC4vZGlzdC9pbmRleC5qcyIKICB9LAogICJ0eXBlcyI6ICJkaXN0L2luZGV4LmQudHMiLAogICJkZXZEZXBlbmRlbmNpZXMiOiB7CiAgICAiQGJpb21lanMvYmlvbWUiOiAiMS45LjQiLAogICAgIkB0eXBlcy9ub2RlIjogIjI1LjIuMCIsCiAgICAidHN4IjogIjQuMjEuMCIsCiAgICAidHlwZXNjcmlwdCI6ICI1LjguMiIsCiAgICAidml0ZSI6ICI3LjMuMSIsCiAgICAidml0ZS1wbHVnaW4tZHRzIjogIjQuNS40IiwKICAgICJ2aXRlc3QiOiAiMy4yLjQiLAogICAgInZzY29kZS1qc29ucnBjIjogIl44LjIuMSIKICB9LAogICJkZXBlbmRlbmNpZXMiOiB7CiAgICAiQGdpdGh1Yi9jb3BpbG90LXNkayI6ICJeMC4xLjIxIgogIH0sCiAgImJ1Z3MiOiB7CiAgICAidXJsIjogImh0dHBzOi8vZ2l0aHViLmNvbS9yYXBoYWVscG9yL2thdHQvaXNzdWVzIgogIH0sCiAgImhvbWVwYWdlIjogImh0dHBzOi8vZ2l0aHViLmNvbS9yYXBoYWVscG9yL2thdHQiCn0K", import.meta.url);
|
|
491
|
-
let A;
|
|
492
|
-
function re() {
|
|
493
|
-
if (A !== void 0)
|
|
494
|
-
return A;
|
|
495
|
-
try {
|
|
496
|
-
const t = N.protocol === "data:" ? ce(N) : V(ut(N), "utf8"), e = JSON.parse(t);
|
|
497
|
-
A = typeof e.version == "string" ? e.version : "unknown";
|
|
498
|
-
} catch {
|
|
499
|
-
A = "unknown";
|
|
500
|
-
}
|
|
501
|
-
return A;
|
|
502
|
-
}
|
|
503
|
-
function ce(t) {
|
|
504
|
-
const e = t.pathname.indexOf(",");
|
|
505
|
-
if (e < 0)
|
|
506
|
-
throw new Error("Invalid data URL.");
|
|
507
|
-
const n = t.pathname.slice(0, e), o = t.pathname.slice(e + 1);
|
|
508
|
-
return n.includes(";base64") ? Buffer.from(o, "base64").toString("utf8") : decodeURIComponent(o);
|
|
509
|
-
}
|
|
510
|
-
function ae() {
|
|
511
|
-
const t = " ██╗ ██╗ █████╗ ████████╗████████╗", e = " ██║ ██╔╝██╔══██╗╚══██╔══╝╚══██╔══╝", n = " █████╔╝ ███████║ ██║ ██║", o = " ██╔═██╗ ██╔══██║ ██║ ██║", s = " ██║ ██╗██║ ██║ ██║ ██║", i = " ╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ ╚═╝", r = `v${re()}`, c = Math.max(
|
|
512
|
-
0,
|
|
513
|
-
Math.floor((t.length - r.length) / 2)
|
|
514
|
-
), l = `${" ".repeat(c)}${r}`;
|
|
515
|
-
console.log(`
|
|
516
|
-
${$(t)}
|
|
517
|
-
${$(e)}
|
|
518
|
-
${$(n)}
|
|
519
|
-
${D(o)}
|
|
520
|
-
${D(s)}
|
|
521
|
-
${Nt(i)}
|
|
522
|
-
${$(l)}
|
|
523
|
-
`);
|
|
524
|
-
}
|
|
525
|
-
function le(t) {
|
|
526
|
-
const e = String(t.getHours()).padStart(2, "0"), n = String(t.getMinutes()).padStart(2, "0"), o = String(t.getSeconds()).padStart(2, "0");
|
|
527
|
-
return `${e}:${n}:${o}`;
|
|
528
|
-
}
|
|
529
|
-
async function ue() {
|
|
530
|
-
const t = process.argv.slice(2), e = t.includes("--update-snapshots") || t.includes("-u");
|
|
531
|
-
Kt(e), ae();
|
|
532
|
-
const n = /* @__PURE__ */ new Date();
|
|
533
|
-
Bt(), vt(), kt();
|
|
534
|
-
const o = await ot(process.cwd());
|
|
535
|
-
if (o.length === 0)
|
|
536
|
-
return console.log("No .eval.js or .eval.ts files found."), 1;
|
|
537
|
-
const i = (await Promise.allSettled(
|
|
538
|
-
o.map(
|
|
539
|
-
(a) => G.run(
|
|
540
|
-
{ evalFile: a },
|
|
541
|
-
() => import(dt(a).href)
|
|
542
|
-
)
|
|
543
|
-
)
|
|
544
|
-
)).map((a, d) => ({ result: a, file: o[d] })).filter(({ result: a }) => a.status === "rejected");
|
|
545
|
-
if (i.length > 0) {
|
|
546
|
-
for (const a of i) {
|
|
547
|
-
const d = a.result.status === "rejected" ? a.result.reason : void 0;
|
|
548
|
-
console.error(`Error executing ${a.file}: ${String(d)}`);
|
|
549
|
-
}
|
|
550
|
-
return 1;
|
|
551
|
-
}
|
|
552
|
-
const c = (await Lt()).filter(
|
|
553
|
-
(a) => a.status === "rejected"
|
|
554
|
-
);
|
|
555
|
-
if (c.length > 0) {
|
|
556
|
-
for (const a of c)
|
|
557
|
-
a.status === "rejected" && console.error(`Error executing async test: ${String(a.reason)}`);
|
|
558
|
-
return 1;
|
|
559
|
-
}
|
|
560
|
-
const l = yt();
|
|
561
|
-
if (l.length > 0) {
|
|
562
|
-
console.error("❌ Failed tests:");
|
|
563
|
-
for (const [a, d] of l.entries()) {
|
|
564
|
-
const p = [d.describePath, d.itPath].filter((S) => S.length > 0).join(" > "), b = p.length > 0 ? `${p}: ` : "";
|
|
565
|
-
console.error(`${a + 1}. ${b}${d.message}`);
|
|
566
|
-
}
|
|
567
|
-
return 1;
|
|
568
|
-
}
|
|
569
|
-
const I = $t(), T = Date.now() - n.getTime();
|
|
570
|
-
return console.log(
|
|
571
|
-
[
|
|
572
|
-
"---",
|
|
573
|
-
`${f("Files")} ${o.length} passed`,
|
|
574
|
-
`${f("Evals")} ${I} passed`,
|
|
575
|
-
`${f("Start at")} ${le(n)}`,
|
|
576
|
-
`${f("Duration")} ${T}ms`
|
|
577
|
-
].join(`
|
|
578
|
-
`)
|
|
579
|
-
), 0;
|
|
580
|
-
}
|
|
581
|
-
Object.assign(globalThis, { describe: jt, it: oe, expect: ne, prompt: j, promptFile: Pt });
|
|
582
|
-
ue().then((t) => {
|
|
583
|
-
process.exit(t);
|
|
584
|
-
}).catch((t) => {
|
|
585
|
-
console.error(`Unexpected error: ${String(t)}`), process.exit(1);
|
|
586
|
-
});
|
|
587
316
|
export {
|
|
588
|
-
|
|
317
|
+
Tt as describe,
|
|
318
|
+
bt as expect,
|
|
319
|
+
vt as it,
|
|
320
|
+
$ as prompt,
|
|
321
|
+
$t as promptFile,
|
|
322
|
+
St as runCli
|
|
589
323
|
};
|
package/dist/katt.js
ADDED
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
import { fileURLToPath as x, pathToFileURL as y } from "node:url";
|
|
2
|
+
import { readdir as X } from "node:fs/promises";
|
|
3
|
+
import { resolve as H } from "node:path";
|
|
4
|
+
import { AsyncLocalStorage as j } from "node:async_hooks";
|
|
5
|
+
import { readFileSync as M } from "node:fs";
|
|
6
|
+
const R = /\.eval\.(js|ts)$/, W = /* @__PURE__ */ new Set([".git", "node_modules"]);
|
|
7
|
+
async function v(t) {
|
|
8
|
+
const e = await X(t, { withFileTypes: !0 }), n = [];
|
|
9
|
+
return await Promise.all(
|
|
10
|
+
e.map(async (o) => {
|
|
11
|
+
const a = H(t, o.name);
|
|
12
|
+
if (o.isDirectory()) {
|
|
13
|
+
if (W.has(o.name))
|
|
14
|
+
return;
|
|
15
|
+
n.push(...await v(a));
|
|
16
|
+
return;
|
|
17
|
+
}
|
|
18
|
+
o.isFile() && R.test(o.name) && n.push(a);
|
|
19
|
+
})
|
|
20
|
+
), n;
|
|
21
|
+
}
|
|
22
|
+
const J = new j(), G = {
|
|
23
|
+
describeStack: [],
|
|
24
|
+
itStack: [],
|
|
25
|
+
tokenUsageStack: [],
|
|
26
|
+
modelStack: []
|
|
27
|
+
};
|
|
28
|
+
let L = 0, $ = 0;
|
|
29
|
+
const p = [], m = [];
|
|
30
|
+
let k = 0;
|
|
31
|
+
function s() {
|
|
32
|
+
return J.getStore() ?? G;
|
|
33
|
+
}
|
|
34
|
+
function B(t) {
|
|
35
|
+
return {
|
|
36
|
+
describeStack: [...t.describeStack],
|
|
37
|
+
itStack: [...t.itStack],
|
|
38
|
+
tokenUsageStack: [...t.tokenUsageStack],
|
|
39
|
+
modelStack: [...t.modelStack]
|
|
40
|
+
};
|
|
41
|
+
}
|
|
42
|
+
function U() {
|
|
43
|
+
return L += 1, `d${L}`;
|
|
44
|
+
}
|
|
45
|
+
function O() {
|
|
46
|
+
return $ += 1, `i${$}`;
|
|
47
|
+
}
|
|
48
|
+
function mt(t, e) {
|
|
49
|
+
const n = e ?? B(s());
|
|
50
|
+
return J.run(n, t);
|
|
51
|
+
}
|
|
52
|
+
function ht() {
|
|
53
|
+
return B(s());
|
|
54
|
+
}
|
|
55
|
+
function St(t) {
|
|
56
|
+
s().describeStack.push({ id: U(), description: t });
|
|
57
|
+
}
|
|
58
|
+
/** Removes the top entry from the current describe stack. */
function At() {
  const { describeStack } = s();
  describeStack.pop();
}
|
|
61
|
+
/** Returns the describe-stack descriptions joined with " > ", outermost first. */
function Y() {
  const descriptions = [];
  for (const entry of s().describeStack) {
    descriptions.push(entry.description);
  }
  return descriptions.join(" > ");
}
|
|
64
|
+
/**
 * Opens a new frame on the current context: pushes an entry with a fresh
 * "i<N>" id onto the it stack, a 0 onto the token-usage stack and an
 * undefined slot onto the model stack.
 *
 * @param {string} t - Description stored on the it-stack entry.
 */
function kt(t) {
  const context = s();
  context.itStack.push({ id: O(), description: t });
  context.tokenUsageStack.push(0);
  context.modelStack.push(void 0);
}
|
|
67
|
+
/** Pops the top frame from the it, token-usage and model stacks of the current context. */
function bt() {
  const context = s();
  context.itStack.pop();
  context.tokenUsageStack.pop();
  context.modelStack.pop();
}
|
|
70
|
+
/** Returns the it-stack descriptions joined with " > ", outermost first. */
function Q() {
  const descriptions = [];
  for (const entry of s().itStack) {
    descriptions.push(entry.description);
  }
  return descriptions.join(" > ");
}
|
|
73
|
+
/**
 * Adds `t` to the top slot of the current token-usage stack.
 * Non-finite or non-positive amounts are ignored, as is an empty stack.
 *
 * @param {number} t - Amount to add.
 */
function Lt(t) {
  const isPositiveFinite = Number.isFinite(t) && t > 0;
  if (!isPositiveFinite) {
    return;
  }
  const { tokenUsageStack } = s();
  if (tokenUsageStack.length === 0) {
    return;
  }
  tokenUsageStack[tokenUsageStack.length - 1] += t;
}
|
|
79
|
+
/** Returns the top value of the current token-usage stack, or 0 when the stack is empty. */
function V() {
  const { tokenUsageStack } = s();
  if (tokenUsageStack.length === 0) {
    return 0;
  }
  return tokenUsageStack[tokenUsageStack.length - 1] ?? 0;
}
|
|
83
|
+
/**
 * Stores `t` in the top slot of the current model stack.
 * A value with length 0, or an empty stack, leaves the stack untouched.
 *
 * @param {string} t - Value to record.
 */
function $t(t) {
  if (t.length === 0) {
    return;
  }
  const { modelStack } = s();
  if (modelStack.length === 0) {
    return;
  }
  modelStack[modelStack.length - 1] = t;
}
|
|
89
|
+
/** Returns the top value of the current model stack, or undefined when the stack is empty. */
function K() {
  const { modelStack } = s();
  if (modelStack.length === 0) {
    return void 0;
  }
  return modelStack[modelStack.length - 1];
}
|
|
94
|
+
/**
 * Appends `t` to the module-level list `p`, which `_()` later drains and settles.
 *
 * @param {*} t - Value (typically a promise) to queue.
 */
function wt(t) {
  p[p.length] = t;
}
|
|
97
|
+
/** Increments the module-level counter `k` by one. */
function jt() {
  k = k + 1;
}
|
|
100
|
+
/** Returns the current value of the module-level counter `k`. */
function P() {
  const count = k;
  return count;
}
|
|
103
|
+
/** Resets the module-level counter `k` to zero. */
function z() {
  k = 0;
}
|
|
106
|
+
/**
 * Appends `t` to the module-level list `m`.
 *
 * @param {*} t - Record to store.
 */
function vt(t) {
  m[m.length] = t;
}
|
|
109
|
+
/** Returns a shallow copy of the module-level list `m`. */
function E() {
  return m.slice();
}
|
|
112
|
+
/** Returns how many records the module-level list `m` currently holds. */
function Jt() {
  const { length } = m;
  return length;
}
|
|
115
|
+
/** Empties the module-level list `m` in place. */
function D() {
  m.splice(0, m.length);
}
|
|
118
|
+
/**
 * Drains the module-level list `p` until it stays empty, settling each batch
 * with Promise.allSettled. Looping again picks up anything appended to `p`
 * while a batch was being awaited.
 *
 * @returns {Promise<object[]>} Settlement results of every drained entry, in batch order.
 */
async function _() {
  const outcomes = [];
  while (p.length > 0) {
    // Take everything queued so far and leave `p` empty for new arrivals.
    const batch = p.splice(0, p.length);
    const settled = await Promise.allSettled(batch);
    for (const outcome of settled) {
      outcomes.push(outcome);
    }
  }
  return outcomes;
}
|
|
126
|
+
// ANSI SGR escape sequences used by the wrappers below; `h` resets styling.
const q = "\x1B[1;36m";
const tt = "\x1B[33m";
const et = "\x1B[38;5;208m";
const nt = "\x1B[1;38;5;208m";
const h = "\x1B[0m";

/** Wraps `t` in the `q` escape sequence followed by a reset. */
function c(t) {
  return [q, t, h].join("");
}

/** Wraps `t` in the `tt` escape sequence followed by a reset. */
function f(t) {
  return [tt, t, h].join("");
}

/** Wraps `t` in the `et` escape sequence followed by a reset. */
function w(t) {
  return [et, t, h].join("");
}

/** Wraps `t` in the `nt` escape sequence followed by a reset. */
function ot(t) {
  return [nt, t, h].join("");
}
|
|
139
|
+
// Label of the suite whose header was printed most recently ("" = none yet).
let A = "";

/** Clears the remembered suite label so the next report prints its header again. */
function it() {
  A = String();
}
|
|
143
|
+
/**
 * Prints one result block to the console. A `Suite "<label>"` header is
 * printed first whenever the suite label differs from the one printed last.
 *
 * @param {object} report
 * @param {string} report.suitePath - Suite label; empty is shown as "(root)".
 * @param {string} report.casePath - Test label; empty is shown as "(root)".
 * @param {boolean} report.didPass - Selects the passed/failed wording.
 * @param {number} report.durationMs - Duration shown with an "ms" suffix.
 * @param {string} [report.model] - Printed on its own line when truthy.
 * @param {number} [report.tokenUsage] - Printed on its own line when greater than 0.
 */
function st({
  suitePath: t,
  casePath: e,
  didPass: n,
  durationMs: o,
  model: a,
  tokenUsage: r
}) {
  const suiteLabel = t.length > 0 ? t : "(root)";
  const caseLabel = e.length > 0 ? e : "(root)";
  if (A !== suiteLabel) {
    console.log(`Suite "${c(suiteLabel)}"`);
    A = suiteLabel;
  }
  let verdict;
  if (n) {
    verdict = "✅ Passed in";
  } else {
    verdict = "❌ Failed in";
  }
  const lines = [];
  lines.push(`Test "${c(caseLabel)}"`);
  lines.push(`- ${verdict} ${c(`${o}ms`)}`);
  if (a) {
    lines.push(`- Model ${c(a)}`);
  }
  if ((r ?? 0) > 0) {
    lines.push(`- Tokens used ${c(String(r))}`);
  }
  lines.push("---");
  console.log(lines.join("\n"));
}
|
|
160
|
+
/**
 * Prints a result block for the current context, reading the suite path,
 * case path, model and token usage from the active stacks.
 *
 * @param {boolean} t - Whether the case passed.
 * @param {number} e - Duration in milliseconds.
 * @param {string} [n="(root)"] - Case label used when the it stack yields an empty path.
 */
function Bt(t, e, n = "(root)") {
  const currentCasePath = Q();
  const report = {
    suitePath: Y(),
    casePath: currentCasePath.length > 0 ? currentCasePath : n,
    didPass: t,
    durationMs: e,
    model: K(),
    tokenUsage: V(),
  };
  st(report);
}
|
|
171
|
+
// Async-scoped storage; Ft() runs each eval-file import inside it with `{ evalFile }`.
const at = new j();
// Package manifest inlined as a base64 data URL; ct() reads the version from it.
// The inlined snapshot carried "version": "0.0.4" although this chunk ships in
// release 0.0.5, so the banner reported the wrong version. The encoded version
// field is corrected here. NOTE(review): the durable fix is to rebuild after
// bumping the version so the bundler inlines the current manifest.
const S = new URL("data:application/json;base64,ewogICJuYW1lIjogImthdHQiLAogICJ2ZXJzaW9uIjogIjAuMC41IiwKICAiZGVzY3JpcHRpb24iOiAiQ0xJIHRvb2wgdGhhdCB0ZXN0cyB0aGUgb3V0cHV0IG9mIGFnZW50aWMgQUkgdG9vbHMiLAogICJrZXl3b3JkcyI6IFsKICAgICJjbGkiLAogICAgImFpIiwKICAgICJhZ2VudGljLWFpIiwKICAgICJ0ZXN0aW5nIiwKICAgICJldmFsdWF0aW9uIgogIF0sCiAgImF1dGhvciI6ICJSYXBoYWVsIFBvcnRvIChodHRwczovL2dpdGh1Yi5jb20vcmFwaGFlbHBvcikiLAogICJsaWNlbnNlIjogIk1JVCIsCiAgInR5cGUiOiAibW9kdWxlIiwKICAibWFpbiI6ICJkaXN0L2luZGV4LmpzIiwKICAiZXhwb3J0cyI6IHsKICAgICIuIjogewogICAgICAidHlwZXMiOiAiLi9kaXN0L2luZGV4LmQudHMiLAogICAgICAiaW1wb3J0IjogIi4vZGlzdC9pbmRleC5qcyIKICAgIH0KICB9LAogICJiaW4iOiB7CiAgICAia2F0dCI6ICJkaXN0L2thdHQuanMiCiAgfSwKICAic2NyaXB0cyI6IHsKICAgICJidWlsZCI6ICJ2aXRlIGJ1aWxkIiwKICAgICJkZXYiOiAidHN4IHNyYy9pbmRleC50cyIsCiAgICAibGludCI6ICJiaW9tZSBsaW50IC4vc3JjIiwKICAgICJmb3JtYXQiOiAiYmlvbWUgZm9ybWF0IC0td3JpdGUgLi9zcmMiLAogICAgInRlc3QiOiAidml0ZXN0IiwKICAgICJ0eXBlY2hlY2siOiAidHNjIC1wIHRzY29uZmlnLmpzb24gLS1ub0VtaXQiLAogICAgInRlc3Q6YnVpbGQiOiAibm9kZSAuL2Rpc3Qva2F0dC5qcyIKICB9LAogICJ0eXBlcyI6ICJkaXN0L2luZGV4LmQudHMiLAogICJkZXZEZXBlbmRlbmNpZXMiOiB7CiAgICAiQGJpb21lanMvYmlvbWUiOiAiMS45LjQiLAogICAgIkB0eXBlcy9ub2RlIjogIjI1LjIuMCIsCiAgICAidHN4IjogIjQuMjEuMCIsCiAgICAidHlwZXNjcmlwdCI6ICI1LjguMiIsCiAgICAidml0ZSI6ICI3LjMuMSIsCiAgICAidml0ZS1wbHVnaW4tZHRzIjogIjQuNS40IiwKICAgICJ2aXRlc3QiOiAiMy4yLjQiLAogICAgInZzY29kZS1qc29ucnBjIjogIl44LjIuMSIKICB9LAogICJkZXBlbmRlbmNpZXMiOiB7CiAgICAiQGdpdGh1Yi9jb3BpbG90LXNkayI6ICJeMC4xLjIxIgogIH0sCiAgImJ1Z3MiOiB7CiAgICAidXJsIjogImh0dHBzOi8vZ2l0aHViLmNvbS9yYXBoYWVscG9yL2thdHQvaXNzdWVzIgogIH0sCiAgImhvbWVwYWdlIjogImh0dHBzOi8vZ2l0aHViLmNvbS9yYXBoYWVscG9yL2thdHQiCn0K", import.meta.url);
|
|
172
|
+
// Cached result of ct(); undefined until the first call.
let C;

/**
 * Returns the `version` string from the manifest referenced by `S`, caching
 * the result. The manifest is decoded in-process when `S` is a data URL and
 * read from disk otherwise. Any read or parse failure, or a non-string
 * version, yields "unknown" (deliberate best-effort: the value is only used
 * for display).
 *
 * @returns {string} The version, or "unknown".
 */
function ct() {
  if (C === void 0) {
    try {
      let manifestText;
      if (S.protocol === "data:") {
        manifestText = rt(S);
      } else {
        manifestText = M(x(S), "utf8");
      }
      const manifest = JSON.parse(manifestText);
      if (typeof manifest.version == "string") {
        C = manifest.version;
      } else {
        C = "unknown";
      }
    } catch {
      C = "unknown";
    }
  }
  return C;
}
|
|
184
|
+
/**
 * Decodes the payload of a data URL into text.
 *
 * The part of `pathname` before the first comma is treated as the media-type
 * header. When it contains ";base64" the payload is base64-decoded as UTF-8,
 * otherwise it is percent-decoded.
 *
 * @param {URL} t - Object exposing the data URL's `pathname`.
 * @returns {string} Decoded payload.
 * @throws {Error} "Invalid data URL." when `pathname` contains no comma.
 */
function rt(t) {
  const { pathname } = t;
  const commaAt = pathname.indexOf(",");
  if (commaAt === -1) {
    throw new Error("Invalid data URL.");
  }
  const header = pathname.slice(0, commaAt);
  const payload = pathname.slice(commaAt + 1);
  if (header.includes(";base64")) {
    return Buffer.from(payload, "base64").toString("utf8");
  }
  return decodeURIComponent(payload);
}
|
|
191
|
+
/**
 * Prints the multi-line banner to the console, followed by a `v<version>`
 * line left-padded so that it is centred under the first banner row.
 */
function lt() {
  const row1 = " ██╗ ██╗ █████╗ ████████╗████████╗";
  const row2 = " ██║ ██╔╝██╔══██╗╚══██╔══╝╚══██╔══╝";
  const row3 = " █████╔╝ ███████║ ██║ ██║";
  const row4 = " ██╔═██╗ ██╔══██║ ██║ ██║";
  const row5 = " ██║ ██╗██║ ██║ ██║ ██║";
  const row6 = " ╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ ╚═╝";
  const versionLabel = `v${ct()}`;
  // Never pad by a negative amount when the label is wider than the banner.
  const padding = Math.max(0, Math.floor((row1.length - versionLabel.length) / 2));
  const versionRow = `${" ".repeat(padding)}${versionLabel}`;
  const output = [
    "",
    f(row1),
    f(row2),
    f(row3),
    w(row4),
    w(row5),
    ot(row6),
    f(versionRow),
    "",
  ].join("\n");
  console.log(output);
}
|
|
206
|
+
// Module-level flag; Ft() sets it from the "--update-snapshots" / "-u" arguments.
let Z = false;

/**
 * Stores `t` as the module-level flag.
 *
 * @param {boolean} t - New flag value.
 */
function ut(t) {
  Z = t;
}

/** Returns the current value of the module-level flag. */
function Zt() {
  const flag = Z;
  return flag;
}
|
|
213
|
+
/**
 * Formats the local time of day of `t` as zero-padded "HH:MM:SS".
 *
 * @param {Date} t - Date to format.
 * @returns {string} Time string.
 */
function gt(t) {
  const parts = [t.getHours(), t.getMinutes(), t.getSeconds()];
  return parts.map((part) => String(part).padStart(2, "0")).join(":");
}
|
|
217
|
+
/**
 * Runs the whole flow: reads the snapshot-update flag from the process
 * arguments, prints the banner, resets module state, discovers eval files
 * under the working directory, imports them all, waits for queued work, then
 * prints either the failures or a summary.
 *
 * @returns {Promise<number>} 1 when no eval files are found, an import
 *   rejects, a queued promise rejects, or failures were recorded; 0 otherwise
 *   (presumably used as the process exit code).
 */
async function Ft() {
  const cliArgs = process.argv.slice(2);
  const updateSnapshots = cliArgs.includes("--update-snapshots") || cliArgs.includes("-u");
  ut(updateSnapshots);
  lt();
  const startedAt = /* @__PURE__ */ new Date();
  it();
  D();
  z();

  const evalFiles = await v(process.cwd());
  if (evalFiles.length === 0) {
    console.log("No .eval.js or .eval.ts files found.");
    return 1;
  }

  // Import every eval file concurrently, each inside its own `at` scope.
  const importOutcomes = await Promise.allSettled(
    evalFiles.map((file) => at.run({ evalFile: file }, () => import(y(file).href)))
  );
  const failedImports = importOutcomes
    .map((result, index) => ({ result, file: evalFiles[index] }))
    .filter(({ result }) => result.status === "rejected");
  if (failedImports.length > 0) {
    for (const { result, file } of failedImports) {
      const reason = result.status === "rejected" ? result.reason : void 0;
      console.error(`Error executing ${file}: ${String(reason)}`);
    }
    return 1;
  }

  const queuedOutcomes = await _();
  const rejectedOutcomes = queuedOutcomes.filter((outcome) => outcome.status === "rejected");
  if (rejectedOutcomes.length > 0) {
    for (const outcome of rejectedOutcomes) {
      console.error(`Error executing async test: ${String(outcome.reason)}`);
    }
    return 1;
  }

  const failures = E();
  if (failures.length > 0) {
    console.error("❌ Failed tests:");
    failures.forEach((failure, index) => {
      const location = [failure.describePath, failure.itPath]
        .filter((part) => part.length > 0)
        .join(" > ");
      const prefix = location.length > 0 ? `${location}: ` : "";
      console.error(`${index + 1}. ${prefix}${failure.message}`);
    });
    return 1;
  }

  const evalCount = P();
  const elapsedMs = Date.now() - startedAt.getTime();
  const summary = [
    "---",
    `${c("Files")} ${evalFiles.length} passed`,
    `${c("Evals")} ${evalCount} passed`,
    `${c("Start at")} ${gt(startedAt)}`,
    `${c("Duration")} ${elapsedMs}ms`,
  ].join("\n");
  console.log(summary);
  return 0;
}
|
|
269
|
+
export {
|
|
270
|
+
jt as a,
|
|
271
|
+
wt as b,
|
|
272
|
+
ht as c,
|
|
273
|
+
bt as d,
|
|
274
|
+
St as e,
|
|
275
|
+
At as f,
|
|
276
|
+
Jt as g,
|
|
277
|
+
Lt as h,
|
|
278
|
+
at as i,
|
|
279
|
+
vt as j,
|
|
280
|
+
Q as k,
|
|
281
|
+
Bt as l,
|
|
282
|
+
Y as m,
|
|
283
|
+
Zt as n,
|
|
284
|
+
Ft as o,
|
|
285
|
+
kt as p,
|
|
286
|
+
mt as r,
|
|
287
|
+
$t as s
|
|
288
|
+
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "katt",
|
|
3
|
-
"version": "0.0.4",
|
|
3
|
+
"version": "0.0.5",
|
|
4
4
|
"description": "CLI tool that tests the output of agentic AI tools",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"cli",
|
|
@@ -13,8 +13,14 @@
|
|
|
13
13
|
"license": "MIT",
|
|
14
14
|
"type": "module",
|
|
15
15
|
"main": "dist/index.js",
|
|
16
|
+
"exports": {
|
|
17
|
+
".": {
|
|
18
|
+
"types": "./dist/index.d.ts",
|
|
19
|
+
"import": "./dist/index.js"
|
|
20
|
+
}
|
|
21
|
+
},
|
|
16
22
|
"bin": {
|
|
17
|
-
"katt": "dist/
|
|
23
|
+
"katt": "dist/katt.js"
|
|
18
24
|
},
|
|
19
25
|
"scripts": {
|
|
20
26
|
"build": "vite build",
|
|
@@ -23,7 +29,7 @@
|
|
|
23
29
|
"format": "biome format --write ./src",
|
|
24
30
|
"test": "vitest",
|
|
25
31
|
"typecheck": "tsc -p tsconfig.json --noEmit",
|
|
26
|
-
"test:build": "node ./dist/
|
|
32
|
+
"test:build": "node ./dist/katt.js"
|
|
27
33
|
},
|
|
28
34
|
"types": "dist/index.d.ts",
|
|
29
35
|
"devDependencies": {
|