katt 0.0.4 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -4
- package/dist/index.js +194 -426
- package/dist/katt.js +7 -0
- package/dist/runCli-B3oIBxOl.js +317 -0
- package/package.json +9 -3
package/README.md
CHANGED
|
@@ -1,20 +1,44 @@
|
|
|
1
1
|
# Katt
|
|
2
|
+
[License](https://github.com/raphaelpor/katt/blob/main/LICENSE) [npm package](https://www.npmjs.com/package/katt)
|
|
2
3
|
|
|
3
4
|
Katt is a lightweight testing framework for running AI Evals, inspired by [Jest](https://github.com/jestjs/jest).
|
|
4
5
|
|
|
5
6
|
<img src="https://raw.githubusercontent.com/raphaelpor/katt/main/docs/logo.png" alt="Katt logo" width="250" />
|
|
6
7
|
|
|
8
|
+
## Table of Contents
|
|
9
|
+
|
|
10
|
+
- [Overview](#overview)
|
|
11
|
+
- [API Documentation](#api-documentation)
|
|
12
|
+
- [Hello World - Example](#hello-world---example)
|
|
13
|
+
- [Main Features](#main-features)
|
|
14
|
+
- [Usage](#usage)
|
|
15
|
+
- [Installation](#installation)
|
|
16
|
+
- [Basic Usage](#basic-usage)
|
|
17
|
+
- [Using promptFile](#using-promptfile)
|
|
18
|
+
- [Specifying AI Models](#specifying-ai-models)
|
|
19
|
+
- [Development](#development)
|
|
20
|
+
- [Setup](#setup)
|
|
21
|
+
- [Available Scripts](#available-scripts)
|
|
22
|
+
- [Verification Process](#verification-process)
|
|
23
|
+
- [Project Structure](#project-structure)
|
|
24
|
+
- [How It Works](#how-it-works)
|
|
25
|
+
- [Requirements](#requirements)
|
|
26
|
+
- [License](#license)
|
|
27
|
+
- [Contributing](#contributing)
|
|
28
|
+
|
|
7
29
|
## Overview
|
|
8
30
|
|
|
9
31
|
Katt is designed to evaluate and validate the behavior of AI agents like **Claude Code**, **GitHub Copilot**, **OpenAI Codex** and more. It provides a simple, intuitive API for writing tests that interact with AI models and assert their responses.
|
|
10
32
|
|
|
11
33
|
## API Documentation
|
|
12
34
|
|
|
13
|
-
For a complete list of features and usage examples, see [docs/api-documentation.md](docs/api-documentation.md).
|
|
35
|
+
For a complete list of features and usage examples, see [docs/api-documentation.md](https://github.com/raphaelpor/katt/blob/main/docs/api-documentation.md).
|
|
14
36
|
|
|
15
37
|
## Hello World - Example
|
|
16
38
|
|
|
17
39
|
```typescript
|
|
40
|
+
import { expect, prompt } from "katt";
|
|
41
|
+
|
|
18
42
|
const result = await prompt("If you read this just say 'hello world'");
|
|
19
43
|
expect(result).toContain("hello world");
|
|
20
44
|
```
|
|
@@ -22,6 +46,8 @@ expect(result).toContain("hello world");
|
|
|
22
46
|
It also supports the familiar `describe` and `it` syntax for organizing tests:
|
|
23
47
|
|
|
24
48
|
```typescript
|
|
49
|
+
import { describe, expect, it, prompt } from "katt";
|
|
50
|
+
|
|
25
51
|
describe("Greeting agent", () => {
|
|
26
52
|
it("should say hello world", async () => {
|
|
27
53
|
const result = await prompt("If you read this just say 'hello world'");
|
|
@@ -41,10 +67,18 @@ describe("Greeting agent", () => {
|
|
|
41
67
|
|
|
42
68
|
## Usage
|
|
43
69
|
|
|
70
|
+
### Installation
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
npm install -g katt
|
|
74
|
+
```
|
|
75
|
+
|
|
44
76
|
### Basic Usage
|
|
45
77
|
|
|
46
78
|
1. Create a file with the `.eval.ts` or `.eval.js` extension and write your tests.
|
|
47
79
|
```typescript
|
|
80
|
+
import { expect, prompt } from "katt";
|
|
81
|
+
|
|
48
82
|
const result = await prompt("If you read this just say 'hello world'");
|
|
49
83
|
expect(result).toContain("hello world");
|
|
50
84
|
```
|
|
@@ -52,7 +86,7 @@ expect(result).toContain("hello world");
|
|
|
52
86
|
2. Run Katt from your project directory:
|
|
53
87
|
|
|
54
88
|
```bash
|
|
55
|
-
|
|
89
|
+
katt
|
|
56
90
|
```
|
|
57
91
|
|
|
58
92
|
### Using promptFile
|
|
@@ -61,6 +95,8 @@ Load prompts from external files:
|
|
|
61
95
|
|
|
62
96
|
```javascript
|
|
63
97
|
// test.eval.js
|
|
98
|
+
import { describe, expect, it, promptFile } from "katt";
|
|
99
|
+
|
|
64
100
|
describe("Working with files", () => {
|
|
65
101
|
it("should load the file and respond", async () => {
|
|
66
102
|
const result = await promptFile("./myPrompt.md");
|
|
@@ -74,6 +110,8 @@ describe("Working with files", () => {
|
|
|
74
110
|
You can specify a custom model for your prompts:
|
|
75
111
|
|
|
76
112
|
```javascript
|
|
113
|
+
import { describe, expect, it, prompt } from "katt";
|
|
114
|
+
|
|
77
115
|
describe("Model selection", () => {
|
|
78
116
|
it("should use a specific model", async () => {
|
|
79
117
|
const promptString = "You are a helpful agent. Say hi and ask what you could help the user with.";
|
|
@@ -167,7 +205,7 @@ MIT
|
|
|
167
205
|
|
|
168
206
|
## Contributing
|
|
169
207
|
|
|
170
|
-
We welcome contributions from the community! Please see our [CONTRIBUTING.md](CONTRIBUTING.md) guide for detailed information on how to contribute to Katt.
|
|
208
|
+
We welcome contributions from the community! Please see our [CONTRIBUTING.md](https://github.com/raphaelpor/katt/blob/main/CONTRIBUTING.md) guide for detailed information on how to contribute to Katt.
|
|
171
209
|
|
|
172
210
|
Quick start:
|
|
173
211
|
1. Fork the repository
|
|
@@ -176,4 +214,4 @@ Quick start:
|
|
|
176
214
|
4. Run the verification process
|
|
177
215
|
5. Submit a pull request
|
|
178
216
|
|
|
179
|
-
For detailed guidelines, development setup, coding standards, and more, check out our [contribution guide](CONTRIBUTING.md).
|
|
217
|
+
For detailed guidelines, development setup, coding standards, and more, check out our [contribution guide](https://github.com/raphaelpor/katt/blob/main/CONTRIBUTING.md).
|
package/dist/index.js
CHANGED
|
@@ -1,183 +1,56 @@
|
|
|
1
|
-
|
|
2
|
-
import {
|
|
3
|
-
import { CopilotClient as
|
|
4
|
-
import { readFile as
|
|
5
|
-
import { resolve as
|
|
6
|
-
import { readFileSync as
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
}
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
}
|
|
31
|
-
function pt() {
|
|
32
|
-
return E += 1, `i${E}`;
|
|
33
|
-
}
|
|
34
|
-
function K(t, e) {
|
|
35
|
-
const n = e ?? Q(u());
|
|
36
|
-
return Y.run(n, t);
|
|
37
|
-
}
|
|
38
|
-
function _() {
|
|
39
|
-
return Q(u());
|
|
40
|
-
}
|
|
41
|
-
function ht(t) {
|
|
42
|
-
u().describeStack.push({ id: gt(), description: t });
|
|
43
|
-
}
|
|
44
|
-
function x() {
|
|
45
|
-
u().describeStack.pop();
|
|
46
|
-
}
|
|
47
|
-
function q() {
|
|
48
|
-
return u().describeStack.map((t) => t.description).join(" > ");
|
|
49
|
-
}
|
|
50
|
-
function It(t) {
|
|
51
|
-
u().itStack.push({ id: pt(), description: t }), u().tokenUsageStack.push(0), u().modelStack.push(void 0);
|
|
52
|
-
}
|
|
53
|
-
function F() {
|
|
54
|
-
u().itStack.pop(), u().tokenUsageStack.pop(), u().modelStack.pop();
|
|
55
|
-
}
|
|
56
|
-
function tt() {
|
|
57
|
-
return u().itStack.map((t) => t.description).join(" > ");
|
|
58
|
-
}
|
|
59
|
-
function mt(t) {
|
|
60
|
-
if (!Number.isFinite(t) || t <= 0)
|
|
61
|
-
return;
|
|
62
|
-
const e = u(), n = e.tokenUsageStack.length - 1;
|
|
63
|
-
n < 0 || (e.tokenUsageStack[n] += t);
|
|
64
|
-
}
|
|
65
|
-
function Ct() {
|
|
66
|
-
const t = u(), e = t.tokenUsageStack.length - 1;
|
|
67
|
-
return e < 0 ? 0 : t.tokenUsageStack[e] ?? 0;
|
|
68
|
-
}
|
|
69
|
-
function St(t) {
|
|
70
|
-
if (t.length === 0)
|
|
71
|
-
return;
|
|
72
|
-
const e = u(), n = e.modelStack.length - 1;
|
|
73
|
-
n < 0 || (e.modelStack[n] = t);
|
|
74
|
-
}
|
|
75
|
-
function At() {
|
|
76
|
-
const t = u(), e = t.modelStack.length - 1;
|
|
77
|
-
if (!(e < 0))
|
|
78
|
-
return t.modelStack[e];
|
|
79
|
-
}
|
|
80
|
-
function v(t) {
|
|
81
|
-
w.push(t);
|
|
82
|
-
}
|
|
83
|
-
function bt() {
|
|
84
|
-
U += 1;
|
|
85
|
-
}
|
|
86
|
-
function $t() {
|
|
87
|
-
return U;
|
|
88
|
-
}
|
|
89
|
-
function kt() {
|
|
90
|
-
U = 0;
|
|
91
|
-
}
|
|
92
|
-
function wt(t) {
|
|
93
|
-
y.push(t);
|
|
94
|
-
}
|
|
95
|
-
function yt() {
|
|
96
|
-
return [...y];
|
|
97
|
-
}
|
|
98
|
-
function O() {
|
|
99
|
-
return y.length;
|
|
100
|
-
}
|
|
101
|
-
function vt() {
|
|
102
|
-
y.length = 0;
|
|
103
|
-
}
|
|
104
|
-
async function Lt() {
|
|
105
|
-
const t = [];
|
|
106
|
-
for (; w.length > 0; ) {
|
|
107
|
-
const e = w.splice(0, w.length), n = await Promise.allSettled(e);
|
|
108
|
-
t.push(...n);
|
|
109
|
-
}
|
|
110
|
-
return t;
|
|
1
|
+
import { r as $, c as x, p as E, a as d, l, b as p, g as w, d as N, e as P, f as h, s as A, h as S, i as j, j as _, k as O, m as L, n as I, o as U, q as R } from "./runCli-B3oIBxOl.js";
|
|
2
|
+
import { t as xt } from "./runCli-B3oIBxOl.js";
|
|
3
|
+
import { CopilotClient as z } from "@github/copilot-sdk";
|
|
4
|
+
import { readFile as F } from "node:fs/promises";
|
|
5
|
+
import { resolve as k, dirname as g, isAbsolute as B, basename as W, join as K } from "node:path";
|
|
6
|
+
import { readFileSync as H, writeFileSync as C, mkdirSync as Y } from "node:fs";
|
|
7
|
+
function wt(t, e) {
|
|
8
|
+
$(() => {
|
|
9
|
+
N(), E(t);
|
|
10
|
+
const n = w(), o = Date.now(), r = () => w() === n, s = () => Date.now() - o;
|
|
11
|
+
try {
|
|
12
|
+
const i = e();
|
|
13
|
+
if (i && typeof i.then == "function") {
|
|
14
|
+
d(
|
|
15
|
+
i.then(() => {
|
|
16
|
+
l(!0, s());
|
|
17
|
+
}).catch((a) => {
|
|
18
|
+
throw l(!1, s()), a;
|
|
19
|
+
}).finally(() => {
|
|
20
|
+
p();
|
|
21
|
+
})
|
|
22
|
+
);
|
|
23
|
+
return;
|
|
24
|
+
}
|
|
25
|
+
} catch (i) {
|
|
26
|
+
throw l(!1, s()), p(), i;
|
|
27
|
+
}
|
|
28
|
+
l(r(), s()), p();
|
|
29
|
+
}, x());
|
|
111
30
|
}
|
|
112
|
-
function
|
|
113
|
-
|
|
114
|
-
|
|
31
|
+
function Ct(t, e) {
|
|
32
|
+
$(() => {
|
|
33
|
+
P(t);
|
|
115
34
|
try {
|
|
116
35
|
const n = e();
|
|
117
36
|
if (n && typeof n.then == "function") {
|
|
118
|
-
|
|
37
|
+
d(
|
|
119
38
|
n.finally(() => {
|
|
120
|
-
|
|
39
|
+
h();
|
|
121
40
|
})
|
|
122
41
|
);
|
|
123
42
|
return;
|
|
124
43
|
}
|
|
125
44
|
} catch (n) {
|
|
126
|
-
throw
|
|
45
|
+
throw h(), n;
|
|
127
46
|
}
|
|
128
|
-
|
|
129
|
-
},
|
|
130
|
-
}
|
|
131
|
-
const Tt = "\x1B[1;36m", xt = "\x1B[33m", Ft = "\x1B[38;5;208m", Mt = "\x1B[1;38;5;208m", L = "\x1B[0m";
|
|
132
|
-
function f(t) {
|
|
133
|
-
return `${Tt}${t}${L}`;
|
|
134
|
-
}
|
|
135
|
-
function $(t) {
|
|
136
|
-
return `${xt}${t}${L}`;
|
|
137
|
-
}
|
|
138
|
-
function D(t) {
|
|
139
|
-
return `${Ft}${t}${L}`;
|
|
140
|
-
}
|
|
141
|
-
function Nt(t) {
|
|
142
|
-
return `${Mt}${t}${L}`;
|
|
143
|
-
}
|
|
144
|
-
let B = "";
|
|
145
|
-
function Bt() {
|
|
146
|
-
B = "";
|
|
47
|
+
h();
|
|
48
|
+
}, x());
|
|
147
49
|
}
|
|
148
|
-
function
|
|
149
|
-
suitePath: t,
|
|
150
|
-
casePath: e,
|
|
151
|
-
didPass: n,
|
|
152
|
-
durationMs: o,
|
|
153
|
-
model: s,
|
|
154
|
-
tokenUsage: i
|
|
155
|
-
}) {
|
|
156
|
-
const r = t.length > 0 ? t : "(root)", c = e.length > 0 ? e : "(root)";
|
|
157
|
-
B !== r && (console.log(`Suite "${f(r)}"`), B = r);
|
|
158
|
-
const l = n ? "✅ Passed in" : "❌ Failed in", I = [
|
|
159
|
-
`Test "${f(c)}"`,
|
|
160
|
-
`- ${l} ${f(`${o}ms`)}`
|
|
161
|
-
];
|
|
162
|
-
s && I.push(`- Model ${f(s)}`), (i ?? 0) > 0 && I.push(`- Tokens used ${f(String(i))}`), I.push("---"), console.log(I.join(`
|
|
163
|
-
`));
|
|
164
|
-
}
|
|
165
|
-
function C(t, e, n = "(root)") {
|
|
166
|
-
const o = tt();
|
|
167
|
-
Jt({
|
|
168
|
-
suitePath: q(),
|
|
169
|
-
casePath: o.length > 0 ? o : n,
|
|
170
|
-
didPass: t,
|
|
171
|
-
durationMs: e,
|
|
172
|
-
model: At(),
|
|
173
|
-
tokenUsage: Ct()
|
|
174
|
-
});
|
|
175
|
-
}
|
|
176
|
-
const G = new z();
|
|
177
|
-
function Rt(t, e) {
|
|
50
|
+
function q(t, e) {
|
|
178
51
|
return typeof t == "object" && t !== null && "code" in t && t.code === e;
|
|
179
52
|
}
|
|
180
|
-
function
|
|
53
|
+
function J(t) {
|
|
181
54
|
try {
|
|
182
55
|
const e = JSON.parse(t);
|
|
183
56
|
return typeof e == "object" && e !== null ? e : void 0;
|
|
@@ -186,19 +59,19 @@ function Ut(t) {
|
|
|
186
59
|
return;
|
|
187
60
|
}
|
|
188
61
|
}
|
|
189
|
-
async function
|
|
190
|
-
const t =
|
|
62
|
+
async function G() {
|
|
63
|
+
const t = k(process.cwd(), "katt.json");
|
|
191
64
|
try {
|
|
192
|
-
const e = await
|
|
193
|
-
return
|
|
65
|
+
const e = await F(t, "utf8");
|
|
66
|
+
return J(e);
|
|
194
67
|
} catch (e) {
|
|
195
|
-
if (
|
|
68
|
+
if (q(e, "ENOENT"))
|
|
196
69
|
return;
|
|
197
70
|
console.warn(`Failed to read katt.json: ${String(e)}`);
|
|
198
71
|
return;
|
|
199
72
|
}
|
|
200
73
|
}
|
|
201
|
-
function
|
|
74
|
+
function Q(t) {
|
|
202
75
|
const e = t?.copilot;
|
|
203
76
|
if (typeof e != "object" || e === null || Array.isArray(e))
|
|
204
77
|
return;
|
|
@@ -207,112 +80,134 @@ function Wt(t) {
|
|
|
207
80
|
}, o = n.model;
|
|
208
81
|
return (typeof o != "string" || o.length === 0) && delete n.model, Object.keys(n).length > 0 ? n : void 0;
|
|
209
82
|
}
|
|
210
|
-
function
|
|
83
|
+
function V(t) {
|
|
211
84
|
if (!(typeof t != "number" || !Number.isFinite(t)) && !(t <= 0))
|
|
212
85
|
return Math.floor(t);
|
|
213
86
|
}
|
|
214
|
-
function
|
|
87
|
+
function X(t) {
|
|
215
88
|
const e = t?.prompt;
|
|
216
89
|
if (!(typeof e != "object" || e === null || Array.isArray(e)))
|
|
217
|
-
return
|
|
90
|
+
return V(e.timeoutMs);
|
|
218
91
|
}
|
|
219
|
-
async function
|
|
220
|
-
const t = await
|
|
92
|
+
async function Z() {
|
|
93
|
+
const t = await G();
|
|
221
94
|
return {
|
|
222
|
-
copilot:
|
|
223
|
-
promptTimeoutMs:
|
|
95
|
+
copilot: Q(t),
|
|
96
|
+
promptTimeoutMs: X(t)
|
|
224
97
|
};
|
|
225
98
|
}
|
|
226
|
-
const
|
|
227
|
-
function
|
|
99
|
+
const tt = 6e5;
|
|
100
|
+
function M(t) {
|
|
228
101
|
return typeof t == "string" && t.length > 0 ? t : void 0;
|
|
229
102
|
}
|
|
230
|
-
function
|
|
103
|
+
function m(t) {
|
|
231
104
|
if (!t)
|
|
232
105
|
return;
|
|
233
106
|
const e = { ...t };
|
|
234
107
|
if (e.model !== void 0) {
|
|
235
|
-
const n =
|
|
108
|
+
const n = M(e.model);
|
|
236
109
|
n ? e.model = n : delete e.model;
|
|
237
110
|
}
|
|
238
111
|
return Object.keys(e).length > 0 ? e : void 0;
|
|
239
112
|
}
|
|
240
|
-
function
|
|
113
|
+
function b(t) {
|
|
241
114
|
if (!(typeof t != "number" || !Number.isFinite(t)) && !(t <= 0))
|
|
242
115
|
return Math.floor(t);
|
|
243
116
|
}
|
|
244
|
-
function
|
|
117
|
+
function f(t) {
|
|
245
118
|
return !Number.isFinite(t) || (t ?? 0) <= 0 ? 0 : Math.floor(t ?? 0);
|
|
246
119
|
}
|
|
247
|
-
function
|
|
248
|
-
return
|
|
120
|
+
function et(t) {
|
|
121
|
+
return f(t.inputTokens) + f(t.outputTokens) + f(t.cacheReadTokens) + f(t.cacheWriteTokens);
|
|
249
122
|
}
|
|
250
|
-
|
|
251
|
-
const { timeoutMs: n, ...o } =
|
|
123
|
+
function nt(t, e) {
|
|
124
|
+
const { timeoutMs: n, ...o } = t, r = m(e.copilot), s = m(
|
|
252
125
|
o
|
|
253
|
-
),
|
|
254
|
-
...
|
|
255
|
-
...
|
|
256
|
-
}),
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
} finally {
|
|
267
|
-
const h = [];
|
|
268
|
-
if (b?.(), S > 0 && mt(S), p)
|
|
269
|
-
try {
|
|
270
|
-
await p.destroy();
|
|
271
|
-
} catch (m) {
|
|
272
|
-
h.push(m);
|
|
273
|
-
}
|
|
126
|
+
), i = m({
|
|
127
|
+
...r ?? {},
|
|
128
|
+
...s ?? {}
|
|
129
|
+
}), a = b(e.promptTimeoutMs), D = b(n) ?? a ?? tt;
|
|
130
|
+
return {
|
|
131
|
+
sessionOptions: i,
|
|
132
|
+
model: M(i?.model),
|
|
133
|
+
timeoutMs: D
|
|
134
|
+
};
|
|
135
|
+
}
|
|
136
|
+
async function ot(t, e) {
|
|
137
|
+
const n = [];
|
|
138
|
+
if (e.unsubscribeUsage?.(), e.usedTokens > 0 && j(e.usedTokens), e.session)
|
|
274
139
|
try {
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
h.push(m);
|
|
140
|
+
await e.session.destroy();
|
|
141
|
+
} catch (o) {
|
|
142
|
+
n.push(o);
|
|
279
143
|
}
|
|
280
|
-
|
|
281
|
-
|
|
144
|
+
try {
|
|
145
|
+
const o = await t.stop();
|
|
146
|
+
n.push(...o);
|
|
147
|
+
} catch (o) {
|
|
148
|
+
n.push(o);
|
|
149
|
+
}
|
|
150
|
+
n.length > 0 && console.error(
|
|
151
|
+
`Copilot cleanup encountered ${n.length} error(s).`
|
|
152
|
+
);
|
|
153
|
+
}
|
|
154
|
+
async function y(t, e = {}) {
|
|
155
|
+
const n = await Z(), o = nt(e, n), r = new z({ useLoggedInUser: !0 }), s = {
|
|
156
|
+
session: void 0,
|
|
157
|
+
unsubscribeUsage: void 0,
|
|
158
|
+
usedTokens: 0
|
|
159
|
+
};
|
|
160
|
+
try {
|
|
161
|
+
await r.start(), s.session = await r.createSession(
|
|
162
|
+
o.sessionOptions
|
|
163
|
+
), s.unsubscribeUsage = s.session.on(
|
|
164
|
+
"assistant.usage",
|
|
165
|
+
(a) => {
|
|
166
|
+
s.usedTokens += et(a.data);
|
|
167
|
+
}
|
|
168
|
+
);
|
|
169
|
+
const i = await s.session.sendAndWait(
|
|
170
|
+
{ prompt: t },
|
|
171
|
+
o.timeoutMs
|
|
282
172
|
);
|
|
173
|
+
if (!i?.data?.content)
|
|
174
|
+
throw new Error("Copilot did not return a response.");
|
|
175
|
+
return o.model && A(o.model), i.data.content;
|
|
176
|
+
} finally {
|
|
177
|
+
await ot(r, s);
|
|
283
178
|
}
|
|
284
179
|
}
|
|
285
|
-
async function
|
|
286
|
-
const n =
|
|
287
|
-
return
|
|
180
|
+
async function bt(t, e = {}) {
|
|
181
|
+
const n = S.getStore(), o = n?.evalFile ? g(n.evalFile) : process.cwd(), r = B(t) ? t : k(o, t), s = await F(r, "utf8");
|
|
182
|
+
return y(s, e);
|
|
288
183
|
}
|
|
289
|
-
function
|
|
290
|
-
|
|
291
|
-
describePath:
|
|
292
|
-
itPath:
|
|
184
|
+
function c(t) {
|
|
185
|
+
_({
|
|
186
|
+
describePath: L(),
|
|
187
|
+
itPath: O(),
|
|
293
188
|
message: t
|
|
294
189
|
});
|
|
295
190
|
}
|
|
296
|
-
async function
|
|
297
|
-
const n = Date.now(), o = `expected '${t}' to satisfy '${e}'`,
|
|
191
|
+
async function st(t, e) {
|
|
192
|
+
const n = Date.now(), o = `expected '${t}' to satisfy '${e}'`, r = y(`Evaluate if the expectation is fulfiled in by the input.
|
|
298
193
|
Expectation: "${e}".
|
|
299
194
|
Input:
|
|
300
195
|
---
|
|
301
196
|
${t}
|
|
302
197
|
---
|
|
303
198
|
Important: Answer with "Yes" or "No" only, without any additional text.
|
|
304
|
-
`).then((
|
|
305
|
-
|
|
199
|
+
`).then((s) => {
|
|
200
|
+
s.includes("Yes") ? l(!0, Date.now() - n, "promptCheck") : s.includes("No") ? c(o) : c(`failed to evaluate expectation '${e}'`);
|
|
306
201
|
});
|
|
307
|
-
return
|
|
202
|
+
return d(r), r;
|
|
308
203
|
}
|
|
309
|
-
const
|
|
310
|
-
function
|
|
204
|
+
const rt = 3;
|
|
205
|
+
function it(t) {
|
|
311
206
|
const e = t.match(/\b([1-5])\b/);
|
|
312
207
|
return !e || !e[1] ? null : Number(e[1]);
|
|
313
208
|
}
|
|
314
|
-
async function
|
|
315
|
-
const o = Date.now(),
|
|
209
|
+
async function at(t, e, n) {
|
|
210
|
+
const o = Date.now(), r = n?.threshold ?? rt, s = y(
|
|
316
211
|
`Classify the input by how "${e}" it is on a scale of 1 to 5.
|
|
317
212
|
1 means "not ${e}" and 5 means "very ${e}".
|
|
318
213
|
Return only a single number: 1, 2, 3, 4, or 5.
|
|
@@ -322,94 +217,99 @@ Input:
|
|
|
322
217
|
${t}
|
|
323
218
|
---`,
|
|
324
219
|
n?.model ? { model: n.model } : void 0
|
|
325
|
-
).then((
|
|
326
|
-
const
|
|
327
|
-
if (
|
|
328
|
-
|
|
329
|
-
`failed to classify as '${e}'. Evaluator returned '${
|
|
220
|
+
).then((i) => {
|
|
221
|
+
const a = it(i);
|
|
222
|
+
if (a === null) {
|
|
223
|
+
c(
|
|
224
|
+
`failed to classify as '${e}'. Evaluator returned '${i}'`
|
|
330
225
|
);
|
|
331
226
|
return;
|
|
332
227
|
}
|
|
333
|
-
const
|
|
334
|
-
if (
|
|
335
|
-
|
|
228
|
+
const u = `expected response to be classified as '${e}' with score >= ${r}, got ${a}`;
|
|
229
|
+
if (a < r) {
|
|
230
|
+
c(u);
|
|
336
231
|
return;
|
|
337
232
|
}
|
|
338
|
-
|
|
233
|
+
l(
|
|
339
234
|
!0,
|
|
340
235
|
Date.now() - o,
|
|
341
236
|
"toBeClassifiedAs"
|
|
342
237
|
);
|
|
343
238
|
});
|
|
344
|
-
return
|
|
239
|
+
return d(s), s;
|
|
345
240
|
}
|
|
346
|
-
function
|
|
241
|
+
function ct(t, e) {
|
|
347
242
|
const n = `expected '${t}' to include '${e}'`;
|
|
348
|
-
t.includes(e) ||
|
|
349
|
-
}
|
|
350
|
-
let nt = !1;
|
|
351
|
-
function Kt(t) {
|
|
352
|
-
nt = t;
|
|
353
|
-
}
|
|
354
|
-
function _t() {
|
|
355
|
-
return nt;
|
|
243
|
+
t.includes(e) || c(n);
|
|
356
244
|
}
|
|
357
|
-
function
|
|
358
|
-
const
|
|
359
|
-
return
|
|
360
|
-
|
|
245
|
+
function v(t) {
|
|
246
|
+
const e = t.trim().replace(/[<>:"/\\|?*\x00-\x1f]/g, "_").replace(/\s+/g, "_");
|
|
247
|
+
return e.length > 0 ? e : "unnamed";
|
|
248
|
+
}
|
|
249
|
+
function ut() {
|
|
250
|
+
const t = U().map(
|
|
251
|
+
(o) => v(o.description)
|
|
252
|
+
), e = R().map(
|
|
253
|
+
(o) => v(o.description)
|
|
254
|
+
), n = [...t, ...e];
|
|
255
|
+
return n.length === 0 ? "root" : n.join("__");
|
|
256
|
+
}
|
|
257
|
+
function lt(t) {
|
|
258
|
+
const n = W(t).replace(/\.eval\.[^./\\]+$/, ""), o = ut();
|
|
259
|
+
return K(
|
|
260
|
+
g(t),
|
|
361
261
|
"__snapshots__",
|
|
362
|
-
`${n}.snap.md`
|
|
262
|
+
`${n}__${o}.snap.md`
|
|
363
263
|
);
|
|
364
264
|
}
|
|
365
|
-
function
|
|
265
|
+
function T(t) {
|
|
366
266
|
return t.split(/\r?\n/);
|
|
367
267
|
}
|
|
368
|
-
function
|
|
268
|
+
function ft(t, e) {
|
|
369
269
|
if (t === e)
|
|
370
270
|
return " (no diff)";
|
|
371
|
-
const n =
|
|
372
|
-
for (let
|
|
373
|
-
const
|
|
374
|
-
if (
|
|
375
|
-
if (
|
|
376
|
-
|
|
271
|
+
const n = T(t), o = T(e), r = Math.max(n.length, o.length), s = [];
|
|
272
|
+
for (let i = 0; i < r; i += 1) {
|
|
273
|
+
const a = n[i], u = o[i];
|
|
274
|
+
if (a !== u) {
|
|
275
|
+
if (a === void 0 && u !== void 0) {
|
|
276
|
+
s.push(`+ ${u}`);
|
|
377
277
|
continue;
|
|
378
278
|
}
|
|
379
|
-
if (
|
|
380
|
-
|
|
279
|
+
if (a !== void 0 && u === void 0) {
|
|
280
|
+
s.push(`- ${a}`);
|
|
381
281
|
continue;
|
|
382
282
|
}
|
|
383
|
-
|
|
283
|
+
s.push(`- ${a ?? ""}`), s.push(`+ ${u ?? ""}`);
|
|
384
284
|
}
|
|
385
285
|
}
|
|
386
|
-
return
|
|
286
|
+
return s.join(`
|
|
387
287
|
`);
|
|
388
288
|
}
|
|
389
|
-
function
|
|
390
|
-
const e =
|
|
289
|
+
function dt(t) {
|
|
290
|
+
const e = S.getStore()?.evalFile;
|
|
391
291
|
if (!e) {
|
|
392
|
-
|
|
292
|
+
c(
|
|
393
293
|
"toMatchSnapshot can only be used while running an eval file."
|
|
394
294
|
);
|
|
395
295
|
return;
|
|
396
296
|
}
|
|
397
|
-
const n =
|
|
297
|
+
const n = lt(e);
|
|
398
298
|
try {
|
|
399
|
-
const o =
|
|
299
|
+
const o = H(n, "utf8");
|
|
400
300
|
if (o === t)
|
|
401
301
|
return;
|
|
402
|
-
if (
|
|
403
|
-
|
|
302
|
+
if (I()) {
|
|
303
|
+
C(n, t, "utf8");
|
|
404
304
|
return;
|
|
405
305
|
}
|
|
406
|
-
const
|
|
407
|
-
|
|
306
|
+
const r = ft(o, t);
|
|
307
|
+
c(
|
|
408
308
|
[
|
|
409
309
|
`Snapshot mismatch at ${n}`,
|
|
410
310
|
"",
|
|
411
311
|
"Diff:",
|
|
412
|
-
|
|
312
|
+
r,
|
|
413
313
|
"",
|
|
414
314
|
"Run katt with --update-snapshots (or -u) to accept this change."
|
|
415
315
|
].join(`
|
|
@@ -417,173 +317,41 @@ function ee(t) {
|
|
|
417
317
|
);
|
|
418
318
|
} catch (o) {
|
|
419
319
|
if (o.code !== "ENOENT") {
|
|
420
|
-
|
|
320
|
+
c(
|
|
421
321
|
`Failed to read snapshot at ${n}: ${String(o)}`
|
|
422
322
|
);
|
|
423
323
|
return;
|
|
424
324
|
}
|
|
425
325
|
try {
|
|
426
|
-
|
|
427
|
-
} catch (
|
|
428
|
-
|
|
429
|
-
`Failed to write snapshot at ${n}: ${String(
|
|
326
|
+
Y(g(n), { recursive: !0 }), C(n, t, "utf8");
|
|
327
|
+
} catch (s) {
|
|
328
|
+
c(
|
|
329
|
+
`Failed to write snapshot at ${n}: ${String(s)}`
|
|
430
330
|
);
|
|
431
331
|
}
|
|
432
332
|
}
|
|
433
333
|
}
|
|
434
|
-
function
|
|
334
|
+
function vt(t) {
|
|
435
335
|
return {
|
|
436
336
|
toContain: (e) => {
|
|
437
|
-
|
|
337
|
+
ct(t, e);
|
|
438
338
|
},
|
|
439
339
|
toMatchSnapshot: () => {
|
|
440
|
-
|
|
340
|
+
dt(t);
|
|
441
341
|
},
|
|
442
342
|
promptCheck: async (e) => {
|
|
443
|
-
await
|
|
343
|
+
await st(t, e);
|
|
444
344
|
},
|
|
445
345
|
toBeClassifiedAs: async (e, n) => {
|
|
446
|
-
await
|
|
346
|
+
await at(t, e, n);
|
|
447
347
|
}
|
|
448
348
|
};
|
|
449
349
|
}
|
|
450
|
-
function oe(t, e) {
|
|
451
|
-
K(() => {
|
|
452
|
-
bt(), It(t);
|
|
453
|
-
const n = O(), o = Date.now(), s = () => O() === n, i = () => Date.now() - o;
|
|
454
|
-
try {
|
|
455
|
-
const r = e();
|
|
456
|
-
if (r && typeof r.then == "function") {
|
|
457
|
-
v(
|
|
458
|
-
r.then(() => {
|
|
459
|
-
C(!0, i());
|
|
460
|
-
}).catch((c) => {
|
|
461
|
-
throw C(!1, i()), c;
|
|
462
|
-
}).finally(() => {
|
|
463
|
-
F();
|
|
464
|
-
})
|
|
465
|
-
);
|
|
466
|
-
return;
|
|
467
|
-
}
|
|
468
|
-
} catch (r) {
|
|
469
|
-
throw C(!1, i()), F(), r;
|
|
470
|
-
}
|
|
471
|
-
C(s(), i()), F();
|
|
472
|
-
}, _());
|
|
473
|
-
}
|
|
474
|
-
const ie = /\.eval\.(js|ts)$/, se = /* @__PURE__ */ new Set([".git", "node_modules"]);
|
|
475
|
-
async function ot(t) {
|
|
476
|
-
const e = await st(t, { withFileTypes: !0 }), n = [];
|
|
477
|
-
return await Promise.all(
|
|
478
|
-
e.map(async (o) => {
|
|
479
|
-
const s = J(t, o.name);
|
|
480
|
-
if (o.isDirectory()) {
|
|
481
|
-
if (se.has(o.name))
|
|
482
|
-
return;
|
|
483
|
-
n.push(...await ot(s));
|
|
484
|
-
return;
|
|
485
|
-
}
|
|
486
|
-
o.isFile() && ie.test(o.name) && n.push(s);
|
|
487
|
-
})
|
|
488
|
-
), n;
|
|
489
|
-
}
|
|
490
|
-
const N = new URL("data:application/json;base64,ewogICJuYW1lIjogImthdHQiLAogICJ2ZXJzaW9uIjogIjAuMC4zIiwKICAiZGVzY3JpcHRpb24iOiAiQ0xJIHRvb2wgdGhhdCB0ZXN0cyB0aGUgb3V0cHV0IG9mIGFnZW50aWMgQUkgdG9vbHMiLAogICJrZXl3b3JkcyI6IFsKICAgICJjbGkiLAogICAgImFpIiwKICAgICJhZ2VudGljLWFpIiwKICAgICJ0ZXN0aW5nIiwKICAgICJldmFsdWF0aW9uIgogIF0sCiAgImF1dGhvciI6ICJSYXBoYWVsIFBvcnRvIChodHRwczovL2dpdGh1Yi5jb20vcmFwaGFlbHBvcikiLAogICJsaWNlbnNlIjogIk1JVCIsCiAgInR5cGUiOiAibW9kdWxlIiwKICAibWFpbiI6ICJkaXN0L2luZGV4LmpzIiwKICAiYmluIjogewogICAgImthdHQiOiAiZGlzdC9pbmRleC5qcyIKICB9LAogICJzY3JpcHRzIjogewogICAgImJ1aWxkIjogInZpdGUgYnVpbGQiLAogICAgImRldiI6ICJ0c3ggc3JjL2luZGV4LnRzIiwKICAgICJsaW50IjogImJpb21lIGxpbnQgLi9zcmMiLAogICAgImZvcm1hdCI6ICJiaW9tZSBmb3JtYXQgLS13cml0ZSAuL3NyYyIsCiAgICAidGVzdCI6ICJ2aXRlc3QiLAogICAgInR5cGVjaGVjayI6ICJ0c2MgLXAgdHNjb25maWcuanNvbiAtLW5vRW1pdCIsCiAgICAidGVzdDpidWlsZCI6ICJub2RlIC4vZGlzdC9pbmRleC5qcyIKICB9LAogICJ0eXBlcyI6ICJkaXN0L2luZGV4LmQudHMiLAogICJkZXZEZXBlbmRlbmNpZXMiOiB7CiAgICAiQGJpb21lanMvYmlvbWUiOiAiMS45LjQiLAogICAgIkB0eXBlcy9ub2RlIjogIjI1LjIuMCIsCiAgICAidHN4IjogIjQuMjEuMCIsCiAgICAidHlwZXNjcmlwdCI6ICI1LjguMiIsCiAgICAidml0ZSI6ICI3LjMuMSIsCiAgICAidml0ZS1wbHVnaW4tZHRzIjogIjQuNS40IiwKICAgICJ2aXRlc3QiOiAiMy4yLjQiLAogICAgInZzY29kZS1qc29ucnBjIjogIl44LjIuMSIKICB9LAogICJkZXBlbmRlbmNpZXMiOiB7CiAgICAiQGdpdGh1Yi9jb3BpbG90LXNkayI6ICJeMC4xLjIxIgogIH0sCiAgImJ1Z3MiOiB7CiAgICAidXJsIjogImh0dHBzOi8vZ2l0aHViLmNvbS9yYXBoYWVscG9yL2thdHQvaXNzdWVzIgogIH0sCiAgImhvbWVwYWdlIjogImh0dHBzOi8vZ2l0aHViLmNvbS9yYXBoYWVscG9yL2thdHQiCn0K", import.meta.url);
|
|
491
|
-
let A;
|
|
492
|
-
function re() {
|
|
493
|
-
if (A !== void 0)
|
|
494
|
-
return A;
|
|
495
|
-
try {
|
|
496
|
-
const t = N.protocol === "data:" ? ce(N) : V(ut(N), "utf8"), e = JSON.parse(t);
|
|
497
|
-
A = typeof e.version == "string" ? e.version : "unknown";
|
|
498
|
-
} catch {
|
|
499
|
-
A = "unknown";
|
|
500
|
-
}
|
|
501
|
-
return A;
|
|
502
|
-
}
|
|
503
|
-
function ce(t) {
|
|
504
|
-
const e = t.pathname.indexOf(",");
|
|
505
|
-
if (e < 0)
|
|
506
|
-
throw new Error("Invalid data URL.");
|
|
507
|
-
const n = t.pathname.slice(0, e), o = t.pathname.slice(e + 1);
|
|
508
|
-
return n.includes(";base64") ? Buffer.from(o, "base64").toString("utf8") : decodeURIComponent(o);
|
|
509
|
-
}
|
|
510
|
-
function ae() {
|
|
511
|
-
const t = " ██╗ ██╗ █████╗ ████████╗████████╗", e = " ██║ ██╔╝██╔══██╗╚══██╔══╝╚══██╔══╝", n = " █████╔╝ ███████║ ██║ ██║", o = " ██╔═██╗ ██╔══██║ ██║ ██║", s = " ██║ ██╗██║ ██║ ██║ ██║", i = " ╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ ╚═╝", r = `v${re()}`, c = Math.max(
|
|
512
|
-
0,
|
|
513
|
-
Math.floor((t.length - r.length) / 2)
|
|
514
|
-
), l = `${" ".repeat(c)}${r}`;
|
|
515
|
-
console.log(`
|
|
516
|
-
${$(t)}
|
|
517
|
-
${$(e)}
|
|
518
|
-
${$(n)}
|
|
519
|
-
${D(o)}
|
|
520
|
-
${D(s)}
|
|
521
|
-
${Nt(i)}
|
|
522
|
-
${$(l)}
|
|
523
|
-
`);
|
|
524
|
-
}
|
|
525
|
-
function le(t) {
|
|
526
|
-
const e = String(t.getHours()).padStart(2, "0"), n = String(t.getMinutes()).padStart(2, "0"), o = String(t.getSeconds()).padStart(2, "0");
|
|
527
|
-
return `${e}:${n}:${o}`;
|
|
528
|
-
}
|
|
529
|
-
async function ue() {
|
|
530
|
-
const t = process.argv.slice(2), e = t.includes("--update-snapshots") || t.includes("-u");
|
|
531
|
-
Kt(e), ae();
|
|
532
|
-
const n = /* @__PURE__ */ new Date();
|
|
533
|
-
Bt(), vt(), kt();
|
|
534
|
-
const o = await ot(process.cwd());
|
|
535
|
-
if (o.length === 0)
|
|
536
|
-
return console.log("No .eval.js or .eval.ts files found."), 1;
|
|
537
|
-
const i = (await Promise.allSettled(
|
|
538
|
-
o.map(
|
|
539
|
-
(a) => G.run(
|
|
540
|
-
{ evalFile: a },
|
|
541
|
-
() => import(dt(a).href)
|
|
542
|
-
)
|
|
543
|
-
)
|
|
544
|
-
)).map((a, d) => ({ result: a, file: o[d] })).filter(({ result: a }) => a.status === "rejected");
|
|
545
|
-
if (i.length > 0) {
|
|
546
|
-
for (const a of i) {
|
|
547
|
-
const d = a.result.status === "rejected" ? a.result.reason : void 0;
|
|
548
|
-
console.error(`Error executing ${a.file}: ${String(d)}`);
|
|
549
|
-
}
|
|
550
|
-
return 1;
|
|
551
|
-
}
|
|
552
|
-
const c = (await Lt()).filter(
|
|
553
|
-
(a) => a.status === "rejected"
|
|
554
|
-
);
|
|
555
|
-
if (c.length > 0) {
|
|
556
|
-
for (const a of c)
|
|
557
|
-
a.status === "rejected" && console.error(`Error executing async test: ${String(a.reason)}`);
|
|
558
|
-
return 1;
|
|
559
|
-
}
|
|
560
|
-
const l = yt();
|
|
561
|
-
if (l.length > 0) {
|
|
562
|
-
console.error("❌ Failed tests:");
|
|
563
|
-
for (const [a, d] of l.entries()) {
|
|
564
|
-
const p = [d.describePath, d.itPath].filter((S) => S.length > 0).join(" > "), b = p.length > 0 ? `${p}: ` : "";
|
|
565
|
-
console.error(`${a + 1}. ${b}${d.message}`);
|
|
566
|
-
}
|
|
567
|
-
return 1;
|
|
568
|
-
}
|
|
569
|
-
const I = $t(), T = Date.now() - n.getTime();
|
|
570
|
-
return console.log(
|
|
571
|
-
[
|
|
572
|
-
"---",
|
|
573
|
-
`${f("Files")} ${o.length} passed`,
|
|
574
|
-
`${f("Evals")} ${I} passed`,
|
|
575
|
-
`${f("Start at")} ${le(n)}`,
|
|
576
|
-
`${f("Duration")} ${T}ms`
|
|
577
|
-
].join(`
|
|
578
|
-
`)
|
|
579
|
-
), 0;
|
|
580
|
-
}
|
|
581
|
-
Object.assign(globalThis, { describe: jt, it: oe, expect: ne, prompt: j, promptFile: Pt });
|
|
582
|
-
ue().then((t) => {
|
|
583
|
-
process.exit(t);
|
|
584
|
-
}).catch((t) => {
|
|
585
|
-
console.error(`Unexpected error: ${String(t)}`), process.exit(1);
|
|
586
|
-
});
|
|
587
350
|
export {
|
|
588
|
-
|
|
351
|
+
Ct as describe,
|
|
352
|
+
vt as expect,
|
|
353
|
+
wt as it,
|
|
354
|
+
y as prompt,
|
|
355
|
+
bt as promptFile,
|
|
356
|
+
xt as runCli
|
|
589
357
|
};
|
package/dist/katt.js
ADDED
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
import { fileURLToPath as Z, pathToFileURL as y } from "node:url";
|
|
2
|
+
import { readdir as N } from "node:fs/promises";
|
|
3
|
+
import { resolve as M } from "node:path";
|
|
4
|
+
import { AsyncLocalStorage as v } from "node:async_hooks";
|
|
5
|
+
import { readFileSync as X } from "node:fs";
|
|
6
|
+
const H = /\.eval\.(js|ts)$/, R = /* @__PURE__ */ new Set([".git", "node_modules"]);
|
|
7
|
+
async function w(t) {
|
|
8
|
+
const e = await N(t, { withFileTypes: !0 }), n = [];
|
|
9
|
+
return await Promise.all(
|
|
10
|
+
e.map(async (i) => {
|
|
11
|
+
const s = M(t, i.name);
|
|
12
|
+
if (i.isDirectory()) {
|
|
13
|
+
if (R.has(i.name))
|
|
14
|
+
return;
|
|
15
|
+
n.push(...await w(s));
|
|
16
|
+
return;
|
|
17
|
+
}
|
|
18
|
+
i.isFile() && H.test(i.name) && n.push(s);
|
|
19
|
+
})
|
|
20
|
+
), n;
|
|
21
|
+
}
|
|
22
|
+
const j = new v(), W = {
|
|
23
|
+
describeStack: [],
|
|
24
|
+
itStack: [],
|
|
25
|
+
tokenUsageStack: [],
|
|
26
|
+
modelStack: []
|
|
27
|
+
};
|
|
28
|
+
let b = 0, L = 0;
|
|
29
|
+
const p = [], f = [];
|
|
30
|
+
let k = 0;
|
|
31
|
+
function o() {
|
|
32
|
+
return j.getStore() ?? W;
|
|
33
|
+
}
|
|
34
|
+
function J(t) {
|
|
35
|
+
return {
|
|
36
|
+
describeStack: [...t.describeStack],
|
|
37
|
+
itStack: [...t.itStack],
|
|
38
|
+
tokenUsageStack: [...t.tokenUsageStack],
|
|
39
|
+
modelStack: [...t.modelStack]
|
|
40
|
+
};
|
|
41
|
+
}
|
|
42
|
+
function U() {
|
|
43
|
+
return b += 1, `d${b}`;
|
|
44
|
+
}
|
|
45
|
+
function G() {
|
|
46
|
+
return L += 1, `i${L}`;
|
|
47
|
+
}
|
|
48
|
+
function B(t) {
|
|
49
|
+
return t.tokenUsageStack.length - 1;
|
|
50
|
+
}
|
|
51
|
+
function F(t) {
|
|
52
|
+
return t.modelStack.length - 1;
|
|
53
|
+
}
|
|
54
|
+
function bt(t, e) {
|
|
55
|
+
const n = e ?? J(o());
|
|
56
|
+
return j.run(n, t);
|
|
57
|
+
}
|
|
58
|
+
function Lt() {
|
|
59
|
+
return J(o());
|
|
60
|
+
}
|
|
61
|
+
function $t(t) {
|
|
62
|
+
o().describeStack.push({ id: U(), description: t });
|
|
63
|
+
}
|
|
64
|
+
function vt() {
|
|
65
|
+
o().describeStack.pop();
|
|
66
|
+
}
|
|
67
|
+
function O() {
|
|
68
|
+
return o().describeStack.map((t) => t.description).join(" > ");
|
|
69
|
+
}
|
|
70
|
+
function wt() {
|
|
71
|
+
return [...o().describeStack];
|
|
72
|
+
}
|
|
73
|
+
function jt(t) {
|
|
74
|
+
const e = o();
|
|
75
|
+
e.itStack.push({ id: G(), description: t }), e.tokenUsageStack.push(0), e.modelStack.push(void 0);
|
|
76
|
+
}
|
|
77
|
+
function Jt() {
|
|
78
|
+
const t = o();
|
|
79
|
+
t.itStack.pop(), t.tokenUsageStack.pop(), t.modelStack.pop();
|
|
80
|
+
}
|
|
81
|
+
function Y() {
|
|
82
|
+
return o().itStack.map((t) => t.description).join(" > ");
|
|
83
|
+
}
|
|
84
|
+
function Bt() {
|
|
85
|
+
return [...o().itStack];
|
|
86
|
+
}
|
|
87
|
+
function Ft(t) {
|
|
88
|
+
if (!Number.isFinite(t) || t <= 0)
|
|
89
|
+
return;
|
|
90
|
+
const e = o(), n = B(e);
|
|
91
|
+
n < 0 || (e.tokenUsageStack[n] += t);
|
|
92
|
+
}
|
|
93
|
+
function Q() {
|
|
94
|
+
const t = o(), e = B(t);
|
|
95
|
+
return e < 0 ? 0 : t.tokenUsageStack[e] ?? 0;
|
|
96
|
+
}
|
|
97
|
+
function xt(t) {
|
|
98
|
+
if (t.length === 0)
|
|
99
|
+
return;
|
|
100
|
+
const e = o(), n = F(e);
|
|
101
|
+
n < 0 || (e.modelStack[n] = t);
|
|
102
|
+
}
|
|
103
|
+
function V() {
|
|
104
|
+
const t = o(), e = F(t);
|
|
105
|
+
if (!(e < 0))
|
|
106
|
+
return t.modelStack[e];
|
|
107
|
+
}
|
|
108
|
+
function Tt(t) {
|
|
109
|
+
p.push(t);
|
|
110
|
+
}
|
|
111
|
+
function Zt() {
|
|
112
|
+
k += 1;
|
|
113
|
+
}
|
|
114
|
+
function K() {
|
|
115
|
+
return k;
|
|
116
|
+
}
|
|
117
|
+
function P() {
|
|
118
|
+
k = 0;
|
|
119
|
+
}
|
|
120
|
+
function yt(t) {
|
|
121
|
+
f.push(t);
|
|
122
|
+
}
|
|
123
|
+
function E() {
|
|
124
|
+
return [...f];
|
|
125
|
+
}
|
|
126
|
+
function Nt() {
|
|
127
|
+
return f.length;
|
|
128
|
+
}
|
|
129
|
+
function z() {
|
|
130
|
+
f.length = 0;
|
|
131
|
+
}
|
|
132
|
+
async function D() {
|
|
133
|
+
const t = [];
|
|
134
|
+
for (; p.length > 0; ) {
|
|
135
|
+
const e = p.splice(0, p.length), n = await Promise.allSettled(e);
|
|
136
|
+
t.push(...n);
|
|
137
|
+
}
|
|
138
|
+
return t;
|
|
139
|
+
}
|
|
140
|
+
const _ = "\x1B[1;36m", q = "\x1B[33m", tt = "\x1B[38;5;208m", et = "\x1B[1;38;5;208m", m = "\x1B[0m";
|
|
141
|
+
function r(t) {
|
|
142
|
+
return `${_}${t}${m}`;
|
|
143
|
+
}
|
|
144
|
+
function C(t) {
|
|
145
|
+
return `${q}${t}${m}`;
|
|
146
|
+
}
|
|
147
|
+
function $(t) {
|
|
148
|
+
return `${tt}${t}${m}`;
|
|
149
|
+
}
|
|
150
|
+
function nt(t) {
|
|
151
|
+
return `${et}${t}${m}`;
|
|
152
|
+
}
|
|
153
|
+
let A = "";
|
|
154
|
+
function it() {
|
|
155
|
+
A = "";
|
|
156
|
+
}
|
|
157
|
+
function ot({
|
|
158
|
+
suitePath: t,
|
|
159
|
+
casePath: e,
|
|
160
|
+
didPass: n,
|
|
161
|
+
durationMs: i,
|
|
162
|
+
model: s,
|
|
163
|
+
tokenUsage: c
|
|
164
|
+
}) {
|
|
165
|
+
const l = t.length > 0 ? t : "(root)", u = e.length > 0 ? e : "(root)";
|
|
166
|
+
A !== l && (console.log(`Suite "${r(l)}"`), A = l);
|
|
167
|
+
const g = n ? "✅ Passed in" : "❌ Failed in", I = [
|
|
168
|
+
`Test "${r(u)}"`,
|
|
169
|
+
`- ${g} ${r(`${i}ms`)}`
|
|
170
|
+
];
|
|
171
|
+
s && I.push(`- Model ${r(s)}`), (c ?? 0) > 0 && I.push(`- Tokens used ${r(String(c))}`), I.push("---"), console.log(I.join(`
|
|
172
|
+
`));
|
|
173
|
+
}
|
|
174
|
+
function Mt(t, e, n = "(root)") {
|
|
175
|
+
const i = Y();
|
|
176
|
+
ot({
|
|
177
|
+
suitePath: O(),
|
|
178
|
+
casePath: i.length > 0 ? i : n,
|
|
179
|
+
didPass: t,
|
|
180
|
+
durationMs: e,
|
|
181
|
+
model: V(),
|
|
182
|
+
tokenUsage: Q()
|
|
183
|
+
});
|
|
184
|
+
}
|
|
185
|
+
const st = new v(), h = new URL("data:application/json;base64,ewogICJuYW1lIjogImthdHQiLAogICJ2ZXJzaW9uIjogIjAuMC41IiwKICAiZGVzY3JpcHRpb24iOiAiQ0xJIHRvb2wgdGhhdCB0ZXN0cyB0aGUgb3V0cHV0IG9mIGFnZW50aWMgQUkgdG9vbHMiLAogICJrZXl3b3JkcyI6IFsKICAgICJjbGkiLAogICAgImFpIiwKICAgICJhZ2VudGljLWFpIiwKICAgICJ0ZXN0aW5nIiwKICAgICJldmFsdWF0aW9uIgogIF0sCiAgImF1dGhvciI6ICJSYXBoYWVsIFBvcnRvIChodHRwczovL2dpdGh1Yi5jb20vcmFwaGFlbHBvcikiLAogICJsaWNlbnNlIjogIk1JVCIsCiAgInR5cGUiOiAibW9kdWxlIiwKICAibWFpbiI6ICJkaXN0L2luZGV4LmpzIiwKICAiZXhwb3J0cyI6IHsKICAgICIuIjogewogICAgICAidHlwZXMiOiAiLi9kaXN0L2luZGV4LmQudHMiLAogICAgICAiaW1wb3J0IjogIi4vZGlzdC9pbmRleC5qcyIKICAgIH0KICB9LAogICJiaW4iOiB7CiAgICAia2F0dCI6ICJkaXN0L2thdHQuanMiCiAgfSwKICAic2NyaXB0cyI6IHsKICAgICJidWlsZCI6ICJ2aXRlIGJ1aWxkIiwKICAgICJkZXYiOiAidHN4IHNyYy9pbmRleC50cyIsCiAgICAibGludCI6ICJiaW9tZSBsaW50IC4vc3JjIiwKICAgICJmb3JtYXQiOiAiYmlvbWUgZm9ybWF0IC0td3JpdGUgLi9zcmMiLAogICAgInRlc3QiOiAidml0ZXN0IiwKICAgICJ0eXBlY2hlY2siOiAidHNjIC1wIHRzY29uZmlnLmpzb24gLS1ub0VtaXQiLAogICAgInRlc3Q6YnVpbGQiOiAibm9kZSAuL2Rpc3Qva2F0dC5qcyIKICB9LAogICJ0eXBlcyI6ICJkaXN0L2luZGV4LmQudHMiLAogICJkZXZEZXBlbmRlbmNpZXMiOiB7CiAgICAiQGJpb21lanMvYmlvbWUiOiAiMS45LjQiLAogICAgIkB0eXBlcy9ub2RlIjogIjI1LjIuMCIsCiAgICAidHN4IjogIjQuMjEuMCIsCiAgICAidHlwZXNjcmlwdCI6ICI1LjguMiIsCiAgICAidml0ZSI6ICI3LjMuMSIsCiAgICAidml0ZS1wbHVnaW4tZHRzIjogIjQuNS40IiwKICAgICJ2aXRlc3QiOiAiMy4yLjQiLAogICAgInZzY29kZS1qc29ucnBjIjogIl44LjIuMSIKICB9LAogICJkZXBlbmRlbmNpZXMiOiB7CiAgICAiQGdpdGh1Yi9jb3BpbG90LXNkayI6ICJeMC4xLjIxIgogIH0sCiAgImJ1Z3MiOiB7CiAgICAidXJsIjogImh0dHBzOi8vZ2l0aHViLmNvbS9yYXBoYWVscG9yL2thdHQvaXNzdWVzIgogIH0sCiAgImhvbWVwYWdlIjogImh0dHBzOi8vZ2l0aHViLmNvbS9yYXBoYWVscG9yL2thdHQiCn0K", import.meta.url);
|
|
186
|
+
let d;
|
|
187
|
+
function at() {
|
|
188
|
+
if (d !== void 0)
|
|
189
|
+
return d;
|
|
190
|
+
try {
|
|
191
|
+
const t = h.protocol === "data:" ? rt(h) : X(Z(h), "utf8"), e = JSON.parse(t);
|
|
192
|
+
d = typeof e.version == "string" ? e.version : "unknown";
|
|
193
|
+
} catch {
|
|
194
|
+
d = "unknown";
|
|
195
|
+
}
|
|
196
|
+
return d;
|
|
197
|
+
}
|
|
198
|
+
function rt(t) {
|
|
199
|
+
const e = t.pathname.indexOf(",");
|
|
200
|
+
if (e < 0)
|
|
201
|
+
throw new Error("Invalid data URL.");
|
|
202
|
+
const n = t.pathname.slice(0, e), i = t.pathname.slice(e + 1);
|
|
203
|
+
return n.includes(";base64") ? Buffer.from(i, "base64").toString("utf8") : decodeURIComponent(i);
|
|
204
|
+
}
|
|
205
|
+
function ct() {
|
|
206
|
+
const t = " ██╗ ██╗ █████╗ ████████╗████████╗", e = " ██║ ██╔╝██╔══██╗╚══██╔══╝╚══██╔══╝", n = " █████╔╝ ███████║ ██║ ██║", i = " ██╔═██╗ ██╔══██║ ██║ ██║", s = " ██║ ██╗██║ ██║ ██║ ██║", c = " ╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ ╚═╝", l = `v${at()}`, u = Math.max(
|
|
207
|
+
0,
|
|
208
|
+
Math.floor((t.length - l.length) / 2)
|
|
209
|
+
), g = `${" ".repeat(u)}${l}`;
|
|
210
|
+
console.log(`
|
|
211
|
+
${C(t)}
|
|
212
|
+
${C(e)}
|
|
213
|
+
${C(n)}
|
|
214
|
+
${$(i)}
|
|
215
|
+
${$(s)}
|
|
216
|
+
${nt(c)}
|
|
217
|
+
${C(g)}
|
|
218
|
+
`);
|
|
219
|
+
}
|
|
220
|
+
let x = !1;
|
|
221
|
+
function lt(t) {
|
|
222
|
+
x = t;
|
|
223
|
+
}
|
|
224
|
+
function Xt() {
|
|
225
|
+
return x;
|
|
226
|
+
}
|
|
227
|
+
function ut(t) {
|
|
228
|
+
const e = String(t.getHours()).padStart(2, "0"), n = String(t.getMinutes()).padStart(2, "0"), i = String(t.getSeconds()).padStart(2, "0");
|
|
229
|
+
return `${e}:${n}:${i}`;
|
|
230
|
+
}
|
|
231
|
+
function gt(t) {
|
|
232
|
+
return t.includes("--update-snapshots") || t.includes("-u");
|
|
233
|
+
}
|
|
234
|
+
function It(t) {
|
|
235
|
+
return [t.describePath, t.itPath].filter((e) => e.length > 0).join(" > ");
|
|
236
|
+
}
|
|
237
|
+
function dt(t) {
|
|
238
|
+
for (const e of t)
|
|
239
|
+
console.error(`Error executing ${e.file}: ${String(e.reason)}`);
|
|
240
|
+
}
|
|
241
|
+
function Ct(t) {
|
|
242
|
+
for (const e of t)
|
|
243
|
+
console.error(`Error executing async test: ${String(e.reason)}`);
|
|
244
|
+
}
|
|
245
|
+
function pt(t) {
|
|
246
|
+
console.error("❌ Failed tests:");
|
|
247
|
+
for (const [e, n] of t.entries()) {
|
|
248
|
+
const i = It(n), s = i.length > 0 ? `${i}: ` : "";
|
|
249
|
+
console.error(`${e + 1}. ${s}${n.message}`);
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
function ft(t, e, n, i) {
|
|
253
|
+
return [
|
|
254
|
+
"---",
|
|
255
|
+
`${r("Files")} ${t} passed`,
|
|
256
|
+
`${r("Evals")} ${e} passed`,
|
|
257
|
+
`${r("Start at")} ${ut(n)}`,
|
|
258
|
+
`${r("Duration")} ${i}ms`
|
|
259
|
+
].join(`
|
|
260
|
+
`);
|
|
261
|
+
}
|
|
262
|
+
async function Ht() {
|
|
263
|
+
const t = process.argv.slice(2), e = gt(t);
|
|
264
|
+
lt(e), ct();
|
|
265
|
+
const n = /* @__PURE__ */ new Date();
|
|
266
|
+
it(), z(), P();
|
|
267
|
+
const i = await w(process.cwd());
|
|
268
|
+
if (i.length === 0)
|
|
269
|
+
return console.log("No .eval.js or .eval.ts files found."), 1;
|
|
270
|
+
const c = (await Promise.allSettled(
|
|
271
|
+
i.map(
|
|
272
|
+
(a) => st.run(
|
|
273
|
+
{ evalFile: a },
|
|
274
|
+
() => import(y(a).href)
|
|
275
|
+
)
|
|
276
|
+
)
|
|
277
|
+
)).map((a, S) => ({ result: a, file: i[S] })).filter(({ result: a }) => a.status === "rejected").map(({ result: a, file: S }) => ({
|
|
278
|
+
file: S,
|
|
279
|
+
reason: a.status === "rejected" ? a.reason : void 0
|
|
280
|
+
}));
|
|
281
|
+
if (c.length > 0)
|
|
282
|
+
return dt(c), 1;
|
|
283
|
+
const u = (await D()).filter(
|
|
284
|
+
(a) => a.status === "rejected"
|
|
285
|
+
);
|
|
286
|
+
if (u.length > 0)
|
|
287
|
+
return Ct(u), 1;
|
|
288
|
+
const g = E();
|
|
289
|
+
if (g.length > 0)
|
|
290
|
+
return pt(g), 1;
|
|
291
|
+
const I = K(), T = Date.now() - n.getTime();
|
|
292
|
+
return console.log(
|
|
293
|
+
ft(i.length, I, n, T)
|
|
294
|
+
), 0;
|
|
295
|
+
}
|
|
296
|
+
export {
|
|
297
|
+
Tt as a,
|
|
298
|
+
Jt as b,
|
|
299
|
+
Lt as c,
|
|
300
|
+
Zt as d,
|
|
301
|
+
$t as e,
|
|
302
|
+
vt as f,
|
|
303
|
+
Nt as g,
|
|
304
|
+
st as h,
|
|
305
|
+
Ft as i,
|
|
306
|
+
yt as j,
|
|
307
|
+
Y as k,
|
|
308
|
+
Mt as l,
|
|
309
|
+
O as m,
|
|
310
|
+
Xt as n,
|
|
311
|
+
wt as o,
|
|
312
|
+
jt as p,
|
|
313
|
+
Bt as q,
|
|
314
|
+
bt as r,
|
|
315
|
+
xt as s,
|
|
316
|
+
Ht as t
|
|
317
|
+
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "katt",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.6",
|
|
4
4
|
"description": "CLI tool that tests the output of agentic AI tools",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"cli",
|
|
@@ -13,8 +13,14 @@
|
|
|
13
13
|
"license": "MIT",
|
|
14
14
|
"type": "module",
|
|
15
15
|
"main": "dist/index.js",
|
|
16
|
+
"exports": {
|
|
17
|
+
".": {
|
|
18
|
+
"types": "./dist/index.d.ts",
|
|
19
|
+
"import": "./dist/index.js"
|
|
20
|
+
}
|
|
21
|
+
},
|
|
16
22
|
"bin": {
|
|
17
|
-
"katt": "dist/
|
|
23
|
+
"katt": "dist/katt.js"
|
|
18
24
|
},
|
|
19
25
|
"scripts": {
|
|
20
26
|
"build": "vite build",
|
|
@@ -23,7 +29,7 @@
|
|
|
23
29
|
"format": "biome format --write ./src",
|
|
24
30
|
"test": "vitest",
|
|
25
31
|
"typecheck": "tsc -p tsconfig.json --noEmit",
|
|
26
|
-
"test:build": "node ./dist/
|
|
32
|
+
"test:build": "node ./dist/katt.js"
|
|
27
33
|
},
|
|
28
34
|
"types": "dist/index.d.ts",
|
|
29
35
|
"devDependencies": {
|