katt 0.0.3 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +47 -4
- package/dist/index.js +177 -420
- package/dist/katt.js +7 -0
- package/dist/runCli-CDRmZ5hw.js +288 -0
- package/package.json +9 -3
package/README.md
CHANGED
|
@@ -1,20 +1,44 @@
|
|
|
1
1
|
# Katt
|
|
2
|
+
[](https://github.com/raphaelpor/katt/blob/main/LICENSE) [](https://www.npmjs.com/package/katt)
|
|
2
3
|
|
|
3
4
|
Katt is a lightweight testing framework for running AI Evals, inspired by [Jest](https://github.com/jestjs/jest).
|
|
4
5
|
|
|
5
6
|
<img src="https://raw.githubusercontent.com/raphaelpor/katt/main/docs/logo.png" alt="Katt logo" width="250" />
|
|
6
7
|
|
|
8
|
+
## Table of Contents
|
|
9
|
+
|
|
10
|
+
- [Overview](#overview)
|
|
11
|
+
- [API Documentation](#api-documentation)
|
|
12
|
+
- [Hello World - Example](#hello-world---example)
|
|
13
|
+
- [Main Features](#main-features)
|
|
14
|
+
- [Usage](#usage)
|
|
15
|
+
- [Installation](#installation)
|
|
16
|
+
- [Basic Usage](#basic-usage)
|
|
17
|
+
- [Using promptFile](#using-promptfile)
|
|
18
|
+
- [Specifying AI Models](#specifying-ai-models)
|
|
19
|
+
- [Development](#development)
|
|
20
|
+
- [Setup](#setup)
|
|
21
|
+
- [Available Scripts](#available-scripts)
|
|
22
|
+
- [Verification Process](#verification-process)
|
|
23
|
+
- [Project Structure](#project-structure)
|
|
24
|
+
- [How It Works](#how-it-works)
|
|
25
|
+
- [Requirements](#requirements)
|
|
26
|
+
- [License](#license)
|
|
27
|
+
- [Contributing](#contributing)
|
|
28
|
+
|
|
7
29
|
## Overview
|
|
8
30
|
|
|
9
31
|
Katt is designed to evaluate and validate the behavior of AI agents like **Claude Code**, **GitHub Copilot**, **OpenAI Codex** and more. It provides a simple, intuitive API for writing tests that interact with AI models and assert their responses.
|
|
10
32
|
|
|
11
33
|
## API Documentation
|
|
12
34
|
|
|
13
|
-
For a complete list of features and usage examples, see [docs/api-documentation.md](docs/api-documentation.md).
|
|
35
|
+
For a complete list of features and usage examples, see [docs/api-documentation.md](https://github.com/raphaelpor/katt/blob/main/docs/api-documentation.md).
|
|
14
36
|
|
|
15
37
|
## Hello World - Example
|
|
16
38
|
|
|
17
39
|
```typescript
|
|
40
|
+
import { expect, prompt } from "katt";
|
|
41
|
+
|
|
18
42
|
const result = await prompt("If you read this just say 'hello world'");
|
|
19
43
|
expect(result).toContain("hello world");
|
|
20
44
|
```
|
|
@@ -22,6 +46,8 @@ expect(result).toContain("hello world");
|
|
|
22
46
|
It also supports the familiar `describe` and `it` syntax for organizing tests:
|
|
23
47
|
|
|
24
48
|
```typescript
|
|
49
|
+
import { describe, expect, it, prompt } from "katt";
|
|
50
|
+
|
|
25
51
|
describe("Greeting agent", () => {
|
|
26
52
|
it("should say hello world", async () => {
|
|
27
53
|
const result = await prompt("If you read this just say 'hello world'");
|
|
@@ -37,13 +63,22 @@ describe("Greeting agent", () => {
|
|
|
37
63
|
- **Classification Matcher**: Built-in `toBeClassifiedAs()` matcher to grade a response against a target label on a 1-5 scale
|
|
38
64
|
- **Concurrent Execution**: Runs eval files concurrently for faster test execution
|
|
39
65
|
- **Model Selection**: Support for specifying custom AI models
|
|
66
|
+
- **Configurable Timeouts**: Override prompt wait time per test or via `katt.json`
|
|
40
67
|
|
|
41
68
|
## Usage
|
|
42
69
|
|
|
70
|
+
### Installation
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
npm install -g katt
|
|
74
|
+
```
|
|
75
|
+
|
|
43
76
|
### Basic Usage
|
|
44
77
|
|
|
45
78
|
1. Create a file with the `.eval.ts` or `.eval.js` extension and write your tests.
|
|
46
79
|
```typescript
|
|
80
|
+
import { expect, prompt } from "katt";
|
|
81
|
+
|
|
47
82
|
const result = await prompt("If you read this just say 'hello world'");
|
|
48
83
|
expect(result).toContain("hello world");
|
|
49
84
|
```
|
|
@@ -51,7 +86,7 @@ expect(result).toContain("hello world");
|
|
|
51
86
|
2. Run Katt from your project directory:
|
|
52
87
|
|
|
53
88
|
```bash
|
|
54
|
-
|
|
89
|
+
katt
|
|
55
90
|
```
|
|
56
91
|
|
|
57
92
|
### Using promptFile
|
|
@@ -60,6 +95,8 @@ Load prompts from external files:
|
|
|
60
95
|
|
|
61
96
|
```javascript
|
|
62
97
|
// test.eval.js
|
|
98
|
+
import { describe, expect, it, promptFile } from "katt";
|
|
99
|
+
|
|
63
100
|
describe("Working with files", () => {
|
|
64
101
|
it("should load the file and respond", async () => {
|
|
65
102
|
const result = await promptFile("./myPrompt.md");
|
|
@@ -73,6 +110,8 @@ describe("Working with files", () => {
|
|
|
73
110
|
You can specify a custom model for your prompts:
|
|
74
111
|
|
|
75
112
|
```javascript
|
|
113
|
+
import { describe, expect, it, prompt } from "katt";
|
|
114
|
+
|
|
76
115
|
describe("Model selection", () => {
|
|
77
116
|
it("should use a specific model", async () => {
|
|
78
117
|
const promptString = "You are a helpful agent. Say hi and ask what you could help the user with.";
|
|
@@ -89,6 +128,9 @@ You can also set a default model for the project by adding a `katt.json` file in
|
|
|
89
128
|
{
|
|
90
129
|
"copilot": {
|
|
91
130
|
"model": "gpt-5-mini"
|
|
131
|
+
},
|
|
132
|
+
"prompt": {
|
|
133
|
+
"timeoutMs": 240000
|
|
92
134
|
}
|
|
93
135
|
}
|
|
94
136
|
```
|
|
@@ -97,6 +139,7 @@ When this file exists:
|
|
|
97
139
|
|
|
98
140
|
- `prompt("...")` and `promptFile("...")` use `copilot.model` by default
|
|
99
141
|
- `prompt("...", { model: "..." })` still overrides the config value
|
|
142
|
+
- `prompt.timeoutMs` sets the default wait timeout for long-running prompts
|
|
100
143
|
|
|
101
144
|
## Development
|
|
102
145
|
|
|
@@ -162,7 +205,7 @@ MIT
|
|
|
162
205
|
|
|
163
206
|
## Contributing
|
|
164
207
|
|
|
165
|
-
We welcome contributions from the community! Please see our [CONTRIBUTING.md](CONTRIBUTING.md) guide for detailed information on how to contribute to Katt.
|
|
208
|
+
We welcome contributions from the community! Please see our [CONTRIBUTING.md](https://github.com/raphaelpor/katt/blob/main/CONTRIBUTING.md) guide for detailed information on how to contribute to Katt.
|
|
166
209
|
|
|
167
210
|
Quick start:
|
|
168
211
|
1. Fork the repository
|
|
@@ -171,4 +214,4 @@ Quick start:
|
|
|
171
214
|
4. Run the verification process
|
|
172
215
|
5. Submit a pull request
|
|
173
216
|
|
|
174
|
-
For detailed guidelines, development setup, coding standards, and more, check out our [contribution guide](CONTRIBUTING.md).
|
|
217
|
+
For detailed guidelines, development setup, coding standards, and more, check out our [contribution guide](https://github.com/raphaelpor/katt/blob/main/CONTRIBUTING.md).
|
package/dist/index.js
CHANGED
|
@@ -1,183 +1,56 @@
|
|
|
1
|
-
|
|
2
|
-
import {
|
|
3
|
-
import { CopilotClient as
|
|
4
|
-
import { readFile as
|
|
5
|
-
import { resolve as
|
|
6
|
-
import { readFileSync as
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
}
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
}
|
|
31
|
-
function dt() {
|
|
32
|
-
return G += 1, `i${G}`;
|
|
33
|
-
}
|
|
34
|
-
function V(t, e) {
|
|
35
|
-
const n = e ?? X(u());
|
|
36
|
-
return D.run(n, t);
|
|
37
|
-
}
|
|
38
|
-
function P() {
|
|
39
|
-
return X(u());
|
|
40
|
-
}
|
|
41
|
-
function gt(t) {
|
|
42
|
-
u().describeStack.push({ id: ut(), description: t });
|
|
43
|
-
}
|
|
44
|
-
function y() {
|
|
45
|
-
u().describeStack.pop();
|
|
46
|
-
}
|
|
47
|
-
function Y() {
|
|
48
|
-
return u().describeStack.map((t) => t.description).join(" > ");
|
|
49
|
-
}
|
|
50
|
-
function ft(t) {
|
|
51
|
-
u().itStack.push({ id: dt(), description: t }), u().tokenUsageStack.push(0), u().modelStack.push(void 0);
|
|
52
|
-
}
|
|
53
|
-
function v() {
|
|
54
|
-
u().itStack.pop(), u().tokenUsageStack.pop(), u().modelStack.pop();
|
|
55
|
-
}
|
|
56
|
-
function z() {
|
|
57
|
-
return u().itStack.map((t) => t.description).join(" > ");
|
|
58
|
-
}
|
|
59
|
-
function pt(t) {
|
|
60
|
-
if (!Number.isFinite(t) || t <= 0)
|
|
61
|
-
return;
|
|
62
|
-
const e = u(), n = e.tokenUsageStack.length - 1;
|
|
63
|
-
n < 0 || (e.tokenUsageStack[n] += t);
|
|
64
|
-
}
|
|
65
|
-
function ht() {
|
|
66
|
-
const t = u(), e = t.tokenUsageStack.length - 1;
|
|
67
|
-
return e < 0 ? 0 : t.tokenUsageStack[e] ?? 0;
|
|
68
|
-
}
|
|
69
|
-
function It(t) {
|
|
70
|
-
if (t.length === 0)
|
|
71
|
-
return;
|
|
72
|
-
const e = u(), n = e.modelStack.length - 1;
|
|
73
|
-
n < 0 || (e.modelStack[n] = t);
|
|
74
|
-
}
|
|
75
|
-
function mt() {
|
|
76
|
-
const t = u(), e = t.modelStack.length - 1;
|
|
77
|
-
if (!(e < 0))
|
|
78
|
-
return t.modelStack[e];
|
|
79
|
-
}
|
|
80
|
-
function b(t) {
|
|
81
|
-
A.push(t);
|
|
82
|
-
}
|
|
83
|
-
function Ct() {
|
|
84
|
-
T += 1;
|
|
85
|
-
}
|
|
86
|
-
function St() {
|
|
87
|
-
return T;
|
|
88
|
-
}
|
|
89
|
-
function At() {
|
|
90
|
-
T = 0;
|
|
91
|
-
}
|
|
92
|
-
function $t(t) {
|
|
93
|
-
$.push(t);
|
|
94
|
-
}
|
|
95
|
-
function bt() {
|
|
96
|
-
return [...$];
|
|
97
|
-
}
|
|
98
|
-
function U() {
|
|
99
|
-
return $.length;
|
|
100
|
-
}
|
|
101
|
-
function kt() {
|
|
102
|
-
$.length = 0;
|
|
103
|
-
}
|
|
104
|
-
async function wt() {
|
|
105
|
-
const t = [];
|
|
106
|
-
for (; A.length > 0; ) {
|
|
107
|
-
const e = A.splice(0, A.length), n = await Promise.allSettled(e);
|
|
108
|
-
t.push(...n);
|
|
109
|
-
}
|
|
110
|
-
return t;
|
|
1
|
+
import { r as E, c as N, a as O, p as I, b as m, l as d, d as w, g as F, e as U, f as C, s as _, h as R, i as D, j as z, k as B, m as W, n as K } from "./runCli-CDRmZ5hw.js";
|
|
2
|
+
import { o as St } from "./runCli-CDRmZ5hw.js";
|
|
3
|
+
import { CopilotClient as H } from "@github/copilot-sdk";
|
|
4
|
+
import { readFile as A } from "node:fs/promises";
|
|
5
|
+
import { resolve as j, dirname as T, isAbsolute as Y, basename as J, join as q } from "node:path";
|
|
6
|
+
import { readFileSync as G, writeFileSync as S, mkdirSync as Q } from "node:fs";
|
|
7
|
+
function vt(t, e) {
|
|
8
|
+
E(() => {
|
|
9
|
+
O(), I(t);
|
|
10
|
+
const n = F(), r = Date.now(), i = () => F() === n, o = () => Date.now() - r;
|
|
11
|
+
try {
|
|
12
|
+
const s = e();
|
|
13
|
+
if (s && typeof s.then == "function") {
|
|
14
|
+
m(
|
|
15
|
+
s.then(() => {
|
|
16
|
+
d(!0, o());
|
|
17
|
+
}).catch((a) => {
|
|
18
|
+
throw d(!1, o()), a;
|
|
19
|
+
}).finally(() => {
|
|
20
|
+
w();
|
|
21
|
+
})
|
|
22
|
+
);
|
|
23
|
+
return;
|
|
24
|
+
}
|
|
25
|
+
} catch (s) {
|
|
26
|
+
throw d(!1, o()), w(), s;
|
|
27
|
+
}
|
|
28
|
+
d(i(), o()), w();
|
|
29
|
+
}, N());
|
|
111
30
|
}
|
|
112
|
-
function
|
|
113
|
-
|
|
114
|
-
|
|
31
|
+
function Tt(t, e) {
|
|
32
|
+
E(() => {
|
|
33
|
+
U(t);
|
|
115
34
|
try {
|
|
116
35
|
const n = e();
|
|
117
36
|
if (n && typeof n.then == "function") {
|
|
118
|
-
|
|
37
|
+
m(
|
|
119
38
|
n.finally(() => {
|
|
120
|
-
|
|
39
|
+
C();
|
|
121
40
|
})
|
|
122
41
|
);
|
|
123
42
|
return;
|
|
124
43
|
}
|
|
125
44
|
} catch (n) {
|
|
126
|
-
throw
|
|
45
|
+
throw C(), n;
|
|
127
46
|
}
|
|
128
|
-
|
|
129
|
-
},
|
|
130
|
-
}
|
|
131
|
-
const vt = "\x1B[1;36m", Lt = "\x1B[33m", jt = "\x1B[38;5;208m", xt = "\x1B[1;38;5;208m", k = "\x1B[0m";
|
|
132
|
-
function f(t) {
|
|
133
|
-
return `${vt}${t}${k}`;
|
|
134
|
-
}
|
|
135
|
-
function C(t) {
|
|
136
|
-
return `${Lt}${t}${k}`;
|
|
137
|
-
}
|
|
138
|
-
function W(t) {
|
|
139
|
-
return `${jt}${t}${k}`;
|
|
47
|
+
C();
|
|
48
|
+
}, N());
|
|
140
49
|
}
|
|
141
|
-
function
|
|
142
|
-
return `${xt}${t}${k}`;
|
|
143
|
-
}
|
|
144
|
-
let x = "";
|
|
145
|
-
function Nt() {
|
|
146
|
-
x = "";
|
|
147
|
-
}
|
|
148
|
-
function Tt({
|
|
149
|
-
suitePath: t,
|
|
150
|
-
casePath: e,
|
|
151
|
-
didPass: n,
|
|
152
|
-
durationMs: o,
|
|
153
|
-
model: i,
|
|
154
|
-
tokenUsage: s
|
|
155
|
-
}) {
|
|
156
|
-
const r = t.length > 0 ? t : "(root)", a = e.length > 0 ? e : "(root)";
|
|
157
|
-
x !== r && (console.log(`Suite "${f(r)}"`), x = r);
|
|
158
|
-
const l = n ? "✅ Passed in" : "❌ Failed in", d = [
|
|
159
|
-
`Test "${f(a)}"`,
|
|
160
|
-
`- ${l} ${f(`${o}ms`)}`
|
|
161
|
-
];
|
|
162
|
-
i && d.push(`- Model ${f(i)}`), (s ?? 0) > 0 && d.push(`- Tokens used ${f(String(s))}`), d.push("---"), console.log(d.join(`
|
|
163
|
-
`));
|
|
164
|
-
}
|
|
165
|
-
function I(t, e, n = "(root)") {
|
|
166
|
-
const o = z();
|
|
167
|
-
Tt({
|
|
168
|
-
suitePath: Y(),
|
|
169
|
-
casePath: o.length > 0 ? o : n,
|
|
170
|
-
didPass: t,
|
|
171
|
-
durationMs: e,
|
|
172
|
-
model: mt(),
|
|
173
|
-
tokenUsage: ht()
|
|
174
|
-
});
|
|
175
|
-
}
|
|
176
|
-
const B = new E();
|
|
177
|
-
function Bt(t, e) {
|
|
50
|
+
function V(t, e) {
|
|
178
51
|
return typeof t == "object" && t !== null && "code" in t && t.code === e;
|
|
179
52
|
}
|
|
180
|
-
function
|
|
53
|
+
function X(t) {
|
|
181
54
|
try {
|
|
182
55
|
const e = JSON.parse(t);
|
|
183
56
|
return typeof e == "object" && e !== null ? e : void 0;
|
|
@@ -186,110 +59,133 @@ function Jt(t) {
|
|
|
186
59
|
return;
|
|
187
60
|
}
|
|
188
61
|
}
|
|
189
|
-
function
|
|
190
|
-
const
|
|
191
|
-
if (typeof e != "object" || e === null || Array.isArray(e))
|
|
192
|
-
return;
|
|
193
|
-
const n = {
|
|
194
|
-
...e
|
|
195
|
-
}, o = n.model;
|
|
196
|
-
return (typeof o != "string" || o.length === 0) && delete n.model, Object.keys(n).length > 0 ? n : void 0;
|
|
197
|
-
}
|
|
198
|
-
async function Mt() {
|
|
199
|
-
const t = F(process.cwd(), "katt.json");
|
|
62
|
+
async function Z() {
|
|
63
|
+
const t = j(process.cwd(), "katt.json");
|
|
200
64
|
try {
|
|
201
|
-
const e = await
|
|
202
|
-
return
|
|
65
|
+
const e = await A(t, "utf8");
|
|
66
|
+
return X(e);
|
|
203
67
|
} catch (e) {
|
|
204
|
-
if (
|
|
68
|
+
if (V(e, "ENOENT"))
|
|
205
69
|
return;
|
|
206
70
|
console.warn(`Failed to read katt.json: ${String(e)}`);
|
|
207
71
|
return;
|
|
208
72
|
}
|
|
209
73
|
}
|
|
210
|
-
function
|
|
74
|
+
function tt(t) {
|
|
75
|
+
const e = t?.copilot;
|
|
76
|
+
if (typeof e != "object" || e === null || Array.isArray(e))
|
|
77
|
+
return;
|
|
78
|
+
const n = {
|
|
79
|
+
...e
|
|
80
|
+
}, r = n.model;
|
|
81
|
+
return (typeof r != "string" || r.length === 0) && delete n.model, Object.keys(n).length > 0 ? n : void 0;
|
|
82
|
+
}
|
|
83
|
+
function et(t) {
|
|
84
|
+
if (!(typeof t != "number" || !Number.isFinite(t)) && !(t <= 0))
|
|
85
|
+
return Math.floor(t);
|
|
86
|
+
}
|
|
87
|
+
function nt(t) {
|
|
88
|
+
const e = t?.prompt;
|
|
89
|
+
if (!(typeof e != "object" || e === null || Array.isArray(e)))
|
|
90
|
+
return et(e.timeoutMs);
|
|
91
|
+
}
|
|
92
|
+
async function ot() {
|
|
93
|
+
const t = await Z();
|
|
94
|
+
return {
|
|
95
|
+
copilot: tt(t),
|
|
96
|
+
promptTimeoutMs: nt(t)
|
|
97
|
+
};
|
|
98
|
+
}
|
|
99
|
+
const rt = 6e5;
|
|
100
|
+
function P(t) {
|
|
211
101
|
return typeof t == "string" && t.length > 0 ? t : void 0;
|
|
212
102
|
}
|
|
213
|
-
function
|
|
103
|
+
function v(t) {
|
|
214
104
|
if (!t)
|
|
215
105
|
return;
|
|
216
106
|
const e = { ...t };
|
|
217
107
|
if (e.model !== void 0) {
|
|
218
|
-
const n =
|
|
108
|
+
const n = P(e.model);
|
|
219
109
|
n ? e.model = n : delete e.model;
|
|
220
110
|
}
|
|
221
111
|
return Object.keys(e).length > 0 ? e : void 0;
|
|
222
112
|
}
|
|
223
|
-
function
|
|
224
|
-
|
|
113
|
+
function M(t) {
|
|
114
|
+
if (!(typeof t != "number" || !Number.isFinite(t)) && !(t <= 0))
|
|
115
|
+
return Math.floor(t);
|
|
225
116
|
}
|
|
226
|
-
function
|
|
227
|
-
return
|
|
117
|
+
function h(t) {
|
|
118
|
+
return !Number.isFinite(t) || (t ?? 0) <= 0 ? 0 : Math.floor(t ?? 0);
|
|
228
119
|
}
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
}
|
|
234
|
-
|
|
120
|
+
function it(t) {
|
|
121
|
+
return h(t.inputTokens) + h(t.outputTokens) + h(t.cacheReadTokens) + h(t.cacheWriteTokens);
|
|
122
|
+
}
|
|
123
|
+
async function $(t, e = {}) {
|
|
124
|
+
const { timeoutMs: n, ...r } = e, i = await ot(), o = v(i.copilot), s = v(
|
|
125
|
+
r
|
|
126
|
+
), a = v({
|
|
127
|
+
...o ?? {},
|
|
128
|
+
...s ?? {}
|
|
129
|
+
}), c = M(i.promptTimeoutMs), L = M(n) ?? c ?? rt, b = P(a?.model), g = new H({ useLoggedInUser: !0 });
|
|
130
|
+
let p, x, y = 0;
|
|
235
131
|
try {
|
|
236
|
-
await
|
|
237
|
-
|
|
132
|
+
await g.start(), p = await g.createSession(a), x = p.on("assistant.usage", (f) => {
|
|
133
|
+
y += it(f.data);
|
|
238
134
|
});
|
|
239
|
-
const
|
|
240
|
-
if (!
|
|
135
|
+
const l = await p.sendAndWait({ prompt: t }, L);
|
|
136
|
+
if (!l?.data?.content)
|
|
241
137
|
throw new Error("Copilot did not return a response.");
|
|
242
|
-
return
|
|
138
|
+
return b && _(b), l.data.content;
|
|
243
139
|
} finally {
|
|
244
|
-
const
|
|
245
|
-
if (
|
|
140
|
+
const l = [];
|
|
141
|
+
if (x?.(), y > 0 && R(y), p)
|
|
246
142
|
try {
|
|
247
|
-
await
|
|
248
|
-
} catch (
|
|
249
|
-
|
|
143
|
+
await p.destroy();
|
|
144
|
+
} catch (f) {
|
|
145
|
+
l.push(f);
|
|
250
146
|
}
|
|
251
147
|
try {
|
|
252
|
-
const
|
|
253
|
-
|
|
254
|
-
} catch (
|
|
255
|
-
|
|
148
|
+
const f = await g.stop();
|
|
149
|
+
l.push(...f);
|
|
150
|
+
} catch (f) {
|
|
151
|
+
l.push(f);
|
|
256
152
|
}
|
|
257
|
-
|
|
258
|
-
`Copilot cleanup encountered ${
|
|
153
|
+
l.length > 0 && console.error(
|
|
154
|
+
`Copilot cleanup encountered ${l.length} error(s).`
|
|
259
155
|
);
|
|
260
156
|
}
|
|
261
157
|
}
|
|
262
|
-
async function
|
|
263
|
-
const n =
|
|
264
|
-
return
|
|
158
|
+
async function $t(t, e = {}) {
|
|
159
|
+
const n = D.getStore(), r = n?.evalFile ? T(n.evalFile) : process.cwd(), i = Y(t) ? t : j(r, t), o = await A(i, "utf8");
|
|
160
|
+
return $(o, e);
|
|
265
161
|
}
|
|
266
|
-
function
|
|
267
|
-
|
|
268
|
-
describePath:
|
|
269
|
-
itPath:
|
|
162
|
+
function u(t) {
|
|
163
|
+
z({
|
|
164
|
+
describePath: W(),
|
|
165
|
+
itPath: B(),
|
|
270
166
|
message: t
|
|
271
167
|
});
|
|
272
168
|
}
|
|
273
|
-
async function
|
|
274
|
-
const n = Date.now(),
|
|
169
|
+
async function st(t, e) {
|
|
170
|
+
const n = Date.now(), r = `expected '${t}' to satisfy '${e}'`, i = $(`Evaluate if the expectation is fulfiled in by the input.
|
|
275
171
|
Expectation: "${e}".
|
|
276
172
|
Input:
|
|
277
173
|
---
|
|
278
174
|
${t}
|
|
279
175
|
---
|
|
280
176
|
Important: Answer with "Yes" or "No" only, without any additional text.
|
|
281
|
-
`).then((
|
|
282
|
-
|
|
177
|
+
`).then((o) => {
|
|
178
|
+
o.includes("Yes") ? d(!0, Date.now() - n, "promptCheck") : o.includes("No") ? u(r) : u(`failed to evaluate expectation '${e}'`);
|
|
283
179
|
});
|
|
284
|
-
return
|
|
180
|
+
return m(i), i;
|
|
285
181
|
}
|
|
286
|
-
const
|
|
287
|
-
function
|
|
182
|
+
const at = 3;
|
|
183
|
+
function ct(t) {
|
|
288
184
|
const e = t.match(/\b([1-5])\b/);
|
|
289
185
|
return !e || !e[1] ? null : Number(e[1]);
|
|
290
186
|
}
|
|
291
|
-
async function
|
|
292
|
-
const
|
|
187
|
+
async function ut(t, e, n) {
|
|
188
|
+
const r = Date.now(), i = n?.threshold ?? at, o = $(
|
|
293
189
|
`Classify the input by how "${e}" it is on a scale of 1 to 5.
|
|
294
190
|
1 means "not ${e}" and 5 means "very ${e}".
|
|
295
191
|
Return only a single number: 1, 2, 3, 4, or 5.
|
|
@@ -299,89 +195,82 @@ Input:
|
|
|
299
195
|
${t}
|
|
300
196
|
---`,
|
|
301
197
|
n?.model ? { model: n.model } : void 0
|
|
302
|
-
).then((
|
|
303
|
-
const a =
|
|
198
|
+
).then((s) => {
|
|
199
|
+
const a = ct(s);
|
|
304
200
|
if (a === null) {
|
|
305
|
-
|
|
306
|
-
`failed to classify as '${e}'. Evaluator returned '${
|
|
201
|
+
u(
|
|
202
|
+
`failed to classify as '${e}'. Evaluator returned '${s}'`
|
|
307
203
|
);
|
|
308
204
|
return;
|
|
309
205
|
}
|
|
310
|
-
const
|
|
206
|
+
const c = `expected response to be classified as '${e}' with score >= ${i}, got ${a}`;
|
|
311
207
|
if (a < i) {
|
|
312
|
-
|
|
208
|
+
u(c);
|
|
313
209
|
return;
|
|
314
210
|
}
|
|
315
|
-
|
|
211
|
+
d(
|
|
316
212
|
!0,
|
|
317
|
-
Date.now() -
|
|
213
|
+
Date.now() - r,
|
|
318
214
|
"toBeClassifiedAs"
|
|
319
215
|
);
|
|
320
216
|
});
|
|
321
|
-
return
|
|
217
|
+
return m(o), o;
|
|
322
218
|
}
|
|
323
|
-
function
|
|
219
|
+
function lt(t, e) {
|
|
324
220
|
const n = `expected '${t}' to include '${e}'`;
|
|
325
|
-
t.includes(e) ||
|
|
221
|
+
t.includes(e) || u(n);
|
|
326
222
|
}
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
function Xt() {
|
|
332
|
-
return K;
|
|
333
|
-
}
|
|
334
|
-
function Vt(t) {
|
|
335
|
-
const n = st(t).replace(/\.eval\.[^./\\]+$/, "");
|
|
336
|
-
return it(
|
|
337
|
-
N(t),
|
|
223
|
+
function ft(t) {
|
|
224
|
+
const n = J(t).replace(/\.eval\.[^./\\]+$/, "");
|
|
225
|
+
return q(
|
|
226
|
+
T(t),
|
|
338
227
|
"__snapshots__",
|
|
339
228
|
`${n}.snap.md`
|
|
340
229
|
);
|
|
341
230
|
}
|
|
342
|
-
function
|
|
231
|
+
function k(t) {
|
|
343
232
|
return t.split(/\r?\n/);
|
|
344
233
|
}
|
|
345
|
-
function
|
|
234
|
+
function dt(t, e) {
|
|
346
235
|
if (t === e)
|
|
347
236
|
return " (no diff)";
|
|
348
|
-
const n =
|
|
349
|
-
for (let
|
|
350
|
-
const a = n[
|
|
351
|
-
if (a !==
|
|
352
|
-
if (a === void 0 &&
|
|
353
|
-
|
|
237
|
+
const n = k(t), r = k(e), i = Math.max(n.length, r.length), o = [];
|
|
238
|
+
for (let s = 0; s < i; s += 1) {
|
|
239
|
+
const a = n[s], c = r[s];
|
|
240
|
+
if (a !== c) {
|
|
241
|
+
if (a === void 0 && c !== void 0) {
|
|
242
|
+
o.push(`+ ${c}`);
|
|
354
243
|
continue;
|
|
355
244
|
}
|
|
356
|
-
if (a !== void 0 &&
|
|
357
|
-
|
|
245
|
+
if (a !== void 0 && c === void 0) {
|
|
246
|
+
o.push(`- ${a}`);
|
|
358
247
|
continue;
|
|
359
248
|
}
|
|
360
|
-
|
|
249
|
+
o.push(`- ${a ?? ""}`), o.push(`+ ${c ?? ""}`);
|
|
361
250
|
}
|
|
362
251
|
}
|
|
363
|
-
return
|
|
252
|
+
return o.join(`
|
|
364
253
|
`);
|
|
365
254
|
}
|
|
366
|
-
function
|
|
367
|
-
const e =
|
|
255
|
+
function pt(t) {
|
|
256
|
+
const e = D.getStore()?.evalFile;
|
|
368
257
|
if (!e) {
|
|
369
|
-
|
|
258
|
+
u(
|
|
370
259
|
"toMatchSnapshot can only be used while running an eval file."
|
|
371
260
|
);
|
|
372
261
|
return;
|
|
373
262
|
}
|
|
374
|
-
const n =
|
|
263
|
+
const n = ft(e);
|
|
375
264
|
try {
|
|
376
|
-
const
|
|
377
|
-
if (
|
|
265
|
+
const r = G(n, "utf8");
|
|
266
|
+
if (r === t)
|
|
378
267
|
return;
|
|
379
|
-
if (
|
|
380
|
-
|
|
268
|
+
if (K()) {
|
|
269
|
+
S(n, t, "utf8");
|
|
381
270
|
return;
|
|
382
271
|
}
|
|
383
|
-
const i =
|
|
384
|
-
|
|
272
|
+
const i = dt(r, t);
|
|
273
|
+
u(
|
|
385
274
|
[
|
|
386
275
|
`Snapshot mismatch at ${n}`,
|
|
387
276
|
"",
|
|
@@ -392,175 +281,43 @@ function Yt(t) {
|
|
|
392
281
|
].join(`
|
|
393
282
|
`)
|
|
394
283
|
);
|
|
395
|
-
} catch (
|
|
396
|
-
if (
|
|
397
|
-
|
|
398
|
-
`Failed to read snapshot at ${n}: ${String(
|
|
284
|
+
} catch (r) {
|
|
285
|
+
if (r.code !== "ENOENT") {
|
|
286
|
+
u(
|
|
287
|
+
`Failed to read snapshot at ${n}: ${String(r)}`
|
|
399
288
|
);
|
|
400
289
|
return;
|
|
401
290
|
}
|
|
402
291
|
try {
|
|
403
|
-
|
|
404
|
-
} catch (
|
|
405
|
-
|
|
406
|
-
`Failed to write snapshot at ${n}: ${String(
|
|
292
|
+
Q(T(n), { recursive: !0 }), S(n, t, "utf8");
|
|
293
|
+
} catch (o) {
|
|
294
|
+
u(
|
|
295
|
+
`Failed to write snapshot at ${n}: ${String(o)}`
|
|
407
296
|
);
|
|
408
297
|
}
|
|
409
298
|
}
|
|
410
299
|
}
|
|
411
|
-
function
|
|
300
|
+
function bt(t) {
|
|
412
301
|
return {
|
|
413
302
|
toContain: (e) => {
|
|
414
|
-
|
|
303
|
+
lt(t, e);
|
|
415
304
|
},
|
|
416
305
|
toMatchSnapshot: () => {
|
|
417
|
-
|
|
306
|
+
pt(t);
|
|
418
307
|
},
|
|
419
308
|
promptCheck: async (e) => {
|
|
420
|
-
await
|
|
309
|
+
await st(t, e);
|
|
421
310
|
},
|
|
422
311
|
toBeClassifiedAs: async (e, n) => {
|
|
423
|
-
await
|
|
312
|
+
await ut(t, e, n);
|
|
424
313
|
}
|
|
425
314
|
};
|
|
426
315
|
}
|
|
427
|
-
function Qt(t, e) {
|
|
428
|
-
V(() => {
|
|
429
|
-
Ct(), ft(t);
|
|
430
|
-
const n = U(), o = Date.now(), i = () => U() === n, s = () => Date.now() - o;
|
|
431
|
-
try {
|
|
432
|
-
const r = e();
|
|
433
|
-
if (r && typeof r.then == "function") {
|
|
434
|
-
b(
|
|
435
|
-
r.then(() => {
|
|
436
|
-
I(!0, s());
|
|
437
|
-
}).catch((a) => {
|
|
438
|
-
throw I(!1, s()), a;
|
|
439
|
-
}).finally(() => {
|
|
440
|
-
v();
|
|
441
|
-
})
|
|
442
|
-
);
|
|
443
|
-
return;
|
|
444
|
-
}
|
|
445
|
-
} catch (r) {
|
|
446
|
-
throw I(!1, s()), v(), r;
|
|
447
|
-
}
|
|
448
|
-
I(i(), s()), v();
|
|
449
|
-
}, P());
|
|
450
|
-
}
|
|
451
|
-
const Kt = /\.eval\.(js|ts)$/, _t = /* @__PURE__ */ new Set([".git", "node_modules"]);
|
|
452
|
-
async function _(t) {
|
|
453
|
-
const e = await nt(t, { withFileTypes: !0 }), n = [];
|
|
454
|
-
return await Promise.all(
|
|
455
|
-
e.map(async (o) => {
|
|
456
|
-
const i = F(t, o.name);
|
|
457
|
-
if (o.isDirectory()) {
|
|
458
|
-
if (_t.has(o.name))
|
|
459
|
-
return;
|
|
460
|
-
n.push(...await _(i));
|
|
461
|
-
return;
|
|
462
|
-
}
|
|
463
|
-
o.isFile() && Kt.test(o.name) && n.push(i);
|
|
464
|
-
})
|
|
465
|
-
), n;
|
|
466
|
-
}
|
|
467
|
-
const j = new URL("data:application/json;base64,ewogICJuYW1lIjogImthdHQiLAogICJ2ZXJzaW9uIjogIjAuMC4yIiwKICAiZGVzY3JpcHRpb24iOiAiQ0xJIHRvb2wgdGhhdCB0ZXN0cyB0aGUgb3V0cHV0IG9mIGFnZW50aWMgQUkgdG9vbHMiLAogICJrZXl3b3JkcyI6IFsKICAgICJjbGkiLAogICAgImFpIiwKICAgICJhZ2VudGljLWFpIiwKICAgICJ0ZXN0aW5nIiwKICAgICJldmFsdWF0aW9uIgogIF0sCiAgImF1dGhvciI6ICJSYXBoYWVsIFBvcnRvIChodHRwczovL2dpdGh1Yi5jb20vcmFwaGFlbHBvcikiLAogICJsaWNlbnNlIjogIk1JVCIsCiAgInR5cGUiOiAibW9kdWxlIiwKICAibWFpbiI6ICJkaXN0L2luZGV4LmpzIiwKICAiYmluIjogewogICAgImthdHQiOiAiZGlzdC9pbmRleC5qcyIKICB9LAogICJzY3JpcHRzIjogewogICAgImJ1aWxkIjogInZpdGUgYnVpbGQiLAogICAgImRldiI6ICJ0c3ggc3JjL2luZGV4LnRzIiwKICAgICJsaW50IjogImJpb21lIGxpbnQgLi9zcmMiLAogICAgImZvcm1hdCI6ICJiaW9tZSBmb3JtYXQgLS13cml0ZSAuL3NyYyIsCiAgICAidGVzdCI6ICJ2aXRlc3QiLAogICAgInR5cGVjaGVjayI6ICJ0c2MgLXAgdHNjb25maWcuanNvbiAtLW5vRW1pdCIsCiAgICAidGVzdDpidWlsZCI6ICJub2RlIC4vZGlzdC9pbmRleC5qcyIKICB9LAogICJ0eXBlcyI6ICJkaXN0L2luZGV4LmQudHMiLAogICJkZXZEZXBlbmRlbmNpZXMiOiB7CiAgICAiQGJpb21lanMvYmlvbWUiOiAiMS45LjQiLAogICAgIkB0eXBlcy9ub2RlIjogIjI1LjIuMCIsCiAgICAidHN4IjogIjQuMjEuMCIsCiAgICAidHlwZXNjcmlwdCI6ICI1LjguMiIsCiAgICAidml0ZSI6ICI3LjMuMSIsCiAgICAidml0ZS1wbHVnaW4tZHRzIjogIjQuNS40IiwKICAgICJ2aXRlc3QiOiAiMy4yLjQiLAogICAgInZzY29kZS1qc29ucnBjIjogIl44LjIuMSIKICB9LAogICJkZXBlbmRlbmNpZXMiOiB7CiAgICAiQGdpdGh1Yi9jb3BpbG90LXNkayI6ICJeMC4xLjIxIgogIH0sCiAgImJ1Z3MiOiB7CiAgICAidXJsIjogImh0dHBzOi8vZ2l0aHViLmNvbS9yYXBoYWVscG9yL2thdHQvaXNzdWVzIgogIH0sCiAgImhvbWVwYWdlIjogImh0dHBzOi8vZ2l0aHViLmNvbS9yYXBoYWVscG9yL2thdHQiCn0K", import.meta.url);
|
|
468
|
-
let m;
|
|
469
|
-
function qt() {
|
|
470
|
-
if (m !== void 0)
|
|
471
|
-
return m;
|
|
472
|
-
try {
|
|
473
|
-
const t = j.protocol === "data:" ? te(j) : H(ct(j), "utf8"), e = JSON.parse(t);
|
|
474
|
-
m = typeof e.version == "string" ? e.version : "unknown";
|
|
475
|
-
} catch {
|
|
476
|
-
m = "unknown";
|
|
477
|
-
}
|
|
478
|
-
return m;
|
|
479
|
-
}
|
|
480
|
-
function te(t) {
|
|
481
|
-
const e = t.pathname.indexOf(",");
|
|
482
|
-
if (e < 0)
|
|
483
|
-
throw new Error("Invalid data URL.");
|
|
484
|
-
const n = t.pathname.slice(0, e), o = t.pathname.slice(e + 1);
|
|
485
|
-
return n.includes(";base64") ? Buffer.from(o, "base64").toString("utf8") : decodeURIComponent(o);
|
|
486
|
-
}
|
|
487
|
-
function ee() {
|
|
488
|
-
const t = " ██╗ ██╗ █████╗ ████████╗████████╗", e = " ██║ ██╔╝██╔══██╗╚══██╔══╝╚══██╔══╝", n = " █████╔╝ ███████║ ██║ ██║", o = " ██╔═██╗ ██╔══██║ ██║ ██║", i = " ██║ ██╗██║ ██║ ██║ ██║", s = " ╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ ╚═╝", r = `v${qt()}`, a = Math.max(
|
|
489
|
-
0,
|
|
490
|
-
Math.floor((t.length - r.length) / 2)
|
|
491
|
-
), l = `${" ".repeat(a)}${r}`;
|
|
492
|
-
console.log(`
|
|
493
|
-
${C(t)}
|
|
494
|
-
${C(e)}
|
|
495
|
-
${C(n)}
|
|
496
|
-
${W(o)}
|
|
497
|
-
${W(i)}
|
|
498
|
-
${Ft(s)}
|
|
499
|
-
${C(l)}
|
|
500
|
-
`);
|
|
501
|
-
}
|
|
502
|
-
function ne(t) {
|
|
503
|
-
const e = String(t.getHours()).padStart(2, "0"), n = String(t.getMinutes()).padStart(2, "0"), o = String(t.getSeconds()).padStart(2, "0");
|
|
504
|
-
return `${e}:${n}:${o}`;
|
|
505
|
-
}
|
|
506
|
-
async function oe() {
|
|
507
|
-
const t = process.argv.slice(2), e = t.includes("--update-snapshots") || t.includes("-u");
|
|
508
|
-
Dt(e), ee();
|
|
509
|
-
const n = /* @__PURE__ */ new Date();
|
|
510
|
-
Nt(), kt(), At();
|
|
511
|
-
const o = await _(process.cwd());
|
|
512
|
-
if (o.length === 0)
|
|
513
|
-
return console.log("No .eval.js or .eval.ts files found."), 1;
|
|
514
|
-
const s = (await Promise.allSettled(
|
|
515
|
-
o.map(
|
|
516
|
-
(c) => B.run(
|
|
517
|
-
{ evalFile: c },
|
|
518
|
-
() => import(at(c).href)
|
|
519
|
-
)
|
|
520
|
-
)
|
|
521
|
-
)).map((c, h) => ({ result: c, file: o[h] })).filter(({ result: c }) => c.status === "rejected");
|
|
522
|
-
if (s.length > 0) {
|
|
523
|
-
for (const c of s) {
|
|
524
|
-
const h = c.result.status === "rejected" ? c.result.reason : void 0;
|
|
525
|
-
console.error(`Error executing ${c.file}: ${String(h)}`);
|
|
526
|
-
}
|
|
527
|
-
return 1;
|
|
528
|
-
}
|
|
529
|
-
const a = (await wt()).filter(
|
|
530
|
-
(c) => c.status === "rejected"
|
|
531
|
-
);
|
|
532
|
-
if (a.length > 0) {
|
|
533
|
-
for (const c of a)
|
|
534
|
-
c.status === "rejected" && console.error(`Error executing async test: ${String(c.reason)}`);
|
|
535
|
-
return 1;
|
|
536
|
-
}
|
|
537
|
-
const l = bt();
|
|
538
|
-
if (l.length > 0) {
|
|
539
|
-
console.error("❌ Failed tests:");
|
|
540
|
-
for (const [c, h] of l.entries()) {
|
|
541
|
-
const J = [h.describePath, h.itPath].filter((tt) => tt.length > 0).join(" > "), q = J.length > 0 ? `${J}: ` : "";
|
|
542
|
-
console.error(`${c + 1}. ${q}${h.message}`);
|
|
543
|
-
}
|
|
544
|
-
return 1;
|
|
545
|
-
}
|
|
546
|
-
const d = St(), g = Date.now() - n.getTime();
|
|
547
|
-
return console.log(
|
|
548
|
-
[
|
|
549
|
-
"---",
|
|
550
|
-
`${f("Files")} ${o.length} passed`,
|
|
551
|
-
`${f("Evals")} ${d} passed`,
|
|
552
|
-
`${f("Start at")} ${ne(n)}`,
|
|
553
|
-
`${f("Duration")} ${g}ms`
|
|
554
|
-
].join(`
|
|
555
|
-
`)
|
|
556
|
-
), 0;
|
|
557
|
-
}
|
|
558
|
-
Object.assign(globalThis, { describe: yt, it: Qt, expect: zt, prompt: w, promptFile: Ut });
|
|
559
|
-
oe().then((t) => {
|
|
560
|
-
process.exit(t);
|
|
561
|
-
}).catch((t) => {
|
|
562
|
-
console.error(`Unexpected error: ${String(t)}`), process.exit(1);
|
|
563
|
-
});
|
|
564
316
|
export {
|
|
565
|
-
|
|
317
|
+
Tt as describe,
|
|
318
|
+
bt as expect,
|
|
319
|
+
vt as it,
|
|
320
|
+
$ as prompt,
|
|
321
|
+
$t as promptFile,
|
|
322
|
+
St as runCli
|
|
566
323
|
};
|
package/dist/katt.js
ADDED
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
import { fileURLToPath as x, pathToFileURL as y } from "node:url";
|
|
2
|
+
import { readdir as X } from "node:fs/promises";
|
|
3
|
+
import { resolve as H } from "node:path";
|
|
4
|
+
import { AsyncLocalStorage as j } from "node:async_hooks";
|
|
5
|
+
import { readFileSync as M } from "node:fs";
|
|
6
|
+
const R = /\.eval\.(js|ts)$/, W = /* @__PURE__ */ new Set([".git", "node_modules"]);
|
|
7
|
+
async function v(t) {
|
|
8
|
+
const e = await X(t, { withFileTypes: !0 }), n = [];
|
|
9
|
+
return await Promise.all(
|
|
10
|
+
e.map(async (o) => {
|
|
11
|
+
const a = H(t, o.name);
|
|
12
|
+
if (o.isDirectory()) {
|
|
13
|
+
if (W.has(o.name))
|
|
14
|
+
return;
|
|
15
|
+
n.push(...await v(a));
|
|
16
|
+
return;
|
|
17
|
+
}
|
|
18
|
+
o.isFile() && R.test(o.name) && n.push(a);
|
|
19
|
+
})
|
|
20
|
+
), n;
|
|
21
|
+
}
|
|
22
|
+
const J = new j(), G = {
|
|
23
|
+
describeStack: [],
|
|
24
|
+
itStack: [],
|
|
25
|
+
tokenUsageStack: [],
|
|
26
|
+
modelStack: []
|
|
27
|
+
};
|
|
28
|
+
let L = 0, $ = 0;
|
|
29
|
+
const p = [], m = [];
|
|
30
|
+
let k = 0;
|
|
31
|
+
function s() {
|
|
32
|
+
return J.getStore() ?? G;
|
|
33
|
+
}
|
|
34
|
+
function B(t) {
|
|
35
|
+
return {
|
|
36
|
+
describeStack: [...t.describeStack],
|
|
37
|
+
itStack: [...t.itStack],
|
|
38
|
+
tokenUsageStack: [...t.tokenUsageStack],
|
|
39
|
+
modelStack: [...t.modelStack]
|
|
40
|
+
};
|
|
41
|
+
}
|
|
42
|
+
function U() {
|
|
43
|
+
return L += 1, `d${L}`;
|
|
44
|
+
}
|
|
45
|
+
function O() {
|
|
46
|
+
return $ += 1, `i${$}`;
|
|
47
|
+
}
|
|
48
|
+
function mt(t, e) {
|
|
49
|
+
const n = e ?? B(s());
|
|
50
|
+
return J.run(n, t);
|
|
51
|
+
}
|
|
52
|
+
function ht() {
|
|
53
|
+
return B(s());
|
|
54
|
+
}
|
|
55
|
+
function St(t) {
|
|
56
|
+
s().describeStack.push({ id: U(), description: t });
|
|
57
|
+
}
|
|
58
|
+
function At() {
|
|
59
|
+
s().describeStack.pop();
|
|
60
|
+
}
|
|
61
|
+
function Y() {
|
|
62
|
+
return s().describeStack.map((t) => t.description).join(" > ");
|
|
63
|
+
}
|
|
64
|
+
function kt(t) {
|
|
65
|
+
s().itStack.push({ id: O(), description: t }), s().tokenUsageStack.push(0), s().modelStack.push(void 0);
|
|
66
|
+
}
|
|
67
|
+
function bt() {
|
|
68
|
+
s().itStack.pop(), s().tokenUsageStack.pop(), s().modelStack.pop();
|
|
69
|
+
}
|
|
70
|
+
function Q() {
|
|
71
|
+
return s().itStack.map((t) => t.description).join(" > ");
|
|
72
|
+
}
|
|
73
|
+
function Lt(t) {
|
|
74
|
+
if (!Number.isFinite(t) || t <= 0)
|
|
75
|
+
return;
|
|
76
|
+
const e = s(), n = e.tokenUsageStack.length - 1;
|
|
77
|
+
n < 0 || (e.tokenUsageStack[n] += t);
|
|
78
|
+
}
|
|
79
|
+
function V() {
|
|
80
|
+
const t = s(), e = t.tokenUsageStack.length - 1;
|
|
81
|
+
return e < 0 ? 0 : t.tokenUsageStack[e] ?? 0;
|
|
82
|
+
}
|
|
83
|
+
function $t(t) {
|
|
84
|
+
if (t.length === 0)
|
|
85
|
+
return;
|
|
86
|
+
const e = s(), n = e.modelStack.length - 1;
|
|
87
|
+
n < 0 || (e.modelStack[n] = t);
|
|
88
|
+
}
|
|
89
|
+
function K() {
|
|
90
|
+
const t = s(), e = t.modelStack.length - 1;
|
|
91
|
+
if (!(e < 0))
|
|
92
|
+
return t.modelStack[e];
|
|
93
|
+
}
|
|
94
|
+
function wt(t) {
|
|
95
|
+
p.push(t);
|
|
96
|
+
}
|
|
97
|
+
function jt() {
|
|
98
|
+
k += 1;
|
|
99
|
+
}
|
|
100
|
+
function P() {
|
|
101
|
+
return k;
|
|
102
|
+
}
|
|
103
|
+
function z() {
|
|
104
|
+
k = 0;
|
|
105
|
+
}
|
|
106
|
+
function vt(t) {
|
|
107
|
+
m.push(t);
|
|
108
|
+
}
|
|
109
|
+
function E() {
|
|
110
|
+
return [...m];
|
|
111
|
+
}
|
|
112
|
+
function Jt() {
|
|
113
|
+
return m.length;
|
|
114
|
+
}
|
|
115
|
+
function D() {
|
|
116
|
+
m.length = 0;
|
|
117
|
+
}
|
|
118
|
+
async function _() {
|
|
119
|
+
const t = [];
|
|
120
|
+
for (; p.length > 0; ) {
|
|
121
|
+
const e = p.splice(0, p.length), n = await Promise.allSettled(e);
|
|
122
|
+
t.push(...n);
|
|
123
|
+
}
|
|
124
|
+
return t;
|
|
125
|
+
}
|
|
126
|
+
const q = "\x1B[1;36m", tt = "\x1B[33m", et = "\x1B[38;5;208m", nt = "\x1B[1;38;5;208m", h = "\x1B[0m";
|
|
127
|
+
function c(t) {
|
|
128
|
+
return `${q}${t}${h}`;
|
|
129
|
+
}
|
|
130
|
+
function f(t) {
|
|
131
|
+
return `${tt}${t}${h}`;
|
|
132
|
+
}
|
|
133
|
+
function w(t) {
|
|
134
|
+
return `${et}${t}${h}`;
|
|
135
|
+
}
|
|
136
|
+
function ot(t) {
|
|
137
|
+
return `${nt}${t}${h}`;
|
|
138
|
+
}
|
|
139
|
+
let A = "";
|
|
140
|
+
function it() {
|
|
141
|
+
A = "";
|
|
142
|
+
}
|
|
143
|
+
function st({
|
|
144
|
+
suitePath: t,
|
|
145
|
+
casePath: e,
|
|
146
|
+
didPass: n,
|
|
147
|
+
durationMs: o,
|
|
148
|
+
model: a,
|
|
149
|
+
tokenUsage: r
|
|
150
|
+
}) {
|
|
151
|
+
const l = t.length > 0 ? t : "(root)", g = e.length > 0 ? e : "(root)";
|
|
152
|
+
A !== l && (console.log(`Suite "${c(l)}"`), A = l);
|
|
153
|
+
const I = n ? "✅ Passed in" : "❌ Failed in", d = [
|
|
154
|
+
`Test "${c(g)}"`,
|
|
155
|
+
`- ${I} ${c(`${o}ms`)}`
|
|
156
|
+
];
|
|
157
|
+
a && d.push(`- Model ${c(a)}`), (r ?? 0) > 0 && d.push(`- Tokens used ${c(String(r))}`), d.push("---"), console.log(d.join(`
|
|
158
|
+
`));
|
|
159
|
+
}
|
|
160
|
+
function Bt(t, e, n = "(root)") {
|
|
161
|
+
const o = Q();
|
|
162
|
+
st({
|
|
163
|
+
suitePath: Y(),
|
|
164
|
+
casePath: o.length > 0 ? o : n,
|
|
165
|
+
didPass: t,
|
|
166
|
+
durationMs: e,
|
|
167
|
+
model: K(),
|
|
168
|
+
tokenUsage: V()
|
|
169
|
+
});
|
|
170
|
+
}
|
|
171
|
+
const at = new j(), S = new URL("data:application/json;base64,ewogICJuYW1lIjogImthdHQiLAogICJ2ZXJzaW9uIjogIjAuMC40IiwKICAiZGVzY3JpcHRpb24iOiAiQ0xJIHRvb2wgdGhhdCB0ZXN0cyB0aGUgb3V0cHV0IG9mIGFnZW50aWMgQUkgdG9vbHMiLAogICJrZXl3b3JkcyI6IFsKICAgICJjbGkiLAogICAgImFpIiwKICAgICJhZ2VudGljLWFpIiwKICAgICJ0ZXN0aW5nIiwKICAgICJldmFsdWF0aW9uIgogIF0sCiAgImF1dGhvciI6ICJSYXBoYWVsIFBvcnRvIChodHRwczovL2dpdGh1Yi5jb20vcmFwaGFlbHBvcikiLAogICJsaWNlbnNlIjogIk1JVCIsCiAgInR5cGUiOiAibW9kdWxlIiwKICAibWFpbiI6ICJkaXN0L2luZGV4LmpzIiwKICAiZXhwb3J0cyI6IHsKICAgICIuIjogewogICAgICAidHlwZXMiOiAiLi9kaXN0L2luZGV4LmQudHMiLAogICAgICAiaW1wb3J0IjogIi4vZGlzdC9pbmRleC5qcyIKICAgIH0KICB9LAogICJiaW4iOiB7CiAgICAia2F0dCI6ICJkaXN0L2thdHQuanMiCiAgfSwKICAic2NyaXB0cyI6IHsKICAgICJidWlsZCI6ICJ2aXRlIGJ1aWxkIiwKICAgICJkZXYiOiAidHN4IHNyYy9pbmRleC50cyIsCiAgICAibGludCI6ICJiaW9tZSBsaW50IC4vc3JjIiwKICAgICJmb3JtYXQiOiAiYmlvbWUgZm9ybWF0IC0td3JpdGUgLi9zcmMiLAogICAgInRlc3QiOiAidml0ZXN0IiwKICAgICJ0eXBlY2hlY2siOiAidHNjIC1wIHRzY29uZmlnLmpzb24gLS1ub0VtaXQiLAogICAgInRlc3Q6YnVpbGQiOiAibm9kZSAuL2Rpc3Qva2F0dC5qcyIKICB9LAogICJ0eXBlcyI6ICJkaXN0L2luZGV4LmQudHMiLAogICJkZXZEZXBlbmRlbmNpZXMiOiB7CiAgICAiQGJpb21lanMvYmlvbWUiOiAiMS45LjQiLAogICAgIkB0eXBlcy9ub2RlIjogIjI1LjIuMCIsCiAgICAidHN4IjogIjQuMjEuMCIsCiAgICAidHlwZXNjcmlwdCI6ICI1LjguMiIsCiAgICAidml0ZSI6ICI3LjMuMSIsCiAgICAidml0ZS1wbHVnaW4tZHRzIjogIjQuNS40IiwKICAgICJ2aXRlc3QiOiAiMy4yLjQiLAogICAgInZzY29kZS1qc29ucnBjIjogIl44LjIuMSIKICB9LAogICJkZXBlbmRlbmNpZXMiOiB7CiAgICAiQGdpdGh1Yi9jb3BpbG90LXNkayI6ICJeMC4xLjIxIgogIH0sCiAgImJ1Z3MiOiB7CiAgICAidXJsIjogImh0dHBzOi8vZ2l0aHViLmNvbS9yYXBoYWVscG9yL2thdHQvaXNzdWVzIgogIH0sCiAgImhvbWVwYWdlIjogImh0dHBzOi8vZ2l0aHViLmNvbS9yYXBoYWVscG9yL2thdHQiCn0K", import.meta.url);
|
|
172
|
+
let C;
|
|
173
|
+
function ct() {
|
|
174
|
+
if (C !== void 0)
|
|
175
|
+
return C;
|
|
176
|
+
try {
|
|
177
|
+
const t = S.protocol === "data:" ? rt(S) : M(x(S), "utf8"), e = JSON.parse(t);
|
|
178
|
+
C = typeof e.version == "string" ? e.version : "unknown";
|
|
179
|
+
} catch {
|
|
180
|
+
C = "unknown";
|
|
181
|
+
}
|
|
182
|
+
return C;
|
|
183
|
+
}
|
|
184
|
+
function rt(t) {
|
|
185
|
+
const e = t.pathname.indexOf(",");
|
|
186
|
+
if (e < 0)
|
|
187
|
+
throw new Error("Invalid data URL.");
|
|
188
|
+
const n = t.pathname.slice(0, e), o = t.pathname.slice(e + 1);
|
|
189
|
+
return n.includes(";base64") ? Buffer.from(o, "base64").toString("utf8") : decodeURIComponent(o);
|
|
190
|
+
}
|
|
191
|
+
function lt() {
|
|
192
|
+
const t = " ██╗ ██╗ █████╗ ████████╗████████╗", e = " ██║ ██╔╝██╔══██╗╚══██╔══╝╚══██╔══╝", n = " █████╔╝ ███████║ ██║ ██║", o = " ██╔═██╗ ██╔══██║ ██║ ██║", a = " ██║ ██╗██║ ██║ ██║ ██║", r = " ╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ ╚═╝", l = `v${ct()}`, g = Math.max(
|
|
193
|
+
0,
|
|
194
|
+
Math.floor((t.length - l.length) / 2)
|
|
195
|
+
), I = `${" ".repeat(g)}${l}`;
|
|
196
|
+
console.log(`
|
|
197
|
+
${f(t)}
|
|
198
|
+
${f(e)}
|
|
199
|
+
${f(n)}
|
|
200
|
+
${w(o)}
|
|
201
|
+
${w(a)}
|
|
202
|
+
${ot(r)}
|
|
203
|
+
${f(I)}
|
|
204
|
+
`);
|
|
205
|
+
}
|
|
206
|
+
let Z = !1;
|
|
207
|
+
function ut(t) {
|
|
208
|
+
Z = t;
|
|
209
|
+
}
|
|
210
|
+
function Zt() {
|
|
211
|
+
return Z;
|
|
212
|
+
}
|
|
213
|
+
function gt(t) {
|
|
214
|
+
const e = String(t.getHours()).padStart(2, "0"), n = String(t.getMinutes()).padStart(2, "0"), o = String(t.getSeconds()).padStart(2, "0");
|
|
215
|
+
return `${e}:${n}:${o}`;
|
|
216
|
+
}
|
|
217
|
+
async function Ft() {
|
|
218
|
+
const t = process.argv.slice(2), e = t.includes("--update-snapshots") || t.includes("-u");
|
|
219
|
+
ut(e), lt();
|
|
220
|
+
const n = /* @__PURE__ */ new Date();
|
|
221
|
+
it(), D(), z();
|
|
222
|
+
const o = await v(process.cwd());
|
|
223
|
+
if (o.length === 0)
|
|
224
|
+
return console.log("No .eval.js or .eval.ts files found."), 1;
|
|
225
|
+
const r = (await Promise.allSettled(
|
|
226
|
+
o.map(
|
|
227
|
+
(i) => at.run(
|
|
228
|
+
{ evalFile: i },
|
|
229
|
+
() => import(y(i).href)
|
|
230
|
+
)
|
|
231
|
+
)
|
|
232
|
+
)).map((i, u) => ({ result: i, file: o[u] })).filter(({ result: i }) => i.status === "rejected");
|
|
233
|
+
if (r.length > 0) {
|
|
234
|
+
for (const i of r) {
|
|
235
|
+
const u = i.result.status === "rejected" ? i.result.reason : void 0;
|
|
236
|
+
console.error(`Error executing ${i.file}: ${String(u)}`);
|
|
237
|
+
}
|
|
238
|
+
return 1;
|
|
239
|
+
}
|
|
240
|
+
const g = (await _()).filter(
|
|
241
|
+
(i) => i.status === "rejected"
|
|
242
|
+
);
|
|
243
|
+
if (g.length > 0) {
|
|
244
|
+
for (const i of g)
|
|
245
|
+
i.status === "rejected" && console.error(`Error executing async test: ${String(i.reason)}`);
|
|
246
|
+
return 1;
|
|
247
|
+
}
|
|
248
|
+
const I = E();
|
|
249
|
+
if (I.length > 0) {
|
|
250
|
+
console.error("❌ Failed tests:");
|
|
251
|
+
for (const [i, u] of I.entries()) {
|
|
252
|
+
const b = [u.describePath, u.itPath].filter((T) => T.length > 0).join(" > "), N = b.length > 0 ? `${b}: ` : "";
|
|
253
|
+
console.error(`${i + 1}. ${N}${u.message}`);
|
|
254
|
+
}
|
|
255
|
+
return 1;
|
|
256
|
+
}
|
|
257
|
+
const d = P(), F = Date.now() - n.getTime();
|
|
258
|
+
return console.log(
|
|
259
|
+
[
|
|
260
|
+
"---",
|
|
261
|
+
`${c("Files")} ${o.length} passed`,
|
|
262
|
+
`${c("Evals")} ${d} passed`,
|
|
263
|
+
`${c("Start at")} ${gt(n)}`,
|
|
264
|
+
`${c("Duration")} ${F}ms`
|
|
265
|
+
].join(`
|
|
266
|
+
`)
|
|
267
|
+
), 0;
|
|
268
|
+
}
|
|
269
|
+
export {
|
|
270
|
+
jt as a,
|
|
271
|
+
wt as b,
|
|
272
|
+
ht as c,
|
|
273
|
+
bt as d,
|
|
274
|
+
St as e,
|
|
275
|
+
At as f,
|
|
276
|
+
Jt as g,
|
|
277
|
+
Lt as h,
|
|
278
|
+
at as i,
|
|
279
|
+
vt as j,
|
|
280
|
+
Q as k,
|
|
281
|
+
Bt as l,
|
|
282
|
+
Y as m,
|
|
283
|
+
Zt as n,
|
|
284
|
+
Ft as o,
|
|
285
|
+
kt as p,
|
|
286
|
+
mt as r,
|
|
287
|
+
$t as s
|
|
288
|
+
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "katt",
|
|
3
|
-
"version": "0.0.3",
|
|
3
|
+
"version": "0.0.5",
|
|
4
4
|
"description": "CLI tool that tests the output of agentic AI tools",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"cli",
|
|
@@ -13,8 +13,14 @@
|
|
|
13
13
|
"license": "MIT",
|
|
14
14
|
"type": "module",
|
|
15
15
|
"main": "dist/index.js",
|
|
16
|
+
"exports": {
|
|
17
|
+
".": {
|
|
18
|
+
"types": "./dist/index.d.ts",
|
|
19
|
+
"import": "./dist/index.js"
|
|
20
|
+
}
|
|
21
|
+
},
|
|
16
22
|
"bin": {
|
|
17
|
-
"katt": "dist/index.js"
|
|
23
|
+
"katt": "dist/katt.js"
|
|
18
24
|
},
|
|
19
25
|
"scripts": {
|
|
20
26
|
"build": "vite build",
|
|
@@ -23,7 +29,7 @@
|
|
|
23
29
|
"format": "biome format --write ./src",
|
|
24
30
|
"test": "vitest",
|
|
25
31
|
"typecheck": "tsc -p tsconfig.json --noEmit",
|
|
26
|
-
"test:build": "node ./dist/index.js"
|
|
32
|
+
"test:build": "node ./dist/katt.js"
|
|
27
33
|
},
|
|
28
34
|
"types": "dist/index.d.ts",
|
|
29
35
|
"devDependencies": {
|