katt 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.nvmrc +1 -0
- package/README.md +29 -6
- package/build-tests/__snapshots__/check1.snap.md +1 -0
- package/build-tests/__snapshots__/check1__Hello_World__should_return_the_date_in_a_json_format.snap.md +1 -0
- package/build-tests/__snapshots__/check1__root.snap.md +1 -0
- package/build-tests/check1.eval.js +19 -0
- package/build-tests/check2.eval.js +15 -0
- package/build-tests/customPrompt.md +1 -0
- package/dist/index.js +225 -167
- package/dist/katt.js +1 -1
- package/dist/runCli-425rgVp8.js +424 -0
- package/katt-codex.json +4 -0
- package/package.json +8 -7
- package/renovate.json +6 -0
- package/dist/runCli-C7uxWavX.js +0 -312
package/.nvmrc
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
24
|
package/README.md
CHANGED
|
@@ -68,6 +68,7 @@ describe("Greeting agent", () => {
|
|
|
68
68
|
- **Classification Matcher**: Built-in `toBeClassifiedAs()` matcher to grade a response against a target label on a 1-5 scale
|
|
69
69
|
- **Concurrent Execution**: Runs eval files concurrently for faster test execution
|
|
70
70
|
- **Model Selection**: Support for specifying custom AI models
|
|
71
|
+
- **Runtime Selection**: Run prompts through GitHub Copilot (default) or Codex
|
|
71
72
|
- **Configurable Timeouts**: Override prompt wait time per test or via `katt.json`
|
|
72
73
|
|
|
73
74
|
## Usage
|
|
@@ -127,11 +128,14 @@ describe("Model selection", () => {
|
|
|
127
128
|
});
|
|
128
129
|
```
|
|
129
130
|
|
|
130
|
-
You can also set
|
|
131
|
+
You can also set runtime defaults in `katt.json`.
|
|
132
|
+
|
|
133
|
+
Copilot (default runtime):
|
|
131
134
|
|
|
132
135
|
```json
|
|
133
136
|
{
|
|
134
|
-
"
|
|
137
|
+
"agent": "gh-copilot",
|
|
138
|
+
"agentOptions": {
|
|
135
139
|
"model": "gpt-5-mini"
|
|
136
140
|
},
|
|
137
141
|
"prompt": {
|
|
@@ -140,10 +144,29 @@ You can also set a default model for the project by adding a `katt.json` file in
|
|
|
140
144
|
}
|
|
141
145
|
```
|
|
142
146
|
|
|
147
|
+
Codex:
|
|
148
|
+
|
|
149
|
+
```json
|
|
150
|
+
{
|
|
151
|
+
"agent": "codex",
|
|
152
|
+
"agentOptions": {
|
|
153
|
+
"model": "gpt-5-codex",
|
|
154
|
+
"profile": "default",
|
|
155
|
+
"sandbox": "workspace-write"
|
|
156
|
+
},
|
|
157
|
+
"prompt": {
|
|
158
|
+
"timeoutMs": 240000
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
```
|
|
162
|
+
|
|
143
163
|
When this file exists:
|
|
144
164
|
|
|
145
|
-
-
|
|
146
|
-
- `
|
|
165
|
+
- Supported agents are:
|
|
166
|
+
- `gh-copilot` (default when `agent` is missing or unsupported)
|
|
167
|
+
- `codex`
|
|
168
|
+
- `prompt("...")` and `promptFile("...")` merge `agentOptions` with call-time options
|
|
169
|
+
- `prompt("...", { model: "..." })` overrides the model from config
|
|
147
170
|
- `prompt.timeoutMs` sets the default wait timeout for long-running prompts
|
|
148
171
|
|
|
149
172
|
## Development
|
|
@@ -201,8 +224,8 @@ katt/
|
|
|
201
224
|
## Requirements
|
|
202
225
|
|
|
203
226
|
- Node.js
|
|
204
|
-
-
|
|
205
|
-
-
|
|
227
|
+
- For `gh-copilot` runtime: access to GitHub Copilot with a logged-in user
|
|
228
|
+
- For `codex` runtime: Codex CLI installed and authenticated (`codex login`)
|
|
206
229
|
|
|
207
230
|
## License
|
|
208
231
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
heeey
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{ year: 2026 }
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
heeey
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { describe, expect, it, prompt } from "katt";
|
|
2
|
+
|
|
3
|
+
describe('Hello World', () => {
|
|
4
|
+
it('should return the date in a json format', async () => {
|
|
5
|
+
const currentData = new Date(Date.now());
|
|
6
|
+
|
|
7
|
+
const result = await prompt('Return the current year in the format "{ year: YYYY }"');
|
|
8
|
+
expect(result).toContain(`{ year: ${currentData.getFullYear()} }`);
|
|
9
|
+
});
|
|
10
|
+
|
|
11
|
+
it('should classify a response as helpful', async () => {
|
|
12
|
+
const response = await prompt('You are a helpful assistant. Give one short tip for learning JavaScript.');
|
|
13
|
+
await expect(response).toBeClassifiedAs('helpful', { threshold: 3 });
|
|
14
|
+
});
|
|
15
|
+
});
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
const result2 = await prompt('If you read this just say heeey');
|
|
19
|
+
expect(result2.toLowerCase()).toMatchSnapshot();
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { describe, expect, it, prompt, promptFile } from "katt";
|
|
2
|
+
|
|
3
|
+
describe('Working with files', () => {
|
|
4
|
+
it('It should load the file and compare', async () => {
|
|
5
|
+
const result = await promptFile('./customPrompt.md');
|
|
6
|
+
expect(result.toLowerCase()).toContain('hola');
|
|
7
|
+
});
|
|
8
|
+
});
|
|
9
|
+
|
|
10
|
+
describe('Working with prompt as expectation', () => {
|
|
11
|
+
it('It should be friendly', async () => {
|
|
12
|
+
const result = await prompt('You are a friendly assistant. If you read this, say "Hola"!', { model: 'gpt-5.2' });
|
|
13
|
+
expect(result).promptCheck('To be friendly, the response should contain a greeting.');
|
|
14
|
+
});
|
|
15
|
+
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
If you read this, say "Hola"!
|
package/dist/index.js
CHANGED
|
@@ -1,191 +1,249 @@
|
|
|
1
|
-
import { r as
|
|
2
|
-
import {
|
|
3
|
-
import { CopilotClient as
|
|
4
|
-
import { readFile as
|
|
5
|
-
import {
|
|
6
|
-
import {
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
1
|
+
import { r as N, c as I, a as j, p as z, b as C, l as h, d as T, g as D, e as B, f as $, h as H, s as A, i as W, j as L, k as G, m as X, n as Y, o as q, q as K, t as J } from "./runCli-425rgVp8.js";
|
|
2
|
+
import { u as jt } from "./runCli-425rgVp8.js";
|
|
3
|
+
import { CopilotClient as Q } from "@github/copilot-sdk";
|
|
4
|
+
import { mkdtemp as V, rm as Z, readFile as R } from "node:fs/promises";
|
|
5
|
+
import { join as S, dirname as v, isAbsolute as tt, resolve as et, basename as nt } from "node:path";
|
|
6
|
+
import { spawn as ot } from "node:child_process";
|
|
7
|
+
import { tmpdir as rt } from "node:os";
|
|
8
|
+
import { readFileSync as st, writeFileSync as M, mkdirSync as it } from "node:fs";
|
|
9
|
+
function Pt(t, e) {
|
|
10
|
+
N(() => {
|
|
11
|
+
j(), z(t);
|
|
12
|
+
const n = D(), o = Date.now(), i = () => D() === n, s = () => Date.now() - o;
|
|
11
13
|
try {
|
|
12
|
-
const
|
|
13
|
-
if (
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
14
|
+
const r = e();
|
|
15
|
+
if (r && typeof r.then == "function") {
|
|
16
|
+
C(
|
|
17
|
+
r.then(() => {
|
|
18
|
+
h(!0, s());
|
|
17
19
|
}).catch((a) => {
|
|
18
|
-
throw
|
|
20
|
+
throw h(!1, s()), a;
|
|
19
21
|
}).finally(() => {
|
|
20
|
-
|
|
22
|
+
T();
|
|
21
23
|
})
|
|
22
24
|
);
|
|
23
25
|
return;
|
|
24
26
|
}
|
|
25
|
-
} catch (
|
|
26
|
-
throw
|
|
27
|
+
} catch (r) {
|
|
28
|
+
throw h(!1, s()), T(), r;
|
|
27
29
|
}
|
|
28
|
-
|
|
29
|
-
},
|
|
30
|
+
h(i(), s()), T();
|
|
31
|
+
}, I());
|
|
30
32
|
}
|
|
31
|
-
function
|
|
32
|
-
|
|
33
|
-
|
|
33
|
+
function Nt(t, e) {
|
|
34
|
+
N(() => {
|
|
35
|
+
B(t);
|
|
34
36
|
try {
|
|
35
37
|
const n = e();
|
|
36
38
|
if (n && typeof n.then == "function") {
|
|
37
|
-
|
|
39
|
+
C(
|
|
38
40
|
n.finally(() => {
|
|
39
|
-
|
|
41
|
+
$();
|
|
40
42
|
})
|
|
41
43
|
);
|
|
42
44
|
return;
|
|
43
45
|
}
|
|
44
46
|
} catch (n) {
|
|
45
|
-
throw
|
|
47
|
+
throw $(), n;
|
|
46
48
|
}
|
|
47
|
-
|
|
48
|
-
},
|
|
49
|
+
$();
|
|
50
|
+
}, I());
|
|
49
51
|
}
|
|
50
|
-
|
|
52
|
+
const at = "katt-codex-", ct = "last-message.txt";
|
|
53
|
+
function ut(t, e) {
|
|
51
54
|
return typeof t == "object" && t !== null && "code" in t && t.code === e;
|
|
52
55
|
}
|
|
53
|
-
function
|
|
54
|
-
|
|
55
|
-
const e = JSON.parse(t);
|
|
56
|
-
return typeof e == "object" && e !== null ? e : void 0;
|
|
57
|
-
} catch (e) {
|
|
58
|
-
console.warn(`Failed to parse katt.json: ${String(e)}`);
|
|
59
|
-
return;
|
|
60
|
-
}
|
|
56
|
+
function m(t) {
|
|
57
|
+
return typeof t == "string" && t.length > 0;
|
|
61
58
|
}
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
try {
|
|
65
|
-
const e = await j(t, "utf8");
|
|
66
|
-
return et(e);
|
|
67
|
-
} catch (e) {
|
|
68
|
-
if (tt(e, "ENOENT"))
|
|
69
|
-
return;
|
|
70
|
-
console.warn(`Failed to read katt.json: ${String(e)}`);
|
|
71
|
-
return;
|
|
72
|
-
}
|
|
59
|
+
function dt(t) {
|
|
60
|
+
return m(t) ? [t] : Array.isArray(t) ? t.filter(m) : [];
|
|
73
61
|
}
|
|
74
|
-
function
|
|
75
|
-
const e =
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
62
|
+
function lt(t, e) {
|
|
63
|
+
const n = e ?? {}, o = [
|
|
64
|
+
"exec",
|
|
65
|
+
"--color",
|
|
66
|
+
"never",
|
|
67
|
+
"--output-last-message",
|
|
68
|
+
t
|
|
69
|
+
];
|
|
70
|
+
m(n.model) && o.push("--model", n.model), m(n.profile) && o.push("--profile", n.profile), m(n.sandbox) && o.push("--sandbox", n.sandbox), n.fullAuto === !0 && o.push("--full-auto"), n.skipGitRepoCheck === !0 && o.push("--skip-git-repo-check"), n.dangerouslyBypassApprovalsAndSandbox === !0 && o.push("--dangerously-bypass-approvals-and-sandbox");
|
|
71
|
+
for (const i of dt(n.config))
|
|
72
|
+
o.push("--config", i);
|
|
73
|
+
return o.push("-"), o;
|
|
82
74
|
}
|
|
83
|
-
function
|
|
84
|
-
|
|
85
|
-
|
|
75
|
+
function ft(t, e, n, o) {
|
|
76
|
+
return new Promise((i, s) => {
|
|
77
|
+
const r = ot("codex", e, {
|
|
78
|
+
cwd: o,
|
|
79
|
+
stdio: ["pipe", "pipe", "pipe"]
|
|
80
|
+
});
|
|
81
|
+
let a = "", c = "", p = !1;
|
|
82
|
+
r.stdout.setEncoding("utf8"), r.stderr.setEncoding("utf8"), r.stdout.on("data", (u) => {
|
|
83
|
+
a += u;
|
|
84
|
+
}), r.stderr.on("data", (u) => {
|
|
85
|
+
c += u;
|
|
86
|
+
}), r.stdin.on("error", () => {
|
|
87
|
+
});
|
|
88
|
+
const g = setTimeout(() => {
|
|
89
|
+
p = !0, r.kill("SIGTERM");
|
|
90
|
+
}, n);
|
|
91
|
+
r.once("error", (u) => {
|
|
92
|
+
clearTimeout(g), s(
|
|
93
|
+
new Error(
|
|
94
|
+
`Failed to start Codex CLI. Ensure codex is installed and available on PATH. ${String(
|
|
95
|
+
u
|
|
96
|
+
)}`
|
|
97
|
+
)
|
|
98
|
+
);
|
|
99
|
+
}), r.once("close", (u, x) => {
|
|
100
|
+
clearTimeout(g), i({
|
|
101
|
+
exitCode: u,
|
|
102
|
+
signal: x,
|
|
103
|
+
stdout: a.trim(),
|
|
104
|
+
stderr: c.trim(),
|
|
105
|
+
timedOut: p
|
|
106
|
+
});
|
|
107
|
+
}), r.stdin.end(t);
|
|
108
|
+
});
|
|
86
109
|
}
|
|
87
|
-
function
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
110
|
+
async function pt(t, e) {
|
|
111
|
+
try {
|
|
112
|
+
return await R(t, "utf8");
|
|
113
|
+
} catch (n) {
|
|
114
|
+
if (!ut(n, "ENOENT"))
|
|
115
|
+
throw n;
|
|
116
|
+
return e;
|
|
117
|
+
}
|
|
91
118
|
}
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
};
|
|
119
|
+
function ht(t) {
|
|
120
|
+
if (t.timedOut)
|
|
121
|
+
return "Codex timed out before returning a response.";
|
|
122
|
+
if (t.exitCode === null)
|
|
123
|
+
return `Codex exited due to signal ${t.signal ?? "unknown"}.`;
|
|
124
|
+
const e = t.stderr.length > 0 ? ` ${t.stderr}` : "";
|
|
125
|
+
return `Codex exited with code ${t.exitCode}.${e}`;
|
|
98
126
|
}
|
|
99
|
-
|
|
100
|
-
|
|
127
|
+
async function mt(t, e, n) {
|
|
128
|
+
const o = n ?? {}, i = m(o.workingDirectory) ? o.workingDirectory : process.cwd(), s = await V(S(rt(), at)), r = S(s, ct);
|
|
129
|
+
try {
|
|
130
|
+
const a = lt(r, n), c = await ft(
|
|
131
|
+
t,
|
|
132
|
+
a,
|
|
133
|
+
e,
|
|
134
|
+
i
|
|
135
|
+
);
|
|
136
|
+
if (c.timedOut)
|
|
137
|
+
throw new Error(`Codex timed out after ${e}ms.`);
|
|
138
|
+
if (c.exitCode !== 0)
|
|
139
|
+
throw new Error(ht(c));
|
|
140
|
+
const p = await pt(r, c.stdout);
|
|
141
|
+
if (p.length === 0)
|
|
142
|
+
throw new Error("Codex did not return a response.");
|
|
143
|
+
return p;
|
|
144
|
+
} finally {
|
|
145
|
+
await Z(s, { recursive: !0, force: !0 });
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
const gt = 6e5;
|
|
149
|
+
function U(t) {
|
|
101
150
|
return typeof t == "string" && t.length > 0 ? t : void 0;
|
|
102
151
|
}
|
|
103
|
-
function
|
|
152
|
+
function E(t) {
|
|
104
153
|
if (!t)
|
|
105
154
|
return;
|
|
106
155
|
const e = { ...t };
|
|
107
156
|
if (e.model !== void 0) {
|
|
108
|
-
const n =
|
|
157
|
+
const n = U(
|
|
158
|
+
typeof e.model == "string" ? e.model : void 0
|
|
159
|
+
);
|
|
109
160
|
n ? e.model = n : delete e.model;
|
|
110
161
|
}
|
|
111
162
|
return Object.keys(e).length > 0 ? e : void 0;
|
|
112
163
|
}
|
|
113
|
-
function
|
|
164
|
+
function O(t) {
|
|
114
165
|
if (!(typeof t != "number" || !Number.isFinite(t)) && !(t <= 0))
|
|
115
166
|
return Math.floor(t);
|
|
116
167
|
}
|
|
117
|
-
function
|
|
168
|
+
function y(t) {
|
|
118
169
|
return !Number.isFinite(t) || (t ?? 0) <= 0 ? 0 : Math.floor(t ?? 0);
|
|
119
170
|
}
|
|
120
|
-
function
|
|
121
|
-
return
|
|
171
|
+
function xt(t) {
|
|
172
|
+
return y(t.inputTokens) + y(t.outputTokens) + y(t.cacheReadTokens) + y(t.cacheWriteTokens);
|
|
122
173
|
}
|
|
123
|
-
async function
|
|
124
|
-
const { timeoutMs: n, ...o } = e, i = await
|
|
174
|
+
async function F(t, e = {}) {
|
|
175
|
+
const { timeoutMs: n, ...o } = e, i = await H(), s = E(i.agentOptions), r = E(
|
|
125
176
|
o
|
|
126
|
-
), a =
|
|
127
|
-
...
|
|
128
|
-
...
|
|
129
|
-
}), c =
|
|
130
|
-
|
|
177
|
+
), a = E({
|
|
178
|
+
...s ?? {},
|
|
179
|
+
...r ?? {}
|
|
180
|
+
}), c = O(i.promptTimeoutMs), g = O(n) ?? c ?? gt, u = U(
|
|
181
|
+
typeof a?.model == "string" ? a.model : void 0
|
|
182
|
+
);
|
|
183
|
+
if (i.agent === "codex") {
|
|
184
|
+
const d = await mt(t, g, a);
|
|
185
|
+
return u && A(u), d;
|
|
186
|
+
}
|
|
187
|
+
const x = new Q({ useLoggedInUser: !0 });
|
|
188
|
+
let w, k, b = 0;
|
|
131
189
|
try {
|
|
132
|
-
await
|
|
133
|
-
|
|
190
|
+
await x.start(), w = await x.createSession(a), k = w.on("assistant.usage", (f) => {
|
|
191
|
+
b += xt(f.data);
|
|
134
192
|
});
|
|
135
|
-
const
|
|
136
|
-
if (!
|
|
193
|
+
const d = await w.sendAndWait({ prompt: t }, g);
|
|
194
|
+
if (!d?.data?.content)
|
|
137
195
|
throw new Error("Copilot did not return a response.");
|
|
138
|
-
return
|
|
196
|
+
return u && A(u), d.data.content;
|
|
139
197
|
} finally {
|
|
140
|
-
const
|
|
141
|
-
if (
|
|
198
|
+
const d = [];
|
|
199
|
+
if (k?.(), b > 0 && W(b), w)
|
|
142
200
|
try {
|
|
143
|
-
await
|
|
201
|
+
await w.destroy();
|
|
144
202
|
} catch (f) {
|
|
145
|
-
|
|
203
|
+
d.push(f);
|
|
146
204
|
}
|
|
147
205
|
try {
|
|
148
|
-
const f = await
|
|
149
|
-
|
|
206
|
+
const f = await x.stop();
|
|
207
|
+
d.push(...f);
|
|
150
208
|
} catch (f) {
|
|
151
|
-
|
|
209
|
+
d.push(f);
|
|
152
210
|
}
|
|
153
|
-
|
|
154
|
-
`Copilot cleanup encountered ${
|
|
211
|
+
d.length > 0 && console.error(
|
|
212
|
+
`Copilot cleanup encountered ${d.length} error(s).`
|
|
155
213
|
);
|
|
156
214
|
}
|
|
157
215
|
}
|
|
158
|
-
async function
|
|
159
|
-
const n =
|
|
160
|
-
return
|
|
216
|
+
async function It(t, e = {}) {
|
|
217
|
+
const n = L.getStore(), o = n?.evalFile ? v(n.evalFile) : process.cwd(), i = tt(t) ? t : et(o, t), s = await R(i, "utf8");
|
|
218
|
+
return F(s, e);
|
|
161
219
|
}
|
|
162
|
-
function
|
|
163
|
-
|
|
164
|
-
describePath:
|
|
165
|
-
itPath:
|
|
220
|
+
function l(t) {
|
|
221
|
+
G({
|
|
222
|
+
describePath: Y(),
|
|
223
|
+
itPath: X(),
|
|
166
224
|
message: t
|
|
167
225
|
});
|
|
168
226
|
}
|
|
169
|
-
async function
|
|
170
|
-
const n = Date.now(), o = `expected '${t}' to satisfy '${e}'`, i =
|
|
227
|
+
async function wt(t, e) {
|
|
228
|
+
const n = Date.now(), o = `expected '${t}' to satisfy '${e}'`, i = F(`Evaluate if the expectation is fulfiled in by the input.
|
|
171
229
|
Expectation: "${e}".
|
|
172
230
|
Input:
|
|
173
231
|
---
|
|
174
232
|
${t}
|
|
175
233
|
---
|
|
176
234
|
Important: Answer with "Yes" or "No" only, without any additional text.
|
|
177
|
-
`).then((
|
|
178
|
-
|
|
235
|
+
`).then((s) => {
|
|
236
|
+
s.includes("Yes") ? h(!0, Date.now() - n, "promptCheck") : s.includes("No") ? l(o) : l(`failed to evaluate expectation '${e}'`);
|
|
179
237
|
});
|
|
180
|
-
return
|
|
238
|
+
return C(i), i;
|
|
181
239
|
}
|
|
182
|
-
const
|
|
183
|
-
function
|
|
240
|
+
const yt = 3;
|
|
241
|
+
function Ct(t) {
|
|
184
242
|
const e = t.match(/\b([1-5])\b/);
|
|
185
243
|
return !e || !e[1] ? null : Number(e[1]);
|
|
186
244
|
}
|
|
187
|
-
async function
|
|
188
|
-
const o = Date.now(), i = n?.threshold ??
|
|
245
|
+
async function bt(t, e, n) {
|
|
246
|
+
const o = Date.now(), i = n?.threshold ?? yt, s = F(
|
|
189
247
|
`Classify the input by how "${e}" it is on a scale of 1 to 5.
|
|
190
248
|
1 means "not ${e}" and 5 means "very ${e}".
|
|
191
249
|
Return only a single number: 1, 2, 3, 4, or 5.
|
|
@@ -195,94 +253,94 @@ Input:
|
|
|
195
253
|
${t}
|
|
196
254
|
---`,
|
|
197
255
|
n?.model ? { model: n.model } : void 0
|
|
198
|
-
).then((
|
|
199
|
-
const a =
|
|
256
|
+
).then((r) => {
|
|
257
|
+
const a = Ct(r);
|
|
200
258
|
if (a === null) {
|
|
201
|
-
|
|
202
|
-
`failed to classify as '${e}'. Evaluator returned '${
|
|
259
|
+
l(
|
|
260
|
+
`failed to classify as '${e}'. Evaluator returned '${r}'`
|
|
203
261
|
);
|
|
204
262
|
return;
|
|
205
263
|
}
|
|
206
264
|
const c = `expected response to be classified as '${e}' with score >= ${i}, got ${a}`;
|
|
207
265
|
if (a < i) {
|
|
208
|
-
|
|
266
|
+
l(c);
|
|
209
267
|
return;
|
|
210
268
|
}
|
|
211
|
-
|
|
269
|
+
h(
|
|
212
270
|
!0,
|
|
213
271
|
Date.now() - o,
|
|
214
272
|
"toBeClassifiedAs"
|
|
215
273
|
);
|
|
216
274
|
});
|
|
217
|
-
return
|
|
275
|
+
return C(s), s;
|
|
218
276
|
}
|
|
219
|
-
function
|
|
277
|
+
function Tt(t, e) {
|
|
220
278
|
const n = `expected '${t}' to include '${e}'`;
|
|
221
|
-
t.includes(e) ||
|
|
279
|
+
t.includes(e) || l(n);
|
|
222
280
|
}
|
|
223
|
-
function
|
|
281
|
+
function _(t) {
|
|
224
282
|
const e = t.trim().replace(/[<>:"/\\|?*\x00-\x1f]/g, "_").replace(/\s+/g, "_");
|
|
225
283
|
return e.length > 0 ? e : "unnamed";
|
|
226
284
|
}
|
|
227
|
-
function
|
|
228
|
-
const t =
|
|
229
|
-
(o) =>
|
|
230
|
-
), e =
|
|
231
|
-
(o) =>
|
|
285
|
+
function $t() {
|
|
286
|
+
const t = K().map(
|
|
287
|
+
(o) => _(o.description)
|
|
288
|
+
), e = J().map(
|
|
289
|
+
(o) => _(o.description)
|
|
232
290
|
), n = [...t, ...e];
|
|
233
291
|
return n.length === 0 ? "root" : n.join("__");
|
|
234
292
|
}
|
|
235
|
-
function
|
|
236
|
-
const n =
|
|
237
|
-
return
|
|
293
|
+
function Et(t) {
|
|
294
|
+
const n = nt(t).replace(/\.eval\.[^./\\]+$/, ""), o = $t();
|
|
295
|
+
return S(
|
|
238
296
|
v(t),
|
|
239
297
|
"__snapshots__",
|
|
240
298
|
`${n}__${o}.snap.md`
|
|
241
299
|
);
|
|
242
300
|
}
|
|
243
|
-
function
|
|
301
|
+
function P(t) {
|
|
244
302
|
return t.split(/\r?\n/);
|
|
245
303
|
}
|
|
246
|
-
function
|
|
304
|
+
function St(t, e) {
|
|
247
305
|
if (t === e)
|
|
248
306
|
return " (no diff)";
|
|
249
|
-
const n =
|
|
250
|
-
for (let
|
|
251
|
-
const a = n[
|
|
307
|
+
const n = P(t), o = P(e), i = Math.max(n.length, o.length), s = [];
|
|
308
|
+
for (let r = 0; r < i; r += 1) {
|
|
309
|
+
const a = n[r], c = o[r];
|
|
252
310
|
if (a !== c) {
|
|
253
311
|
if (a === void 0 && c !== void 0) {
|
|
254
|
-
|
|
312
|
+
s.push(`+ ${c}`);
|
|
255
313
|
continue;
|
|
256
314
|
}
|
|
257
315
|
if (a !== void 0 && c === void 0) {
|
|
258
|
-
|
|
316
|
+
s.push(`- ${a}`);
|
|
259
317
|
continue;
|
|
260
318
|
}
|
|
261
|
-
|
|
319
|
+
s.push(`- ${a ?? ""}`), s.push(`+ ${c ?? ""}`);
|
|
262
320
|
}
|
|
263
321
|
}
|
|
264
|
-
return
|
|
322
|
+
return s.join(`
|
|
265
323
|
`);
|
|
266
324
|
}
|
|
267
|
-
function
|
|
268
|
-
const e =
|
|
325
|
+
function vt(t) {
|
|
326
|
+
const e = L.getStore()?.evalFile;
|
|
269
327
|
if (!e) {
|
|
270
|
-
|
|
328
|
+
l(
|
|
271
329
|
"toMatchSnapshot can only be used while running an eval file."
|
|
272
330
|
);
|
|
273
331
|
return;
|
|
274
332
|
}
|
|
275
|
-
const n =
|
|
333
|
+
const n = Et(e);
|
|
276
334
|
try {
|
|
277
|
-
const o =
|
|
335
|
+
const o = st(n, "utf8");
|
|
278
336
|
if (o === t)
|
|
279
337
|
return;
|
|
280
|
-
if (
|
|
281
|
-
|
|
338
|
+
if (q()) {
|
|
339
|
+
M(n, t, "utf8");
|
|
282
340
|
return;
|
|
283
341
|
}
|
|
284
|
-
const i =
|
|
285
|
-
|
|
342
|
+
const i = St(o, t);
|
|
343
|
+
l(
|
|
286
344
|
[
|
|
287
345
|
`Snapshot mismatch at ${n}`,
|
|
288
346
|
"",
|
|
@@ -295,41 +353,41 @@ function yt(t) {
|
|
|
295
353
|
);
|
|
296
354
|
} catch (o) {
|
|
297
355
|
if (o.code !== "ENOENT") {
|
|
298
|
-
|
|
356
|
+
l(
|
|
299
357
|
`Failed to read snapshot at ${n}: ${String(o)}`
|
|
300
358
|
);
|
|
301
359
|
return;
|
|
302
360
|
}
|
|
303
361
|
try {
|
|
304
|
-
|
|
305
|
-
} catch (
|
|
306
|
-
|
|
307
|
-
`Failed to write snapshot at ${n}: ${String(
|
|
362
|
+
it(v(n), { recursive: !0 }), M(n, t, "utf8");
|
|
363
|
+
} catch (s) {
|
|
364
|
+
l(
|
|
365
|
+
`Failed to write snapshot at ${n}: ${String(s)}`
|
|
308
366
|
);
|
|
309
367
|
}
|
|
310
368
|
}
|
|
311
369
|
}
|
|
312
|
-
function
|
|
370
|
+
function Lt(t) {
|
|
313
371
|
return {
|
|
314
372
|
toContain: (e) => {
|
|
315
|
-
|
|
373
|
+
Tt(t, e);
|
|
316
374
|
},
|
|
317
375
|
toMatchSnapshot: () => {
|
|
318
|
-
|
|
376
|
+
vt(t);
|
|
319
377
|
},
|
|
320
378
|
promptCheck: async (e) => {
|
|
321
|
-
await
|
|
379
|
+
await wt(t, e);
|
|
322
380
|
},
|
|
323
381
|
toBeClassifiedAs: async (e, n) => {
|
|
324
|
-
await
|
|
382
|
+
await bt(t, e, n);
|
|
325
383
|
}
|
|
326
384
|
};
|
|
327
385
|
}
|
|
328
386
|
export {
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
387
|
+
Nt as describe,
|
|
388
|
+
Lt as expect,
|
|
389
|
+
Pt as it,
|
|
390
|
+
F as prompt,
|
|
391
|
+
It as promptFile,
|
|
392
|
+
jt as runCli
|
|
335
393
|
};
|