katt 0.0.6 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.nvmrc +1 -0
- package/README.md +35 -7
- package/build-tests/__snapshots__/check1.snap.md +1 -0
- package/build-tests/__snapshots__/check1__Hello_World__should_return_the_date_in_a_json_format.snap.md +1 -0
- package/build-tests/__snapshots__/check1__root.snap.md +1 -0
- package/build-tests/check1.eval.js +19 -0
- package/build-tests/check2.eval.js +15 -0
- package/build-tests/customPrompt.md +1 -0
- package/dist/index.js +235 -199
- package/dist/katt.js +1 -1
- package/dist/runCli-425rgVp8.js +424 -0
- package/katt-codex.json +4 -0
- package/package.json +8 -7
- package/renovate.json +6 -0
- package/dist/runCli-B3oIBxOl.js +0 -317
package/.nvmrc
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
24
|
package/README.md
CHANGED
|
@@ -9,6 +9,7 @@ Katt is a lightweight testing framework for running AI Evals, inspired by [Jest]
|
|
|
9
9
|
|
|
10
10
|
- [Overview](#overview)
|
|
11
11
|
- [API Documentation](#api-documentation)
|
|
12
|
+
- [Articles](#articles)
|
|
12
13
|
- [Hello World - Example](#hello-world---example)
|
|
13
14
|
- [Main Features](#main-features)
|
|
14
15
|
- [Usage](#usage)
|
|
@@ -34,6 +35,10 @@ Katt is designed to evaluate and validate the behavior of AI agents like **Claud
|
|
|
34
35
|
|
|
35
36
|
For a complete list of features and usage examples, see [docs/api-documentation.md](https://github.com/raphaelpor/katt/blob/main/docs/api-documentation.md).
|
|
36
37
|
|
|
38
|
+
## Articles
|
|
39
|
+
|
|
40
|
+
- [Introducing Katt](https://github.com/raphaelpor/katt/blob/main/docs/articles/introduction-to-katt.md)
|
|
41
|
+
|
|
37
42
|
## Hello World - Example
|
|
38
43
|
|
|
39
44
|
```typescript
|
|
@@ -63,6 +68,7 @@ describe("Greeting agent", () => {
|
|
|
63
68
|
- **Classification Matcher**: Built-in `toBeClassifiedAs()` matcher to grade a response against a target label on a 1-5 scale
|
|
64
69
|
- **Concurrent Execution**: Runs eval files concurrently for faster test execution
|
|
65
70
|
- **Model Selection**: Support for specifying custom AI models
|
|
71
|
+
- **Runtime Selection**: Run prompts through GitHub Copilot (default) or Codex
|
|
66
72
|
- **Configurable Timeouts**: Override prompt wait time per test or via `katt.json`
|
|
67
73
|
|
|
68
74
|
## Usage
|
|
@@ -86,7 +92,7 @@ expect(result).toContain("hello world");
|
|
|
86
92
|
2. Run Katt from your project directory:
|
|
87
93
|
|
|
88
94
|
```bash
|
|
89
|
-
katt
|
|
95
|
+
npx katt
|
|
90
96
|
```
|
|
91
97
|
|
|
92
98
|
### Using promptFile
|
|
@@ -122,11 +128,14 @@ describe("Model selection", () => {
|
|
|
122
128
|
});
|
|
123
129
|
```
|
|
124
130
|
|
|
125
|
-
You can also set
|
|
131
|
+
You can also set runtime defaults in `katt.json`.
|
|
132
|
+
|
|
133
|
+
Copilot (default runtime):
|
|
126
134
|
|
|
127
135
|
```json
|
|
128
136
|
{
|
|
129
|
-
"
|
|
137
|
+
"agent": "gh-copilot",
|
|
138
|
+
"agentOptions": {
|
|
130
139
|
"model": "gpt-5-mini"
|
|
131
140
|
},
|
|
132
141
|
"prompt": {
|
|
@@ -135,10 +144,29 @@ You can also set a default model for the project by adding a `katt.json` file in
|
|
|
135
144
|
}
|
|
136
145
|
```
|
|
137
146
|
|
|
147
|
+
Codex:
|
|
148
|
+
|
|
149
|
+
```json
|
|
150
|
+
{
|
|
151
|
+
"agent": "codex",
|
|
152
|
+
"agentOptions": {
|
|
153
|
+
"model": "gpt-5-codex",
|
|
154
|
+
"profile": "default",
|
|
155
|
+
"sandbox": "workspace-write"
|
|
156
|
+
},
|
|
157
|
+
"prompt": {
|
|
158
|
+
"timeoutMs": 240000
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
```
|
|
162
|
+
|
|
138
163
|
When this file exists:
|
|
139
164
|
|
|
140
|
-
-
|
|
141
|
-
- `
|
|
165
|
+
- Supported agents are:
|
|
166
|
+
- `gh-copilot` (default when `agent` is missing or unsupported)
|
|
167
|
+
- `codex`
|
|
168
|
+
- `prompt("...")` and `promptFile("...")` merge `agentOptions` with call-time options
|
|
169
|
+
- `prompt("...", { model: "..." })` overrides the model from config
|
|
142
170
|
- `prompt.timeoutMs` sets the default wait timeout for long-running prompts
|
|
143
171
|
|
|
144
172
|
## Development
|
|
@@ -196,8 +224,8 @@ katt/
|
|
|
196
224
|
## Requirements
|
|
197
225
|
|
|
198
226
|
- Node.js
|
|
199
|
-
-
|
|
200
|
-
-
|
|
227
|
+
- For `gh-copilot` runtime: access to GitHub Copilot with a logged-in user
|
|
228
|
+
- For `codex` runtime: Codex CLI installed and authenticated (`codex login`)
|
|
201
229
|
|
|
202
230
|
## License
|
|
203
231
|
|
package/build-tests/__snapshots__/check1.snap.md
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
heeey
|
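package/build-tests/__snapshots__/check1__Hello_World__should_return_the_date_in_a_json_format.snap.md
ADDED
|
@@ -0,0 +1 @@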
|
|
|
1
|
+
{ year: 2026 }
|
package/build-tests/__snapshots__/check1__root.snap.md
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
heeey
|
package/build-tests/check1.eval.js
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { describe, expect, it, prompt } from "katt";
|
|
2
|
+
|
|
3
|
+
describe('Hello World', () => {
|
|
4
|
+
it('should return the date in a json format', async () => {
|
|
5
|
+
const currentData = new Date(Date.now());
|
|
6
|
+
|
|
7
|
+
const result = await prompt('Return the current year in the format "{ year: YYYY }"');
|
|
8
|
+
expect(result).toContain(`{ year: ${currentData.getFullYear()} }`);
|
|
9
|
+
});
|
|
10
|
+
|
|
11
|
+
it('should classify a response as helpful', async () => {
|
|
12
|
+
const response = await prompt('You are a helpful assistant. Give one short tip for learning JavaScript.');
|
|
13
|
+
await expect(response).toBeClassifiedAs('helpful', { threshold: 3 });
|
|
14
|
+
});
|
|
15
|
+
});
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
const result2 = await prompt('If you read this just say heeey');
|
|
19
|
+
expect(result2.toLowerCase()).toMatchSnapshot();
|
package/build-tests/check2.eval.js
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { describe, expect, it, prompt, promptFile } from "katt";
|
|
2
|
+
|
|
3
|
+
describe('Working with files', () => {
|
|
4
|
+
it('It should load the file and compare', async () => {
|
|
5
|
+
const result = await promptFile('./customPrompt.md');
|
|
6
|
+
expect(result.toLowerCase()).toContain('hola');
|
|
7
|
+
});
|
|
8
|
+
});
|
|
9
|
+
|
|
10
|
+
describe('Working with prompt as expectation', () => {
|
|
11
|
+
it('It should be friendly', async () => {
|
|
12
|
+
const result = await prompt('You are a friendly assistant. If you read this, say "Hola"!', { model: 'gpt-5.2' });
|
|
13
|
+
expect(result).promptCheck('To be friendly, the response should contain a greeting.');
|
|
14
|
+
});
|
|
15
|
+
});
|
package/build-tests/customPrompt.md
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
If you read this, say "Hola"!
|