katt 0.0.9 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +44 -19
- package/build-tests/check1.eval.js +0 -9
- package/build-tests/check2.eval.js +0 -7
- package/dist/index.js +286 -357
- package/dist/katt.js +6 -4
- package/dist/runCli-DkkiL_uk.js +388 -0
- package/package.json +7 -7
- package/build-tests/__snapshots__/check1.snap.md +0 -1
- package/build-tests/__snapshots__/check1__Hello_World__should_return_the_date_in_a_json_format.snap.md +0 -1
- package/build-tests/__snapshots__/check1__root.snap.md +0 -1
- package/dist/runCli-j5xhVCdB.js +0 -424
package/README.md
CHANGED
|
@@ -17,6 +17,8 @@ Katt is a lightweight testing framework for running AI Evals, inspired by [Jest]
|
|
|
17
17
|
- [Specifying AI Models](#specifying-ai-models)
|
|
18
18
|
- [Development](#development)
|
|
19
19
|
- [How It Works](#how-it-works)
|
|
20
|
+
- [Execution Flow](#execution-flow)
|
|
21
|
+
- [Architecture](#architecture)
|
|
20
22
|
- [Requirements](#requirements)
|
|
21
23
|
- [License](#license)
|
|
22
24
|
- [Contributing](#contributing)
|
|
@@ -125,7 +127,7 @@ describe("Model selection", () => {
|
|
|
125
127
|
|
|
126
128
|
You can also set runtime defaults in `katt.json`.
|
|
127
129
|
|
|
128
|
-
Copilot (default runtime):
|
|
130
|
+
GitHub Copilot (default runtime):
|
|
129
131
|
|
|
130
132
|
```json
|
|
131
133
|
{
|
|
@@ -184,29 +186,36 @@ npm install
|
|
|
184
186
|
|
|
185
187
|
### Verification Process
|
|
186
188
|
|
|
187
|
-
|
|
189
|
+
To verify your changes before opening a pull request, run:
|
|
188
190
|
|
|
189
|
-
1. `npm …`
|
|
191
|
+
1. `npm test`
|
|
190
192
|
2. `npm run typecheck`
|
|
191
|
-
3. `npm run …`
|
|
192
|
-
4. `npm run …`
|
|
193
|
-
5. `npm run test:build`
|
|
193
|
+
3. `npm run lint`
|
|
194
|
+
4. `npm run format`
|
|
194
195
|
|
|
195
|
-
|
|
196
|
+
For more details, see the [verification process section in CONTRIBUTING.md](./CONTRIBUTING.md#verification-process).
|
|
197
|
+
## How It Works
|
|
196
198
|
|
|
199
|
+
Katt runs eval files as executable test programs and coordinates collection, assertion failures, and reporting through its runtime context.
|
|
200
|
+
|
|
201
|
+
## Execution Flow
|
|
202
|
+
|
|
203
|
+
```mermaid
|
|
204
|
+
sequenceDiagram
|
|
205
|
+
participant User as User/CI
|
|
206
|
+
participant CLI as katt CLI
|
|
207
|
+
participant FS as File Scanner
|
|
208
|
+
participant Eval as Eval Runtime
|
|
209
|
+
participant Report as Reporter
|
|
210
|
+
|
|
211
|
+
User->>CLI: Run `npx katt`
|
|
212
|
+
CLI->>FS: Discover `*.eval.js` and `*.eval.ts`
|
|
213
|
+
FS-->>CLI: Return eval file list
|
|
214
|
+
CLI->>Eval: Execute eval files
|
|
215
|
+
Eval-->>CLI: Return pass/fail results
|
|
216
|
+
CLI->>Report: Print per-test output + summary
|
|
217
|
+
Report-->>User: Exit code (`0` pass, `1` fail)
|
|
197
218
|
```
|
|
198
|
-
katt/
|
|
199
|
-
├── src/ # Source code
|
|
200
|
-
│ ├── cli/ # CLI implementation
|
|
201
|
-
│ ├── lib/ # Core libraries (describe, it, expect, prompt)
|
|
202
|
-
│ └── types/ # TypeScript type definitions
|
|
203
|
-
├── examples/ # Example eval files
|
|
204
|
-
├── specs/ # Markdown specifications
|
|
205
|
-
├── package.json # Package configuration
|
|
206
|
-
└── tsconfig.json # TypeScript configuration
|
|
207
|
-
```
|
|
208
|
-
|
|
209
|
-
## How It Works
|
|
210
219
|
|
|
211
220
|
1. Katt searches the current directory recursively for `*.eval.js` and `*.eval.ts` files
|
|
212
221
|
2. It skips `.git` and `node_modules` directories
|
|
@@ -216,6 +225,22 @@ katt/
|
|
|
216
225
|
6. A summary is displayed showing passed/failed tests and total duration
|
|
217
226
|
7. Katt exits with code `0` on success or `1` on failure
|
|
218
227
|
|
|
228
|
+
## Architecture
|
|
229
|
+
|
|
230
|
+
```mermaid
|
|
231
|
+
flowchart LR
|
|
232
|
+
User["Developer"] --> CLI["katt CLI"]
|
|
233
|
+
CLI --> EvalFiles["Eval files (*.eval.ts / *.eval.js)"]
|
|
234
|
+
CLI --> Config["katt.json config"]
|
|
235
|
+
EvalFiles --> Runtime["Test runtime (describe/it context)"]
|
|
236
|
+
Config --> Runtime
|
|
237
|
+
Runtime --> Assertions["Assertions + snapshots"]
|
|
238
|
+
Runtime --> Prompts["prompt() / promptFile()"]
|
|
239
|
+
Prompts --> AI["AI runtime (GitHub Copilot or Codex CLI)"]
|
|
240
|
+
Assertions --> Report["Terminal report + exit code"]
|
|
241
|
+
AI --> Report
|
|
242
|
+
```
|
|
243
|
+
|
|
219
244
|
## Requirements
|
|
220
245
|
|
|
221
246
|
- Node.js
|
|
@@ -7,13 +7,4 @@ describe('Hello World', () => {
|
|
|
7
7
|
const result = await prompt('Return the current year in the format "{ year: YYYY }"');
|
|
8
8
|
expect(result).toContain(`{ year: ${currentData.getFullYear()} }`);
|
|
9
9
|
});
|
|
10
|
-
|
|
11
|
-
it('should classify a response as helpful', async () => {
|
|
12
|
-
const response = await prompt('You are a helpful assistant. Give one short tip for learning JavaScript.');
|
|
13
|
-
await expect(response).toBeClassifiedAs('helpful', { threshold: 3 });
|
|
14
|
-
});
|
|
15
10
|
});
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
const result2 = await prompt('If you read this just say heeey');
|
|
19
|
-
expect(result2.toLowerCase()).toMatchSnapshot();
|
|
@@ -6,10 +6,3 @@ describe('Working with files', () => {
|
|
|
6
6
|
expect(result.toLowerCase()).toContain('hola');
|
|
7
7
|
});
|
|
8
8
|
});
|
|
9
|
-
|
|
10
|
-
describe('Working with prompt as expectation', () => {
|
|
11
|
-
it('It should be friendly', async () => {
|
|
12
|
-
const result = await prompt('You are a friendly assistant. If you read this, say "Hola"!', { model: 'gpt-5.2' });
|
|
13
|
-
expect(result).promptCheck('To be friendly, the response should contain a greeting.');
|
|
14
|
-
});
|
|
15
|
-
});
|