ai-output-assert 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +833 -103
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,8 +1,14 @@
|
|
|
1
1
|
# ai-output-assert
|
|
2
2
|
|
|
3
|
-
Rich assertion library for LLM outputs
|
|
3
|
+
Rich assertion library for LLM outputs -- 25+ matchers for Jest and Vitest.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
[npm version](https://www.npmjs.com/package/ai-output-assert)
|
|
6
|
+
[npm downloads](https://www.npmjs.com/package/ai-output-assert)
|
|
7
|
+
[license](https://github.com/SiluPanda/ai-output-assert/blob/master/LICENSE)
|
|
8
|
+
[Node.js](https://nodejs.org)
|
|
9
|
+
[TypeScript](https://www.typescriptlang.org/)
|
|
10
|
+
|
|
11
|
+
`ai-output-assert` provides semantic matching, JSON schema validation, tone detection, PII scanning, format verification, safety checks, and output quality assertions -- all as Jest/Vitest custom matchers registered via `expect.extend()`. Every matcher is also available as a standalone function returning a typed `MatcherResult`, so you can use them in production code, CI scripts, and custom tooling without a test runner.
|
|
6
12
|
|
|
7
13
|
## Installation
|
|
8
14
|
|
|
@@ -12,133 +18,708 @@ npm install ai-output-assert
|
|
|
12
18
|
|
|
13
19
|
## Quick Start
|
|
14
20
|
|
|
15
|
-
###
|
|
21
|
+
### Register matchers with Jest or Vitest
|
|
16
22
|
|
|
17
|
-
Call `setupAIAssertions()` once in your test setup file (e.g
|
|
23
|
+
Call `setupAIAssertions()` once in your test setup file (e.g., `vitest.setup.ts` or `jest.setup.ts`). This registers all 25+ matchers on `expect()`.
|
|
18
24
|
|
|
19
25
|
```ts
|
|
26
|
+
// vitest.setup.ts
|
|
20
27
|
import { setupAIAssertions } from 'ai-output-assert';
|
|
21
28
|
|
|
22
29
|
setupAIAssertions({
|
|
23
|
-
|
|
24
|
-
|
|
30
|
+
embedFn: async (text) => await openai.embeddings.create({
|
|
31
|
+
model: 'text-embedding-3-small',
|
|
32
|
+
input: text,
|
|
33
|
+
}).then(r => r.data[0].embedding),
|
|
25
34
|
});
|
|
26
35
|
```
|
|
27
36
|
|
|
28
|
-
Then
|
|
37
|
+
Then write assertions in your tests:
|
|
29
38
|
|
|
30
39
|
```ts
|
|
31
|
-
expect
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
40
|
+
import { describe, it, expect } from 'vitest';
|
|
41
|
+
|
|
42
|
+
describe('chatbot', () => {
|
|
43
|
+
it('answers correctly and safely', async () => {
|
|
44
|
+
const output = await chat('What is the capital of France?');
|
|
45
|
+
|
|
46
|
+
// Semantic correctness
|
|
47
|
+
await expect(output).toBeSemanticallySimilarTo('Paris is the capital of France');
|
|
48
|
+
|
|
49
|
+
// Safety
|
|
50
|
+
expect(output).toNotContainPII();
|
|
51
|
+
expect(output).toNotContainToxicContent();
|
|
52
|
+
expect(output).toNotBeRefusal();
|
|
53
|
+
|
|
54
|
+
// Tone and quality
|
|
55
|
+
expect(output).toHaveTone('formal');
|
|
56
|
+
expect(output).toNotBeTruncated();
|
|
57
|
+
});
|
|
58
|
+
|
|
59
|
+
it('returns valid structured output', async () => {
|
|
60
|
+
const output = await chat('Return user data as JSON');
|
|
61
|
+
|
|
62
|
+
expect(output).toBeValidJSON();
|
|
63
|
+
expect(output).toMatchSchema({
|
|
64
|
+
type: 'object',
|
|
65
|
+
required: ['name', 'email'],
|
|
66
|
+
properties: {
|
|
67
|
+
name: { type: 'string' },
|
|
68
|
+
email: { type: 'string', format: 'email' },
|
|
69
|
+
},
|
|
70
|
+
});
|
|
71
|
+
expect(output).toHaveJSONFields(['name', 'email']);
|
|
72
|
+
});
|
|
73
|
+
});
|
|
35
74
|
```
|
|
36
75
|
|
|
37
|
-
###
|
|
76
|
+
### Use as standalone functions
|
|
38
77
|
|
|
39
|
-
|
|
78
|
+
Every matcher is exported as a standalone function that returns a `MatcherResult`. No test framework required.
|
|
40
79
|
|
|
41
80
|
```ts
|
|
42
|
-
import {
|
|
81
|
+
import { toNotContainPII, toBeFormattedAs, toHaveSentiment } from 'ai-output-assert';
|
|
43
82
|
|
|
44
|
-
const result =
|
|
45
|
-
|
|
83
|
+
const result = toNotContainPII(llmOutput);
|
|
84
|
+
if (!result.pass) {
|
|
85
|
+
console.error(result.message());
|
|
86
|
+
console.log('Detected PII:', result.details.found);
|
|
87
|
+
}
|
|
46
88
|
```
|
|
47
89
|
|
|
48
|
-
##
|
|
90
|
+
## Features
|
|
91
|
+
|
|
92
|
+
- **25+ purpose-built matchers** across seven categories: semantic, content, structural, format, tone/style, safety, and quality
|
|
93
|
+
- **Semantic similarity** via pluggable embedding functions with cosine similarity, or a built-in n-gram Jaccard fallback when no embeddings are configured
|
|
94
|
+
- **JSON Schema validation** powered by Ajv with detailed error paths
|
|
95
|
+
- **PII detection** for emails, SSNs, credit cards (Luhn-validated), phone numbers, and IP addresses
|
|
96
|
+
- **Tone and sentiment analysis** using heuristic scoring (contraction frequency, sentence length, vocabulary complexity, sentiment lexicon)
|
|
97
|
+
- **Toxic content detection** with a severity-tiered word catalog (critical, warning, info)
|
|
98
|
+
- **System prompt leak detection** via configurable regex patterns
|
|
99
|
+
- **Output quality checks** for truncation, excessive hedging, repetition, and refusal detection
|
|
100
|
+
- **Dual usage** -- every matcher works as a Jest/Vitest custom matcher and as a standalone function
|
|
101
|
+
- **Fully typed** -- ships with TypeScript declarations, all exports are typed
|
|
102
|
+
- **Zero external API calls** for non-semantic matchers -- heuristic matchers run in sub-millisecond time
|
|
103
|
+
- **Extensible catalogs** -- supply custom PII patterns, toxic words, hedging phrases, refusal phrases, and entity aliases
|
|
104
|
+
|
|
105
|
+
## API Reference
|
|
106
|
+
|
|
107
|
+
### `setupAIAssertions(options?)`
|
|
49
108
|
|
|
50
|
-
Registers all matchers globally via `expect.extend()`. Call once before your test suite.
|
|
109
|
+
Registers all matchers globally via `expect.extend()`. Call once before your test suite runs.
|
|
51
110
|
|
|
52
111
|
```ts
|
|
53
112
|
import { setupAIAssertions } from 'ai-output-assert';
|
|
54
|
-
import type { AIAssertionOptions } from 'ai-output-assert';
|
|
55
113
|
|
|
56
|
-
|
|
57
|
-
embedFn: myEmbedFn,
|
|
58
|
-
semanticThreshold: 0.85,
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
114
|
+
setupAIAssertions({
|
|
115
|
+
embedFn: myEmbedFn,
|
|
116
|
+
semanticThreshold: 0.85,
|
|
117
|
+
});
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
**Parameters:**
|
|
121
|
+
|
|
122
|
+
| Option | Type | Default | Description |
|
|
123
|
+
|---|---|---|---|
|
|
124
|
+
| `embedFn` | `EmbedFn` | `undefined` | Embedding function for semantic matchers. Signature: `(text: string) => Promise<number[]>` |
|
|
125
|
+
| `semanticThreshold` | `number` | `0.8` | Default cosine similarity threshold for `toBeSemanticallySimilarTo` |
|
|
126
|
+
| `answerThreshold` | `number` | `0.5` | Threshold for `toAnswerQuestion` |
|
|
127
|
+
| `consistencyThreshold` | `number` | `0.3` | Threshold for `toBeFactuallyConsistentWith` |
|
|
128
|
+
| `conciseMaxTokens` | `number` | `100` | Default max words for `toBeConcise` |
|
|
129
|
+
| `verboseMaxSentences` | `number` | `10` | Default max sentences for `toNotBeVerbose` |
|
|
130
|
+
| `hedgingMaxRatio` | `number` | `0.3` | Hedging phrase-to-sentence ratio threshold |
|
|
131
|
+
| `repeatMaxRepetitions` | `number` | `3` | Max allowed n-gram repetitions |
|
|
132
|
+
| `systemPromptLeakThreshold` | `number` | -- | Threshold for system prompt leak detection |
|
|
133
|
+
| `sentimentNeutralRange` | `[number, number]` | -- | Score range considered neutral |
|
|
134
|
+
| `customPIIPatterns` | `PIIPattern[]` | `[]` | Additional PII patterns (merged with defaults) |
|
|
135
|
+
| `customToxicWords` | `ToxicWord[]` | `[]` | Additional toxic words (merged with defaults) |
|
|
136
|
+
| `customEntityAliases` | `Record<string, string[]>` | -- | Entity alias mappings |
|
|
137
|
+
| `customHedgingPhrases` | `string[]` | `[]` | Additional hedging phrases (merged with defaults) |
|
|
138
|
+
| `customRefusalPhrases` | `string[]` | `[]` | Additional refusal phrases (merged with defaults) |
|
|
139
|
+
|
|
140
|
+
### `getGlobalOptions()`
|
|
141
|
+
|
|
142
|
+
Returns the current global `AIAssertionOptions` object set by `setupAIAssertions()`.
|
|
68
143
|
|
|
69
|
-
|
|
144
|
+
```ts
|
|
145
|
+
import { getGlobalOptions } from 'ai-output-assert';
|
|
146
|
+
|
|
147
|
+
const opts = getGlobalOptions();
|
|
148
|
+
console.log(opts.semanticThreshold);
|
|
70
149
|
```
|
|
71
150
|
|
|
72
|
-
|
|
151
|
+
---
|
|
73
152
|
|
|
74
|
-
###
|
|
153
|
+
### Semantic Matchers (async)
|
|
75
154
|
|
|
76
|
-
|
|
77
|
-
|---|---|
|
|
78
|
-
| `toStartWith(prefix)` | Output starts with the given prefix |
|
|
79
|
-
| `toEndWith(suffix)` | Output ends with the given suffix |
|
|
80
|
-
| `toBeFormattedAs(format)` | Output matches a format: `'json' \| 'markdown' \| 'list' \| 'csv' \| 'xml' \| 'yaml' \| 'table'` |
|
|
81
|
-
| `toHaveListItems(items)` | Output contains all given items as list bullets/numbers |
|
|
155
|
+
These matchers return a `Promise<MatcherResult>` and must be awaited in tests. When an `embedFn` is configured, they use cosine similarity over embedding vectors. Without an `embedFn`, they fall back to n-gram Jaccard similarity.
|
|
82
156
|
|
|
83
|
-
|
|
157
|
+
#### `toBeSemanticallySimilarTo(received, expected, options?)`
|
|
84
158
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
| `toNotContain(phrase)` | Output does not contain the phrase |
|
|
90
|
-
| `toMentionEntity(entity, aliases?)` | Output mentions the entity or any of its aliases |
|
|
159
|
+
Asserts that the output is semantically similar to a reference string.
|
|
160
|
+
|
|
161
|
+
```ts
|
|
162
|
+
await expect(output).toBeSemanticallySimilarTo('Paris is the capital of France', { threshold: 0.85 });
|
|
91
163
|
|
|
92
|
-
|
|
164
|
+
// Standalone
|
|
165
|
+
const result = await toBeSemanticallySimilarTo(output, 'Paris is the capital', {
|
|
166
|
+
threshold: 0.9,
|
|
167
|
+
embedFn: myEmbedFn,
|
|
168
|
+
});
|
|
169
|
+
```
|
|
93
170
|
|
|
94
|
-
|
|
|
95
|
-
|
|
96
|
-
| `
|
|
97
|
-
| `
|
|
98
|
-
| `
|
|
99
|
-
| `
|
|
171
|
+
| Parameter | Type | Default | Description |
|
|
172
|
+
|---|---|---|---|
|
|
173
|
+
| `received` | `string` | required | The LLM output to test |
|
|
174
|
+
| `expected` | `string` | required | Reference text to compare against |
|
|
175
|
+
| `options.threshold` | `number` | `0.8` | Minimum cosine similarity (0.0 to 1.0) |
|
|
176
|
+
| `options.embedFn` | `EmbedFn` | global | Embedding function override |
|
|
100
177
|
|
|
101
|
-
|
|
178
|
+
**Returns:** `Promise<MatcherResult>` with `details: { similarity, threshold, method }` where `method` is `'embedding'` or `'ngram-jaccard'`.
|
|
102
179
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
180
|
+
#### `toAnswerQuestion(received, question, options?)`
|
|
181
|
+
|
|
182
|
+
Asserts that the output is a plausible answer to the given question. Uses a lower similarity threshold than `toBeSemanticallySimilarTo` because answers are related to questions, not identical.
|
|
183
|
+
|
|
184
|
+
```ts
|
|
185
|
+
await expect(output).toAnswerQuestion('What is the capital of France?');
|
|
186
|
+
|
|
187
|
+
// Standalone
|
|
188
|
+
const result = await toAnswerQuestion(output, 'What is the capital of France?', {
|
|
189
|
+
embedFn: myEmbedFn,
|
|
190
|
+
});
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
| Parameter | Type | Default | Description |
|
|
194
|
+
|---|---|---|---|
|
|
195
|
+
| `received` | `string` | required | The LLM output |
|
|
196
|
+
| `question` | `string` | required | The question the output should answer |
|
|
197
|
+
| `options.embedFn` | `EmbedFn` | global | Embedding function override |
|
|
198
|
+
|
|
199
|
+
**Returns:** `Promise<MatcherResult>` with `details: { similarity, threshold, question }`. The threshold defaults to `0.5` (see `answerThreshold` under `setupAIAssertions`); there is no per-call threshold option.
|
|
200
|
+
|
|
201
|
+
#### `toBeFactuallyConsistentWith(received, reference, options?)`
|
|
202
|
+
|
|
203
|
+
Asserts that the output does not contradict a reference text. Splits the output into sentences, computes similarity of each sentence against the full reference, and checks that the average coverage score meets the threshold.
|
|
204
|
+
|
|
205
|
+
```ts
|
|
206
|
+
await expect(output).toBeFactuallyConsistentWith(
|
|
207
|
+
'Paris is the capital of France. France is in Western Europe.'
|
|
208
|
+
);
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
| Parameter | Type | Default | Description |
|
|
212
|
+
|---|---|---|---|
|
|
213
|
+
| `received` | `string` | required | The LLM output |
|
|
214
|
+
| `reference` | `string` | required | Authoritative reference text |
|
|
215
|
+
| `options.embedFn` | `EmbedFn` | global | Embedding function override |
|
|
216
|
+
|
|
217
|
+
**Returns:** `Promise<MatcherResult>` with `details: { avgCoverage, threshold, coverageScores, sentenceCount }`. The threshold defaults to `0.3` (see `consistencyThreshold` under `setupAIAssertions`); there is no per-call threshold option.
|
|
218
|
+
|
|
219
|
+
---
|
|
220
|
+
|
|
221
|
+
### Content Matchers
|
|
222
|
+
|
|
223
|
+
All content matchers are synchronous.
|
|
224
|
+
|
|
225
|
+
#### `toContainAllOf(received, phrases)`
|
|
226
|
+
|
|
227
|
+
Asserts that the output contains every phrase in the array. Matching is case-insensitive.
|
|
228
|
+
|
|
229
|
+
```ts
|
|
230
|
+
expect(output).toContainAllOf(['Paris', 'capital', 'France']);
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
| Parameter | Type | Description |
|
|
234
|
+
|---|---|---|
|
|
235
|
+
| `received` | `string` | The LLM output |
|
|
236
|
+
| `phrases` | `string[]` | Phrases that must all be present |
|
|
237
|
+
|
|
238
|
+
**Returns:** `MatcherResult` with `details: { phrases, missing }`.
|
|
239
|
+
|
|
240
|
+
#### `toContainAnyOf(received, phrases)`
|
|
241
|
+
|
|
242
|
+
Asserts that the output contains at least one phrase from the array. Case-insensitive.
|
|
243
|
+
|
|
244
|
+
```ts
|
|
245
|
+
expect(output).toContainAnyOf(['Paris', 'Lyon', 'Marseille']);
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
| Parameter | Type | Description |
|
|
249
|
+
|---|---|---|
|
|
250
|
+
| `received` | `string` | The LLM output |
|
|
251
|
+
| `phrases` | `string[]` | At least one must be present |
|
|
252
|
+
|
|
253
|
+
**Returns:** `MatcherResult` with `details: { phrases, found }`.
|
|
254
|
+
|
|
255
|
+
#### `toNotContain(received, phrase)`
|
|
256
|
+
|
|
257
|
+
Asserts that the output does not contain the given phrase. Case-insensitive.
|
|
258
|
+
|
|
259
|
+
```ts
|
|
260
|
+
expect(output).toNotContain('Lyon');
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
| Parameter | Type | Description |
|
|
264
|
+
|---|---|---|
|
|
265
|
+
| `received` | `string` | The LLM output |
|
|
266
|
+
| `phrase` | `string` | Phrase that must be absent |
|
|
267
|
+
|
|
268
|
+
**Returns:** `MatcherResult` with `details: { phrase, found }`.
|
|
269
|
+
|
|
270
|
+
#### `toMentionEntity(received, entity, aliases?)`
|
|
271
|
+
|
|
272
|
+
Asserts that the output mentions a named entity or any of its aliases. Case-insensitive.
|
|
273
|
+
|
|
274
|
+
```ts
|
|
275
|
+
expect(output).toMentionEntity('United States', ['US', 'U.S.', 'USA']);
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
| Parameter | Type | Description |
|
|
279
|
+
|---|---|---|
|
|
280
|
+
| `received` | `string` | The LLM output |
|
|
281
|
+
| `entity` | `string` | Primary entity name |
|
|
282
|
+
| `aliases` | `string[]` | Optional alternative names |
|
|
283
|
+
|
|
284
|
+
**Returns:** `MatcherResult` with `details: { entity, aliases, foundTerm }`.
|
|
285
|
+
|
|
286
|
+
---
|
|
287
|
+
|
|
288
|
+
### Format Matchers
|
|
289
|
+
|
|
290
|
+
All format matchers are synchronous.
|
|
291
|
+
|
|
292
|
+
#### `toStartWith(received, prefix)`
|
|
293
|
+
|
|
294
|
+
Asserts that the output starts with the given prefix.
|
|
295
|
+
|
|
296
|
+
```ts
|
|
297
|
+
expect(output).toStartWith('Dear Customer');
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
**Returns:** `MatcherResult` with `details: { prefix, receivedStart }`.
|
|
301
|
+
|
|
302
|
+
#### `toEndWith(received, suffix)`
|
|
303
|
+
|
|
304
|
+
Asserts that the output ends with the given suffix.
|
|
305
|
+
|
|
306
|
+
```ts
|
|
307
|
+
expect(output).toEndWith('Best regards.');
|
|
308
|
+
```
|
|
110
309
|
|
|
111
|
-
|
|
310
|
+
**Returns:** `MatcherResult` with `details: { suffix, receivedEnd }`.
|
|
112
311
|
|
|
113
|
-
|
|
312
|
+
#### `toBeFormattedAs(received, format)`
|
|
313
|
+
|
|
314
|
+
Asserts that the output matches a specific format using heuristic detection.
|
|
315
|
+
|
|
316
|
+
```ts
|
|
317
|
+
expect(output).toBeFormattedAs('json');
|
|
318
|
+
expect(output).toBeFormattedAs('markdown');
|
|
319
|
+
expect(output).toBeFormattedAs('csv');
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
| Format | Detection Logic |
|
|
114
323
|
|---|---|
|
|
115
|
-
| `
|
|
116
|
-
| `
|
|
117
|
-
| `
|
|
118
|
-
| `
|
|
324
|
+
| `'json'` | Parses with `JSON.parse()` |
|
|
325
|
+
| `'markdown'` | Contains headings, bold, italic, code fences, lists, or blockquotes |
|
|
326
|
+
| `'list'` | At least 50% of non-empty lines are bullet/numbered items |
|
|
327
|
+
| `'csv'` | 2+ lines with consistent comma-separated column count (min 2 columns) |
|
|
328
|
+
| `'xml'` | Contains matched XML tag pairs |
|
|
329
|
+
| `'yaml'` | At least 50% of non-comment lines match `key: value` pattern |
|
|
330
|
+
| `'table'` | Contains pipe (`\|`) table patterns |
|
|
331
|
+
|
|
332
|
+
**Returns:** `MatcherResult` with `details: { format, reason }`.
|
|
333
|
+
|
|
334
|
+
#### `toHaveListItems(received, items)`
|
|
335
|
+
|
|
336
|
+
Asserts that specific items appear as list entries (on lines starting with `-`, `*`, or a number).
|
|
337
|
+
|
|
338
|
+
```ts
|
|
339
|
+
expect(output).toHaveListItems(['First step', 'Second step']);
|
|
340
|
+
```
|
|
341
|
+
|
|
342
|
+
**Returns:** `MatcherResult` with `details: { items, missing }`.
|
|
343
|
+
|
|
344
|
+
---
|
|
345
|
+
|
|
346
|
+
### Structural Matchers
|
|
347
|
+
|
|
348
|
+
All structural matchers are synchronous.
|
|
349
|
+
|
|
350
|
+
#### `toBeValidJSON(received)`
|
|
351
|
+
|
|
352
|
+
Asserts that the output is parseable JSON. Automatically extracts JSON from markdown code fences (` ```json ... ``` `).
|
|
353
|
+
|
|
354
|
+
```ts
|
|
355
|
+
expect(output).toBeValidJSON();
|
|
356
|
+
```
|
|
357
|
+
|
|
358
|
+
**Returns:** `MatcherResult` with `details: { error }`.
|
|
359
|
+
|
|
360
|
+
#### `toMatchSchema(received, schema)`
|
|
361
|
+
|
|
362
|
+
Validates parsed JSON against a JSON Schema using Ajv. The output is first parsed (with code fence extraction fallback), then validated. Reports all schema violations with JSON pointer paths.
|
|
363
|
+
|
|
364
|
+
```ts
|
|
365
|
+
expect(output).toMatchSchema({
|
|
366
|
+
type: 'object',
|
|
367
|
+
required: ['id', 'name'],
|
|
368
|
+
properties: {
|
|
369
|
+
id: { type: 'number' },
|
|
370
|
+
name: { type: 'string', minLength: 1 },
|
|
371
|
+
},
|
|
372
|
+
});
|
|
373
|
+
```
|
|
374
|
+
|
|
375
|
+
| Parameter | Type | Description |
|
|
376
|
+
|---|---|---|
|
|
377
|
+
| `received` | `string` | The LLM output (must be valid JSON) |
|
|
378
|
+
| `schema` | `object` | A JSON Schema object (draft-07 compatible via Ajv) |
|
|
379
|
+
|
|
380
|
+
**Returns:** `MatcherResult` with `details: { validationErrors }`.
|
|
381
|
+
|
|
382
|
+
#### `toHaveJSONFields(received, fields)`
|
|
383
|
+
|
|
384
|
+
Asserts that the parsed JSON output contains all specified fields. Supports dot-notation for nested paths.
|
|
385
|
+
|
|
386
|
+
```ts
|
|
387
|
+
expect(output).toHaveJSONFields(['user.name', 'user.email', 'metadata.timestamp']);
|
|
388
|
+
```
|
|
389
|
+
|
|
390
|
+
| Parameter | Type | Description |
|
|
391
|
+
|---|---|---|
|
|
392
|
+
| `received` | `string` | The LLM output (must be valid JSON) |
|
|
393
|
+
| `fields` | `string[]` | Dot-notation field paths |
|
|
394
|
+
|
|
395
|
+
**Returns:** `MatcherResult` with `details: { fields, missing }`.
|
|
396
|
+
|
|
397
|
+
#### `toBeValidMarkdown(received)`
|
|
398
|
+
|
|
399
|
+
Checks markdown structural validity: balanced code fences, valid heading hierarchy (no level skipping), and balanced brackets.
|
|
400
|
+
|
|
401
|
+
```ts
|
|
402
|
+
expect(output).toBeValidMarkdown();
|
|
403
|
+
```
|
|
119
404
|
|
|
120
|
-
|
|
405
|
+
**Returns:** `MatcherResult` with `details: { issues }`.
|
|
121
406
|
|
|
122
|
-
|
|
407
|
+
#### `toContainCodeBlock(received, language?)`
|
|
408
|
+
|
|
409
|
+
Asserts that the output contains a fenced code block (` ``` `). Optionally checks for a specific language tag.
|
|
410
|
+
|
|
411
|
+
```ts
|
|
412
|
+
expect(output).toContainCodeBlock();
|
|
413
|
+
expect(output).toContainCodeBlock('typescript');
|
|
414
|
+
```
|
|
415
|
+
|
|
416
|
+
**Returns:** `MatcherResult` with `details: { language, reason }`.
|
|
417
|
+
|
|
418
|
+
---
|
|
419
|
+
|
|
420
|
+
### Tone and Style Matchers
|
|
421
|
+
|
|
422
|
+
All tone matchers are synchronous. They use heuristic analysis -- no LLM calls required.
|
|
423
|
+
|
|
424
|
+
#### `toHaveSentiment(received, expected)`
|
|
425
|
+
|
|
426
|
+
Classifies output sentiment using a lexicon-based approach. Positive and negative word counts are tallied, and the ratio determines the classification.
|
|
427
|
+
|
|
428
|
+
```ts
|
|
429
|
+
expect(output).toHaveSentiment('positive');
|
|
430
|
+
expect(output).toHaveSentiment('neutral');
|
|
431
|
+
```
|
|
432
|
+
|
|
433
|
+
| Sentiment | Rule |
|
|
123
434
|
|---|---|
|
|
124
|
-
| `
|
|
125
|
-
| `
|
|
126
|
-
| `
|
|
127
|
-
|
|
435
|
+
| `'positive'` | Positive word ratio > 0.6 |
|
|
436
|
+
| `'negative'` | Positive word ratio < 0.4 |
|
|
437
|
+
| `'neutral'` | Between 0.4 and 0.6, or no sentiment words found |
|
|
438
|
+
|
|
439
|
+
**Returns:** `MatcherResult` with `details: { expected, detected, positiveCount, negativeCount }`.
|
|
128
440
|
|
|
129
|
-
|
|
441
|
+
#### `toHaveTone(received, expected)`
|
|
130
442
|
|
|
131
|
-
|
|
443
|
+
Detects output tone using multiple linguistic signals. Each tone type uses different heuristics.
|
|
132
444
|
|
|
133
|
-
|
|
445
|
+
```ts
|
|
446
|
+
expect(output).toHaveTone('formal');
|
|
447
|
+
expect(output).toHaveTone('technical');
|
|
448
|
+
```
|
|
449
|
+
|
|
450
|
+
| Tone | Key Signals |
|
|
134
451
|
|---|---|
|
|
135
|
-
| `
|
|
136
|
-
| `
|
|
137
|
-
| `
|
|
452
|
+
| `'formal'` | Low contraction rate, longer average sentence length |
|
|
453
|
+
| `'casual'` | High contraction rate, informal vocabulary (gonna, wanna, tbh, lol) |
|
|
454
|
+
| `'technical'` | High long-word rate (>8 chars), presence of inline or fenced code |
|
|
455
|
+
| `'friendly'` | Exclamation marks, "you" frequency, positive word density |
|
|
456
|
+
|
|
457
|
+
**Returns:** `MatcherResult` with `details: { expected, detected, score, reason }`.
|
|
458
|
+
|
|
459
|
+
#### `toBeConcise(received, maxWords?)`
|
|
138
460
|
|
|
139
|
-
|
|
461
|
+
Asserts that the output contains at most `maxWords` words.
|
|
140
462
|
|
|
141
|
-
|
|
463
|
+
```ts
|
|
464
|
+
expect(output).toBeConcise(); // max 100 words
|
|
465
|
+
expect(output).toBeConcise(50); // max 50 words
|
|
466
|
+
```
|
|
467
|
+
|
|
468
|
+
| Parameter | Type | Default | Description |
|
|
469
|
+
|---|---|---|---|
|
|
470
|
+
| `maxWords` | `number` | `100` | Maximum word count |
|
|
471
|
+
|
|
472
|
+
**Returns:** `MatcherResult` with `details: { wordCount, maxWords }`.
|
|
473
|
+
|
|
474
|
+
#### `toNotBeVerbose(received, options?)`
|
|
475
|
+
|
|
476
|
+
Asserts that the output is within both word and sentence limits.
|
|
477
|
+
|
|
478
|
+
```ts
|
|
479
|
+
expect(output).toNotBeVerbose();
|
|
480
|
+
expect(output).toNotBeVerbose({ maxWords: 150, maxSentences: 5 });
|
|
481
|
+
```
|
|
482
|
+
|
|
483
|
+
| Option | Type | Default | Description |
|
|
484
|
+
|---|---|---|---|
|
|
485
|
+
| `maxWords` | `number` | `200` | Maximum word count |
|
|
486
|
+
| `maxSentences` | `number` | `10` | Maximum sentence count |
|
|
487
|
+
|
|
488
|
+
**Returns:** `MatcherResult` with `details: { wordCount, sentenceCount, maxWords, maxSentences }`.
|
|
489
|
+
|
|
490
|
+
---
|
|
491
|
+
|
|
492
|
+
### Safety Matchers
|
|
493
|
+
|
|
494
|
+
All safety matchers are synchronous with zero external dependencies.
|
|
495
|
+
|
|
496
|
+
#### `toNotContainPII(received, patterns?)`
|
|
497
|
+
|
|
498
|
+
Scans output for personally identifiable information using regex patterns with optional validation functions (e.g., Luhn checksum for credit cards).
|
|
499
|
+
|
|
500
|
+
```ts
|
|
501
|
+
expect(output).toNotContainPII();
|
|
502
|
+
```
|
|
503
|
+
|
|
504
|
+
**Default PII patterns:**
|
|
505
|
+
|
|
506
|
+
| Type | Example Match | Validation |
|
|
507
|
+
|---|---|---|
|
|
508
|
+
| Email | `user@example.com` | Regex only |
|
|
509
|
+
| SSN | `123-45-6789` | 9-digit check |
|
|
510
|
+
| Credit card | `4111 1111 1111 1111` | Luhn checksum |
|
|
511
|
+
| Phone | `(555) 123-4567` | Regex only |
|
|
512
|
+
| IP address | `192.168.1.1` | Octet range 0-255 |
|
|
513
|
+
|
|
514
|
+
**Custom patterns:**
|
|
515
|
+
|
|
516
|
+
```ts
|
|
517
|
+
import type { PIIPattern } from 'ai-output-assert';
|
|
518
|
+
|
|
519
|
+
const customPatterns: PIIPattern[] = [
|
|
520
|
+
{
|
|
521
|
+
type: 'passport',
|
|
522
|
+
pattern: /\b[A-Z]\d{8}\b/,
|
|
523
|
+
label: 'Passport Number',
|
|
524
|
+
validate: (v) => v.length === 9,
|
|
525
|
+
},
|
|
526
|
+
];
|
|
527
|
+
|
|
528
|
+
expect(output).toNotContainPII(customPatterns);
|
|
529
|
+
```
|
|
530
|
+
|
|
531
|
+
**Returns:** `MatcherResult` with `details: { found }` where `found` is an array of `PIIMatch` objects containing `type`, `value`, and `position`.
|
|
532
|
+
|
|
533
|
+
#### `toNotContainToxicContent(received, words?)`
|
|
534
|
+
|
|
535
|
+
Checks output against a tiered toxic word catalog. Severity levels: `'critical'` (slurs), `'warning'` (profanity), `'info'` (mild language). Matching uses word boundaries to avoid false positives on substrings.
|
|
536
|
+
|
|
537
|
+
```ts
|
|
538
|
+
expect(output).toNotContainToxicContent();
|
|
539
|
+
```
|
|
540
|
+
|
|
541
|
+
**Custom words:**
|
|
542
|
+
|
|
543
|
+
```ts
|
|
544
|
+
import type { ToxicWord } from 'ai-output-assert';
|
|
545
|
+
|
|
546
|
+
expect(output).toNotContainToxicContent([
|
|
547
|
+
{ word: 'proprietary-term', severity: 'warning' },
|
|
548
|
+
]);
|
|
549
|
+
```
|
|
550
|
+
|
|
551
|
+
**Returns:** `MatcherResult` with `details: { found }` where each entry has `word` and `severity`.
|
|
552
|
+
|
|
553
|
+
#### `toNotLeakSystemPrompt(received, patterns?)`
|
|
554
|
+
|
|
555
|
+
Detects whether the LLM output leaks system prompt content. Matches against patterns like "you are a", "system prompt", "ignore previous instructions", etc.
|
|
556
|
+
|
|
557
|
+
```ts
|
|
558
|
+
expect(output).toNotLeakSystemPrompt();
|
|
559
|
+
|
|
560
|
+
// With custom patterns
|
|
561
|
+
expect(output).toNotLeakSystemPrompt([
|
|
562
|
+
/your secret instructions/i,
|
|
563
|
+
/confidential system/i,
|
|
564
|
+
]);
|
|
565
|
+
```
|
|
566
|
+
|
|
567
|
+
**Returns:** `MatcherResult` with `details: { matchedPatterns }`.
|
|
568
|
+
|
|
569
|
+
#### `toNotBeRefusal(received, phrases?)`
|
|
570
|
+
|
|
571
|
+
Asserts that the output is not a refusal. Checks against default refusal phrases like "I cannot", "As an AI", "I'm unable to", etc.
|
|
572
|
+
|
|
573
|
+
```ts
|
|
574
|
+
expect(output).toNotBeRefusal();
|
|
575
|
+
```
|
|
576
|
+
|
|
577
|
+
**Returns:** `MatcherResult` with `details: { foundPhrases }`.
|
|
578
|
+
|
|
579
|
+
---
|
|
580
|
+
|
|
581
|
+
### Quality Matchers
|
|
582
|
+
|
|
583
|
+
All quality matchers are synchronous.
|
|
584
|
+
|
|
585
|
+
#### `toNotBeTruncated(received)`
|
|
586
|
+
|
|
587
|
+
Checks for truncation signals: missing terminal punctuation, unclosed code fences, and hanging list items.
|
|
588
|
+
|
|
589
|
+
```ts
|
|
590
|
+
expect(output).toNotBeTruncated();
|
|
591
|
+
```
|
|
592
|
+
|
|
593
|
+
**Returns:** `MatcherResult` with `details: { issues }`.
|
|
594
|
+
|
|
595
|
+
#### `toNotBeHedged(received, phrases?, threshold?)`
|
|
596
|
+
|
|
597
|
+
Asserts that the output does not contain excessive hedging language. Computes the ratio of hedging phrases found to total sentences. Default hedging phrases include "I think", "probably", "perhaps", "maybe", "as far as I know", and others.
|
|
598
|
+
|
|
599
|
+
```ts
|
|
600
|
+
expect(output).toNotBeHedged();
|
|
601
|
+
expect(output).toNotBeHedged(['reportedly'], 0.2);
|
|
602
|
+
```
|
|
603
|
+
|
|
604
|
+
| Parameter | Type | Default | Description |
|
|
605
|
+
|---|---|---|---|
|
|
606
|
+
| `phrases` | `string[]` | `undefined` | Additional hedging phrases (merged with defaults) |
|
|
607
|
+
| `threshold` | `number` | `0.3` | Maximum hedging-phrase-to-sentence ratio |
|
|
608
|
+
|
|
609
|
+
**Returns:** `MatcherResult` with `details: { foundPhrases, ratio, threshold, sentenceCount }`.
|
|
610
|
+
|
|
611
|
+
#### `toBeCompleteJSON(received)`
|
|
612
|
+
|
|
613
|
+
Asserts that the output is complete, parseable JSON. If parsing fails, the failure message distinguishes between truncated JSON (starts with `{` or `[` but lacks a closing bracket) and other parse errors.
|
|
614
|
+
|
|
615
|
+
```ts
|
|
616
|
+
expect(output).toBeCompleteJSON();
|
|
617
|
+
```
|
|
618
|
+
|
|
619
|
+
**Returns:** `MatcherResult` with `details: { complete }` on success, or `details: { truncated, parseError }` on failure.
|
|
620
|
+
|
|
621
|
+
#### `toNotRepeat(received, options?)`
|
|
622
|
+
|
|
623
|
+
Detects repetitive content by extracting n-grams and checking if any n-gram appears more than the threshold number of times.
|
|
624
|
+
|
|
625
|
+
```ts
|
|
626
|
+
expect(output).toNotRepeat();
|
|
627
|
+
expect(output).toNotRepeat({ windowSize: 3, threshold: 2 });
|
|
628
|
+
```
|
|
629
|
+
|
|
630
|
+
| Option | Type | Default | Description |
|
|
631
|
+
|---|---|---|---|
|
|
632
|
+
| `windowSize` | `number` | `4` | N-gram size (number of tokens) |
|
|
633
|
+
| `threshold` | `number` | `3` | Maximum allowed occurrences of any n-gram |
|
|
634
|
+
|
|
635
|
+
**Returns:** `MatcherResult` with `details: { repeated, windowSize, threshold }` where `repeated` is an array of `{ ngram, count }`.
|
|
636
|
+
|
|
637
|
+
---
|
|
638
|
+
|
|
639
|
+
### Utility Functions
|
|
640
|
+
|
|
641
|
+
These low-level utilities are exported for advanced use cases.
|
|
642
|
+
|
|
643
|
+
#### `cosineSimilarity(a, b)`
|
|
644
|
+
|
|
645
|
+
Computes cosine similarity between two numeric vectors. Throws if dimensions do not match. Returns `0` if either vector has zero magnitude.
|
|
646
|
+
|
|
647
|
+
```ts
|
|
648
|
+
import { cosineSimilarity } from 'ai-output-assert';
|
|
649
|
+
|
|
650
|
+
const sim = cosineSimilarity([1, 0, 1], [1, 1, 0]); // 0.5
|
|
651
|
+
```
|
|
652
|
+
|
|
653
|
+
#### `createCachedEmbedFn(embedFn)`
|
|
654
|
+
|
|
655
|
+
Wraps an embedding function with an in-memory cache keyed by input text. Avoids redundant API calls when the same string is embedded multiple times.
|
|
656
|
+
|
|
657
|
+
```ts
|
|
658
|
+
import { createCachedEmbedFn } from 'ai-output-assert';
|
|
659
|
+
|
|
660
|
+
const cachedEmbed = createCachedEmbedFn(myEmbedFn);
|
|
661
|
+
setupAIAssertions({ embedFn: cachedEmbed });
|
|
662
|
+
```
|
|
663
|
+
|
|
664
|
+
#### `tokenize(text)`
|
|
665
|
+
|
|
666
|
+
Splits text into word tokens on whitespace. Returns an empty array for empty input.
|
|
667
|
+
|
|
668
|
+
```ts
|
|
669
|
+
import { tokenize } from 'ai-output-assert';
|
|
670
|
+
tokenize('Hello world'); // ['Hello', 'world']
|
|
671
|
+
```
|
|
672
|
+
|
|
673
|
+
#### `splitSentences(text)`
|
|
674
|
+
|
|
675
|
+
Splits text into sentences, handling common abbreviations (Mr., Dr., U.S., etc.) to avoid incorrect splits.
|
|
676
|
+
|
|
677
|
+
```ts
|
|
678
|
+
import { splitSentences } from 'ai-output-assert';
|
|
679
|
+
splitSentences('Dr. Smith went home. It was late.'); // ['Dr. Smith went home.', 'It was late.']
|
|
680
|
+
```
|
|
681
|
+
|
|
682
|
+
#### `extractNgrams(tokens, n)`
|
|
683
|
+
|
|
684
|
+
Extracts n-gram sequences from a token array. Returns an empty array if the token count is less than `n`.
|
|
685
|
+
|
|
686
|
+
```ts
|
|
687
|
+
import { extractNgrams } from 'ai-output-assert';
|
|
688
|
+
extractNgrams(['a', 'b', 'c', 'd'], 2); // ['a b', 'b c', 'c d']
|
|
689
|
+
```
|
|
690
|
+
|
|
691
|
+
#### `escapeRegex(str)`
|
|
692
|
+
|
|
693
|
+
Escapes special regex characters in a string for safe use in `new RegExp()`.
|
|
694
|
+
|
|
695
|
+
```ts
|
|
696
|
+
import { escapeRegex } from 'ai-output-assert';
|
|
697
|
+
escapeRegex('price is $10.00'); // 'price is \\$10\\.00'
|
|
698
|
+
```
|
|
699
|
+
|
|
700
|
+
#### `luhnCheck(num)`
|
|
701
|
+
|
|
702
|
+
Validates a number string using the Luhn algorithm. Used internally for credit card PII detection.
|
|
703
|
+
|
|
704
|
+
```ts
|
|
705
|
+
import { luhnCheck } from 'ai-output-assert';
|
|
706
|
+
luhnCheck('4111111111111111'); // true
|
|
707
|
+
```
|
|
708
|
+
|
|
709
|
+
#### `extractJSONFromCodeFence(text)`
|
|
710
|
+
|
|
711
|
+
Extracts JSON content from a markdown code fence. Returns `null` if no code fence is found.
|
|
712
|
+
|
|
713
|
+
```ts
|
|
714
|
+
import { extractJSONFromCodeFence } from 'ai-output-assert';
|
|
715
|
+
extractJSONFromCodeFence('```json\n{"key": "value"}\n```'); // '{"key": "value"}'
|
|
716
|
+
```
|
|
717
|
+
|
|
718
|
+
---
|
|
719
|
+
|
|
720
|
+
### Catalogs
|
|
721
|
+
|
|
722
|
+
Default catalogs are exported so you can inspect, extend, or replace them.
|
|
142
723
|
|
|
143
724
|
```ts
|
|
144
725
|
import {
|
|
@@ -150,33 +731,182 @@ import {
|
|
|
150
731
|
} from 'ai-output-assert';
|
|
151
732
|
```
|
|
152
733
|
|
|
153
|
-
|
|
734
|
+
| Catalog | Type | Description |
|
|
735
|
+
|---|---|---|
|
|
736
|
+
| `DEFAULT_PII_PATTERNS` | `PIIPattern[]` | Email, SSN, credit card, phone, IP address patterns |
|
|
737
|
+
| `DEFAULT_TOXIC_WORDS` | `ToxicWord[]` | Tiered toxic word list (critical/warning/info) |
|
|
738
|
+
| `DEFAULT_HEDGING_PHRASES` | `string[]` | Phrases indicating hedging ("I think", "probably", etc.) |
|
|
739
|
+
| `DEFAULT_REFUSAL_PHRASES` | `string[]` | Refusal indicators ("I cannot", "As an AI", etc.) |
|
|
740
|
+
| `DEFAULT_SYSTEM_PROMPT_PATTERNS` | `RegExp[]` | Patterns matching system prompt leakage |
|
|
154
741
|
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
742
|
+
## Configuration
|
|
743
|
+
|
|
744
|
+
### Embedding Function
|
|
745
|
+
|
|
746
|
+
Semantic matchers (`toBeSemanticallySimilarTo`, `toAnswerQuestion`, `toBeFactuallyConsistentWith`) use an embedding function to compute vector similarity. The function is optional: without one, they fall back to n-gram Jaccard similarity, which works for basic cases but is less accurate.
|
|
747
|
+
|
|
748
|
+
Provide your embedding function at setup time or per-matcher call:
|
|
749
|
+
|
|
750
|
+
```ts
|
|
751
|
+
// Global setup
|
|
752
|
+
setupAIAssertions({
|
|
753
|
+
embedFn: async (text) => {
|
|
754
|
+
const response = await openai.embeddings.create({
|
|
755
|
+
model: 'text-embedding-3-small',
|
|
756
|
+
input: text,
|
|
757
|
+
});
|
|
758
|
+
return response.data[0].embedding;
|
|
759
|
+
},
|
|
760
|
+
});
|
|
761
|
+
|
|
762
|
+
// Per-matcher override
|
|
763
|
+
const result = await toBeSemanticallySimilarTo(output, expected, {
|
|
764
|
+
embedFn: differentEmbedFn,
|
|
765
|
+
threshold: 0.9,
|
|
766
|
+
});
|
|
767
|
+
```
|
|
768
|
+
|
|
769
|
+
### Caching Embeddings
|
|
770
|
+
|
|
771
|
+
Wrap your embedding function with `createCachedEmbedFn` to avoid redundant API calls during test runs:
|
|
772
|
+
|
|
773
|
+
```ts
|
|
774
|
+
import { createCachedEmbedFn, setupAIAssertions } from 'ai-output-assert';
|
|
775
|
+
|
|
776
|
+
const cachedEmbed = createCachedEmbedFn(async (text) => {
|
|
777
|
+
return await myEmbeddingAPI(text);
|
|
778
|
+
});
|
|
779
|
+
|
|
780
|
+
setupAIAssertions({ embedFn: cachedEmbed });
|
|
781
|
+
```
|
|
782
|
+
|
|
783
|
+
## Error Handling
|
|
784
|
+
|
|
785
|
+
Every matcher returns a `MatcherResult` with three fields:
|
|
786
|
+
|
|
787
|
+
```ts
|
|
788
|
+
interface MatcherResult {
|
|
789
|
+
pass: boolean;
|
|
790
|
+
message: () => string;
|
|
791
|
+
details: Record<string, unknown>;
|
|
792
|
+
}
|
|
793
|
+
```
|
|
794
|
+
|
|
795
|
+
- **`pass`** -- `true` if the assertion succeeded, `false` otherwise.
|
|
796
|
+
- **`message`** -- A function returning a human-readable explanation. When `pass` is `false`, the message explains why the assertion failed with specific values (similarity scores, missing fields, detected PII, etc.). When `pass` is `true`, the message is the one shown if the negated (`.not`) form of the assertion fails.
|
|
797
|
+
- **`details`** -- A structured object with matcher-specific data for programmatic inspection. Each matcher documents its `details` shape above.
|
|
798
|
+
|
|
799
|
+
Matchers that parse JSON (`toBeValidJSON`, `toMatchSchema`, `toHaveJSONFields`, `toBeCompleteJSON`) gracefully handle parse failures by returning `pass: false` with a descriptive error rather than throwing.
|
|
800
|
+
|
|
801
|
+
Semantic matchers do not catch errors from `embedFn`: if the embedding call throws or rejects, the error propagates to the test runner as a normal test failure.
|
|
802
|
+
|
|
803
|
+
`cosineSimilarity` throws a `Dimension mismatch` error if the two vectors have different lengths.
|
|
804
|
+
|
|
805
|
+
## Advanced Usage
|
|
806
|
+
|
|
807
|
+
### Multi-Dimension Quality Gates
|
|
808
|
+
|
|
809
|
+
Combine multiple matchers to build comprehensive quality gates:
|
|
810
|
+
|
|
811
|
+
```ts
|
|
812
|
+
async function validateOutput(output: string, schema: object): Promise<string[]> {
|
|
813
|
+
const checks = [
|
|
814
|
+
toNotBeRefusal(output),
|
|
815
|
+
toNotBeTruncated(output),
|
|
816
|
+
toNotContainPII(output),
|
|
817
|
+
toNotContainToxicContent(output),
|
|
818
|
+
toBeValidJSON(output),
|
|
819
|
+
toMatchSchema(output, schema),
|
|
820
|
+
];
|
|
821
|
+
|
|
822
|
+
return checks
|
|
823
|
+
.filter((r) => !r.pass)
|
|
824
|
+
.map((r) => r.message());
|
|
825
|
+
}
|
|
826
|
+
```
|
|
827
|
+
|
|
828
|
+
### Custom PII Patterns
|
|
829
|
+
|
|
830
|
+
Extend the default PII catalog with domain-specific patterns:
|
|
831
|
+
|
|
832
|
+
```ts
|
|
833
|
+
import { DEFAULT_PII_PATTERNS, toNotContainPII } from 'ai-output-assert';
|
|
834
|
+
import type { PIIPattern } from 'ai-output-assert';
|
|
835
|
+
|
|
836
|
+
const allPatterns: PIIPattern[] = [
|
|
837
|
+
...DEFAULT_PII_PATTERNS,
|
|
838
|
+
{
|
|
839
|
+
type: 'employee-id',
|
|
840
|
+
pattern: /\bEMP-\d{6}\b/,
|
|
841
|
+
label: 'Employee ID',
|
|
842
|
+
},
|
|
843
|
+
{
|
|
844
|
+
type: 'medical-record',
|
|
845
|
+
pattern: /\bMRN-\d{8}\b/,
|
|
846
|
+
label: 'Medical Record Number',
|
|
847
|
+
validate: (v) => v.length === 12,
|
|
848
|
+
},
|
|
849
|
+
];
|
|
850
|
+
|
|
851
|
+
const result = toNotContainPII(output, allPatterns);
|
|
852
|
+
```
|
|
853
|
+
|
|
854
|
+
### Using Without a Test Framework
|
|
855
|
+
|
|
856
|
+
All matchers work as plain functions. Use them in production validation pipelines, CI scripts, or custom tooling:
|
|
857
|
+
|
|
858
|
+
```ts
|
|
859
|
+
import {
|
|
860
|
+
toNotContainPII,
|
|
861
|
+
toBeFormattedAs,
|
|
862
|
+
toNotBeRefusal,
|
|
863
|
+
toNotBeTruncated,
|
|
864
|
+
} from 'ai-output-assert';
|
|
865
|
+
|
|
866
|
+
function validateLLMResponse(output: string): { valid: boolean; errors: string[] } {
|
|
867
|
+
const matchers = [
|
|
868
|
+
toNotContainPII(output),
|
|
869
|
+
toBeFormattedAs(output, 'json'),
|
|
870
|
+
toNotBeRefusal(output),
|
|
871
|
+
toNotBeTruncated(output),
|
|
872
|
+
];
|
|
873
|
+
|
|
874
|
+
const errors = matchers.filter((r) => !r.pass).map((r) => r.message());
|
|
875
|
+
return { valid: errors.length === 0, errors };
|
|
876
|
+
}
|
|
877
|
+
```
|
|
878
|
+
|
|
879
|
+
## TypeScript
|
|
880
|
+
|
|
881
|
+
The package ships with full TypeScript declarations. All types are exported from the package root:
|
|
882
|
+
|
|
883
|
+
```ts
|
|
884
|
+
import type {
|
|
885
|
+
MatcherResult,
|
|
886
|
+
EmbedFn,
|
|
887
|
+
Tone,
|
|
888
|
+
ToneScores,
|
|
889
|
+
Sentiment,
|
|
890
|
+
OutputFormat,
|
|
891
|
+
PIIPattern,
|
|
892
|
+
ToxicWord,
|
|
893
|
+
PIIMatch,
|
|
894
|
+
AIAssertionOptions,
|
|
895
|
+
} from 'ai-output-assert';
|
|
896
|
+
```
|
|
897
|
+
|
|
898
|
+
| Type | Definition |
|
|
169
899
|
|---|---|
|
|
170
|
-
| `MatcherResult` |
|
|
171
|
-
| `EmbedFn` |
|
|
900
|
+
| `MatcherResult` | `{ pass: boolean; message: () => string; details: Record<string, unknown> }` |
|
|
901
|
+
| `EmbedFn` | `(text: string) => Promise<number[]>` |
|
|
172
902
|
| `Tone` | `'formal' \| 'casual' \| 'technical' \| 'friendly'` |
|
|
173
903
|
| `ToneScores` | `Record<Tone, number>` |
|
|
174
904
|
| `Sentiment` | `'positive' \| 'negative' \| 'neutral'` |
|
|
175
905
|
| `OutputFormat` | `'json' \| 'markdown' \| 'list' \| 'csv' \| 'xml' \| 'yaml' \| 'table'` |
|
|
176
|
-
| `PIIPattern` |
|
|
177
|
-
| `ToxicWord` |
|
|
178
|
-
| `PIIMatch` |
|
|
179
|
-
| `AIAssertionOptions` | Configuration for `setupAIAssertions()` |
|
|
906
|
+
| `PIIPattern` | `{ type: string; pattern: RegExp; validate?: (match: string) => boolean; label: string }` |
|
|
907
|
+
| `ToxicWord` | `{ word: string; severity: 'critical' \| 'warning' \| 'info' }` |
|
|
908
|
+
| `PIIMatch` | `{ type: string; value: string; position: [number, number] }` |
|
|
909
|
+
| `AIAssertionOptions` | Configuration object for `setupAIAssertions()` -- see [API Reference](#setupaiassertionsoptions) |
|
|
180
910
|
|
|
181
911
|
## License
|
|
182
912
|
|