@evalgate/sdk 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. package/CHANGELOG.md +638 -0
  2. package/README.md +398 -0
  3. package/dist/assertions.d.ts +189 -0
  4. package/dist/assertions.js +662 -0
  5. package/dist/batch.d.ts +68 -0
  6. package/dist/batch.js +179 -0
  7. package/dist/cache.d.ts +65 -0
  8. package/dist/cache.js +131 -0
  9. package/dist/cli/api.d.ts +108 -0
  10. package/dist/cli/api.js +132 -0
  11. package/dist/cli/baseline.d.ts +10 -0
  12. package/dist/cli/baseline.js +172 -0
  13. package/dist/cli/check.d.ts +73 -0
  14. package/dist/cli/check.js +355 -0
  15. package/dist/cli/ci-context.d.ts +6 -0
  16. package/dist/cli/ci-context.js +112 -0
  17. package/dist/cli/ci.d.ts +45 -0
  18. package/dist/cli/ci.js +192 -0
  19. package/dist/cli/config.d.ts +30 -0
  20. package/dist/cli/config.js +230 -0
  21. package/dist/cli/constants.d.ts +15 -0
  22. package/dist/cli/constants.js +18 -0
  23. package/dist/cli/diff.d.ts +173 -0
  24. package/dist/cli/diff.js +685 -0
  25. package/dist/cli/discover.d.ts +84 -0
  26. package/dist/cli/discover.js +419 -0
  27. package/dist/cli/doctor.d.ts +88 -0
  28. package/dist/cli/doctor.js +675 -0
  29. package/dist/cli/env.d.ts +21 -0
  30. package/dist/cli/env.js +42 -0
  31. package/dist/cli/explain.d.ts +58 -0
  32. package/dist/cli/explain.js +561 -0
  33. package/dist/cli/formatters/github.d.ts +8 -0
  34. package/dist/cli/formatters/github.js +135 -0
  35. package/dist/cli/formatters/human.d.ts +6 -0
  36. package/dist/cli/formatters/human.js +110 -0
  37. package/dist/cli/formatters/json.d.ts +6 -0
  38. package/dist/cli/formatters/json.js +10 -0
  39. package/dist/cli/formatters/pr-comment.d.ts +12 -0
  40. package/dist/cli/formatters/pr-comment.js +103 -0
  41. package/dist/cli/formatters/types.d.ts +103 -0
  42. package/dist/cli/formatters/types.js +8 -0
  43. package/dist/cli/gate.d.ts +21 -0
  44. package/dist/cli/gate.js +179 -0
  45. package/dist/cli/impact-analysis.d.ts +63 -0
  46. package/dist/cli/impact-analysis.js +252 -0
  47. package/dist/cli/index.d.ts +9 -0
  48. package/dist/cli/index.js +332 -0
  49. package/dist/cli/init.d.ts +16 -0
  50. package/dist/cli/init.js +292 -0
  51. package/dist/cli/manifest.d.ts +103 -0
  52. package/dist/cli/manifest.js +282 -0
  53. package/dist/cli/migrate.d.ts +41 -0
  54. package/dist/cli/migrate.js +349 -0
  55. package/dist/cli/policy-packs.d.ts +23 -0
  56. package/dist/cli/policy-packs.js +89 -0
  57. package/dist/cli/print-config.d.ts +29 -0
  58. package/dist/cli/print-config.js +270 -0
  59. package/dist/cli/profiles.d.ts +28 -0
  60. package/dist/cli/profiles.js +30 -0
  61. package/dist/cli/reason-codes.d.ts +17 -0
  62. package/dist/cli/reason-codes.js +19 -0
  63. package/dist/cli/regression-gate.d.ts +15 -0
  64. package/dist/cli/regression-gate.js +341 -0
  65. package/dist/cli/render/snippet.d.ts +5 -0
  66. package/dist/cli/render/snippet.js +15 -0
  67. package/dist/cli/render/sort.d.ts +10 -0
  68. package/dist/cli/render/sort.js +24 -0
  69. package/dist/cli/report/build-check-report.d.ts +19 -0
  70. package/dist/cli/report/build-check-report.js +132 -0
  71. package/dist/cli/run.d.ts +101 -0
  72. package/dist/cli/run.js +395 -0
  73. package/dist/cli/share.d.ts +17 -0
  74. package/dist/cli/share.js +91 -0
  75. package/dist/cli/upgrade.d.ts +15 -0
  76. package/dist/cli/upgrade.js +492 -0
  77. package/dist/cli/workspace.d.ts +31 -0
  78. package/dist/cli/workspace.js +68 -0
  79. package/dist/client.d.ts +368 -0
  80. package/dist/client.js +893 -0
  81. package/dist/client.request.test.d.ts +1 -0
  82. package/dist/client.request.test.js +232 -0
  83. package/dist/context.d.ts +134 -0
  84. package/dist/context.js +215 -0
  85. package/dist/errors.d.ts +82 -0
  86. package/dist/errors.js +298 -0
  87. package/dist/export.d.ts +195 -0
  88. package/dist/export.js +344 -0
  89. package/dist/index.d.ts +44 -0
  90. package/dist/index.js +153 -0
  91. package/dist/integrations/anthropic.d.ts +91 -0
  92. package/dist/integrations/anthropic.js +163 -0
  93. package/dist/integrations/openai-eval.d.ts +57 -0
  94. package/dist/integrations/openai-eval.js +232 -0
  95. package/dist/integrations/openai.d.ts +92 -0
  96. package/dist/integrations/openai.js +160 -0
  97. package/dist/local.d.ts +39 -0
  98. package/dist/local.js +148 -0
  99. package/dist/logger.d.ts +128 -0
  100. package/dist/logger.js +227 -0
  101. package/dist/matchers/index.d.ts +1 -0
  102. package/dist/matchers/index.js +6 -0
  103. package/dist/matchers/to-pass-gate.d.ts +29 -0
  104. package/dist/matchers/to-pass-gate.js +35 -0
  105. package/dist/pagination.d.ts +74 -0
  106. package/dist/pagination.js +139 -0
  107. package/dist/regression.d.ts +100 -0
  108. package/dist/regression.js +44 -0
  109. package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
  110. package/dist/runtime/adapters/config-to-dsl.js +400 -0
  111. package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
  112. package/dist/runtime/adapters/testsuite-to-dsl.js +276 -0
  113. package/dist/runtime/context.d.ts +26 -0
  114. package/dist/runtime/context.js +74 -0
  115. package/dist/runtime/eval.d.ts +46 -0
  116. package/dist/runtime/eval.js +244 -0
  117. package/dist/runtime/execution-mode.d.ts +80 -0
  118. package/dist/runtime/execution-mode.js +357 -0
  119. package/dist/runtime/executor.d.ts +16 -0
  120. package/dist/runtime/executor.js +152 -0
  121. package/dist/runtime/registry.d.ts +78 -0
  122. package/dist/runtime/registry.js +403 -0
  123. package/dist/runtime/run-report.d.ts +200 -0
  124. package/dist/runtime/run-report.js +222 -0
  125. package/dist/runtime/types.d.ts +356 -0
  126. package/dist/runtime/types.js +76 -0
  127. package/dist/snapshot.d.ts +176 -0
  128. package/dist/snapshot.js +322 -0
  129. package/dist/streaming.d.ts +173 -0
  130. package/dist/streaming.js +268 -0
  131. package/dist/testing.d.ts +273 -0
  132. package/dist/testing.js +317 -0
  133. package/dist/types.d.ts +754 -0
  134. package/dist/types.js +54 -0
  135. package/dist/utils/input-hash.d.ts +8 -0
  136. package/dist/utils/input-hash.js +41 -0
  137. package/dist/version.d.ts +7 -0
  138. package/dist/version.js +10 -0
  139. package/dist/workflows.d.ts +389 -0
  140. package/dist/workflows.js +671 -0
  141. package/package.json +117 -0
package/README.md ADDED
@@ -0,0 +1,398 @@
1
+ # @evalgate/sdk
2
+
3
+ [![npm version](https://img.shields.io/npm/v/@evalgate/sdk.svg)](https://www.npmjs.com/package/@evalgate/sdk)
4
+ [![npm downloads](https://img.shields.io/npm/dm/@evalgate/sdk.svg)](https://www.npmjs.com/package/@evalgate/sdk)
5
+ [![TypeScript](https://img.shields.io/badge/TypeScript-strict-blue.svg)](https://www.typescriptlang.org/)
6
+ [![SDK Tests](https://img.shields.io/badge/tests-172%20passed-brightgreen.svg)](#)
7
+ [![Contract Version](https://img.shields.io/badge/report%20schema-v1-blue.svg)](#)
8
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
9
+
10
+ **One-command CI for AI evaluation. Complete pipeline: discover → manifest → impact → run → diff → PR summary.**
11
+
12
+ Zero to production CI in 60 seconds. No infra. No lock-in. Remove anytime.
13
+
14
+ ---
15
+
16
+ ## Quick Start (60 seconds)
17
+
18
+ Add this to your `.github/workflows/evalai.yml`:
19
+
20
+ ```yaml
21
+ name: EvalGate CI
22
+ on: [push, pull_request]
23
+ jobs:
24
+   evalai:
25
+     runs-on: ubuntu-latest
26
+     steps:
27
+       - uses: actions/checkout@v4
28
+       - uses: actions/setup-node@v4
29
+       - run: npm ci
30
+       - run: npx @evalgate/sdk ci --format github --write-results --base main
31
+       - uses: actions/upload-artifact@v4
32
+         if: always()
33
+         with:
34
+           name: evalai-results
35
+           path: .evalai/
36
+ ```
37
+
38
+ Create `eval/your-spec.spec.ts`:
39
+
40
+ ```typescript
41
+ import { defineEval } from "@evalgate/sdk";
42
+
43
+ defineEval({
44
+ name: "Basic Math Operations",
45
+ description: "Test fundamental arithmetic",
46
+ prompt: "Test: 1+1=2, string concatenation, array includes",
47
+ expected: "All tests should pass",
48
+ tags: ["basic", "math"],
49
+ category: "unit-test"
50
+ });
51
+ ```
52
+
53
+ ```bash
54
+ git add .github/workflows/evalai.yml eval/
55
+ git commit -m "feat: add EvalGate CI pipeline"
56
+ git push
57
+ ```
58
+
59
+ That's it! Your CI now:
60
+ - ✅ Discovers evaluation specs automatically
61
+ - ✅ Can run only impacted specs with `--impacted-only` (smart caching)
62
+ - ✅ Compares results against base branch
63
+ - ✅ Posts rich summary in PR with regressions
64
+ - ✅ Exits with proper codes (0=clean, 1=regressions, 2=config)
65
+
66
+ ---
67
+
68
+ ## 🚀 New in v2.0.0: One-Command CI
69
+
70
+ ### `evalai ci` - Complete CI Pipeline
71
+
72
+ ```bash
73
+ npx @evalgate/sdk ci --format github --write-results --base main
74
+ ```
75
+
76
+ **What it does:**
77
+ 1. **Discover** - Finds all evaluation specs automatically
78
+ 2. **Manifest** - Builds stable manifest if missing
79
+ 3. **Impact Analysis** - Runs only specs impacted by changes (optional)
80
+ 4. **Run** - Executes evaluations with artifact retention
81
+ 5. **Diff** - Compares results against base branch
82
+ 6. **PR Summary** - Posts rich markdown summary to GitHub
83
+ 7. **Debug Flow** - Prints copy/paste next step on failure
84
+
85
+ **Advanced Options:**
86
+ ```bash
87
+ npx @evalgate/sdk ci --base main --impacted-only # Run only impacted specs
88
+ npx @evalgate/sdk ci --format json --write-results # JSON output for automation
89
+ npx @evalgate/sdk ci --base develop # Custom base branch
90
+ ```
91
+
92
+ ### Smart Diffing & GitHub Integration
93
+
94
+ ```bash
95
+ npx @evalgate/sdk diff --base main --head last --format github
96
+ ```
97
+
98
+ **Features:**
99
+ - 📊 Pass rate delta and score changes
100
+ - 🚨 Regression detection with classifications
101
+ - 📈 Improvements and new specs
102
+ - 📁 Artifact links and technical details
103
+ - 🎯 Exit codes: 0=clean, 1=regressions, 2=config
104
+
105
+ ### Self-Documenting Failures
106
+
107
+ Every failure prints a clear next step:
108
+
109
+ ```
110
+ 🔧 Next step for debugging:
111
+ Download base artifact and run: evalai diff --base .evalai/base-run.json --head .evalai/last-run.json
112
+ Artifacts: .evalai/runs/
113
+ ```
114
+
115
+ ---
116
+
117
+ ## CLI Commands
118
+
119
+ ### 🚀 One-Command CI (v2.0.0)
120
+
121
+ | Command | Description |
122
+ |---------|-------------|
123
+ | `npx evalgate ci` | Complete CI pipeline: discover → manifest → impact → run → diff → PR summary |
124
+ | `npx evalgate ci --base main` | Run CI with diff against main branch |
125
+ | `npx evalgate ci --impacted-only` | Run only specs impacted by changes |
126
+ | `npx evalgate ci --format github` | GitHub Step Summary with rich markdown |
127
+ | `npx evalgate ci --format json` | JSON output for automation |
128
+
129
+ ### Discovery & Manifest
130
+
131
+ | Command | Description |
132
+ |---------|-------------|
133
+ | `npx evalgate discover` | Find and analyze evaluation specs |
134
+ | `npx evalgate discover --manifest` | Generate stable manifest for incremental analysis |
135
+
136
+ ### Impact Analysis
137
+
138
+ | Command | Description |
139
+ |---------|-------------|
140
+ | `npx evalgate impact-analysis --base main` | Analyze impact of changes |
141
+ | `npx evalgate impact-analysis --changed-files file1.ts,file2.ts` | Analyze specific changed files |
142
+
143
+ ### Run & Diff
144
+
145
+ | Command | Description |
146
+ |---------|-------------|
147
+ | `npx evalgate run` | Run evaluation specifications |
148
+ | `npx evalgate run --write-results` | Run with artifact retention |
149
+ | `npx evalgate diff --base main` | Compare results against base branch |
150
+ | `npx evalgate diff --base last --head last` | Compare last two runs |
151
+ | `npx evalgate diff --format github` | GitHub Step Summary with regressions |
152
+
153
+ ### Legacy Regression Gate (local, no account needed)
154
+
155
+ | Command | Description |
156
+ |---------|-------------|
157
+ | `npx evalgate init` | Full project scaffolder — creates everything you need |
158
+ | `npx evalgate gate` | Run regression gate locally |
159
+ | `npx evalgate gate --format json` | Machine-readable JSON output |
160
+ | `npx evalgate gate --format github` | GitHub Step Summary with delta table |
161
+ | `npx evalgate baseline init` | Create starter `evals/baseline.json` |
162
+ | `npx evalgate baseline update` | Re-run tests and update baseline with real scores |
163
+ | `npx evalgate upgrade --full` | Upgrade from Tier 1 (built-in) to Tier 2 (full gate) |
164
+
165
+ ### API Gate (requires account)
166
+
167
+ | Command | Description |
168
+ |---------|-------------|
169
+ | `npx evalgate check` | Gate on quality score from dashboard |
170
+ | `npx evalgate share` | Create share link for a run |
171
+
172
+ ### Debugging & Diagnostics
173
+
174
+ | Command | Description |
175
+ |---------|-------------|
176
+ | `npx evalgate doctor` | Comprehensive preflight checklist — verifies config, baseline, auth, API, CI wiring |
177
+ | `npx evalgate explain` | Offline report explainer — top failures, root cause classification, suggested fixes |
178
+ | `npx evalgate print-config` | Show resolved config with source-of-truth annotations (file/env/default/arg) |
179
+
180
+ ### Migration Tools
181
+
182
+ | Command | Description |
183
+ |---------|-------------|
184
+ | `npx evalgate migrate config --in evalai.config.json --out eval/migrated.spec.ts` | Convert legacy config to DSL |
185
+
186
+ **Guided failure flow:**
187
+
188
+ ```
189
+ evalai ci → fails → "Next: evalai explain --report .evalai/last-run.json"
190
+
191
+ evalai explain → root causes + fixes
192
+ ```
193
+
194
+ **GitHub Actions step summary** — CI result at a glance with regressions and artifacts:
195
+
196
+ ![GitHub Actions step summary showing CI pass/fail with delta table](../../docs/images/evalai-gate-step-summary.svg)
197
+
198
+ **`evalai explain` terminal output** — root causes + fix commands:
199
+
200
+ ![Terminal output of evalai explain showing top failures and suggested fixes](../../docs/images/evalai-explain-terminal.svg)
201
+
202
+ All commands automatically write artifacts so `explain` works with zero flags.
203
+
204
+ ### Gate Exit Codes
205
+
206
+ | Code | Meaning |
207
+ |------|---------|
208
+ | 0 | Pass — no regression |
209
+ | 1 | Regression detected |
210
+ | 2 | Infra error (baseline missing, tests crashed) |
211
+
212
+ ### Check Exit Codes (API mode)
213
+
214
+ | Code | Meaning |
215
+ |------|---------|
216
+ | 0 | Pass |
217
+ | 1 | Score below threshold |
218
+ | 2 | Regression failure |
219
+ | 3 | Policy violation |
220
+ | 4 | API error |
221
+ | 5 | Bad arguments |
222
+ | 6 | Low test count |
223
+ | 7 | Weak evidence |
224
+ | 8 | Warn (soft regression) |
225
+
226
+ ### Doctor Exit Codes
227
+
228
+ | Code | Meaning |
229
+ |------|---------|
230
+ | 0 | Ready — all checks passed |
231
+ | 2 | Not ready — one or more checks failed |
232
+ | 3 | Infrastructure error |
233
+
234
+ ---
235
+
236
+ ## How the Gate Works
237
+
238
+ **Built-in mode** (any Node project, no config needed):
239
+ - Runs `<pm> test`, captures exit code + test count
240
+ - Compares against `evals/baseline.json`
241
+ - Writes `evals/regression-report.json`
242
+ - Fails CI if tests regress
243
+
244
+ **Project mode** (advanced, for full regression gate):
245
+ - If `eval:regression-gate` script exists in `package.json`, delegates to it
246
+ - Supports golden eval scores, confidence tests, p95 latency, cost tracking
247
+ - Full delta table with tolerances
248
+
249
+ ---
250
+
251
+ ## Run a Regression Test Locally (no account)
252
+
253
+ ```bash
254
+ npm install @evalgate/sdk openai
255
+ ```
256
+
257
+ ```typescript
258
+ import { openAIChatEval } from "@evalgate/sdk";
259
+
260
+ await openAIChatEval({
261
+ name: "chat-regression",
262
+ cases: [
263
+ { input: "Hello", expectedOutput: "greeting" },
264
+ { input: "2 + 2 = ?", expectedOutput: "4" },
265
+ ],
266
+ });
267
+ ```
268
+
269
+ Output: `PASS 2/2 (score: 100)`. No account needed. Just a score.
270
+
271
+ ### Vitest Integration
272
+
273
+ ```typescript
274
+ import { openAIChatEval, extendExpectWithToPassGate } from "@evalgate/sdk";
275
+ import { expect, it } from "vitest";
276
+
277
+ extendExpectWithToPassGate(expect);
278
+
279
+ it("passes gate", async () => {
280
+ const result = await openAIChatEval({
281
+ name: "chat-regression",
282
+ cases: [
283
+ { input: "Hello", expectedOutput: "greeting" },
284
+ { input: "2 + 2 = ?", expectedOutput: "4" },
285
+ ],
286
+ });
287
+ expect(result).toPassGate();
288
+ });
289
+ ```
290
+
291
+ ---
292
+
293
+ ## SDK Exports
294
+
295
+ ### Regression Gate Constants
296
+
297
+ ```typescript
298
+ import {
299
+ GATE_EXIT, // { PASS: 0, REGRESSION: 1, INFRA_ERROR: 2, ... }
300
+ GATE_CATEGORY, // { PASS: "pass", REGRESSION: "regression", INFRA_ERROR: "infra_error" }
301
+ REPORT_SCHEMA_VERSION,
302
+ ARTIFACTS, // { BASELINE, REGRESSION_REPORT, CONFIDENCE_SUMMARY, LATENCY_BENCHMARK }
303
+ } from "@evalgate/sdk";
304
+
305
+ // Or tree-shakeable:
306
+ import { GATE_EXIT } from "@evalgate/sdk/regression";
307
+ ```
308
+
309
+ ### Types
310
+
311
+ ```typescript
312
+ import type {
313
+ RegressionReport,
314
+ RegressionDelta,
315
+ Baseline,
316
+ BaselineTolerance,
317
+ GateExitCode,
318
+ GateCategory,
319
+ } from "@evalgate/sdk/regression";
320
+ ```
321
+
322
+ ### Platform Client
323
+
324
+ ```typescript
325
+ import { AIEvalClient } from "@evalgate/sdk";
326
+
327
+ const client = AIEvalClient.init(); // from EVALAI_API_KEY env
328
+ // or
329
+ const client = new AIEvalClient({ apiKey: "...", organizationId: 123 });
330
+ ```
331
+
332
+ ### Framework Integrations
333
+
334
+ ```typescript
335
+ import { traceOpenAI } from "@evalgate/sdk/integrations/openai";
336
+ import { traceAnthropic } from "@evalgate/sdk/integrations/anthropic";
337
+ ```
338
+
339
+ ---
340
+
341
+ ## Installation
342
+
343
+ ```bash
344
+ npm install @evalgate/sdk
345
+ # or
346
+ yarn add @evalgate/sdk
347
+ # or
348
+ pnpm add @evalgate/sdk
349
+ ```
350
+
351
+ `openai` is a peer dependency; install it if you use `openAIChatEval`:
352
+
353
+ ```bash
354
+ npm install openai
355
+ ```
356
+
357
+ ## Environment Support
358
+
359
+ | Feature | Node.js | Browser |
360
+ |---------|---------|---------|
361
+ | Platform APIs (Traces, Evaluations, LLM Judge) | ✅ | ✅ |
362
+ | Assertions, Test Suites, Error Handling | ✅ | ✅ |
363
+ | CJS/ESM | ✅ | ✅ |
364
+ | CLI, Snapshots, File Export | ✅ | — |
365
+ | Context Propagation | ✅ Full | ⚠️ Basic |
366
+
367
+ ## No Lock-in
368
+
369
+ ```bash
370
+ rm evalai.config.json
371
+ ```
372
+
373
+ Your local `openAIChatEval` runs continue to work. No account cancellation. No data export required.
374
+
375
+ ## Changelog
376
+
377
+ See [CHANGELOG.md](CHANGELOG.md) for the full release history.
378
+
379
+ **v1.8.0** — `evalai doctor` rewrite (9-check checklist), `evalai explain` command, guided failure flow, CI template with doctor preflight
380
+
381
+ **v1.7.0** — `evalai init` scaffolder, `evalai upgrade --full`, `detectRunner()`, machine-readable gate output, init test matrix
382
+
383
+ **v1.6.0** — `evalai gate`, `evalai baseline`, regression gate constants & types
384
+
385
+ **v1.5.8** — secureRoute fix, test infra fixes, 304 handling fix
386
+
387
+ **v1.5.5** — PASS/WARN/FAIL semantics, flake intelligence, golden regression suite
388
+
389
+ **v1.5.0** — GitHub annotations, `--onFail import`, `evalai doctor`
390
+
391
+ ## License
392
+
393
+ MIT
394
+
395
+ ## Support
396
+
397
+ - **Docs:** https://evalgate.com/documentation
398
+ - **Issues:** https://github.com/pauly7610/ai-evaluation-platform/issues
package/dist/assertions.d.ts ADDED
@@ -0,0 +1,189 @@
1
+ /**
2
+ * Enhanced Assertion Library
3
+ * Tier 1.3: Pre-Built Assertion Library with 20+ built-in assertions
4
+ *
5
+ * @example
6
+ * ```typescript
7
+ * import { expect } from '@evalgate/sdk';
8
+ *
9
+ * const output = "Hello, how can I help you today?";
10
+ *
11
+ * expect(output).toContainKeywords(['help', 'today']);
12
+ * expect(output).toHaveSentiment('positive');
13
+ * expect(output).toMatchPattern(/help/i);
14
+ * expect(output).toHaveLength({ min: 10, max: 100 });
15
+ * ```
16
+ */
17
+ export interface AssertionResult {
18
+ name: string;
19
+ passed: boolean;
20
+ expected: unknown;
21
+ actual: unknown;
22
+ message?: string;
23
+ }
24
+ export declare class AssertionError extends Error {
25
+ expected: unknown;
26
+ actual: unknown;
27
+ constructor(message: string, expected: unknown, actual: unknown);
28
+ }
29
+ /**
30
+ * Fluent assertion builder
31
+ */
32
+ export declare class Expectation {
33
+ private value;
34
+ constructor(value: unknown);
35
+ /**
36
+ * Assert value equals expected
37
+ * @example expect(output).toEqual("Hello")
38
+ */
39
+ toEqual(expected: unknown, message?: string): AssertionResult;
40
+ /**
41
+ * Assert value contains substring
42
+ * @example expect(output).toContain("help")
43
+ */
44
+ toContain(substring: string, message?: string): AssertionResult;
45
+ /**
46
+ * Assert value contains all keywords
47
+ * @example expect(output).toContainKeywords(['help', 'support'])
48
+ */
49
+ toContainKeywords(keywords: string[], message?: string): AssertionResult;
50
+ /**
51
+ * Assert value does not contain substring
52
+ * @example expect(output).toNotContain("error")
53
+ */
54
+ toNotContain(substring: string, message?: string): AssertionResult;
55
+ /**
56
+ * Assert value does not contain PII (emails, phone numbers, SSN)
57
+ * @example expect(output).toNotContainPII()
58
+ */
59
+ toNotContainPII(message?: string): AssertionResult;
60
+ /**
61
+ * Assert value matches regular expression
62
+ * @example expect(output).toMatchPattern(/\d{3}-\d{3}-\d{4}/)
63
+ */
64
+ toMatchPattern(pattern: RegExp, message?: string): AssertionResult;
65
+ /**
66
+ * Assert value is valid JSON
67
+ * @example expect(output).toBeValidJSON()
68
+ */
69
+ toBeValidJSON(message?: string): AssertionResult;
70
+ /**
71
+ * Assert JSON matches schema
72
+ * @example expect(output).toMatchJSON({ status: 'success' })
73
+ */
74
+ toMatchJSON(schema: Record<string, unknown>, message?: string): AssertionResult;
75
+ /**
76
+ * Assert value has expected sentiment
77
+ * @example expect(output).toHaveSentiment('positive')
78
+ */
79
+ toHaveSentiment(expected: "positive" | "negative" | "neutral", message?: string): AssertionResult;
80
+ /**
81
+ * Assert string length is within range
82
+ * @example expect(output).toHaveLength({ min: 10, max: 100 })
83
+ */
84
+ toHaveLength(range: {
85
+ min?: number;
86
+ max?: number;
87
+ }, message?: string): AssertionResult;
88
+ /**
89
+ * Assert no hallucinations (all ground truth facts present)
90
+ * @example expect(output).toNotHallucinate(['fact1', 'fact2'])
91
+ */
92
+ toNotHallucinate(groundTruth: string[], message?: string): AssertionResult;
93
+ /**
94
+ * Assert response latency is within limit
95
+ * @example expect(durationMs).toBeFasterThan(1000)
96
+ */
97
+ toBeFasterThan(maxMs: number, message?: string): AssertionResult;
98
+ /**
99
+ * Assert value is truthy
100
+ * @example expect(result).toBeTruthy()
101
+ */
102
+ toBeTruthy(message?: string): AssertionResult;
103
+ /**
104
+ * Assert value is falsy
105
+ * @example expect(error).toBeFalsy()
106
+ */
107
+ toBeFalsy(message?: string): AssertionResult;
108
+ /**
109
+ * Assert value is greater than expected
110
+ * @example expect(score).toBeGreaterThan(0.8)
111
+ */
112
+ toBeGreaterThan(expected: number, message?: string): AssertionResult;
113
+ /**
114
+ * Assert value is less than expected
115
+ * @example expect(errorRate).toBeLessThan(0.05)
116
+ */
117
+ toBeLessThan(expected: number, message?: string): AssertionResult;
118
+ /**
119
+ * Assert value is between min and max
120
+ * @example expect(score).toBeBetween(0, 1)
121
+ */
122
+ toBeBetween(min: number, max: number, message?: string): AssertionResult;
123
+ /**
124
+ * Assert value contains code block
125
+ * @example expect(output).toContainCode()
126
+ */
127
+ toContainCode(message?: string): AssertionResult;
128
+ /**
129
+ * Assert value has a professional tone (no profanity)
130
+ * @example expect(output).toBeProfessional()
131
+ */
132
+ toBeProfessional(message?: string): AssertionResult;
133
+ /**
134
+ * Assert value has proper grammar (basic checks)
135
+ * @example expect(output).toHaveProperGrammar()
136
+ */
137
+ toHaveProperGrammar(message?: string): AssertionResult;
138
+ }
139
+ /**
140
+ * Create an expectation for fluent assertions
141
+ *
142
+ * @example
143
+ * ```typescript
144
+ * const output = "Hello, how can I help you?";
145
+ *
146
+ * expect(output).toContain("help");
147
+ * expect(output).toHaveSentiment('positive');
148
+ * expect(output).toHaveLength({ min: 10, max: 100 });
149
+ * ```
150
+ */
151
+ export declare function expect(value: unknown): Expectation;
152
+ /**
153
+ * Run multiple assertions and collect results
154
+ *
155
+ * @example
156
+ * ```typescript
157
+ * const results = runAssertions([
158
+ * () => expect(output).toContain("help"),
159
+ * () => expect(output).toHaveSentiment('positive'),
160
+ * () => expect(output).toHaveLength({ min: 10 })
161
+ * ]);
162
+ *
163
+ * const allPassed = results.every(r => r.passed);
164
+ * ```
165
+ */
166
+ export declare function runAssertions(assertions: (() => AssertionResult)[]): AssertionResult[];
167
+ export declare function containsKeywords(text: string, keywords: string[]): boolean;
168
+ export declare function matchesPattern(text: string, pattern: RegExp): boolean;
169
+ export declare function hasLength(text: string, range: {
170
+ min?: number;
171
+ max?: number;
172
+ }): boolean;
173
+ export declare function containsJSON(text: string): boolean;
174
+ export declare function notContainsPII(text: string): boolean;
175
+ export declare function hasSentiment(text: string, expected: "positive" | "negative" | "neutral"): boolean;
176
+ export declare function similarTo(text1: string, text2: string, threshold?: number): boolean;
177
+ export declare function withinRange(value: number, min: number, max: number): boolean;
178
+ export declare function isValidEmail(email: string): boolean;
179
+ export declare function isValidURL(url: string): boolean;
180
+ export declare function hasNoHallucinations(text: string, groundTruth: string[]): boolean;
181
+ export declare function matchesSchema(value: unknown, schema: Record<string, unknown>): boolean;
182
+ export declare function hasReadabilityScore(text: string, minScore: number): boolean;
183
+ export declare function containsLanguage(text: string, language: string): boolean;
184
+ export declare function hasFactualAccuracy(text: string, facts: string[]): boolean;
185
+ export declare function respondedWithinTime(startTime: number, maxMs: number): boolean;
186
+ export declare function hasNoToxicity(text: string): boolean;
187
+ export declare function followsInstructions(text: string, instructions: string[]): boolean;
188
+ export declare function containsAllRequiredFields(obj: unknown, requiredFields: string[]): boolean;
189
+ export declare function hasValidCodeSyntax(code: string, language: string): boolean;