@pauly4010/evalai-sdk 1.3.0 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/README.md +80 -4
- package/dist/__tests__/assertions.test.d.ts +1 -0
- package/dist/__tests__/assertions.test.js +288 -0
- package/dist/__tests__/client.test.d.ts +1 -0
- package/dist/__tests__/client.test.js +185 -0
- package/dist/__tests__/testing.test.d.ts +1 -0
- package/dist/__tests__/testing.test.js +230 -0
- package/dist/__tests__/workflows.test.d.ts +1 -0
- package/dist/__tests__/workflows.test.js +222 -0
- package/dist/cli/check.d.ts +58 -0
- package/dist/cli/check.js +215 -0
- package/dist/cli/index.d.ts +4 -2
- package/dist/cli/index.js +38 -175
- package/dist/client.d.ts +14 -1
- package/dist/client.js +56 -6
- package/dist/index.d.ts +1 -0
- package/dist/index.js +6 -1
- package/dist/types.d.ts +8 -0
- package/dist/workflows.js +2 -7
- package/package.json +17 -29
- package/LICENSE +0 -21
package/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,18 @@ All notable changes to the @pauly4010/evalai-sdk package will be documented in t
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [1.4.1] - 2026-02-18
|
|
9
|
+
|
|
10
|
+
### ✨ Added
|
|
11
|
+
|
|
12
|
+
- **evalai check `--baseline production`** — Compare against latest run tagged with `environment=prod`
|
|
13
|
+
- **Baseline missing handling** — Fails clearly when a comparison is requested but the baseline is not found
|
|
14
|
+
|
|
15
|
+
### 🔧 Changed
|
|
16
|
+
|
|
17
|
+
- **Package hardening** — `files`, `module`, `sideEffects: false` for leaner npm publish
|
|
18
|
+
- **CLI** — Passes `baseline` param to quality API for deterministic CI gates
|
|
19
|
+
|
|
8
20
|
## [1.3.0] - 2025-10-21
|
|
9
21
|
|
|
10
22
|
### ✨ Added
|
package/README.md
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
# @pauly4010/evalai-sdk
|
|
2
2
|
|
|
3
|
+
[![npm version](https://img.shields.io/npm/v/@pauly4010/evalai-sdk.svg)](https://www.npmjs.com/package/@pauly4010/evalai-sdk)
|
|
4
|
+
[![npm downloads](https://img.shields.io/npm/dm/@pauly4010/evalai-sdk.svg)](https://www.npmjs.com/package/@pauly4010/evalai-sdk)
|
|
5
|
+
|
|
3
6
|
Official TypeScript/JavaScript SDK for the AI Evaluation Platform. Build confidence in your AI systems with comprehensive evaluation tools.
|
|
4
7
|
|
|
5
8
|
## Installation
|
|
@@ -498,9 +501,83 @@ console.log("Plan:", org.plan);
|
|
|
498
501
|
console.log("Status:", org.status);
|
|
499
502
|
```
|
|
500
503
|
|
|
504
|
+
## evalai CLI (v1.4.1)
|
|
505
|
+
|
|
506
|
+
The SDK includes a CLI for CI/CD evaluation gates. Install globally or use via `npx`:
|
|
507
|
+
|
|
508
|
+
```bash
|
|
509
|
+
# Via npx (no global install)
|
|
510
|
+
npx @pauly4010/evalai-sdk check --minScore 92 --evaluationId 42 --apiKey $EVALAI_API_KEY
|
|
511
|
+
|
|
512
|
+
# Or install globally
|
|
513
|
+
npm install -g @pauly4010/evalai-sdk
|
|
514
|
+
evalai check --minScore 92 --evaluationId 42
|
|
515
|
+
```
|
|
516
|
+
|
|
517
|
+
### evalai check
|
|
518
|
+
|
|
519
|
+
Gate deployments on quality scores, regression, and compliance:
|
|
520
|
+
|
|
521
|
+
| Option | Description |
|
|
522
|
+
|--------|-------------|
|
|
523
|
+
| `--evaluationId <id>` | **Required.** Evaluation to gate on |
|
|
524
|
+
| `--apiKey <key>` | API key (or `EVALAI_API_KEY` env) |
|
|
525
|
+
| `--minScore <n>` | Fail if score < n (0–100) |
|
|
526
|
+
| `--maxDrop <n>` | Fail if score dropped > n points from baseline |
|
|
527
|
+
| `--minN <n>` | Fail if total test cases < n |
|
|
528
|
+
| `--allowWeakEvidence` | Permit weak evidence level (default: fail) |
|
|
529
|
+
| `--policy <name>` | Enforce HIPAA, SOC2, GDPR, PCI_DSS, FINRA_4511 |
|
|
530
|
+
| `--baseline <mode>` | `published`, `previous`, or `production` |
|
|
531
|
+
| `--baseUrl <url>` | API base URL |
|
|
532
|
+
|
|
533
|
+
**Exit codes:** 0=pass, 1=score below, 2=regression, 3=policy violation, 4=API error, 5=bad args, 6=low N, 7=weak evidence
|
|
534
|
+
|
|
501
535
|
## Changelog
|
|
502
536
|
|
|
503
|
-
### v1.
|
|
537
|
+
### v1.4.1 (Latest)
|
|
538
|
+
|
|
539
|
+
- **evalai check `--baseline production`** — Compare against latest prod-tagged run
|
|
540
|
+
- **Package hardening** — Leaner npm publish with `files`, `sideEffects: false`
|
|
541
|
+
|
|
542
|
+
### v1.4.0
|
|
543
|
+
|
|
544
|
+
- **evalai CLI** — Command-line tool for CI/CD evaluation gates
|
|
545
|
+
- `evalai check` — Gate deployments on quality scores, regression, and compliance
|
|
546
|
+
- `--minScore <n>` — Fail if quality score < n (0–100)
|
|
547
|
+
- `--maxDrop <n>` — Fail if score dropped > n points from baseline
|
|
548
|
+
- `--minN <n>` — Fail if total test cases < n
|
|
549
|
+
- `--allowWeakEvidence` — Permit weak evidence level (default: fail)
|
|
550
|
+
- `--policy <name>` — Enforce compliance (HIPAA, SOC2, GDPR, PCI_DSS, FINRA_4511)
|
|
551
|
+
- `--baseline <mode>` — Compare to `published` or `previous` run
|
|
552
|
+
- `--evaluationId <id>` — Required. Evaluation to gate on
|
|
553
|
+
- Environment: `EVALAI_API_KEY`, `EVALAI_BASE_URL`
|
|
554
|
+
- Exit codes: 0=pass, 1=score below, 2=regression, 3=policy violation, 4=API error, 5=bad args, 6=low N, 7=weak evidence
|
|
555
|
+
- **CLI Exports** — `parseArgs`, `runCheck`, `EXIT` from `@pauly4010/evalai-sdk` for programmatic use
|
|
556
|
+
|
|
557
|
+
### v1.3.0
|
|
558
|
+
|
|
559
|
+
- **Workflow Tracing** — Multi-agent orchestration with full lifecycle instrumentation
|
|
560
|
+
- `WorkflowTracer` class with `startWorkflow`, `endWorkflow`, `startAgentSpan`, `endAgentSpan`
|
|
561
|
+
- `createWorkflowTracer` convenience factory
|
|
562
|
+
- `traceWorkflowStep` generic wrapper for any async function
|
|
563
|
+
- Agent handoff recording (`delegation`, `escalation`, `parallel`, `fallback`)
|
|
564
|
+
- Decision auditing with alternatives, confidence scores, reasoning, and context factors
|
|
565
|
+
- Cost tracking per span/workflow with automatic pricing (16+ models)
|
|
566
|
+
- Cost breakdown by category (`llm`, `tool`, `embedding`, `other`)
|
|
567
|
+
- **Framework Integrations** — Wrap popular multi-agent frameworks:
|
|
568
|
+
- `traceLangChainAgent` — wraps `.invoke()` and `.call()` with auto-tracing
|
|
569
|
+
- `traceCrewAI` — wraps `.kickoff()` with workflow start/end
|
|
570
|
+
- `traceAutoGen` — wraps `.initiate_chat()` with workflow start/end
|
|
571
|
+
- **Performance Utilities**
|
|
572
|
+
- `RequestCache` with configurable TTL (`CacheTTL` presets)
|
|
573
|
+
- `PaginatedIterator` / `createPaginatedIterator` / `autoPaginate` for cursor-based pagination
|
|
574
|
+
- `RequestBatcher` for batching API calls
|
|
575
|
+
- `RateLimiter` for client-side rate limit handling
|
|
576
|
+
- **Cost Tracking Types** — `CostRecord`, `CostBreakdown`, `ProviderPricing` interfaces
|
|
577
|
+
- **Agent Decision Auditing Types** — `AgentDecision`, `DecisionAlternative`, `RecordDecisionParams` interfaces
|
|
578
|
+
- **Benchmark Types** — `Benchmark`, `BenchmarkResult`, `AgentConfig` interfaces
|
|
579
|
+
|
|
580
|
+
### v1.2.1 (Bug Fixes)
|
|
504
581
|
|
|
505
582
|
- 🐛 **Critical Fixes**
|
|
506
583
|
- Fixed CLI import paths for proper npm package distribution
|
|
@@ -560,6 +637,5 @@ MIT
|
|
|
560
637
|
|
|
561
638
|
## Support
|
|
562
639
|
|
|
563
|
-
- Documentation: https://
|
|
564
|
-
- Issues: https://github.com/
|
|
565
|
-
- Discord: https://discord.gg/evalai
|
|
640
|
+
- Documentation: https://v0-ai-evaluation-platform-nu.vercel.app/documentation
|
|
641
|
+
- Issues: https://github.com/pauly7610/ai-evaluation-platform/issues
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
const vitest_1 = require("vitest");
|
|
4
|
+
const assertions_1 = require("../assertions");
|
|
5
|
+
(0, vitest_1.describe)('Expectation fluent API', () => {
|
|
6
|
+
(0, vitest_1.describe)('toEqual', () => {
|
|
7
|
+
(0, vitest_1.it)('should pass when values are equal', () => {
|
|
8
|
+
const result = (0, assertions_1.expect)('hello').toEqual('hello');
|
|
9
|
+
(0, vitest_1.expect)(result.passed).toBe(true);
|
|
10
|
+
(0, vitest_1.expect)(result.name).toBe('toEqual');
|
|
11
|
+
});
|
|
12
|
+
(0, vitest_1.it)('should fail when values differ', () => {
|
|
13
|
+
const result = (0, assertions_1.expect)('hello').toEqual('world');
|
|
14
|
+
(0, vitest_1.expect)(result.passed).toBe(false);
|
|
15
|
+
});
|
|
16
|
+
(0, vitest_1.it)('should handle objects', () => {
|
|
17
|
+
const result = (0, assertions_1.expect)({ a: 1 }).toEqual({ a: 1 });
|
|
18
|
+
(0, vitest_1.expect)(result.passed).toBe(true);
|
|
19
|
+
});
|
|
20
|
+
});
|
|
21
|
+
(0, vitest_1.describe)('toContain', () => {
|
|
22
|
+
(0, vitest_1.it)('should pass when substring is found', () => {
|
|
23
|
+
const result = (0, assertions_1.expect)('Hello, world!').toContain('world');
|
|
24
|
+
(0, vitest_1.expect)(result.passed).toBe(true);
|
|
25
|
+
});
|
|
26
|
+
(0, vitest_1.it)('should fail when substring is missing', () => {
|
|
27
|
+
const result = (0, assertions_1.expect)('Hello, world!').toContain('foo');
|
|
28
|
+
(0, vitest_1.expect)(result.passed).toBe(false);
|
|
29
|
+
});
|
|
30
|
+
});
|
|
31
|
+
(0, vitest_1.describe)('toContainKeywords', () => {
|
|
32
|
+
(0, vitest_1.it)('should pass when all keywords are present (case insensitive)', () => {
|
|
33
|
+
const result = (0, assertions_1.expect)('The quick Brown fox').toContainKeywords(['quick', 'brown']);
|
|
34
|
+
(0, vitest_1.expect)(result.passed).toBe(true);
|
|
35
|
+
});
|
|
36
|
+
(0, vitest_1.it)('should fail when keywords are missing', () => {
|
|
37
|
+
const result = (0, assertions_1.expect)('The quick fox').toContainKeywords(['quick', 'brown']);
|
|
38
|
+
(0, vitest_1.expect)(result.passed).toBe(false);
|
|
39
|
+
(0, vitest_1.expect)(result.message).toContain('brown');
|
|
40
|
+
});
|
|
41
|
+
});
|
|
42
|
+
(0, vitest_1.describe)('toNotContain', () => {
|
|
43
|
+
(0, vitest_1.it)('should pass when substring is absent', () => {
|
|
44
|
+
const result = (0, assertions_1.expect)('safe text').toNotContain('danger');
|
|
45
|
+
(0, vitest_1.expect)(result.passed).toBe(true);
|
|
46
|
+
});
|
|
47
|
+
(0, vitest_1.it)('should fail when substring is present', () => {
|
|
48
|
+
const result = (0, assertions_1.expect)('some danger ahead').toNotContain('danger');
|
|
49
|
+
(0, vitest_1.expect)(result.passed).toBe(false);
|
|
50
|
+
});
|
|
51
|
+
});
|
|
52
|
+
(0, vitest_1.describe)('toNotContainPII', () => {
|
|
53
|
+
(0, vitest_1.it)('should pass with no PII', () => {
|
|
54
|
+
const result = (0, assertions_1.expect)('Just a normal sentence').toNotContainPII();
|
|
55
|
+
(0, vitest_1.expect)(result.passed).toBe(true);
|
|
56
|
+
});
|
|
57
|
+
(0, vitest_1.it)('should fail with an email', () => {
|
|
58
|
+
const result = (0, assertions_1.expect)('Contact me at user@example.com').toNotContainPII();
|
|
59
|
+
(0, vitest_1.expect)(result.passed).toBe(false);
|
|
60
|
+
});
|
|
61
|
+
(0, vitest_1.it)('should fail with a phone number', () => {
|
|
62
|
+
const result = (0, assertions_1.expect)('Call 555-123-4567').toNotContainPII();
|
|
63
|
+
(0, vitest_1.expect)(result.passed).toBe(false);
|
|
64
|
+
});
|
|
65
|
+
(0, vitest_1.it)('should fail with an SSN', () => {
|
|
66
|
+
const result = (0, assertions_1.expect)('SSN is 123-45-6789').toNotContainPII();
|
|
67
|
+
(0, vitest_1.expect)(result.passed).toBe(false);
|
|
68
|
+
});
|
|
69
|
+
});
|
|
70
|
+
(0, vitest_1.describe)('toMatchPattern', () => {
|
|
71
|
+
(0, vitest_1.it)('should pass when pattern matches', () => {
|
|
72
|
+
const result = (0, assertions_1.expect)('Order #12345').toMatchPattern(/Order #\d+/);
|
|
73
|
+
(0, vitest_1.expect)(result.passed).toBe(true);
|
|
74
|
+
});
|
|
75
|
+
(0, vitest_1.it)('should fail when pattern does not match', () => {
|
|
76
|
+
const result = (0, assertions_1.expect)('No order here').toMatchPattern(/Order #\d+/);
|
|
77
|
+
(0, vitest_1.expect)(result.passed).toBe(false);
|
|
78
|
+
});
|
|
79
|
+
});
|
|
80
|
+
(0, vitest_1.describe)('toBeValidJSON', () => {
|
|
81
|
+
(0, vitest_1.it)('should pass for valid JSON', () => {
|
|
82
|
+
const result = (0, assertions_1.expect)('{"key": "value"}').toBeValidJSON();
|
|
83
|
+
(0, vitest_1.expect)(result.passed).toBe(true);
|
|
84
|
+
});
|
|
85
|
+
(0, vitest_1.it)('should fail for invalid JSON', () => {
|
|
86
|
+
const result = (0, assertions_1.expect)('not json').toBeValidJSON();
|
|
87
|
+
(0, vitest_1.expect)(result.passed).toBe(false);
|
|
88
|
+
});
|
|
89
|
+
});
|
|
90
|
+
(0, vitest_1.describe)('toMatchJSON', () => {
|
|
91
|
+
(0, vitest_1.it)('should pass when all schema keys exist', () => {
|
|
92
|
+
const result = (0, assertions_1.expect)('{"status":"ok","data":1}').toMatchJSON({ status: '', data: '' });
|
|
93
|
+
(0, vitest_1.expect)(result.passed).toBe(true);
|
|
94
|
+
});
|
|
95
|
+
(0, vitest_1.it)('should fail when schema keys are missing', () => {
|
|
96
|
+
const result = (0, assertions_1.expect)('{"status":"ok"}').toMatchJSON({ status: '', missing: '' });
|
|
97
|
+
(0, vitest_1.expect)(result.passed).toBe(false);
|
|
98
|
+
});
|
|
99
|
+
});
|
|
100
|
+
(0, vitest_1.describe)('toHaveSentiment', () => {
|
|
101
|
+
(0, vitest_1.it)('should detect positive sentiment', () => {
|
|
102
|
+
const result = (0, assertions_1.expect)('This is great and amazing!').toHaveSentiment('positive');
|
|
103
|
+
(0, vitest_1.expect)(result.passed).toBe(true);
|
|
104
|
+
});
|
|
105
|
+
(0, vitest_1.it)('should detect negative sentiment', () => {
|
|
106
|
+
const result = (0, assertions_1.expect)('This is terrible and awful').toHaveSentiment('negative');
|
|
107
|
+
(0, vitest_1.expect)(result.passed).toBe(true);
|
|
108
|
+
});
|
|
109
|
+
(0, vitest_1.it)('should detect neutral sentiment', () => {
|
|
110
|
+
const result = (0, assertions_1.expect)('The sky is blue').toHaveSentiment('neutral');
|
|
111
|
+
(0, vitest_1.expect)(result.passed).toBe(true);
|
|
112
|
+
});
|
|
113
|
+
});
|
|
114
|
+
(0, vitest_1.describe)('toHaveLength', () => {
|
|
115
|
+
(0, vitest_1.it)('should pass when within range', () => {
|
|
116
|
+
const result = (0, assertions_1.expect)('hello').toHaveLength({ min: 3, max: 10 });
|
|
117
|
+
(0, vitest_1.expect)(result.passed).toBe(true);
|
|
118
|
+
});
|
|
119
|
+
(0, vitest_1.it)('should fail when too short', () => {
|
|
120
|
+
const result = (0, assertions_1.expect)('hi').toHaveLength({ min: 5 });
|
|
121
|
+
(0, vitest_1.expect)(result.passed).toBe(false);
|
|
122
|
+
});
|
|
123
|
+
(0, vitest_1.it)('should fail when too long', () => {
|
|
124
|
+
const result = (0, assertions_1.expect)('a very long string').toHaveLength({ max: 5 });
|
|
125
|
+
(0, vitest_1.expect)(result.passed).toBe(false);
|
|
126
|
+
});
|
|
127
|
+
});
|
|
128
|
+
(0, vitest_1.describe)('toNotHallucinate', () => {
|
|
129
|
+
(0, vitest_1.it)('should pass when all facts are present', () => {
|
|
130
|
+
const result = (0, assertions_1.expect)('Paris is the capital of France').toNotHallucinate(['paris', 'france']);
|
|
131
|
+
(0, vitest_1.expect)(result.passed).toBe(true);
|
|
132
|
+
});
|
|
133
|
+
(0, vitest_1.it)('should fail when facts are missing', () => {
|
|
134
|
+
const result = (0, assertions_1.expect)('Berlin is great').toNotHallucinate(['paris', 'france']);
|
|
135
|
+
(0, vitest_1.expect)(result.passed).toBe(false);
|
|
136
|
+
});
|
|
137
|
+
});
|
|
138
|
+
(0, vitest_1.describe)('toBeFasterThan', () => {
|
|
139
|
+
(0, vitest_1.it)('should pass when value is under threshold', () => {
|
|
140
|
+
const result = (0, assertions_1.expect)(500).toBeFasterThan(1000);
|
|
141
|
+
(0, vitest_1.expect)(result.passed).toBe(true);
|
|
142
|
+
});
|
|
143
|
+
(0, vitest_1.it)('should fail when value exceeds threshold', () => {
|
|
144
|
+
const result = (0, assertions_1.expect)(1500).toBeFasterThan(1000);
|
|
145
|
+
(0, vitest_1.expect)(result.passed).toBe(false);
|
|
146
|
+
});
|
|
147
|
+
});
|
|
148
|
+
(0, vitest_1.describe)('toBeTruthy / toBeFalsy', () => {
|
|
149
|
+
(0, vitest_1.it)('should pass for truthy values', () => {
|
|
150
|
+
(0, vitest_1.expect)((0, assertions_1.expect)('hello').toBeTruthy().passed).toBe(true);
|
|
151
|
+
(0, vitest_1.expect)((0, assertions_1.expect)(1).toBeTruthy().passed).toBe(true);
|
|
152
|
+
});
|
|
153
|
+
(0, vitest_1.it)('should pass for falsy values', () => {
|
|
154
|
+
(0, vitest_1.expect)((0, assertions_1.expect)('').toBeFalsy().passed).toBe(true);
|
|
155
|
+
(0, vitest_1.expect)((0, assertions_1.expect)(0).toBeFalsy().passed).toBe(true);
|
|
156
|
+
});
|
|
157
|
+
});
|
|
158
|
+
(0, vitest_1.describe)('toBeGreaterThan / toBeLessThan / toBeBetween', () => {
|
|
159
|
+
(0, vitest_1.it)('should work for greater than', () => {
|
|
160
|
+
(0, vitest_1.expect)((0, assertions_1.expect)(10).toBeGreaterThan(5).passed).toBe(true);
|
|
161
|
+
(0, vitest_1.expect)((0, assertions_1.expect)(3).toBeGreaterThan(5).passed).toBe(false);
|
|
162
|
+
});
|
|
163
|
+
(0, vitest_1.it)('should work for less than', () => {
|
|
164
|
+
(0, vitest_1.expect)((0, assertions_1.expect)(3).toBeLessThan(5).passed).toBe(true);
|
|
165
|
+
(0, vitest_1.expect)((0, assertions_1.expect)(10).toBeLessThan(5).passed).toBe(false);
|
|
166
|
+
});
|
|
167
|
+
(0, vitest_1.it)('should work for between', () => {
|
|
168
|
+
(0, vitest_1.expect)((0, assertions_1.expect)(5).toBeBetween(1, 10).passed).toBe(true);
|
|
169
|
+
(0, vitest_1.expect)((0, assertions_1.expect)(15).toBeBetween(1, 10).passed).toBe(false);
|
|
170
|
+
});
|
|
171
|
+
});
|
|
172
|
+
(0, vitest_1.describe)('toContainCode', () => {
|
|
173
|
+
(0, vitest_1.it)('should detect code blocks', () => {
|
|
174
|
+
const result = (0, assertions_1.expect)('Here is code:\n```js\nconsole.log("hi")\n```').toContainCode();
|
|
175
|
+
(0, vitest_1.expect)(result.passed).toBe(true);
|
|
176
|
+
});
|
|
177
|
+
(0, vitest_1.it)('should fail without code blocks', () => {
|
|
178
|
+
const result = (0, assertions_1.expect)('No code here').toContainCode();
|
|
179
|
+
(0, vitest_1.expect)(result.passed).toBe(false);
|
|
180
|
+
});
|
|
181
|
+
});
|
|
182
|
+
(0, vitest_1.describe)('toBeProfessional', () => {
|
|
183
|
+
(0, vitest_1.it)('should pass for professional text', () => {
|
|
184
|
+
const result = (0, assertions_1.expect)('Thank you for your inquiry.').toBeProfessional();
|
|
185
|
+
(0, vitest_1.expect)(result.passed).toBe(true);
|
|
186
|
+
});
|
|
187
|
+
(0, vitest_1.it)('should fail for unprofessional text', () => {
|
|
188
|
+
const result = (0, assertions_1.expect)('This is damn stupid').toBeProfessional();
|
|
189
|
+
(0, vitest_1.expect)(result.passed).toBe(false);
|
|
190
|
+
});
|
|
191
|
+
});
|
|
192
|
+
(0, vitest_1.describe)('toHaveProperGrammar', () => {
|
|
193
|
+
(0, vitest_1.it)('should pass for properly formatted text', () => {
|
|
194
|
+
const result = (0, assertions_1.expect)('This is a sentence.').toHaveProperGrammar();
|
|
195
|
+
(0, vitest_1.expect)(result.passed).toBe(true);
|
|
196
|
+
});
|
|
197
|
+
(0, vitest_1.it)('should fail for double spaces', () => {
|
|
198
|
+
const result = (0, assertions_1.expect)('This has double spaces.').toHaveProperGrammar();
|
|
199
|
+
(0, vitest_1.expect)(result.passed).toBe(false);
|
|
200
|
+
});
|
|
201
|
+
});
|
|
202
|
+
});
|
|
203
|
+
(0, vitest_1.describe)('runAssertions', () => {
|
|
204
|
+
(0, vitest_1.it)('should collect all results', () => {
|
|
205
|
+
const results = (0, assertions_1.runAssertions)([
|
|
206
|
+
() => (0, assertions_1.expect)('hello').toContain('hello'),
|
|
207
|
+
() => (0, assertions_1.expect)('hello').toContain('missing'),
|
|
208
|
+
]);
|
|
209
|
+
(0, vitest_1.expect)(results).toHaveLength(2);
|
|
210
|
+
(0, vitest_1.expect)(results[0].passed).toBe(true);
|
|
211
|
+
(0, vitest_1.expect)(results[1].passed).toBe(false);
|
|
212
|
+
});
|
|
213
|
+
(0, vitest_1.it)('should catch thrown errors', () => {
|
|
214
|
+
const results = (0, assertions_1.runAssertions)([
|
|
215
|
+
() => { throw new Error('boom'); },
|
|
216
|
+
]);
|
|
217
|
+
(0, vitest_1.expect)(results[0].passed).toBe(false);
|
|
218
|
+
(0, vitest_1.expect)(results[0].message).toBe('boom');
|
|
219
|
+
});
|
|
220
|
+
});
|
|
221
|
+
(0, vitest_1.describe)('Standalone assertion functions', () => {
|
|
222
|
+
(0, vitest_1.it)('containsKeywords', () => {
|
|
223
|
+
(0, vitest_1.expect)((0, assertions_1.containsKeywords)('The quick brown fox', ['quick', 'brown'])).toBe(true);
|
|
224
|
+
(0, vitest_1.expect)((0, assertions_1.containsKeywords)('The quick fox', ['quick', 'brown'])).toBe(false);
|
|
225
|
+
});
|
|
226
|
+
(0, vitest_1.it)('matchesPattern', () => {
|
|
227
|
+
(0, vitest_1.expect)((0, assertions_1.matchesPattern)('abc123', /\d+/)).toBe(true);
|
|
228
|
+
(0, vitest_1.expect)((0, assertions_1.matchesPattern)('abc', /\d+/)).toBe(false);
|
|
229
|
+
});
|
|
230
|
+
(0, vitest_1.it)('hasLength', () => {
|
|
231
|
+
(0, vitest_1.expect)((0, assertions_1.hasLength)('hello', { min: 3, max: 10 })).toBe(true);
|
|
232
|
+
(0, vitest_1.expect)((0, assertions_1.hasLength)('hi', { min: 5 })).toBe(false);
|
|
233
|
+
});
|
|
234
|
+
(0, vitest_1.it)('containsJSON', () => {
|
|
235
|
+
(0, vitest_1.expect)((0, assertions_1.containsJSON)('{"a":1}')).toBe(true);
|
|
236
|
+
(0, vitest_1.expect)((0, assertions_1.containsJSON)('not json')).toBe(false);
|
|
237
|
+
});
|
|
238
|
+
(0, vitest_1.it)('notContainsPII', () => {
|
|
239
|
+
(0, vitest_1.expect)((0, assertions_1.notContainsPII)('Just a normal text')).toBe(true);
|
|
240
|
+
(0, vitest_1.expect)((0, assertions_1.notContainsPII)('Email: user@example.com')).toBe(false);
|
|
241
|
+
});
|
|
242
|
+
(0, vitest_1.it)('hasSentiment', () => {
|
|
243
|
+
(0, vitest_1.expect)((0, assertions_1.hasSentiment)('This is great', 'positive')).toBe(true);
|
|
244
|
+
(0, vitest_1.expect)((0, assertions_1.hasSentiment)('This is terrible', 'negative')).toBe(true);
|
|
245
|
+
});
|
|
246
|
+
(0, vitest_1.it)('similarTo', () => {
|
|
247
|
+
(0, vitest_1.expect)((0, assertions_1.similarTo)('the quick brown fox', 'the quick brown dog', 0.5)).toBe(true);
|
|
248
|
+
(0, vitest_1.expect)((0, assertions_1.similarTo)('hello world', 'completely different', 0.8)).toBe(false);
|
|
249
|
+
});
|
|
250
|
+
(0, vitest_1.it)('withinRange', () => {
|
|
251
|
+
(0, vitest_1.expect)((0, assertions_1.withinRange)(5, 1, 10)).toBe(true);
|
|
252
|
+
(0, vitest_1.expect)((0, assertions_1.withinRange)(15, 1, 10)).toBe(false);
|
|
253
|
+
});
|
|
254
|
+
(0, vitest_1.it)('isValidEmail', () => {
|
|
255
|
+
(0, vitest_1.expect)((0, assertions_1.isValidEmail)('user@example.com')).toBe(true);
|
|
256
|
+
(0, vitest_1.expect)((0, assertions_1.isValidEmail)('not-an-email')).toBe(false);
|
|
257
|
+
});
|
|
258
|
+
(0, vitest_1.it)('isValidURL', () => {
|
|
259
|
+
(0, vitest_1.expect)((0, assertions_1.isValidURL)('https://example.com')).toBe(true);
|
|
260
|
+
(0, vitest_1.expect)((0, assertions_1.isValidURL)('not a url')).toBe(false);
|
|
261
|
+
});
|
|
262
|
+
(0, vitest_1.it)('hasNoHallucinations', () => {
|
|
263
|
+
(0, vitest_1.expect)((0, assertions_1.hasNoHallucinations)('Paris is in France', ['Paris', 'France'])).toBe(true);
|
|
264
|
+
(0, vitest_1.expect)((0, assertions_1.hasNoHallucinations)('Berlin is great', ['Paris'])).toBe(false);
|
|
265
|
+
});
|
|
266
|
+
(0, vitest_1.it)('matchesSchema', () => {
|
|
267
|
+
(0, vitest_1.expect)((0, assertions_1.matchesSchema)({ name: 'test', value: 1 }, { name: '', value: '' })).toBe(true);
|
|
268
|
+
(0, vitest_1.expect)((0, assertions_1.matchesSchema)({ name: 'test' }, { name: '', missing: '' })).toBe(false);
|
|
269
|
+
(0, vitest_1.expect)((0, assertions_1.matchesSchema)('not an object', { key: '' })).toBe(false);
|
|
270
|
+
});
|
|
271
|
+
(0, vitest_1.it)('hasNoToxicity', () => {
|
|
272
|
+
(0, vitest_1.expect)((0, assertions_1.hasNoToxicity)('Have a nice day')).toBe(true);
|
|
273
|
+
(0, vitest_1.expect)((0, assertions_1.hasNoToxicity)('You are an idiot')).toBe(false);
|
|
274
|
+
});
|
|
275
|
+
(0, vitest_1.it)('followsInstructions', () => {
|
|
276
|
+
(0, vitest_1.expect)((0, assertions_1.followsInstructions)('Hello world', ['Hello'])).toBe(true);
|
|
277
|
+
(0, vitest_1.expect)((0, assertions_1.followsInstructions)('Hello world', ['!goodbye'])).toBe(true);
|
|
278
|
+
(0, vitest_1.expect)((0, assertions_1.followsInstructions)('Hello world', ['missing'])).toBe(false);
|
|
279
|
+
});
|
|
280
|
+
(0, vitest_1.it)('containsAllRequiredFields', () => {
|
|
281
|
+
(0, vitest_1.expect)((0, assertions_1.containsAllRequiredFields)({ a: 1, b: 2 }, ['a', 'b'])).toBe(true);
|
|
282
|
+
(0, vitest_1.expect)((0, assertions_1.containsAllRequiredFields)({ a: 1 }, ['a', 'b'])).toBe(false);
|
|
283
|
+
});
|
|
284
|
+
(0, vitest_1.it)('hasValidCodeSyntax', () => {
|
|
285
|
+
(0, vitest_1.expect)((0, assertions_1.hasValidCodeSyntax)('{"valid": true}', 'json')).toBe(true);
|
|
286
|
+
(0, vitest_1.expect)((0, assertions_1.hasValidCodeSyntax)('{invalid}', 'json')).toBe(false);
|
|
287
|
+
});
|
|
288
|
+
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
const vitest_1 = require("vitest");
|
|
4
|
+
const client_1 = require("../client");
|
|
5
|
+
// Mock fetch globally
|
|
6
|
+
const mockFetch = vitest_1.vi.fn();
|
|
7
|
+
(0, vitest_1.describe)('AIEvalClient', () => {
|
|
8
|
+
(0, vitest_1.beforeEach)(() => {
|
|
9
|
+
vitest_1.vi.stubGlobal('fetch', mockFetch);
|
|
10
|
+
mockFetch.mockReset();
|
|
11
|
+
});
|
|
12
|
+
(0, vitest_1.afterEach)(() => {
|
|
13
|
+
vitest_1.vi.unstubAllGlobals();
|
|
14
|
+
});
|
|
15
|
+
(0, vitest_1.describe)('constructor', () => {
|
|
16
|
+
(0, vitest_1.it)('should throw if no API key is provided', () => {
|
|
17
|
+
(0, vitest_1.expect)(() => new client_1.AIEvalClient({ apiKey: '' })).toThrow('API key is required');
|
|
18
|
+
});
|
|
19
|
+
(0, vitest_1.it)('should initialize with explicit config', () => {
|
|
20
|
+
const client = new client_1.AIEvalClient({
|
|
21
|
+
apiKey: 'test-key',
|
|
22
|
+
baseUrl: 'https://api.test.com',
|
|
23
|
+
organizationId: 42,
|
|
24
|
+
});
|
|
25
|
+
(0, vitest_1.expect)(client).toBeDefined();
|
|
26
|
+
(0, vitest_1.expect)(client.getOrganizationId()).toBe(42);
|
|
27
|
+
});
|
|
28
|
+
(0, vitest_1.it)('should have all API modules', () => {
|
|
29
|
+
const client = new client_1.AIEvalClient({ apiKey: 'test-key' });
|
|
30
|
+
(0, vitest_1.expect)(client.traces).toBeDefined();
|
|
31
|
+
(0, vitest_1.expect)(client.evaluations).toBeDefined();
|
|
32
|
+
(0, vitest_1.expect)(client.llmJudge).toBeDefined();
|
|
33
|
+
(0, vitest_1.expect)(client.annotations).toBeDefined();
|
|
34
|
+
(0, vitest_1.expect)(client.developer).toBeDefined();
|
|
35
|
+
(0, vitest_1.expect)(client.organizations).toBeDefined();
|
|
36
|
+
});
|
|
37
|
+
});
|
|
38
|
+
(0, vitest_1.describe)('request method', () => {
|
|
39
|
+
(0, vitest_1.it)('should send auth header', async () => {
|
|
40
|
+
mockFetch.mockResolvedValue({
|
|
41
|
+
ok: true,
|
|
42
|
+
json: async () => ({ data: 'test' }),
|
|
43
|
+
status: 200,
|
|
44
|
+
});
|
|
45
|
+
const client = new client_1.AIEvalClient({ apiKey: 'my-secret-key', baseUrl: 'http://localhost:3000' });
|
|
46
|
+
await client.request('/api/test');
|
|
47
|
+
(0, vitest_1.expect)(mockFetch).toHaveBeenCalledTimes(1);
|
|
48
|
+
const [url, options] = mockFetch.mock.calls[0];
|
|
49
|
+
(0, vitest_1.expect)(url).toBe('http://localhost:3000/api/test');
|
|
50
|
+
(0, vitest_1.expect)(options.headers['Authorization']).toBe('Bearer my-secret-key');
|
|
51
|
+
(0, vitest_1.expect)(options.headers['Content-Type']).toBe('application/json');
|
|
52
|
+
});
|
|
53
|
+
(0, vitest_1.it)('should return parsed JSON on success', async () => {
|
|
54
|
+
mockFetch.mockResolvedValue({
|
|
55
|
+
ok: true,
|
|
56
|
+
json: async () => ({ result: 'ok' }),
|
|
57
|
+
status: 200,
|
|
58
|
+
});
|
|
59
|
+
const client = new client_1.AIEvalClient({ apiKey: 'key', baseUrl: 'http://localhost:3000' });
|
|
60
|
+
const data = await client.request('/api/test');
|
|
61
|
+
(0, vitest_1.expect)(data).toEqual({ result: 'ok' });
|
|
62
|
+
});
|
|
63
|
+
(0, vitest_1.it)('should throw on non-ok response', async () => {
|
|
64
|
+
mockFetch.mockResolvedValue({
|
|
65
|
+
ok: false,
|
|
66
|
+
json: async () => ({ error: 'Not found', code: 'NOT_FOUND' }),
|
|
67
|
+
status: 404,
|
|
68
|
+
});
|
|
69
|
+
const client = new client_1.AIEvalClient({ apiKey: 'key', baseUrl: 'http://localhost:3000', retry: { maxAttempts: 1 } });
|
|
70
|
+
await (0, vitest_1.expect)(client.request('/api/test')).rejects.toThrow();
|
|
71
|
+
});
|
|
72
|
+
(0, vitest_1.it)('should retry on rate limit errors', async () => {
|
|
73
|
+
mockFetch
|
|
74
|
+
.mockResolvedValueOnce({
|
|
75
|
+
ok: false,
|
|
76
|
+
json: async () => ({ error: 'Rate limited', code: 'RATE_LIMIT_EXCEEDED' }),
|
|
77
|
+
status: 429,
|
|
78
|
+
})
|
|
79
|
+
.mockResolvedValueOnce({
|
|
80
|
+
ok: true,
|
|
81
|
+
json: async () => ({ result: 'success' }),
|
|
82
|
+
status: 200,
|
|
83
|
+
});
|
|
84
|
+
const client = new client_1.AIEvalClient({
|
|
85
|
+
apiKey: 'key',
|
|
86
|
+
baseUrl: 'http://localhost:3000',
|
|
87
|
+
retry: { maxAttempts: 3, backoff: 'fixed' },
|
|
88
|
+
});
|
|
89
|
+
const data = await client.request('/api/test');
|
|
90
|
+
(0, vitest_1.expect)(data).toEqual({ result: 'success' });
|
|
91
|
+
(0, vitest_1.expect)(mockFetch).toHaveBeenCalledTimes(2);
|
|
92
|
+
});
|
|
93
|
+
(0, vitest_1.it)('should handle timeout', async () => {
|
|
94
|
+
mockFetch.mockImplementation(() => new Promise((_, reject) => {
|
|
95
|
+
const abortError = new Error('The operation was aborted');
|
|
96
|
+
abortError.name = 'AbortError';
|
|
97
|
+
setTimeout(() => reject(abortError), 50);
|
|
98
|
+
}));
|
|
99
|
+
const client = new client_1.AIEvalClient({
|
|
100
|
+
apiKey: 'key',
|
|
101
|
+
baseUrl: 'http://localhost:3000',
|
|
102
|
+
timeout: 10,
|
|
103
|
+
retry: { maxAttempts: 1 },
|
|
104
|
+
});
|
|
105
|
+
await (0, vitest_1.expect)(client.request('/api/slow')).rejects.toThrow();
|
|
106
|
+
});
|
|
107
|
+
});
|
|
108
|
+
(0, vitest_1.describe)('TraceAPI', () => {
|
|
109
|
+
(0, vitest_1.it)('should call correct endpoint for traces.create', async () => {
|
|
110
|
+
mockFetch.mockResolvedValue({
|
|
111
|
+
ok: true,
|
|
112
|
+
json: async () => ({ id: 1, name: 'Test Trace', traceId: 'trace-1' }),
|
|
113
|
+
status: 200,
|
|
114
|
+
});
|
|
115
|
+
const client = new client_1.AIEvalClient({ apiKey: 'key', baseUrl: 'http://localhost:3000', organizationId: 1 });
|
|
116
|
+
const result = await client.traces.create({ name: 'Test Trace', traceId: 'trace-1' });
|
|
117
|
+
(0, vitest_1.expect)(result.name).toBe('Test Trace');
|
|
118
|
+
const [url, options] = mockFetch.mock.calls[0];
|
|
119
|
+
(0, vitest_1.expect)(url).toBe('http://localhost:3000/api/traces');
|
|
120
|
+
(0, vitest_1.expect)(options.method).toBe('POST');
|
|
121
|
+
});
|
|
122
|
+
(0, vitest_1.it)('should call correct endpoint for traces.list', async () => {
|
|
123
|
+
mockFetch.mockResolvedValue({
|
|
124
|
+
ok: true,
|
|
125
|
+
json: async () => [],
|
|
126
|
+
status: 200,
|
|
127
|
+
});
|
|
128
|
+
const client = new client_1.AIEvalClient({ apiKey: 'key', baseUrl: 'http://localhost:3000' });
|
|
129
|
+
await client.traces.list({ limit: 10 });
|
|
130
|
+
const [url] = mockFetch.mock.calls[0];
|
|
131
|
+
(0, vitest_1.expect)(url).toContain('/api/traces');
|
|
132
|
+
(0, vitest_1.expect)(url).toContain('limit=10');
|
|
133
|
+
});
|
|
134
|
+
});
|
|
135
|
+
(0, vitest_1.describe)('EvaluationAPI', () => {
|
|
136
|
+
(0, vitest_1.it)('should call correct endpoint for evaluations.create', async () => {
|
|
137
|
+
mockFetch.mockResolvedValue({
|
|
138
|
+
ok: true,
|
|
139
|
+
json: async () => ({ id: 1, name: 'Eval' }),
|
|
140
|
+
status: 200,
|
|
141
|
+
});
|
|
142
|
+
const client = new client_1.AIEvalClient({ apiKey: 'key', baseUrl: 'http://localhost:3000', organizationId: 1 });
|
|
143
|
+
await client.evaluations.create({
|
|
144
|
+
name: 'Eval',
|
|
145
|
+
type: 'unit_test',
|
|
146
|
+
organizationId: 1,
|
|
147
|
+
createdBy: 1,
|
|
148
|
+
});
|
|
149
|
+
const [url, options] = mockFetch.mock.calls[0];
|
|
150
|
+
(0, vitest_1.expect)(url).toBe('http://localhost:3000/api/evaluations');
|
|
151
|
+
(0, vitest_1.expect)(options.method).toBe('POST');
|
|
152
|
+
});
|
|
153
|
+
(0, vitest_1.it)('should call correct endpoint for evaluations.createRun', async () => {
|
|
154
|
+
mockFetch.mockResolvedValue({
|
|
155
|
+
ok: true,
|
|
156
|
+
json: async () => ({ id: 1, status: 'running' }),
|
|
157
|
+
status: 200,
|
|
158
|
+
});
|
|
159
|
+
const client = new client_1.AIEvalClient({ apiKey: 'key', baseUrl: 'http://localhost:3000' });
|
|
160
|
+
await client.evaluations.createRun(42, { status: 'running' });
|
|
161
|
+
const [url, options] = mockFetch.mock.calls[0];
|
|
162
|
+
(0, vitest_1.expect)(url).toBe('http://localhost:3000/api/evaluations/42/runs');
|
|
163
|
+
(0, vitest_1.expect)(options.method).toBe('POST');
|
|
164
|
+
});
|
|
165
|
+
});
|
|
166
|
+
(0, vitest_1.describe)('LLMJudgeAPI', () => {
|
|
167
|
+
(0, vitest_1.it)('should call correct endpoint for llmJudge.evaluate', async () => {
|
|
168
|
+
mockFetch.mockResolvedValue({
|
|
169
|
+
ok: true,
|
|
170
|
+
json: async () => ({ result: { score: 85 }, config: {} }),
|
|
171
|
+
status: 200,
|
|
172
|
+
});
|
|
173
|
+
const client = new client_1.AIEvalClient({ apiKey: 'key', baseUrl: 'http://localhost:3000' });
|
|
174
|
+
const result = await client.llmJudge.evaluate({
|
|
175
|
+
configId: 1,
|
|
176
|
+
input: 'test input',
|
|
177
|
+
output: 'test output',
|
|
178
|
+
});
|
|
179
|
+
(0, vitest_1.expect)(result.result.score).toBe(85);
|
|
180
|
+
const [url, options] = mockFetch.mock.calls[0];
|
|
181
|
+
(0, vitest_1.expect)(url).toBe('http://localhost:3000/api/llm-judge/evaluate');
|
|
182
|
+
(0, vitest_1.expect)(options.method).toBe('POST');
|
|
183
|
+
});
|
|
184
|
+
});
|
|
185
|
+
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|