@pauly4010/evalai-sdk 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +46 -0
- package/README.md +108 -9
- package/dist/cli/api.d.ts +79 -0
- package/dist/cli/api.js +74 -0
- package/dist/cli/check.d.ts +16 -13
- package/dist/cli/check.js +117 -127
- package/dist/cli/ci-context.d.ts +6 -0
- package/dist/cli/ci-context.js +51 -0
- package/dist/cli/config.d.ts +24 -0
- package/dist/cli/config.js +158 -0
- package/dist/cli/constants.d.ts +13 -0
- package/dist/cli/constants.js +16 -0
- package/dist/cli/doctor.d.ts +11 -0
- package/dist/cli/doctor.js +82 -0
- package/dist/cli/formatters/github.d.ts +8 -0
- package/dist/cli/formatters/github.js +119 -0
- package/dist/cli/formatters/human.d.ts +6 -0
- package/dist/cli/formatters/human.js +92 -0
- package/dist/cli/formatters/json.d.ts +6 -0
- package/dist/cli/formatters/json.js +10 -0
- package/dist/cli/formatters/types.d.ts +76 -0
- package/dist/cli/formatters/types.js +5 -0
- package/dist/cli/gate.d.ts +13 -0
- package/dist/cli/gate.js +108 -0
- package/dist/cli/index.d.ts +1 -0
- package/dist/cli/index.js +31 -5
- package/dist/cli/init.d.ts +7 -0
- package/dist/cli/init.js +69 -0
- package/dist/cli/render/snippet.d.ts +5 -0
- package/dist/cli/render/snippet.js +15 -0
- package/dist/cli/render/sort.d.ts +10 -0
- package/dist/cli/render/sort.js +24 -0
- package/dist/cli/report/build-check-report.d.ts +16 -0
- package/dist/cli/report/build-check-report.js +94 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +4 -1
- package/dist/integrations/openai-eval.d.ts +53 -0
- package/dist/integrations/openai-eval.js +226 -0
- package/dist/utils/input-hash.d.ts +8 -0
- package/dist/utils/input-hash.js +38 -0
- package/package.json +10 -3
- package/.env.example +0 -0
- package/ADDITIONAL_ISSUES_FOUND.md +0 -174
- package/dist/__tests__/assertions.test.d.ts +0 -1
- package/dist/__tests__/assertions.test.js +0 -288
- package/dist/__tests__/client.test.d.ts +0 -1
- package/dist/__tests__/client.test.js +0 -185
- package/dist/__tests__/testing.test.d.ts +0 -1
- package/dist/__tests__/testing.test.js +0 -230
- package/dist/__tests__/workflows.test.d.ts +0 -1
- package/dist/__tests__/workflows.test.js +0 -222
- package/evalai-sdk-1.2.0.tgz +0 -0
- package/postcss.config.mjs +0 -2
package/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,52 @@ All notable changes to the @pauly4010/evalai-sdk package will be documented in t
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [1.5.0] - 2026-02-18
|
|
9
|
+
|
|
10
|
+
### ✨ Added
|
|
11
|
+
|
|
12
|
+
#### evalai CLI — CI DevX
|
|
13
|
+
|
|
14
|
+
- **`--format github`** — GitHub Actions annotations + step summary (`$GITHUB_STEP_SUMMARY`)
|
|
15
|
+
- **`--format json`** — Machine-readable output only
|
|
16
|
+
- **`--onFail import`** — On gate failure, import run metadata + failures to dashboard (idempotent per CI run)
|
|
17
|
+
- **`--explain`** — Show score breakdown (contribPts) and thresholds
|
|
18
|
+
- **`evalai doctor`** — Verify CI setup (config, API key, quality endpoint, baseline)
|
|
19
|
+
- **Pinned CLI invocation** — Use `npx -y @pauly4010/evalai-sdk@^1` for stable CI (avoids surprise v2 breaks)
|
|
20
|
+
|
|
21
|
+
#### Documentation
|
|
22
|
+
|
|
23
|
+
- **README** — 3-section adoption flow: 60s local → optional CI gate → no lock-in
|
|
24
|
+
- **Init output** — Shows path written, pinned snippet with `--format github --onFail import`
|
|
25
|
+
- **openAIChatEval** — "Gate this in CI" hint uses pinned invocation
|
|
26
|
+
|
|
27
|
+
### 🔧 Changed
|
|
28
|
+
|
|
29
|
+
- **evalai init** — Output: "Wrote evalai.config.json at {path}", one next step, uninstall line
|
|
30
|
+
- **Baseline missing** — Treated as config failure (BAD_ARGS), not API error
|
|
31
|
+
- **parseArgs** — Returns `{ ok: true, args }` or `{ ok: false, exitCode, message }` (no `process.exit` inside) for testability
|
|
32
|
+
|
|
33
|
+
### 📦 Internal
|
|
34
|
+
|
|
35
|
+
- Refactored `check.ts` into modules: `api.ts`, `gate.ts`, `report/build-check-report.ts`, `formatters/`
|
|
36
|
+
- Deterministic helpers: `truncateSnippet`, `sortFailedCases`
|
|
37
|
+
- Formatter tests: `json.test.ts`, `github.test.ts`
|
|
38
|
+
- Doctor tests: `doctor.test.ts`
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## [1.4.1] - 2026-02-18
|
|
43
|
+
|
|
44
|
+
### ✨ Added
|
|
45
|
+
|
|
46
|
+
- **evalai check `--baseline production`** — Compare against latest run tagged with `environment=prod`
|
|
47
|
+
- **Baseline missing handling** — Clear failure when baseline not found and comparison requested
|
|
48
|
+
|
|
49
|
+
### 🔧 Changed
|
|
50
|
+
|
|
51
|
+
- **Package hardening** — `files`, `module`, `sideEffects: false` for leaner npm publish
|
|
52
|
+
- **CLI** — Passes `baseline` param to quality API for deterministic CI gates
|
|
53
|
+
|
|
8
54
|
## [1.3.0] - 2025-10-21
|
|
9
55
|
|
|
10
56
|
### ✨ Added
|
package/README.md
CHANGED
|
@@ -3,16 +3,87 @@
|
|
|
3
3
|
[@pauly4010/evalai-sdk on npm](https://www.npmjs.com/package/@pauly4010/evalai-sdk)
|
|
4
4
|
[@pauly4010/evalai-sdk on npm](https://www.npmjs.com/package/@pauly4010/evalai-sdk)
|
|
5
5
|
|
|
6
|
-
|
|
6
|
+
Evaluate your AI systems locally in 60 seconds. Add an optional CI gate in 2 minutes. No lock-in — remove by deleting the config file.
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
## 1. 60 seconds: Run locally (no account)
|
|
11
|
+
|
|
12
|
+
Install, run, get a score. No EvalAI account, no API key, no dashboard.
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
npm install @pauly4010/evalai-sdk openai
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
```typescript
|
|
19
|
+
import { openAIChatEval } from "@pauly4010/evalai-sdk";
|
|
20
|
+
|
|
21
|
+
await openAIChatEval({
|
|
22
|
+
name: "chat-regression",
|
|
23
|
+
cases: [
|
|
24
|
+
{ input: "Hello", expectedOutput: "greeting" },
|
|
25
|
+
{ input: "2 + 2 = ?", expectedOutput: "4" },
|
|
26
|
+
],
|
|
27
|
+
});
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Set `OPENAI_API_KEY` in your environment. You'll see something like:
|
|
31
|
+
|
|
32
|
+
```
|
|
33
|
+
PASS 2/2 (score: 100)
|
|
34
|
+
|
|
35
|
+
Tip: Want dashboards and history?
|
|
36
|
+
Set EVALAI_API_KEY and connect this to the platform.
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
With failures you get `FAIL 9/10 (score 90)`, failed cases listed, and a hint: `Gate this in CI: npx -y @pauly4010/evalai-sdk@^1 init`.
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## 2. Optional: Add a CI gate (2 minutes)
|
|
44
|
+
|
|
45
|
+
When you're ready to gate PRs on quality:
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
npx -y @pauly4010/evalai-sdk@^1 init
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
**Create an evaluation in the dashboard → paste its ID into `evalai.config.json`:**
|
|
52
|
+
|
|
53
|
+
```json
|
|
54
|
+
{ "evaluationId": "42" }
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Then add to your CI:
|
|
58
|
+
|
|
59
|
+
```yaml
|
|
60
|
+
- name: EvalAI gate
|
|
61
|
+
env:
|
|
62
|
+
EVALAI_API_KEY: ${{ secrets.EVALAI_API_KEY }}
|
|
63
|
+
run: npx -y @pauly4010/evalai-sdk@^1 check --format github --onFail import
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
You'll get GitHub annotations + a step summary + a dashboard link.
|
|
67
|
+
|
|
68
|
+
- `--format github` — Annotations and step summary in GitHub Actions
|
|
69
|
+
- `--onFail import` — On failure, EvalAI imports the run metadata + failures into the dashboard (idempotent per CI run)
|
|
70
|
+
|
|
71
|
+
---
|
|
72
|
+
|
|
73
|
+
## 3. No lock-in
|
|
74
|
+
|
|
75
|
+
To stop using EvalAI: delete `evalai.config.json`. Your local `openAIChatEval` runs work the same without it. No account cancellation or data export is required.
|
|
76
|
+
|
|
77
|
+
---
|
|
7
78
|
|
|
8
79
|
## Installation
|
|
9
80
|
|
|
10
81
|
```bash
|
|
11
|
-
npm install @pauly4010/evalai-sdk
|
|
82
|
+
npm install @pauly4010/evalai-sdk openai
|
|
12
83
|
# or
|
|
13
|
-
yarn add @pauly4010/evalai-sdk
|
|
84
|
+
yarn add @pauly4010/evalai-sdk openai
|
|
14
85
|
# or
|
|
15
|
-
pnpm add @pauly4010/evalai-sdk
|
|
86
|
+
pnpm add @pauly4010/evalai-sdk openai
|
|
16
87
|
```
|
|
17
88
|
|
|
18
89
|
## Environment Support
|
|
@@ -47,7 +118,7 @@ The following features require Node.js and **will not work in browsers**:
|
|
|
47
118
|
|
|
48
119
|
Use appropriate features based on your environment. The SDK will throw helpful errors if you try to use Node.js-only features in a browser.
|
|
49
120
|
|
|
50
|
-
##
|
|
121
|
+
## AIEvalClient (Platform API)
|
|
51
122
|
|
|
52
123
|
```typescript
|
|
53
124
|
import { AIEvalClient } from "@pauly4010/evalai-sdk";
|
|
@@ -501,13 +572,13 @@ console.log("Plan:", org.plan);
|
|
|
501
572
|
console.log("Status:", org.status);
|
|
502
573
|
```
|
|
503
574
|
|
|
504
|
-
## evalai CLI (v1.4.0)
|
|
575
|
+
## evalai CLI (v1.5.0)
|
|
505
576
|
|
|
506
577
|
The SDK includes a CLI for CI/CD evaluation gates. Install globally or use via `npx`:
|
|
507
578
|
|
|
508
579
|
```bash
|
|
509
580
|
# Via npx (no global install)
|
|
510
|
-
npx @pauly4010/evalai-sdk check --minScore 92 --evaluationId 42 --apiKey $EVALAI_API_KEY
|
|
581
|
+
npx -y @pauly4010/evalai-sdk@^1 check --minScore 92 --evaluationId 42 --apiKey $EVALAI_API_KEY
|
|
511
582
|
|
|
512
583
|
# Or install globally
|
|
513
584
|
npm install -g @pauly4010/evalai-sdk
|
|
@@ -522,19 +593,47 @@ Gate deployments on quality scores, regression, and compliance:
|
|
|
522
593
|
|--------|-------------|
|
|
523
594
|
| `--evaluationId <id>` | **Required.** Evaluation to gate on |
|
|
524
595
|
| `--apiKey <key>` | API key (or `EVALAI_API_KEY` env) |
|
|
596
|
+
| `--format <fmt>` | `human` (default), `json`, or `github` (annotations + step summary) |
|
|
597
|
+
| `--onFail import` | When gate fails, import run with CI context for debugging |
|
|
598
|
+
| `--explain` | Show score breakdown and thresholds |
|
|
525
599
|
| `--minScore <n>` | Fail if score < n (0–100) |
|
|
526
600
|
| `--maxDrop <n>` | Fail if score dropped > n from baseline |
|
|
527
601
|
| `--minN <n>` | Fail if total test cases < n |
|
|
528
602
|
| `--allowWeakEvidence` | Permit weak evidence level |
|
|
529
603
|
| `--policy <name>` | Enforce HIPAA, SOC2, GDPR, PCI_DSS, FINRA_4511 |
|
|
530
|
-
| `--baseline <mode>` | `published` or `previous` |
|
|
604
|
+
| `--baseline <mode>` | `published`, `previous`, or `production` |
|
|
531
605
|
| `--baseUrl <url>` | API base URL |
|
|
532
606
|
|
|
533
607
|
**Exit codes:** 0=pass, 1=score below, 2=regression, 3=policy violation, 4=API error, 5=bad args, 6=low N, 7=weak evidence
|
|
534
608
|
|
|
609
|
+
### evalai doctor
|
|
610
|
+
|
|
611
|
+
Verify CI/CD setup before running check:
|
|
612
|
+
|
|
613
|
+
```bash
|
|
614
|
+
npx -y @pauly4010/evalai-sdk@^1 doctor --evaluationId 42 --apiKey $EVALAI_API_KEY
|
|
615
|
+
```
|
|
616
|
+
|
|
617
|
+
Uses the same quality endpoint as `check` — if doctor passes, check works.
|
|
618
|
+
|
|
535
619
|
## Changelog
|
|
536
620
|
|
|
537
|
-
### v1.4.0 (Latest)
|
|
621
|
+
### v1.5.0 (Latest)
|
|
622
|
+
|
|
623
|
+
- **`--format github`** — Annotations + step summary in GitHub Actions
|
|
624
|
+
- **`--format json`** — Machine-readable output
|
|
625
|
+
- **`--onFail import`** — Import failing runs to dashboard (idempotent per CI run)
|
|
626
|
+
- **`--explain`** — Score breakdown and thresholds
|
|
627
|
+
- **`evalai doctor`** — Verify CI setup
|
|
628
|
+
- **Pinned invocation** — Use `npx -y @pauly4010/evalai-sdk@^1` for stable CI
|
|
629
|
+
- **README** — 3-section adoption flow (60s local → CI gate → no lock-in)
|
|
630
|
+
|
|
631
|
+
### v1.4.1
|
|
632
|
+
|
|
633
|
+
- **evalai check `--baseline production`** — Compare against latest prod-tagged run
|
|
634
|
+
- **Package hardening** — Leaner npm publish with `files`, `sideEffects: false`
|
|
635
|
+
|
|
636
|
+
### v1.4.0
|
|
538
637
|
|
|
539
638
|
- **evalai CLI** — Command-line tool for CI/CD evaluation gates
|
|
540
639
|
- `evalai check` — Gate deployments on quality scores, regression, and compliance
|
|
package/dist/cli/api.d.ts
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* API fetch helpers for evalai check.
|
|
3
|
+
* Captures x-request-id from response headers.
|
|
4
|
+
*/
|
|
5
|
+
export type QualityLatestData = {
|
|
6
|
+
score?: number;
|
|
7
|
+
total?: number | null;
|
|
8
|
+
evidenceLevel?: string | null;
|
|
9
|
+
baselineScore?: number | null;
|
|
10
|
+
regressionDelta?: number | null;
|
|
11
|
+
baselineMissing?: boolean | null;
|
|
12
|
+
breakdown?: {
|
|
13
|
+
passRate?: number;
|
|
14
|
+
safety?: number;
|
|
15
|
+
judge?: number;
|
|
16
|
+
};
|
|
17
|
+
flags?: string[];
|
|
18
|
+
evaluationRunId?: number;
|
|
19
|
+
evaluationId?: number;
|
|
20
|
+
};
|
|
21
|
+
export type RunDetailsData = {
|
|
22
|
+
results?: Array<{
|
|
23
|
+
testCaseId?: number;
|
|
24
|
+
status?: string;
|
|
25
|
+
output?: string;
|
|
26
|
+
durationMs?: number;
|
|
27
|
+
assertionsJson?: Record<string, unknown>;
|
|
28
|
+
test_cases?: {
|
|
29
|
+
name?: string;
|
|
30
|
+
input?: string;
|
|
31
|
+
expectedOutput?: string;
|
|
32
|
+
};
|
|
33
|
+
}>;
|
|
34
|
+
};
|
|
35
|
+
export declare function fetchQualityLatest(baseUrl: string, apiKey: string, evaluationId: string, baseline: string): Promise<{
|
|
36
|
+
ok: true;
|
|
37
|
+
data: QualityLatestData;
|
|
38
|
+
requestId?: string;
|
|
39
|
+
} | {
|
|
40
|
+
ok: false;
|
|
41
|
+
status: number;
|
|
42
|
+
body: string;
|
|
43
|
+
requestId?: string;
|
|
44
|
+
}>;
|
|
45
|
+
export declare function fetchRunDetails(baseUrl: string, apiKey: string, evaluationId: string, runId: number): Promise<{
|
|
46
|
+
ok: true;
|
|
47
|
+
data: RunDetailsData;
|
|
48
|
+
} | {
|
|
49
|
+
ok: false;
|
|
50
|
+
}>;
|
|
51
|
+
export type CiContext = {
|
|
52
|
+
provider?: 'github' | 'gitlab' | 'circle' | 'unknown';
|
|
53
|
+
repo?: string;
|
|
54
|
+
sha?: string;
|
|
55
|
+
branch?: string;
|
|
56
|
+
pr?: number;
|
|
57
|
+
runUrl?: string;
|
|
58
|
+
actor?: string;
|
|
59
|
+
};
|
|
60
|
+
export type ImportResult = {
|
|
61
|
+
testCaseId: number;
|
|
62
|
+
status: 'passed' | 'failed';
|
|
63
|
+
output: string;
|
|
64
|
+
latencyMs?: number;
|
|
65
|
+
costUsd?: number;
|
|
66
|
+
assertionsJson?: Record<string, unknown>;
|
|
67
|
+
};
|
|
68
|
+
export declare function importRunOnFail(baseUrl: string, apiKey: string, evaluationId: string, results: ImportResult[], options: {
|
|
69
|
+
idempotencyKey?: string;
|
|
70
|
+
ci?: CiContext;
|
|
71
|
+
importClientVersion?: string;
|
|
72
|
+
}): Promise<{
|
|
73
|
+
ok: true;
|
|
74
|
+
runId: number;
|
|
75
|
+
} | {
|
|
76
|
+
ok: false;
|
|
77
|
+
status: number;
|
|
78
|
+
body: string;
|
|
79
|
+
}>;
|
package/dist/cli/api.js
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* API fetch helpers for evalai check.
|
|
4
|
+
* Captures x-request-id from response headers.
|
|
5
|
+
*/
|
|
6
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
|
+
exports.fetchQualityLatest = fetchQualityLatest;
|
|
8
|
+
exports.fetchRunDetails = fetchRunDetails;
|
|
9
|
+
exports.importRunOnFail = importRunOnFail;
|
|
10
|
+
async function fetchQualityLatest(baseUrl, apiKey, evaluationId, baseline) {
|
|
11
|
+
const headers = { Authorization: `Bearer ${apiKey}` };
|
|
12
|
+
const url = `${baseUrl.replace(/\/$/, '')}/api/quality?evaluationId=${evaluationId}&action=latest&baseline=${baseline}`;
|
|
13
|
+
try {
|
|
14
|
+
const res = await fetch(url, { headers });
|
|
15
|
+
const requestId = res.headers.get('x-request-id') ?? undefined;
|
|
16
|
+
const body = await res.text();
|
|
17
|
+
if (!res.ok) {
|
|
18
|
+
return { ok: false, status: res.status, body, requestId };
|
|
19
|
+
}
|
|
20
|
+
const data = JSON.parse(body);
|
|
21
|
+
return { ok: true, data, requestId };
|
|
22
|
+
}
|
|
23
|
+
catch (err) {
|
|
24
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
25
|
+
return { ok: false, status: 0, body: msg, requestId: undefined };
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
async function fetchRunDetails(baseUrl, apiKey, evaluationId, runId) {
|
|
29
|
+
const headers = { Authorization: `Bearer ${apiKey}` };
|
|
30
|
+
const url = `${baseUrl.replace(/\/$/, '')}/api/evaluations/${evaluationId}/runs/${runId}`;
|
|
31
|
+
try {
|
|
32
|
+
const res = await fetch(url, { headers });
|
|
33
|
+
if (!res.ok)
|
|
34
|
+
return { ok: false };
|
|
35
|
+
const data = (await res.json());
|
|
36
|
+
return { ok: true, data };
|
|
37
|
+
}
|
|
38
|
+
catch {
|
|
39
|
+
return { ok: false };
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
async function importRunOnFail(baseUrl, apiKey, evaluationId, results, options) {
|
|
43
|
+
const headers = {
|
|
44
|
+
Authorization: `Bearer ${apiKey}`,
|
|
45
|
+
'Content-Type': 'application/json',
|
|
46
|
+
};
|
|
47
|
+
if (options.idempotencyKey) {
|
|
48
|
+
headers['Idempotency-Key'] = options.idempotencyKey;
|
|
49
|
+
}
|
|
50
|
+
const body = {
|
|
51
|
+
environment: 'dev',
|
|
52
|
+
results,
|
|
53
|
+
importClientVersion: options.importClientVersion ?? 'evalai-cli',
|
|
54
|
+
ci: options.ci,
|
|
55
|
+
};
|
|
56
|
+
const url = `${baseUrl.replace(/\/$/, '')}/api/evaluations/${evaluationId}/runs/import`;
|
|
57
|
+
try {
|
|
58
|
+
const res = await fetch(url, {
|
|
59
|
+
method: 'POST',
|
|
60
|
+
headers,
|
|
61
|
+
body: JSON.stringify(body),
|
|
62
|
+
});
|
|
63
|
+
const text = await res.text();
|
|
64
|
+
if (!res.ok) {
|
|
65
|
+
return { ok: false, status: res.status, body: text };
|
|
66
|
+
}
|
|
67
|
+
const data = JSON.parse(text);
|
|
68
|
+
return { ok: true, runId: data.runId };
|
|
69
|
+
}
|
|
70
|
+
catch (err) {
|
|
71
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
72
|
+
return { ok: false, status: 0, body: msg };
|
|
73
|
+
}
|
|
74
|
+
}
|
package/dist/cli/check.d.ts
CHANGED
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
* --minN <n> Fail if total test cases < n (low sample size)
|
|
15
15
|
* --allowWeakEvidence If false (default), fail when evidenceLevel is 'weak'
|
|
16
16
|
* --policy <name> Enforce a compliance policy (e.g. HIPAA, SOC2, GDPR)
|
|
17
|
-
* --baseline <mode>
|
|
17
|
+
* --baseline <mode> Baseline comparison mode: "published" (default), "previous", or "production"
|
|
18
18
|
* --evaluationId <id> Required. The evaluation to gate on.
|
|
19
19
|
* --baseUrl <url> API base URL (default: EVALAI_BASE_URL or http://localhost:3000)
|
|
20
20
|
* --apiKey <key> API key (default: EVALAI_API_KEY env var)
|
|
@@ -33,16 +33,8 @@
|
|
|
33
33
|
* EVALAI_BASE_URL — API base URL (default: http://localhost:3000)
|
|
34
34
|
* EVALAI_API_KEY — API key for authentication
|
|
35
35
|
*/
|
|
36
|
-
export
|
|
37
|
-
|
|
38
|
-
readonly SCORE_BELOW: 1;
|
|
39
|
-
readonly REGRESSION: 2;
|
|
40
|
-
readonly POLICY_VIOLATION: 3;
|
|
41
|
-
readonly API_ERROR: 4;
|
|
42
|
-
readonly BAD_ARGS: 5;
|
|
43
|
-
readonly LOW_N: 6;
|
|
44
|
-
readonly WEAK_EVIDENCE: 7;
|
|
45
|
-
};
|
|
36
|
+
export { EXIT } from './constants';
|
|
37
|
+
export type FormatType = 'human' | 'json' | 'github';
|
|
46
38
|
export interface CheckArgs {
|
|
47
39
|
baseUrl: string;
|
|
48
40
|
apiKey: string;
|
|
@@ -52,7 +44,18 @@ export interface CheckArgs {
|
|
|
52
44
|
allowWeakEvidence: boolean;
|
|
53
45
|
evaluationId: string;
|
|
54
46
|
policy?: string;
|
|
55
|
-
baseline: 'published' | 'previous';
|
|
47
|
+
baseline: 'published' | 'previous' | 'production';
|
|
48
|
+
format: FormatType;
|
|
49
|
+
explain: boolean;
|
|
50
|
+
onFail?: 'import';
|
|
56
51
|
}
|
|
57
|
-
export
|
|
52
|
+
export type ParseArgsResult = {
|
|
53
|
+
ok: true;
|
|
54
|
+
args: CheckArgs;
|
|
55
|
+
} | {
|
|
56
|
+
ok: false;
|
|
57
|
+
exitCode: number;
|
|
58
|
+
message: string;
|
|
59
|
+
};
|
|
60
|
+
export declare function parseArgs(argv: string[]): ParseArgsResult;
|
|
58
61
|
export declare function runCheck(args: CheckArgs): Promise<number>;
|