@pauly4010/evalai-sdk 1.8.0 → 1.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. package/CHANGELOG.md +54 -0
  2. package/README.md +136 -23
  3. package/dist/assertions.js +51 -18
  4. package/dist/batch.js +8 -2
  5. package/dist/cli/api.js +3 -1
  6. package/dist/cli/check.js +19 -6
  7. package/dist/cli/ci-context.js +3 -1
  8. package/dist/cli/ci.d.ts +45 -0
  9. package/dist/cli/ci.js +192 -0
  10. package/dist/cli/config.js +28 -8
  11. package/dist/cli/diff.d.ts +173 -0
  12. package/dist/cli/diff.js +685 -0
  13. package/dist/cli/discover.d.ts +84 -0
  14. package/dist/cli/discover.js +419 -0
  15. package/dist/cli/doctor.js +62 -19
  16. package/dist/cli/env.d.ts +21 -0
  17. package/dist/cli/env.js +42 -0
  18. package/dist/cli/explain.js +168 -36
  19. package/dist/cli/formatters/human.js +4 -1
  20. package/dist/cli/formatters/pr-comment.js +3 -1
  21. package/dist/cli/gate.js +6 -2
  22. package/dist/cli/impact-analysis.d.ts +63 -0
  23. package/dist/cli/impact-analysis.js +252 -0
  24. package/dist/cli/index.js +185 -0
  25. package/dist/cli/manifest.d.ts +103 -0
  26. package/dist/cli/manifest.js +282 -0
  27. package/dist/cli/migrate.d.ts +41 -0
  28. package/dist/cli/migrate.js +349 -0
  29. package/dist/cli/policy-packs.js +8 -2
  30. package/dist/cli/print-config.js +33 -14
  31. package/dist/cli/regression-gate.js +8 -2
  32. package/dist/cli/report/build-check-report.js +8 -2
  33. package/dist/cli/run.d.ts +101 -0
  34. package/dist/cli/run.js +395 -0
  35. package/dist/cli/share.js +3 -1
  36. package/dist/cli/upgrade.js +2 -1
  37. package/dist/cli/workspace.d.ts +28 -0
  38. package/dist/cli/workspace.js +58 -0
  39. package/dist/client.d.ts +16 -19
  40. package/dist/client.js +60 -43
  41. package/dist/client.request.test.d.ts +1 -1
  42. package/dist/client.request.test.js +222 -147
  43. package/dist/context.js +3 -1
  44. package/dist/errors.js +11 -4
  45. package/dist/export.js +3 -1
  46. package/dist/index.d.ts +8 -2
  47. package/dist/index.js +30 -5
  48. package/dist/integrations/anthropic.d.ts +20 -1
  49. package/dist/integrations/openai-eval.js +4 -2
  50. package/dist/integrations/openai.d.ts +24 -1
  51. package/dist/local.js +3 -1
  52. package/dist/logger.js +6 -2
  53. package/dist/pagination.js +6 -2
  54. package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
  55. package/dist/runtime/adapters/config-to-dsl.js +394 -0
  56. package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
  57. package/dist/runtime/adapters/testsuite-to-dsl.js +276 -0
  58. package/dist/runtime/context.d.ts +26 -0
  59. package/dist/runtime/context.js +74 -0
  60. package/dist/runtime/eval.d.ts +46 -0
  61. package/dist/runtime/eval.js +244 -0
  62. package/dist/runtime/execution-mode.d.ts +80 -0
  63. package/dist/runtime/execution-mode.js +357 -0
  64. package/dist/runtime/executor.d.ts +16 -0
  65. package/dist/runtime/executor.js +152 -0
  66. package/dist/runtime/registry.d.ts +78 -0
  67. package/dist/runtime/registry.js +403 -0
  68. package/dist/runtime/run-report.d.ts +200 -0
  69. package/dist/runtime/run-report.js +222 -0
  70. package/dist/runtime/types.d.ts +356 -0
  71. package/dist/runtime/types.js +76 -0
  72. package/dist/testing.d.ts +65 -0
  73. package/dist/testing.js +49 -2
  74. package/dist/types.d.ts +100 -69
  75. package/dist/utils/input-hash.js +4 -1
  76. package/dist/version.d.ts +1 -1
  77. package/dist/version.js +1 -1
  78. package/dist/workflows.js +62 -14
  79. package/package.json +115 -110
package/CHANGELOG.md CHANGED
@@ -5,6 +5,60 @@ All notable changes to the @pauly4010/evalai-sdk package will be documented in t
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [1.9.0] - 2026-02-27
9
+
10
+ ### ✨ Added
11
+
12
+ #### CLI — One-Command CI Loop (`evalai ci`)
13
+
14
+ - **`evalai ci`** — A single command that teams add to their GitHub workflows and never have to think about again
15
+ - **Complete CI pipeline**: discover → manifest → impact → run → diff → PR summary → safe failure → "next step"
16
+ - **Automatic manifest building**: Builds manifest if missing, no manual steps required
17
+ - **Impact analysis integration**: `--impacted-only` flag for targeted testing
18
+ - **Smart exit codes**: 0=clean, 1=regressions, 2=config/infra issues
19
+ - **Self-documenting failures**: Always prints copy/paste next step for debugging
20
+ - **GitHub Step Summary integration**: Automatic PR summaries with regressions and artifacts
21
+
22
+ #### CLI — Durable Run History & Diff System
23
+
24
+ - **Run artifact retention**: Timestamped artifacts in `.evalai/runs/run-<runId>.json`
25
+ - **Run index file**: `.evalai/runs/index.json` tracks all runs with metadata
26
+ - **Schema versioning**: `RunResult` and `DiffResult` include `schemaVersion` for compatibility
27
+ - **Base/head shortcuts**: `--base baseline`, `--base last`, `--head last` for common cases
28
+ - **Floating point normalization**: Consistent score/delta calculations across runs
29
+ - **Comprehensive diff comparison**: Classifies regressions, improvements, added, removed specs
30
+
31
+ #### CLI — Centralized Architecture
32
+
33
+ - **Environment detection**: `isCI()`, `isGitHubActions()`, `getGitHubStepSummaryPath()` unified
34
+ - **Workspace resolution**: `resolveEvalWorkspace()` provides all `.evalai` paths
35
+ - **Git reference detection**: Comprehensive patterns for branches, tags, and ranges
36
+ - **No more duplication**: All commands use shared utilities for consistency
37
+
38
+ #### CLI — CI Friendliness
39
+
40
+ - **Fail-safe base resolution**: Clear error messages when base artifacts are missing in CI
41
+ - **GitHub Step Summary**: Rich markdown summaries with metrics, regressions, and artifact links
42
+ - **CI-specific error handling**: Exit code 2 for config issues with helpful guidance
43
+ - **Artifact download instructions**: Exact commands for manual base artifact setup
44
+
45
+ ### 🔧 Changed
46
+
47
+ - **Exit codes standardized**: 0=clean, 1=regressions, 2=config/infra issues across all commands
48
+ - **Schema compatibility**: Added `schemaVersion` validation for future-proofing
49
+ - **Path resolution**: All commands use centralized workspace helpers
50
+ - **Error messages**: More actionable and context-aware guidance
51
+
52
+ ### 📊 New Features Summary
53
+
54
+ - **One-command CI**: `evalai ci` replaces multi-step workflows
55
+ - **Durable history**: Run artifacts preserved with smart indexing
56
+ - **Smart diffing**: Automated regression detection with GitHub integration
57
+ - **Centralized utilities**: Environment detection and workspace resolution unified
58
+ - **Self-documenting**: Clear next steps for any failure scenario
59
+
60
+ ---
61
+
8
62
  ## [1.8.0] - 2026-02-26
9
63
 
10
64
  ### ✨ Added
package/README.md CHANGED
@@ -7,41 +7,150 @@
7
7
  [![Contract Version](https://img.shields.io/badge/report%20schema-v1-blue.svg)](#)
8
8
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
9
9
 
10
- **Stop LLM regressions in CI in minutes.**
10
+ **One-command CI for AI evaluation. Complete pipeline: discover → manifest → impact → run → diff → PR summary.**
11
11
 
12
- Zero to gate in under 5 minutes. No infra. No lock-in. Remove anytime.
12
+ Zero to production CI in 60 seconds. No infra. No lock-in. Remove anytime.
13
13
 
14
14
  ---
15
15
 
16
- ## Quick Start (2 minutes)
16
+ ## Quick Start (60 seconds)
17
+
18
+ Add this to your `.github/workflows/evalai.yml`:
19
+
20
+ ```yaml
21
+ name: EvalAI CI
22
+ on: [push, pull_request]
23
+ jobs:
24
+ evalai:
25
+ runs-on: ubuntu-latest
26
+ steps:
27
+ - uses: actions/checkout@v4
28
+ - uses: actions/setup-node@v4
29
+ - run: npm ci
30
+ - run: npx @pauly4010/evalai-sdk ci --format github --write-results --base main
31
+ - uses: actions/upload-artifact@v4
32
+ if: always()
33
+ with:
34
+ name: evalai-results
35
+ path: .evalai/
36
+ ```
37
+
38
+ Create `eval/your-spec.spec.ts`:
39
+
40
+ ```typescript
41
+ import { defineEval } from "@pauly4010/evalai-sdk";
42
+
43
+ defineEval({
44
+ name: "Basic Math Operations",
45
+ description: "Test fundamental arithmetic",
46
+ prompt: "Test: 1+1=2, string concatenation, array includes",
47
+ expected: "All tests should pass",
48
+ tags: ["basic", "math"],
49
+ category: "unit-test"
50
+ });
51
+ ```
17
52
 
18
53
  ```bash
19
- npx @pauly4010/evalai-sdk init
20
- git add evals/ .github/workflows/evalai-gate.yml evalai.config.json
21
- git commit -m "chore: add EvalAI regression gate"
54
+ git add .github/workflows/evalai.yml eval/
55
+ git commit -m "feat: add EvalAI CI pipeline"
22
56
  git push
23
57
  ```
24
58
 
25
- That's it. Open a PR and CI blocks regressions automatically.
26
-
27
- `evalai init` detects your project, creates a baseline from your current tests, and installs a GitHub Actions workflow. No manual config needed.
59
+ That's it! Your CI now:
60
+ - ✅ Discovers evaluation specs automatically
61
+ - ✅ Runs only impacted specs when `--impacted-only` is passed (smart caching)
62
+ - ✅ Compares results against base branch
63
+ - ✅ Posts rich summary in PR with regressions
64
+ - ✅ Exits with proper codes (0=clean, 1=regressions, 2=config)
28
65
 
29
66
  ---
30
67
 
31
- ## What `evalai init` does
68
+ ## 🚀 New in v1.9.0: One-Command CI
69
+
70
+ ### `evalai ci` - Complete CI Pipeline
71
+
72
+ ```bash
73
+ npx @pauly4010/evalai-sdk ci --format github --write-results --base main
74
+ ```
32
75
 
33
- 1. **Detects** your Node repo and package manager (npm/yarn/pnpm)
34
- 2. **Runs your tests** to capture a real baseline (pass/fail + test count)
35
- 3. **Creates `evals/baseline.json`** with provenance metadata
36
- 4. **Installs `.github/workflows/evalai-gate.yml`** (package-manager aware)
37
- 5. **Creates `evalai.config.json`**
38
- 6. **Prints next steps** — just commit and push
76
+ **What it does:**
77
+ 1. **Discover** - Finds all evaluation specs automatically
78
+ 2. **Manifest** - Builds stable manifest if missing
79
+ 3. **Impact Analysis** - Runs only specs impacted by changes (optional)
80
+ 4. **Run** - Executes evaluations with artifact retention
81
+ 5. **Diff** - Compares results against base branch
82
+ 6. **PR Summary** - Posts rich markdown summary to GitHub
83
+ 7. **Debug Flow** - Prints copy/paste next step on failure
84
+
85
+ **Advanced Options:**
86
+ ```bash
87
+ npx @pauly4010/evalai-sdk ci --base main --impacted-only # Run only impacted specs
88
+ npx @pauly4010/evalai-sdk ci --format json --write-results # JSON output for automation
89
+ npx @pauly4010/evalai-sdk ci --base develop # Custom base branch
90
+ ```
91
+
92
+ ### Smart Diffing & GitHub Integration
93
+
94
+ ```bash
95
+ npx @pauly4010/evalai-sdk diff --base main --head last --format github
96
+ ```
97
+
98
+ **Features:**
99
+ - 📊 Pass rate delta and score changes
100
+ - 🚨 Regression detection with classifications
101
+ - 📈 Improvements and new specs
102
+ - 📁 Artifact links and technical details
103
+ - 🎯 Exit codes: 0=clean, 1=regressions, 2=config
104
+
105
+ ### Self-Documenting Failures
106
+
107
+ Every failure prints a clear next step:
108
+
109
+ ```
110
+ 🔧 Next step for debugging:
111
+ Download base artifact and run: evalai diff --base .evalai/base-run.json --head .evalai/last-run.json
112
+ Artifacts: .evalai/runs/
113
+ ```
39
114
 
40
115
  ---
41
116
 
42
117
  ## CLI Commands
43
118
 
44
- ### Regression Gate (local, no account needed)
119
+ ### 🚀 One-Command CI (v1.9.0)
120
+
121
+ | Command | Description |
122
+ |---------|-------------|
123
+ | `npx evalai ci` | Complete CI pipeline: discover → manifest → impact → run → diff → PR summary |
124
+ | `npx evalai ci --base main` | Run CI with diff against main branch |
125
+ | `npx evalai ci --impacted-only` | Run only specs impacted by changes |
126
+ | `npx evalai ci --format github` | GitHub Step Summary with rich markdown |
127
+ | `npx evalai ci --format json` | JSON output for automation |
128
+
129
+ ### Discovery & Manifest
130
+
131
+ | Command | Description |
132
+ |---------|-------------|
133
+ | `npx evalai discover` | Find and analyze evaluation specs |
134
+ | `npx evalai discover --manifest` | Generate stable manifest for incremental analysis |
135
+
136
+ ### Impact Analysis
137
+
138
+ | Command | Description |
139
+ |---------|-------------|
140
+ | `npx evalai impact-analysis --base main` | Analyze impact of changes |
141
+ | `npx evalai impact-analysis --changed-files file1.ts,file2.ts` | Analyze specific changed files |
142
+
143
+ ### Run & Diff
144
+
145
+ | Command | Description |
146
+ |---------|-------------|
147
+ | `npx evalai run` | Run evaluation specifications |
148
+ | `npx evalai run --write-results` | Run with artifact retention |
149
+ | `npx evalai diff --base main` | Compare results against base branch |
150
+ | `npx evalai diff --base last --head last` | Compare last two runs |
151
+ | `npx evalai diff --format github` | GitHub Step Summary with regressions |
152
+
153
+ ### Legacy Regression Gate (local, no account needed)
45
154
 
46
155
  | Command | Description |
47
156
  |---------|-------------|
@@ -68,25 +177,29 @@ That's it. Open a PR and CI blocks regressions automatically.
68
177
  | `npx evalai explain` | Offline report explainer — top failures, root cause classification, suggested fixes |
69
178
  | `npx evalai print-config` | Show resolved config with source-of-truth annotations (file/env/default/arg) |
70
179
 
180
+ ### Migration Tools
181
+
182
+ | Command | Description |
183
+ |---------|-------------|
184
+ | `npx evalai migrate config --in evalai.config.json --out eval/migrated.spec.ts` | Convert legacy config to DSL |
185
+
71
186
  **Guided failure flow:**
72
187
 
73
188
  ```
74
- evalai check → fails → "Next: evalai explain"
189
+ evalai ci → fails → "Next: evalai explain --report .evalai/last-run.json"
75
190
 
76
191
  evalai explain → root causes + fixes
77
192
  ```
78
193
 
79
- **GitHub Actions step summary** — gate result at a glance:
194
+ **GitHub Actions step summary** — CI result at a glance with regressions and artifacts:
80
195
 
81
- ![GitHub Actions step summary showing gate pass/fail with delta table](../../docs/images/evalai-gate-step-summary.svg)
196
+ ![GitHub Actions step summary showing CI pass/fail with delta table](../../docs/images/evalai-gate-step-summary.svg)
82
197
 
83
198
  **`evalai explain` terminal output** — root causes + fix commands:
84
199
 
85
200
  ![Terminal output of evalai explain showing top failures and suggested fixes](../../docs/images/evalai-explain-terminal.svg)
86
201
 
87
- `check` automatically writes `.evalai/last-report.json` so `explain` works with zero flags.
88
-
89
- `doctor` uses exit codes: **0** = ready, **2** = not ready, **3** = infra error. Use `--report` for a JSON diagnostic bundle.
202
+ All commands automatically write artifacts so `explain` works with zero flags.
90
203
 
91
204
  ### Gate Exit Codes
92
205
 
@@ -86,7 +86,9 @@ class Expectation {
86
86
  expected: substring,
87
87
  actual: text,
88
88
  message: message ||
89
- (passed ? `Text contains "${substring}"` : `Text does not contain "${substring}"`),
89
+ (passed
90
+ ? `Text contains "${substring}"`
91
+ : `Text does not contain "${substring}"`),
90
92
  };
91
93
  }
92
94
  /**
@@ -103,7 +105,9 @@ class Expectation {
103
105
  expected: keywords,
104
106
  actual: text,
105
107
  message: message ||
106
- (passed ? `Contains all keywords` : `Missing keywords: ${missingKeywords.join(", ")}`),
108
+ (passed
109
+ ? `Contains all keywords`
110
+ : `Missing keywords: ${missingKeywords.join(", ")}`),
107
111
  };
108
112
  }
109
113
  /**
@@ -119,7 +123,9 @@ class Expectation {
119
123
  expected: `not containing "${substring}"`,
120
124
  actual: text,
121
125
  message: message ||
122
- (passed ? `Text does not contain "${substring}"` : `Text contains "${substring}"`),
126
+ (passed
127
+ ? `Text does not contain "${substring}"`
128
+ : `Text contains "${substring}"`),
123
129
  };
124
130
  }
125
131
  /**
@@ -144,7 +150,8 @@ class Expectation {
144
150
  passed,
145
151
  expected: "no PII",
146
152
  actual: foundPII.length > 0 ? `Found: ${foundPII.join(", ")}` : "no PII",
147
- message: message || (passed ? "No PII detected" : `PII detected: ${foundPII.join(", ")}`),
153
+ message: message ||
154
+ (passed ? "No PII detected" : `PII detected: ${foundPII.join(", ")}`),
148
155
  };
149
156
  }
150
157
  /**
@@ -159,7 +166,10 @@ class Expectation {
159
166
  passed,
160
167
  expected: pattern.toString(),
161
168
  actual: text,
162
- message: message || (passed ? `Matches pattern ${pattern}` : `Does not match pattern ${pattern}`),
169
+ message: message ||
170
+ (passed
171
+ ? `Matches pattern ${pattern}`
172
+ : `Does not match pattern ${pattern}`),
163
173
  };
164
174
  }
165
175
  /**
@@ -205,7 +215,8 @@ class Expectation {
205
215
  passed,
206
216
  expected: schema,
207
217
  actual: parsedJson,
208
- message: message || (passed ? "JSON matches schema" : "JSON does not match schema"),
218
+ message: message ||
219
+ (passed ? "JSON matches schema" : "JSON does not match schema"),
209
220
  };
210
221
  }
211
222
  /**
@@ -253,7 +264,10 @@ class Expectation {
253
264
  passed,
254
265
  expected,
255
266
  actual,
256
- message: message || (passed ? `Sentiment is ${expected}` : `Expected ${expected}, got ${actual}`),
267
+ message: message ||
268
+ (passed
269
+ ? `Sentiment is ${expected}`
270
+ : `Expected ${expected}, got ${actual}`),
257
271
  };
258
272
  }
259
273
  /**
@@ -269,7 +283,10 @@ class Expectation {
269
283
  passed,
270
284
  expected: range,
271
285
  actual: length,
272
- message: message || (passed ? `Length ${length} is within range` : `Length ${length} not in range`),
286
+ message: message ||
287
+ (passed
288
+ ? `Length ${length} is within range`
289
+ : `Length ${length} not in range`),
273
290
  };
274
291
  }
275
292
  /**
@@ -284,9 +301,13 @@ class Expectation {
284
301
  name: "toNotHallucinate",
285
302
  passed,
286
303
  expected: "all ground truth facts",
287
- actual: missingFacts.length > 0 ? `Missing: ${missingFacts.join(", ")}` : "all facts present",
304
+ actual: missingFacts.length > 0
305
+ ? `Missing: ${missingFacts.join(", ")}`
306
+ : "all facts present",
288
307
  message: message ||
289
- (passed ? "No hallucinations detected" : `Missing facts: ${missingFacts.join(", ")}`),
308
+ (passed
309
+ ? "No hallucinations detected"
310
+ : `Missing facts: ${missingFacts.join(", ")}`),
290
311
  };
291
312
  }
292
313
  /**
@@ -301,7 +322,10 @@ class Expectation {
301
322
  passed,
302
323
  expected: `<= ${maxMs}ms`,
303
324
  actual: `${duration}ms`,
304
- message: message || (passed ? `${duration}ms within limit` : `${duration}ms exceeds ${maxMs}ms`),
325
+ message: message ||
326
+ (passed
327
+ ? `${duration}ms within limit`
328
+ : `${duration}ms exceeds ${maxMs}ms`),
305
329
  };
306
330
  }
307
331
  /**
@@ -344,7 +368,8 @@ class Expectation {
344
368
  passed,
345
369
  expected: `> ${expected}`,
346
370
  actual: value,
347
- message: message || (passed ? `${value} > ${expected}` : `${value} <= ${expected}`),
371
+ message: message ||
372
+ (passed ? `${value} > ${expected}` : `${value} <= ${expected}`),
348
373
  };
349
374
  }
350
375
  /**
@@ -359,7 +384,8 @@ class Expectation {
359
384
  passed,
360
385
  expected: `< ${expected}`,
361
386
  actual: value,
362
- message: message || (passed ? `${value} < ${expected}` : `${value} >= ${expected}`),
387
+ message: message ||
388
+ (passed ? `${value} < ${expected}` : `${value} >= ${expected}`),
363
389
  };
364
390
  }
365
391
  /**
@@ -374,7 +400,8 @@ class Expectation {
374
400
  passed,
375
401
  expected: `between ${min} and ${max}`,
376
402
  actual: value,
377
- message: message || (passed ? `${value} is within range` : `${value} is outside range`),
403
+ message: message ||
404
+ (passed ? `${value} is within range` : `${value} is outside range`),
378
405
  };
379
406
  }
380
407
  /**
@@ -389,7 +416,8 @@ class Expectation {
389
416
  passed: hasCodeBlock,
390
417
  expected: "code block",
391
418
  actual: text,
392
- message: message || (hasCodeBlock ? "Contains code block" : "No code block found"),
419
+ message: message ||
420
+ (hasCodeBlock ? "Contains code block" : "No code block found"),
393
421
  };
394
422
  }
395
423
  /**
@@ -405,9 +433,13 @@ class Expectation {
405
433
  name: "toBeProfessional",
406
434
  passed,
407
435
  expected: "professional tone",
408
- actual: foundProfanity.length > 0 ? `Found: ${foundProfanity.join(", ")}` : "professional",
436
+ actual: foundProfanity.length > 0
437
+ ? `Found: ${foundProfanity.join(", ")}`
438
+ : "professional",
409
439
  message: message ||
410
- (passed ? "Professional tone" : `Unprofessional language: ${foundProfanity.join(", ")}`),
440
+ (passed
441
+ ? "Professional tone"
442
+ : `Unprofessional language: ${foundProfanity.join(", ")}`),
411
443
  };
412
444
  }
413
445
  /**
@@ -432,7 +464,8 @@ class Expectation {
432
464
  passed,
433
465
  expected: "proper grammar",
434
466
  actual: issues.length > 0 ? `Issues: ${issues.join(", ")}` : "proper grammar",
435
- message: message || (passed ? "Proper grammar" : `Grammar issues: ${issues.join(", ")}`),
467
+ message: message ||
468
+ (passed ? "Proper grammar" : `Grammar issues: ${issues.join(", ")}`),
436
469
  };
437
470
  }
438
471
  }
package/dist/batch.js CHANGED
@@ -81,7 +81,8 @@ class RequestBatcher {
81
81
  pendingRequest.resolve(response.data);
82
82
  }
83
83
  else {
84
- pendingRequest.reject(new Error(response.error || `Request failed with status ${response.status}`));
84
+ pendingRequest.reject(new Error(response.error ||
85
+ `Request failed with status ${response.status}`));
85
86
  }
86
87
  }
87
88
  }
@@ -149,7 +150,12 @@ function canBatch(method, endpoint) {
149
150
  if (method !== "GET") {
150
151
  return false;
151
152
  }
152
- const batchableEndpoints = ["/traces", "/evaluations", "/annotations", "/results"];
153
+ const batchableEndpoints = [
154
+ "/traces",
155
+ "/evaluations",
156
+ "/annotations",
157
+ "/results",
158
+ ];
153
159
  return batchableEndpoints.some((pattern) => endpoint.includes(pattern));
154
160
  }
155
161
  /**
package/dist/cli/api.js CHANGED
@@ -73,7 +73,9 @@ async function publishShare(baseUrl, apiKey, evaluationId, exportData, evaluatio
73
73
  exportData,
74
74
  shareScope: "run",
75
75
  evaluationRunId,
76
- ...(options?.expiresInDays != null && { expiresInDays: options.expiresInDays }),
76
+ ...(options?.expiresInDays != null && {
77
+ expiresInDays: options.expiresInDays,
78
+ }),
77
79
  };
78
80
  const url = `${baseUrl.replace(/\/$/, "")}/api/evaluations/${evaluationId}/publish`;
79
81
  try {
package/dist/cli/check.js CHANGED
@@ -183,7 +183,11 @@ function parseArgs(argv) {
183
183
  };
184
184
  }
185
185
  if (Number.isNaN(minScore) || minScore < 0 || minScore > 100) {
186
- return { ok: false, exitCode: constants_1.EXIT.BAD_ARGS, message: "Error: --minScore must be 0-100" };
186
+ return {
187
+ ok: false,
188
+ exitCode: constants_1.EXIT.BAD_ARGS,
189
+ message: "Error: --minScore must be 0-100",
190
+ };
187
191
  }
188
192
  if (minN !== undefined && (Number.isNaN(minN) || minN < 1)) {
189
193
  return {
@@ -210,9 +214,15 @@ function parseArgs(argv) {
210
214
  onFail,
211
215
  share,
212
216
  prCommentOut,
213
- maxCostUsd: maxCostUsd != null && !Number.isNaN(maxCostUsd) ? maxCostUsd : undefined,
214
- maxLatencyMs: maxLatencyMs != null && !Number.isNaN(maxLatencyMs) ? maxLatencyMs : undefined,
215
- maxCostDeltaUsd: maxCostDeltaUsd != null && !Number.isNaN(maxCostDeltaUsd) ? maxCostDeltaUsd : undefined,
217
+ maxCostUsd: maxCostUsd != null && !Number.isNaN(maxCostUsd)
218
+ ? maxCostUsd
219
+ : undefined,
220
+ maxLatencyMs: maxLatencyMs != null && !Number.isNaN(maxLatencyMs)
221
+ ? maxLatencyMs
222
+ : undefined,
223
+ maxCostDeltaUsd: maxCostDeltaUsd != null && !Number.isNaN(maxCostDeltaUsd)
224
+ ? maxCostDeltaUsd
225
+ : undefined,
216
226
  },
217
227
  };
218
228
  }
@@ -297,7 +307,8 @@ async function runCheck(args) {
297
307
  runDetails?.results &&
298
308
  quality?.evaluationRunId) {
299
309
  const importResults = runDetails.results
300
- .filter((r) => r.testCaseId != null && (r.status === "passed" || r.status === "failed"))
310
+ .filter((r) => r.testCaseId != null &&
311
+ (r.status === "passed" || r.status === "failed"))
301
312
  .map((r) => ({
302
313
  testCaseId: r.testCaseId,
303
314
  status: r.status,
@@ -306,7 +317,9 @@ async function runCheck(args) {
306
317
  assertionsJson: r.assertionsJson,
307
318
  }));
308
319
  if (importResults.length > 0) {
309
- const idempotencyKey = ci ? (0, ci_context_1.computeIdempotencyKey)(args.evaluationId, ci) : undefined;
320
+ const idempotencyKey = ci
321
+ ? (0, ci_context_1.computeIdempotencyKey)(args.evaluationId, ci)
322
+ : undefined;
310
323
  const importRes = await (0, api_1.importRunOnFail)(args.baseUrl, args.apiKey, args.evaluationId, importResults, {
311
324
  idempotencyKey,
312
325
  ci,
@@ -89,7 +89,9 @@ function captureCiContext() {
89
89
  provider,
90
90
  repo,
91
91
  sha,
92
- branch: ref?.startsWith("refs/heads/") ? ref.slice("refs/heads/".length) : ref,
92
+ branch: ref?.startsWith("refs/heads/")
93
+ ? ref.slice("refs/heads/".length)
94
+ : ref,
93
95
  runUrl,
94
96
  actor,
95
97
  pr,
@@ -0,0 +1,45 @@
1
+ /**
2
+ * UX-401: One-command CI loop (evalai ci)
3
+ *
4
+ * Provides a single command that teams add to a workflow under .github/workflows/ and never have to think about again.
5
+ */
6
+ import type { DiffResult } from "./diff";
7
+ import type { RunResult } from "./run";
8
+ /**
9
+ * CI command options
10
+ */
11
+ export interface CIOptions {
12
+ /** Base reference for diff comparison */
13
+ base?: string;
14
+ /** Run only impacted specs */
15
+ impactedOnly?: boolean;
16
+ /** Output format */
17
+ format?: "human" | "json" | "github";
18
+ /** Write run results */
19
+ writeResults?: boolean;
20
+ }
21
+ /**
22
+ * CI execution result
23
+ */
24
+ export interface CIResult {
25
+ /** Success status */
26
+ success: boolean;
27
+ /** Exit code */
28
+ exitCode: number;
29
+ /** Execution narrative */
30
+ narrative: string;
31
+ /** Run result (if executed) */
32
+ runResult?: RunResult;
33
+ /** Diff result (if executed) */
34
+ diffResult?: DiffResult;
35
+ /** Error message (if failed) */
36
+ error?: string;
37
+ }
38
+ /**
39
+ * Run CI command
40
+ */
41
+ export declare function runCI(options: CIOptions, projectRoot?: string): Promise<CIResult>;
42
+ /**
43
+ * CLI entry point
44
+ */
45
+ export declare function runCICLI(options: CIOptions): Promise<void>;