@pauly4010/evalai-sdk 1.9.0 → 1.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/README.md +136 -23
  2. package/dist/assertions.js +51 -18
  3. package/dist/batch.js +8 -2
  4. package/dist/cli/api.js +3 -1
  5. package/dist/cli/check.js +19 -6
  6. package/dist/cli/ci-context.js +3 -1
  7. package/dist/cli/config.js +28 -8
  8. package/dist/cli/diff.js +14 -9
  9. package/dist/cli/discover.js +18 -7
  10. package/dist/cli/doctor.js +43 -9
  11. package/dist/cli/explain.js +37 -11
  12. package/dist/cli/formatters/human.js +4 -1
  13. package/dist/cli/formatters/pr-comment.js +3 -1
  14. package/dist/cli/gate.js +6 -2
  15. package/dist/cli/impact-analysis.js +6 -5
  16. package/dist/cli/index.js +18 -6
  17. package/dist/cli/manifest.d.ts +3 -5
  18. package/dist/cli/manifest.js +21 -14
  19. package/dist/cli/migrate.js +4 -4
  20. package/dist/cli/policy-packs.js +8 -2
  21. package/dist/cli/print-config.js +19 -4
  22. package/dist/cli/regression-gate.js +8 -2
  23. package/dist/cli/report/build-check-report.js +8 -2
  24. package/dist/cli/run.js +11 -5
  25. package/dist/cli/share.js +3 -1
  26. package/dist/cli/upgrade.js +2 -1
  27. package/dist/client.d.ts +16 -19
  28. package/dist/client.js +60 -43
  29. package/dist/client.request.test.d.ts +1 -1
  30. package/dist/client.request.test.js +222 -147
  31. package/dist/context.js +3 -1
  32. package/dist/errors.js +11 -4
  33. package/dist/export.js +3 -1
  34. package/dist/index.d.ts +8 -8
  35. package/dist/index.js +19 -19
  36. package/dist/integrations/anthropic.d.ts +20 -1
  37. package/dist/integrations/openai-eval.js +4 -2
  38. package/dist/integrations/openai.d.ts +24 -1
  39. package/dist/local.js +3 -1
  40. package/dist/logger.js +6 -2
  41. package/dist/pagination.js +6 -2
  42. package/dist/runtime/adapters/config-to-dsl.js +12 -9
  43. package/dist/runtime/adapters/testsuite-to-dsl.d.ts +1 -1
  44. package/dist/runtime/adapters/testsuite-to-dsl.js +11 -6
  45. package/dist/runtime/eval.d.ts +1 -1
  46. package/dist/runtime/eval.js +12 -5
  47. package/dist/runtime/execution-mode.js +13 -9
  48. package/dist/runtime/registry.js +8 -21
  49. package/dist/runtime/run-report.d.ts +0 -2
  50. package/dist/runtime/run-report.js +12 -10
  51. package/dist/testing.js +7 -2
  52. package/dist/types.d.ts +100 -69
  53. package/dist/utils/input-hash.js +4 -1
  54. package/dist/version.d.ts +1 -1
  55. package/dist/version.js +1 -1
  56. package/dist/workflows.js +62 -14
  57. package/package.json +115 -111
package/dist/cli/regression-gate.js CHANGED
@@ -137,7 +137,10 @@ function runBuiltinGate(cwd) {
137
137
  };
138
138
  }
139
139
  const baselineMeta = baselineData.updatedAt
140
- ? { updatedAt: baselineData.updatedAt, updatedBy: baselineData.updatedBy ?? "unknown" }
140
+ ? {
141
+ updatedAt: baselineData.updatedAt,
142
+ updatedBy: baselineData.updatedBy ?? "unknown",
143
+ }
141
144
  : null;
142
145
  // Run tests
143
146
  const isWin = process.platform === "win32";
@@ -302,7 +305,10 @@ function runGate(argv) {
302
305
  process.stdout.write(fs.readFileSync(reportPath, "utf-8"));
303
306
  }
304
307
  else {
305
- console.error(JSON.stringify({ error: "regression-report.json not found", exitCode }));
308
+ console.error(JSON.stringify({
309
+ error: "regression-report.json not found",
310
+ exitCode,
311
+ }));
306
312
  }
307
313
  }
308
314
  else if (args.format === "github") {
package/dist/cli/report/build-check-report.js CHANGED
@@ -60,7 +60,9 @@ function buildCheckReport(input) {
60
60
  }
61
61
  const failedCasesShown = Math.min(failedCases.length, TOP_N);
62
62
  const failedCasesMore = failedCases.length - failedCasesShown;
63
- const breakdown01 = Object.keys(breakdown).length > 0 ? breakdown : undefined;
63
+ const breakdown01 = Object.keys(breakdown).length > 0
64
+ ? breakdown
65
+ : undefined;
64
66
  const contribPts = args.explain && breakdown01 ? computeContribPts(breakdown01) : undefined;
65
67
  const gateSkipped = gateResult.gateSkipped === true;
66
68
  const gateApplied = !gateSkipped;
@@ -68,7 +70,11 @@ function buildCheckReport(input) {
68
70
  const actionableMessage = gateSkipped
69
71
  ? "Gate not applied: baseline missing. Publish a baseline from the dashboard, or run with --baseline previous once you have runs."
70
72
  : (gateResult.reasonMessage ?? undefined);
71
- const verdict = gateResult.reasonCode === "WARN_REGRESSION" ? "warn" : gateResult.passed ? "pass" : "fail";
73
+ const verdict = gateResult.reasonCode === "WARN_REGRESSION"
74
+ ? "warn"
75
+ : gateResult.passed
76
+ ? "pass"
77
+ : "fail";
72
78
  const report = {
73
79
  schemaVersion: types_1.CHECK_REPORT_SCHEMA_VERSION,
74
80
  evaluationId: args.evaluationId,
package/dist/cli/run.js CHANGED
@@ -49,9 +49,9 @@ exports.runEvaluations = runEvaluations;
49
49
  exports.printHumanResults = printHumanResults;
50
50
  exports.printJsonResults = printJsonResults;
51
51
  exports.runEvaluationsCLI = runEvaluationsCLI;
52
+ const node_child_process_1 = require("node:child_process");
52
53
  const fs = __importStar(require("node:fs/promises"));
53
54
  const path = __importStar(require("node:path"));
54
- const node_child_process_1 = require("node:child_process");
55
55
  const impact_analysis_1 = require("./impact-analysis");
56
56
  /**
57
57
  * Generate deterministic run ID
@@ -133,7 +133,7 @@ async function loadManifest(projectRoot = process.cwd()) {
133
133
  const content = await fs.readFile(manifestPath, "utf-8");
134
134
  return JSON.parse(content);
135
135
  }
136
- catch (error) {
136
+ catch (_error) {
137
137
  return null;
138
138
  }
139
139
  }
@@ -279,7 +279,7 @@ async function updateRunIndex(result, projectRoot = process.cwd()) {
279
279
  const existingContent = await fs.readFile(indexPath, "utf-8");
280
280
  index = JSON.parse(existingContent);
281
281
  }
282
- catch (error) {
282
+ catch (_error) {
283
283
  // Index doesn't exist yet, start with empty array
284
284
  }
285
285
  // Add new entry
@@ -350,8 +350,14 @@ function printHumanResults(result) {
350
350
  console.log(` 📊 Pass Rate: ${(result.summary.passRate * 100).toFixed(1)}%`);
351
351
  console.log("\n📋 Individual Results:");
352
352
  for (const spec of result.results) {
353
- const status = spec.result.status === "passed" ? "✅" : spec.result.status === "failed" ? "❌" : "⏭️";
354
- const score = spec.result.score ? ` (${(spec.result.score * 100).toFixed(1)}%)` : "";
353
+ const status = spec.result.status === "passed"
354
+ ? ""
355
+ : spec.result.status === "failed"
356
+ ? "❌"
357
+ : "⏭️";
358
+ const score = spec.result.score
359
+ ? ` (${(spec.result.score * 100).toFixed(1)}%)`
360
+ : "";
355
361
  const error = spec.result.error ? ` - ${spec.result.error}` : "";
356
362
  console.log(` ${status} ${spec.name}${score}${error}`);
357
363
  }
package/dist/cli/share.js CHANGED
@@ -50,7 +50,9 @@ function parseShareArgs(argv) {
50
50
  if (!evaluationId)
51
51
  return { error: "Error: --evaluationId is required" };
52
52
  if (Number.isNaN(runId) || runId < 1)
53
- return { error: "Error: --runId is required and must be a positive number" };
53
+ return {
54
+ error: "Error: --runId is required and must be a positive number",
55
+ };
54
56
  const expiresInDays = parseExpires(expires);
55
57
  if (expiresInDays == null || expiresInDays <= 0)
56
58
  return { error: "Error: --expires must be e.g. 7d, 24h, 60m, 1s" };
package/dist/cli/upgrade.js CHANGED
@@ -275,7 +275,8 @@ function addNpmScripts(cwd) {
275
275
  changed = true;
276
276
  }
277
277
  if (!scripts["eval:baseline-update"]) {
278
- scripts["eval:baseline-update"] = "npx tsx scripts/regression-gate.ts --update-baseline";
278
+ scripts["eval:baseline-update"] =
279
+ "npx tsx scripts/regression-gate.ts --update-baseline";
279
280
  changed = true;
280
281
  }
281
282
  if (changed) {
package/dist/client.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import { type Logger } from "./logger";
2
- import type { Annotation, AnnotationItem, AnnotationTask, APIKey, APIKeyUsage, APIKeyWithSecret, ClientConfig, CreateAnnotationItemParams, CreateAnnotationParams, CreateAnnotationTaskParams, CreateAPIKeyParams, CreateEvaluationParams, CreateLLMJudgeConfigParams, CreateRunParams, CreateSpanParams, CreateTestCaseParams, CreateTraceParams, CreateWebhookParams, Evaluation, EvaluationRun, GetLLMJudgeAlignmentParams, GetUsageParams, ListAnnotationItemsParams, ListAnnotationsParams, ListAnnotationTasksParams, ListAPIKeysParams, ListEvaluationsParams, ListLLMJudgeConfigsParams, ListLLMJudgeResultsParams, ListTracesParams, ListWebhookDeliveriesParams, ListWebhooksParams, LLMJudgeAlignment, LLMJudgeConfig, LLMJudgeResult, Organization, OrganizationLimits, RunLLMJudgeParams, Span, TestCase, Trace, UpdateAPIKeyParams, UpdateEvaluationParams, UpdateTraceParams, UpdateWebhookParams, UsageStats, UsageSummary, Webhook, WebhookDelivery } from "./types";
2
+ import type { Annotation, AnnotationItem, AnnotationTask, APIKey, APIKeyUsage, APIKeyWithSecret, ClientConfig, CreateAnnotationItemParams, CreateAnnotationParams, CreateAnnotationTaskParams, CreateAPIKeyParams, CreateEvaluationParams, CreateLLMJudgeConfigParams, CreateRunParams, CreateSpanParams, CreateTestCaseParams, CreateTraceParams, CreateWebhookParams, Evaluation, EvaluationRun, EvaluationRunDetail, GetLLMJudgeAlignmentParams, GetUsageParams, ListAnnotationItemsParams, ListAnnotationsParams, ListAnnotationTasksParams, ListAPIKeysParams, ListEvaluationsParams, ListLLMJudgeConfigsParams, ListLLMJudgeResultsParams, ListTracesParams, ListWebhookDeliveriesParams, ListWebhooksParams, LLMJudgeAlignment, LLMJudgeConfig, LLMJudgeEvaluateResult, LLMJudgeResult, Organization, OrganizationLimits, RunLLMJudgeParams, Span, TestCase, Trace, TraceDetail, UpdateAPIKeyParams, UpdateEvaluationParams, UpdateTraceParams, UpdateWebhookParams, UsageStats, UsageSummary, Webhook, WebhookDelivery } from "./types";
3
3
  /**
4
4
  * AI Evaluation Platform SDK Client
5
5
  *
@@ -81,15 +81,8 @@ export declare class AIEvalClient {
81
81
  */
82
82
  getLogger(): Logger;
83
83
  /**
84
- * Get organization resource limits and usage
85
- * Returns feature usage data for per-organization quotas
86
- *
87
- * @example
88
- * ```typescript
89
- * const limits = await client.getOrganizationLimits();
90
- * console.log('Traces:', limits.traces_per_organization);
91
- * console.log('Evaluations:', limits.evals_per_organization);
92
- * ```
84
+ * @deprecated The /api/organizations/:id/limits endpoint does not exist.
85
+ * Use `organizations.getCurrent()` to get org info instead.
93
86
  */
94
87
  getOrganizationLimits(): Promise<OrganizationLimits>;
95
88
  }
@@ -123,9 +116,9 @@ declare class TraceAPI {
123
116
  message: string;
124
117
  }>;
125
118
  /**
126
- * Get a single trace by ID
119
+ * Get a single trace by ID, including its spans
127
120
  */
128
- get(id: number): Promise<Trace>;
121
+ get(id: number): Promise<TraceDetail>;
129
122
  /**
130
123
  * Update an existing trace (e.g. set status, duration, metadata on completion)
131
124
  *
@@ -193,9 +186,9 @@ declare class EvaluationAPI {
193
186
  */
194
187
  listRuns(evaluationId: number): Promise<EvaluationRun[]>;
195
188
  /**
196
- * Get a specific run
189
+ * Get a specific run with its results
197
190
  */
198
- getRun(evaluationId: number, runId: number): Promise<EvaluationRun>;
191
+ getRun(evaluationId: number, runId: number): Promise<EvaluationRunDetail>;
199
192
  }
200
193
  /**
201
194
  * LLM Judge API methods
@@ -207,8 +200,7 @@ declare class LLMJudgeAPI {
207
200
  * Run an LLM judge evaluation
208
201
  */
209
202
  evaluate(params: RunLLMJudgeParams): Promise<{
210
- result: LLMJudgeResult;
211
- config: unknown;
203
+ result: LLMJudgeEvaluateResult;
212
204
  }>;
213
205
  /**
214
206
  * Create an LLM judge configuration
@@ -289,11 +281,13 @@ declare class DeveloperAPI {
289
281
  /**
290
282
  * Get usage statistics
291
283
  */
292
- getUsage(params: GetUsageParams): Promise<UsageStats>;
284
+ getUsage(params?: GetUsageParams): Promise<UsageStats>;
293
285
  /**
294
286
  * Get usage summary
295
287
  */
296
- getUsageSummary(organizationId: number): Promise<UsageSummary>;
288
+ getUsageSummary(params?: {
289
+ period?: "7d" | "30d" | "90d" | "all";
290
+ }): Promise<UsageSummary>;
297
291
  }
298
292
  /**
299
293
  * API Keys API methods
@@ -355,7 +349,10 @@ declare class WebhooksAPI {
355
349
  /**
356
350
  * Get webhook deliveries
357
351
  */
358
- getDeliveries(webhookId: number, params?: ListWebhookDeliveriesParams): Promise<WebhookDelivery[]>;
352
+ getDeliveries(webhookId: number, params?: ListWebhookDeliveriesParams): Promise<{
353
+ deliveries: WebhookDelivery[];
354
+ total: number;
355
+ }>;
359
356
  }
360
357
  /**
361
358
  * Organizations API methods
package/dist/client.js CHANGED
@@ -44,17 +44,24 @@ class AIEvalClient {
44
44
  constructor(config = {}) {
45
45
  // Tier 1.1: Zero-config with env variable detection (works in Node.js and browsers)
46
46
  this.apiKey =
47
- config.apiKey || getEnvVar("EVALAI_API_KEY") || getEnvVar("AI_EVAL_API_KEY") || "";
47
+ config.apiKey ||
48
+ getEnvVar("EVALAI_API_KEY") ||
49
+ getEnvVar("AI_EVAL_API_KEY") ||
50
+ "";
48
51
  if (!this.apiKey) {
49
52
  throw new errors_1.EvalAIError("API key is required. Provide via config.apiKey or EVALAI_API_KEY environment variable.", "MISSING_API_KEY", 0);
50
53
  }
51
54
  // Auto-detect organization ID from env
52
- const orgIdFromEnv = getEnvVar("EVALAI_ORGANIZATION_ID") || getEnvVar("AI_EVAL_ORGANIZATION_ID");
55
+ const orgIdFromEnv = getEnvVar("EVALAI_ORGANIZATION_ID") ||
56
+ getEnvVar("AI_EVAL_ORGANIZATION_ID");
53
57
  this.organizationId =
54
- config.organizationId || (orgIdFromEnv ? parseInt(orgIdFromEnv, 10) : undefined);
55
- // Default to relative URLs for browser, or allow custom baseUrl
58
+ config.organizationId ||
59
+ (orgIdFromEnv ? parseInt(orgIdFromEnv, 10) : undefined);
56
60
  const isBrowser = typeof globalThis.window !== "undefined";
57
- this.baseUrl = config.baseUrl || (isBrowser ? "" : "http://localhost:3000");
61
+ this.baseUrl =
62
+ config.baseUrl ||
63
+ getEnvVar("EVALAI_BASE_URL") ||
64
+ (isBrowser ? "" : "http://localhost:3000");
58
65
  this.timeout = config.timeout || 30000;
59
66
  // Tier 4.17: Debug mode with request logging
60
67
  const logLevel = config.logLevel || (config.debug ? "debug" : "info");
@@ -292,22 +299,11 @@ class AIEvalClient {
292
299
  return this.logger;
293
300
  }
294
301
  /**
295
- * Get organization resource limits and usage
296
- * Returns feature usage data for per-organization quotas
297
- *
298
- * @example
299
- * ```typescript
300
- * const limits = await client.getOrganizationLimits();
301
- * console.log('Traces:', limits.traces_per_organization);
302
- * console.log('Evaluations:', limits.evals_per_organization);
303
- * ```
302
+ * @deprecated The /api/organizations/:id/limits endpoint does not exist.
303
+ * Use `organizations.getCurrent()` to get org info instead.
304
304
  */
305
305
  async getOrganizationLimits() {
306
- const orgId = this.getOrganizationId();
307
- if (!orgId) {
308
- throw new errors_1.EvalAIError("Organization ID is required", "MISSING_ORGANIZATION_ID", 0);
309
- }
310
- return this.request(`/api/organizations/${orgId}/limits`);
306
+ return {};
311
307
  }
312
308
  }
313
309
  exports.AIEvalClient = AIEvalClient;
@@ -370,7 +366,7 @@ class TraceAPI {
370
366
  });
371
367
  }
372
368
  /**
373
- * Get a single trace by ID
369
+ * Get a single trace by ID, including its spans
374
370
  */
375
371
  async get(id) {
376
372
  return this.client.request(`/api/traces/${id}`);
@@ -504,7 +500,7 @@ class EvaluationAPI {
504
500
  return this.client.request(`/api/evaluations/${evaluationId}/runs`);
505
501
  }
506
502
  /**
507
- * Get a specific run
503
+ * Get a specific run with its results
508
504
  */
509
505
  async getRun(evaluationId, runId) {
510
506
  return this.client.request(`/api/evaluations/${evaluationId}/runs/${runId}`);
@@ -547,7 +543,9 @@ class LLMJudgeAPI {
547
543
  if (params.offset)
548
544
  searchParams.set("offset", params.offset.toString());
549
545
  const query = searchParams.toString();
550
- const endpoint = query ? `/api/llm-judge/configs?${query}` : "/api/llm-judge/configs";
546
+ const endpoint = query
547
+ ? `/api/llm-judge/configs?${query}`
548
+ : "/api/llm-judge/configs";
551
549
  return this.client.request(endpoint);
552
550
  }
553
551
  /**
@@ -564,7 +562,9 @@ class LLMJudgeAPI {
564
562
  if (params.offset)
565
563
  searchParams.set("offset", params.offset.toString());
566
564
  const query = searchParams.toString();
567
- const endpoint = query ? `/api/llm-judge/results?${query}` : "/api/llm-judge/results";
565
+ const endpoint = query
566
+ ? `/api/llm-judge/results?${query}`
567
+ : "/api/llm-judge/results";
568
568
  return this.client.request(endpoint);
569
569
  }
570
570
  /**
@@ -572,11 +572,7 @@ class LLMJudgeAPI {
572
572
  */
573
573
  async getAlignment(params) {
574
574
  const searchParams = new URLSearchParams();
575
- searchParams.set("configId", params.configId.toString());
576
- if (params.startDate)
577
- searchParams.set("startDate", params.startDate);
578
- if (params.endDate)
579
- searchParams.set("endDate", params.endDate);
575
+ searchParams.set("evaluationRunId", params.evaluationRunId.toString());
580
576
  const query = searchParams.toString();
581
577
  return this.client.request(`/api/llm-judge/alignment?${query}`);
582
578
  }
@@ -651,14 +647,18 @@ class AnnotationTasksAPI {
651
647
  if (params.offset)
652
648
  searchParams.set("offset", params.offset.toString());
653
649
  const query = searchParams.toString();
654
- const endpoint = query ? `/api/annotations/tasks?${query}` : "/api/annotations/tasks";
650
+ const endpoint = query
651
+ ? `/api/annotations/tasks?${query}`
652
+ : "/api/annotations/tasks";
655
653
  return this.client.request(endpoint);
656
654
  }
657
655
  /**
658
656
  * Get an annotation task
659
657
  */
660
658
  async get(taskId) {
661
- return this.client.request(`/api/annotations/tasks/${taskId}`);
659
+ return this.client
660
+ .request(`/api/annotations/tasks/${taskId}`)
661
+ .then((res) => res.task);
662
662
  }
663
663
  }
664
664
  /**
@@ -705,21 +705,34 @@ class DeveloperAPI {
705
705
  /**
706
706
  * Get usage statistics
707
707
  */
708
- async getUsage(params) {
708
+ async getUsage(params = {}) {
709
709
  const searchParams = new URLSearchParams();
710
- searchParams.set("organizationId", params.organizationId.toString());
711
- if (params.startDate)
712
- searchParams.set("startDate", params.startDate);
713
- if (params.endDate)
714
- searchParams.set("endDate", params.endDate);
710
+ if (params.period)
711
+ searchParams.set("period", params.period);
712
+ if (params.groupBy)
713
+ searchParams.set("groupBy", params.groupBy);
714
+ if (params.limit)
715
+ searchParams.set("limit", params.limit.toString());
716
+ if (params.offset)
717
+ searchParams.set("offset", params.offset.toString());
715
718
  const query = searchParams.toString();
716
- return this.client.request(`/api/developer/usage?${query}`);
719
+ const endpoint = query
720
+ ? `/api/developer/usage?${query}`
721
+ : "/api/developer/usage";
722
+ return this.client.request(endpoint);
717
723
  }
718
724
  /**
719
725
  * Get usage summary
720
726
  */
721
- async getUsageSummary(organizationId) {
722
- return this.client.request(`/api/developer/usage/summary?organizationId=${organizationId}`);
727
+ async getUsageSummary(params = {}) {
728
+ const searchParams = new URLSearchParams();
729
+ if (params.period)
730
+ searchParams.set("period", params.period);
731
+ const query = searchParams.toString();
732
+ const endpoint = query
733
+ ? `/api/developer/usage/summary?${query}`
734
+ : "/api/developer/usage/summary";
735
+ return this.client.request(endpoint);
723
736
  }
724
737
  }
725
738
  /**
@@ -750,7 +763,9 @@ class APIKeysAPI {
750
763
  if (params.offset)
751
764
  searchParams.set("offset", params.offset.toString());
752
765
  const query = searchParams.toString();
753
- const endpoint = query ? `/api/developer/api-keys?${query}` : "/api/developer/api-keys";
766
+ const endpoint = query
767
+ ? `/api/developer/api-keys?${query}`
768
+ : "/api/developer/api-keys";
754
769
  return this.client.request(endpoint);
755
770
  }
756
771
  /**
@@ -840,8 +855,8 @@ class WebhooksAPI {
840
855
  searchParams.set("limit", params.limit.toString());
841
856
  if (params.offset)
842
857
  searchParams.set("offset", params.offset.toString());
843
- if (params.success !== undefined)
844
- searchParams.set("success", params.success.toString());
858
+ if (params.status)
859
+ searchParams.set("status", params.status);
845
860
  const query = searchParams.toString();
846
861
  const endpoint = query
847
862
  ? `/api/developer/webhooks/${webhookId}/deliveries?${query}`
@@ -860,6 +875,8 @@ class OrganizationsAPI {
860
875
  * Get current organization
861
876
  */
862
877
  async getCurrent() {
863
- return this.client.request("/api/organizations/current");
878
+ return this.client
879
+ .request("/api/organizations/current")
880
+ .then((res) => res.organization);
864
881
  }
865
882
  }
package/dist/client.request.test.d.ts CHANGED
@@ -1 +1 @@
1
- export {};
1
+ export {};