@pauly4010/evalai-sdk 1.8.0 → 1.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. package/CHANGELOG.md +54 -0
  2. package/README.md +136 -23
  3. package/dist/assertions.js +51 -18
  4. package/dist/batch.js +8 -2
  5. package/dist/cli/api.js +3 -1
  6. package/dist/cli/check.js +19 -6
  7. package/dist/cli/ci-context.js +3 -1
  8. package/dist/cli/ci.d.ts +45 -0
  9. package/dist/cli/ci.js +192 -0
  10. package/dist/cli/config.js +28 -8
  11. package/dist/cli/diff.d.ts +173 -0
  12. package/dist/cli/diff.js +685 -0
  13. package/dist/cli/discover.d.ts +84 -0
  14. package/dist/cli/discover.js +419 -0
  15. package/dist/cli/doctor.js +62 -19
  16. package/dist/cli/env.d.ts +21 -0
  17. package/dist/cli/env.js +42 -0
  18. package/dist/cli/explain.js +168 -36
  19. package/dist/cli/formatters/human.js +4 -1
  20. package/dist/cli/formatters/pr-comment.js +3 -1
  21. package/dist/cli/gate.js +6 -2
  22. package/dist/cli/impact-analysis.d.ts +63 -0
  23. package/dist/cli/impact-analysis.js +252 -0
  24. package/dist/cli/index.js +185 -0
  25. package/dist/cli/manifest.d.ts +103 -0
  26. package/dist/cli/manifest.js +282 -0
  27. package/dist/cli/migrate.d.ts +41 -0
  28. package/dist/cli/migrate.js +349 -0
  29. package/dist/cli/policy-packs.js +8 -2
  30. package/dist/cli/print-config.js +33 -14
  31. package/dist/cli/regression-gate.js +8 -2
  32. package/dist/cli/report/build-check-report.js +8 -2
  33. package/dist/cli/run.d.ts +101 -0
  34. package/dist/cli/run.js +395 -0
  35. package/dist/cli/share.js +3 -1
  36. package/dist/cli/upgrade.js +2 -1
  37. package/dist/cli/workspace.d.ts +28 -0
  38. package/dist/cli/workspace.js +58 -0
  39. package/dist/client.d.ts +16 -19
  40. package/dist/client.js +60 -43
  41. package/dist/client.request.test.d.ts +1 -1
  42. package/dist/client.request.test.js +222 -147
  43. package/dist/context.js +3 -1
  44. package/dist/errors.js +11 -4
  45. package/dist/export.js +3 -1
  46. package/dist/index.d.ts +8 -2
  47. package/dist/index.js +30 -5
  48. package/dist/integrations/anthropic.d.ts +20 -1
  49. package/dist/integrations/openai-eval.js +4 -2
  50. package/dist/integrations/openai.d.ts +24 -1
  51. package/dist/local.js +3 -1
  52. package/dist/logger.js +6 -2
  53. package/dist/pagination.js +6 -2
  54. package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
  55. package/dist/runtime/adapters/config-to-dsl.js +394 -0
  56. package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
  57. package/dist/runtime/adapters/testsuite-to-dsl.js +276 -0
  58. package/dist/runtime/context.d.ts +26 -0
  59. package/dist/runtime/context.js +74 -0
  60. package/dist/runtime/eval.d.ts +46 -0
  61. package/dist/runtime/eval.js +244 -0
  62. package/dist/runtime/execution-mode.d.ts +80 -0
  63. package/dist/runtime/execution-mode.js +357 -0
  64. package/dist/runtime/executor.d.ts +16 -0
  65. package/dist/runtime/executor.js +152 -0
  66. package/dist/runtime/registry.d.ts +78 -0
  67. package/dist/runtime/registry.js +403 -0
  68. package/dist/runtime/run-report.d.ts +200 -0
  69. package/dist/runtime/run-report.js +222 -0
  70. package/dist/runtime/types.d.ts +356 -0
  71. package/dist/runtime/types.js +76 -0
  72. package/dist/testing.d.ts +65 -0
  73. package/dist/testing.js +49 -2
  74. package/dist/types.d.ts +100 -69
  75. package/dist/utils/input-hash.js +4 -1
  76. package/dist/version.d.ts +1 -1
  77. package/dist/version.js +1 -1
  78. package/dist/workflows.js +62 -14
  79. package/package.json +115 -110
@@ -0,0 +1,58 @@
1
+ "use strict";
2
+ /**
3
+ * CORE-402: Centralized .evalai workspace resolution
4
+ *
5
+ * Provides unified workspace path resolution for all EvalAI CLI commands
6
+ */
7
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
8
+ if (k2 === undefined) k2 = k;
9
+ var desc = Object.getOwnPropertyDescriptor(m, k);
10
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
11
+ desc = { enumerable: true, get: function() { return m[k]; } };
12
+ }
13
+ Object.defineProperty(o, k2, desc);
14
+ }) : (function(o, m, k, k2) {
15
+ if (k2 === undefined) k2 = k;
16
+ o[k2] = m[k];
17
+ }));
18
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
19
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
20
+ }) : function(o, v) {
21
+ o["default"] = v;
22
+ });
23
+ var __importStar = (this && this.__importStar) || (function () {
24
+ var ownKeys = function(o) {
25
+ ownKeys = Object.getOwnPropertyNames || function (o) {
26
+ var ar = [];
27
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
28
+ return ar;
29
+ };
30
+ return ownKeys(o);
31
+ };
32
+ return function (mod) {
33
+ if (mod && mod.__esModule) return mod;
34
+ var result = {};
35
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
36
+ __setModuleDefault(result, mod);
37
+ return result;
38
+ };
39
+ })();
40
+ Object.defineProperty(exports, "__esModule", { value: true });
41
+ exports.resolveEvalWorkspace = resolveEvalWorkspace;
42
+ const path = __importStar(require("node:path"));
43
+ /**
44
+ * Resolve EvalAI workspace paths
45
+ */
46
+ function resolveEvalWorkspace(projectRoot = process.cwd()) {
47
+ const evalaiDir = path.join(projectRoot, ".evalai");
48
+ const runsDir = path.join(evalaiDir, "runs");
49
+ return {
50
+ root: projectRoot,
51
+ evalaiDir,
52
+ runsDir,
53
+ manifestPath: path.join(evalaiDir, "manifest.json"),
54
+ lastRunPath: path.join(evalaiDir, "last-run.json"),
55
+ indexPath: path.join(runsDir, "index.json"),
56
+ baselinePath: path.join(evalaiDir, "baseline-run.json"),
57
+ };
58
+ }
package/dist/client.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import { type Logger } from "./logger";
2
- import type { Annotation, AnnotationItem, AnnotationTask, APIKey, APIKeyUsage, APIKeyWithSecret, ClientConfig, CreateAnnotationItemParams, CreateAnnotationParams, CreateAnnotationTaskParams, CreateAPIKeyParams, CreateEvaluationParams, CreateLLMJudgeConfigParams, CreateRunParams, CreateSpanParams, CreateTestCaseParams, CreateTraceParams, CreateWebhookParams, Evaluation, EvaluationRun, GetLLMJudgeAlignmentParams, GetUsageParams, ListAnnotationItemsParams, ListAnnotationsParams, ListAnnotationTasksParams, ListAPIKeysParams, ListEvaluationsParams, ListLLMJudgeConfigsParams, ListLLMJudgeResultsParams, ListTracesParams, ListWebhookDeliveriesParams, ListWebhooksParams, LLMJudgeAlignment, LLMJudgeConfig, LLMJudgeResult, Organization, OrganizationLimits, RunLLMJudgeParams, Span, TestCase, Trace, UpdateAPIKeyParams, UpdateEvaluationParams, UpdateTraceParams, UpdateWebhookParams, UsageStats, UsageSummary, Webhook, WebhookDelivery } from "./types";
2
+ import type { Annotation, AnnotationItem, AnnotationTask, APIKey, APIKeyUsage, APIKeyWithSecret, ClientConfig, CreateAnnotationItemParams, CreateAnnotationParams, CreateAnnotationTaskParams, CreateAPIKeyParams, CreateEvaluationParams, CreateLLMJudgeConfigParams, CreateRunParams, CreateSpanParams, CreateTestCaseParams, CreateTraceParams, CreateWebhookParams, Evaluation, EvaluationRun, EvaluationRunDetail, GetLLMJudgeAlignmentParams, GetUsageParams, ListAnnotationItemsParams, ListAnnotationsParams, ListAnnotationTasksParams, ListAPIKeysParams, ListEvaluationsParams, ListLLMJudgeConfigsParams, ListLLMJudgeResultsParams, ListTracesParams, ListWebhookDeliveriesParams, ListWebhooksParams, LLMJudgeAlignment, LLMJudgeConfig, LLMJudgeEvaluateResult, LLMJudgeResult, Organization, OrganizationLimits, RunLLMJudgeParams, Span, TestCase, Trace, TraceDetail, UpdateAPIKeyParams, UpdateEvaluationParams, UpdateTraceParams, UpdateWebhookParams, UsageStats, UsageSummary, Webhook, WebhookDelivery } from "./types";
3
3
  /**
4
4
  * AI Evaluation Platform SDK Client
5
5
  *
@@ -81,15 +81,8 @@ export declare class AIEvalClient {
81
81
  */
82
82
  getLogger(): Logger;
83
83
  /**
84
- * Get organization resource limits and usage
85
- * Returns feature usage data for per-organization quotas
86
- *
87
- * @example
88
- * ```typescript
89
- * const limits = await client.getOrganizationLimits();
90
- * console.log('Traces:', limits.traces_per_organization);
91
- * console.log('Evaluations:', limits.evals_per_organization);
92
- * ```
84
+ * @deprecated The /api/organizations/:id/limits endpoint does not exist.
85
+ * Use `organizations.getCurrent()` to get org info instead.
93
86
  */
94
87
  getOrganizationLimits(): Promise<OrganizationLimits>;
95
88
  }
@@ -123,9 +116,9 @@ declare class TraceAPI {
123
116
  message: string;
124
117
  }>;
125
118
  /**
126
- * Get a single trace by ID
119
+ * Get a single trace by ID, including its spans
127
120
  */
128
- get(id: number): Promise<Trace>;
121
+ get(id: number): Promise<TraceDetail>;
129
122
  /**
130
123
  * Update an existing trace (e.g. set status, duration, metadata on completion)
131
124
  *
@@ -193,9 +186,9 @@ declare class EvaluationAPI {
193
186
  */
194
187
  listRuns(evaluationId: number): Promise<EvaluationRun[]>;
195
188
  /**
196
- * Get a specific run
189
+ * Get a specific run with its results
197
190
  */
198
- getRun(evaluationId: number, runId: number): Promise<EvaluationRun>;
191
+ getRun(evaluationId: number, runId: number): Promise<EvaluationRunDetail>;
199
192
  }
200
193
  /**
201
194
  * LLM Judge API methods
@@ -207,8 +200,7 @@ declare class LLMJudgeAPI {
207
200
  * Run an LLM judge evaluation
208
201
  */
209
202
  evaluate(params: RunLLMJudgeParams): Promise<{
210
- result: LLMJudgeResult;
211
- config: unknown;
203
+ result: LLMJudgeEvaluateResult;
212
204
  }>;
213
205
  /**
214
206
  * Create an LLM judge configuration
@@ -289,11 +281,13 @@ declare class DeveloperAPI {
289
281
  /**
290
282
  * Get usage statistics
291
283
  */
292
- getUsage(params: GetUsageParams): Promise<UsageStats>;
284
+ getUsage(params?: GetUsageParams): Promise<UsageStats>;
293
285
  /**
294
286
  * Get usage summary
295
287
  */
296
- getUsageSummary(organizationId: number): Promise<UsageSummary>;
288
+ getUsageSummary(params?: {
289
+ period?: "7d" | "30d" | "90d" | "all";
290
+ }): Promise<UsageSummary>;
297
291
  }
298
292
  /**
299
293
  * API Keys API methods
@@ -355,7 +349,10 @@ declare class WebhooksAPI {
355
349
  /**
356
350
  * Get webhook deliveries
357
351
  */
358
- getDeliveries(webhookId: number, params?: ListWebhookDeliveriesParams): Promise<WebhookDelivery[]>;
352
+ getDeliveries(webhookId: number, params?: ListWebhookDeliveriesParams): Promise<{
353
+ deliveries: WebhookDelivery[];
354
+ total: number;
355
+ }>;
359
356
  }
360
357
  /**
361
358
  * Organizations API methods
package/dist/client.js CHANGED
@@ -44,17 +44,24 @@ class AIEvalClient {
44
44
  constructor(config = {}) {
45
45
  // Tier 1.1: Zero-config with env variable detection (works in Node.js and browsers)
46
46
  this.apiKey =
47
- config.apiKey || getEnvVar("EVALAI_API_KEY") || getEnvVar("AI_EVAL_API_KEY") || "";
47
+ config.apiKey ||
48
+ getEnvVar("EVALAI_API_KEY") ||
49
+ getEnvVar("AI_EVAL_API_KEY") ||
50
+ "";
48
51
  if (!this.apiKey) {
49
52
  throw new errors_1.EvalAIError("API key is required. Provide via config.apiKey or EVALAI_API_KEY environment variable.", "MISSING_API_KEY", 0);
50
53
  }
51
54
  // Auto-detect organization ID from env
52
- const orgIdFromEnv = getEnvVar("EVALAI_ORGANIZATION_ID") || getEnvVar("AI_EVAL_ORGANIZATION_ID");
55
+ const orgIdFromEnv = getEnvVar("EVALAI_ORGANIZATION_ID") ||
56
+ getEnvVar("AI_EVAL_ORGANIZATION_ID");
53
57
  this.organizationId =
54
- config.organizationId || (orgIdFromEnv ? parseInt(orgIdFromEnv, 10) : undefined);
55
- // Default to relative URLs for browser, or allow custom baseUrl
58
+ config.organizationId ||
59
+ (orgIdFromEnv ? parseInt(orgIdFromEnv, 10) : undefined);
56
60
  const isBrowser = typeof globalThis.window !== "undefined";
57
- this.baseUrl = config.baseUrl || (isBrowser ? "" : "http://localhost:3000");
61
+ this.baseUrl =
62
+ config.baseUrl ||
63
+ getEnvVar("EVALAI_BASE_URL") ||
64
+ (isBrowser ? "" : "http://localhost:3000");
58
65
  this.timeout = config.timeout || 30000;
59
66
  // Tier 4.17: Debug mode with request logging
60
67
  const logLevel = config.logLevel || (config.debug ? "debug" : "info");
@@ -292,22 +299,11 @@ class AIEvalClient {
292
299
  return this.logger;
293
300
  }
294
301
  /**
295
- * Get organization resource limits and usage
296
- * Returns feature usage data for per-organization quotas
297
- *
298
- * @example
299
- * ```typescript
300
- * const limits = await client.getOrganizationLimits();
301
- * console.log('Traces:', limits.traces_per_organization);
302
- * console.log('Evaluations:', limits.evals_per_organization);
303
- * ```
302
+ * @deprecated The /api/organizations/:id/limits endpoint does not exist.
303
+ * Use `organizations.getCurrent()` to get org info instead.
304
304
  */
305
305
  async getOrganizationLimits() {
306
- const orgId = this.getOrganizationId();
307
- if (!orgId) {
308
- throw new errors_1.EvalAIError("Organization ID is required", "MISSING_ORGANIZATION_ID", 0);
309
- }
310
- return this.request(`/api/organizations/${orgId}/limits`);
306
+ return {};
311
307
  }
312
308
  }
313
309
  exports.AIEvalClient = AIEvalClient;
@@ -370,7 +366,7 @@ class TraceAPI {
370
366
  });
371
367
  }
372
368
  /**
373
- * Get a single trace by ID
369
+ * Get a single trace by ID, including its spans
374
370
  */
375
371
  async get(id) {
376
372
  return this.client.request(`/api/traces/${id}`);
@@ -504,7 +500,7 @@ class EvaluationAPI {
504
500
  return this.client.request(`/api/evaluations/${evaluationId}/runs`);
505
501
  }
506
502
  /**
507
- * Get a specific run
503
+ * Get a specific run with its results
508
504
  */
509
505
  async getRun(evaluationId, runId) {
510
506
  return this.client.request(`/api/evaluations/${evaluationId}/runs/${runId}`);
@@ -547,7 +543,9 @@ class LLMJudgeAPI {
547
543
  if (params.offset)
548
544
  searchParams.set("offset", params.offset.toString());
549
545
  const query = searchParams.toString();
550
- const endpoint = query ? `/api/llm-judge/configs?${query}` : "/api/llm-judge/configs";
546
+ const endpoint = query
547
+ ? `/api/llm-judge/configs?${query}`
548
+ : "/api/llm-judge/configs";
551
549
  return this.client.request(endpoint);
552
550
  }
553
551
  /**
@@ -564,7 +562,9 @@ class LLMJudgeAPI {
564
562
  if (params.offset)
565
563
  searchParams.set("offset", params.offset.toString());
566
564
  const query = searchParams.toString();
567
- const endpoint = query ? `/api/llm-judge/results?${query}` : "/api/llm-judge/results";
565
+ const endpoint = query
566
+ ? `/api/llm-judge/results?${query}`
567
+ : "/api/llm-judge/results";
568
568
  return this.client.request(endpoint);
569
569
  }
570
570
  /**
@@ -572,11 +572,7 @@ class LLMJudgeAPI {
572
572
  */
573
573
  async getAlignment(params) {
574
574
  const searchParams = new URLSearchParams();
575
- searchParams.set("configId", params.configId.toString());
576
- if (params.startDate)
577
- searchParams.set("startDate", params.startDate);
578
- if (params.endDate)
579
- searchParams.set("endDate", params.endDate);
575
+ searchParams.set("evaluationRunId", params.evaluationRunId.toString());
580
576
  const query = searchParams.toString();
581
577
  return this.client.request(`/api/llm-judge/alignment?${query}`);
582
578
  }
@@ -651,14 +647,18 @@ class AnnotationTasksAPI {
651
647
  if (params.offset)
652
648
  searchParams.set("offset", params.offset.toString());
653
649
  const query = searchParams.toString();
654
- const endpoint = query ? `/api/annotations/tasks?${query}` : "/api/annotations/tasks";
650
+ const endpoint = query
651
+ ? `/api/annotations/tasks?${query}`
652
+ : "/api/annotations/tasks";
655
653
  return this.client.request(endpoint);
656
654
  }
657
655
  /**
658
656
  * Get an annotation task
659
657
  */
660
658
  async get(taskId) {
661
- return this.client.request(`/api/annotations/tasks/${taskId}`);
659
+ return this.client
660
+ .request(`/api/annotations/tasks/${taskId}`)
661
+ .then((res) => res.task);
662
662
  }
663
663
  }
664
664
  /**
@@ -705,21 +705,34 @@ class DeveloperAPI {
705
705
  /**
706
706
  * Get usage statistics
707
707
  */
708
- async getUsage(params) {
708
+ async getUsage(params = {}) {
709
709
  const searchParams = new URLSearchParams();
710
- searchParams.set("organizationId", params.organizationId.toString());
711
- if (params.startDate)
712
- searchParams.set("startDate", params.startDate);
713
- if (params.endDate)
714
- searchParams.set("endDate", params.endDate);
710
+ if (params.period)
711
+ searchParams.set("period", params.period);
712
+ if (params.groupBy)
713
+ searchParams.set("groupBy", params.groupBy);
714
+ if (params.limit)
715
+ searchParams.set("limit", params.limit.toString());
716
+ if (params.offset)
717
+ searchParams.set("offset", params.offset.toString());
715
718
  const query = searchParams.toString();
716
- return this.client.request(`/api/developer/usage?${query}`);
719
+ const endpoint = query
720
+ ? `/api/developer/usage?${query}`
721
+ : "/api/developer/usage";
722
+ return this.client.request(endpoint);
717
723
  }
718
724
  /**
719
725
  * Get usage summary
720
726
  */
721
- async getUsageSummary(organizationId) {
722
- return this.client.request(`/api/developer/usage/summary?organizationId=${organizationId}`);
727
+ async getUsageSummary(params = {}) {
728
+ const searchParams = new URLSearchParams();
729
+ if (params.period)
730
+ searchParams.set("period", params.period);
731
+ const query = searchParams.toString();
732
+ const endpoint = query
733
+ ? `/api/developer/usage/summary?${query}`
734
+ : "/api/developer/usage/summary";
735
+ return this.client.request(endpoint);
723
736
  }
724
737
  }
725
738
  /**
@@ -750,7 +763,9 @@ class APIKeysAPI {
750
763
  if (params.offset)
751
764
  searchParams.set("offset", params.offset.toString());
752
765
  const query = searchParams.toString();
753
- const endpoint = query ? `/api/developer/api-keys?${query}` : "/api/developer/api-keys";
766
+ const endpoint = query
767
+ ? `/api/developer/api-keys?${query}`
768
+ : "/api/developer/api-keys";
754
769
  return this.client.request(endpoint);
755
770
  }
756
771
  /**
@@ -840,8 +855,8 @@ class WebhooksAPI {
840
855
  searchParams.set("limit", params.limit.toString());
841
856
  if (params.offset)
842
857
  searchParams.set("offset", params.offset.toString());
843
- if (params.success !== undefined)
844
- searchParams.set("success", params.success.toString());
858
+ if (params.status)
859
+ searchParams.set("status", params.status);
845
860
  const query = searchParams.toString();
846
861
  const endpoint = query
847
862
  ? `/api/developer/webhooks/${webhookId}/deliveries?${query}`
@@ -860,6 +875,8 @@ class OrganizationsAPI {
860
875
  * Get current organization
861
876
  */
862
877
  async getCurrent() {
863
- return this.client.request("/api/organizations/current");
878
+ return this.client
879
+ .request("/api/organizations/current")
880
+ .then((res) => res.organization);
864
881
  }
865
882
  }
@@ -1 +1 @@
1
- export {};
1
+ export {};