@pauly4010/evalai-sdk 1.8.0 → 1.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. package/CHANGELOG.md +54 -0
  2. package/README.md +136 -23
  3. package/dist/assertions.js +51 -18
  4. package/dist/batch.js +8 -2
  5. package/dist/cli/api.js +3 -1
  6. package/dist/cli/check.js +19 -6
  7. package/dist/cli/ci-context.js +3 -1
  8. package/dist/cli/ci.d.ts +45 -0
  9. package/dist/cli/ci.js +192 -0
  10. package/dist/cli/config.js +28 -8
  11. package/dist/cli/diff.d.ts +173 -0
  12. package/dist/cli/diff.js +685 -0
  13. package/dist/cli/discover.d.ts +84 -0
  14. package/dist/cli/discover.js +419 -0
  15. package/dist/cli/doctor.js +62 -19
  16. package/dist/cli/env.d.ts +21 -0
  17. package/dist/cli/env.js +42 -0
  18. package/dist/cli/explain.js +168 -36
  19. package/dist/cli/formatters/human.js +4 -1
  20. package/dist/cli/formatters/pr-comment.js +3 -1
  21. package/dist/cli/gate.js +6 -2
  22. package/dist/cli/impact-analysis.d.ts +63 -0
  23. package/dist/cli/impact-analysis.js +252 -0
  24. package/dist/cli/index.js +185 -0
  25. package/dist/cli/manifest.d.ts +103 -0
  26. package/dist/cli/manifest.js +282 -0
  27. package/dist/cli/migrate.d.ts +41 -0
  28. package/dist/cli/migrate.js +349 -0
  29. package/dist/cli/policy-packs.js +8 -2
  30. package/dist/cli/print-config.js +33 -14
  31. package/dist/cli/regression-gate.js +8 -2
  32. package/dist/cli/report/build-check-report.js +8 -2
  33. package/dist/cli/run.d.ts +101 -0
  34. package/dist/cli/run.js +395 -0
  35. package/dist/cli/share.js +3 -1
  36. package/dist/cli/upgrade.js +2 -1
  37. package/dist/cli/workspace.d.ts +28 -0
  38. package/dist/cli/workspace.js +58 -0
  39. package/dist/client.d.ts +16 -19
  40. package/dist/client.js +60 -43
  41. package/dist/client.request.test.d.ts +1 -1
  42. package/dist/client.request.test.js +222 -147
  43. package/dist/context.js +3 -1
  44. package/dist/errors.js +11 -4
  45. package/dist/export.js +3 -1
  46. package/dist/index.d.ts +8 -2
  47. package/dist/index.js +30 -5
  48. package/dist/integrations/anthropic.d.ts +20 -1
  49. package/dist/integrations/openai-eval.js +4 -2
  50. package/dist/integrations/openai.d.ts +24 -1
  51. package/dist/local.js +3 -1
  52. package/dist/logger.js +6 -2
  53. package/dist/pagination.js +6 -2
  54. package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
  55. package/dist/runtime/adapters/config-to-dsl.js +394 -0
  56. package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
  57. package/dist/runtime/adapters/testsuite-to-dsl.js +276 -0
  58. package/dist/runtime/context.d.ts +26 -0
  59. package/dist/runtime/context.js +74 -0
  60. package/dist/runtime/eval.d.ts +46 -0
  61. package/dist/runtime/eval.js +244 -0
  62. package/dist/runtime/execution-mode.d.ts +80 -0
  63. package/dist/runtime/execution-mode.js +357 -0
  64. package/dist/runtime/executor.d.ts +16 -0
  65. package/dist/runtime/executor.js +152 -0
  66. package/dist/runtime/registry.d.ts +78 -0
  67. package/dist/runtime/registry.js +403 -0
  68. package/dist/runtime/run-report.d.ts +200 -0
  69. package/dist/runtime/run-report.js +222 -0
  70. package/dist/runtime/types.d.ts +356 -0
  71. package/dist/runtime/types.js +76 -0
  72. package/dist/testing.d.ts +65 -0
  73. package/dist/testing.js +49 -2
  74. package/dist/types.d.ts +100 -69
  75. package/dist/utils/input-hash.js +4 -1
  76. package/dist/version.d.ts +1 -1
  77. package/dist/version.js +1 -1
  78. package/dist/workflows.js +62 -14
  79. package/package.json +115 -110
@@ -0,0 +1,84 @@
1
+ /**
2
+ * TICKET 1 — evalai discover
3
+ *
4
+ * Your first "holy shit" moment feature
5
+ *
6
+ * Goal:
7
+ * npm install
8
+ * evalai discover
9
+ *
10
+ * Output:
11
+ * Found 42 behavioral specifications
12
+ * Safety: 12
13
+ * Accuracy: 18
14
+ * Agents: 7
15
+ * Tools: 5
16
+ *
17
+ * Why this matters:
18
+ * - makes EvalAI feel alive
19
+ * - proves DSL works
20
+ * - enables intelligence layer
21
+ *
22
+ * This becomes your entry point command.
23
+ */
24
/**
 * Aggregate result of one discovery pass over a project.
 *
 * Produced by `discoverSpecs()` and consumed by `printDiscoveryResults()`.
 */
export interface DiscoveryStats {
    /** How many specification entries were analyzed. */
    totalSpecs: number;
    /** Number of specifications carrying each tag/category, keyed by tag name. */
    categories: { [category: string]: number };
    /** Number of specifications found in each file, keyed by file path. */
    files: { [file: string]: number };
    /** Snapshot of the execution mode detected for the project. */
    executionMode: {
        /** Name of the detected mode. */
        mode: string;
        /** Whether the spec-based runtime was detected. */
        hasSpecRuntime: boolean;
        /** Whether the legacy runtime was detected. */
        hasLegacyRuntime: boolean;
        /** Paths of the specification files that were found. */
        specFiles: string[];
        /** Path of the legacy configuration file, when one exists. */
        legacyConfig?: string;
    };
    /** Basic facts about the project that was scanned. */
    project: {
        /** Directory the scan was rooted at. */
        root: string;
        /** Project name. */
        name: string;
        /** Whether a package.json was found. */
        hasPackageJson: boolean;
        /** Whether a .git entry was found. */
        hasGit: boolean;
    };
}
50
/**
 * Static analysis result for a single specification file.
 */
export interface SpecAnalysis {
    /** Identifier assigned to the specification. */
    id: string;
    /** Display name of the specification. */
    name: string;
    /** Path of the file the specification lives in. */
    file: string;
    /** Tags/categories attached to the specification. */
    tags: string[];
    /** Whether the file appears to contain assertions. */
    hasAssertions: boolean;
    /** Whether the file appears to use external models. */
    usesModels: boolean;
    /** Whether the file appears to use tools. */
    usesTools: boolean;
    /** Rough complexity bucket estimated for the file. */
    complexity: "simple" | "medium" | "complex";
}
71
/**
 * Discover and analyze the behavioral specifications of the project rooted
 * at the current working directory.
 *
 * @param options - Optional settings.
 * @param options.manifest - When true, the evaluation manifest is generated
 *   and written in addition to the analysis.
 * @returns Statistics describing what was discovered.
 */
export declare function discoverSpecs(options?: { manifest?: boolean }): Promise<DiscoveryStats>;
77
/**
 * Print a human-readable discovery report (summary, categories, execution
 * mode, project info and recommendations) to the console.
 *
 * @param stats - Discovery statistics to render.
 */
export declare function printDiscoveryResults(stats: DiscoveryStats): void;
81
/**
 * CLI entry point for the discover command.
 *
 * Runs discovery and then terminates the process: exit code 0 on success,
 * 1 when discovery fails.
 */
export declare function runDiscover(): Promise<void>;
@@ -0,0 +1,419 @@
1
+ "use strict";
2
+ /**
3
+ * TICKET 1 — evalai discover
4
+ *
5
+ * Your first "holy shit" moment feature
6
+ *
7
+ * Goal:
8
+ * npm install
9
+ * evalai discover
10
+ *
11
+ * Output:
12
+ * Found 42 behavioral specifications
13
+ * Safety: 12
14
+ * Accuracy: 18
15
+ * Agents: 7
16
+ * Tools: 5
17
+ *
18
+ * Why this matters:
19
+ * - makes EvalAI feel alive
20
+ * - proves DSL works
21
+ * - enables intelligence layer
22
+ *
23
+ * This becomes your entry point command.
24
+ */
25
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
26
+ if (k2 === undefined) k2 = k;
27
+ var desc = Object.getOwnPropertyDescriptor(m, k);
28
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
29
+ desc = { enumerable: true, get: function() { return m[k]; } };
30
+ }
31
+ Object.defineProperty(o, k2, desc);
32
+ }) : (function(o, m, k, k2) {
33
+ if (k2 === undefined) k2 = k;
34
+ o[k2] = m[k];
35
+ }));
36
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
37
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
38
+ }) : function(o, v) {
39
+ o["default"] = v;
40
+ });
41
+ var __importStar = (this && this.__importStar) || (function () {
42
+ var ownKeys = function(o) {
43
+ ownKeys = Object.getOwnPropertyNames || function (o) {
44
+ var ar = [];
45
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
46
+ return ar;
47
+ };
48
+ return ownKeys(o);
49
+ };
50
+ return function (mod) {
51
+ if (mod && mod.__esModule) return mod;
52
+ var result = {};
53
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
54
+ __setModuleDefault(result, mod);
55
+ return result;
56
+ };
57
+ })();
58
+ Object.defineProperty(exports, "__esModule", { value: true });
59
+ exports.discoverSpecs = discoverSpecs;
60
+ exports.printDiscoveryResults = printDiscoveryResults;
61
+ exports.runDiscover = runDiscover;
62
const crypto = __importStar(require("node:crypto"));
const fs = __importStar(require("node:fs/promises"));
const path = __importStar(require("node:path"));
const execution_mode_1 = require("../runtime/execution-mode");
const manifest_1 = require("./manifest");
66
/**
 * Discover and analyze behavioral specifications in the current project.
 *
 * Detects the execution mode for `process.cwd()`, gathers project metadata,
 * analyzes every detected spec file, optionally writes the evaluation
 * manifest, then prints and returns the resulting statistics. When no spec
 * files are detected, a short hint is printed and an empty result is
 * returned without printing the full report.
 *
 * @param options - Optional settings; `options.manifest` requests manifest generation.
 * @returns The discovery statistics.
 * @throws Re-throws any failure after logging it to stderr.
 */
async function discoverSpecs(options = {}) {
    try {
        const root = process.cwd();
        const detected = await (0, execution_mode_1.getExecutionMode)(root);
        const project = await getProjectMetadata(root);
        if (detected.specFiles.length === 0) {
            // Nothing to analyze: print a hint and return an empty result.
            console.log("\n✨ No behavioral specifications found.");
            console.log("💡 Create files with defineEval() calls to get started.");
            const { mode, hasSpecRuntime, hasLegacyRuntime, specFiles, legacyConfig } = detected;
            return {
                totalSpecs: 0,
                categories: {},
                files: {},
                executionMode: { mode, hasSpecRuntime, hasLegacyRuntime, specFiles, legacyConfig },
                project,
            };
        }
        const analyzed = await analyzeSpecifications(detected.specFiles);
        if (options.manifest) {
            console.log("🔧 Generating evaluation manifest...");
            const built = await (0, manifest_1.generateManifest)(analyzed, root, project.name, detected);
            await (0, manifest_1.writeManifest)(built, root);
            console.log(`✅ Manifest written to .evalai/manifest.json`);
            console.log(`✅ Lock file written to .evalai/manifest.lock.json`);
        }
        const summary = calculateStats(analyzed, detected, project);
        printDiscoveryResults(summary);
        return summary;
    }
    catch (failure) {
        const reason = failure instanceof Error ? failure.message : String(failure);
        console.error("❌ Discovery failed:", reason);
        throw failure;
    }
}
112
/**
 * Collect basic metadata about the project at `projectRoot`.
 *
 * Reads `package.json` for the project name and checks whether a `.git`
 * entry is accessible. A missing or unparsable package.json is tolerated.
 *
 * @param projectRoot - Directory to inspect.
 * @returns `{ root, name, hasPackageJson, hasGit }`; `name` is "unknown"
 *   when it cannot be determined.
 */
async function getProjectMetadata(projectRoot) {
    const manifestFile = path.join(projectRoot, "package.json");
    const gitEntry = path.join(projectRoot, ".git");
    const metadata = {
        root: projectRoot,
        name: "unknown",
        hasPackageJson: false,
        hasGit: false,
    };
    try {
        const raw = await fs.readFile(manifestFile, "utf-8");
        const manifest = JSON.parse(raw);
        metadata.hasPackageJson = true;
        metadata.name = manifest.name || "unknown";
    }
    catch (_ignored) {
        // package.json is absent or unreadable: keep the defaults
    }
    try {
        await fs.access(gitEntry);
        metadata.hasGit = true;
    }
    catch (_ignored) {
        // no accessible .git entry
    }
    return metadata;
}
140
/**
 * Analyze each specification file in turn.
 *
 * Files are read sequentially. A file that cannot be read or analyzed is
 * reported with a warning and left out of the result instead of aborting
 * the whole run.
 *
 * @param specFiles - Paths of the specification files to analyze.
 * @returns One analysis per successfully processed file, in input order.
 */
async function analyzeSpecifications(specFiles) {
    const analyses = [];
    for (const specPath of specFiles) {
        try {
            const source = await fs.readFile(specPath, "utf-8");
            analyses.push(analyzeSpecFile(specPath, source));
        }
        catch (problem) {
            const detail = problem instanceof Error ? problem.message : String(problem);
            console.warn(`Warning: Could not analyze ${specPath}: ${detail}`);
        }
    }
    return analyses;
}
157
/**
 * Build the analysis record for one specification file.
 *
 * Everything here is text-based heuristics over the file content: the name
 * comes from the first `defineEval(...)` call (falling back to the file
 * name without a `.ts` suffix), and the model/tool/assertion flags are
 * plain substring checks.
 *
 * @param filePath - Path of the specification file.
 * @param content - Text content of the file.
 * @returns The analysis record for the file.
 */
function analyzeSpecFile(filePath, content) {
    const mentions = (needle) => content.includes(needle);
    // Locate defineEval(...) calls and pull the first quoted argument out of each
    const evalCalls = content.match(/defineEval\s*\([^)]+\)/g) || [];
    const declaredNames = evalCalls.map((call) => {
        const quoted = call.match(/["'`](.+?)["'`](?:\s*,|\s*\))/);
        return quoted ? quoted[1] : "unnamed";
    });
    const tags = extractTags(content);
    const complexity = analyzeComplexity(content);
    const usesModels = ["model:", "model=", "openai", "anthropic"].some(mentions);
    const usesTools = ["tool:", "function.", "call("].some(mentions);
    const hasAssertions = ["assert", "expect", "should"].some(mentions);
    const id = generateSpecId(filePath);
    const name = declaredNames[0] || path.basename(filePath, ".ts");
    const file = path.relative(process.cwd(), filePath);
    return {
        id,
        name,
        file,
        tags,
        hasAssertions,
        usesModels,
        usesTools,
        complexity,
    };
}
196
/**
 * Derive the tag list for a specification from its source text.
 *
 * Tags come from two places: the quoted items of the first `tags: [...]`
 * array literal, and keywords found in the first `description: "..."`
 * string, which map to well-known categories.
 *
 * @param content - Text content of the specification file.
 * @returns De-duplicated tags, explicit tags first.
 */
function extractTags(content) {
    const quotedItem = /["'`](.+?)["'`](?:\s*,|\s*)/;
    const collected = [];
    // Explicit tags: tags: ["a", "b"]
    const listMatch = content.match(/tags\s*:\s*\[([^\]]+)\]/);
    if (listMatch) {
        const rawItems = listMatch[1].match(/["'`](.+?)["'`](?:\s*,|\s*)/g) || [];
        for (const raw of rawItems) {
            collected.push(raw.replace(quotedItem, "$1"));
        }
    }
    // Implicit tags: keywords spotted in the description text
    const descriptionMatch = content.match(/description\s*:\s*["'`](.+?)["'`](?:\s*,|\s*)/);
    if (descriptionMatch) {
        const text = descriptionMatch[1].toLowerCase();
        const rules = [
            { keywords: ["safety", "security"], tag: "safety" },
            { keywords: ["accuracy", "precision"], tag: "accuracy" },
            { keywords: ["agent", "autonomous"], tag: "agents" },
            { keywords: ["tool", "function"], tag: "tools" },
            { keywords: ["latency", "speed"], tag: "performance" },
            { keywords: ["hallucination", "fact"], tag: "factual" },
            { keywords: ["bias", "fairness"], tag: "bias" },
            { keywords: ["privacy", "pii"], tag: "privacy" },
        ];
        for (const rule of rules) {
            if (rule.keywords.some((keyword) => text.includes(keyword))) {
                collected.push(rule.tag);
            }
        }
    }
    return Array.from(new Set(collected));
}
232
/**
 * Estimate how complex a specification file is.
 *
 * The estimate is a score built from cheap textual signals:
 *   - more than 50 lines: +2, more than 100 lines: a further +3
 *   - async/await: +2
 *   - for/while loops: +1
 *   - if/switch: +1
 *   - try/catch: +1
 *   - mentions of fetch/http/api: +2
 *
 * Language keywords are matched as whole words, so identifiers or prose
 * that merely contain them ("specification", "information", "entry") do
 * not inflate the score.
 *
 * @param content - Text content of the specification file.
 * @returns "simple" (score <= 2), "medium" (score <= 5) or "complex".
 */
function analyzeComplexity(content) {
    const lines = content.split("\n").length;
    const hasAsync = /\b(?:async|await)\b/.test(content);
    const hasLoops = /\b(?:for|while)\b/.test(content);
    const hasConditionals = /\b(?:if|switch)\b/.test(content);
    const hasTryCatch = /\b(?:try|catch)\b/.test(content);
    // Substring match on purpose: these also need to hit URLs ("https://")
    // and identifiers such as "apiKey".
    const hasExternalCalls = content.includes("fetch") ||
        content.includes("http") ||
        content.includes("api");
    let complexityScore = 0;
    if (lines > 50)
        complexityScore += 2;
    if (lines > 100)
        complexityScore += 3;
    if (hasAsync)
        complexityScore += 2;
    if (hasLoops)
        complexityScore += 1;
    if (hasConditionals)
        complexityScore += 1;
    if (hasTryCatch)
        complexityScore += 1;
    if (hasExternalCalls)
        complexityScore += 2;
    if (complexityScore <= 2)
        return "simple";
    if (complexityScore <= 5)
        return "medium";
    return "complex";
}
265
/**
 * Generate a short, stable specification ID from a file path.
 *
 * The ID is the first 8 hex characters of the SHA-256 digest of the path
 * relative to the current working directory. The whole path feeds the
 * digest, so files sharing a directory prefix get distinct IDs. Path
 * separators are normalized to "/" before hashing so the relative path
 * hashes the same regardless of the platform separator.
 *
 * @param filePath - Path of the specification file.
 * @returns An 8-character hexadecimal identifier.
 */
function generateSpecId(filePath) {
    const relativePath = path
        .relative(process.cwd(), filePath)
        .split(path.sep)
        .join("/");
    return crypto
        .createHash("sha256")
        .update(relativePath)
        .digest("hex")
        .slice(0, 8);
}
276
/**
 * Aggregate per-spec analyses into discovery statistics.
 *
 * Counts are accumulated in Maps rather than plain objects, so a tag or
 * file name that collides with an inherited object member (for example
 * "constructor" or "toString") is counted like any other key.
 *
 * @param specs - Analyses to aggregate; each provides `tags` and `file`.
 * @param executionMode - Detected execution mode; its mode, runtime flags,
 *   spec files and legacy config are copied into the result.
 * @param project - Project metadata, passed through unchanged.
 * @returns Totals per category and per file plus the mode/project info.
 *   When no spec carries a tag, a single "general" category holds the
 *   total spec count.
 */
function calculateStats(specs, executionMode, project) {
    const tagCounts = new Map();
    const fileCounts = new Map();
    for (const spec of specs) {
        for (const tag of spec.tags) {
            tagCounts.set(tag, (tagCounts.get(tag) || 0) + 1);
        }
        fileCounts.set(spec.file, (fileCounts.get(spec.file) || 0) + 1);
    }
    // Fall back to one catch-all category when nothing was tagged
    if (tagCounts.size === 0) {
        tagCounts.set("general", specs.length);
    }
    return {
        totalSpecs: specs.length,
        categories: Object.fromEntries(tagCounts),
        files: Object.fromEntries(fileCounts),
        executionMode: {
            mode: executionMode.mode,
            hasSpecRuntime: executionMode.hasSpecRuntime,
            hasLegacyRuntime: executionMode.hasLegacyRuntime,
            specFiles: executionMode.specFiles,
            legacyConfig: executionMode.legacyConfig,
        },
        project,
    };
}
308
/**
 * Write the human-readable discovery report to the console.
 *
 * Sections, in order: headline with the spec count, the ten most frequent
 * categories, the execution mode, project information and, last, the
 * recommendations.
 *
 * @param stats - Discovery statistics to render.
 */
function printDiscoveryResults(stats) {
    const emit = (line) => console.log(line);
    emit(`🔍 EvalAI Discovery Results`);
    emit("");
    emit(`📊 Found ${stats.totalSpecs} behavioral specifications`);
    emit("");
    // Categories, most frequent first, capped at ten entries
    const categoryEntries = Object.entries(stats.categories);
    if (categoryEntries.length > 0) {
        emit(`📋 Categories:`);
        const topTen = categoryEntries
            .sort((left, right) => right[1] - left[1])
            .slice(0, 10);
        topTen.forEach(([label, total]) => {
            emit(` ${getCategoryIcon(label)} ${label}: ${total}`);
        });
        emit("");
    }
    // Execution mode
    const runtime = stats.executionMode;
    emit(`⚙️ Execution Mode: ${runtime.mode.toUpperCase()}`);
    if (runtime.hasSpecRuntime) {
        emit(` ✅ Spec runtime: ${runtime.specFiles.length} files`);
    }
    if (runtime.hasLegacyRuntime) {
        const legacyLabel = runtime.legacyConfig
            ? path.basename(runtime.legacyConfig)
            : "config";
        emit(` ✅ Legacy runtime: ${legacyLabel}`);
    }
    emit("");
    // Project information
    const project = stats.project;
    emit(`📁 Project: ${project.name}`);
    emit(` 📍 Root: ${project.root}`);
    emit(` 📦 Package.json: ${project.hasPackageJson ? "✅" : "❌"}`);
    emit(` 🔄 Git: ${project.hasGit ? "✅" : "❌"}`);
    emit("");
    printRecommendations(stats);
}
346
/**
 * Icons for the well-known categories, keyed by lower-case category name.
 * Kept in a Map so lookups only ever see these entries, never members
 * inherited from Object.prototype.
 */
const CATEGORY_ICONS = new Map([
    ["safety", "🛡️"],
    ["security", "🔒"],
    ["accuracy", "🎯"],
    ["precision", "🎯"],
    ["agents", "🤖"],
    ["autonomous", "🤖"],
    ["tools", "🔧"],
    ["functions", "🔧"],
    ["performance", "⚡"],
    ["latency", "⚡"],
    ["speed", "⚡"],
    ["factual", "📊"],
    ["hallucination", "📊"],
    ["bias", "⚖️"],
    ["fairness", "⚖️"],
    ["privacy", "🔐"],
    ["pii", "🔐"],
    ["general", "📝"],
]);
/**
 * Get the icon for a category.
 *
 * @param category - Category name; matched case-insensitively.
 * @returns The icon of a known category, or "📝" for anything else,
 *   including names such as "constructor" that exist on every object.
 */
function getCategoryIcon(category) {
    return CATEGORY_ICONS.get(category.toLowerCase()) || "📝";
}
372
/**
 * Print next-step recommendations for the discovered project.
 *
 * Emits a heading, one tip chosen by the number of specifications, one tip
 * chosen by which runtimes were detected, and a trailing blank line.
 *
 * @param stats - Discovery statistics; reads `totalSpecs` and the runtime
 *   flags of `executionMode`.
 */
function printRecommendations(stats) {
    console.log(`💡 Recommendations:`);
    // Tip based on how many specifications exist
    const total = stats.totalSpecs;
    let coverageTip;
    if (total === 0) {
        coverageTip = ` 🚀 No specifications found. Create your first eval with:
echo 'import { defineEval } from "@pauly4010/evalai-sdk";
defineEval("hello-world", async (context) => {
return { pass: true, score: 100 };
});' > eval/hello.spec.ts`;
    }
    else if (total < 5) {
        coverageTip = ` 📈 Add more specifications to improve coverage`;
    }
    else if (total < 20) {
        coverageTip = ` 🎯 Good start! Consider organizing by categories`;
    }
    else {
        coverageTip = ` 🏆 Excellent coverage! Consider running evalai run`;
    }
    console.log(coverageTip);
    // Tip based on the detected runtimes; the three cases are mutually exclusive
    const { hasSpecRuntime, hasLegacyRuntime } = stats.executionMode;
    if (hasSpecRuntime) {
        console.log(` 🚀 Ready to run! Use 'evalai run' to execute specifications`);
    }
    else if (hasLegacyRuntime) {
        console.log(` 🔄 Legacy project detected. Try 'evalai migrate config' to upgrade`);
    }
    else {
        console.log(` 🆕 New project? Try 'evalai init' to get started`);
    }
    console.log(``);
}
406
/**
 * Run the discovery command (CLI entry point).
 *
 * Runs discovery and terminates the process: exit code 0 on success, 1 on
 * failure (after logging the error).
 */
async function runDiscover() {
    try {
        const stats = await discoverSpecs();
        // discoverSpecs() prints the full report itself whenever spec files
        // were detected. Print here only for the empty case, where it
        // returns early without a report, so the report is never shown twice.
        if (stats.executionMode.specFiles.length === 0) {
            printDiscoveryResults(stats);
        }
        process.exit(0);
    }
    catch (error) {
        console.error(`❌ Discovery failed: ${error instanceof Error ? error.message : String(error)}`);
        process.exit(1);
    }
}
@@ -96,8 +96,10 @@ function parseFlags(argv) {
96
96
  const baseUrl = raw.baseUrl || process.env.EVALAI_BASE_URL || "http://localhost:3000";
97
97
  const apiKey = raw.apiKey || process.env.EVALAI_API_KEY || "";
98
98
  let evaluationId = raw.evaluationId || "";
99
- const baseline = (raw.baseline === "previous" ? "previous"
100
- : raw.baseline === "production" ? "production"
99
+ const baseline = (raw.baseline === "previous"
100
+ ? "previous"
101
+ : raw.baseline === "production"
102
+ ? "production"
101
103
  : "published");
102
104
  // Try to fill evaluationId from config
103
105
  if (!evaluationId) {
@@ -111,7 +113,15 @@ function parseFlags(argv) {
111
113
  evaluationId = String(merged.evaluationId);
112
114
  }
113
115
  const strict = raw.strict === "true" || raw.strict === "1";
114
- return { report, format: report ? "json" : fmt, strict, baseUrl, apiKey, evaluationId, baseline };
116
+ return {
117
+ report,
118
+ format: report ? "json" : fmt,
119
+ strict,
120
+ baseUrl,
121
+ apiKey,
122
+ evaluationId,
123
+ baseline,
124
+ };
115
125
  }
116
126
  // ── Individual checks ──
117
127
  function checkProject(cwd) {
@@ -222,7 +232,10 @@ function checkBaseline(cwd) {
222
232
  };
223
233
  }
224
234
  const schemaVersion = typeof data.schemaVersion === "number" ? data.schemaVersion : undefined;
225
- const hash = (0, node_crypto_1.createHash)("sha256").update(JSON.stringify(data)).digest("hex").slice(0, 12);
235
+ const hash = (0, node_crypto_1.createHash)("sha256")
236
+ .update(JSON.stringify(data))
237
+ .digest("hex")
238
+ .slice(0, 12);
226
239
  const updatedAt = typeof data.updatedAt === "string" ? data.updatedAt : undefined;
227
240
  // Staleness: warn if baseline older than 30 days
228
241
  let stale = false;
@@ -237,7 +250,12 @@ function checkBaseline(cwd) {
237
250
  status: "fail",
238
251
  message: `Unsupported baseline schemaVersion: ${schemaVersion ?? "missing"}`,
239
252
  remediation: "Run: npx evalai baseline init (creates schemaVersion 1)",
240
- baselineInfo: { path: "evals/baseline.json", exists: true, hash, schemaVersion },
253
+ baselineInfo: {
254
+ path: "evals/baseline.json",
255
+ exists: true,
256
+ hash,
257
+ schemaVersion,
258
+ },
241
259
  };
242
260
  }
243
261
  if (stale) {
@@ -247,7 +265,13 @@ function checkBaseline(cwd) {
247
265
  status: "warn",
248
266
  message: `Baseline is stale (last updated ${updatedAt})`,
249
267
  remediation: "Run: npx evalai baseline update",
250
- baselineInfo: { path: "evals/baseline.json", exists: true, hash, schemaVersion, stale },
268
+ baselineInfo: {
269
+ path: "evals/baseline.json",
270
+ exists: true,
271
+ hash,
272
+ schemaVersion,
273
+ stale,
274
+ },
251
275
  };
252
276
  }
253
277
  return {
@@ -255,7 +279,13 @@ function checkBaseline(cwd) {
255
279
  label: "Baseline file",
256
280
  status: "pass",
257
281
  message: `schemaVersion ${schemaVersion}, hash ${hash}`,
258
- baselineInfo: { path: "evals/baseline.json", exists: true, hash, schemaVersion, stale },
282
+ baselineInfo: {
283
+ path: "evals/baseline.json",
284
+ exists: true,
285
+ hash,
286
+ schemaVersion,
287
+ stale,
288
+ },
259
289
  };
260
290
  }
261
291
  function checkAuth(apiKey) {
@@ -269,9 +299,7 @@ function checkAuth(apiKey) {
269
299
  };
270
300
  }
271
301
  // Redact key for display
272
- const redacted = apiKey.length > 8
273
- ? `${apiKey.slice(0, 4)}...${apiKey.slice(-4)}`
274
- : "****";
302
+ const redacted = apiKey.length > 8 ? `${apiKey.slice(0, 4)}...${apiKey.slice(-4)}` : "****";
275
303
  return {
276
304
  id: "auth",
277
305
  label: "Authentication",
@@ -437,7 +465,8 @@ function checkCiWiring(cwd) {
437
465
  ciInfo: { workflowPath, exists: true },
438
466
  };
439
467
  }
440
- if (!content.includes("evalai") && !content.includes("@pauly4010/evalai-sdk")) {
468
+ if (!content.includes("evalai") &&
469
+ !content.includes("@pauly4010/evalai-sdk")) {
441
470
  return {
442
471
  id: "ci_wiring",
443
472
  label: "CI wiring",
@@ -480,10 +509,14 @@ function checkProviderEnv() {
480
509
  // ── Output formatting ──
481
510
  function icon(status) {
482
511
  switch (status) {
483
- case "pass": return "\u2705"; // ✅
484
- case "fail": return "\u274C"; //
485
- case "warn": return "\u26A0\uFE0F"; // ⚠️
486
- case "skip": return "\u23ED\uFE0F"; // ⏭️
512
+ case "pass":
513
+ return "\u2705"; //
514
+ case "fail":
515
+ return "\u274C"; //
516
+ case "warn":
517
+ return "\u26A0\uFE0F"; // ⚠️
518
+ case "skip":
519
+ return "\u23ED\uFE0F"; // ⏭️
487
520
  }
488
521
  }
489
522
  function printHuman(checks, overall) {
@@ -539,10 +572,17 @@ async function runDoctor(argv) {
539
572
  message: "Infrastructure error during connectivity check",
540
573
  });
541
574
  infraError = true;
542
- connectivityResult = { id: "connectivity", label: "API connectivity", status: "fail", message: "" };
575
+ connectivityResult = {
576
+ id: "connectivity",
577
+ label: "API connectivity",
578
+ status: "fail",
579
+ message: "",
580
+ };
543
581
  }
544
582
  // 7. Eval access (async, depends on auth + connectivity)
545
- if (flags.apiKey && flags.evaluationId && connectivityResult.status !== "fail") {
583
+ if (flags.apiKey &&
584
+ flags.evaluationId &&
585
+ connectivityResult.status !== "fail") {
546
586
  try {
547
587
  const accessResult = await checkEvalAccess(flags.baseUrl, flags.apiKey, flags.evaluationId, flags.baseline);
548
588
  checks.push(accessResult);
@@ -583,7 +623,9 @@ async function runDoctor(argv) {
583
623
  if (flags.report || flags.format === "json") {
584
624
  const redactedConfig = {
585
625
  ...(configResult.config ?? {}),
586
- path: configResult.configPath ? path.relative(cwd, configResult.configPath) : null,
626
+ path: configResult.configPath
627
+ ? path.relative(cwd, configResult.configPath)
628
+ : null,
587
629
  };
588
630
  const bundle = {
589
631
  timestamp: new Date().toISOString(),
@@ -595,7 +637,8 @@ async function runDoctor(argv) {
595
637
  config: redactedConfig,
596
638
  baseline: baselineResult.baselineInfo,
597
639
  api: {
598
- reachable: connectivityResult.status === "pass" || connectivityResult.status === "warn",
640
+ reachable: connectivityResult.status === "pass" ||
641
+ connectivityResult.status === "warn",
599
642
  latencyMs: connectivityResult.latencyMs,
600
643
  },
601
644
  ci: ciResult.ciInfo,
@@ -0,0 +1,21 @@
1
+ /**
2
+ * CORE-401: Centralized environment detection
3
+ *
4
+ * Provides unified environment detection for all EvalAI CLI commands
5
+ */
6
/**
 * Check whether the process is running in a CI environment.
 *
 * NOTE(review): the detection criteria live in the implementation (env.js),
 * which is not part of this declaration file.
 *
 * @returns `true` when a CI environment is detected.
 */
export declare function isCI(): boolean;
10
/**
 * Check whether the process is running inside GitHub Actions.
 *
 * NOTE(review): the detection criteria live in the implementation (env.js),
 * which is not part of this declaration file.
 *
 * @returns `true` when GitHub Actions is detected.
 */
export declare function isGitHubActions(): boolean;
14
/**
 * Get the GitHub Step Summary path.
 *
 * NOTE(review): where the path is read from is defined by the
 * implementation (env.js), which is not part of this declaration file.
 *
 * @returns The summary path, or `undefined` when none is available.
 */
export declare function getGitHubStepSummaryPath(): string | undefined;
18
/**
 * Check whether a string looks like a git reference.
 *
 * NOTE(review): the matching rules live in the implementation (env.js),
 * which is not part of this declaration file.
 *
 * @param ref - Candidate string to test.
 * @returns `true` when `ref` looks like a git reference.
 */
export declare function isGitRef(ref: string): boolean;