@pauly4010/evalai-sdk 1.9.0 → 1.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/README.md +136 -23
  2. package/dist/assertions.js +51 -18
  3. package/dist/batch.js +8 -2
  4. package/dist/cli/api.js +3 -1
  5. package/dist/cli/check.js +19 -6
  6. package/dist/cli/ci-context.js +3 -1
  7. package/dist/cli/config.js +28 -8
  8. package/dist/cli/diff.js +14 -9
  9. package/dist/cli/discover.js +18 -7
  10. package/dist/cli/doctor.js +43 -9
  11. package/dist/cli/explain.js +37 -11
  12. package/dist/cli/formatters/human.js +4 -1
  13. package/dist/cli/formatters/pr-comment.js +3 -1
  14. package/dist/cli/gate.js +6 -2
  15. package/dist/cli/impact-analysis.js +6 -5
  16. package/dist/cli/index.js +18 -6
  17. package/dist/cli/manifest.d.ts +3 -5
  18. package/dist/cli/manifest.js +21 -14
  19. package/dist/cli/migrate.js +4 -4
  20. package/dist/cli/policy-packs.js +8 -2
  21. package/dist/cli/print-config.js +19 -4
  22. package/dist/cli/regression-gate.js +8 -2
  23. package/dist/cli/report/build-check-report.js +8 -2
  24. package/dist/cli/run.js +11 -5
  25. package/dist/cli/share.js +3 -1
  26. package/dist/cli/upgrade.js +2 -1
  27. package/dist/client.d.ts +16 -19
  28. package/dist/client.js +60 -43
  29. package/dist/client.request.test.d.ts +1 -1
  30. package/dist/client.request.test.js +222 -147
  31. package/dist/context.js +3 -1
  32. package/dist/errors.js +11 -4
  33. package/dist/export.js +3 -1
  34. package/dist/index.d.ts +8 -8
  35. package/dist/index.js +19 -19
  36. package/dist/integrations/anthropic.d.ts +20 -1
  37. package/dist/integrations/openai-eval.js +4 -2
  38. package/dist/integrations/openai.d.ts +24 -1
  39. package/dist/local.js +3 -1
  40. package/dist/logger.js +6 -2
  41. package/dist/pagination.js +6 -2
  42. package/dist/runtime/adapters/config-to-dsl.js +12 -9
  43. package/dist/runtime/adapters/testsuite-to-dsl.d.ts +1 -1
  44. package/dist/runtime/adapters/testsuite-to-dsl.js +11 -6
  45. package/dist/runtime/eval.d.ts +1 -1
  46. package/dist/runtime/eval.js +12 -5
  47. package/dist/runtime/execution-mode.js +13 -9
  48. package/dist/runtime/registry.js +8 -21
  49. package/dist/runtime/run-report.d.ts +0 -2
  50. package/dist/runtime/run-report.js +12 -10
  51. package/dist/testing.js +7 -2
  52. package/dist/types.d.ts +100 -69
  53. package/dist/utils/input-hash.js +4 -1
  54. package/dist/version.d.ts +1 -1
  55. package/dist/version.js +1 -1
  56. package/dist/workflows.js +62 -14
  57. package/package.json +115 -111
@@ -123,7 +123,7 @@ async function getProjectMetadata(projectRoot) {
123
123
  hasPackageJson = true;
124
124
  projectName = parsed.name || "unknown";
125
125
  }
126
- catch (error) {
126
+ catch (_error) {
127
127
  // No package.json
128
128
  }
129
129
  const hasGit = await fs
@@ -173,9 +173,13 @@ function analyzeSpecFile(filePath, content) {
173
173
  content.includes("model=") ||
174
174
  content.includes("openai") ||
175
175
  content.includes("anthropic");
176
- const usesTools = content.includes("tool:") || content.includes("function.") || content.includes("call(");
176
+ const usesTools = content.includes("tool:") ||
177
+ content.includes("function.") ||
178
+ content.includes("call(");
177
179
  // Check for assertions
178
- const hasAssertions = content.includes("assert") || content.includes("expect") || content.includes("should");
180
+ const hasAssertions = content.includes("assert") ||
181
+ content.includes("expect") ||
182
+ content.includes("should");
179
183
  // Generate ID from file path
180
184
  const id = generateSpecId(filePath);
181
185
  return {
@@ -234,7 +238,9 @@ function analyzeComplexity(content) {
234
238
  const hasLoops = content.includes("for") || content.includes("while");
235
239
  const hasConditionals = content.includes("if") || content.includes("switch");
236
240
  const hasTryCatch = content.includes("try") || content.includes("catch");
237
- const hasExternalCalls = content.includes("fetch") || content.includes("http") || content.includes("api");
241
+ const hasExternalCalls = content.includes("fetch") ||
242
+ content.includes("http") ||
243
+ content.includes("api");
238
244
  let complexityScore = 0;
239
245
  if (lines > 50)
240
246
  complexityScore += 2;
@@ -261,7 +267,10 @@ function analyzeComplexity(content) {
261
267
  */
262
268
  function generateSpecId(filePath) {
263
269
  const relativePath = path.relative(process.cwd(), filePath);
264
- const hash = Buffer.from(relativePath).toString("base64").replace(/[+/=]/g, "").slice(0, 8);
270
+ const hash = Buffer.from(relativePath)
271
+ .toString("base64")
272
+ .replace(/[+/=]/g, "")
273
+ .slice(0, 8);
265
274
  return hash;
266
275
  }
267
276
  /**
@@ -381,10 +390,12 @@ function printRecommendations(stats) {
381
390
  else {
382
391
  console.log(` 🏆 Excellent coverage! Consider running evalai run`);
383
392
  }
384
- if (!stats.executionMode.hasSpecRuntime && !stats.executionMode.hasLegacyRuntime) {
393
+ if (!stats.executionMode.hasSpecRuntime &&
394
+ !stats.executionMode.hasLegacyRuntime) {
385
395
  console.log(` 🆕 New project? Try 'evalai init' to get started`);
386
396
  }
387
- if (stats.executionMode.hasLegacyRuntime && !stats.executionMode.hasSpecRuntime) {
397
+ if (stats.executionMode.hasLegacyRuntime &&
398
+ !stats.executionMode.hasSpecRuntime) {
388
399
  console.log(` 🔄 Legacy project detected. Try 'evalai migrate config' to upgrade`);
389
400
  }
390
401
  if (stats.executionMode.hasSpecRuntime) {
@@ -113,7 +113,15 @@ function parseFlags(argv) {
113
113
  evaluationId = String(merged.evaluationId);
114
114
  }
115
115
  const strict = raw.strict === "true" || raw.strict === "1";
116
- return { report, format: report ? "json" : fmt, strict, baseUrl, apiKey, evaluationId, baseline };
116
+ return {
117
+ report,
118
+ format: report ? "json" : fmt,
119
+ strict,
120
+ baseUrl,
121
+ apiKey,
122
+ evaluationId,
123
+ baseline,
124
+ };
117
125
  }
118
126
  // ── Individual checks ──
119
127
  function checkProject(cwd) {
@@ -224,7 +232,10 @@ function checkBaseline(cwd) {
224
232
  };
225
233
  }
226
234
  const schemaVersion = typeof data.schemaVersion === "number" ? data.schemaVersion : undefined;
227
- const hash = (0, node_crypto_1.createHash)("sha256").update(JSON.stringify(data)).digest("hex").slice(0, 12);
235
+ const hash = (0, node_crypto_1.createHash)("sha256")
236
+ .update(JSON.stringify(data))
237
+ .digest("hex")
238
+ .slice(0, 12);
228
239
  const updatedAt = typeof data.updatedAt === "string" ? data.updatedAt : undefined;
229
240
  // Staleness: warn if baseline older than 30 days
230
241
  let stale = false;
@@ -239,7 +250,12 @@ function checkBaseline(cwd) {
239
250
  status: "fail",
240
251
  message: `Unsupported baseline schemaVersion: ${schemaVersion ?? "missing"}`,
241
252
  remediation: "Run: npx evalai baseline init (creates schemaVersion 1)",
242
- baselineInfo: { path: "evals/baseline.json", exists: true, hash, schemaVersion },
253
+ baselineInfo: {
254
+ path: "evals/baseline.json",
255
+ exists: true,
256
+ hash,
257
+ schemaVersion,
258
+ },
243
259
  };
244
260
  }
245
261
  if (stale) {
@@ -249,7 +265,13 @@ function checkBaseline(cwd) {
249
265
  status: "warn",
250
266
  message: `Baseline is stale (last updated ${updatedAt})`,
251
267
  remediation: "Run: npx evalai baseline update",
252
- baselineInfo: { path: "evals/baseline.json", exists: true, hash, schemaVersion, stale },
268
+ baselineInfo: {
269
+ path: "evals/baseline.json",
270
+ exists: true,
271
+ hash,
272
+ schemaVersion,
273
+ stale,
274
+ },
253
275
  };
254
276
  }
255
277
  return {
@@ -257,7 +279,13 @@ function checkBaseline(cwd) {
257
279
  label: "Baseline file",
258
280
  status: "pass",
259
281
  message: `schemaVersion ${schemaVersion}, hash ${hash}`,
260
- baselineInfo: { path: "evals/baseline.json", exists: true, hash, schemaVersion, stale },
282
+ baselineInfo: {
283
+ path: "evals/baseline.json",
284
+ exists: true,
285
+ hash,
286
+ schemaVersion,
287
+ stale,
288
+ },
261
289
  };
262
290
  }
263
291
  function checkAuth(apiKey) {
@@ -437,7 +465,8 @@ function checkCiWiring(cwd) {
437
465
  ciInfo: { workflowPath, exists: true },
438
466
  };
439
467
  }
440
- if (!content.includes("evalai") && !content.includes("@pauly4010/evalai-sdk")) {
468
+ if (!content.includes("evalai") &&
469
+ !content.includes("@pauly4010/evalai-sdk")) {
441
470
  return {
442
471
  id: "ci_wiring",
443
472
  label: "CI wiring",
@@ -551,7 +580,9 @@ async function runDoctor(argv) {
551
580
  };
552
581
  }
553
582
  // 7. Eval access (async, depends on auth + connectivity)
554
- if (flags.apiKey && flags.evaluationId && connectivityResult.status !== "fail") {
583
+ if (flags.apiKey &&
584
+ flags.evaluationId &&
585
+ connectivityResult.status !== "fail") {
555
586
  try {
556
587
  const accessResult = await checkEvalAccess(flags.baseUrl, flags.apiKey, flags.evaluationId, flags.baseline);
557
588
  checks.push(accessResult);
@@ -592,7 +623,9 @@ async function runDoctor(argv) {
592
623
  if (flags.report || flags.format === "json") {
593
624
  const redactedConfig = {
594
625
  ...(configResult.config ?? {}),
595
- path: configResult.configPath ? path.relative(cwd, configResult.configPath) : null,
626
+ path: configResult.configPath
627
+ ? path.relative(cwd, configResult.configPath)
628
+ : null,
596
629
  };
597
630
  const bundle = {
598
631
  timestamp: new Date().toISOString(),
@@ -604,7 +637,8 @@ async function runDoctor(argv) {
604
637
  config: redactedConfig,
605
638
  baseline: baselineResult.baselineInfo,
606
639
  api: {
607
- reachable: connectivityResult.status === "pass" || connectivityResult.status === "warn",
640
+ reachable: connectivityResult.status === "pass" ||
641
+ connectivityResult.status === "warn",
608
642
  latencyMs: connectivityResult.latencyMs,
609
643
  },
610
644
  ci: ciResult.ciInfo,
@@ -87,7 +87,9 @@ const REPORT_SEARCH_PATHS = [
87
87
  ];
88
88
  function findReport(cwd, explicitPath) {
89
89
  if (explicitPath) {
90
- const abs = path.isAbsolute(explicitPath) ? explicitPath : path.join(cwd, explicitPath);
90
+ const abs = path.isAbsolute(explicitPath)
91
+ ? explicitPath
92
+ : path.join(cwd, explicitPath);
91
93
  return fs.existsSync(abs) ? abs : null;
92
94
  }
93
95
  for (const rel of REPORT_SEARCH_PATHS) {
@@ -115,16 +117,20 @@ function classifyRootCauses(report) {
115
117
  causes.push("cost_regression");
116
118
  }
117
119
  // Latency regression
118
- if (reasonCode === "LATENCY_BUDGET_EXCEEDED" || reasonCode === "LATENCY_RISK") {
120
+ if (reasonCode === "LATENCY_BUDGET_EXCEEDED" ||
121
+ reasonCode === "LATENCY_RISK") {
119
122
  causes.push("latency_regression");
120
123
  }
121
124
  // Coverage drop (test count decreased)
122
- if (reasonCode === "LOW_SAMPLE_SIZE" || reasonCode === "INSUFFICIENT_EVIDENCE") {
125
+ if (reasonCode === "LOW_SAMPLE_SIZE" ||
126
+ reasonCode === "INSUFFICIENT_EVIDENCE") {
123
127
  causes.push("coverage_drop");
124
128
  }
125
129
  // Analyze failed cases for drift patterns
126
130
  if (failedCases.length > 0) {
127
- const outputs = failedCases.map((fc) => (fc.output ?? "").toLowerCase()).filter(Boolean);
131
+ const outputs = failedCases
132
+ .map((fc) => (fc.output ?? "").toLowerCase())
133
+ .filter(Boolean);
128
134
  const expectedOutputs = failedCases
129
135
  .map((fc) => (fc.expectedOutput ?? "").toLowerCase())
130
136
  .filter(Boolean);
@@ -136,7 +142,9 @@ function classifyRootCauses(report) {
136
142
  causes.push("formatting_drift");
137
143
  }
138
144
  // Tool use drift: output mentions tool calls or function calls
139
- const hasToolIssue = outputs.some((o) => o.includes("tool_call") || o.includes("function_call") || o.includes("tool_use"));
145
+ const hasToolIssue = outputs.some((o) => o.includes("tool_call") ||
146
+ o.includes("function_call") ||
147
+ o.includes("tool_use"));
140
148
  if (hasToolIssue) {
141
149
  causes.push("tool_use_drift");
142
150
  }
@@ -356,7 +364,9 @@ function buildExplainOutput(report, reportPath) {
356
364
  function buildFromCheckReport(report, reportPath) {
357
365
  const failedCases = report.failedCases ?? [];
358
366
  // Top failures (up to 3)
359
- const topFailures = failedCases.slice(0, 3).map((fc, i) => ({
367
+ const topFailures = failedCases
368
+ .slice(0, 3)
369
+ .map((fc, i) => ({
360
370
  rank: i + 1,
361
371
  name: fc.name,
362
372
  input: fc.inputSnippet || fc.input,
@@ -444,7 +454,11 @@ function buildFromBuiltinReport(report, reportPath) {
444
454
  }
445
455
  // ── Output formatting ──
446
456
  function printHuman(output) {
447
- const verdictIcon = output.verdict === "pass" ? "\u2705" : output.verdict === "warn" ? "\u26A0\uFE0F" : "\u274C";
457
+ const verdictIcon = output.verdict === "pass"
458
+ ? "\u2705"
459
+ : output.verdict === "warn"
460
+ ? "\u26A0\uFE0F"
461
+ : "\u274C";
448
462
  console.log(`\n evalai explain\n`);
449
463
  console.log(` ${verdictIcon} Verdict: ${output.verdict.toUpperCase()}`);
450
464
  if (output.score != null) {
@@ -460,7 +474,11 @@ function printHuman(output) {
460
474
  if (output.changes.length > 0) {
461
475
  console.log("\n What changed:");
462
476
  for (const c of output.changes) {
463
- const arrow = c.direction === "worse" ? "\u2193" : c.direction === "better" ? "\u2191" : "\u2192";
477
+ const arrow = c.direction === "worse"
478
+ ? "\u2193"
479
+ : c.direction === "better"
480
+ ? "\u2191"
481
+ : "\u2192";
464
482
  console.log(` ${arrow} ${c.metric}: ${c.baseline} \u2192 ${c.current}`);
465
483
  }
466
484
  }
@@ -490,7 +508,11 @@ function printHuman(output) {
490
508
  if (output.suggestedFixes.length > 0) {
491
509
  console.log("\n Suggested fixes:");
492
510
  for (const fix of output.suggestedFixes) {
493
- const pIcon = fix.priority === "high" ? "\u203C\uFE0F" : fix.priority === "medium" ? "\u2757" : "\u2022";
511
+ const pIcon = fix.priority === "high"
512
+ ? "\u203C\uFE0F"
513
+ : fix.priority === "medium"
514
+ ? "\u2757"
515
+ : "\u2022";
494
516
  console.log(` ${pIcon} ${fix.action}`);
495
517
  console.log(` ${fix.detail}`);
496
518
  }
@@ -503,7 +525,9 @@ async function runExplain(argv) {
503
525
  const cwd = process.cwd();
504
526
  const reportPath = findReport(cwd, flags.reportPath);
505
527
  if (!reportPath) {
506
- const searched = flags.reportPath ? flags.reportPath : REPORT_SEARCH_PATHS.join(", ");
528
+ const searched = flags.reportPath
529
+ ? flags.reportPath
530
+ : REPORT_SEARCH_PATHS.join(", ");
507
531
  console.error(`\n \u274C No report found. Searched: ${searched}`);
508
532
  console.error(" Run a gate first:");
509
533
  console.error(" npx evalai gate --format json");
@@ -519,7 +543,9 @@ async function runExplain(argv) {
519
543
  return 1;
520
544
  }
521
545
  // Schema version compatibility check
522
- const reportSchema = typeof reportData.schemaVersion === "number" ? reportData.schemaVersion : undefined;
546
+ const reportSchema = typeof reportData.schemaVersion === "number"
547
+ ? reportData.schemaVersion
548
+ : undefined;
523
549
  if (reportSchema != null && reportSchema > types_1.CHECK_REPORT_SCHEMA_VERSION) {
524
550
  console.error(`\n \u26A0\uFE0F Report schema version ${reportSchema} is newer than this CLI supports (v${types_1.CHECK_REPORT_SCHEMA_VERSION}).`);
525
551
  console.error(" Update your SDK: npm install @pauly4010/evalai-sdk@latest\n");
@@ -43,7 +43,10 @@ function formatHuman(report) {
43
43
  lines.push("Next: View full report above, fix failing cases, or adjust gate with --minScore / --maxDrop / --warnDrop");
44
44
  }
45
45
  if (report.explain &&
46
- (report.breakdown01 || report.contribPts || report.flags?.length || report.policyEvidence)) {
46
+ (report.breakdown01 ||
47
+ report.contribPts ||
48
+ report.flags?.length ||
49
+ report.policyEvidence)) {
47
50
  lines.push("");
48
51
  lines.push("--- Explain ---");
49
52
  if (report.contribPts) {
@@ -34,7 +34,9 @@ function buildPrComment(report) {
34
34
  }
35
35
  }
36
36
  else {
37
- lines.push(passed ? "## ✅ EvalAI Regression Gate — PASSED" : "## 🚨 EvalAI Regression Gate — FAILED");
37
+ lines.push(passed
38
+ ? "## ✅ EvalAI Regression Gate — PASSED"
39
+ : "## 🚨 EvalAI Regression Gate — FAILED");
38
40
  }
39
41
  lines.push("");
40
42
  // Score + Delta (skip when gate not applied)
package/dist/cli/gate.js CHANGED
@@ -55,7 +55,9 @@ function evaluateGate(args, quality) {
55
55
  reasonMessage: `cost $${costUsd.toFixed(4)} exceeds maxCostUsd $${args.maxCostUsd.toFixed(4)}`,
56
56
  };
57
57
  }
58
- if (args.maxLatencyMs != null && avgLatencyMs != null && avgLatencyMs > args.maxLatencyMs) {
58
+ if (args.maxLatencyMs != null &&
59
+ avgLatencyMs != null &&
60
+ avgLatencyMs > args.maxLatencyMs) {
59
61
  return {
60
62
  exitCode: constants_1.EXIT.SCORE_BELOW,
61
63
  passed: false,
@@ -102,7 +104,9 @@ function evaluateGate(args, quality) {
102
104
  };
103
105
  }
104
106
  // warnDrop: soft warning band; maxDrop: hard fail
105
- if (args.maxDrop !== undefined && regressionDelta !== null && regressionDelta < -args.maxDrop) {
107
+ if (args.maxDrop !== undefined &&
108
+ regressionDelta !== null &&
109
+ regressionDelta < -args.maxDrop) {
106
110
  return {
107
111
  exitCode: constants_1.EXIT.REGRESSION,
108
112
  passed: false,
@@ -48,9 +48,9 @@ exports.analyzeImpact = analyzeImpact;
48
48
  exports.printHumanResults = printHumanResults;
49
49
  exports.printJsonResults = printJsonResults;
50
50
  exports.runImpactAnalysisCLI = runImpactAnalysisCLI;
51
+ const node_child_process_1 = require("node:child_process");
51
52
  const fs = __importStar(require("node:fs/promises"));
52
53
  const path = __importStar(require("node:path"));
53
- const node_child_process_1 = require("node:child_process");
54
54
  /**
55
55
  * Run impact analysis
56
56
  */
@@ -87,7 +87,7 @@ async function readManifest(projectRoot = process.cwd()) {
87
87
  const content = await fs.readFile(manifestPath, "utf-8");
88
88
  return JSON.parse(content);
89
89
  }
90
- catch (error) {
90
+ catch (_error) {
91
91
  return null;
92
92
  }
93
93
  }
@@ -138,7 +138,7 @@ function analyzeImpact(changedFiles, manifest) {
138
138
  if (!specsByFile.has(spec.filePath)) {
139
139
  specsByFile.set(spec.filePath, []);
140
140
  }
141
- specsByFile.get(spec.filePath).push(spec);
141
+ specsByFile.get(spec.filePath)?.push(spec);
142
142
  // By dependencies
143
143
  const deps = [
144
144
  ...spec.dependsOn.prompts,
@@ -150,7 +150,7 @@ function analyzeImpact(changedFiles, manifest) {
150
150
  if (!specsByDependency.has(dep)) {
151
151
  specsByDependency.set(dep, []);
152
152
  }
153
- specsByDependency.get(dep).push(spec);
153
+ specsByDependency.get(dep)?.push(spec);
154
154
  }
155
155
  }
156
156
  // Analyze each changed file
@@ -179,7 +179,8 @@ function analyzeImpact(changedFiles, manifest) {
179
179
  // Add all specs
180
180
  for (const spec of manifest.specs) {
181
181
  impactedSpecIds.add(spec.id);
182
- reasonBySpecId[spec.id] = `Unknown file changed: ${changedFile} (safe fallback)`;
182
+ reasonBySpecId[spec.id] =
183
+ `Unknown file changed: ${changedFile} (safe fallback)`;
183
184
  }
184
185
  break; // No need to continue analyzing
185
186
  }
package/dist/cli/index.js CHANGED
@@ -160,9 +160,13 @@ else if (subcommand === "impact-analysis") {
160
160
  const changedFilesIndex = args.indexOf("--changed-files");
161
161
  const formatIndex = args.indexOf("--format");
162
162
  const baseBranch = baseIndex !== -1 ? args[baseIndex + 1] : "main";
163
- const changedFiles = changedFilesIndex !== -1 ? args[changedFilesIndex + 1]?.split(",") : undefined;
163
+ const changedFiles = changedFilesIndex !== -1
164
+ ? args[changedFilesIndex + 1]?.split(",")
165
+ : undefined;
164
166
  const format = formatIndex !== -1 ? args[formatIndex + 1] : "human";
165
- (0, impact_analysis_1.runImpactAnalysisCLI)({ baseBranch, changedFiles, format }).catch((err) => {
167
+ (0, impact_analysis_1.runImpactAnalysisCLI)({ baseBranch, changedFiles, format })
168
+ .then(() => process.exit(0))
169
+ .catch((err) => {
166
170
  console.error(`EvalAI ERROR: ${err instanceof Error ? err.message : String(err)}`);
167
171
  process.exit(2);
168
172
  });
@@ -186,7 +190,9 @@ else if (subcommand === "run") {
186
190
  baseBranch,
187
191
  format,
188
192
  writeResults,
189
- }).catch((err) => {
193
+ })
194
+ .then(() => process.exit(0))
195
+ .catch((err) => {
190
196
  console.error(`EvalAI ERROR: ${err instanceof Error ? err.message : String(err)}`);
191
197
  process.exit(2);
192
198
  });
@@ -200,7 +206,9 @@ else if (subcommand === "diff") {
200
206
  const base = baseIndex !== -1 ? args[baseIndex + 1] : undefined;
201
207
  const head = headIndex !== -1 ? args[headIndex + 1] : undefined;
202
208
  const format = formatIndex !== -1 ? args[formatIndex + 1] : "human";
203
- (0, diff_1.runDiffCLI)({ base, head, format }).catch((err) => {
209
+ (0, diff_1.runDiffCLI)({ base, head, format })
210
+ .then(() => process.exit(0))
211
+ .catch((err) => {
204
212
  console.error(`EvalAI ERROR: ${err instanceof Error ? err.message : String(err)}`);
205
213
  process.exit(2);
206
214
  });
@@ -214,9 +222,13 @@ else if (subcommand === "ci") {
214
222
  const writeResultsIndex = args.indexOf("--write-results");
215
223
  const base = baseIndex !== -1 ? args[baseIndex + 1] : undefined;
216
224
  const impactedOnly = impactedOnlyIndex !== -1;
217
- const format = formatIndex !== -1 ? args[formatIndex + 1] : "human";
225
+ const format = formatIndex !== -1
226
+ ? args[formatIndex + 1]
227
+ : "human";
218
228
  const writeResults = writeResultsIndex !== -1;
219
- (0, ci_1.runCICLI)({ base, impactedOnly, format, writeResults }).catch((err) => {
229
+ (0, ci_1.runCICLI)({ base, impactedOnly, format, writeResults })
230
+ .then(() => process.exit(0))
231
+ .catch((err) => {
220
232
  console.error(`EvalAI ERROR: ${err instanceof Error ? err.message : String(err)}`);
221
233
  process.exit(2);
222
234
  });
@@ -6,16 +6,14 @@
6
6
  *
7
7
  * This is the compiler output that everything else consumes.
8
8
  */
9
- import type { SpecAnalysis } from "./discover";
10
9
  import type { ExecutionModeConfig } from "../runtime/execution-mode";
10
+ import { SDK_VERSION } from "../version";
11
+ import type { SpecAnalysis } from "./discover";
12
+ export { SDK_VERSION };
11
13
  /**
12
14
  * Manifest schema version
13
15
  */
14
16
  export declare const MANIFEST_SCHEMA_VERSION = 1;
15
- /**
16
- * SDK version from package.json
17
- */
18
- export declare const SDK_VERSION = "1.8.0";
19
17
  /**
20
18
  * Evaluation Manifest Schema
21
19
  */
@@ -41,22 +41,20 @@ var __importStar = (this && this.__importStar) || (function () {
41
41
  };
42
42
  })();
43
43
  Object.defineProperty(exports, "__esModule", { value: true });
44
- exports.SDK_VERSION = exports.MANIFEST_SCHEMA_VERSION = void 0;
44
+ exports.MANIFEST_SCHEMA_VERSION = exports.SDK_VERSION = void 0;
45
45
  exports.generateManifest = generateManifest;
46
46
  exports.writeManifest = writeManifest;
47
47
  exports.readManifest = readManifest;
48
48
  exports.readLock = readLock;
49
+ const crypto = __importStar(require("node:crypto"));
49
50
  const fs = __importStar(require("node:fs/promises"));
50
51
  const path = __importStar(require("node:path"));
51
- const crypto = __importStar(require("node:crypto"));
52
+ const version_1 = require("../version");
53
+ Object.defineProperty(exports, "SDK_VERSION", { enumerable: true, get: function () { return version_1.SDK_VERSION; } });
52
54
  /**
53
55
  * Manifest schema version
54
56
  */
55
57
  exports.MANIFEST_SCHEMA_VERSION = 1;
56
- /**
57
- * SDK version from package.json
58
- */
59
- exports.SDK_VERSION = "1.8.0";
60
58
  /**
61
59
  * Generate evaluation manifest from discovery results
62
60
  */
@@ -73,7 +71,7 @@ async function generateManifest(specs, projectRoot, projectName, executionMode)
73
71
  if (!specsByFile.has(normalizedPath)) {
74
72
  specsByFile.set(normalizedPath, []);
75
73
  }
76
- specsByFile.get(normalizedPath).push(spec);
74
+ specsByFile.get(normalizedPath)?.push(spec);
77
75
  }
78
76
  // Process each file
79
77
  for (const [filePath, fileSpecs] of specsByFile) {
@@ -100,7 +98,7 @@ async function generateManifest(specs, projectRoot, projectName, executionMode)
100
98
  },
101
99
  runtime: {
102
100
  mode: executionMode.mode,
103
- sdkVersion: exports.SDK_VERSION,
101
+ sdkVersion: version_1.SDK_VERSION,
104
102
  },
105
103
  specFiles,
106
104
  specs: processedSpecs,
@@ -163,7 +161,8 @@ function extractDependencies(content) {
163
161
  const dependsOnMatch = content.match(/dependsOn\s*:\s*({[^}]+})/s);
164
162
  if (dependsOnMatch) {
165
163
  try {
166
- const deps = eval(`(${dependsOnMatch[1]})`);
164
+ // Use JSON.parse instead of eval for safety
165
+ const deps = JSON.parse(dependsOnMatch[1]);
167
166
  return {
168
167
  prompts: deps.prompts || [],
169
168
  datasets: deps.datasets || [],
@@ -171,8 +170,14 @@ function extractDependencies(content) {
171
170
  code: deps.code || [],
172
171
  };
173
172
  }
174
- catch (error) {
175
- // Fall back to simple extraction
173
+ catch (_error) {
174
+ // If parsing fails, return empty dependencies
175
+ return {
176
+ prompts: [],
177
+ datasets: [],
178
+ tools: [],
179
+ code: [],
180
+ };
176
181
  }
177
182
  }
178
183
  // Simple extraction as fallback
@@ -184,8 +189,10 @@ function extractDependencies(content) {
184
189
  };
185
190
  for (const [type, pattern] of Object.entries(patterns)) {
186
191
  let match;
187
- while ((match = pattern.exec(content)) !== null) {
192
+ match = pattern.exec(content);
193
+ while (match !== null) {
188
194
  dependsOn[type].push(match[1]);
195
+ match = pattern.exec(content);
189
196
  }
190
197
  }
191
198
  return dependsOn;
@@ -256,7 +263,7 @@ async function readManifest(projectRoot) {
256
263
  const content = await fs.readFile(manifestPath, "utf-8");
257
264
  return JSON.parse(content);
258
265
  }
259
- catch (error) {
266
+ catch (_error) {
260
267
  return null;
261
268
  }
262
269
  }
@@ -269,7 +276,7 @@ async function readLock(projectRoot) {
269
276
  const content = await fs.readFile(lockPath, "utf-8");
270
277
  return JSON.parse(content);
271
278
  }
272
- catch (error) {
279
+ catch (_error) {
273
280
  return null;
274
281
  }
275
282
  }
@@ -43,9 +43,9 @@ exports.migrateConfig = migrateConfig;
43
43
  exports.createMigrateCommand = createMigrateCommand;
44
44
  exports.validateConfigFile = validateConfigFile;
45
45
  exports.previewMigration = previewMigration;
46
- const commander_1 = require("commander");
47
46
  const fs = __importStar(require("node:fs/promises"));
48
47
  const path = __importStar(require("node:path"));
48
+ const commander_1 = require("commander");
49
49
  const testsuite_to_dsl_1 = require("../runtime/adapters/testsuite-to-dsl");
50
50
  const testing_1 = require("../testing");
51
51
  /**
@@ -97,7 +97,7 @@ function extractTestSuitesFromConfig(config) {
97
97
  /**
98
98
  * Generate DSL file header
99
99
  */
100
- function generateFileHeader(config, options) {
100
+ function generateFileHeader(_config, options) {
101
101
  const timestamp = new Date().toISOString();
102
102
  const inputPath = path.resolve(options.input);
103
103
  const outputPath = path.resolve(options.output);
@@ -127,7 +127,7 @@ function generateFileHeader(config, options) {
127
127
  /**
128
128
  * Generate helper functions for the entire file
129
129
  */
130
- function generateGlobalHelpers(config, options) {
130
+ function generateGlobalHelpers(config, _options) {
131
131
  const helpers = [];
132
132
  // Add executor helper if config has executor
133
133
  if (config.executor) {
@@ -164,7 +164,7 @@ function generateGlobalHelpers(config, options) {
164
164
  ` * Legacy test evaluation function`,
165
165
  ` * TODO: Adapt based on your original test logic`,
166
166
  ` */`,
167
- `async function evaluateLegacyTest(input: string, expected?: string): Promise<any> {`,
167
+ `async function evaluateLegacyTest(input: string, expected?: string): Promise<unknown> {`,
168
168
  ` const output = await legacyExecutor(input);`,
169
169
  ` const passed = evaluateAssertions(output, expected);`,
170
170
  ` `,
@@ -22,7 +22,10 @@ exports.POLICY_PACKS = {
22
22
  1: {
23
23
  policyId: "SOC2",
24
24
  version: 1,
25
- thresholds: { requiredSafetyRate: 0.95, maxFlags: ["SAFETY_RISK", "LOW_PASS_RATE"] },
25
+ thresholds: {
26
+ requiredSafetyRate: 0.95,
27
+ maxFlags: ["SAFETY_RISK", "LOW_PASS_RATE"],
28
+ },
26
29
  rationale: "SOC2 trust criteria for security and availability.",
27
30
  checks: ["safety_rate", "flag_restrictions"],
28
31
  },
@@ -40,7 +43,10 @@ exports.POLICY_PACKS = {
40
43
  1: {
41
44
  policyId: "PCI_DSS",
42
45
  version: 1,
43
- thresholds: { requiredSafetyRate: 0.99, maxFlags: ["SAFETY_RISK", "LOW_PASS_RATE"] },
46
+ thresholds: {
47
+ requiredSafetyRate: 0.99,
48
+ maxFlags: ["SAFETY_RISK", "LOW_PASS_RATE"],
49
+ },
44
50
  rationale: "PCI DSS cardholder data security standards.",
45
51
  checks: ["safety_rate", "flag_restrictions"],
46
52
  },
@@ -135,13 +135,20 @@ function buildResolvedConfig(cwd, flags) {
135
135
  : "default";
136
136
  fields.push({
137
137
  key: "baseUrl",
138
- value: flags.baseUrl || envBaseUrl || fileConfig?.baseUrl || "http://localhost:3000",
138
+ value: flags.baseUrl ||
139
+ envBaseUrl ||
140
+ fileConfig?.baseUrl ||
141
+ "http://localhost:3000",
139
142
  source: baseUrlSource,
140
143
  });
141
144
  // apiKey (always redacted)
142
145
  const envApiKey = process.env.EVALAI_API_KEY;
143
146
  const rawApiKey = flags.apiKey || envApiKey || "";
144
- const apiKeySource = flags.apiKey ? "arg" : envApiKey ? "env" : "default";
147
+ const apiKeySource = flags.apiKey
148
+ ? "arg"
149
+ : envApiKey
150
+ ? "env"
151
+ : "default";
145
152
  fields.push({
146
153
  key: "apiKey",
147
154
  value: redact(rawApiKey) ?? "(not set)",
@@ -150,7 +157,11 @@ function buildResolvedConfig(cwd, flags) {
150
157
  });
151
158
  // profile
152
159
  const profileName = (flags.profile || fileConfig?.profile);
153
- const profileSource = flags.profile ? "arg" : fileConfig?.profile ? "file" : "default";
160
+ const profileSource = flags.profile
161
+ ? "arg"
162
+ : fileConfig?.profile
163
+ ? "file"
164
+ : "default";
154
165
  fields.push({
155
166
  key: "profile",
156
167
  value: profileName ?? null,
@@ -184,7 +195,11 @@ function buildResolvedConfig(cwd, flags) {
184
195
  });
185
196
  }
186
197
  // baseline
187
- const baselineSource = flags.baseline ? "arg" : fileConfig?.baseline ? "file" : "default";
198
+ const baselineSource = flags.baseline
199
+ ? "arg"
200
+ : fileConfig?.baseline
201
+ ? "file"
202
+ : "default";
188
203
  fields.push({
189
204
  key: "baseline",
190
205
  value: merged.baseline ?? "published",