@pauly4010/evalai-sdk 1.8.0 → 1.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. package/CHANGELOG.md +54 -0
  2. package/README.md +136 -23
  3. package/dist/assertions.js +51 -18
  4. package/dist/batch.js +8 -2
  5. package/dist/cli/api.js +3 -1
  6. package/dist/cli/check.js +19 -6
  7. package/dist/cli/ci-context.js +3 -1
  8. package/dist/cli/ci.d.ts +45 -0
  9. package/dist/cli/ci.js +192 -0
  10. package/dist/cli/config.js +28 -8
  11. package/dist/cli/diff.d.ts +173 -0
  12. package/dist/cli/diff.js +685 -0
  13. package/dist/cli/discover.d.ts +84 -0
  14. package/dist/cli/discover.js +419 -0
  15. package/dist/cli/doctor.js +62 -19
  16. package/dist/cli/env.d.ts +21 -0
  17. package/dist/cli/env.js +42 -0
  18. package/dist/cli/explain.js +168 -36
  19. package/dist/cli/formatters/human.js +4 -1
  20. package/dist/cli/formatters/pr-comment.js +3 -1
  21. package/dist/cli/gate.js +6 -2
  22. package/dist/cli/impact-analysis.d.ts +63 -0
  23. package/dist/cli/impact-analysis.js +252 -0
  24. package/dist/cli/index.js +185 -0
  25. package/dist/cli/manifest.d.ts +103 -0
  26. package/dist/cli/manifest.js +282 -0
  27. package/dist/cli/migrate.d.ts +41 -0
  28. package/dist/cli/migrate.js +349 -0
  29. package/dist/cli/policy-packs.js +8 -2
  30. package/dist/cli/print-config.js +33 -14
  31. package/dist/cli/regression-gate.js +8 -2
  32. package/dist/cli/report/build-check-report.js +8 -2
  33. package/dist/cli/run.d.ts +101 -0
  34. package/dist/cli/run.js +395 -0
  35. package/dist/cli/share.js +3 -1
  36. package/dist/cli/upgrade.js +2 -1
  37. package/dist/cli/workspace.d.ts +28 -0
  38. package/dist/cli/workspace.js +58 -0
  39. package/dist/client.d.ts +16 -19
  40. package/dist/client.js +60 -43
  41. package/dist/client.request.test.d.ts +1 -1
  42. package/dist/client.request.test.js +222 -147
  43. package/dist/context.js +3 -1
  44. package/dist/errors.js +11 -4
  45. package/dist/export.js +3 -1
  46. package/dist/index.d.ts +8 -2
  47. package/dist/index.js +30 -5
  48. package/dist/integrations/anthropic.d.ts +20 -1
  49. package/dist/integrations/openai-eval.js +4 -2
  50. package/dist/integrations/openai.d.ts +24 -1
  51. package/dist/local.js +3 -1
  52. package/dist/logger.js +6 -2
  53. package/dist/pagination.js +6 -2
  54. package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
  55. package/dist/runtime/adapters/config-to-dsl.js +394 -0
  56. package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
  57. package/dist/runtime/adapters/testsuite-to-dsl.js +276 -0
  58. package/dist/runtime/context.d.ts +26 -0
  59. package/dist/runtime/context.js +74 -0
  60. package/dist/runtime/eval.d.ts +46 -0
  61. package/dist/runtime/eval.js +244 -0
  62. package/dist/runtime/execution-mode.d.ts +80 -0
  63. package/dist/runtime/execution-mode.js +357 -0
  64. package/dist/runtime/executor.d.ts +16 -0
  65. package/dist/runtime/executor.js +152 -0
  66. package/dist/runtime/registry.d.ts +78 -0
  67. package/dist/runtime/registry.js +403 -0
  68. package/dist/runtime/run-report.d.ts +200 -0
  69. package/dist/runtime/run-report.js +222 -0
  70. package/dist/runtime/types.d.ts +356 -0
  71. package/dist/runtime/types.js +76 -0
  72. package/dist/testing.d.ts +65 -0
  73. package/dist/testing.js +49 -2
  74. package/dist/types.d.ts +100 -69
  75. package/dist/utils/input-hash.js +4 -1
  76. package/dist/version.d.ts +1 -1
  77. package/dist/version.js +1 -1
  78. package/dist/workflows.js +62 -14
  79. package/package.json +115 -110
@@ -0,0 +1,252 @@
1
+ "use strict";
2
+ /**
3
+ * TICKET 3 — Impact Analysis CLI Command (v0)
4
+ *
5
+ * Goal: Modal-like perceived speed via incremental intelligence
6
+ *
7
+ * Algorithm v0 (practical, shippable):
8
+ * - Inputs: manifest.json + git diff --name-only base...HEAD
9
+ * - Rules: Direct file mapping, dependency tracking, safe fallback
10
+ * - Output: Human-readable counts + JSON for automation
11
+ */
12
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
13
+ if (k2 === undefined) k2 = k;
14
+ var desc = Object.getOwnPropertyDescriptor(m, k);
15
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
16
+ desc = { enumerable: true, get: function() { return m[k]; } };
17
+ }
18
+ Object.defineProperty(o, k2, desc);
19
+ }) : (function(o, m, k, k2) {
20
+ if (k2 === undefined) k2 = k;
21
+ o[k2] = m[k];
22
+ }));
23
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
24
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
25
+ }) : function(o, v) {
26
+ o["default"] = v;
27
+ });
28
+ var __importStar = (this && this.__importStar) || (function () {
29
+ var ownKeys = function(o) {
30
+ ownKeys = Object.getOwnPropertyNames || function (o) {
31
+ var ar = [];
32
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
33
+ return ar;
34
+ };
35
+ return ownKeys(o);
36
+ };
37
+ return function (mod) {
38
+ if (mod && mod.__esModule) return mod;
39
+ var result = {};
40
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
41
+ __setModuleDefault(result, mod);
42
+ return result;
43
+ };
44
+ })();
45
+ Object.defineProperty(exports, "__esModule", { value: true });
46
+ exports.runImpactAnalysis = runImpactAnalysis;
47
+ exports.analyzeImpact = analyzeImpact;
48
+ exports.printHumanResults = printHumanResults;
49
+ exports.printJsonResults = printJsonResults;
50
+ exports.runImpactAnalysisCLI = runImpactAnalysisCLI;
51
+ const node_child_process_1 = require("node:child_process");
52
+ const fs = __importStar(require("node:fs/promises"));
53
+ const path = __importStar(require("node:path"));
54
/**
 * Work out which specs are affected by a set of changed files.
 *
 * Loads the evaluation manifest from `projectRoot`, takes the changed files
 * from `options.changedFiles` when provided (otherwise asks git for the diff
 * against `options.baseBranch`), and runs them through `analyzeImpact`.
 *
 * @param options Reads `changedFiles` (optional list of paths) and `baseBranch`.
 * @param projectRoot Directory containing `.evalai/manifest.json`; defaults to the current working directory.
 * @returns The impacted spec ids, the reason per spec id, the changed files,
 *   and a `metadata` object (base branch, spec totals, elapsed milliseconds).
 * @throws Error when no manifest could be loaded.
 */
async function runImpactAnalysis(options, projectRoot = process.cwd()) {
    const startedAt = Date.now();
    const manifest = await readManifest(projectRoot);
    if (!manifest) {
        throw new Error("No evaluation manifest found. Run 'evalai discover --manifest' first.");
    }
    // An explicit file list wins; git is only consulted when none was given.
    let changedFiles = options.changedFiles;
    if (!changedFiles) {
        changedFiles = await getChangedFiles(options.baseBranch);
    }
    const analysis = analyzeImpact(changedFiles, manifest);
    return {
        impactedSpecIds: analysis.impactedSpecIds,
        reasonBySpecId: analysis.reasonBySpecId,
        changedFiles,
        metadata: {
            baseBranch: options.baseBranch,
            totalSpecs: manifest.specs.length,
            impactedCount: analysis.impactedSpecIds.length,
            analysisTime: Date.now() - startedAt,
        },
    };
}
81
/**
 * Load and parse `.evalai/manifest.json` under `projectRoot`.
 *
 * A missing manifest is an expected state and yields `null`. Any other
 * failure (unreadable file, invalid JSON) is raised so that a broken manifest
 * is not misreported as a missing one.
 *
 * @param projectRoot Directory containing the `.evalai` folder; defaults to the current working directory.
 * @returns The parsed manifest, or `null` when the file does not exist.
 * @throws Error when the file exists but cannot be read or is not valid JSON.
 */
async function readManifest(projectRoot = process.cwd()) {
    const manifestPath = path.join(projectRoot, ".evalai", "manifest.json");
    let content;
    try {
        content = await fs.readFile(manifestPath, "utf-8");
    }
    catch (error) {
        // Only "file does not exist" means "no manifest yet".
        if (error instanceof Error && "code" in error && error.code === "ENOENT") {
            return null;
        }
        const detail = error instanceof Error ? error.message : String(error);
        throw new Error(`Failed to read evaluation manifest at ${manifestPath}: ${detail}`);
    }
    try {
        return JSON.parse(content);
    }
    catch (error) {
        const detail = error instanceof Error ? error.message : String(error);
        throw new Error(`Evaluation manifest at ${manifestPath} is not valid JSON: ${detail}`);
    }
}
94
/**
 * List the files that differ between `baseBranch` and `HEAD`.
 *
 * Runs `git diff --name-only <baseBranch>...HEAD --` in the current working
 * directory and returns one entry per non-blank output line, with
 * backslashes converted to forward slashes.
 *
 * @param baseBranch Git revision to compare against. Must be a non-empty
 *   string that does not start with `-` (git would parse that as an option).
 * @returns Promise resolving to the list of changed paths.
 * @throws Error when `baseBranch` is invalid, when git cannot be started, or
 *   when git exits with a non-zero status.
 */
async function getChangedFiles(baseBranch) {
    if (typeof baseBranch !== "string" || baseBranch.length === 0 || baseBranch.startsWith("-")) {
        throw new Error(`Git diff failed: invalid base branch "${String(baseBranch)}"`);
    }
    return new Promise((resolve, reject) => {
        // The trailing "--" makes git treat the range strictly as a revision,
        // never as a path. stdin is ignored because nothing is ever written.
        const git = (0, node_child_process_1.spawn)("git", ["diff", "--name-only", `${baseBranch}...HEAD`, "--"], {
            stdio: ["ignore", "pipe", "pipe"],
        });
        let output = "";
        let error = "";
        git.stdout?.on("data", (data) => {
            output += data.toString();
        });
        git.stderr?.on("data", (data) => {
            error += data.toString();
        });
        // Without this listener a missing git binary surfaces as an unhandled
        // 'error' event instead of a rejected promise.
        git.on("error", (spawnError) => {
            reject(new Error(`Git diff failed: ${spawnError.message}`));
        });
        git.on("close", (code) => {
            if (code !== 0) {
                const detail = error.trim().length > 0 ? error : `git exited with code ${code}`;
                reject(new Error(`Git diff failed: ${detail}`));
                return;
            }
            const files = output
                .split("\n")
                .map((f) => f.trim())
                .filter((f) => f.length > 0)
                .map((f) => f.replace(/\\/g, "/")); // Normalize to POSIX
            resolve(files);
        });
    });
}
124
/**
 * Map changed files onto the specs they affect.
 *
 * Every changed path is compared against each spec's `filePath` and against
 * the entries of its `dependsOn` lists (`prompts`, `datasets`, `tools`,
 * `code`). If a changed path matches nothing, every spec is marked as
 * impacted and analysis stops (conservative fallback).
 *
 * Paths on both sides are normalized before comparison: surrounding
 * whitespace is removed, backslashes become forward slashes, and leading
 * `./` segments are dropped. Blank changed-file entries are ignored. Specs
 * without `dependsOn`, or with some of its lists missing, are treated as
 * having no dependencies of that kind.
 *
 * @param changedFiles Paths that changed.
 * @param manifest Object with a `specs` array; each spec provides `id` and `filePath`.
 * @returns `impactedSpecIds` (sorted with the default array sort) and
 *   `reasonBySpecId`, holding the reason recorded by the last rule that
 *   matched each spec.
 */
function analyzeImpact(changedFiles, manifest) {
    const normalizePath = (value) => String(value).trim().replace(/\\/g, "/").replace(/^(?:\.\/)+/, "");
    const addToIndex = (index, key, spec) => {
        const bucket = index.get(key);
        if (bucket) {
            bucket.push(spec);
        }
        else {
            index.set(key, [spec]);
        }
    };
    const impactedSpecIds = new Set();
    const reasonBySpecId = {};
    const normalizedChangedFiles = changedFiles
        .map((f) => normalizePath(f))
        .filter((f) => f.length > 0);
    // Lookup tables: path -> specs defined there / specs depending on it
    const specsByFile = new Map();
    const specsByDependency = new Map();
    for (const spec of manifest.specs) {
        if (spec.filePath != null) {
            addToIndex(specsByFile, normalizePath(spec.filePath), spec);
        }
        // Tolerate manifests whose specs omit dependency information.
        const dependsOn = spec.dependsOn ?? {};
        for (const category of ["prompts", "datasets", "tools", "code"]) {
            for (const dep of dependsOn[category] ?? []) {
                addToIndex(specsByDependency, normalizePath(dep), spec);
            }
        }
    }
    for (const changedFile of normalizedChangedFiles) {
        // Rule 1: Direct spec file change
        const specsInFile = specsByFile.get(changedFile);
        if (specsInFile) {
            for (const spec of specsInFile) {
                impactedSpecIds.add(spec.id);
                reasonBySpecId[spec.id] = `Spec file changed: ${changedFile}`;
            }
        }
        // Rule 2: Dependency change
        const specsUsingDep = specsByDependency.get(changedFile);
        if (specsUsingDep) {
            for (const spec of specsUsingDep) {
                impactedSpecIds.add(spec.id);
                reasonBySpecId[spec.id] = `Dependency changed: ${changedFile}`;
            }
        }
        // Rule 3: Safe fallback for unknown files
        if (!specsInFile && !specsUsingDep) {
            // The file cannot be mapped, so be conservative and run everything.
            console.warn(`⚠️ Unknown changed file: ${changedFile}`);
            console.warn(`🛡️ Running full suite for safety`);
            for (const spec of manifest.specs) {
                impactedSpecIds.add(spec.id);
                reasonBySpecId[spec.id] =
                    `Unknown file changed: ${changedFile} (safe fallback)`;
            }
            break; // Everything is already impacted
        }
    }
    return {
        impactedSpecIds: Array.from(impactedSpecIds).sort(),
        reasonBySpecId,
    };
}
193
/**
 * Write an impact-analysis result to stdout as a human-readable report.
 *
 * Prints a summary (base branch, counts, timing), the changed files when
 * there are any, and then either the impacted specs with their reasons plus
 * a suggested `evalai run` command, or a note that nothing is impacted.
 *
 * @param result Reads `metadata`, `changedFiles`, `impactedSpecIds` and `reasonBySpecId`.
 */
function printHumanResults(result) {
    const emit = (line) => console.log(line);
    // Summary
    emit("\n🔍 Impact Analysis Results");
    emit(`📊 Base branch: ${result.metadata.baseBranch}`);
    emit(`📁 Changed files: ${result.changedFiles.length}`);
    emit(`🎯 Impacted specs: ${result.metadata.impactedCount}/${result.metadata.totalSpecs}`);
    emit(`⏱️ Analysis time: ${result.metadata.analysisTime}ms`);
    // Changed files, if any
    if (result.changedFiles.length > 0) {
        emit("\n📝 Changed files:");
        for (const changedPath of result.changedFiles) {
            emit(` • ${changedPath}`);
        }
    }
    // Nothing impacted: say so and stop
    if (!(result.impactedSpecIds.length > 0)) {
        emit("\n✅ No specifications impacted");
        emit("💡 No tests needed to run");
        return;
    }
    emit("\n🎯 Impacted specifications:");
    for (const id of result.impactedSpecIds) {
        const why = result.reasonBySpecId[id];
        emit(` • ${id} (${why})`);
    }
    emit("\n💡 Suggested command:");
    emit(` evalai run --spec-ids ${result.impactedSpecIds.join(",")}`);
}
222
/**
 * Write an impact-analysis result to stdout as JSON indented by two spaces.
 *
 * @param result Value to serialize.
 */
function printJsonResults(result) {
    const serialized = JSON.stringify(result, null, 2);
    console.log(serialized);
}
228
/**
 * CLI wrapper around `runImpactAnalysis`: prints the result and terminates
 * the process.
 *
 * Exit codes: 0 when no spec is impacted, 1 when at least one spec is
 * impacted (signals that tests should run), 2 when the analysis failed.
 *
 * @param options Forwarded to `runImpactAnalysis`; `format === "json"`
 *   selects JSON output, any other value selects the human-readable report.
 */
async function runImpactAnalysisCLI(options) {
    try {
        const result = await runImpactAnalysis(options);
        const print = options.format === "json" ? printJsonResults : printHumanResults;
        print(result);
        // Non-zero here is a signal to the caller, not an error.
        const nothingImpacted = result.metadata.impactedCount === 0;
        process.exit(nothingImpacted ? 0 : 1);
    }
    catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        console.error("❌ Impact analysis failed:", message);
        process.exit(2);
    }
}
package/dist/cli/index.js CHANGED
@@ -10,11 +10,17 @@
10
10
  Object.defineProperty(exports, "__esModule", { value: true });
11
11
  const baseline_1 = require("./baseline");
12
12
  const check_1 = require("./check");
13
+ const ci_1 = require("./ci");
14
+ const diff_1 = require("./diff");
15
+ const discover_1 = require("./discover");
13
16
  const doctor_1 = require("./doctor");
14
17
  const explain_1 = require("./explain");
18
+ const impact_analysis_1 = require("./impact-analysis");
15
19
  const init_1 = require("./init");
20
+ const migrate_1 = require("./migrate");
16
21
  const print_config_1 = require("./print-config");
17
22
  const regression_gate_1 = require("./regression-gate");
23
+ const run_1 = require("./run");
18
24
  const share_1 = require("./share");
19
25
  const upgrade_1 = require("./upgrade");
20
26
  const argv = process.argv.slice(2);
@@ -32,6 +38,60 @@ else if (subcommand === "gate") {
32
38
  const code = (0, regression_gate_1.runGate)(argv.slice(1));
33
39
  process.exit(code);
34
40
  }
41
+ else if (subcommand === "migrate") {
42
+ // Handle migrate subcommand
43
+ const migrateSubcommand = argv[1];
44
+ if (migrateSubcommand === "config") {
45
+ // Parse migrate config arguments
46
+ let inputPath = "";
47
+ let outputPath = "";
48
+ let verbose = false;
49
+ let helpers = true;
50
+ let preserveIds = true;
51
+ let provenance = true;
52
+ for (let i = 2; i < argv.length; i++) {
53
+ const arg = argv[i];
54
+ if (arg === "--in" || arg === "-i") {
55
+ inputPath = argv[++i];
56
+ }
57
+ else if (arg === "--out" || arg === "-o") {
58
+ outputPath = argv[++i];
59
+ }
60
+ else if (arg === "--verbose" || arg === "-v") {
61
+ verbose = true;
62
+ }
63
+ else if (arg === "--no-helpers") {
64
+ helpers = false;
65
+ }
66
+ else if (arg === "--no-preserve-ids") {
67
+ preserveIds = false;
68
+ }
69
+ else if (arg === "--no-provenance") {
70
+ provenance = false;
71
+ }
72
+ }
73
+ if (!inputPath || !outputPath) {
74
+ console.error("Error: Both --in and --out options are required");
75
+ console.error("Usage: evalai migrate config --in <input> --out <output> [options]");
76
+ process.exit(1);
77
+ }
78
+ (0, migrate_1.migrateConfig)({
79
+ input: inputPath,
80
+ output: outputPath,
81
+ verbose,
82
+ helpers,
83
+ preserveIds,
84
+ provenance,
85
+ }).catch((err) => {
86
+ console.error(`Migration failed: ${err instanceof Error ? err.message : String(err)}`);
87
+ process.exit(1);
88
+ });
89
+ }
90
+ else {
91
+ console.error("Error: Unknown migrate subcommand. Use 'evalai migrate config'");
92
+ process.exit(1);
93
+ }
94
+ }
35
95
  else if (subcommand === "upgrade") {
36
96
  const code = (0, upgrade_1.runUpgrade)(argv.slice(1));
37
97
  process.exit(code);
@@ -82,11 +142,123 @@ else if (subcommand === "share") {
82
142
  process.exit(1);
83
143
  });
84
144
  }
145
+ else if (subcommand === "discover") {
146
+ // Parse arguments for discover command
147
+ const args = argv.slice(1);
148
+ const manifestFlag = args.includes("--manifest");
149
+ (0, discover_1.discoverSpecs)({ manifest: manifestFlag })
150
+ .then(() => process.exit(0))
151
+ .catch((err) => {
152
+ console.error(`EvalAI ERROR: ${err instanceof Error ? err.message : String(err)}`);
153
+ process.exit(1);
154
+ });
155
+ }
156
+ else if (subcommand === "impact-analysis") {
157
+ // Parse arguments for impact-analysis command
158
+ const args = argv.slice(1);
159
+ const baseIndex = args.indexOf("--base");
160
+ const changedFilesIndex = args.indexOf("--changed-files");
161
+ const formatIndex = args.indexOf("--format");
162
+ const baseBranch = baseIndex !== -1 ? args[baseIndex + 1] : "main";
163
+ const changedFiles = changedFilesIndex !== -1
164
+ ? args[changedFilesIndex + 1]?.split(",")
165
+ : undefined;
166
+ const format = formatIndex !== -1 ? args[formatIndex + 1] : "human";
167
+ (0, impact_analysis_1.runImpactAnalysisCLI)({ baseBranch, changedFiles, format })
168
+ .then(() => process.exit(0))
169
+ .catch((err) => {
170
+ console.error(`EvalAI ERROR: ${err instanceof Error ? err.message : String(err)}`);
171
+ process.exit(2);
172
+ });
173
+ }
174
+ else if (subcommand === "run") {
175
+ // Parse arguments for run command
176
+ const args = argv.slice(1);
177
+ const specIdsIndex = args.indexOf("--spec-ids");
178
+ const impactedOnlyIndex = args.indexOf("--impacted-only");
179
+ const baseIndex = args.indexOf("--base");
180
+ const formatIndex = args.indexOf("--format");
181
+ const writeResultsIndex = args.indexOf("--write-results");
182
+ const specIds = specIdsIndex !== -1 ? args[specIdsIndex + 1]?.split(",") : undefined;
183
+ const impactedOnly = impactedOnlyIndex !== -1;
184
+ const baseBranch = baseIndex !== -1 ? args[baseIndex + 1] : undefined;
185
+ const format = formatIndex !== -1 ? args[formatIndex + 1] : "human";
186
+ const writeResults = writeResultsIndex !== -1;
187
+ (0, run_1.runEvaluationsCLI)({
188
+ specIds,
189
+ impactedOnly: impactedOnly ? !!baseBranch : false,
190
+ baseBranch,
191
+ format,
192
+ writeResults,
193
+ })
194
+ .then(() => process.exit(0))
195
+ .catch((err) => {
196
+ console.error(`EvalAI ERROR: ${err instanceof Error ? err.message : String(err)}`);
197
+ process.exit(2);
198
+ });
199
+ }
200
+ else if (subcommand === "diff") {
201
+ // Parse arguments for diff command
202
+ const args = argv.slice(1);
203
+ const baseIndex = args.indexOf("--base");
204
+ const headIndex = args.indexOf("--head");
205
+ const formatIndex = args.indexOf("--format");
206
+ const base = baseIndex !== -1 ? args[baseIndex + 1] : undefined;
207
+ const head = headIndex !== -1 ? args[headIndex + 1] : undefined;
208
+ const format = formatIndex !== -1 ? args[formatIndex + 1] : "human";
209
+ (0, diff_1.runDiffCLI)({ base, head, format })
210
+ .then(() => process.exit(0))
211
+ .catch((err) => {
212
+ console.error(`EvalAI ERROR: ${err instanceof Error ? err.message : String(err)}`);
213
+ process.exit(2);
214
+ });
215
+ }
216
+ else if (subcommand === "ci") {
217
+ // Parse arguments for ci command
218
+ const args = argv.slice(1);
219
+ const baseIndex = args.indexOf("--base");
220
+ const impactedOnlyIndex = args.indexOf("--impacted-only");
221
+ const formatIndex = args.indexOf("--format");
222
+ const writeResultsIndex = args.indexOf("--write-results");
223
+ const base = baseIndex !== -1 ? args[baseIndex + 1] : undefined;
224
+ const impactedOnly = impactedOnlyIndex !== -1;
225
+ const format = formatIndex !== -1
226
+ ? args[formatIndex + 1]
227
+ : "human";
228
+ const writeResults = writeResultsIndex !== -1;
229
+ (0, ci_1.runCICLI)({ base, impactedOnly, format, writeResults })
230
+ .then(() => process.exit(0))
231
+ .catch((err) => {
232
+ console.error(`EvalAI ERROR: ${err instanceof Error ? err.message : String(err)}`);
233
+ process.exit(2);
234
+ });
235
+ }
85
236
  else {
86
237
  console.log(`EvalAI CLI
87
238
 
88
239
  Usage:
89
240
  evalai init Create evalai.config.json + baseline + CI workflow
241
+ evalai discover Discover behavioral specs in project and show statistics
242
+ evalai discover --manifest Generate evaluation manifest for incremental analysis
243
+ evalai impact-analysis Analyze impact of changes and suggest targeted tests
244
+ --base <branch> Base branch to compare against (default: main)
245
+ --changed-files <files> Comma-separated list of changed files (for CI)
246
+ --format <fmt> Output format: human (default), json
247
+ evalai ci One-command CI loop (manifest → impact → run → diff)
248
+ --base <ref> Base reference for diff (baseline|last|<runId>|<path>|<gitref>)
249
+ --impacted-only Run only specs impacted by changes
250
+ --format <fmt> Output format: human (default), json, github
251
+ --write-results Write run results to .evalai/last-run.json
252
+ evalai run Run evaluation specifications
253
+ --spec-ids <ids> Comma-separated list of spec IDs to run
254
+ --impacted-only Run only specs impacted by changes (requires --base)
255
+ --base <branch> Base branch for impact analysis (with --impacted-only)
256
+ --format <fmt> Output format: human (default), json
257
+ --write-results Write results to .evalai/last-run.json
258
+ evalai diff Compare two run reports and show behavioral changes
259
+ --base <branch> Base branch or report path (default: main)
260
+ --head <path> Head report path (default: .evalai/last-run.json)
261
+ --format <fmt> Output format: human (default), json
90
262
  evalai gate [options] Run regression gate (local test-based, no API needed)
91
263
  evalai check [options] CI/CD evaluation gate (API-based)
92
264
  evalai explain [options] Explain last gate/check failure with root causes + fixes
@@ -133,6 +305,19 @@ Options for doctor:
133
305
 
134
306
  Examples:
135
307
  evalai init
308
+ evalai discover
309
+ evalai discover --manifest
310
+ evalai impact-analysis --base main
311
+ evalai impact-analysis --base main --format json
312
+ evalai impact-analysis --changed-files src/utils.ts,datasets/test.json
313
+ evalai run
314
+ evalai run --spec-ids spec1,spec2
315
+ evalai run --impacted-only --base main
316
+ evalai run --format json --write-results
317
+ evalai diff
318
+ evalai diff --base main
319
+ evalai diff --base main --format json
320
+ evalai diff --base .evalai/runs/base.json --head .evalai/last-run.json
136
321
  evalai gate
137
322
  evalai gate --format json
138
323
  evalai explain
@@ -0,0 +1,103 @@
1
+ /**
2
+ * TICKET 2 — Evaluation Manifest Generation
3
+ *
4
+ * Goal: turn discovery output into a stable, versioned, machine-consumable artifact
5
+ * that becomes the input to run / impact / diff.
6
+ *
7
+ * This is the compiler output that everything else consumes.
8
+ */
9
+ import type { ExecutionModeConfig } from "../runtime/execution-mode";
10
+ import { SDK_VERSION } from "../version";
11
+ import type { SpecAnalysis } from "./discover";
12
+ export { SDK_VERSION };
13
+ /**
14
+ * Manifest schema version
15
+ */
16
+ export declare const MANIFEST_SCHEMA_VERSION = 1;
17
+ /**
18
+ * Evaluation Manifest Schema
19
+ */
20
+ export interface EvaluationManifest {
21
+ /** Schema version for compatibility */
22
+ schemaVersion: number;
23
+ /** When this manifest was generated */
24
+ generatedAt: number;
25
+ /** Project metadata */
26
+ project: {
27
+ name: string;
28
+ root: string;
29
+ namespace: string;
30
+ };
31
+ /** Runtime information */
32
+ runtime: {
33
+ mode: "spec" | "legacy";
34
+ sdkVersion: string;
35
+ };
36
+ /** Spec files with hashes */
37
+ specFiles: SpecFile[];
38
+ /** Individual specifications */
39
+ specs: Spec[];
40
+ }
41
+ /**
42
+ * Spec file information
43
+ */
44
+ export interface SpecFile {
45
+ /** POSIX-relative file path */
46
+ filePath: string;
47
+ /** SHA-256 hash of file content */
48
+ fileHash: string;
49
+ /** Number of specs in this file */
50
+ specCount: number;
51
+ }
52
+ /**
53
+ * Individual specification
54
+ */
55
+ export interface Spec {
56
+ /** Stable canonical ID */
57
+ id: string;
58
+ /** Spec name */
59
+ name: string;
60
+ /** Suite path from tags or file structure */
61
+ suitePath: string[];
62
+ /** POSIX-relative file path */
63
+ filePath: string;
64
+ /** Position in file */
65
+ position: {
66
+ line: number;
67
+ column: number;
68
+ };
69
+ /** Tags/categories */
70
+ tags: string[];
71
+ /** Dependencies */
72
+ dependsOn: {
73
+ prompts: string[];
74
+ datasets: string[];
75
+ tools: string[];
76
+ code: string[];
77
+ };
78
+ }
79
+ /**
80
+ * Lock file for caching
81
+ */
82
+ export interface ManifestLock {
83
+ /** When lock was generated */
84
+ generatedAt: number;
85
+ /** File hashes for incremental updates */
86
+ fileHashes: Record<string, string>;
87
+ }
88
+ /**
89
+ * Generate evaluation manifest from discovery results
90
+ */
91
+ export declare function generateManifest(specs: SpecAnalysis[], projectRoot: string, projectName: string, executionMode: ExecutionModeConfig): Promise<EvaluationManifest>;
92
+ /**
93
+ * Write manifest to disk
94
+ */
95
+ export declare function writeManifest(manifest: EvaluationManifest, projectRoot: string): Promise<void>;
96
+ /**
97
+ * Read existing manifest
98
+ */
99
+ export declare function readManifest(projectRoot: string): Promise<EvaluationManifest | null>;
100
+ /**
101
+ * Read existing lock file
102
+ */
103
+ export declare function readLock(projectRoot: string): Promise<ManifestLock | null>;