@pauly4010/evalai-sdk 1.8.0 → 1.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +54 -0
- package/README.md +136 -23
- package/dist/assertions.js +51 -18
- package/dist/batch.js +8 -2
- package/dist/cli/api.js +3 -1
- package/dist/cli/check.js +19 -6
- package/dist/cli/ci-context.js +3 -1
- package/dist/cli/ci.d.ts +45 -0
- package/dist/cli/ci.js +192 -0
- package/dist/cli/config.js +28 -8
- package/dist/cli/diff.d.ts +173 -0
- package/dist/cli/diff.js +685 -0
- package/dist/cli/discover.d.ts +84 -0
- package/dist/cli/discover.js +419 -0
- package/dist/cli/doctor.js +62 -19
- package/dist/cli/env.d.ts +21 -0
- package/dist/cli/env.js +42 -0
- package/dist/cli/explain.js +168 -36
- package/dist/cli/formatters/human.js +4 -1
- package/dist/cli/formatters/pr-comment.js +3 -1
- package/dist/cli/gate.js +6 -2
- package/dist/cli/impact-analysis.d.ts +63 -0
- package/dist/cli/impact-analysis.js +252 -0
- package/dist/cli/index.js +185 -0
- package/dist/cli/manifest.d.ts +103 -0
- package/dist/cli/manifest.js +282 -0
- package/dist/cli/migrate.d.ts +41 -0
- package/dist/cli/migrate.js +349 -0
- package/dist/cli/policy-packs.js +8 -2
- package/dist/cli/print-config.js +33 -14
- package/dist/cli/regression-gate.js +8 -2
- package/dist/cli/report/build-check-report.js +8 -2
- package/dist/cli/run.d.ts +101 -0
- package/dist/cli/run.js +395 -0
- package/dist/cli/share.js +3 -1
- package/dist/cli/upgrade.js +2 -1
- package/dist/cli/workspace.d.ts +28 -0
- package/dist/cli/workspace.js +58 -0
- package/dist/client.d.ts +16 -19
- package/dist/client.js +60 -43
- package/dist/client.request.test.d.ts +1 -1
- package/dist/client.request.test.js +222 -147
- package/dist/context.js +3 -1
- package/dist/errors.js +11 -4
- package/dist/export.js +3 -1
- package/dist/index.d.ts +8 -2
- package/dist/index.js +30 -5
- package/dist/integrations/anthropic.d.ts +20 -1
- package/dist/integrations/openai-eval.js +4 -2
- package/dist/integrations/openai.d.ts +24 -1
- package/dist/local.js +3 -1
- package/dist/logger.js +6 -2
- package/dist/pagination.js +6 -2
- package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
- package/dist/runtime/adapters/config-to-dsl.js +394 -0
- package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
- package/dist/runtime/adapters/testsuite-to-dsl.js +276 -0
- package/dist/runtime/context.d.ts +26 -0
- package/dist/runtime/context.js +74 -0
- package/dist/runtime/eval.d.ts +46 -0
- package/dist/runtime/eval.js +244 -0
- package/dist/runtime/execution-mode.d.ts +80 -0
- package/dist/runtime/execution-mode.js +357 -0
- package/dist/runtime/executor.d.ts +16 -0
- package/dist/runtime/executor.js +152 -0
- package/dist/runtime/registry.d.ts +78 -0
- package/dist/runtime/registry.js +403 -0
- package/dist/runtime/run-report.d.ts +200 -0
- package/dist/runtime/run-report.js +222 -0
- package/dist/runtime/types.d.ts +356 -0
- package/dist/runtime/types.js +76 -0
- package/dist/testing.d.ts +65 -0
- package/dist/testing.js +49 -2
- package/dist/types.d.ts +100 -69
- package/dist/utils/input-hash.js +4 -1
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/dist/workflows.js +62 -14
- package/package.json +115 -110
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* TICKET 3 — Impact Analysis CLI Command (v0)
|
|
4
|
+
*
|
|
5
|
+
* Goal: Modal-like perceived speed via incremental intelligence
|
|
6
|
+
*
|
|
7
|
+
* Algorithm v0 (practical, shippable):
|
|
8
|
+
* - Inputs: manifest.json + git diff --name-only base...HEAD
|
|
9
|
+
* - Rules: Direct file mapping, dependency tracking, safe fallback
|
|
10
|
+
* - Output: Human-readable counts + JSON for automation
|
|
11
|
+
*/
|
|
12
|
+
// Compiler-emitted interop helper. Reuses `this.__createBinding` when one already exists;
// otherwise exposes property `k` of module `m` on object `o` under the name `k2`
// (`k2` defaults to `k`). When `Object.create` is available the property is installed via a
// descriptor, which is replaced by an enumerable getter reading `m[k]` if `m` has no own
// descriptor for `k` or the accessor/writable/configurable check below holds. Without
// `Object.create` the current value is simply copied with `o[k2] = m[k]`.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
13
|
+
if (k2 === undefined) k2 = k;
|
|
14
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
15
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
16
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
17
|
+
}
|
|
18
|
+
Object.defineProperty(o, k2, desc);
|
|
19
|
+
}) : (function(o, m, k, k2) {
|
|
20
|
+
if (k2 === undefined) k2 = k;
|
|
21
|
+
o[k2] = m[k];
|
|
22
|
+
}));
|
|
23
|
+
// Compiler-emitted interop helper. Reuses `this.__setModuleDefault` when one already exists;
// otherwise stores `v` as the `default` property of `o`, either as an enumerable data property
// via `Object.defineProperty` (when `Object.create` exists) or by plain assignment.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
24
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
25
|
+
}) : function(o, v) {
|
|
26
|
+
o["default"] = v;
|
|
27
|
+
});
|
|
28
|
+
// Compiler-emitted interop helper behind the `__importStar(require(...))` calls in this file.
// Reuses `this.__importStar` when one already exists. Otherwise the returned function:
//   - returns `mod` unchanged when `mod.__esModule` is truthy;
//   - else builds a fresh object, binds every own property name of `mod` except "default"
//     through `__createBinding`, and sets its `default` to `mod` itself.
// `ownKeys` replaces itself on first call with `Object.getOwnPropertyNames`, or with a
// `for...in` + `hasOwnProperty` fallback when that function is unavailable.
var __importStar = (this && this.__importStar) || (function () {
|
|
29
|
+
var ownKeys = function(o) {
|
|
30
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
31
|
+
var ar = [];
|
|
32
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
33
|
+
return ar;
|
|
34
|
+
};
|
|
35
|
+
return ownKeys(o);
|
|
36
|
+
};
|
|
37
|
+
return function (mod) {
|
|
38
|
+
if (mod && mod.__esModule) return mod;
|
|
39
|
+
var result = {};
|
|
40
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
41
|
+
__setModuleDefault(result, mod);
|
|
42
|
+
return result;
|
|
43
|
+
};
|
|
44
|
+
})();
|
|
45
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
46
|
+
exports.runImpactAnalysis = runImpactAnalysis;
|
|
47
|
+
exports.analyzeImpact = analyzeImpact;
|
|
48
|
+
exports.printHumanResults = printHumanResults;
|
|
49
|
+
exports.printJsonResults = printJsonResults;
|
|
50
|
+
exports.runImpactAnalysisCLI = runImpactAnalysisCLI;
|
|
51
|
+
const node_child_process_1 = require("node:child_process");
|
|
52
|
+
const fs = __importStar(require("node:fs/promises"));
|
|
53
|
+
const path = __importStar(require("node:path"));
|
|
54
|
+
/**
 * Work out which specs from the evaluation manifest are affected by a set of changed files.
 *
 * The changed files come from `options.changedFiles` when provided; otherwise they are
 * obtained from git by diffing `options.baseBranch` against HEAD.
 *
 * @param options - Analysis options (`baseBranch`, optional `changedFiles`).
 * @param projectRoot - Directory that holds the `.evalai` folder (defaults to the cwd).
 * @returns Impacted spec ids, a reason per spec id, the changed files and summary metadata.
 * @throws {Error} When no manifest can be read for `projectRoot`.
 */
async function runImpactAnalysis(options, projectRoot = process.cwd()) {
    const startedAt = Date.now();
    const manifest = await readManifest(projectRoot);
    if (manifest) {
        // Explicit file list wins; only shell out to git when none was supplied.
        let changedFiles = options.changedFiles;
        if (!changedFiles) {
            changedFiles = await getChangedFiles(options.baseBranch);
        }
        const analysis = analyzeImpact(changedFiles, manifest);
        return {
            impactedSpecIds: analysis.impactedSpecIds,
            reasonBySpecId: analysis.reasonBySpecId,
            changedFiles,
            metadata: {
                baseBranch: options.baseBranch,
                totalSpecs: manifest.specs.length,
                impactedCount: analysis.impactedSpecIds.length,
                analysisTime: Date.now() - startedAt,
            },
        };
    }
    throw new Error("No evaluation manifest found. Run 'evalai discover --manifest' first.");
}
|
|
81
|
+
/**
 * Read the evaluation manifest stored at `<projectRoot>/.evalai/manifest.json`.
 *
 * Only a missing manifest is reported as `null`. A manifest that exists but cannot be read,
 * is not valid JSON, or lacks a `specs` array is a genuine failure and is surfaced as an
 * error instead of being mistaken for "no manifest".
 *
 * @param projectRoot - Directory that holds the `.evalai` folder (defaults to the cwd).
 * @returns The parsed manifest, or `null` when the manifest file does not exist.
 * @throws {Error} When the file is unreadable, malformed, or has no `specs` array.
 */
async function readManifest(projectRoot = process.cwd()) {
    const manifestPath = path.join(projectRoot, ".evalai", "manifest.json");
    let content;
    try {
        content = await fs.readFile(manifestPath, "utf-8");
    }
    catch (error) {
        // ENOENT/ENOTDIR: the manifest simply has not been generated yet.
        const code = error && typeof error === "object" ? error.code : undefined;
        if (code === "ENOENT" || code === "ENOTDIR") {
            return null;
        }
        const detail = error instanceof Error ? error.message : String(error);
        throw new Error(`Failed to read evaluation manifest at ${manifestPath}: ${detail}`);
    }
    let manifest;
    try {
        manifest = JSON.parse(content);
    }
    catch (error) {
        const detail = error instanceof Error ? error.message : String(error);
        throw new Error(`Evaluation manifest at ${manifestPath} is not valid JSON: ${detail}`);
    }
    // Callers iterate `manifest.specs`; fail here with a clear message rather than later.
    if (manifest === null || typeof manifest !== "object" || !Array.isArray(manifest.specs)) {
        throw new Error(`Evaluation manifest at ${manifestPath} has no "specs" array`);
    }
    return manifest;
}
|
|
94
|
+
/**
 * List the files changed between `baseBranch` and HEAD using
 * `git diff --name-only <baseBranch>...HEAD`.
 *
 * @param baseBranch - Git ref to compare against. Must be a non-empty string that does not
 *   start with `-`, so it can never be interpreted by git as a command-line option.
 * @returns Changed paths: trimmed, blank lines removed, backslashes converted to `/`.
 * @throws {Error} (as a rejection) when `baseBranch` is invalid, git cannot be started
 *   (for example it is not installed), or git exits with a non-zero status.
 */
async function getChangedFiles(baseBranch) {
    if (typeof baseBranch !== "string" || baseBranch.trim().length === 0 || baseBranch.startsWith("-")) {
        throw new Error(`Invalid base branch: ${String(baseBranch)}`);
    }
    return new Promise((resolve, reject) => {
        const git = (0, node_child_process_1.spawn)("git", ["diff", "--name-only", `${baseBranch}...HEAD`], {
            // git diff never reads stdin; do not leave an open pipe behind.
            stdio: ["ignore", "pipe", "pipe"],
        });
        let output = "";
        let error = "";
        // Decode as a stream so multi-byte characters split across chunks stay intact.
        git.stdout?.setEncoding("utf8");
        git.stderr?.setEncoding("utf8");
        git.stdout?.on("data", (data) => {
            output += data;
        });
        git.stderr?.on("data", (data) => {
            error += data;
        });
        // Without this listener a failed spawn (e.g. git missing) is an unhandled
        // 'error' event that crashes the process instead of rejecting the promise.
        git.on("error", (spawnError) => {
            reject(new Error(`Git diff failed: ${spawnError.message}`));
        });
        git.on("close", (code) => {
            if (code !== 0) {
                reject(new Error(`Git diff failed: ${error}`));
                return;
            }
            const files = output
                .split("\n")
                .map((f) => f.trim())
                .filter((f) => f.length > 0)
                .map((f) => f.replace(/\\/g, "/")); // Normalize to POSIX
            resolve(files);
        });
    });
}
|
|
124
|
+
/**
 * Map changed files onto the specs they affect.
 *
 * Rules, applied to each changed file in order:
 *  1. The file is a spec file: every spec declared in it is impacted.
 *  2. The file is listed as a dependency (prompt, dataset, tool or code) of a spec:
 *     that spec is impacted.
 *  3. The file matches nothing: every spec is marked impacted (safe fallback) and
 *     analysis stops.
 *
 * Paths from the manifest and from `changedFiles` go through the same normalisation
 * (trimmed, backslashes to `/`, leading `./` removed) so equivalent spellings match.
 * Blank entries in `changedFiles` are ignored, and specs with a missing `dependsOn`
 * (or a missing category inside it) are treated as having no such dependencies.
 *
 * @param changedFiles - Paths of the files that changed.
 * @param manifest - Evaluation manifest; `manifest.specs` is iterated.
 * @returns `impactedSpecIds` (sorted, unique) and `reasonBySpecId` (last matching reason wins).
 */
function analyzeImpact(changedFiles, manifest) {
    const toPosix = (p) => (typeof p === "string" ? p.trim().replace(/\\/g, "/").replace(/^\.\//, "") : p);
    const addToIndex = (lookup, key, spec) => {
        const bucket = lookup.get(key);
        if (bucket) {
            bucket.push(spec);
        }
        else {
            lookup.set(key, [spec]);
        }
    };
    const impactedSpecIds = new Set();
    const reasonBySpecId = {};
    // Normalize changed files to POSIX format and drop blank entries
    const normalizedChangedFiles = changedFiles
        .map(toPosix)
        .filter((f) => typeof f === "string" && f.length > 0);
    // Create lookup maps
    const specsByFile = new Map();
    const specsByDependency = new Map();
    for (const spec of manifest.specs) {
        addToIndex(specsByFile, toPosix(spec.filePath), spec);
        // Manifests are parsed JSON: tolerate absent dependency sections.
        const dependsOn = spec.dependsOn ?? {};
        const deps = [
            ...(dependsOn.prompts ?? []),
            ...(dependsOn.datasets ?? []),
            ...(dependsOn.tools ?? []),
            ...(dependsOn.code ?? []),
        ];
        for (const dep of deps) {
            addToIndex(specsByDependency, toPosix(dep), spec);
        }
    }
    // Analyze each changed file
    for (const changedFile of normalizedChangedFiles) {
        // Rule 1: Direct spec file change
        const specsInFile = specsByFile.get(changedFile);
        if (specsInFile) {
            for (const spec of specsInFile) {
                impactedSpecIds.add(spec.id);
                reasonBySpecId[spec.id] = `Spec file changed: ${changedFile}`;
            }
        }
        // Rule 2: Dependency change
        const specsUsingDep = specsByDependency.get(changedFile);
        if (specsUsingDep) {
            for (const spec of specsUsingDep) {
                impactedSpecIds.add(spec.id);
                reasonBySpecId[spec.id] = `Dependency changed: ${changedFile}`;
            }
        }
        // Rule 3: Safe fallback for unknown files
        if (!specsInFile && !specsUsingDep) {
            // If we can't map the file, be conservative and run everything
            console.warn(`⚠️ Unknown changed file: ${changedFile}`);
            console.warn(`🛡️ Running full suite for safety`);
            for (const spec of manifest.specs) {
                impactedSpecIds.add(spec.id);
                reasonBySpecId[spec.id] =
                    `Unknown file changed: ${changedFile} (safe fallback)`;
            }
            break; // Everything is already selected; nothing more to learn
        }
    }
    return {
        impactedSpecIds: Array.from(impactedSpecIds).sort(),
        reasonBySpecId,
    };
}
|
|
193
|
+
/**
 * Write an impact-analysis result to stdout as a human-readable report.
 *
 * Prints a summary header, then the changed files (if any), then either the impacted
 * specs with their reasons plus a suggested `evalai run` command, or a note that
 * nothing was impacted.
 *
 * @param result - Result object as produced by `runImpactAnalysis`.
 */
function printHumanResults(result) {
    const emit = (line) => console.log(line);
    emit("\n🔍 Impact Analysis Results");
    const meta = result.metadata;
    emit(`📊 Base branch: ${meta.baseBranch}`);
    emit(`📁 Changed files: ${result.changedFiles.length}`);
    emit(`🎯 Impacted specs: ${meta.impactedCount}/${meta.totalSpecs}`);
    emit(`⏱️ Analysis time: ${meta.analysisTime}ms`);
    const hasChangedFiles = result.changedFiles.length > 0;
    if (hasChangedFiles) {
        emit("\n📝 Changed files:");
        for (const changedPath of result.changedFiles) {
            emit(` • ${changedPath}`);
        }
    }
    // Nothing impacted: say so and stop.
    if (!(result.impactedSpecIds.length > 0)) {
        emit("\n✅ No specifications impacted");
        emit("💡 No tests needed to run");
        return;
    }
    emit("\n🎯 Impacted specifications:");
    for (const id of result.impactedSpecIds) {
        const why = result.reasonBySpecId[id];
        emit(` • ${id} (${why})`);
    }
    emit("\n💡 Suggested command:");
    const idList = result.impactedSpecIds.join(",");
    emit(` evalai run --spec-ids ${idList}`);
}
|
|
222
|
+
/**
 * Write an impact-analysis result to stdout as pretty-printed JSON (2-space indent).
 *
 * @param result - Result object to serialise.
 */
function printJsonResults(result) {
    const serialized = JSON.stringify(result, null, 2);
    console.log(serialized);
}
|
|
228
|
+
/**
 * CLI entry point for impact analysis: run the analysis, print it, and exit.
 *
 * Exit codes:
 *  - 0: no spec is impacted
 *  - 1: at least one spec is impacted (signals that tests should run)
 *  - 2: the analysis (or printing its result) failed
 *
 * @param options - Analysis options; `options.format === "json"` selects JSON output,
 *   anything else selects the human-readable report.
 */
async function runImpactAnalysisCLI(options) {
    try {
        const result = await runImpactAnalysis(options);
        const print = options.format === "json" ? printJsonResults : printHumanResults;
        print(result);
        // Exit with appropriate code
        process.exit(result.metadata.impactedCount === 0 ? 0 : 1);
    }
    catch (failure) {
        const detail = failure instanceof Error ? failure.message : String(failure);
        console.error("❌ Impact analysis failed:", detail);
        process.exit(2);
    }
}
|
package/dist/cli/index.js
CHANGED
|
@@ -10,11 +10,17 @@
|
|
|
10
10
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
11
11
|
const baseline_1 = require("./baseline");
|
|
12
12
|
const check_1 = require("./check");
|
|
13
|
+
const ci_1 = require("./ci");
|
|
14
|
+
const diff_1 = require("./diff");
|
|
15
|
+
const discover_1 = require("./discover");
|
|
13
16
|
const doctor_1 = require("./doctor");
|
|
14
17
|
const explain_1 = require("./explain");
|
|
18
|
+
const impact_analysis_1 = require("./impact-analysis");
|
|
15
19
|
const init_1 = require("./init");
|
|
20
|
+
const migrate_1 = require("./migrate");
|
|
16
21
|
const print_config_1 = require("./print-config");
|
|
17
22
|
const regression_gate_1 = require("./regression-gate");
|
|
23
|
+
const run_1 = require("./run");
|
|
18
24
|
const share_1 = require("./share");
|
|
19
25
|
const upgrade_1 = require("./upgrade");
|
|
20
26
|
const argv = process.argv.slice(2);
|
|
@@ -32,6 +38,60 @@ else if (subcommand === "gate") {
|
|
|
32
38
|
const code = (0, regression_gate_1.runGate)(argv.slice(1));
|
|
33
39
|
process.exit(code);
|
|
34
40
|
}
|
|
41
|
+
else if (subcommand === "migrate") {
|
|
42
|
+
// Handle migrate subcommand
|
|
43
|
+
const migrateSubcommand = argv[1];
|
|
44
|
+
if (migrateSubcommand === "config") {
|
|
45
|
+
// Parse migrate config arguments
|
|
46
|
+
let inputPath = "";
|
|
47
|
+
let outputPath = "";
|
|
48
|
+
let verbose = false;
|
|
49
|
+
let helpers = true;
|
|
50
|
+
let preserveIds = true;
|
|
51
|
+
let provenance = true;
|
|
52
|
+
for (let i = 2; i < argv.length; i++) {
|
|
53
|
+
const arg = argv[i];
|
|
54
|
+
if (arg === "--in" || arg === "-i") {
|
|
55
|
+
inputPath = argv[++i];
|
|
56
|
+
}
|
|
57
|
+
else if (arg === "--out" || arg === "-o") {
|
|
58
|
+
outputPath = argv[++i];
|
|
59
|
+
}
|
|
60
|
+
else if (arg === "--verbose" || arg === "-v") {
|
|
61
|
+
verbose = true;
|
|
62
|
+
}
|
|
63
|
+
else if (arg === "--no-helpers") {
|
|
64
|
+
helpers = false;
|
|
65
|
+
}
|
|
66
|
+
else if (arg === "--no-preserve-ids") {
|
|
67
|
+
preserveIds = false;
|
|
68
|
+
}
|
|
69
|
+
else if (arg === "--no-provenance") {
|
|
70
|
+
provenance = false;
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
if (!inputPath || !outputPath) {
|
|
74
|
+
console.error("Error: Both --in and --out options are required");
|
|
75
|
+
console.error("Usage: evalai migrate config --in <input> --out <output> [options]");
|
|
76
|
+
process.exit(1);
|
|
77
|
+
}
|
|
78
|
+
(0, migrate_1.migrateConfig)({
|
|
79
|
+
input: inputPath,
|
|
80
|
+
output: outputPath,
|
|
81
|
+
verbose,
|
|
82
|
+
helpers,
|
|
83
|
+
preserveIds,
|
|
84
|
+
provenance,
|
|
85
|
+
}).catch((err) => {
|
|
86
|
+
console.error(`Migration failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
87
|
+
process.exit(1);
|
|
88
|
+
});
|
|
89
|
+
}
|
|
90
|
+
else {
|
|
91
|
+
console.error("Error: Unknown migrate subcommand. Use 'evalai migrate config'");
|
|
92
|
+
process.exit(1);
|
|
93
|
+
}
|
|
94
|
+
}
|
|
35
95
|
else if (subcommand === "upgrade") {
|
|
36
96
|
const code = (0, upgrade_1.runUpgrade)(argv.slice(1));
|
|
37
97
|
process.exit(code);
|
|
@@ -82,11 +142,123 @@ else if (subcommand === "share") {
|
|
|
82
142
|
process.exit(1);
|
|
83
143
|
});
|
|
84
144
|
}
|
|
145
|
+
else if (subcommand === "discover") {
|
|
146
|
+
// Parse arguments for discover command
|
|
147
|
+
const args = argv.slice(1);
|
|
148
|
+
const manifestFlag = args.includes("--manifest");
|
|
149
|
+
(0, discover_1.discoverSpecs)({ manifest: manifestFlag })
|
|
150
|
+
.then(() => process.exit(0))
|
|
151
|
+
.catch((err) => {
|
|
152
|
+
console.error(`EvalAI ERROR: ${err instanceof Error ? err.message : String(err)}`);
|
|
153
|
+
process.exit(1);
|
|
154
|
+
});
|
|
155
|
+
}
|
|
156
|
+
else if (subcommand === "impact-analysis") {
|
|
157
|
+
// Parse arguments for impact-analysis command
|
|
158
|
+
const args = argv.slice(1);
|
|
159
|
+
const baseIndex = args.indexOf("--base");
|
|
160
|
+
const changedFilesIndex = args.indexOf("--changed-files");
|
|
161
|
+
const formatIndex = args.indexOf("--format");
|
|
162
|
+
const baseBranch = baseIndex !== -1 ? args[baseIndex + 1] : "main";
|
|
163
|
+
const changedFiles = changedFilesIndex !== -1
|
|
164
|
+
? args[changedFilesIndex + 1]?.split(",")
|
|
165
|
+
: undefined;
|
|
166
|
+
const format = formatIndex !== -1 ? args[formatIndex + 1] : "human";
|
|
167
|
+
(0, impact_analysis_1.runImpactAnalysisCLI)({ baseBranch, changedFiles, format })
|
|
168
|
+
.then(() => process.exit(0))
|
|
169
|
+
.catch((err) => {
|
|
170
|
+
console.error(`EvalAI ERROR: ${err instanceof Error ? err.message : String(err)}`);
|
|
171
|
+
process.exit(2);
|
|
172
|
+
});
|
|
173
|
+
}
|
|
174
|
+
else if (subcommand === "run") {
|
|
175
|
+
// Parse arguments for run command
|
|
176
|
+
const args = argv.slice(1);
|
|
177
|
+
const specIdsIndex = args.indexOf("--spec-ids");
|
|
178
|
+
const impactedOnlyIndex = args.indexOf("--impacted-only");
|
|
179
|
+
const baseIndex = args.indexOf("--base");
|
|
180
|
+
const formatIndex = args.indexOf("--format");
|
|
181
|
+
const writeResultsIndex = args.indexOf("--write-results");
|
|
182
|
+
const specIds = specIdsIndex !== -1 ? args[specIdsIndex + 1]?.split(",") : undefined;
|
|
183
|
+
const impactedOnly = impactedOnlyIndex !== -1;
|
|
184
|
+
const baseBranch = baseIndex !== -1 ? args[baseIndex + 1] : undefined;
|
|
185
|
+
const format = formatIndex !== -1 ? args[formatIndex + 1] : "human";
|
|
186
|
+
const writeResults = writeResultsIndex !== -1;
|
|
187
|
+
(0, run_1.runEvaluationsCLI)({
|
|
188
|
+
specIds,
|
|
189
|
+
impactedOnly: impactedOnly ? !!baseBranch : false,
|
|
190
|
+
baseBranch,
|
|
191
|
+
format,
|
|
192
|
+
writeResults,
|
|
193
|
+
})
|
|
194
|
+
.then(() => process.exit(0))
|
|
195
|
+
.catch((err) => {
|
|
196
|
+
console.error(`EvalAI ERROR: ${err instanceof Error ? err.message : String(err)}`);
|
|
197
|
+
process.exit(2);
|
|
198
|
+
});
|
|
199
|
+
}
|
|
200
|
+
else if (subcommand === "diff") {
|
|
201
|
+
// Parse arguments for diff command
|
|
202
|
+
const args = argv.slice(1);
|
|
203
|
+
const baseIndex = args.indexOf("--base");
|
|
204
|
+
const headIndex = args.indexOf("--head");
|
|
205
|
+
const formatIndex = args.indexOf("--format");
|
|
206
|
+
const base = baseIndex !== -1 ? args[baseIndex + 1] : undefined;
|
|
207
|
+
const head = headIndex !== -1 ? args[headIndex + 1] : undefined;
|
|
208
|
+
const format = formatIndex !== -1 ? args[formatIndex + 1] : "human";
|
|
209
|
+
(0, diff_1.runDiffCLI)({ base, head, format })
|
|
210
|
+
.then(() => process.exit(0))
|
|
211
|
+
.catch((err) => {
|
|
212
|
+
console.error(`EvalAI ERROR: ${err instanceof Error ? err.message : String(err)}`);
|
|
213
|
+
process.exit(2);
|
|
214
|
+
});
|
|
215
|
+
}
|
|
216
|
+
else if (subcommand === "ci") {
|
|
217
|
+
// Parse arguments for ci command
|
|
218
|
+
const args = argv.slice(1);
|
|
219
|
+
const baseIndex = args.indexOf("--base");
|
|
220
|
+
const impactedOnlyIndex = args.indexOf("--impacted-only");
|
|
221
|
+
const formatIndex = args.indexOf("--format");
|
|
222
|
+
const writeResultsIndex = args.indexOf("--write-results");
|
|
223
|
+
const base = baseIndex !== -1 ? args[baseIndex + 1] : undefined;
|
|
224
|
+
const impactedOnly = impactedOnlyIndex !== -1;
|
|
225
|
+
const format = formatIndex !== -1
|
|
226
|
+
? args[formatIndex + 1]
|
|
227
|
+
: "human";
|
|
228
|
+
const writeResults = writeResultsIndex !== -1;
|
|
229
|
+
(0, ci_1.runCICLI)({ base, impactedOnly, format, writeResults })
|
|
230
|
+
.then(() => process.exit(0))
|
|
231
|
+
.catch((err) => {
|
|
232
|
+
console.error(`EvalAI ERROR: ${err instanceof Error ? err.message : String(err)}`);
|
|
233
|
+
process.exit(2);
|
|
234
|
+
});
|
|
235
|
+
}
|
|
85
236
|
else {
|
|
86
237
|
console.log(`EvalAI CLI
|
|
87
238
|
|
|
88
239
|
Usage:
|
|
89
240
|
evalai init Create evalai.config.json + baseline + CI workflow
|
|
241
|
+
evalai discover Discover behavioral specs in project and show statistics
|
|
242
|
+
evalai discover --manifest Generate evaluation manifest for incremental analysis
|
|
243
|
+
evalai impact-analysis Analyze impact of changes and suggest targeted tests
|
|
244
|
+
--base <branch> Base branch to compare against (default: main)
|
|
245
|
+
--changed-files <files> Comma-separated list of changed files (for CI)
|
|
246
|
+
--format <fmt> Output format: human (default), json
|
|
247
|
+
evalai ci One-command CI loop (manifest → impact → run → diff)
|
|
248
|
+
--base <ref> Base reference for diff (baseline|last|<runId>|<path>|<gitref>)
|
|
249
|
+
--impacted-only Run only specs impacted by changes
|
|
250
|
+
--format <fmt> Output format: human (default), json, github
|
|
251
|
+
--write-results Write run results to .evalai/last-run.json
|
|
252
|
+
evalai run Run evaluation specifications
|
|
253
|
+
--spec-ids <ids> Comma-separated list of spec IDs to run
|
|
254
|
+
--impacted-only Run only specs impacted by changes (requires --base)
|
|
255
|
+
--base <branch> Base branch for impact analysis (with --impacted-only)
|
|
256
|
+
--format <fmt> Output format: human (default), json
|
|
257
|
+
--write-results Write results to .evalai/last-run.json
|
|
258
|
+
evalai diff Compare two run reports and show behavioral changes
|
|
259
|
+
--base <branch> Base branch or report path (default: main)
|
|
260
|
+
--head <path> Head report path (default: .evalai/last-run.json)
|
|
261
|
+
--format <fmt> Output format: human (default), json
|
|
90
262
|
evalai gate [options] Run regression gate (local test-based, no API needed)
|
|
91
263
|
evalai check [options] CI/CD evaluation gate (API-based)
|
|
92
264
|
evalai explain [options] Explain last gate/check failure with root causes + fixes
|
|
@@ -133,6 +305,19 @@ Options for doctor:
|
|
|
133
305
|
|
|
134
306
|
Examples:
|
|
135
307
|
evalai init
|
|
308
|
+
evalai discover
|
|
309
|
+
evalai discover --manifest
|
|
310
|
+
evalai impact-analysis --base main
|
|
311
|
+
evalai impact-analysis --base main --format json
|
|
312
|
+
evalai impact-analysis --changed-files src/utils.ts,datasets/test.json
|
|
313
|
+
evalai run
|
|
314
|
+
evalai run --spec-ids spec1,spec2
|
|
315
|
+
evalai run --impacted-only --base main
|
|
316
|
+
evalai run --format json --write-results
|
|
317
|
+
evalai diff
|
|
318
|
+
evalai diff --base main
|
|
319
|
+
evalai diff --base main --format json
|
|
320
|
+
evalai diff --a .evalai/runs/base.json --b .evalai/last-run.json
|
|
136
321
|
evalai gate
|
|
137
322
|
evalai gate --format json
|
|
138
323
|
evalai explain
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TICKET 2 — Evaluation Manifest Generation
|
|
3
|
+
*
|
|
4
|
+
* Goal: turn discovery output into a stable, versioned, machine-consumable artifact
|
|
5
|
+
* that becomes the input to run / impact / diff.
|
|
6
|
+
*
|
|
7
|
+
* This is the compiler output that everything else consumes.
|
|
8
|
+
*/
|
|
9
|
+
import type { ExecutionModeConfig } from "../runtime/execution-mode";
|
|
10
|
+
import { SDK_VERSION } from "../version";
|
|
11
|
+
import type { SpecAnalysis } from "./discover";
|
|
12
|
+
export { SDK_VERSION };
|
|
13
|
+
/**
|
|
14
|
+
* Manifest schema version (declared here as the literal 1).
|
|
15
|
+
*/
|
|
16
|
+
export declare const MANIFEST_SCHEMA_VERSION = 1;
|
|
17
|
+
/**
|
|
18
|
+
* Evaluation manifest schema: the top-level object listing a project's spec files and its individual specs.
|
|
19
|
+
*/
|
|
20
|
+
export interface EvaluationManifest {
|
|
21
|
+
/** Schema version for compatibility (NOTE(review): presumably MANIFEST_SCHEMA_VERSION at generation time; confirm) */
|
|
22
|
+
schemaVersion: number;
|
|
23
|
+
/** When this manifest was generated (numeric timestamp; NOTE(review): unit not stated here, presumably epoch milliseconds; confirm) */
|
|
24
|
+
generatedAt: number;
|
|
25
|
+
/** Project metadata: name, root and namespace, all strings */
|
|
26
|
+
project: {
|
|
27
|
+
name: string;
|
|
28
|
+
root: string;
|
|
29
|
+
namespace: string;
|
|
30
|
+
};
|
|
31
|
+
/** Runtime information: execution mode ("spec" or "legacy") and the SDK version string */
|
|
32
|
+
runtime: {
|
|
33
|
+
mode: "spec" | "legacy";
|
|
34
|
+
sdkVersion: string;
|
|
35
|
+
};
|
|
36
|
+
/** Spec files with hashes (one SpecFile entry each) */
|
|
37
|
+
specFiles: SpecFile[];
|
|
38
|
+
/** Individual specifications (one Spec entry each) */
|
|
39
|
+
specs: Spec[];
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* Spec file information: path, content hash and spec count for one spec file.
|
|
43
|
+
*/
|
|
44
|
+
export interface SpecFile {
|
|
45
|
+
/** POSIX-relative file path (NOTE(review): presumably relative to the project root; confirm) */
|
|
46
|
+
filePath: string;
|
|
47
|
+
/** SHA-256 hash of file content (NOTE(review): string encoding, e.g. hex, is not stated here; confirm) */
|
|
48
|
+
fileHash: string;
|
|
49
|
+
/** Number of specs in this file */
|
|
50
|
+
specCount: number;
|
|
51
|
+
}
|
|
52
|
+
/**
|
|
53
|
+
* Individual specification: identity, location and declared dependencies of one spec.
|
|
54
|
+
*/
|
|
55
|
+
export interface Spec {
|
|
56
|
+
/** Stable canonical ID */
|
|
57
|
+
id: string;
|
|
58
|
+
/** Spec name */
|
|
59
|
+
name: string;
|
|
60
|
+
/** Suite path from tags or file structure, as a list of path segments */
|
|
61
|
+
suitePath: string[];
|
|
62
|
+
/** POSIX-relative path of the file that declares this spec */
|
|
63
|
+
filePath: string;
|
|
64
|
+
/** Position in file (NOTE(review): whether line/column are 0- or 1-based is not stated here; confirm) */
|
|
65
|
+
position: {
|
|
66
|
+
line: number;
|
|
67
|
+
column: number;
|
|
68
|
+
};
|
|
69
|
+
/** Tags/categories */
|
|
70
|
+
tags: string[];
|
|
71
|
+
/** Dependencies, grouped by kind: prompts, datasets, tools and code, each a list of strings */
|
|
72
|
+
dependsOn: {
|
|
73
|
+
prompts: string[];
|
|
74
|
+
datasets: string[];
|
|
75
|
+
tools: string[];
|
|
76
|
+
code: string[];
|
|
77
|
+
};
|
|
78
|
+
}
|
|
79
|
+
/**
|
|
80
|
+
* Lock file for caching: a generation time plus a map of file hashes.
|
|
81
|
+
*/
|
|
82
|
+
export interface ManifestLock {
|
|
83
|
+
/** When lock was generated (numeric timestamp; NOTE(review): unit not stated here; confirm) */
|
|
84
|
+
generatedAt: number;
|
|
85
|
+
/** File hashes for incremental updates (NOTE(review): presumably keyed by file path; confirm) */
|
|
86
|
+
fileHashes: Record<string, string>;
|
|
87
|
+
}
|
|
88
|
+
/**
|
|
89
|
+
* Generate evaluation manifest from discovery results
*
* @param specs - Discovery results, one SpecAnalysis per entry.
* @param projectRoot - Project root directory.
* @param projectName - Project name.
* @param executionMode - Execution mode configuration.
* @returns A promise resolving to the generated EvaluationManifest.
|
|
90
|
+
*/
|
|
91
|
+
export declare function generateManifest(specs: SpecAnalysis[], projectRoot: string, projectName: string, executionMode: ExecutionModeConfig): Promise<EvaluationManifest>;
|
|
92
|
+
/**
|
|
93
|
+
* Write manifest to disk
*
* @param manifest - Manifest to write.
* @param projectRoot - Project root directory (NOTE(review): presumably the file lands under `.evalai/` in this directory; confirm).
* @returns A promise that resolves with no value once writing completes.
|
|
94
|
+
*/
|
|
95
|
+
export declare function writeManifest(manifest: EvaluationManifest, projectRoot: string): Promise<void>;
|
|
96
|
+
/**
|
|
97
|
+
* Read existing manifest
*
* @param projectRoot - Project root directory to read from.
* @returns A promise resolving to the manifest, or `null` (NOTE(review): presumably when no manifest exists; confirm against the implementation).
|
|
98
|
+
*/
|
|
99
|
+
export declare function readManifest(projectRoot: string): Promise<EvaluationManifest | null>;
|
|
100
|
+
/**
|
|
101
|
+
* Read existing lock file
*
* @param projectRoot - Project root directory to read from.
* @returns A promise resolving to the lock data, or `null` (NOTE(review): presumably when no lock file exists; confirm against the implementation).
|
|
102
|
+
*/
|
|
103
|
+
export declare function readLock(projectRoot: string): Promise<ManifestLock | null>;
|