@pauly4010/evalai-sdk 1.8.0 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +54 -0
- package/dist/cli/ci.d.ts +45 -0
- package/dist/cli/ci.js +192 -0
- package/dist/cli/diff.d.ts +173 -0
- package/dist/cli/diff.js +680 -0
- package/dist/cli/discover.d.ts +84 -0
- package/dist/cli/discover.js +408 -0
- package/dist/cli/doctor.js +19 -10
- package/dist/cli/env.d.ts +21 -0
- package/dist/cli/env.js +42 -0
- package/dist/cli/explain.js +143 -37
- package/dist/cli/impact-analysis.d.ts +63 -0
- package/dist/cli/impact-analysis.js +251 -0
- package/dist/cli/index.js +173 -0
- package/dist/cli/manifest.d.ts +105 -0
- package/dist/cli/manifest.js +275 -0
- package/dist/cli/migrate.d.ts +41 -0
- package/dist/cli/migrate.js +349 -0
- package/dist/cli/print-config.js +18 -14
- package/dist/cli/run.d.ts +101 -0
- package/dist/cli/run.js +389 -0
- package/dist/cli/workspace.d.ts +28 -0
- package/dist/cli/workspace.js +58 -0
- package/dist/index.d.ts +6 -0
- package/dist/index.js +30 -5
- package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
- package/dist/runtime/adapters/config-to-dsl.js +391 -0
- package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
- package/dist/runtime/adapters/testsuite-to-dsl.js +271 -0
- package/dist/runtime/context.d.ts +26 -0
- package/dist/runtime/context.js +74 -0
- package/dist/runtime/eval.d.ts +46 -0
- package/dist/runtime/eval.js +237 -0
- package/dist/runtime/execution-mode.d.ts +80 -0
- package/dist/runtime/execution-mode.js +353 -0
- package/dist/runtime/executor.d.ts +16 -0
- package/dist/runtime/executor.js +152 -0
- package/dist/runtime/registry.d.ts +78 -0
- package/dist/runtime/registry.js +416 -0
- package/dist/runtime/run-report.d.ts +202 -0
- package/dist/runtime/run-report.js +220 -0
- package/dist/runtime/types.d.ts +356 -0
- package/dist/runtime/types.js +76 -0
- package/dist/testing.d.ts +65 -0
- package/dist/testing.js +42 -0
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/package.json +4 -3
package/dist/cli/explain.js
CHANGED
|
@@ -124,9 +124,7 @@ function classifyRootCauses(report) {
|
|
|
124
124
|
}
|
|
125
125
|
// Analyze failed cases for drift patterns
|
|
126
126
|
if (failedCases.length > 0) {
|
|
127
|
-
const outputs = failedCases
|
|
128
|
-
.map((fc) => (fc.output ?? "").toLowerCase())
|
|
129
|
-
.filter(Boolean);
|
|
127
|
+
const outputs = failedCases.map((fc) => (fc.output ?? "").toLowerCase()).filter(Boolean);
|
|
130
128
|
const expectedOutputs = failedCases
|
|
131
129
|
.map((fc) => (fc.expectedOutput ?? "").toLowerCase())
|
|
132
130
|
.filter(Boolean);
|
|
@@ -138,9 +136,7 @@ function classifyRootCauses(report) {
|
|
|
138
136
|
causes.push("formatting_drift");
|
|
139
137
|
}
|
|
140
138
|
// Tool use drift: output mentions tool calls or function calls
|
|
141
|
-
const hasToolIssue = outputs.some((o) => o.includes("tool_call") ||
|
|
142
|
-
o.includes("function_call") ||
|
|
143
|
-
o.includes("tool_use"));
|
|
139
|
+
const hasToolIssue = outputs.some((o) => o.includes("tool_call") || o.includes("function_call") || o.includes("tool_use"));
|
|
144
140
|
if (hasToolIssue) {
|
|
145
141
|
causes.push("tool_use_drift");
|
|
146
142
|
}
|
|
@@ -173,52 +169,164 @@ function classifyRootCauses(report) {
|
|
|
173
169
|
// ── Suggested fixes ──
|
|
174
170
|
const ROOT_CAUSE_FIXES = {
|
|
175
171
|
prompt_drift: [
|
|
176
|
-
{
|
|
177
|
-
|
|
178
|
-
|
|
172
|
+
{
|
|
173
|
+
action: "Review prompt changes",
|
|
174
|
+
detail: "Compare current prompt with the version used in baseline run. Diff system/user messages.",
|
|
175
|
+
priority: "high",
|
|
176
|
+
},
|
|
177
|
+
{
|
|
178
|
+
action: "Pin model version",
|
|
179
|
+
detail: "Use a specific model snapshot (e.g. gpt-4-0613) instead of a rolling alias.",
|
|
180
|
+
priority: "medium",
|
|
181
|
+
},
|
|
182
|
+
{
|
|
183
|
+
action: "Update baseline",
|
|
184
|
+
detail: "If changes are intentional, run: npx evalai baseline update",
|
|
185
|
+
priority: "low",
|
|
186
|
+
},
|
|
179
187
|
],
|
|
180
188
|
retrieval_drift: [
|
|
181
|
-
{
|
|
182
|
-
|
|
183
|
-
|
|
189
|
+
{
|
|
190
|
+
action: "Check retrieval pipeline",
|
|
191
|
+
detail: "Verify embeddings, index, and chunk strategy haven't changed.",
|
|
192
|
+
priority: "high",
|
|
193
|
+
},
|
|
194
|
+
{
|
|
195
|
+
action: "Update test case context",
|
|
196
|
+
detail: "If knowledge base changed, update expected outputs in test cases.",
|
|
197
|
+
priority: "medium",
|
|
198
|
+
},
|
|
199
|
+
{
|
|
200
|
+
action: "Add retrieval-specific tests",
|
|
201
|
+
detail: "Add test cases that verify document retrieval before generation.",
|
|
202
|
+
priority: "low",
|
|
203
|
+
},
|
|
184
204
|
],
|
|
185
205
|
formatting_drift: [
|
|
186
|
-
{
|
|
187
|
-
|
|
188
|
-
|
|
206
|
+
{
|
|
207
|
+
action: "Update output format instructions",
|
|
208
|
+
detail: "Check if system prompt format instructions match expected output structure.",
|
|
209
|
+
priority: "high",
|
|
210
|
+
},
|
|
211
|
+
{
|
|
212
|
+
action: "Add format validators",
|
|
213
|
+
detail: "Use schema assertions to validate output structure (JSON schema, regex).",
|
|
214
|
+
priority: "medium",
|
|
215
|
+
},
|
|
216
|
+
{
|
|
217
|
+
action: "Refresh baseline",
|
|
218
|
+
detail: "If new format is intentional, run: npx evalai baseline update",
|
|
219
|
+
priority: "low",
|
|
220
|
+
},
|
|
189
221
|
],
|
|
190
222
|
tool_use_drift: [
|
|
191
|
-
{
|
|
192
|
-
|
|
193
|
-
|
|
223
|
+
{
|
|
224
|
+
action: "Verify tool definitions",
|
|
225
|
+
detail: "Check that tool/function schemas match what the model expects.",
|
|
226
|
+
priority: "high",
|
|
227
|
+
},
|
|
228
|
+
{
|
|
229
|
+
action: "Review tool call patterns",
|
|
230
|
+
detail: "Compare tool call sequences in failing vs passing cases.",
|
|
231
|
+
priority: "medium",
|
|
232
|
+
},
|
|
233
|
+
{
|
|
234
|
+
action: "Add tool-use assertions",
|
|
235
|
+
detail: "Assert specific tool calls are made (or not made) per test case.",
|
|
236
|
+
priority: "low",
|
|
237
|
+
},
|
|
194
238
|
],
|
|
195
239
|
safety_regression: [
|
|
196
|
-
{
|
|
197
|
-
|
|
198
|
-
|
|
240
|
+
{
|
|
241
|
+
action: "Review safety assertions",
|
|
242
|
+
detail: "Check which safety test cases are failing and why.",
|
|
243
|
+
priority: "high",
|
|
244
|
+
},
|
|
245
|
+
{
|
|
246
|
+
action: "Strengthen guardrails",
|
|
247
|
+
detail: "Add or update content filters, system prompt safety instructions.",
|
|
248
|
+
priority: "high",
|
|
249
|
+
},
|
|
250
|
+
{
|
|
251
|
+
action: "Update rubric",
|
|
252
|
+
detail: "If safety criteria changed, update the LLM judge rubric.",
|
|
253
|
+
priority: "medium",
|
|
254
|
+
},
|
|
199
255
|
],
|
|
200
256
|
cost_regression: [
|
|
201
|
-
{
|
|
202
|
-
|
|
203
|
-
|
|
257
|
+
{
|
|
258
|
+
action: "Check token usage",
|
|
259
|
+
detail: "Compare input/output token counts between baseline and current run.",
|
|
260
|
+
priority: "high",
|
|
261
|
+
},
|
|
262
|
+
{
|
|
263
|
+
action: "Optimize prompts",
|
|
264
|
+
detail: "Reduce prompt length or use a smaller model for non-critical paths.",
|
|
265
|
+
priority: "medium",
|
|
266
|
+
},
|
|
267
|
+
{
|
|
268
|
+
action: "Update cost budget",
|
|
269
|
+
detail: "If higher cost is expected, adjust --max-cost-usd threshold.",
|
|
270
|
+
priority: "low",
|
|
271
|
+
},
|
|
204
272
|
],
|
|
205
273
|
latency_regression: [
|
|
206
|
-
{
|
|
207
|
-
|
|
208
|
-
|
|
274
|
+
{
|
|
275
|
+
action: "Check response times",
|
|
276
|
+
detail: "Compare per-test-case latency between baseline and current run.",
|
|
277
|
+
priority: "high",
|
|
278
|
+
},
|
|
279
|
+
{
|
|
280
|
+
action: "Reduce prompt complexity",
|
|
281
|
+
detail: "Simplify prompts or use streaming to reduce perceived latency.",
|
|
282
|
+
priority: "medium",
|
|
283
|
+
},
|
|
284
|
+
{
|
|
285
|
+
action: "Update latency budget",
|
|
286
|
+
detail: "If higher latency is expected, adjust --max-latency-ms threshold.",
|
|
287
|
+
priority: "low",
|
|
288
|
+
},
|
|
209
289
|
],
|
|
210
290
|
coverage_drop: [
|
|
211
|
-
{
|
|
212
|
-
|
|
291
|
+
{
|
|
292
|
+
action: "Add test cases",
|
|
293
|
+
detail: "Current test count is below minimum. Add more test cases to the evaluation.",
|
|
294
|
+
priority: "high",
|
|
295
|
+
},
|
|
296
|
+
{
|
|
297
|
+
action: "Check test case filtering",
|
|
298
|
+
detail: "Verify no test cases were accidentally deleted or filtered out.",
|
|
299
|
+
priority: "medium",
|
|
300
|
+
},
|
|
213
301
|
],
|
|
214
302
|
baseline_stale: [
|
|
215
|
-
{
|
|
216
|
-
|
|
303
|
+
{
|
|
304
|
+
action: "Create baseline",
|
|
305
|
+
detail: "Run: npx evalai baseline init (or publish a run from the dashboard)",
|
|
306
|
+
priority: "high",
|
|
307
|
+
},
|
|
308
|
+
{
|
|
309
|
+
action: "Use --baseline previous",
|
|
310
|
+
detail: "Compare against the previous run instead of a published baseline.",
|
|
311
|
+
priority: "medium",
|
|
312
|
+
},
|
|
217
313
|
],
|
|
218
314
|
unknown: [
|
|
219
|
-
{
|
|
220
|
-
|
|
221
|
-
|
|
315
|
+
{
|
|
316
|
+
action: "Run evalai doctor",
|
|
317
|
+
detail: "Run: npx evalai doctor to check your full CI/CD setup.",
|
|
318
|
+
priority: "high",
|
|
319
|
+
},
|
|
320
|
+
{
|
|
321
|
+
action: "Check logs",
|
|
322
|
+
detail: "Review CI logs for errors or unexpected behavior.",
|
|
323
|
+
priority: "medium",
|
|
324
|
+
},
|
|
325
|
+
{
|
|
326
|
+
action: "Update baseline",
|
|
327
|
+
detail: "If changes are intentional, run: npx evalai baseline update",
|
|
328
|
+
priority: "low",
|
|
329
|
+
},
|
|
222
330
|
],
|
|
223
331
|
};
|
|
224
332
|
function suggestFixes(causes) {
|
|
@@ -395,9 +503,7 @@ async function runExplain(argv) {
|
|
|
395
503
|
const cwd = process.cwd();
|
|
396
504
|
const reportPath = findReport(cwd, flags.reportPath);
|
|
397
505
|
if (!reportPath) {
|
|
398
|
-
const searched = flags.reportPath
|
|
399
|
-
? flags.reportPath
|
|
400
|
-
: REPORT_SEARCH_PATHS.join(", ");
|
|
506
|
+
const searched = flags.reportPath ? flags.reportPath : REPORT_SEARCH_PATHS.join(", ");
|
|
401
507
|
console.error(`\n \u274C No report found. Searched: ${searched}`);
|
|
402
508
|
console.error(" Run a gate first:");
|
|
403
509
|
console.error(" npx evalai gate --format json");
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TICKET 3 — Impact Analysis CLI Command (v0)
|
|
3
|
+
*
|
|
4
|
+
* Goal: Modal-like perceived speed via incremental intelligence
|
|
5
|
+
*
|
|
6
|
+
* Algorithm v0 (practical, shippable):
|
|
7
|
+
* - Inputs: manifest.json + git diff --name-only base...HEAD
|
|
8
|
+
* - Rules: Direct file mapping, dependency tracking, safe fallback
|
|
9
|
+
* - Output: Human-readable counts + JSON for automation
|
|
10
|
+
*/
|
|
11
|
+
import type { EvaluationManifest } from "./manifest";
|
|
12
|
+
/**
|
|
13
|
+
* Impact analysis result
|
|
14
|
+
*/
|
|
15
|
+
export interface ImpactAnalysisResult {
|
|
16
|
+
/** Impacted specification IDs */
|
|
17
|
+
impactedSpecIds: string[];
|
|
18
|
+
/** Reason for each impacted spec */
|
|
19
|
+
reasonBySpecId: Record<string, string>;
|
|
20
|
+
/** Changed files that triggered the analysis */
|
|
21
|
+
changedFiles: string[];
|
|
22
|
+
/** Analysis metadata */
|
|
23
|
+
metadata: {
|
|
24
|
+
baseBranch: string;
|
|
25
|
+
totalSpecs: number;
|
|
26
|
+
impactedCount: number;
|
|
27
|
+
analysisTime: number;
|
|
28
|
+
};
|
|
29
|
+
}
|
|
30
|
+
/**
|
|
31
|
+
* Impact analysis options
|
|
32
|
+
*/
|
|
33
|
+
export interface ImpactAnalysisOptions {
|
|
34
|
+
/** Base branch to compare against */
|
|
35
|
+
baseBranch: string;
|
|
36
|
+
/** Optional explicit list of changed files (for CI) */
|
|
37
|
+
changedFiles?: string[];
|
|
38
|
+
/** Output format */
|
|
39
|
+
format?: "human" | "json";
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* Run impact analysis
|
|
43
|
+
*/
|
|
44
|
+
export declare function runImpactAnalysis(options: ImpactAnalysisOptions, projectRoot?: string): Promise<ImpactAnalysisResult>;
|
|
45
|
+
/**
|
|
46
|
+
* Analyze impact of changed files
|
|
47
|
+
*/
|
|
48
|
+
export declare function analyzeImpact(changedFiles: string[], manifest: EvaluationManifest): {
|
|
49
|
+
impactedSpecIds: string[];
|
|
50
|
+
reasonBySpecId: Record<string, string>;
|
|
51
|
+
};
|
|
52
|
+
/**
|
|
53
|
+
* Print human-readable results
|
|
54
|
+
*/
|
|
55
|
+
export declare function printHumanResults(result: ImpactAnalysisResult): void;
|
|
56
|
+
/**
|
|
57
|
+
* Print JSON results
|
|
58
|
+
*/
|
|
59
|
+
export declare function printJsonResults(result: ImpactAnalysisResult): void;
|
|
60
|
+
/**
|
|
61
|
+
* CLI entry point
|
|
62
|
+
*/
|
|
63
|
+
export declare function runImpactAnalysisCLI(options: ImpactAnalysisOptions): Promise<void>;
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* TICKET 3 — Impact Analysis CLI Command (v0)
|
|
4
|
+
*
|
|
5
|
+
* Goal: Modal-like perceived speed via incremental intelligence
|
|
6
|
+
*
|
|
7
|
+
* Algorithm v0 (practical, shippable):
|
|
8
|
+
* - Inputs: manifest.json + git diff --name-only base...HEAD
|
|
9
|
+
* - Rules: Direct file mapping, dependency tracking, safe fallback
|
|
10
|
+
* - Output: Human-readable counts + JSON for automation
|
|
11
|
+
*/
|
|
12
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
13
|
+
if (k2 === undefined) k2 = k;
|
|
14
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
15
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
16
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
17
|
+
}
|
|
18
|
+
Object.defineProperty(o, k2, desc);
|
|
19
|
+
}) : (function(o, m, k, k2) {
|
|
20
|
+
if (k2 === undefined) k2 = k;
|
|
21
|
+
o[k2] = m[k];
|
|
22
|
+
}));
|
|
23
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
24
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
25
|
+
}) : function(o, v) {
|
|
26
|
+
o["default"] = v;
|
|
27
|
+
});
|
|
28
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
29
|
+
var ownKeys = function(o) {
|
|
30
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
31
|
+
var ar = [];
|
|
32
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
33
|
+
return ar;
|
|
34
|
+
};
|
|
35
|
+
return ownKeys(o);
|
|
36
|
+
};
|
|
37
|
+
return function (mod) {
|
|
38
|
+
if (mod && mod.__esModule) return mod;
|
|
39
|
+
var result = {};
|
|
40
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
41
|
+
__setModuleDefault(result, mod);
|
|
42
|
+
return result;
|
|
43
|
+
};
|
|
44
|
+
})();
|
|
45
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
46
|
+
exports.runImpactAnalysis = runImpactAnalysis;
|
|
47
|
+
exports.analyzeImpact = analyzeImpact;
|
|
48
|
+
exports.printHumanResults = printHumanResults;
|
|
49
|
+
exports.printJsonResults = printJsonResults;
|
|
50
|
+
exports.runImpactAnalysisCLI = runImpactAnalysisCLI;
|
|
51
|
+
const fs = __importStar(require("node:fs/promises"));
|
|
52
|
+
const path = __importStar(require("node:path"));
|
|
53
|
+
const node_child_process_1 = require("node:child_process");
|
|
54
|
+
/**
 * Run impact analysis for a project.
 *
 * Loads the evaluation manifest, resolves the list of changed files (taken
 * from `options.changedFiles` when provided, otherwise obtained from git
 * relative to `options.baseBranch`), and maps those files onto specs.
 *
 * @param {object} options - Analysis options.
 * @param {string} options.baseBranch - Base branch to compare against.
 * @param {string[]} [options.changedFiles] - Explicit list of changed files.
 * @param {string} [projectRoot] - Directory handed to the manifest reader; defaults to `process.cwd()`.
 * @returns {Promise<object>} Impacted spec IDs, per-spec reasons, the changed files and run metadata.
 * @throws {Error} When no manifest could be loaded.
 */
async function runImpactAnalysis(options, projectRoot = process.cwd()) {
    const startedAt = Date.now();
    const manifest = await readManifest(projectRoot);
    if (!manifest) {
        throw new Error("No evaluation manifest found. Run 'evalai discover --manifest' first.");
    }
    // Only shell out to git when the caller did not supply the file list.
    let changedFiles = options.changedFiles;
    if (!changedFiles) {
        changedFiles = await getChangedFiles(options.baseBranch);
    }
    const analysis = analyzeImpact(changedFiles, manifest);
    const metadata = {
        baseBranch: options.baseBranch,
        totalSpecs: manifest.specs.length,
        impactedCount: analysis.impactedSpecIds.length,
        analysisTime: Date.now() - startedAt,
    };
    return {
        impactedSpecIds: analysis.impactedSpecIds,
        reasonBySpecId: analysis.reasonBySpecId,
        changedFiles,
        metadata,
    };
}
|
|
81
|
+
/**
 * Read the evaluation manifest at `<projectRoot>/.evalai/manifest.json`.
 *
 * A missing manifest is an expected condition and yields `null`. Any other
 * failure (unreadable file, malformed JSON) is raised, so the caller does not
 * mistake a broken manifest for an absent one.
 *
 * @param {string} [projectRoot] - Project directory; defaults to `process.cwd()`.
 * @returns {Promise<object|null>} Parsed manifest, or `null` when the file does not exist.
 * @throws {Error} When the manifest exists but cannot be read or parsed.
 */
async function readManifest(projectRoot = process.cwd()) {
    const manifestPath = path.join(projectRoot, ".evalai", "manifest.json");
    let content;
    try {
        content = await fs.readFile(manifestPath, "utf-8");
    }
    catch (error) {
        // "File not found" is the only failure that means "no manifest yet".
        if (error && error.code === "ENOENT") {
            return null;
        }
        const reason = error instanceof Error ? error.message : String(error);
        throw new Error(`Failed to read evaluation manifest at ${manifestPath}: ${reason}`, { cause: error });
    }
    try {
        return JSON.parse(content);
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        throw new Error(`Invalid evaluation manifest at ${manifestPath}: ${reason}`, { cause: error });
    }
}
|
|
94
|
+
/**
 * Get the files changed between `baseBranch` and HEAD.
 *
 * Runs `git diff --name-only <baseBranch>...HEAD` and returns the listed
 * paths, trimmed, with empty lines dropped and backslashes converted to
 * forward slashes.
 *
 * @param {string} baseBranch - Base branch (or ref) to compare against.
 * @returns {Promise<string[]>} Changed file paths in POSIX form.
 * @throws {Error} Rejects when git cannot be started or exits with a non-zero code.
 */
async function getChangedFiles(baseBranch) {
    return new Promise((resolve, reject) => {
        const git = (0, node_child_process_1.spawn)("git", ["diff", "--name-only", `${baseBranch}...HEAD`], {
            stdio: ["pipe", "pipe", "pipe"],
        });
        let output = "";
        let error = "";
        // Without this listener a spawn failure (e.g. git not installed) is an
        // unhandled 'error' event that crashes the process instead of rejecting.
        git.on("error", (spawnError) => {
            reject(new Error(`Git diff failed: ${spawnError.message}`, { cause: spawnError }));
        });
        git.stdout?.on("data", (data) => {
            output += data.toString();
        });
        git.stderr?.on("data", (data) => {
            error += data.toString();
        });
        git.on("close", (code) => {
            if (code !== 0) {
                // A second reject after a spawn error is a harmless no-op.
                reject(new Error(`Git diff failed: ${error}`));
                return;
            }
            const files = output
                .split("\n")
                .map((f) => f.trim())
                .filter((f) => f.length > 0)
                .map((f) => f.replace(/\\/g, "/")); // Normalize to POSIX
            resolve(files);
        });
    });
}
|
|
124
|
+
/**
 * Analyze impact of changed files.
 *
 * Rules, applied per changed file:
 *   1. The file is a spec file        -> every spec defined in it is impacted.
 *   2. The file is a declared dependency (prompt, dataset, tool or code)
 *                                      -> every spec depending on it is impacted.
 *   3. The file matches neither index -> all specs are marked impacted (safe
 *      fallback), a warning is printed, and analysis stops.
 *
 * Paths from both the changed-file list and the manifest are compared in
 * POSIX form. Specs with a missing `dependsOn` object, or missing dependency
 * categories, are treated as having no dependencies of that kind.
 *
 * @param {string[]} changedFiles - Paths of changed files.
 * @param {{specs: Array<object>}} manifest - Evaluation manifest.
 * @returns {{impactedSpecIds: string[], reasonBySpecId: Object<string, string>}}
 *   Sorted impacted spec IDs and the last recorded reason for each.
 */
function analyzeImpact(changedFiles, manifest) {
    const toPosix = (p) => (typeof p === "string" ? p.replace(/\\/g, "/") : p);
    const addToIndex = (index, key, spec) => {
        const bucket = index.get(key);
        if (bucket) {
            bucket.push(spec);
        }
        else {
            index.set(key, [spec]);
        }
    };
    const impactedSpecIds = new Set();
    const reasonBySpecId = {};
    // Normalize changed files to POSIX format
    const normalizedChangedFiles = changedFiles.map((f) => f.replace(/\\/g, "/"));
    // Create lookup maps
    const specsByFile = new Map();
    const specsByDependency = new Map();
    for (const spec of manifest.specs) {
        // Normalize manifest paths too, so they compare equal to changed files.
        addToIndex(specsByFile, toPosix(spec.filePath), spec);
        const dependsOn = spec.dependsOn ?? {};
        const deps = [
            ...(dependsOn.prompts ?? []),
            ...(dependsOn.datasets ?? []),
            ...(dependsOn.tools ?? []),
            ...(dependsOn.code ?? []),
        ];
        for (const dep of deps) {
            addToIndex(specsByDependency, toPosix(dep), spec);
        }
    }
    // Analyze each changed file
    for (const changedFile of normalizedChangedFiles) {
        // Rule 1: Direct spec file change
        const specsInFile = specsByFile.get(changedFile);
        if (specsInFile) {
            for (const spec of specsInFile) {
                impactedSpecIds.add(spec.id);
                reasonBySpecId[spec.id] = `Spec file changed: ${changedFile}`;
            }
        }
        // Rule 2: Dependency change
        const specsUsingDep = specsByDependency.get(changedFile);
        if (specsUsingDep) {
            for (const spec of specsUsingDep) {
                impactedSpecIds.add(spec.id);
                reasonBySpecId[spec.id] = `Dependency changed: ${changedFile}`;
            }
        }
        // Rule 3: Safe fallback for unknown files
        if (!specsInFile && !specsUsingDep) {
            // If we can't map the file, be conservative and run everything
            console.warn(`⚠️ Unknown changed file: ${changedFile}`);
            console.warn(`🛡️ Running full suite for safety`);
            for (const spec of manifest.specs) {
                impactedSpecIds.add(spec.id);
                reasonBySpecId[spec.id] = `Unknown file changed: ${changedFile} (safe fallback)`;
            }
            break; // Everything is already impacted; no need to continue
        }
    }
    return {
        impactedSpecIds: Array.from(impactedSpecIds).sort(),
        reasonBySpecId,
    };
}
|
|
192
|
+
/**
 * Print a human-readable impact report to stdout.
 *
 * Emits a summary header, the changed files (if any), and then either the
 * impacted specs with their reasons plus a suggested `evalai run` command,
 * or a note that nothing was impacted.
 *
 * @param {object} result - Impact analysis result as produced by runImpactAnalysis.
 * @returns {void}
 */
function printHumanResults(result) {
    const { metadata, changedFiles, impactedSpecIds } = result;
    console.log("\n🔍 Impact Analysis Results");
    console.log(`📊 Base branch: ${metadata.baseBranch}`);
    console.log(`📁 Changed files: ${changedFiles.length}`);
    console.log(`🎯 Impacted specs: ${metadata.impactedCount}/${metadata.totalSpecs}`);
    console.log(`⏱️ Analysis time: ${metadata.analysisTime}ms`);
    if (changedFiles.length > 0) {
        console.log("\n📝 Changed files:");
        changedFiles.forEach((file) => {
            console.log(` • ${file}`);
        });
    }
    // Nothing impacted: report and stop.
    if (!(impactedSpecIds.length > 0)) {
        console.log("\n✅ No specifications impacted");
        console.log("💡 No tests needed to run");
        return;
    }
    console.log("\n🎯 Impacted specifications:");
    impactedSpecIds.forEach((specId) => {
        const reason = result.reasonBySpecId[specId];
        console.log(` • ${specId} (${reason})`);
    });
    console.log("\n💡 Suggested command:");
    console.log(` evalai run --spec-ids ${impactedSpecIds.join(",")}`);
}
|
|
221
|
+
/**
 * Print the impact analysis result to stdout as JSON with 2-space indentation.
 *
 * @param {object} result - Impact analysis result to serialize.
 * @returns {void}
 */
function printJsonResults(result) {
    const serialized = JSON.stringify(result, null, 2);
    console.log(serialized);
}
|
|
227
|
+
/**
 * CLI entry point for impact analysis.
 *
 * Runs the analysis, prints the result in the requested format, and
 * terminates the process with an exit code:
 *   0 - no specs impacted,
 *   1 - at least one spec impacted (signals that tests should run),
 *   2 - the analysis itself failed.
 *
 * @param {object} options - Analysis options; `options.format === "json"` selects JSON output.
 * @returns {Promise<void>}
 */
async function runImpactAnalysisCLI(options) {
    try {
        const result = await runImpactAnalysis(options);
        const print = options.format === "json" ? printJsonResults : printHumanResults;
        print(result);
        // Exit with appropriate code: 1 signals that tests should run
        process.exit(result.metadata.impactedCount === 0 ? 0 : 1);
    }
    catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        console.error("❌ Impact analysis failed:", message);
        process.exit(2);
    }
}
|