@pauly4010/evalai-sdk 1.4.1 → 1.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +85 -0
- package/README.md +205 -543
- package/dist/assertions.d.ts +2 -2
- package/dist/assertions.js +104 -71
- package/dist/batch.js +12 -17
- package/dist/cache.js +7 -11
- package/dist/cli/api.d.ts +108 -0
- package/dist/cli/api.js +130 -0
- package/dist/cli/check.d.ts +28 -13
- package/dist/cli/check.js +249 -142
- package/dist/cli/ci-context.d.ts +6 -0
- package/dist/cli/ci-context.js +110 -0
- package/dist/cli/config.d.ts +30 -0
- package/dist/cli/config.js +207 -0
- package/dist/cli/constants.d.ts +15 -0
- package/dist/cli/constants.js +18 -0
- package/dist/cli/doctor.d.ts +11 -0
- package/dist/cli/doctor.js +82 -0
- package/dist/cli/formatters/github.d.ts +8 -0
- package/dist/cli/formatters/github.js +130 -0
- package/dist/cli/formatters/human.d.ts +6 -0
- package/dist/cli/formatters/human.js +107 -0
- package/dist/cli/formatters/json.d.ts +6 -0
- package/dist/cli/formatters/json.js +10 -0
- package/dist/cli/formatters/pr-comment.d.ts +12 -0
- package/dist/cli/formatters/pr-comment.js +101 -0
- package/dist/cli/formatters/types.d.ts +100 -0
- package/dist/cli/formatters/types.js +5 -0
- package/dist/cli/gate.d.ts +21 -0
- package/dist/cli/gate.js +175 -0
- package/dist/cli/index.d.ts +1 -0
- package/dist/cli/index.js +67 -23
- package/dist/cli/init.d.ts +7 -0
- package/dist/cli/init.js +69 -0
- package/dist/cli/policy-packs.d.ts +23 -0
- package/dist/cli/policy-packs.js +83 -0
- package/dist/cli/profiles.d.ts +28 -0
- package/dist/cli/profiles.js +30 -0
- package/dist/cli/reason-codes.d.ts +17 -0
- package/dist/cli/reason-codes.js +19 -0
- package/dist/cli/render/snippet.d.ts +5 -0
- package/dist/cli/render/snippet.js +15 -0
- package/dist/cli/render/sort.d.ts +10 -0
- package/dist/cli/render/sort.js +24 -0
- package/dist/cli/report/build-check-report.d.ts +19 -0
- package/dist/cli/report/build-check-report.js +124 -0
- package/dist/cli/share.d.ts +17 -0
- package/dist/cli/share.js +83 -0
- package/dist/client.d.ts +2 -2
- package/dist/client.js +144 -132
- package/dist/context.d.ts +1 -1
- package/dist/context.js +4 -6
- package/dist/errors.d.ts +2 -0
- package/dist/errors.js +116 -107
- package/dist/export.d.ts +6 -6
- package/dist/export.js +39 -33
- package/dist/index.d.ts +25 -24
- package/dist/index.js +62 -56
- package/dist/integrations/anthropic.d.ts +1 -1
- package/dist/integrations/anthropic.js +23 -19
- package/dist/integrations/openai-eval.d.ts +57 -0
- package/dist/integrations/openai-eval.js +230 -0
- package/dist/integrations/openai.d.ts +1 -1
- package/dist/integrations/openai.js +23 -19
- package/dist/local.d.ts +2 -2
- package/dist/local.js +25 -25
- package/dist/logger.d.ts +1 -1
- package/dist/logger.js +24 -28
- package/dist/matchers/index.d.ts +1 -0
- package/dist/matchers/index.js +6 -0
- package/dist/matchers/to-pass-gate.d.ts +29 -0
- package/dist/matchers/to-pass-gate.js +35 -0
- package/dist/pagination.d.ts +1 -1
- package/dist/pagination.js +6 -6
- package/dist/snapshot.js +24 -24
- package/dist/streaming.js +11 -11
- package/dist/testing.d.ts +6 -2
- package/dist/testing.js +30 -12
- package/dist/types.d.ts +22 -22
- package/dist/types.js +13 -13
- package/dist/utils/input-hash.d.ts +8 -0
- package/dist/utils/input-hash.js +38 -0
- package/dist/version.d.ts +7 -0
- package/dist/version.js +10 -0
- package/dist/workflows.d.ts +7 -7
- package/dist/workflows.js +44 -44
- package/package.json +102 -90
- package/dist/__tests__/assertions.test.d.ts +0 -1
- package/dist/__tests__/assertions.test.js +0 -288
- package/dist/__tests__/client.test.d.ts +0 -1
- package/dist/__tests__/client.test.js +0 -185
- package/dist/__tests__/testing.test.d.ts +0 -1
- package/dist/__tests__/testing.test.js +0 -230
- package/dist/__tests__/workflows.test.d.ts +0 -1
- package/dist/__tests__/workflows.test.js +0 -222
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* EvalAI config loader
|
|
4
|
+
* Discovery: evalai.config.json → evalai.config.js → evalai.config.cjs → package.json evalai
|
|
5
|
+
*/
|
|
6
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
7
|
+
if (k2 === undefined) k2 = k;
|
|
8
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
9
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
10
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
11
|
+
}
|
|
12
|
+
Object.defineProperty(o, k2, desc);
|
|
13
|
+
}) : (function(o, m, k, k2) {
|
|
14
|
+
if (k2 === undefined) k2 = k;
|
|
15
|
+
o[k2] = m[k];
|
|
16
|
+
}));
|
|
17
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
18
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
19
|
+
}) : function(o, v) {
|
|
20
|
+
o["default"] = v;
|
|
21
|
+
});
|
|
22
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
23
|
+
var ownKeys = function(o) {
|
|
24
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
25
|
+
var ar = [];
|
|
26
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
27
|
+
return ar;
|
|
28
|
+
};
|
|
29
|
+
return ownKeys(o);
|
|
30
|
+
};
|
|
31
|
+
return function (mod) {
|
|
32
|
+
if (mod && mod.__esModule) return mod;
|
|
33
|
+
var result = {};
|
|
34
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
35
|
+
__setModuleDefault(result, mod);
|
|
36
|
+
return result;
|
|
37
|
+
};
|
|
38
|
+
})();
|
|
39
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
40
|
+
exports.findConfigPath = findConfigPath;
|
|
41
|
+
exports.loadConfig = loadConfig;
|
|
42
|
+
exports.mergeConfigWithArgs = mergeConfigWithArgs;
|
|
43
|
+
const fs = __importStar(require("node:fs"));
|
|
44
|
+
const path = __importStar(require("node:path"));
|
|
45
|
+
const profiles_1 = require("./profiles");
|
|
46
|
+
/** Dedicated config file names, in the order they are tried inside each directory. */
const CONFIG_FILES = ["evalai.config.json", "evalai.config.js", "evalai.config.cjs"];
/**
 * Locate the nearest EvalAI config by walking from `cwd` up through every
 * ancestor directory, including the filesystem root itself.
 *
 * In each directory the dedicated config files are tried in CONFIG_FILES
 * order; if none exists, a package.json with a non-null `evalai` field is
 * accepted. A package.json that cannot be read or parsed is skipped.
 *
 * @param cwd Directory to start from (defaults to `process.cwd()`).
 * @returns Path of the matching config file or package.json, or `null` when
 *          nothing is found.
 */
function findConfigPath(cwd = process.cwd()) {
    let dir = path.resolve(cwd);
    for (;;) {
        for (const file of CONFIG_FILES) {
            const filePath = path.join(dir, file);
            if (fs.existsSync(filePath)) {
                return filePath;
            }
        }
        // Fall back to the `evalai` field of package.json in the same directory.
        const pkgPath = path.join(dir, "package.json");
        if (fs.existsSync(pkgPath)) {
            try {
                const pkg = JSON.parse(fs.readFileSync(pkgPath, "utf-8"));
                if (pkg.evalai != null) {
                    return pkgPath;
                }
            }
            catch {
                // Unreadable or malformed package.json: keep walking upwards.
            }
        }
        // path.dirname() of a root returns the root itself, so this is the stop
        // condition. Testing it AFTER the lookups means the root directory is
        // searched too (the previous loop exited before ever inspecting it).
        const parent = path.dirname(dir);
        if (parent === dir) {
            return null;
        }
        dir = parent;
    }
}
|
|
77
|
+
/**
|
|
78
|
+
* Load config from file system
|
|
79
|
+
*/
|
|
80
|
+
function loadConfig(cwd = process.cwd()) {
|
|
81
|
+
const configPath = findConfigPath(cwd);
|
|
82
|
+
if (!configPath)
|
|
83
|
+
return null;
|
|
84
|
+
try {
|
|
85
|
+
let config = null;
|
|
86
|
+
if (configPath.endsWith("package.json")) {
|
|
87
|
+
const pkg = JSON.parse(fs.readFileSync(configPath, "utf-8"));
|
|
88
|
+
config = pkg.evalai ?? null;
|
|
89
|
+
}
|
|
90
|
+
else {
|
|
91
|
+
const content = fs.readFileSync(configPath, "utf-8");
|
|
92
|
+
if (configPath.endsWith(".json")) {
|
|
93
|
+
config = JSON.parse(content);
|
|
94
|
+
}
|
|
95
|
+
else if (configPath.endsWith(".js") || configPath.endsWith(".cjs")) {
|
|
96
|
+
try {
|
|
97
|
+
config = JSON.parse(content);
|
|
98
|
+
}
|
|
99
|
+
catch {
|
|
100
|
+
return null;
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
if (!config)
|
|
105
|
+
return null;
|
|
106
|
+
if (config.packages && Object.keys(config.packages).length > 0) {
|
|
107
|
+
const configDir = path.dirname(configPath);
|
|
108
|
+
const rel = path.relative(configDir, path.resolve(cwd));
|
|
109
|
+
const relNorm = rel.split(path.sep).join("/");
|
|
110
|
+
const pkgConfig = config.packages[relNorm];
|
|
111
|
+
if (pkgConfig) {
|
|
112
|
+
return { ...config, ...pkgConfig, packages: config.packages };
|
|
113
|
+
}
|
|
114
|
+
for (const key of Object.keys(config.packages)) {
|
|
115
|
+
if (relNorm === key || relNorm.startsWith(`${key}/`)) {
|
|
116
|
+
return { ...config, ...config.packages[key], packages: config.packages };
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
return config;
|
|
121
|
+
}
|
|
122
|
+
catch {
|
|
123
|
+
return null;
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
/**
|
|
127
|
+
* Merge config with CLI args. Priority: args > profile > config > defaults.
|
|
128
|
+
*/
|
|
129
|
+
function mergeConfigWithArgs(config, args) {
|
|
130
|
+
const merged = {};
|
|
131
|
+
if (config) {
|
|
132
|
+
if (config.evaluationId)
|
|
133
|
+
merged.evaluationId = config.evaluationId;
|
|
134
|
+
if (config.baseUrl)
|
|
135
|
+
merged.baseUrl = config.baseUrl;
|
|
136
|
+
if (config.minScore != null)
|
|
137
|
+
merged.minScore = config.minScore;
|
|
138
|
+
if (config.minN != null)
|
|
139
|
+
merged.minN = config.minN;
|
|
140
|
+
if (config.maxDrop != null)
|
|
141
|
+
merged.maxDrop = config.maxDrop;
|
|
142
|
+
if (config.warnDrop != null)
|
|
143
|
+
merged.warnDrop = config.warnDrop;
|
|
144
|
+
if (config.allowWeakEvidence != null)
|
|
145
|
+
merged.allowWeakEvidence = config.allowWeakEvidence;
|
|
146
|
+
if (config.baseline)
|
|
147
|
+
merged.baseline = config.baseline;
|
|
148
|
+
if (config.profile)
|
|
149
|
+
merged.profile = config.profile;
|
|
150
|
+
}
|
|
151
|
+
// Profile defaults (from --profile or config.profile). Apply before args override.
|
|
152
|
+
const profileName = (args.profile ?? merged.profile);
|
|
153
|
+
if (profileName && profileName in profiles_1.PROFILES) {
|
|
154
|
+
const profile = profiles_1.PROFILES[profileName];
|
|
155
|
+
if (merged.minScore === undefined && args.minScore === undefined)
|
|
156
|
+
merged.minScore = profile.minScore;
|
|
157
|
+
if (merged.maxDrop === undefined && args.maxDrop === undefined)
|
|
158
|
+
merged.maxDrop = profile.maxDrop;
|
|
159
|
+
if (merged.warnDrop === undefined && args.warnDrop === undefined && "warnDrop" in profile)
|
|
160
|
+
merged.warnDrop = profile.warnDrop;
|
|
161
|
+
if (merged.minN === undefined && args.minN === undefined)
|
|
162
|
+
merged.minN = profile.minN;
|
|
163
|
+
if (merged.allowWeakEvidence === undefined && args.allowWeakEvidence === undefined)
|
|
164
|
+
merged.allowWeakEvidence = profile.allowWeakEvidence;
|
|
165
|
+
}
|
|
166
|
+
// Args override
|
|
167
|
+
if (args.evaluationId !== undefined && args.evaluationId !== "") {
|
|
168
|
+
merged.evaluationId = String(args.evaluationId);
|
|
169
|
+
}
|
|
170
|
+
if (args.baseUrl !== undefined && args.baseUrl !== "") {
|
|
171
|
+
merged.baseUrl = String(args.baseUrl);
|
|
172
|
+
}
|
|
173
|
+
if (args.minScore !== undefined) {
|
|
174
|
+
merged.minScore =
|
|
175
|
+
typeof args.minScore === "number" ? args.minScore : parseInt(String(args.minScore), 10);
|
|
176
|
+
}
|
|
177
|
+
if (args.maxDrop !== undefined) {
|
|
178
|
+
merged.maxDrop =
|
|
179
|
+
typeof args.maxDrop === "number" ? args.maxDrop : parseInt(String(args.maxDrop), 10);
|
|
180
|
+
}
|
|
181
|
+
if (args.warnDrop !== undefined) {
|
|
182
|
+
merged.warnDrop =
|
|
183
|
+
typeof args.warnDrop === "number" ? args.warnDrop : parseInt(String(args.warnDrop), 10);
|
|
184
|
+
}
|
|
185
|
+
if (args.minN !== undefined) {
|
|
186
|
+
merged.minN = typeof args.minN === "number" ? args.minN : parseInt(String(args.minN), 10);
|
|
187
|
+
}
|
|
188
|
+
if (args.allowWeakEvidence !== undefined) {
|
|
189
|
+
merged.allowWeakEvidence =
|
|
190
|
+
args.allowWeakEvidence === true ||
|
|
191
|
+
args.allowWeakEvidence === "true" ||
|
|
192
|
+
args.allowWeakEvidence === "1";
|
|
193
|
+
}
|
|
194
|
+
if (args.baseline !== undefined && args.baseline !== "") {
|
|
195
|
+
const b = String(args.baseline);
|
|
196
|
+
if (b === "auto" || b === "previous" || b === "production") {
|
|
197
|
+
merged.baseline = b;
|
|
198
|
+
}
|
|
199
|
+
else {
|
|
200
|
+
merged.baseline = "published";
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
if (args.profile !== undefined && args.profile !== "") {
|
|
204
|
+
merged.profile = String(args.profile);
|
|
205
|
+
}
|
|
206
|
+
return merged;
|
|
207
|
+
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Standardized exit codes for evalai check.
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Exit-code table: each member is a distinct numeric literal type (0 through 8).
 * NOTE(review): apart from WARN_REGRESSION the meaning of each code is only
 * implied by its name here — confirm against the check command implementation.
 */
export declare const EXIT: {
|
|
5
|
+
readonly PASS: 0;
|
|
6
|
+
readonly SCORE_BELOW: 1;
|
|
7
|
+
readonly REGRESSION: 2;
|
|
8
|
+
readonly POLICY_VIOLATION: 3;
|
|
9
|
+
readonly API_ERROR: 4;
|
|
10
|
+
readonly BAD_ARGS: 5;
|
|
11
|
+
readonly LOW_N: 6;
|
|
12
|
+
readonly WEAK_EVIDENCE: 7;
|
|
13
|
+
/** Near-regression: score dropped within warn band (warnDrop ≤ drop < maxDrop) */
|
|
14
|
+
readonly WARN_REGRESSION: 8;
|
|
15
|
+
};
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.EXIT = void 0;
|
|
4
|
+
/**
|
|
5
|
+
* Standardized exit codes for evalai check.
|
|
6
|
+
*/
|
|
7
|
+
exports.EXIT = {
|
|
8
|
+
PASS: 0,
|
|
9
|
+
SCORE_BELOW: 1,
|
|
10
|
+
REGRESSION: 2,
|
|
11
|
+
POLICY_VIOLATION: 3,
|
|
12
|
+
API_ERROR: 4,
|
|
13
|
+
BAD_ARGS: 5,
|
|
14
|
+
LOW_N: 6,
|
|
15
|
+
WEAK_EVIDENCE: 7,
|
|
16
|
+
/** Near-regression: score dropped within warn band (warnDrop ≤ drop < maxDrop) */
|
|
17
|
+
WARN_REGRESSION: 8,
|
|
18
|
+
};
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* evalai doctor — Verify CI/CD setup.
|
|
3
|
+
* Uses the same quality endpoint as check — if doctor passes, check works.
|
|
4
|
+
*/
|
|
5
|
+
/** Fully resolved inputs for a doctor run; all four values are passed to the quality API call. */
export type DoctorArgs = {
|
|
6
|
+
/** Base URL of the EvalAI API to probe. */
baseUrl: string;
|
|
7
|
+
/** API key sent with the request. */
apiKey: string;
|
|
8
|
+
/** Identifier of the evaluation whose latest quality result is requested. */
evaluationId: string;
|
|
9
|
+
/** Baseline selector forwarded to the quality endpoint. */
baseline: "published" | "previous" | "production";
|
|
10
|
+
};
|
|
11
|
+
/**
 * Run the doctor check for the given CLI arguments.
 * @param argv Command-line arguments in `--key value` form.
 * @returns Resolves to 0 when the quality endpoint answers successfully and a
 *          baseline is present, 1 otherwise.
 */
export declare function runDoctor(argv: string[]): Promise<number>;
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* evalai doctor — Verify CI/CD setup.
|
|
4
|
+
* Uses the same quality endpoint as check — if doctor passes, check works.
|
|
5
|
+
*/
|
|
6
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
|
+
exports.runDoctor = runDoctor;
|
|
8
|
+
const api_1 = require("./api");
|
|
9
|
+
const config_1 = require("./config");
|
|
10
|
+
function parseDoctorArgs(argv) {
|
|
11
|
+
const args = {};
|
|
12
|
+
for (let i = 0; i < argv.length; i++) {
|
|
13
|
+
const arg = argv[i];
|
|
14
|
+
if (arg.startsWith("--")) {
|
|
15
|
+
const key = arg.slice(2);
|
|
16
|
+
const next = argv[i + 1];
|
|
17
|
+
if (next !== undefined && !next.startsWith("--")) {
|
|
18
|
+
args[key] = next;
|
|
19
|
+
i++;
|
|
20
|
+
}
|
|
21
|
+
else {
|
|
22
|
+
args[key] = "true";
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
const baseUrl = args.baseUrl || process.env.EVALAI_BASE_URL || "http://localhost:3000";
|
|
27
|
+
const apiKey = args.apiKey || process.env.EVALAI_API_KEY || "";
|
|
28
|
+
let evaluationId = args.evaluationId || "";
|
|
29
|
+
const baseline = (args.baseline === "previous"
|
|
30
|
+
? "previous"
|
|
31
|
+
: args.baseline === "production"
|
|
32
|
+
? "production"
|
|
33
|
+
: "published");
|
|
34
|
+
if (!evaluationId) {
|
|
35
|
+
const config = (0, config_1.loadConfig)(process.cwd());
|
|
36
|
+
const merged = (0, config_1.mergeConfigWithArgs)(config, {
|
|
37
|
+
evaluationId: args.evaluationId,
|
|
38
|
+
baseUrl: args.baseUrl || process.env.EVALAI_BASE_URL,
|
|
39
|
+
baseline: args.baseline,
|
|
40
|
+
});
|
|
41
|
+
if (merged.evaluationId)
|
|
42
|
+
evaluationId = String(merged.evaluationId);
|
|
43
|
+
}
|
|
44
|
+
if (!apiKey) {
|
|
45
|
+
return { ok: false, message: "Set EVALAI_API_KEY" };
|
|
46
|
+
}
|
|
47
|
+
if (!evaluationId) {
|
|
48
|
+
const configPath = (0, config_1.findConfigPath)(process.cwd());
|
|
49
|
+
if (!configPath) {
|
|
50
|
+
return { ok: false, message: "Run npx evalai init" };
|
|
51
|
+
}
|
|
52
|
+
return { ok: false, message: "Set evaluationId in evalai.config.json" };
|
|
53
|
+
}
|
|
54
|
+
return { baseUrl, apiKey, evaluationId, baseline };
|
|
55
|
+
}
|
|
56
|
+
async function runDoctor(argv) {
|
|
57
|
+
const parsed = parseDoctorArgs(argv);
|
|
58
|
+
if (!("baseUrl" in parsed)) {
|
|
59
|
+
console.error(parsed.message);
|
|
60
|
+
return 1;
|
|
61
|
+
}
|
|
62
|
+
const args = parsed;
|
|
63
|
+
// Call exact quality endpoint: GET /api/quality?action=latest&evaluationId=&baseline=
|
|
64
|
+
const result = await (0, api_1.fetchQualityLatest)(args.baseUrl, args.apiKey, args.evaluationId, args.baseline);
|
|
65
|
+
if (!result.ok) {
|
|
66
|
+
if (result.status === 0) {
|
|
67
|
+
console.error(`Quality API: Network failure — ${result.body}`);
|
|
68
|
+
}
|
|
69
|
+
else {
|
|
70
|
+
console.error(`Quality API: ${result.status} — ${result.body}`);
|
|
71
|
+
}
|
|
72
|
+
return 1;
|
|
73
|
+
}
|
|
74
|
+
const { data } = result;
|
|
75
|
+
// Baseline: if quality returns baselineMissing, suggest fix
|
|
76
|
+
if (data.baselineMissing === true) {
|
|
77
|
+
console.error("Publish a run or use --baseline previous");
|
|
78
|
+
return 1;
|
|
79
|
+
}
|
|
80
|
+
console.log("✓ EvalAI doctor: OK");
|
|
81
|
+
return 0;
|
|
82
|
+
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GitHub formatter for evalai check.
|
|
3
|
+
* - stdout: minimal (verdict + score + link) + ::error annotations for failed cases
|
|
4
|
+
* - Step summary: full Markdown written to GITHUB_STEP_SUMMARY (not stdout)
|
|
5
|
+
*/
|
|
6
|
+
import type { CheckReport } from "./types";
|
|
7
|
+
/**
 * Append a Markdown summary of the report to the file named by the
 * GITHUB_STEP_SUMMARY environment variable. Does nothing when that variable
 * is unset; a failed write is ignored.
 */
export declare function appendStepSummary(report: CheckReport): void;
|
|
8
|
+
/**
 * Build the stdout text for GitHub runs: `::error` annotations for failed
 * cases followed by the verdict, score and optional dashboard link.
 * Also calls appendStepSummary() for the same report.
 */
export declare function formatGitHub(report: CheckReport): string;
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* GitHub formatter for evalai check.
|
|
4
|
+
* - stdout: minimal (verdict + score + link) + ::error annotations for failed cases
|
|
5
|
+
* - Step summary: full Markdown written to GITHUB_STEP_SUMMARY (not stdout)
|
|
6
|
+
*/
|
|
7
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
8
|
+
if (k2 === undefined) k2 = k;
|
|
9
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
10
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
11
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
12
|
+
}
|
|
13
|
+
Object.defineProperty(o, k2, desc);
|
|
14
|
+
}) : (function(o, m, k, k2) {
|
|
15
|
+
if (k2 === undefined) k2 = k;
|
|
16
|
+
o[k2] = m[k];
|
|
17
|
+
}));
|
|
18
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
19
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
20
|
+
}) : function(o, v) {
|
|
21
|
+
o["default"] = v;
|
|
22
|
+
});
|
|
23
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
24
|
+
var ownKeys = function(o) {
|
|
25
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
26
|
+
var ar = [];
|
|
27
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
28
|
+
return ar;
|
|
29
|
+
};
|
|
30
|
+
return ownKeys(o);
|
|
31
|
+
};
|
|
32
|
+
return function (mod) {
|
|
33
|
+
if (mod && mod.__esModule) return mod;
|
|
34
|
+
var result = {};
|
|
35
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
36
|
+
__setModuleDefault(result, mod);
|
|
37
|
+
return result;
|
|
38
|
+
};
|
|
39
|
+
})();
|
|
40
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
41
|
+
exports.appendStepSummary = appendStepSummary;
|
|
42
|
+
exports.formatGitHub = formatGitHub;
|
|
43
|
+
const fs = __importStar(require("node:fs"));
|
|
44
|
+
const snippet_1 = require("../render/snippet");
|
|
45
|
+
const ANNOTATION_MAX = 10;
|
|
46
|
+
/**
 * Escape text for use as the message of a GitHub Actions workflow command
 * (`::error …::message`).
 *
 * The runner percent-decodes the message, so `%` has to be encoded first;
 * otherwise a literal "%0A" or "%25" in the text would be rewritten when
 * decoded. Carriage returns are dropped and newlines become "%0A".
 *
 * @param s Raw message text.
 * @returns Message that is safe to place after the final `::` of a workflow command.
 */
function escapeAnnotationMessage(s) {
    return s
        .replace(/%/g, "%25") // must run before any other escape introduces a '%'
        .replace(/\r/g, "")
        .replace(/\n/g, "%0A");
}
|
|
49
|
+
function formatAnnotation(fc) {
|
|
50
|
+
const id = fc.testCaseId ?? fc.name ?? "unknown";
|
|
51
|
+
const reason = fc.reason ?? fc.outputSnippet ?? fc.output ?? "no output";
|
|
52
|
+
const msg = escapeAnnotationMessage(`TestCase ${id} failed - ${(0, snippet_1.truncateSnippet)(reason, 100)}`);
|
|
53
|
+
return `::error title=EvalAI regression::${msg}`;
|
|
54
|
+
}
|
|
55
|
+
function appendStepSummary(report) {
|
|
56
|
+
const path = typeof process !== "undefined" && process.env?.GITHUB_STEP_SUMMARY;
|
|
57
|
+
if (!path)
|
|
58
|
+
return;
|
|
59
|
+
const lines = [];
|
|
60
|
+
const passed = report.verdict === "pass";
|
|
61
|
+
const warned = report.verdict === "warn";
|
|
62
|
+
lines.push("## EvalAI Gate");
|
|
63
|
+
lines.push("");
|
|
64
|
+
lines.push(passed && !warned
|
|
65
|
+
? "✅ **PASSED**"
|
|
66
|
+
: warned
|
|
67
|
+
? `⚠️ **WARNED**: ${report.reasonMessage ?? report.reasonCode}`
|
|
68
|
+
: `❌ **FAILED**: ${report.reasonMessage ?? report.reasonCode}`);
|
|
69
|
+
lines.push("");
|
|
70
|
+
const deltaStr = report.baselineScore != null && report.delta != null
|
|
71
|
+
? ` (baseline ${report.baselineScore}, ${report.delta >= 0 ? "+" : ""}${report.delta} pts)`
|
|
72
|
+
: "";
|
|
73
|
+
lines.push(`**Score:** ${report.score ?? 0}/100${deltaStr}`);
|
|
74
|
+
lines.push("");
|
|
75
|
+
const failedCases = report.failedCases ?? [];
|
|
76
|
+
if (failedCases.length > 0) {
|
|
77
|
+
lines.push(`### ${failedCases.length} failing case${failedCases.length === 1 ? "" : "s"}`);
|
|
78
|
+
lines.push("");
|
|
79
|
+
for (const fc of failedCases.slice(0, 10)) {
|
|
80
|
+
const label = fc.name ?? fc.input ?? "(unnamed)";
|
|
81
|
+
const exp = (0, snippet_1.truncateSnippet)(fc.expectedOutput ?? fc.expectedSnippet, 80);
|
|
82
|
+
const out = (0, snippet_1.truncateSnippet)(fc.output ?? fc.outputSnippet, 80);
|
|
83
|
+
const reason = out ? `got "${out}"` : "no output";
|
|
84
|
+
lines.push(`- **${(0, snippet_1.truncateSnippet)(label, 60)}** — expected: ${exp || "(any)"}, ${reason}`);
|
|
85
|
+
}
|
|
86
|
+
if (failedCases.length > 10) {
|
|
87
|
+
lines.push(`- _+ ${failedCases.length - 10} more_`);
|
|
88
|
+
}
|
|
89
|
+
lines.push("");
|
|
90
|
+
}
|
|
91
|
+
if (report.dashboardUrl) {
|
|
92
|
+
lines.push(`[View Dashboard](${report.dashboardUrl})`);
|
|
93
|
+
lines.push("");
|
|
94
|
+
}
|
|
95
|
+
try {
|
|
96
|
+
fs.appendFileSync(path, lines.join("\n"), "utf8");
|
|
97
|
+
}
|
|
98
|
+
catch {
|
|
99
|
+
// Non-fatal: step summary is best-effort
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
function formatGitHub(report) {
|
|
103
|
+
const stdoutLines = [];
|
|
104
|
+
// Emit ::error annotations for failed cases (up to N)
|
|
105
|
+
const failedCases = report.failedCases ?? [];
|
|
106
|
+
const toAnnotate = failedCases.slice(0, ANNOTATION_MAX);
|
|
107
|
+
for (const fc of toAnnotate) {
|
|
108
|
+
stdoutLines.push(formatAnnotation(fc));
|
|
109
|
+
}
|
|
110
|
+
// Minimal summary: verdict + score + link
|
|
111
|
+
const passed = report.verdict === "pass";
|
|
112
|
+
const warned = report.verdict === "warn";
|
|
113
|
+
const failReason = report.reasonMessage ?? report.reasonCode;
|
|
114
|
+
if (passed && !warned)
|
|
115
|
+
stdoutLines.push("\n✓ EvalAI gate PASSED");
|
|
116
|
+
else if (warned)
|
|
117
|
+
stdoutLines.push(`\n⚠ EvalAI gate WARNED: ${failReason}`);
|
|
118
|
+
else
|
|
119
|
+
stdoutLines.push(`\n✗ EvalAI gate FAILED: ${failReason}`);
|
|
120
|
+
const deltaStr = report.baselineScore != null && report.delta != null
|
|
121
|
+
? ` (baseline ${report.baselineScore}, ${report.delta >= 0 ? "+" : ""}${report.delta} pts)`
|
|
122
|
+
: "";
|
|
123
|
+
stdoutLines.push(`Score: ${report.score ?? 0}/100${deltaStr}`);
|
|
124
|
+
if (report.dashboardUrl) {
|
|
125
|
+
stdoutLines.push(`Dashboard: ${report.dashboardUrl}`);
|
|
126
|
+
}
|
|
127
|
+
// Write full markdown to GITHUB_STEP_SUMMARY (not stdout)
|
|
128
|
+
appendStepSummary(report);
|
|
129
|
+
return stdoutLines.join("\n");
|
|
130
|
+
}
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Human-readable formatter for evalai check output.
|
|
4
|
+
* Deterministic: verdict → score → failures → link → hint.
|
|
5
|
+
*/
|
|
6
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
|
+
exports.formatHuman = formatHuman;
|
|
8
|
+
const snippet_1 = require("../render/snippet");
|
|
9
|
+
const TOP_N = 3;
|
|
10
|
+
function formatHuman(report) {
|
|
11
|
+
const lines = [];
|
|
12
|
+
const passed = report.verdict === "pass";
|
|
13
|
+
const warned = report.verdict === "warn";
|
|
14
|
+
const failReason = report.reasonMessage;
|
|
15
|
+
lines.push(passed && !warned
|
|
16
|
+
? "\n✓ EvalAI gate PASSED"
|
|
17
|
+
: warned
|
|
18
|
+
? `\n⚠ EvalAI gate WARNED: ${failReason ?? report.reasonCode}`
|
|
19
|
+
: `\n✗ EvalAI gate FAILED: ${failReason ?? report.reasonCode}`);
|
|
20
|
+
const deltaStr = report.baselineScore != null && report.delta != null
|
|
21
|
+
? ` (baseline ${report.baselineScore}, ${report.delta >= 0 ? "+" : ""}${report.delta} pts)`
|
|
22
|
+
: "";
|
|
23
|
+
lines.push(`Score: ${report.score ?? 0}/100${deltaStr}`);
|
|
24
|
+
const failedCases = report.failedCases ?? [];
|
|
25
|
+
if (failedCases.length > 0) {
|
|
26
|
+
const toShow = failedCases.slice(0, TOP_N);
|
|
27
|
+
lines.push(`${failedCases.length} failing case${failedCases.length === 1 ? "" : "s"}:`);
|
|
28
|
+
for (const fc of toShow) {
|
|
29
|
+
const label = fc.name ?? fc.input ?? "(unnamed)";
|
|
30
|
+
const exp = (0, snippet_1.truncateSnippet)(fc.expectedOutput ?? fc.expectedSnippet, 50);
|
|
31
|
+
const out = (0, snippet_1.truncateSnippet)(fc.output ?? fc.outputSnippet, 50);
|
|
32
|
+
const reason = out ? `got "${out}"` : "no output";
|
|
33
|
+
lines.push(` - "${(0, snippet_1.truncateSnippet)(label, 50)}" → expected: ${exp || "(any)"}, ${reason}`);
|
|
34
|
+
}
|
|
35
|
+
if (failedCases.length > toShow.length) {
|
|
36
|
+
lines.push(` + ${failedCases.length - toShow.length} more`);
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
if (report.dashboardUrl) {
|
|
40
|
+
lines.push(`Dashboard: ${report.dashboardUrl}`);
|
|
41
|
+
}
|
|
42
|
+
if (!passed || warned) {
|
|
43
|
+
lines.push("Next: View full report above, fix failing cases, or adjust gate with --minScore / --maxDrop / --warnDrop");
|
|
44
|
+
}
|
|
45
|
+
if (report.explain &&
|
|
46
|
+
(report.breakdown01 || report.contribPts || report.flags?.length || report.policyEvidence)) {
|
|
47
|
+
lines.push("");
|
|
48
|
+
lines.push("--- Explain ---");
|
|
49
|
+
if (report.contribPts) {
|
|
50
|
+
const cp = report.contribPts;
|
|
51
|
+
const pts = [];
|
|
52
|
+
if (cp.passRatePts != null)
|
|
53
|
+
pts.push(`passRate: ${cp.passRatePts}`);
|
|
54
|
+
if (cp.safetyPts != null)
|
|
55
|
+
pts.push(`safety: ${cp.safetyPts}`);
|
|
56
|
+
if (cp.compliancePts != null)
|
|
57
|
+
pts.push(`compliance: ${cp.compliancePts}`);
|
|
58
|
+
if (cp.performancePts != null)
|
|
59
|
+
pts.push(`performance: ${cp.performancePts}`);
|
|
60
|
+
if (pts.length)
|
|
61
|
+
lines.push(`Contrib pts: ${pts.join(", ")}`);
|
|
62
|
+
}
|
|
63
|
+
if (report.breakdown01) {
|
|
64
|
+
const b = report.breakdown01;
|
|
65
|
+
const parts = [];
|
|
66
|
+
if (b.passRate != null)
|
|
67
|
+
parts.push(`passRate=${b.passRate}`);
|
|
68
|
+
if (b.safety != null)
|
|
69
|
+
parts.push(`safety=${b.safety}`);
|
|
70
|
+
if (b.judge != null)
|
|
71
|
+
parts.push(`judge=${b.judge}`);
|
|
72
|
+
if (b.schema != null)
|
|
73
|
+
parts.push(`schema=${b.schema}`);
|
|
74
|
+
if (b.latency != null)
|
|
75
|
+
parts.push(`latency=${b.latency}`);
|
|
76
|
+
if (b.cost != null)
|
|
77
|
+
parts.push(`cost=${b.cost}`);
|
|
78
|
+
if (parts.length)
|
|
79
|
+
lines.push(`Breakdown: ${parts.join(", ")}`);
|
|
80
|
+
}
|
|
81
|
+
if (report.flags && report.flags.length > 0) {
|
|
82
|
+
lines.push(`Flags: ${report.flags.join(", ")}`);
|
|
83
|
+
}
|
|
84
|
+
if (report.thresholds) {
|
|
85
|
+
const t = report.thresholds;
|
|
86
|
+
const parts = [];
|
|
87
|
+
if (t.minScore != null)
|
|
88
|
+
parts.push(`minScore=${t.minScore}`);
|
|
89
|
+
if (t.maxDrop != null)
|
|
90
|
+
parts.push(`maxDrop=${t.maxDrop}`);
|
|
91
|
+
if (t.minN != null)
|
|
92
|
+
parts.push(`minN=${t.minN}`);
|
|
93
|
+
if (parts.length)
|
|
94
|
+
lines.push(`Thresholds: ${parts.join(", ")}`);
|
|
95
|
+
}
|
|
96
|
+
if (report.policyEvidence) {
|
|
97
|
+
const pe = report.policyEvidence;
|
|
98
|
+
lines.push(`Policy sub-check failed: ${pe.failedCheck ?? "unknown"}`);
|
|
99
|
+
if (pe.remediation)
|
|
100
|
+
lines.push(`Remediation: ${pe.remediation}`);
|
|
101
|
+
if (pe.snapshot && Object.keys(pe.snapshot).length > 0) {
|
|
102
|
+
lines.push(`Snapshot: ${JSON.stringify(pe.snapshot)}`);
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
return lines.join("\n");
|
|
107
|
+
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* JSON formatter for evalai check.
|
|
4
|
+
* Outputs only JSON, no extra logs.
|
|
5
|
+
*/
|
|
6
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
|
+
exports.formatJson = formatJson;
|
|
8
|
+
/**
 * Serialise the report as compact, single-line JSON (no indentation and no
 * replacer), suitable for machine consumption.
 *
 * @param report Check report to serialise.
 * @returns The JSON text.
 */
function formatJson(report) {
    const compact = JSON.stringify(report);
    return compact;
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PR comment markdown builder for evalai check --pr-comment-out.
|
|
3
|
+
* Produces deterministic markdown for GitHub Action to post as PR comment.
|
|
4
|
+
*/
|
|
5
|
+
import type { CheckReport } from "./types";
|
|
6
|
+
/**
|
|
7
|
+
* Hidden marker (an HTML comment) that lets a GitHub Action find and update an existing comment (sticky update).
|
|
8
|
+
* The Action should: 1) post body from file 2) search PR comments for this marker 3) update if found, else create.
|
|
9
|
+
* Exported for use in Action scripts.
|
|
10
|
+
*/
|
|
11
|
+
export declare const PR_COMMENT_MARKER = "<!-- evalai-gate-comment -->";
|
|
12
|
+
/**
 * Build the Markdown body of the PR comment for a check report.
 * @param report Check report to render.
 * @returns Markdown text.
 */
export declare function buildPrComment(report: CheckReport): string;
|