@pauly4010/evalai-sdk 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +46 -0
- package/README.md +108 -9
- package/dist/cli/api.d.ts +79 -0
- package/dist/cli/api.js +74 -0
- package/dist/cli/check.d.ts +16 -13
- package/dist/cli/check.js +117 -127
- package/dist/cli/ci-context.d.ts +6 -0
- package/dist/cli/ci-context.js +51 -0
- package/dist/cli/config.d.ts +24 -0
- package/dist/cli/config.js +158 -0
- package/dist/cli/constants.d.ts +13 -0
- package/dist/cli/constants.js +16 -0
- package/dist/cli/doctor.d.ts +11 -0
- package/dist/cli/doctor.js +82 -0
- package/dist/cli/formatters/github.d.ts +8 -0
- package/dist/cli/formatters/github.js +119 -0
- package/dist/cli/formatters/human.d.ts +6 -0
- package/dist/cli/formatters/human.js +92 -0
- package/dist/cli/formatters/json.d.ts +6 -0
- package/dist/cli/formatters/json.js +10 -0
- package/dist/cli/formatters/types.d.ts +76 -0
- package/dist/cli/formatters/types.js +5 -0
- package/dist/cli/gate.d.ts +13 -0
- package/dist/cli/gate.js +108 -0
- package/dist/cli/index.d.ts +1 -0
- package/dist/cli/index.js +31 -5
- package/dist/cli/init.d.ts +7 -0
- package/dist/cli/init.js +69 -0
- package/dist/cli/render/snippet.d.ts +5 -0
- package/dist/cli/render/snippet.js +15 -0
- package/dist/cli/render/sort.d.ts +10 -0
- package/dist/cli/render/sort.js +24 -0
- package/dist/cli/report/build-check-report.d.ts +16 -0
- package/dist/cli/report/build-check-report.js +94 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +4 -1
- package/dist/integrations/openai-eval.d.ts +53 -0
- package/dist/integrations/openai-eval.js +226 -0
- package/dist/utils/input-hash.d.ts +8 -0
- package/dist/utils/input-hash.js +38 -0
- package/package.json +10 -3
- package/.env.example +0 -0
- package/ADDITIONAL_ISSUES_FOUND.md +0 -174
- package/dist/__tests__/assertions.test.d.ts +0 -1
- package/dist/__tests__/assertions.test.js +0 -288
- package/dist/__tests__/client.test.d.ts +0 -1
- package/dist/__tests__/client.test.js +0 -185
- package/dist/__tests__/testing.test.d.ts +0 -1
- package/dist/__tests__/testing.test.js +0 -230
- package/dist/__tests__/workflows.test.d.ts +0 -1
- package/dist/__tests__/workflows.test.js +0 -222
- package/evalai-sdk-1.2.0.tgz +0 -0
- package/postcss.config.mjs +0 -2
package/dist/cli/check.js
CHANGED
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
* --minN <n> Fail if total test cases < n (low sample size)
|
|
16
16
|
* --allowWeakEvidence If false (default), fail when evidenceLevel is 'weak'
|
|
17
17
|
* --policy <name> Enforce a compliance policy (e.g. HIPAA, SOC2, GDPR)
|
|
18
|
-
* --baseline <mode>
|
|
18
|
+
* --baseline <mode> Baseline comparison mode: "published" (default), "previous", or "production"
|
|
19
19
|
* --evaluationId <id> Required. The evaluation to gate on.
|
|
20
20
|
* --baseUrl <url> API base URL (default: EVALAI_BASE_URL or http://localhost:3000)
|
|
21
21
|
* --apiKey <key> API key (default: EVALAI_API_KEY env var)
|
|
@@ -38,17 +38,17 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
38
38
|
exports.EXIT = void 0;
|
|
39
39
|
exports.parseArgs = parseArgs;
|
|
40
40
|
exports.runCheck = runCheck;
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
};
|
|
41
|
+
const config_1 = require("./config");
|
|
42
|
+
const api_1 = require("./api");
|
|
43
|
+
const ci_context_1 = require("./ci-context");
|
|
44
|
+
const gate_1 = require("./gate");
|
|
45
|
+
const build_check_report_1 = require("./report/build-check-report");
|
|
46
|
+
const human_1 = require("./formatters/human");
|
|
47
|
+
const json_1 = require("./formatters/json");
|
|
48
|
+
const github_1 = require("./formatters/github");
|
|
49
|
+
const constants_1 = require("./constants");
|
|
50
|
+
var constants_2 = require("./constants");
|
|
51
|
+
Object.defineProperty(exports, "EXIT", { enumerable: true, get: function () { return constants_2.EXIT; } });
|
|
52
52
|
function parseArgs(argv) {
|
|
53
53
|
const args = {};
|
|
54
54
|
for (let i = 0; i < argv.length; i++) {
|
|
@@ -61,144 +61,134 @@ function parseArgs(argv) {
|
|
|
61
61
|
i++;
|
|
62
62
|
}
|
|
63
63
|
else {
|
|
64
|
-
args[key] = 'true';
|
|
64
|
+
args[key] = 'true';
|
|
65
65
|
}
|
|
66
66
|
}
|
|
67
67
|
}
|
|
68
|
-
|
|
68
|
+
let baseUrl = args.baseUrl || process.env.EVALAI_BASE_URL || 'http://localhost:3000';
|
|
69
69
|
const apiKey = args.apiKey || process.env.EVALAI_API_KEY || '';
|
|
70
|
-
|
|
70
|
+
let minScore = parseInt(args.minScore || '0');
|
|
71
71
|
const maxDrop = args.maxDrop ? parseInt(args.maxDrop) : undefined;
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
72
|
+
let minN = args.minN ? parseInt(args.minN) : undefined;
|
|
73
|
+
let allowWeakEvidence = args.allowWeakEvidence === 'true' || args.allowWeakEvidence === '1';
|
|
74
|
+
let evaluationId = args.evaluationId || '';
|
|
75
75
|
const policy = args.policy || undefined;
|
|
76
|
-
const
|
|
76
|
+
const formatRaw = args.format || 'human';
|
|
77
|
+
const format = formatRaw === 'json' ? 'json' : formatRaw === 'github' ? 'github' : 'human';
|
|
78
|
+
const explain = args.explain === 'true' || args.explain === '1';
|
|
79
|
+
const onFail = args.onFail === 'import' ? 'import' : undefined;
|
|
80
|
+
let baseline = (args.baseline === 'previous'
|
|
81
|
+
? 'previous'
|
|
82
|
+
: args.baseline === 'production'
|
|
83
|
+
? 'production'
|
|
84
|
+
: 'published');
|
|
85
|
+
if (!evaluationId) {
|
|
86
|
+
const config = (0, config_1.loadConfig)(process.cwd());
|
|
87
|
+
const merged = (0, config_1.mergeConfigWithArgs)(config, {
|
|
88
|
+
evaluationId: args.evaluationId,
|
|
89
|
+
baseUrl: args.baseUrl || process.env.EVALAI_BASE_URL,
|
|
90
|
+
minScore: args.minScore,
|
|
91
|
+
minN: args.minN,
|
|
92
|
+
allowWeakEvidence: args.allowWeakEvidence,
|
|
93
|
+
baseline: args.baseline,
|
|
94
|
+
});
|
|
95
|
+
if (merged.evaluationId)
|
|
96
|
+
evaluationId = merged.evaluationId;
|
|
97
|
+
if (merged.baseUrl)
|
|
98
|
+
baseUrl = merged.baseUrl;
|
|
99
|
+
if (merged.minScore != null && !args.minScore)
|
|
100
|
+
minScore = merged.minScore ?? 0;
|
|
101
|
+
if (merged.minN != null && !args.minN)
|
|
102
|
+
minN = merged.minN;
|
|
103
|
+
if (merged.allowWeakEvidence != null && !args.allowWeakEvidence)
|
|
104
|
+
allowWeakEvidence = merged.allowWeakEvidence ?? false;
|
|
105
|
+
if (merged.baseline && !args.baseline)
|
|
106
|
+
baseline = merged.baseline;
|
|
107
|
+
}
|
|
77
108
|
if (!apiKey) {
|
|
78
|
-
|
|
79
|
-
process.exit(exports.EXIT.BAD_ARGS);
|
|
109
|
+
return { ok: false, exitCode: constants_1.EXIT.BAD_ARGS, message: 'Error: --apiKey or EVALAI_API_KEY is required' };
|
|
80
110
|
}
|
|
81
111
|
if (!evaluationId) {
|
|
82
|
-
|
|
83
|
-
process.exit(exports.EXIT.BAD_ARGS);
|
|
112
|
+
return { ok: false, exitCode: constants_1.EXIT.BAD_ARGS, message: 'Run npx evalai init and paste your evaluationId, or pass --evaluationId.' };
|
|
84
113
|
}
|
|
85
114
|
if (isNaN(minScore) || minScore < 0 || minScore > 100) {
|
|
86
|
-
|
|
87
|
-
process.exit(exports.EXIT.BAD_ARGS);
|
|
115
|
+
return { ok: false, exitCode: constants_1.EXIT.BAD_ARGS, message: 'Error: --minScore must be 0-100' };
|
|
88
116
|
}
|
|
89
117
|
if (minN !== undefined && (isNaN(minN) || minN < 1)) {
|
|
90
|
-
|
|
91
|
-
process.exit(exports.EXIT.BAD_ARGS);
|
|
118
|
+
return { ok: false, exitCode: constants_1.EXIT.BAD_ARGS, message: 'Error: --minN must be a positive number' };
|
|
92
119
|
}
|
|
93
|
-
return {
|
|
120
|
+
return {
|
|
121
|
+
ok: true,
|
|
122
|
+
args: { baseUrl, apiKey, minScore, maxDrop, minN, allowWeakEvidence, evaluationId, policy, baseline, format, explain, onFail },
|
|
123
|
+
};
|
|
94
124
|
}
|
|
95
125
|
async function runCheck(args) {
|
|
96
|
-
const
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
try {
|
|
101
|
-
scoreRes = await fetch(scoreUrl, { headers });
|
|
102
|
-
}
|
|
103
|
-
catch (err) {
|
|
104
|
-
console.error(`EvalAI gate ERROR: Network failure — ${err.message}`);
|
|
105
|
-
return exports.EXIT.API_ERROR;
|
|
106
|
-
}
|
|
107
|
-
if (!scoreRes.ok) {
|
|
108
|
-
const body = await scoreRes.text();
|
|
109
|
-
console.error(`EvalAI gate ERROR: API returned ${scoreRes.status} — ${body}`);
|
|
110
|
-
return exports.EXIT.API_ERROR;
|
|
111
|
-
}
|
|
112
|
-
const data = (await scoreRes.json());
|
|
113
|
-
const score = data?.score ?? 0;
|
|
114
|
-
const total = data?.total ?? null;
|
|
115
|
-
const evidenceLevel = data?.evidenceLevel ?? null;
|
|
116
|
-
const baselineScore = data?.baselineScore ?? null;
|
|
117
|
-
const regressionDelta = data?.regressionDelta ?? null;
|
|
118
|
-
const breakdown = data?.breakdown ?? {};
|
|
119
|
-
// ── Gate: minN (low sample size) ──
|
|
120
|
-
if (args.minN !== undefined && total !== null && total < args.minN) {
|
|
121
|
-
console.error(`\n✗ FAILED: total test cases (${total}) < minN (${args.minN})`);
|
|
122
|
-
return exports.EXIT.LOW_N;
|
|
123
|
-
}
|
|
124
|
-
// ── Gate: allowWeakEvidence ──
|
|
125
|
-
if (!args.allowWeakEvidence && evidenceLevel === 'weak') {
|
|
126
|
-
console.error(`\n✗ FAILED: evidence level is 'weak' (use --allowWeakEvidence to permit)`);
|
|
127
|
-
return exports.EXIT.WEAK_EVIDENCE;
|
|
128
|
-
}
|
|
129
|
-
// ── Print summary ──
|
|
130
|
-
console.log('┌─────────────────────────────────────────┐');
|
|
131
|
-
console.log(`│ EvalAI Quality Score: ${String(score).padStart(3)}/100 │`);
|
|
132
|
-
console.log('├─────────────────────────────────────────┤');
|
|
133
|
-
if (baselineScore !== null) {
|
|
134
|
-
const delta = regressionDelta ?? 0;
|
|
135
|
-
const arrow = delta >= 0 ? '▲' : '▼';
|
|
136
|
-
console.log(`│ Baseline: ${baselineScore} ${arrow} ${Math.abs(delta)} pts │`);
|
|
137
|
-
}
|
|
138
|
-
if (breakdown) {
|
|
139
|
-
const pct = (v) => `${Math.round((v ?? 0) * 100)}%`;
|
|
140
|
-
console.log(`│ Pass: ${pct(breakdown.passRate)} Safety: ${pct(breakdown.safety)} Judge: ${pct(breakdown.judge)} │`);
|
|
141
|
-
}
|
|
142
|
-
if (data?.flags && data.flags.length > 0) {
|
|
143
|
-
console.log(`│ Flags: ${data.flags.join(', ').padEnd(30)} │`);
|
|
144
|
-
}
|
|
145
|
-
console.log('└─────────────────────────────────────────┘');
|
|
146
|
-
// ── 2. Gate: minimum score ──
|
|
147
|
-
if (args.minScore > 0 && score < args.minScore) {
|
|
148
|
-
console.error(`\n✗ FAILED: score=${score} < minScore=${args.minScore}`);
|
|
149
|
-
return exports.EXIT.SCORE_BELOW;
|
|
150
|
-
}
|
|
151
|
-
// ── 3. Gate: maximum drop from baseline ──
|
|
152
|
-
if (args.maxDrop !== undefined && regressionDelta !== null && regressionDelta < -(args.maxDrop)) {
|
|
153
|
-
console.error(`\n✗ FAILED: score dropped ${Math.abs(regressionDelta)} pts from baseline ` +
|
|
154
|
-
`(max allowed: ${args.maxDrop})`);
|
|
155
|
-
return exports.EXIT.REGRESSION;
|
|
156
|
-
}
|
|
157
|
-
// ── 4. Gate: policy compliance ──
|
|
158
|
-
if (args.policy) {
|
|
159
|
-
const policyUrl = `${args.baseUrl}/api/quality?evaluationId=${args.evaluationId}&action=latest`;
|
|
160
|
-
// Check policy-specific flags
|
|
161
|
-
const policyFlags = (data?.flags ?? []);
|
|
162
|
-
// Policy mapping: each policy has a set of required conditions
|
|
163
|
-
const policyChecks = {
|
|
164
|
-
HIPAA: { requiredSafetyRate: 0.99, maxFlags: ['SAFETY_RISK'] },
|
|
165
|
-
SOC2: { requiredSafetyRate: 0.95, maxFlags: ['SAFETY_RISK', 'LOW_PASS_RATE'] },
|
|
166
|
-
GDPR: { requiredSafetyRate: 0.95, maxFlags: ['SAFETY_RISK'] },
|
|
167
|
-
PCI_DSS: { requiredSafetyRate: 0.99, maxFlags: ['SAFETY_RISK', 'LOW_PASS_RATE'] },
|
|
168
|
-
FINRA_4511: { requiredSafetyRate: 0.95, maxFlags: ['SAFETY_RISK'] },
|
|
169
|
-
};
|
|
170
|
-
const policyName = args.policy.toUpperCase();
|
|
171
|
-
const check = policyChecks[policyName];
|
|
172
|
-
if (!check) {
|
|
173
|
-
console.error(`\n✗ Unknown policy: ${args.policy}. Available: ${Object.keys(policyChecks).join(', ')}`);
|
|
174
|
-
return exports.EXIT.BAD_ARGS;
|
|
126
|
+
const qualityResult = await (0, api_1.fetchQualityLatest)(args.baseUrl, args.apiKey, args.evaluationId, args.baseline);
|
|
127
|
+
if (!qualityResult.ok) {
|
|
128
|
+
if (qualityResult.status === 0) {
|
|
129
|
+
console.error(`EvalAI gate ERROR: Network failure — ${qualityResult.body}`);
|
|
175
130
|
}
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
if (safetyRate < check.requiredSafetyRate) {
|
|
179
|
-
console.error(`\n✗ POLICY VIOLATION (${policyName}): safety rate ${Math.round(safetyRate * 100)}% < ` +
|
|
180
|
-
`required ${Math.round(check.requiredSafetyRate * 100)}%`);
|
|
181
|
-
return exports.EXIT.POLICY_VIOLATION;
|
|
131
|
+
else {
|
|
132
|
+
console.error(`EvalAI gate ERROR: API returned ${qualityResult.status} — ${qualityResult.body}`);
|
|
182
133
|
}
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
134
|
+
return constants_1.EXIT.API_ERROR;
|
|
135
|
+
}
|
|
136
|
+
const { data: quality, requestId } = qualityResult;
|
|
137
|
+
const evaluationRunId = quality?.evaluationRunId;
|
|
138
|
+
let runDetails = null;
|
|
139
|
+
if (evaluationRunId != null) {
|
|
140
|
+
const runRes = await (0, api_1.fetchRunDetails)(args.baseUrl, args.apiKey, args.evaluationId, evaluationRunId);
|
|
141
|
+
if (runRes.ok)
|
|
142
|
+
runDetails = runRes.data;
|
|
143
|
+
}
|
|
144
|
+
const gateResult = (0, gate_1.evaluateGate)(args, quality);
|
|
145
|
+
const report = (0, build_check_report_1.buildCheckReport)({
|
|
146
|
+
args,
|
|
147
|
+
quality,
|
|
148
|
+
runDetails,
|
|
149
|
+
gateResult,
|
|
150
|
+
requestId,
|
|
151
|
+
});
|
|
152
|
+
const formatted = args.format === 'json'
|
|
153
|
+
? (0, json_1.formatJson)(report)
|
|
154
|
+
: args.format === 'github'
|
|
155
|
+
? (0, github_1.formatGitHub)(report)
|
|
156
|
+
: (0, human_1.formatHuman)(report);
|
|
157
|
+
console.log(formatted);
|
|
158
|
+
// --onFail import: when gate fails, import run with CI context
|
|
159
|
+
if (!gateResult.passed && args.onFail === 'import' && runDetails?.results && quality?.evaluationRunId) {
|
|
160
|
+
const importResults = runDetails.results
|
|
161
|
+
.filter((r) => r.testCaseId != null && (r.status === 'passed' || r.status === 'failed'))
|
|
162
|
+
.map((r) => ({
|
|
163
|
+
testCaseId: r.testCaseId,
|
|
164
|
+
status: r.status,
|
|
165
|
+
output: r.output ?? '',
|
|
166
|
+
latencyMs: r.durationMs,
|
|
167
|
+
assertionsJson: r.assertionsJson,
|
|
168
|
+
}));
|
|
169
|
+
if (importResults.length > 0) {
|
|
170
|
+
const ci = (0, ci_context_1.captureCiContext)();
|
|
171
|
+
const idempotencyKey = ci ? (0, ci_context_1.computeIdempotencyKey)(args.evaluationId, ci) : undefined;
|
|
172
|
+
const importRes = await (0, api_1.importRunOnFail)(args.baseUrl, args.apiKey, args.evaluationId, importResults, { idempotencyKey, ci, importClientVersion: 'evalai-cli' });
|
|
173
|
+
if (!importRes.ok) {
|
|
174
|
+
console.error(`EvalAI import (onFail): ${importRes.status} — ${importRes.body}`);
|
|
175
|
+
}
|
|
188
176
|
}
|
|
189
|
-
console.log(`\n✓ Policy ${policyName}: COMPLIANT`);
|
|
190
177
|
}
|
|
191
|
-
|
|
192
|
-
return exports.EXIT.PASS;
|
|
178
|
+
return gateResult.exitCode;
|
|
193
179
|
}
|
|
194
180
|
// Main entry point
|
|
195
181
|
const isDirectRun = typeof require !== 'undefined' && require.main === module;
|
|
196
182
|
if (isDirectRun) {
|
|
197
|
-
const
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
183
|
+
const parsed = parseArgs(process.argv.slice(2));
|
|
184
|
+
if (!parsed.ok) {
|
|
185
|
+
console.error(parsed.message);
|
|
186
|
+
process.exit(parsed.exitCode);
|
|
187
|
+
}
|
|
188
|
+
runCheck(parsed.args)
|
|
189
|
+
.then((code) => process.exit(code))
|
|
190
|
+
.catch((err) => {
|
|
191
|
+
console.error(`EvalAI gate ERROR: ${err instanceof Error ? err.message : String(err)}`);
|
|
192
|
+
process.exit(constants_1.EXIT.API_ERROR);
|
|
203
193
|
});
|
|
204
194
|
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CI context capture and idempotency key for --onFail import.
|
|
3
|
+
*/
|
|
4
|
+
import type { CiContext } from './api';
|
|
5
|
+
/**
 * Capture CI metadata (provider, repo, sha, branch, run URL, actor) from the
 * GITHUB_* environment variables.
 *
 * @returns The captured context, or `undefined` when neither
 *   GITHUB_REPOSITORY nor GITHUB_SHA is set.
 */
export declare function captureCiContext(): CiContext | undefined;
|
|
6
|
+
/**
 * Derive an idempotency key for `--onFail import`: the SHA-256 hex digest of
 * `repo.workflow.job.sha.evaluationId`.
 *
 * @param evaluationId Evaluation the import belongs to.
 * @param ci Captured CI context; its `repo` and `sha` are preferred over the
 *   GITHUB_REPOSITORY / GITHUB_SHA environment variables.
 * @returns The hex digest, or `undefined` when repo or sha cannot be determined.
 */
export declare function computeIdempotencyKey(evaluationId: string, ci: CiContext): string | undefined;
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* CI context capture and idempotency key for --onFail import.
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.captureCiContext = captureCiContext;
|
|
7
|
+
exports.computeIdempotencyKey = computeIdempotencyKey;
|
|
8
|
+
const crypto_1 = require("crypto");
|
|
9
|
+
/**
 * Capture CI metadata from the environment.
 *
 * Repository, commit, ref, run id and actor are read from the GITHUB_*
 * variables only; the provider label additionally recognises GITLAB_CI and
 * CIRCLECI.
 *
 * The run URL is built from GITHUB_SERVER_URL so that it is also correct on
 * GitHub Enterprise Server; it falls back to https://github.com when that
 * variable is unset or empty.
 *
 * @returns `{ provider, repo, sha, branch, runUrl, actor }`, or `undefined`
 *   when neither GITHUB_REPOSITORY nor GITHUB_SHA is set.
 */
function captureCiContext() {
    const repo = process.env.GITHUB_REPOSITORY;
    const sha = process.env.GITHUB_SHA;
    const ref = process.env.GITHUB_REF;
    const runId = process.env.GITHUB_RUN_ID;
    const actor = process.env.GITHUB_ACTOR;
    // Without a repository or a commit there is nothing useful to report.
    if (!repo && !sha)
        return undefined;
    let provider = 'unknown';
    if (process.env.GITHUB_ACTIONS)
        provider = 'github';
    else if (process.env.GITLAB_CI)
        provider = 'gitlab';
    else if (process.env.CIRCLECI)
        provider = 'circle';
    let runUrl;
    if (repo && runId) {
        // Strip trailing slashes so the joined URL never contains "//" after the host.
        const serverUrl = (process.env.GITHUB_SERVER_URL || 'https://github.com').replace(/\/+$/, '');
        runUrl = `${serverUrl}/${repo}/actions/runs/${runId}`;
    }
    return {
        provider,
        repo,
        sha,
        // Branch refs are shortened to the bare branch name; any other ref is passed through unchanged.
        branch: ref?.startsWith('refs/heads/') ? ref.slice('refs/heads/'.length) : ref,
        runUrl,
        actor,
    };
}
|
|
39
|
+
/**
 * Build the idempotency key used when importing a failed run.
 *
 * The key is the SHA-256 hex digest of `repo.workflow.job.sha.evaluationId`.
 * `repo` and `sha` come from the CI context first and fall back to the
 * GITHUB_REPOSITORY / GITHUB_SHA environment variables; workflow and job are
 * read from GITHUB_WORKFLOW / GITHUB_JOB and default to the empty string.
 *
 * @returns The hex digest, or `undefined` if repo or sha is unavailable.
 */
function computeIdempotencyKey(evaluationId, ci) {
    const env = process.env;
    const repository = ci.repo ?? env.GITHUB_REPOSITORY;
    const commit = ci.sha ?? env.GITHUB_SHA ?? '';
    if (!(repository && commit)) {
        return undefined;
    }
    const segments = [
        repository,
        env.GITHUB_WORKFLOW ?? '',
        env.GITHUB_JOB ?? '',
        commit,
        `${evaluationId}`,
    ];
    return hashSha256(segments.join('.'));
}
|
|
49
|
+
/**
 * Hash a UTF-8 string with SHA-256.
 *
 * @returns The digest as a lowercase hex string.
 */
function hashSha256(input) {
    const hasher = (0, crypto_1.createHash)('sha256');
    hasher.update(input, 'utf8');
    return hasher.digest('hex');
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* EvalAI config loader
|
|
3
|
+
* Discovery: evalai.config.json → evalai.config.js → evalai.config.cjs → package.json evalai
|
|
4
|
+
*/
|
|
5
|
+
/** Shape of the EvalAI config read from a config file or the package.json "evalai" field. */
export interface EvalAIConfig {
    /** The evaluation to gate on. */
    evaluationId?: string;
    /** API base URL. */
    baseUrl?: string;
    /** Minimum score for the gate; `evalai check` rejects values outside 0-100. */
    minScore?: number;
    /** Minimum number of test cases; `evalai check` rejects values below 1. */
    minN?: number;
    /** When false (the CLI default), the check fails if evidenceLevel is 'weak'. */
    allowWeakEvidence?: boolean;
    /** Baseline comparison mode; the CLI defaults to 'published'. */
    baseline?: 'published' | 'previous' | 'production';
}
|
|
13
|
+
/**
|
|
14
|
+
* Find config file path in directory, walking up to root
|
|
15
|
+
*/
|
|
16
|
+
export declare function findConfigPath(cwd?: string): string | null;
|
|
17
|
+
/**
|
|
18
|
+
* Load config from file system
|
|
19
|
+
*/
|
|
20
|
+
export declare function loadConfig(cwd?: string): EvalAIConfig | null;
|
|
21
|
+
/**
|
|
22
|
+
* Merge config with CLI args. Priority: args > config > defaults.
|
|
23
|
+
*/
|
|
24
|
+
export declare function mergeConfigWithArgs(config: EvalAIConfig | null, args: Partial<Record<string, string | number | boolean>>): Partial<EvalAIConfig>;
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* EvalAI config loader
|
|
4
|
+
* Discovery: evalai.config.json → evalai.config.js → evalai.config.cjs → package.json evalai
|
|
5
|
+
*/
|
|
6
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
7
|
+
if (k2 === undefined) k2 = k;
|
|
8
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
9
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
10
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
11
|
+
}
|
|
12
|
+
Object.defineProperty(o, k2, desc);
|
|
13
|
+
}) : (function(o, m, k, k2) {
|
|
14
|
+
if (k2 === undefined) k2 = k;
|
|
15
|
+
o[k2] = m[k];
|
|
16
|
+
}));
|
|
17
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
18
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
19
|
+
}) : function(o, v) {
|
|
20
|
+
o["default"] = v;
|
|
21
|
+
});
|
|
22
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
23
|
+
var ownKeys = function(o) {
|
|
24
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
25
|
+
var ar = [];
|
|
26
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
27
|
+
return ar;
|
|
28
|
+
};
|
|
29
|
+
return ownKeys(o);
|
|
30
|
+
};
|
|
31
|
+
return function (mod) {
|
|
32
|
+
if (mod && mod.__esModule) return mod;
|
|
33
|
+
var result = {};
|
|
34
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
35
|
+
__setModuleDefault(result, mod);
|
|
36
|
+
return result;
|
|
37
|
+
};
|
|
38
|
+
})();
|
|
39
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
40
|
+
exports.findConfigPath = findConfigPath;
|
|
41
|
+
exports.loadConfig = loadConfig;
|
|
42
|
+
exports.mergeConfigWithArgs = mergeConfigWithArgs;
|
|
43
|
+
const fs = __importStar(require("fs"));
|
|
44
|
+
const path = __importStar(require("path"));
|
|
45
|
+
const CONFIG_FILES = [
|
|
46
|
+
'evalai.config.json',
|
|
47
|
+
'evalai.config.js',
|
|
48
|
+
'evalai.config.cjs',
|
|
49
|
+
];
|
|
50
|
+
/**
 * Find the nearest EvalAI config, walking from `cwd` up to and including the
 * filesystem root.
 *
 * In each directory the dedicated config files (CONFIG_FILES, in order) are
 * checked first, then a package.json that carries a non-null `evalai` field.
 *
 * @param cwd Directory to start from (defaults to process.cwd()).
 * @returns Path of the first match, or null when no config is found.
 */
function findConfigPath(cwd = process.cwd()) {
    let dir = path.resolve(cwd);
    for (;;) {
        for (const file of CONFIG_FILES) {
            const filePath = path.join(dir, file);
            if (fs.existsSync(filePath)) {
                return filePath;
            }
        }
        // Check package.json for evalai field
        const pkgPath = path.join(dir, 'package.json');
        if (fs.existsSync(pkgPath)) {
            try {
                const pkg = JSON.parse(fs.readFileSync(pkgPath, 'utf-8'));
                if (pkg.evalai != null) {
                    return pkgPath;
                }
            }
            catch {
                // Unreadable or malformed package.json: treat as "no config here" and keep walking.
            }
        }
        // The root is its own parent. Stopping only AFTER it has been inspected
        // means a config placed at the root (or a cwd that is the root) is found.
        const parent = path.dirname(dir);
        if (parent === dir) {
            return null;
        }
        dir = parent;
    }
}
|
|
80
|
+
/**
 * Load the EvalAI config discovered from `cwd`.
 *
 * - package.json: returns its `evalai` field (null when absent).
 * - *.json: returns the parsed file.
 * - *.js / *.cjs: only supported when the file content is plain JSON; the
 *   file is never required/executed, to avoid side effects.
 *
 * Any read or parse failure yields null rather than an exception.
 *
 * @param cwd Directory to start discovery from (defaults to process.cwd()).
 * @returns The config object, or null when none could be loaded.
 */
function loadConfig(cwd = process.cwd()) {
    const configPath = findConfigPath(cwd);
    if (!configPath) {
        return null;
    }
    const parseFile = () => JSON.parse(fs.readFileSync(configPath, 'utf-8'));
    try {
        if (configPath.endsWith('package.json')) {
            const manifest = parseFile();
            return manifest.evalai ?? null;
        }
        const isJson = configPath.endsWith('.json');
        const isScript = configPath.endsWith('.js') || configPath.endsWith('.cjs');
        // Script configs are accepted only if they happen to contain JSON; a
        // parse failure falls through to the catch below and yields null.
        return isJson || isScript ? parseFile() : null;
    }
    catch {
        return null;
    }
}
|
|
113
|
+
/**
|
|
114
|
+
* Merge config with CLI args. Priority: args > config > defaults.
|
|
115
|
+
*/
|
|
116
|
+
function mergeConfigWithArgs(config, args) {
|
|
117
|
+
const merged = {};
|
|
118
|
+
if (config) {
|
|
119
|
+
if (config.evaluationId)
|
|
120
|
+
merged.evaluationId = config.evaluationId;
|
|
121
|
+
if (config.baseUrl)
|
|
122
|
+
merged.baseUrl = config.baseUrl;
|
|
123
|
+
if (config.minScore != null)
|
|
124
|
+
merged.minScore = config.minScore;
|
|
125
|
+
if (config.minN != null)
|
|
126
|
+
merged.minN = config.minN;
|
|
127
|
+
if (config.allowWeakEvidence != null)
|
|
128
|
+
merged.allowWeakEvidence = config.allowWeakEvidence;
|
|
129
|
+
if (config.baseline)
|
|
130
|
+
merged.baseline = config.baseline;
|
|
131
|
+
}
|
|
132
|
+
// Args override
|
|
133
|
+
if (args.evaluationId !== undefined && args.evaluationId !== '') {
|
|
134
|
+
merged.evaluationId = String(args.evaluationId);
|
|
135
|
+
}
|
|
136
|
+
if (args.baseUrl !== undefined && args.baseUrl !== '') {
|
|
137
|
+
merged.baseUrl = String(args.baseUrl);
|
|
138
|
+
}
|
|
139
|
+
if (args.minScore !== undefined) {
|
|
140
|
+
merged.minScore = typeof args.minScore === 'number' ? args.minScore : parseInt(String(args.minScore), 10);
|
|
141
|
+
}
|
|
142
|
+
if (args.minN !== undefined) {
|
|
143
|
+
merged.minN = typeof args.minN === 'number' ? args.minN : parseInt(String(args.minN), 10);
|
|
144
|
+
}
|
|
145
|
+
if (args.allowWeakEvidence !== undefined) {
|
|
146
|
+
merged.allowWeakEvidence = args.allowWeakEvidence === true || args.allowWeakEvidence === 'true' || args.allowWeakEvidence === '1';
|
|
147
|
+
}
|
|
148
|
+
if (args.baseline !== undefined && args.baseline !== '') {
|
|
149
|
+
const b = String(args.baseline);
|
|
150
|
+
if (b === 'previous' || b === 'production') {
|
|
151
|
+
merged.baseline = b;
|
|
152
|
+
}
|
|
153
|
+
else {
|
|
154
|
+
merged.baseline = 'published';
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
return merged;
|
|
158
|
+
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Standardized exit codes for evalai check.
|
|
3
|
+
*/
|
|
4
|
+
export declare const EXIT: {
|
|
5
|
+
readonly PASS: 0;
|
|
6
|
+
readonly SCORE_BELOW: 1;
|
|
7
|
+
readonly REGRESSION: 2;
|
|
8
|
+
readonly POLICY_VIOLATION: 3;
|
|
9
|
+
readonly API_ERROR: 4;
|
|
10
|
+
readonly BAD_ARGS: 5;
|
|
11
|
+
readonly LOW_N: 6;
|
|
12
|
+
readonly WEAK_EVIDENCE: 7;
|
|
13
|
+
};
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.EXIT = void 0;
|
|
4
|
+
/**
 * Standardized exit codes for evalai check.
 *
 * NOTE(review): the per-gate meanings noted below follow how the pre-1.5.0
 * runCheck returned these codes; the gate logic now lives in ./gate, so
 * confirm against that module.
 */
exports.EXIT = {
    PASS: 0, // gate passed
    SCORE_BELOW: 1, // presumably: score below --minScore
    REGRESSION: 2, // presumably: drop from baseline exceeds --maxDrop
    POLICY_VIOLATION: 3, // presumably: --policy requirements not met
    API_ERROR: 4, // network failure or non-OK API response
    BAD_ARGS: 5, // missing or invalid CLI arguments
    LOW_N: 6, // presumably: fewer test cases than --minN
    WEAK_EVIDENCE: 7, // presumably: evidence level 'weak' without --allowWeakEvidence
};
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* evalai doctor — Verify CI/CD setup.
|
|
3
|
+
* Uses the same quality endpoint as check — if doctor passes, check works.
|
|
4
|
+
*/
|
|
5
|
+
/** Resolved inputs for `evalai doctor`. */
export type DoctorArgs = {
    /** API base URL (--baseUrl, else EVALAI_BASE_URL, else http://localhost:3000). */
    baseUrl: string;
    /** API key (--apiKey, else EVALAI_API_KEY). */
    apiKey: string;
    /** Evaluation to probe (--evaluationId, else the value from the config file). */
    evaluationId: string;
    /** Baseline comparison mode; defaults to 'published'. */
    baseline: 'published' | 'previous' | 'production';
};
|
|
11
|
+
/**
 * Run the doctor check.
 *
 * @param argv CLI arguments in `--flag value` form.
 * @returns 0 when the quality endpoint responds OK and does not report a
 *   missing baseline; 1 on bad arguments, API/network failure, or a missing baseline.
 */
export declare function runDoctor(argv: string[]): Promise<number>;
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* evalai doctor — Verify CI/CD setup.
|
|
4
|
+
* Uses the same quality endpoint as check — if doctor passes, check works.
|
|
5
|
+
*/
|
|
6
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
|
+
exports.runDoctor = runDoctor;
|
|
8
|
+
const config_1 = require("./config");
|
|
9
|
+
const api_1 = require("./api");
|
|
10
|
+
/**
 * Parse `evalai doctor` arguments and resolve them against env vars and the
 * config file.
 *
 * Flags are `--key value`; a flag with no following value (or followed by
 * another `--flag`) is recorded as the string 'true'.
 *
 * Resolution mirrors `evalai check`: when --evaluationId is not passed, the
 * config file is consulted and may supply evaluationId, baseUrl (only if
 * neither --baseUrl nor EVALAI_BASE_URL is set) and baseline (only if
 * --baseline is not passed). Keeping this identical to check is what makes
 * "if doctor passes, check works" hold — previously the config's baseUrl and
 * baseline were computed but discarded here.
 *
 * @param argv CLI arguments.
 * @returns `{ baseUrl, apiKey, evaluationId, baseline }` on success, or
 *   `{ ok: false, message }` when the API key or evaluation id is missing.
 */
function parseDoctorArgs(argv) {
    const args = {};
    for (let i = 0; i < argv.length; i++) {
        const arg = argv[i];
        if (arg.startsWith('--')) {
            const key = arg.slice(2);
            const next = argv[i + 1];
            if (next !== undefined && !next.startsWith('--')) {
                args[key] = next;
                i++;
            }
            else {
                args[key] = 'true';
            }
        }
    }
    let baseUrl = args.baseUrl || process.env.EVALAI_BASE_URL || 'http://localhost:3000';
    const apiKey = args.apiKey || process.env.EVALAI_API_KEY || '';
    let evaluationId = args.evaluationId || '';
    let baseline = (args.baseline === 'previous'
        ? 'previous'
        : args.baseline === 'production'
            ? 'production'
            : 'published');
    if (!evaluationId) {
        const config = (0, config_1.loadConfig)(process.cwd());
        const merged = (0, config_1.mergeConfigWithArgs)(config, {
            evaluationId: args.evaluationId,
            baseUrl: args.baseUrl || process.env.EVALAI_BASE_URL,
            baseline: args.baseline,
        });
        if (merged.evaluationId)
            evaluationId = String(merged.evaluationId);
        // Same precedence as `evalai check`: flag/env already won inside the merge,
        // so a config baseUrl only surfaces here when neither was provided.
        if (merged.baseUrl)
            baseUrl = merged.baseUrl;
        if (merged.baseline && !args.baseline)
            baseline = merged.baseline;
    }
    if (!apiKey) {
        return { ok: false, message: 'Set EVALAI_API_KEY' };
    }
    if (!evaluationId) {
        // Distinguish "no config at all" from "config exists but lacks evaluationId".
        const configPath = (0, config_1.findConfigPath)(process.cwd());
        if (!configPath) {
            return { ok: false, message: 'Run npx evalai init' };
        }
        return { ok: false, message: 'Set evaluationId in evalai.config.json' };
    }
    return { baseUrl, apiKey, evaluationId, baseline };
}
|
|
56
|
+
/**
 * Verify the CI setup by calling the same quality endpoint that
 * `evalai check` uses.
 *
 * Diagnostics go to stderr; the success line goes to stdout.
 *
 * @param argv CLI arguments.
 * @returns 0 when the endpoint answers OK and a baseline exists; 1 on bad
 *   arguments, network/API failure, or a missing baseline.
 */
async function runDoctor(argv) {
    const parsed = parseDoctorArgs(argv);
    const parseFailed = !('baseUrl' in parsed);
    if (parseFailed) {
        console.error(parsed.message);
        return 1;
    }
    const { baseUrl, apiKey, evaluationId, baseline } = parsed;
    // Call exact quality endpoint: GET /api/quality?action=latest&evaluationId=&baseline=
    const result = await (0, api_1.fetchQualityLatest)(baseUrl, apiKey, evaluationId, baseline);
    if (!result.ok) {
        // Status 0 is reported as a network failure rather than an HTTP status.
        const detail = result.status === 0
            ? `Quality API: Network failure — ${result.body}`
            : `Quality API: ${result.status} — ${result.body}`;
        console.error(detail);
        return 1;
    }
    // Baseline: if quality returns baselineMissing, suggest fix
    if (result.data.baselineMissing === true) {
        console.error('Publish a run or use --baseline previous');
        return 1;
    }
    console.log('✓ EvalAI doctor: OK');
    return 0;
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GitHub formatter for evalai check.
|
|
3
|
+
* - stdout: minimal (verdict + score + link) + ::error annotations for failed cases
|
|
4
|
+
* - Step summary: full Markdown written to GITHUB_STEP_SUMMARY (not stdout)
|
|
5
|
+
*/
|
|
6
|
+
import type { CheckReport } from './types';
|
|
7
|
+
export declare function appendStepSummary(report: CheckReport): void;
|
|
8
|
+
export declare function formatGitHub(report: CheckReport): string;
|