@req2rank/core 0.1.0-r7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/adaptive-calibration.d.ts +13 -0
- package/dist/adaptive-calibration.d.ts.map +1 -0
- package/dist/adaptive-calibration.js +39 -0
- package/dist/adaptive-calibration.js.map +1 -0
- package/dist/adaptive-calibration.test.d.ts +2 -0
- package/dist/adaptive-calibration.test.d.ts.map +1 -0
- package/dist/adaptive-calibration.test.js +20 -0
- package/dist/adaptive-calibration.test.js.map +1 -0
- package/dist/checkpoint-key.d.ts +3 -0
- package/dist/checkpoint-key.d.ts.map +1 -0
- package/dist/checkpoint-key.js +29 -0
- package/dist/checkpoint-key.js.map +1 -0
- package/dist/checkpoint-key.test.d.ts +2 -0
- package/dist/checkpoint-key.test.d.ts.map +1 -0
- package/dist/checkpoint-key.test.js +32 -0
- package/dist/checkpoint-key.test.js.map +1 -0
- package/dist/config.d.ts +205 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +85 -0
- package/dist/config.js.map +1 -0
- package/dist/domain-taxonomy.d.ts +13 -0
- package/dist/domain-taxonomy.d.ts.map +1 -0
- package/dist/domain-taxonomy.js +12 -0
- package/dist/domain-taxonomy.js.map +1 -0
- package/dist/evaluation-panel.d.ts +27 -0
- package/dist/evaluation-panel.d.ts.map +1 -0
- package/dist/evaluation-panel.js +158 -0
- package/dist/evaluation-panel.js.map +1 -0
- package/dist/evaluation-panel.test.d.ts +2 -0
- package/dist/evaluation-panel.test.d.ts.map +1 -0
- package/dist/evaluation-panel.test.js +185 -0
- package/dist/evaluation-panel.test.js.map +1 -0
- package/dist/evidence-chain.d.ts +11 -0
- package/dist/evidence-chain.d.ts.map +1 -0
- package/dist/evidence-chain.js +33 -0
- package/dist/evidence-chain.js.map +1 -0
- package/dist/evidence-chain.test.d.ts +2 -0
- package/dist/evidence-chain.test.d.ts.map +1 -0
- package/dist/evidence-chain.test.js +16 -0
- package/dist/evidence-chain.test.js.map +1 -0
- package/dist/execution-engine.d.ts +29 -0
- package/dist/execution-engine.d.ts.map +1 -0
- package/dist/execution-engine.js +102 -0
- package/dist/execution-engine.js.map +1 -0
- package/dist/execution-engine.test.d.ts +2 -0
- package/dist/execution-engine.test.d.ts.map +1 -0
- package/dist/execution-engine.test.js +86 -0
- package/dist/execution-engine.test.js.map +1 -0
- package/dist/hub-client.d.ts +21 -0
- package/dist/hub-client.d.ts.map +1 -0
- package/dist/hub-client.js +99 -0
- package/dist/hub-client.js.map +1 -0
- package/dist/hub-client.test.d.ts +2 -0
- package/dist/hub-client.test.d.ts.map +1 -0
- package/dist/hub-client.test.js +129 -0
- package/dist/hub-client.test.js.map +1 -0
- package/dist/index.d.ts +19 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +19 -0
- package/dist/index.js.map +1 -0
- package/dist/leaderboard-query.d.ts +29 -0
- package/dist/leaderboard-query.d.ts.map +1 -0
- package/dist/leaderboard-query.js +59 -0
- package/dist/leaderboard-query.js.map +1 -0
- package/dist/leaderboard-query.test.d.ts +2 -0
- package/dist/leaderboard-query.test.d.ts.map +1 -0
- package/dist/leaderboard-query.test.js +34 -0
- package/dist/leaderboard-query.test.js.map +1 -0
- package/dist/local-store.d.ts +18 -0
- package/dist/local-store.d.ts.map +1 -0
- package/dist/local-store.js +181 -0
- package/dist/local-store.js.map +1 -0
- package/dist/local-store.test.d.ts +2 -0
- package/dist/local-store.test.d.ts.map +1 -0
- package/dist/local-store.test.js +153 -0
- package/dist/local-store.test.js.map +1 -0
- package/dist/pipeline-stage-handoff.test.d.ts +2 -0
- package/dist/pipeline-stage-handoff.test.d.ts.map +1 -0
- package/dist/pipeline-stage-handoff.test.js +290 -0
- package/dist/pipeline-stage-handoff.test.js.map +1 -0
- package/dist/pipeline.d.ts +67 -0
- package/dist/pipeline.d.ts.map +1 -0
- package/dist/pipeline.js +493 -0
- package/dist/pipeline.js.map +1 -0
- package/dist/providers/anthropic.d.ts +8 -0
- package/dist/providers/anthropic.d.ts.map +1 -0
- package/dist/providers/anthropic.js +45 -0
- package/dist/providers/anthropic.js.map +1 -0
- package/dist/providers/base.d.ts +36 -0
- package/dist/providers/base.d.ts.map +1 -0
- package/dist/providers/base.js +47 -0
- package/dist/providers/base.js.map +1 -0
- package/dist/providers/custom.d.ts +6 -0
- package/dist/providers/custom.d.ts.map +1 -0
- package/dist/providers/custom.js +6 -0
- package/dist/providers/custom.js.map +1 -0
- package/dist/providers/google.d.ts +8 -0
- package/dist/providers/google.d.ts.map +1 -0
- package/dist/providers/google.js +48 -0
- package/dist/providers/google.js.map +1 -0
- package/dist/providers/index.d.ts +31 -0
- package/dist/providers/index.d.ts.map +1 -0
- package/dist/providers/index.js +63 -0
- package/dist/providers/index.js.map +1 -0
- package/dist/providers/openai.d.ts +18 -0
- package/dist/providers/openai.d.ts.map +1 -0
- package/dist/providers/openai.js +111 -0
- package/dist/providers/openai.js.map +1 -0
- package/dist/providers/providers.test.d.ts +2 -0
- package/dist/providers/providers.test.d.ts.map +1 -0
- package/dist/providers/providers.test.js +71 -0
- package/dist/providers/providers.test.js.map +1 -0
- package/dist/requirement-generator.d.ts +29 -0
- package/dist/requirement-generator.d.ts.map +1 -0
- package/dist/requirement-generator.js +358 -0
- package/dist/requirement-generator.js.map +1 -0
- package/dist/requirement-generator.test.d.ts +2 -0
- package/dist/requirement-generator.test.d.ts.map +1 -0
- package/dist/requirement-generator.test.js +182 -0
- package/dist/requirement-generator.test.js.map +1 -0
- package/dist/sandbox.d.ts +32 -0
- package/dist/sandbox.d.ts.map +1 -0
- package/dist/sandbox.js +124 -0
- package/dist/sandbox.js.map +1 -0
- package/dist/sandbox.test.d.ts +2 -0
- package/dist/sandbox.test.d.ts.map +1 -0
- package/dist/sandbox.test.js +20 -0
- package/dist/sandbox.test.js.map +1 -0
- package/dist/scoring-engine.d.ts +15 -0
- package/dist/scoring-engine.d.ts.map +1 -0
- package/dist/scoring-engine.js +109 -0
- package/dist/scoring-engine.js.map +1 -0
- package/dist/scoring-engine.test.d.ts +2 -0
- package/dist/scoring-engine.test.d.ts.map +1 -0
- package/dist/scoring-engine.test.js +137 -0
- package/dist/scoring-engine.test.js.map +1 -0
- package/dist/submit-payload-builder.d.ts +9 -0
- package/dist/submit-payload-builder.d.ts.map +1 -0
- package/dist/submit-payload-builder.js +23 -0
- package/dist/submit-payload-builder.js.map +1 -0
- package/dist/submit-payload-builder.test.d.ts +2 -0
- package/dist/submit-payload-builder.test.d.ts.map +1 -0
- package/dist/submit-payload-builder.test.js +75 -0
- package/dist/submit-payload-builder.test.js.map +1 -0
- package/dist/submitter-types.d.ts +54 -0
- package/dist/submitter-types.d.ts.map +1 -0
- package/dist/submitter-types.js +2 -0
- package/dist/submitter-types.js.map +1 -0
- package/dist/types.d.ts +40 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/package.json +36 -0
package/dist/sandbox.js
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import { execFile } from "node:child_process";
|
|
2
|
+
import { mkdtemp, rm, writeFile } from "node:fs/promises";
|
|
3
|
+
import { tmpdir } from "node:os";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import { promisify } from "node:util";
|
|
6
|
+
const execFileAsync = promisify(execFile);
|
|
7
|
+
/**
 * Assemble the argument list for a resource-limited `docker run` invocation.
 *
 * Any option left null/undefined falls back to its default: image
 * "node:20-alpine", workdir "/workspace", workspacePath = process.cwd(),
 * command ["pnpm", "test"], 1 CPU, 512 MB of memory, 128 pids, network "none"
 * and a read-only container.
 *
 * @param options sandbox settings; every field is optional
 * @returns the arguments to hand to the `docker` binary (without "docker" itself)
 */
export function buildDockerSandboxCommand(options = {}) {
    const hostPath = options.workspacePath ?? process.cwd();
    const containerDir = options.workdir ?? "/workspace";
    const isReadOnly = options.readOnly ?? true;
    // Read-only mode affects both the root filesystem flag and the bind mount suffix.
    const mount = isReadOnly ? `${hostPath}:${containerDir}:ro` : `${hostPath}:${containerDir}`;
    const dockerArgs = ["run", "--rm"];
    if (isReadOnly) {
        dockerArgs.push("--read-only");
    }
    dockerArgs.push(
        "--cpus", String(options.cpus ?? 1),
        "--memory", `${options.memoryMb ?? 512}m`,
        "--pids-limit", String(options.pidsLimit ?? 128),
        "--network", options.network ?? "none",
        "-w", containerDir,
        "-v", mount,
        // Small writable scratch area so tools that need /tmp still work.
        "--tmpfs", "/tmp:rw,nosuid,nodev,size=64m",
        options.image ?? "node:20-alpine",
        ...(options.command ?? ["pnpm", "test"])
    );
    return dockerArgs;
}
|
|
41
|
+
/**
 * Run a command through `docker run` using the sandbox arguments produced by
 * buildDockerSandboxCommand.
 *
 * @param options sandbox settings plus an optional `timeoutMs` (defaults to 60 000 ms)
 * @returns whatever the promisified execFile call yields (the callers in this
 *          file read `stdout` and `stderr` from it)
 */
export async function runSandboxedCommand(options = {}) {
    const dockerArgs = buildDockerSandboxCommand(options);
    // NOTE(review): SIGKILL on timeout is delivered to the docker CLI process;
    // confirm that the container itself is also stopped in that case.
    const execOptions = {
        encoding: "utf-8",
        timeout: options.timeoutMs ?? 60_000,
        killSignal: "SIGKILL"
    };
    return execFileAsync("docker", dockerArgs, execOptions);
}
|
|
49
|
+
/**
 * Map a free-form language label to the file extension used for the submission file.
 *
 * Matching is case-insensitive and ignores surrounding whitespace. Labels that
 * contain "typescript" (or equal "ts") map to "ts"; labels that contain
 * "javascript" (or equal "js") map to "js"; everything else maps to "txt".
 *
 * @param language language label; a missing or non-string value is treated
 *                 as an unknown language instead of raising a TypeError
 * @returns "ts", "js" or "txt"
 */
function extensionForLanguage(language) {
    if (typeof language !== "string") {
        return "txt";
    }
    // Trim so that padded labels such as " ts " still hit the exact-match checks.
    const normalized = language.trim().toLowerCase();
    if (normalized.includes("typescript") || normalized === "ts") {
        return "ts";
    }
    if (normalized.includes("javascript") || normalized === "js") {
        return "js";
    }
    return "txt";
}
|
|
59
|
+
/**
 * Build the command that validates a submission file inside the sandbox.
 *
 * - "js": `node --check <file>` (syntax check).
 * - "ts": an inline node script that rejects an empty file and a file without
 *   any of the keywords export/function/const/class.
 * - anything else: an inline node script that only rejects an empty file.
 *
 * @param fileName name of the submission file, relative to the sandbox workdir
 * @param extension extension as returned by extensionForLanguage
 * @returns the command as an argv array
 */
function buildValidationCommand(fileName, extension) {
    if (extension === "js") {
        return ["node", "--check", fileName];
    }
    // The name is embedded in a single-quoted JS string literal, so characters that
    // could terminate or corrupt that literal are written as \uXXXX escapes. Ordinary
    // names such as "submission.ts" are emitted unchanged.
    const escapedName = String(fileName).replace(
        /[\\'\n\r\u2028\u2029]/g,
        (ch) => `\\u${ch.charCodeAt(0).toString(16).padStart(4, "0")}`
    );
    const statements = [
        "const fs = require('node:fs');",
        `const src = fs.readFileSync('${escapedName}', 'utf8');`,
        "if (!src || src.trim().length === 0) throw new Error('empty submission');"
    ];
    if (extension === "ts") {
        statements.push("if (!/export|function|const|class/.test(src)) throw new Error('submission lacks executable constructs');");
    }
    return ["node", "-e", statements.join(" ")];
}
|
|
85
|
+
/**
 * Write a submission to a throw-away workspace and validate it inside the Docker sandbox.
 *
 * The workspace is a fresh temp directory that is mounted read-only with networking
 * disabled. It is always removed before this function settles, so the
 * `workspacePath` in the result identifies a directory that no longer exists.
 *
 * @param input object carrying `code` and `language`, plus optional `image` and `timeoutMs`
 * @returns `{ passed, command, workspacePath, stdout, stderr }`; on any failure after the
 *          workspace was created `passed` is false, `stdout` is empty and `stderr` holds
 *          the error message
 */
export async function runSandboxedSubmission(input) {
    // Work that needs no filesystem happens first: if it throws (for example on a
    // non-string language) no temp directory exists yet, so nothing is leaked.
    const extension = extensionForLanguage(input.language);
    const fileName = `submission.${extension}`;
    const command = buildValidationCommand(fileName, extension);
    const workspacePath = await mkdtemp(join(tmpdir(), "req2rank-sandbox-"));
    try {
        await writeFile(join(workspacePath, fileName), input.code, "utf8");
        const result = await runSandboxedCommand({
            image: input.image,
            timeoutMs: input.timeoutMs,
            workspacePath,
            workdir: "/workspace",
            command,
            network: "none",
            readOnly: true
        });
        return {
            passed: true,
            command,
            workspacePath,
            stdout: result.stdout,
            stderr: result.stderr
        };
    }
    catch (error) {
        // Write failures and sandbox failures are reported the same way: as a failed result.
        const message = error instanceof Error ? error.message : String(error);
        return {
            passed: false,
            command,
            workspacePath,
            stdout: "",
            stderr: message
        };
    }
    finally {
        await rm(workspacePath, { recursive: true, force: true });
    }
}
|
|
124
|
+
//# sourceMappingURL=sandbox.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sandbox.js","sourceRoot":"","sources":["../src/sandbox.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,OAAO,EAAE,EAAE,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC1D,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACjC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAEtC,MAAM,aAAa,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAC;AAe1C,MAAM,UAAU,yBAAyB,CAAC,UAA0B,EAAE;IACpE,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,gBAAgB,CAAC;IAChD,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,YAAY,CAAC;IAChD,MAAM,aAAa,GAAG,OAAO,CAAC,aAAa,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;IAC7D,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACpD,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,IAAI,CAAC,CAAC;IAC/B,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,GAAG,CAAC;IACzC,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,GAAG,CAAC;IAC3C,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,MAAM,CAAC;IAC1C,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,IAAI,CAAC;IAC1C,MAAM,aAAa,GAAG,QAAQ,CAAC,CAAC,CAAC,GAAG,aAAa,IAAI,OAAO,KAAK,CAAC,CAAC,CAAC,GAAG,aAAa,IAAI,OAAO,EAAE,CAAC;IAElG,MAAM,IAAI,GAAG;QACX,KAAK;QACL,MAAM;QACN,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QACpC,QAAQ;QACR,MAAM,CAAC,IAAI,CAAC;QACZ,UAAU;QACV,GAAG,QAAQ,GAAG;QACd,cAAc;QACd,MAAM,CAAC,SAAS,CAAC;QACjB,WAAW;QACX,OAAO;QACP,IAAI;QACJ,OAAO;QACP,IAAI;QACJ,aAAa;QACb,SAAS;QACT,+BAA+B;QAC/B,KAAK;QACL,GAAG,OAAO;KACX,CAAC;IAEF,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,mBAAmB,CAAC,UAA0B,EAAE;IACpE,MAAM,IAAI,GAAG,yBAAyB,CAAC,OAAO,CAAC,CAAC;IAChD,OAAO,aAAa,CAAC,QAAQ,EAAE,IAAI,EAAE;QACnC,QAAQ,EAAE,OAAO;QACjB,OAAO,EAAE,OAAO,CAAC,SAAS,IAAI,MAAM;QACpC,UAAU,EAAE,SAAS;KACtB,CAAC,CAAC;AACL,CAAC;AAiBD,SAAS,oBAAoB,CAAC,QAAgB;IAC5C,MAAM,UAAU,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAC;IAC1C,IAAI,UAAU,CAAC,QAAQ,CAAC,YAAY,CAAC,IAAI,UAAU,KAAK,IAAI,EAAE,CAAC;QAC7D,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,UAAU,CAAC,QAAQ,CAAC,YAAY,CAAC,IAAI,UAAU,KAAK,IAAI,EAAE,CAAC;QAC7D,OAAO,IAAI,CAAC;IACd,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,sBAAsB,CAAC,QAAgB,EAAE,SAAiB;IACjE,IAAI,SAAS,K
AAK,IAAI,EAAE,CAAC;QACvB,OAAO,CAAC,MAAM,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;IACvC,CAAC;IAED,IAAI,SAAS,KAAK,IAAI,EAAE,CAAC;QACvB,OAAO;YACL,MAAM;YACN,IAAI;YACJ;gBACE,gCAAgC;gBAChC,gCAAgC,QAAQ,aAAa;gBACrD,2EAA2E;gBAC3E,0GAA0G;aAC3G,CAAC,IAAI,CAAC,GAAG,CAAC;SACZ,CAAC;IACJ,CAAC;IAED,OAAO;QACL,MAAM;QACN,IAAI;QACJ;YACE,gCAAgC;YAChC,gCAAgC,QAAQ,aAAa;YACrD,2EAA2E;SAC5E,CAAC,IAAI,CAAC,GAAG,CAAC;KACZ,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,sBAAsB,CAAC,KAA+B;IAC1E,MAAM,aAAa,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,mBAAmB,CAAC,CAAC,CAAC;IACzE,MAAM,SAAS,GAAG,oBAAoB,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IACvD,MAAM,QAAQ,GAAG,cAAc,SAAS,EAAE,CAAC;IAC3C,MAAM,QAAQ,GAAG,IAAI,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC;IAE/C,IAAI,CAAC;QACH,MAAM,SAAS,CAAC,QAAQ,EAAE,KAAK,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;QAE9C,MAAM,OAAO,GAAG,sBAAsB,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;QAC5D,MAAM,MAAM,GAAG,MAAM,mBAAmB,CAAC;YACvC,KAAK,EAAE,KAAK,CAAC,KAAK;YAClB,SAAS,EAAE,KAAK,CAAC,SAAS;YAC1B,aAAa;YACb,OAAO,EAAE,YAAY;YACrB,OAAO;YACP,OAAO,EAAE,MAAM;YACf,QAAQ,EAAE,IAAI;SACf,CAAC,CAAC;QAEH,OAAO;YACL,MAAM,EAAE,IAAI;YACZ,OAAO;YACP,aAAa;YACb,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,MAAM,EAAE,MAAM,CAAC,MAAM;SACtB,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QACvE,OAAO;YACL,MAAM,EAAE,KAAK;YACb,OAAO,EAAE,sBAAsB,CAAC,QAAQ,EAAE,SAAS,CAAC;YACpD,aAAa;YACb,MAAM,EAAE,EAAE;YACV,MAAM,EAAE,OAAO;SAChB,CAAC;IACJ,CAAC;YAAS,CAAC;QACT,MAAM,EAAE,CAAC,aAAa,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;IAC5D,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sandbox.test.d.ts","sourceRoot":"","sources":["../src/sandbox.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { buildDockerSandboxCommand } from "./sandbox.js";
|
|
3
|
+
// Unit tests for the docker argument builder: default invocation and explicit workspace path.
describe("buildDockerSandboxCommand", () => {
    it("builds default docker invocation", () => {
        const dockerArgs = buildDockerSandboxCommand();
        const leadingArgs = dockerArgs.slice(0, 6);
        expect(leadingArgs).toEqual(["run", "--rm", "--read-only", "--cpus", "1", "--memory"]);
        for (const expected of ["--network", "none", "node:20-alpine", "pnpm", "test"]) {
            expect(dockerArgs).toContain(expected);
        }
    });
    it("uses explicit workspacePath when provided", () => {
        const dockerArgs = buildDockerSandboxCommand({
            workspacePath: "/tmp/demo-workspace",
            command: ["node", "--version"]
        });
        for (const expected of ["/tmp/demo-workspace:/workspace:ro", "node", "--version"]) {
            expect(dockerArgs).toContain(expected);
        }
    });
});
|
|
20
|
+
//# sourceMappingURL=sandbox.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sandbox.test.js","sourceRoot":"","sources":["../src/sandbox.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,yBAAyB,EAAE,MAAM,cAAc,CAAC;AAEzD,QAAQ,CAAC,2BAA2B,EAAE,GAAG,EAAE;IACzC,EAAE,CAAC,kCAAkC,EAAE,GAAG,EAAE;QAC1C,MAAM,IAAI,GAAG,yBAAyB,EAAE,CAAC;QACzC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,aAAa,EAAE,QAAQ,EAAE,GAAG,EAAE,UAAU,CAAC,CAAC,CAAC;QAC5F,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;QACpC,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QAC/B,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,gBAAgB,CAAC,CAAC;QACzC,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QAC/B,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;IACjC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE;QACnD,MAAM,IAAI,GAAG,yBAAyB,CAAC,EAAE,aAAa,EAAE,qBAAqB,EAAE,OAAO,EAAE,CAAC,MAAM,EAAE,WAAW,CAAC,EAAE,CAAC,CAAC;QACjH,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,mCAAmC,CAAC,CAAC;QAC5D,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QAC/B,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IACtC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { DimensionScoreMap, EvaluationResult, ScoreDimension } from "./evaluation-panel.js";
|
|
2
|
+
/** Numeric weight for each score dimension; ScoringEngine accepts one of these in its constructor. */
export type DimensionWeightMap = Record<ScoreDimension, number>;
|
|
3
|
+
/** Aggregated scoring outcome produced by ScoringEngine.score. */
export interface ScoreResult {
    /** Weighted sum of the per-dimension scores. */
    overallScore: number;
    /** Aggregated score for every dimension. */
    dimensionScores: DimensionScoreMap;
    /** Lower and upper bound of the 95% confidence interval around the overall score. */
    ci95: [lower: number, upper: number];
    /** How closely the judges agree with each other. */
    agreementLevel: "high" | "moderate" | "low";
    /** Human-readable notes, e.g. about dimensions with low agreement. */
    warnings: string[];
}
|
|
10
|
+
/** Turns the evaluation results of several judges into a single ScoreResult. */
export declare class ScoringEngine {
    /** Dimension weights used by this engine. */
    private readonly weights;
    /**
     * @param weights weight per dimension; optional
     */
    constructor(weights?: DimensionWeightMap);
    /**
     * Aggregate the given judge results.
     *
     * @param results one evaluation result per judge
     * @returns the aggregated scores, confidence interval, agreement level and warnings
     */
    score(results: EvaluationResult[]): ScoreResult;
}
|
|
15
|
+
//# sourceMappingURL=scoring-engine.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scoring-engine.d.ts","sourceRoot":"","sources":["../src/scoring-engine.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,gBAAgB,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAE5F,MAAM,MAAM,kBAAkB,GAAG,MAAM,CAAC,cAAc,EAAE,MAAM,CAAC,CAAC;AAEhE,MAAM,WAAW,WAAW;IAC1B,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,iBAAiB,CAAC;IACnC,IAAI,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACvB,cAAc,EAAE,MAAM,GAAG,UAAU,GAAG,KAAK,CAAC;IAC5C,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AA4DD,qBAAa,aAAa;IACxB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAqB;gBAEjC,OAAO,GAAE,kBAAoC;IAIzD,KAAK,CAAC,OAAO,EAAE,gBAAgB,EAAE,GAAG,WAAW;CAkEhD"}
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
/** Weight of each dimension in the overall score when no custom weights are given; the weights sum to 1. */
const DEFAULT_WEIGHTS = {
    functionalCompleteness: 0.3,
    codeQuality: 0.25,
    logicAccuracy: 0.25,
    security: 0.1,
    engineeringPractice: 0.1
};
/** Every score dimension, in the order in which the weights above are declared. */
const DIMENSIONS = Object.keys(DEFAULT_WEIGHTS);
|
|
15
|
+
/**
 * Round a number to one decimal place using Math.round on the value scaled by ten.
 *
 * @param value number to round
 * @returns the rounded number
 */
function roundToOneDecimal(value) {
    const tenths = Math.round(value * 10);
    return tenths / 10;
}
|
|
18
|
+
/**
 * Population standard deviation (squared deviations divided by the number of values).
 *
 * @param values numbers to measure
 * @returns the standard deviation, or 0 when there are fewer than two values
 */
function calculateStdDev(values) {
    const count = values.length;
    if (count <= 1) {
        return 0;
    }
    let total = 0;
    values.forEach((value) => {
        total += value;
    });
    const mean = total / count;
    let squaredDeviations = 0;
    values.forEach((value) => {
        squaredDeviations += (value - mean) ** 2;
    });
    return Math.sqrt(squaredDeviations / count);
}
|
|
26
|
+
/**
 * Translate a standard deviation into an agreement label.
 *
 * @param stdDev spread of the scores
 * @returns "high" up to 8, "moderate" up to 15, otherwise "low"
 */
function classifyAgreement(stdDev) {
    return stdDev <= 8 ? "high" : stdDev <= 15 ? "moderate" : "low";
}
|
|
35
|
+
/**
 * Arithmetic mean of the given numbers.
 *
 * @param values numbers to average
 * @returns the mean, or 0 for an empty list
 */
function average(values) {
    const count = values.length;
    if (count === 0) {
        return 0;
    }
    let total = 0;
    values.forEach((value) => {
        total += value;
    });
    return total / count;
}
|
|
41
|
+
/**
 * Drop the smallest and the largest value from a list of numbers.
 *
 * @param values numbers to trim; the array itself is never modified
 * @returns a new ascending array without the two extremes, or the input array
 *          itself (same reference) when it holds fewer than three values
 */
function trimMinMax(values) {
    const count = values.length;
    if (count >= 3) {
        // Sort a copy so the caller's array keeps its order.
        const ascending = values.slice();
        ascending.sort((a, b) => a - b);
        return ascending.slice(1, count - 1);
    }
    return values;
}
|
|
48
|
+
/**
 * Combines the per-dimension scores of several judges into dimension averages,
 * a weighted overall score, a 95% confidence interval and an agreement level.
 */
export class ScoringEngine {
    weights;
    /**
     * @param weights weight per dimension; DEFAULT_WEIGHTS when omitted
     */
    constructor(weights = DEFAULT_WEIGHTS) {
        this.weights = weights;
    }
    /**
     * Aggregate judge results.
     *
     * With at least three judges and an overall agreement better than "low", the
     * lowest and highest value are dropped before averaging (both per dimension and
     * for the per-judge totals that feed the confidence interval).
     *
     * @param results one entry per judge, each exposing `dimensions[<dimension>]`
     * @returns overall score, per-dimension scores, ci95, agreement level and one
     *          warning for each dimension whose own agreement is "low"; an all-zero
     *          result with "low" agreement when `results` is empty
     */
    score(results) {
        if (results.length === 0) {
            const zeroScores = {
                functionalCompleteness: 0,
                codeQuality: 0,
                logicAccuracy: 0,
                security: 0,
                engineeringPractice: 0
            };
            return {
                overallScore: 0,
                dimensionScores: zeroScores,
                ci95: [0, 0],
                agreementLevel: "low",
                warnings: []
            };
        }
        // Weighted sum over all dimensions of whatever `pick` yields for a dimension.
        const weightedTotal = (pick) => {
            let total = 0;
            for (const dimension of DIMENSIONS) {
                total += pick(dimension) * this.weights[dimension];
            }
            return total;
        };
        const perDimensionStats = [];
        for (const dimension of DIMENSIONS) {
            const values = results.map((result) => result.dimensions[dimension]);
            const stdDev = calculateStdDev(values);
            perDimensionStats.push({ dimension, values, stdDev, agreement: classifyAgreement(stdDev) });
        }
        // The overall agreement is derived from the mean of the per-dimension deviations.
        const agreementLevel = classifyAgreement(average(perDimensionStats.map((stat) => stat.stdDev)));
        const enableTrimmedMean = !(results.length < 3 || agreementLevel === "low");
        const maybeTrim = (scores) => (enableTrimmedMean ? trimMinMax(scores) : scores);
        const warnings = [];
        for (const stat of perDimensionStats) {
            if (stat.agreement === "low") {
                warnings.push(`${stat.dimension} dimension has low agreement (sigma=${roundToOneDecimal(stat.stdDev)})`);
            }
        }
        const dimensionScores = Object.fromEntries(perDimensionStats.map((stat) => [
            stat.dimension,
            roundToOneDecimal(average(maybeTrim(stat.values)))
        ]));
        const overallScore = roundToOneDecimal(weightedTotal((dimension) => dimensionScores[dimension]));
        // The interval is based on the spread of each judge's own weighted total.
        const perJudgeOverallScores = results.map((result) => weightedTotal((dimension) => result.dimensions[dimension]));
        const judgeScoresForCi = maybeTrim(perJudgeOverallScores);
        const sampleSize = judgeScoresForCi.length;
        const stdDev = calculateStdDev(judgeScoresForCi);
        const margin = sampleSize > 1 ? 1.96 * (stdDev / Math.sqrt(sampleSize)) : 0;
        const ci95 = [roundToOneDecimal(overallScore - margin), roundToOneDecimal(overallScore + margin)];
        return { overallScore, dimensionScores, ci95, agreementLevel, warnings };
    }
}
|
|
109
|
+
//# sourceMappingURL=scoring-engine.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scoring-engine.js","sourceRoot":"","sources":["../src/scoring-engine.ts"],"names":[],"mappings":"AAYA,MAAM,eAAe,GAAuB;IAC1C,sBAAsB,EAAE,GAAG;IAC3B,WAAW,EAAE,IAAI;IACjB,aAAa,EAAE,IAAI;IACnB,QAAQ,EAAE,GAAG;IACb,mBAAmB,EAAE,GAAG;CACzB,CAAC;AAEF,MAAM,UAAU,GAAqB;IACnC,wBAAwB;IACxB,aAAa;IACb,eAAe;IACf,UAAU;IACV,qBAAqB;CACtB,CAAC;AAEF,SAAS,iBAAiB,CAAC,KAAa;IACtC,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,EAAE,CAAC,GAAG,EAAE,CAAC;AACrC,CAAC;AAED,SAAS,eAAe,CAAC,MAAgB;IACvC,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,CAAC;IACX,CAAC;IAED,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,EAAE,CAAC,GAAG,GAAG,KAAK,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;IAC3E,MAAM,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;IAC7F,OAAO,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;AAC7B,CAAC;AAED,SAAS,iBAAiB,CAAC,MAAc;IACvC,IAAI,MAAM,IAAI,CAAC,EAAE,CAAC;QAChB,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,IAAI,MAAM,IAAI,EAAE,EAAE,CAAC;QACjB,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,OAAO,CAAC,MAAgB;IAC/B,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,CAAC,CAAC;IACX,CAAC;IACD,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,EAAE,CAAC,GAAG,GAAG,KAAK,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;AACvE,CAAC;AAED,SAAS,UAAU,CAAC,MAAgB;IAClC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,IAAI,GAAG,KAAK,CAAC,CAAC;IAClE,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;AAC7B,CAAC;AAED,MAAM,OAAO,aAAa;IACP,OAAO,CAAqB;IAE7C,YAAY,UAA8B,eAAe;QACvD,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;IACzB,CAAC;IAED,KAAK,CAAC,OAA2B;QAC/B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzB,OAAO;gBACL,YAAY,EAAE,CAAC;gBACf,eAAe,EAAE;oBACf,sBAAsB,EAAE,CAAC;oBACzB,WAAW,EAAE,CAAC;oBACd,aAAa,EAAE,CAAC;oBAChB,QAAQ,EAAE,CAAC;oBACX,mBAAmB,EAAE,CAAC;iBACvB;gBACD,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC;gBA
CZ,cAAc,EAAE,KAAK;gBACrB,QAAQ,EAAE,EAAE;aACb,CAAC;QACJ,CAAC;QAED,MAAM,iBAAiB,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,SAAS,EAAE,EAAE;YACrD,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,CAAC;YACrE,MAAM,MAAM,GAAG,eAAe,CAAC,MAAM,CAAC,CAAC;YACvC,OAAO;gBACL,SAAS;gBACT,MAAM;gBACN,MAAM;gBACN,SAAS,EAAE,iBAAiB,CAAC,MAAM,CAAC;aACrC,CAAC;QACJ,CAAC,CAAC,CAAC;QAEH,MAAM,aAAa,GAAG,OAAO,CAAC,iBAAiB,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;QAC5E,MAAM,cAAc,GAAG,iBAAiB,CAAC,aAAa,CAAC,CAAC;QACxD,MAAM,iBAAiB,GAAG,OAAO,CAAC,MAAM,IAAI,CAAC,IAAI,cAAc,KAAK,KAAK,CAAC;QAE1E,MAAM,QAAQ,GAAG,iBAAiB;aAC/B,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,KAAK,KAAK,CAAC;aAC1C,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,uCAAuC,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QAE5G,MAAM,eAAe,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,SAAS,EAAE,EAAE;YACnE,MAAM,IAAI,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,KAAK,SAAS,CAAC,CAAC;YAC5E,MAAM,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;YACvC,MAAM,aAAa,GAAG,iBAAiB,CAAC,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;YACtE,OAAO;gBACL,GAAG,WAAW;gBACd,CAAC,SAAS,CAAC,EAAE,iBAAiB,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;aACvD,CAAC;QACJ,CAAC,EAAE,EAAuB,CAAC,CAAC;QAE5B,MAAM,YAAY,GAAG,iBAAiB,CACpC,UAAU,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,SAAS,EAAE,EAAE,CAAC,GAAG,GAAG,eAAe,CAAC,SAAS,CAAC,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC,CACrG,CAAC;QAEF,MAAM,qBAAqB,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CACnD,UAAU,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,SAAS,EAAE,EAAE,CAAC,GAAG,GAAG,MAAM,CAAC,UAAU,CAAC,SAAS,CAAC,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC,CACvG,CAAC;QACF,MAAM,gBAAgB,GAAG,iBAAiB,CAAC,CAAC,CAAC,UAAU,CAAC,qBAAqB,CAAC,CAAC,CAAC,CAAC,qBAAqB,CAAC;QACvG,MAAM,MAAM,GAAG,eAAe,CAAC,gBAAgB,CAAC,CAAC;QACjD,MAAM,MAAM,GAAG,gBAAgB,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,C
AAC,CAAC;QACtG,MAAM,IAAI,GAAqB,CAAC,iBAAiB,CAAC,YAAY,GAAG,MAAM,CAAC,EAAE,iBAAiB,CAAC,YAAY,GAAG,MAAM,CAAC,CAAC,CAAC;QAEpH,OAAO;YACL,YAAY;YACZ,eAAe;YACf,IAAI;YACJ,cAAc;YACd,QAAQ;SACT,CAAC;IACJ,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scoring-engine.test.d.ts","sourceRoot":"","sources":["../src/scoring-engine.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { ScoringEngine } from "./scoring-engine.js";
|
|
3
|
+
// Unit tests for ScoringEngine: weighted aggregation, trimmed mean and low-agreement warnings.
describe("ScoringEngine", () => {
    // Builds one judge result from scores given in the order: functionalCompleteness,
    // codeQuality, logicAccuracy, security, engineeringPractice.
    const judge = (judgeId, [functionalCompleteness, codeQuality, logicAccuracy, security, engineeringPractice]) => ({
        judgeId,
        dimensions: { functionalCompleteness, codeQuality, logicAccuracy, security, engineeringPractice }
    });
    // Builds a judge result that gives the same score to every dimension.
    const uniformJudge = (judgeId, score) => judge(judgeId, [score, score, score, score, score]);
    it("calculates weighted dimension and overall scores", () => {
        const weights = {
            functionalCompleteness: 0.3,
            codeQuality: 0.25,
            logicAccuracy: 0.25,
            security: 0.1,
            engineeringPractice: 0.1
        };
        const engine = new ScoringEngine(weights);
        const result = engine.score([
            judge("j1", [80, 70, 90, 85, 75]),
            judge("j2", [60, 90, 70, 95, 85])
        ]);
        const [lower, upper] = result.ci95;
        expect(result.dimensionScores.functionalCompleteness).toBe(70);
        expect(result.dimensionScores.codeQuality).toBe(80);
        expect(result.overallScore).toBe(78);
        expect(lower).toBeLessThanOrEqual(result.overallScore);
        expect(upper).toBeGreaterThanOrEqual(result.overallScore);
        expect(result.agreementLevel).toBe("high");
        expect(result.warnings).toEqual([]);
    });
    it("trims min/max judge scores when agreement is moderate or high and judge count >= 3", () => {
        const engine = new ScoringEngine();
        const judges = [
            uniformJudge("j1", 70),
            uniformJudge("j2", 80),
            uniformJudge("j3", 81),
            uniformJudge("j4", 82),
            uniformJudge("j5", 90)
        ];
        const result = engine.score(judges);
        expect(result.dimensionScores.functionalCompleteness).toBe(81);
        expect(result.overallScore).toBe(81);
    });
    it("emits warnings for low-agreement dimensions", () => {
        const engine = new ScoringEngine();
        // Only the security score differs between the judges.
        const judges = [
            judge("j1", [80, 80, 80, 0, 80]),
            judge("j2", [80, 80, 80, 50, 80]),
            judge("j3", [80, 80, 80, 100, 80])
        ];
        const { warnings } = engine.score(judges);
        expect(warnings.length).toBe(1);
        expect(warnings[0]).toContain("security");
    });
});
|
|
137
|
+
//# sourceMappingURL=scoring-engine.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scoring-engine.test.js","sourceRoot":"","sources":["../src/scoring-engine.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAEpD,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;IAC7B,EAAE,CAAC,kDAAkD,EAAE,GAAG,EAAE;QAC1D,MAAM,MAAM,GAAG,IAAI,aAAa,CAAC;YAC/B,sBAAsB,EAAE,GAAG;YAC3B,WAAW,EAAE,IAAI;YACjB,aAAa,EAAE,IAAI;YACnB,QAAQ,EAAE,GAAG;YACb,mBAAmB,EAAE,GAAG;SACzB,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC;YAC1B;gBACE,OAAO,EAAE,IAAI;gBACb,UAAU,EAAE;oBACV,sBAAsB,EAAE,EAAE;oBAC1B,WAAW,EAAE,EAAE;oBACf,aAAa,EAAE,EAAE;oBACjB,QAAQ,EAAE,EAAE;oBACZ,mBAAmB,EAAE,EAAE;iBACxB;aACF;YACD;gBACE,OAAO,EAAE,IAAI;gBACb,UAAU,EAAE;oBACV,sBAAsB,EAAE,EAAE;oBAC1B,WAAW,EAAE,EAAE;oBACf,aAAa,EAAE,EAAE;oBACjB,QAAQ,EAAE,EAAE;oBACZ,mBAAmB,EAAE,EAAE;iBACxB;aACF;SACF,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,sBAAsB,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC/D,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACpD,MAAM,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,mBAAmB,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;QAChE,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,sBAAsB,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;QACnE,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC3C,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IACtC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oFAAoF,EAAE,GAAG,EAAE;QAC5F,MAAM,MAAM,GAAG,IAAI,aAAa,EAAE,CAAC;QAEnC,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC;YAC1B;gBACE,OAAO,EAAE,IAAI;gBACb,UAAU,EAAE;oBACV,sBAAsB,EAAE,EAAE;oBAC1B,WAAW,EAAE,EAAE;oBACf,aAAa,EAAE,EAAE;oBACjB,QAAQ,EAAE,EAAE;oBACZ,mBAAmB,EAAE,EAAE;iBACxB;aACF;YACD;gBACE,OAAO,EAAE,IAAI;gBACb,UAAU,EAAE;oBACV,sBAAsB,EAAE,EAAE;oBAC1B,WAAW,EAAE,EAAE;oBACf,aAAa,EAAE,EAAE;oBACjB,QAAQ,EAAE,EAAE;oBACZ,mBAAmB,EAAE,EAAE;iBACxB;aACF;YACD;gBACE,OAAO,EAAE,IAAI;gBACb,UAAU,EAAE;oBACV,sBAAsB,EAAE,EAAE;oBAC1B,WAAW,EAAE,EAAE;oBACf,aAAa,EAAE,EAAE;oBACjB,QAAQ,EAAE,EAAE;oBACZ,mBAAm
B,EAAE,EAAE;iBACxB;aACF;YACD;gBACE,OAAO,EAAE,IAAI;gBACb,UAAU,EAAE;oBACV,sBAAsB,EAAE,EAAE;oBAC1B,WAAW,EAAE,EAAE;oBACf,aAAa,EAAE,EAAE;oBACjB,QAAQ,EAAE,EAAE;oBACZ,mBAAmB,EAAE,EAAE;iBACxB;aACF;YACD;gBACE,OAAO,EAAE,IAAI;gBACb,UAAU,EAAE;oBACV,sBAAsB,EAAE,EAAE;oBAC1B,WAAW,EAAE,EAAE;oBACf,aAAa,EAAE,EAAE;oBACjB,QAAQ,EAAE,EAAE;oBACZ,mBAAmB,EAAE,EAAE;iBACxB;aACF;SACF,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,sBAAsB,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC/D,MAAM,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6CAA6C,EAAE,GAAG,EAAE;QACrD,MAAM,MAAM,GAAG,IAAI,aAAa,EAAE,CAAC;QAEnC,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC;YAC1B;gBACE,OAAO,EAAE,IAAI;gBACb,UAAU,EAAE;oBACV,sBAAsB,EAAE,EAAE;oBAC1B,WAAW,EAAE,EAAE;oBACf,aAAa,EAAE,EAAE;oBACjB,QAAQ,EAAE,CAAC;oBACX,mBAAmB,EAAE,EAAE;iBACxB;aACF;YACD;gBACE,OAAO,EAAE,IAAI;gBACb,UAAU,EAAE;oBACV,sBAAsB,EAAE,EAAE;oBAC1B,WAAW,EAAE,EAAE;oBACf,aAAa,EAAE,EAAE;oBACjB,QAAQ,EAAE,EAAE;oBACZ,mBAAmB,EAAE,EAAE;iBACxB;aACF;YACD;gBACE,OAAO,EAAE,IAAI;gBACb,UAAU,EAAE;oBACV,sBAAsB,EAAE,EAAE;oBAC1B,WAAW,EAAE,EAAE;oBACf,aAAa,EAAE,EAAE;oBACjB,QAAQ,EAAE,GAAG;oBACb,mBAAmB,EAAE,EAAE;iBACxB;aACF;SACF,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { RunRecord } from "./config.js";
|
|
2
|
+
import { SubmissionRequest } from "./submitter-types.js";
|
|
3
|
+
/**
 * Argument object accepted by `buildSubmissionPayload`.
 */
export interface BuildSubmissionPayloadInput {
    /** Run record whose fields are copied into the submission payload. */
    run: RunRecord;
    /** Nonce string, passed through to the payload unchanged. */
    nonce: string;
    /** Date used for `submittedAt`; the builder falls back to `new Date()` when omitted. */
    now?: Date;
}
|
|
8
|
+
/**
 * Builds a `SubmissionRequest` from a run record and a nonce.
 *
 * @param input - The run, the nonce, and an optional `now` date override.
 * @returns The submission request payload.
 */
export declare function buildSubmissionPayload(input: BuildSubmissionPayloadInput): SubmissionRequest;
|
|
9
|
+
//# sourceMappingURL=submit-payload-builder.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"submit-payload-builder.d.ts","sourceRoot":"","sources":["../src/submit-payload-builder.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAExC,OAAO,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAC;AAEzD,MAAM,WAAW,2BAA2B;IAC1C,GAAG,EAAE,SAAS,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,IAAI,CAAC;CACZ;AAED,wBAAgB,sBAAsB,CAAC,KAAK,EAAE,2BAA2B,GAAG,iBAAiB,CAqB5F"}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import { createEvidenceChain } from "./evidence-chain.js";
|
|
2
|
+
export function buildSubmissionPayload(input) {
|
|
3
|
+
const now = input.now ?? new Date();
|
|
4
|
+
const fallbackEvidenceChain = createEvidenceChain({
|
|
5
|
+
requirement: input.run.requirementTitle,
|
|
6
|
+
codeSubmission: "code-unavailable",
|
|
7
|
+
judgeModels: []
|
|
8
|
+
});
|
|
9
|
+
return {
|
|
10
|
+
runId: input.run.id,
|
|
11
|
+
nonce: input.nonce,
|
|
12
|
+
targetProvider: input.run.targetProvider,
|
|
13
|
+
targetModel: input.run.targetModel,
|
|
14
|
+
complexity: input.run.complexity,
|
|
15
|
+
overallScore: input.run.overallScore,
|
|
16
|
+
ci95: input.run.ci95,
|
|
17
|
+
agreementLevel: input.run.agreementLevel,
|
|
18
|
+
dimensionScores: input.run.dimensionScores,
|
|
19
|
+
submittedAt: now.toISOString(),
|
|
20
|
+
evidenceChain: input.run.evidenceChain ?? fallbackEvidenceChain
|
|
21
|
+
};
|
|
22
|
+
}
|
|
23
|
+
//# sourceMappingURL=submit-payload-builder.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"submit-payload-builder.js","sourceRoot":"","sources":["../src/submit-payload-builder.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAS1D,MAAM,UAAU,sBAAsB,CAAC,KAAkC;IACvE,MAAM,GAAG,GAAG,KAAK,CAAC,GAAG,IAAI,IAAI,IAAI,EAAE,CAAC;IACpC,MAAM,qBAAqB,GAAG,mBAAmB,CAAC;QAChD,WAAW,EAAE,KAAK,CAAC,GAAG,CAAC,gBAAgB;QACvC,cAAc,EAAE,kBAAkB;QAClC,WAAW,EAAE,EAAE;KAChB,CAAC,CAAC;IAEH,OAAO;QACL,KAAK,EAAE,KAAK,CAAC,GAAG,CAAC,EAAE;QACnB,KAAK,EAAE,KAAK,CAAC,KAAK;QAClB,cAAc,EAAE,KAAK,CAAC,GAAG,CAAC,cAAc;QACxC,WAAW,EAAE,KAAK,CAAC,GAAG,CAAC,WAAW;QAClC,UAAU,EAAE,KAAK,CAAC,GAAG,CAAC,UAAU;QAChC,YAAY,EAAE,KAAK,CAAC,GAAG,CAAC,YAAY;QACpC,IAAI,EAAE,KAAK,CAAC,GAAG,CAAC,IAAI;QACpB,cAAc,EAAE,KAAK,CAAC,GAAG,CAAC,cAAc;QACxC,eAAe,EAAE,KAAK,CAAC,GAAG,CAAC,eAAe;QAC1C,WAAW,EAAE,GAAG,CAAC,WAAW,EAAE;QAC9B,aAAa,EAAE,KAAK,CAAC,GAAG,CAAC,aAAa,IAAI,qBAAqB;KAChE,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"submit-payload-builder.test.d.ts","sourceRoot":"","sources":["../src/submit-payload-builder.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { buildSubmissionPayload } from "./submit-payload-builder.js";
|
|
3
|
+
describe("buildSubmissionPayload", () => {
    // Checks that run fields, the nonce, the injected clock and a run-supplied
    // evidence chain are all carried into the payload.
    it("builds payload with nonce and evidence chain", () => {
        const payload = buildSubmissionPayload({
            run: {
                id: "run-1",
                createdAt: "2026-01-01T00:00:00.000Z",
                targetProvider: "openai",
                targetModel: "gpt-4o-mini",
                complexity: "C1",
                rounds: 1,
                requirementTitle: "demo requirement",
                overallScore: 88,
                ci95: [86, 90],
                agreementLevel: "high",
                ijaScore: 0.92,
                dimensionScores: {
                    functionalCompleteness: 88,
                    codeQuality: 88,
                    logicAccuracy: 88,
                    security: 88,
                    engineeringPractice: 88
                },
                evidenceChain: {
                    timeline: [
                        {
                            phase: "generate",
                            startedAt: "2026-01-01T00:00:00.000Z",
                            completedAt: "2026-01-01T00:00:01.000Z",
                            model: "anthropic/claude-sonnet"
                        },
                        {
                            phase: "execute",
                            startedAt: "2026-01-01T00:00:01.000Z",
                            completedAt: "2026-01-01T00:00:02.000Z",
                            model: "openai/gpt-4o-mini"
                        },
                        {
                            phase: "evaluate",
                            startedAt: "2026-01-01T00:00:02.000Z",
                            completedAt: "2026-01-01T00:00:03.000Z",
                            model: "openai/gpt-4o"
                        },
                        {
                            phase: "score",
                            startedAt: "2026-01-01T00:00:03.000Z",
                            completedAt: "2026-01-01T00:00:04.000Z",
                            model: "scoring-engine"
                        }
                    ],
                    samples: [
                        {
                            roundIndex: 0,
                            requirement: "Build API",
                            codeSubmission: "export const ok = true;"
                        }
                    ],
                    environment: {
                        os: "win32",
                        nodeVersion: "v22",
                        timezone: "UTC"
                    }
                }
            },
            nonce: "nonce-1",
            now: new Date("2026-01-01T00:00:02.000Z")
        });
        expect(payload.runId).toBe("run-1");
        expect(payload.nonce).toBe("nonce-1");
        expect(payload.complexity).toBe("C1");
        // `now` is injected above, so the timestamp is deterministic and can be pinned.
        expect(payload.submittedAt).toBe("2026-01-01T00:00:02.000Z");
        expect(payload.evidenceChain.samples[0]?.codeSubmission).toBe("export const ok = true;");
    });
});
|
|
75
|
+
//# sourceMappingURL=submit-payload-builder.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"submit-payload-builder.test.js","sourceRoot":"","sources":["../src/submit-payload-builder.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AAErE,QAAQ,CAAC,wBAAwB,EAAE,GAAG,EAAE;IACtC,EAAE,CAAC,8CAA8C,EAAE,GAAG,EAAE;QACtD,MAAM,OAAO,GAAG,sBAAsB,CAAC;YACrC,GAAG,EAAE;gBACH,EAAE,EAAE,OAAO;gBACX,SAAS,EAAE,0BAA0B;gBACrC,cAAc,EAAE,QAAQ;gBACxB,WAAW,EAAE,aAAa;gBAC1B,UAAU,EAAE,IAAI;gBAChB,MAAM,EAAE,CAAC;gBACT,gBAAgB,EAAE,kBAAkB;gBACpC,YAAY,EAAE,EAAE;gBAChB,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC;gBACd,cAAc,EAAE,MAAM;gBACtB,QAAQ,EAAE,IAAI;gBACd,eAAe,EAAE;oBACf,sBAAsB,EAAE,EAAE;oBAC1B,WAAW,EAAE,EAAE;oBACf,aAAa,EAAE,EAAE;oBACjB,QAAQ,EAAE,EAAE;oBACZ,mBAAmB,EAAE,EAAE;iBACxB;gBACD,aAAa,EAAE;oBACb,QAAQ,EAAE;wBACR;4BACE,KAAK,EAAE,UAAU;4BACjB,SAAS,EAAE,0BAA0B;4BACrC,WAAW,EAAE,0BAA0B;4BACvC,KAAK,EAAE,yBAAyB;yBACjC;wBACD;4BACE,KAAK,EAAE,SAAS;4BAChB,SAAS,EAAE,0BAA0B;4BACrC,WAAW,EAAE,0BAA0B;4BACvC,KAAK,EAAE,oBAAoB;yBAC5B;wBACD;4BACE,KAAK,EAAE,UAAU;4BACjB,SAAS,EAAE,0BAA0B;4BACrC,WAAW,EAAE,0BAA0B;4BACvC,KAAK,EAAE,eAAe;yBACvB;wBACD;4BACE,KAAK,EAAE,OAAO;4BACd,SAAS,EAAE,0BAA0B;4BACrC,WAAW,EAAE,0BAA0B;4BACvC,KAAK,EAAE,gBAAgB;yBACxB;qBACF;oBACD,OAAO,EAAE;wBACP;4BACE,UAAU,EAAE,CAAC;4BACb,WAAW,EAAE,WAAW;4BACxB,cAAc,EAAE,yBAAyB;yBAC1C;qBACF;oBACD,WAAW,EAAE;wBACX,EAAE,EAAE,OAAO;wBACX,WAAW,EAAE,KAAK;wBAClB,QAAQ,EAAE,KAAK;qBAChB;iBACF;aACF;YACD,KAAK,EAAE,SAAS;YAChB,GAAG,EAAE,IAAI,IAAI,CAAC,0BAA0B,CAAC;SAC1C,CAAC,CAAC;QAEH,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACpC,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACtC,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACtC,MAAM,CAAC,OAAO,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,cAAc,CAAC,CAAC,IAAI,CAAC,yBAAyB,CAAC,CAAC;IAC3F,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
 * Payload shape returned by `buildSubmissionPayload`.
 */
export interface SubmissionRequest {
    runId: string;
    nonce: string;
    targetProvider: string;
    targetModel: string;
    complexity?: "C1" | "C2" | "C3" | "C4" | "mixed";
    overallScore: number;
    /** Two-number tuple. Presumably the bounds of a 95% confidence interval; confirm with the producer. */
    ci95?: [number, number];
    agreementLevel?: "high" | "moderate" | "low";
    /** Scores keyed by dimension name. */
    dimensionScores?: Record<string, number>;
    /** Timestamp string; the builder fills it with `Date.prototype.toISOString()`. */
    submittedAt: string;
    evidenceChain: EvidenceChain;
}
|
|
14
|
+
/**
 * A nonce together with its expiry.
 */
export interface NonceResponse {
    nonce: string;
    /** Expiry as a string. NOTE(review): presumably an ISO-8601 timestamp; confirm with the producer. */
    expiresAt: string;
}
|
|
18
|
+
/**
 * Status and message reported for a submission.
 */
export interface SubmissionResponse {
    /** One of the three allowed status strings. */
    status: "pending" | "accepted" | "rejected";
    message: string;
}
|
|
22
|
+
/**
 * A single leaderboard entry: a ranked model with its score.
 */
export interface LeaderboardEntry {
    rank: number;
    model: string;
    score: number;
    /** Two-number tuple. Presumably the bounds of a 95% confidence interval; confirm with the producer. */
    ci95?: [number, number];
    verificationStatus?: "pending" | "verified" | "disputed";
}
|
|
29
|
+
/**
 * Optional filters and paging values for a leaderboard query.
 *
 * Every field also accepts a plain string. The open-ended member of each
 * union is written as `(string & {})` rather than `string` so that the known
 * literals are not absorbed into `string`; any string is still assignable.
 */
export interface LeaderboardQuery {
    limit?: number | string;
    offset?: number | string;
    sort?: "asc" | "desc" | (string & {});
    complexity?: "C1" | "C2" | "C3" | "C4" | "mixed" | (string & {});
    dimension?:
        | "functionalCompleteness"
        | "codeQuality"
        | "logicAccuracy"
        | "security"
        | "engineeringPractice"
        | (string & {});
}
|
|
36
|
+
/**
 * Evidence attached to a submission: a phase timeline, per-round samples,
 * and a description of the environment.
 */
export interface EvidenceChain {
    /** One entry per phase, with start/completion strings and the model involved. */
    timeline: {
        phase: "generate" | "execute" | "evaluate" | "score";
        startedAt: string;
        completedAt: string;
        model: string;
    }[];
    /** Requirement and code submission recorded for each round index. */
    samples: {
        roundIndex: number;
        requirement: string;
        codeSubmission: string;
    }[];
    environment: {
        os: string;
        nodeVersion: string;
        timezone: string;
    };
}
|
|
54
|
+
//# sourceMappingURL=submitter-types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"submitter-types.d.ts","sourceRoot":"","sources":["../src/submitter-types.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,cAAc,EAAE,MAAM,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,OAAO,CAAC;IACjD,YAAY,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACxB,cAAc,CAAC,EAAE,MAAM,GAAG,UAAU,GAAG,KAAK,CAAC;IAC7C,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACzC,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,aAAa,CAAC;CAC9B;AAED,MAAM,WAAW,aAAa;IAC5B,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,kBAAkB;IACjC,MAAM,EAAE,SAAS,GAAG,UAAU,GAAG,UAAU,CAAC;IAC5C,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACxB,kBAAkB,CAAC,EAAE,SAAS,GAAG,UAAU,GAAG,UAAU,CAAC;CAC1D;AAED,MAAM,WAAW,gBAAgB;IAC/B,KAAK,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC;IACxB,MAAM,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC;IACzB,IAAI,CAAC,EAAE,KAAK,GAAG,MAAM,GAAG,MAAM,CAAC;IAC/B,UAAU,CAAC,EAAE,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,OAAO,GAAG,MAAM,CAAC;IAC1D,SAAS,CAAC,EACN,wBAAwB,GACxB,aAAa,GACb,eAAe,GACf,UAAU,GACV,qBAAqB,GACrB,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,aAAa;IAC5B,QAAQ,EAAE,KAAK,CAAC;QACd,KAAK,EAAE,UAAU,GAAG,SAAS,GAAG,UAAU,GAAG,OAAO,CAAC;QACrD,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,EAAE,MAAM,CAAC;QACpB,KAAK,EAAE,MAAM,CAAC;KACf,CAAC,CAAC;IACH,OAAO,EAAE,KAAK,CAAC;QACb,UAAU,EAAE,MAAM,CAAC;QACnB,WAAW,EAAE,MAAM,CAAC;QACpB,cAAc,EAAE,MAAM,CAAC;KACxB,CAAC,CAAC;IACH,WAAW,EAAE;QACX,EAAE,EAAE,MAAM,CAAC;QACX,WAAW,EAAE,MAAM,CAAC;QACpB,QAAQ,EAAE,MAAM,CAAC;KAClB,CAAC;CACH"}
|