@req2rank/core 0.1.0-r7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/adaptive-calibration.d.ts +13 -0
- package/dist/adaptive-calibration.d.ts.map +1 -0
- package/dist/adaptive-calibration.js +39 -0
- package/dist/adaptive-calibration.js.map +1 -0
- package/dist/adaptive-calibration.test.d.ts +2 -0
- package/dist/adaptive-calibration.test.d.ts.map +1 -0
- package/dist/adaptive-calibration.test.js +20 -0
- package/dist/adaptive-calibration.test.js.map +1 -0
- package/dist/checkpoint-key.d.ts +3 -0
- package/dist/checkpoint-key.d.ts.map +1 -0
- package/dist/checkpoint-key.js +29 -0
- package/dist/checkpoint-key.js.map +1 -0
- package/dist/checkpoint-key.test.d.ts +2 -0
- package/dist/checkpoint-key.test.d.ts.map +1 -0
- package/dist/checkpoint-key.test.js +32 -0
- package/dist/checkpoint-key.test.js.map +1 -0
- package/dist/config.d.ts +205 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +85 -0
- package/dist/config.js.map +1 -0
- package/dist/domain-taxonomy.d.ts +13 -0
- package/dist/domain-taxonomy.d.ts.map +1 -0
- package/dist/domain-taxonomy.js +12 -0
- package/dist/domain-taxonomy.js.map +1 -0
- package/dist/evaluation-panel.d.ts +27 -0
- package/dist/evaluation-panel.d.ts.map +1 -0
- package/dist/evaluation-panel.js +158 -0
- package/dist/evaluation-panel.js.map +1 -0
- package/dist/evaluation-panel.test.d.ts +2 -0
- package/dist/evaluation-panel.test.d.ts.map +1 -0
- package/dist/evaluation-panel.test.js +185 -0
- package/dist/evaluation-panel.test.js.map +1 -0
- package/dist/evidence-chain.d.ts +11 -0
- package/dist/evidence-chain.d.ts.map +1 -0
- package/dist/evidence-chain.js +33 -0
- package/dist/evidence-chain.js.map +1 -0
- package/dist/evidence-chain.test.d.ts +2 -0
- package/dist/evidence-chain.test.d.ts.map +1 -0
- package/dist/evidence-chain.test.js +16 -0
- package/dist/evidence-chain.test.js.map +1 -0
- package/dist/execution-engine.d.ts +29 -0
- package/dist/execution-engine.d.ts.map +1 -0
- package/dist/execution-engine.js +102 -0
- package/dist/execution-engine.js.map +1 -0
- package/dist/execution-engine.test.d.ts +2 -0
- package/dist/execution-engine.test.d.ts.map +1 -0
- package/dist/execution-engine.test.js +86 -0
- package/dist/execution-engine.test.js.map +1 -0
- package/dist/hub-client.d.ts +21 -0
- package/dist/hub-client.d.ts.map +1 -0
- package/dist/hub-client.js +99 -0
- package/dist/hub-client.js.map +1 -0
- package/dist/hub-client.test.d.ts +2 -0
- package/dist/hub-client.test.d.ts.map +1 -0
- package/dist/hub-client.test.js +129 -0
- package/dist/hub-client.test.js.map +1 -0
- package/dist/index.d.ts +19 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +19 -0
- package/dist/index.js.map +1 -0
- package/dist/leaderboard-query.d.ts +29 -0
- package/dist/leaderboard-query.d.ts.map +1 -0
- package/dist/leaderboard-query.js +59 -0
- package/dist/leaderboard-query.js.map +1 -0
- package/dist/leaderboard-query.test.d.ts +2 -0
- package/dist/leaderboard-query.test.d.ts.map +1 -0
- package/dist/leaderboard-query.test.js +34 -0
- package/dist/leaderboard-query.test.js.map +1 -0
- package/dist/local-store.d.ts +18 -0
- package/dist/local-store.d.ts.map +1 -0
- package/dist/local-store.js +181 -0
- package/dist/local-store.js.map +1 -0
- package/dist/local-store.test.d.ts +2 -0
- package/dist/local-store.test.d.ts.map +1 -0
- package/dist/local-store.test.js +153 -0
- package/dist/local-store.test.js.map +1 -0
- package/dist/pipeline-stage-handoff.test.d.ts +2 -0
- package/dist/pipeline-stage-handoff.test.d.ts.map +1 -0
- package/dist/pipeline-stage-handoff.test.js +290 -0
- package/dist/pipeline-stage-handoff.test.js.map +1 -0
- package/dist/pipeline.d.ts +67 -0
- package/dist/pipeline.d.ts.map +1 -0
- package/dist/pipeline.js +493 -0
- package/dist/pipeline.js.map +1 -0
- package/dist/providers/anthropic.d.ts +8 -0
- package/dist/providers/anthropic.d.ts.map +1 -0
- package/dist/providers/anthropic.js +45 -0
- package/dist/providers/anthropic.js.map +1 -0
- package/dist/providers/base.d.ts +36 -0
- package/dist/providers/base.d.ts.map +1 -0
- package/dist/providers/base.js +47 -0
- package/dist/providers/base.js.map +1 -0
- package/dist/providers/custom.d.ts +6 -0
- package/dist/providers/custom.d.ts.map +1 -0
- package/dist/providers/custom.js +6 -0
- package/dist/providers/custom.js.map +1 -0
- package/dist/providers/google.d.ts +8 -0
- package/dist/providers/google.d.ts.map +1 -0
- package/dist/providers/google.js +48 -0
- package/dist/providers/google.js.map +1 -0
- package/dist/providers/index.d.ts +31 -0
- package/dist/providers/index.d.ts.map +1 -0
- package/dist/providers/index.js +63 -0
- package/dist/providers/index.js.map +1 -0
- package/dist/providers/openai.d.ts +18 -0
- package/dist/providers/openai.d.ts.map +1 -0
- package/dist/providers/openai.js +111 -0
- package/dist/providers/openai.js.map +1 -0
- package/dist/providers/providers.test.d.ts +2 -0
- package/dist/providers/providers.test.d.ts.map +1 -0
- package/dist/providers/providers.test.js +71 -0
- package/dist/providers/providers.test.js.map +1 -0
- package/dist/requirement-generator.d.ts +29 -0
- package/dist/requirement-generator.d.ts.map +1 -0
- package/dist/requirement-generator.js +358 -0
- package/dist/requirement-generator.js.map +1 -0
- package/dist/requirement-generator.test.d.ts +2 -0
- package/dist/requirement-generator.test.d.ts.map +1 -0
- package/dist/requirement-generator.test.js +182 -0
- package/dist/requirement-generator.test.js.map +1 -0
- package/dist/sandbox.d.ts +32 -0
- package/dist/sandbox.d.ts.map +1 -0
- package/dist/sandbox.js +124 -0
- package/dist/sandbox.js.map +1 -0
- package/dist/sandbox.test.d.ts +2 -0
- package/dist/sandbox.test.d.ts.map +1 -0
- package/dist/sandbox.test.js +20 -0
- package/dist/sandbox.test.js.map +1 -0
- package/dist/scoring-engine.d.ts +15 -0
- package/dist/scoring-engine.d.ts.map +1 -0
- package/dist/scoring-engine.js +109 -0
- package/dist/scoring-engine.js.map +1 -0
- package/dist/scoring-engine.test.d.ts +2 -0
- package/dist/scoring-engine.test.d.ts.map +1 -0
- package/dist/scoring-engine.test.js +137 -0
- package/dist/scoring-engine.test.js.map +1 -0
- package/dist/submit-payload-builder.d.ts +9 -0
- package/dist/submit-payload-builder.d.ts.map +1 -0
- package/dist/submit-payload-builder.js +23 -0
- package/dist/submit-payload-builder.js.map +1 -0
- package/dist/submit-payload-builder.test.d.ts +2 -0
- package/dist/submit-payload-builder.test.d.ts.map +1 -0
- package/dist/submit-payload-builder.test.js +75 -0
- package/dist/submit-payload-builder.test.js.map +1 -0
- package/dist/submitter-types.d.ts +54 -0
- package/dist/submitter-types.d.ts.map +1 -0
- package/dist/submitter-types.js +2 -0
- package/dist/submitter-types.js.map +1 -0
- package/dist/types.d.ts +40 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/package.json +36 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Req-2-Rank Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { Complexity } from "./types.js";
|
|
2
|
+
/** One historical run as consumed by `calibrateComplexity`. */
export interface CalibrationInput {
|
|
3
|
+
/** Score of the run; `calibrateComplexity` averages this value over the whole history. */
score: number;
|
|
4
|
+
/** Complexity level the run was executed at. */
complexity: Complexity;
|
|
5
|
+
}
|
|
6
|
+
/** Recommendation produced by `calibrateComplexity`. */
export interface CalibrationResult {
|
|
7
|
+
/** Complexity level suggested for the next run. */
recommendedComplexity: Complexity;
|
|
8
|
+
/** Human-readable sentence explaining why the level was raised, lowered or kept. */
reason: string;
|
|
9
|
+
/** Arithmetic mean of the history scores (0 when the history is empty). */
averageScore: number;
|
|
10
|
+
/** Number of history entries the recommendation is based on. */
sampleSize: number;
|
|
11
|
+
}
|
|
12
|
+
/**
 * Recommends a complexity level from past run scores.
 *
 * @param history Past runs; the last entry supplies the current complexity level.
 * @returns The recommended level together with the average score, sample size and a reason string.
 */
export declare function calibrateComplexity(history: CalibrationInput[]): CalibrationResult;
|
|
13
|
+
//# sourceMappingURL=adaptive-calibration.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"adaptive-calibration.d.ts","sourceRoot":"","sources":["../src/adaptive-calibration.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAExC,MAAM,WAAW,gBAAgB;IAC/B,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,UAAU,CAAC;CACxB;AAED,MAAM,WAAW,iBAAiB;IAChC,qBAAqB,EAAE,UAAU,CAAC;IAClC,MAAM,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;CACpB;AAID,wBAAgB,mBAAmB,CAAC,OAAO,EAAE,gBAAgB,EAAE,GAAG,iBAAiB,CAwClF"}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/** Complexity levels ordered from easiest to hardest; calibration moves at most one step along this list. */
const ORDER = ["C1", "C2", "C3", "C4"];
/**
 * Recommends the complexity level for the next run based on historical scores.
 *
 * The current level is taken from the last history entry. When the average
 * score is at least 88 the recommendation moves one level up (unless already
 * at the top); when it is at most 60 it moves one level down (unless already
 * at the bottom); otherwise the current level is kept.
 *
 * @param history Past runs, each with a numeric `score` and a `complexity` level.
 * @returns `{ recommendedComplexity, reason, averageScore, sampleSize }`.
 */
export function calibrateComplexity(history) {
    if (history.length === 0) {
        return {
            recommendedComplexity: "C2",
            reason: "No historical data; start from C2 baseline.",
            averageScore: 0,
            sampleSize: 0
        };
    }
    const avg = history.reduce((sum, run) => sum + run.score, 0) / history.length;
    const reported = history[history.length - 1]?.complexity ?? "C2";
    // A level that is not on the ladder (for example "mixed") has no position in
    // ORDER. Without this guard indexOf() yields -1, so a high average would
    // "increase" to C1 and any other average would echo the unknown value back as
    // the recommendation. Treat such values like missing data: use the C2 baseline.
    const latest = ORDER.includes(reported) ? reported : "C2";
    const latestIndex = ORDER.indexOf(latest);
    if (avg >= 88 && latestIndex < ORDER.length - 1) {
        const next = ORDER[latestIndex + 1];
        return {
            recommendedComplexity: next,
            reason: `Average score ${avg.toFixed(1)} is high; increase to ${next}.`,
            averageScore: avg,
            sampleSize: history.length
        };
    }
    if (avg <= 60 && latestIndex > 0) {
        const previous = ORDER[latestIndex - 1];
        return {
            recommendedComplexity: previous,
            reason: `Average score ${avg.toFixed(1)} is low; reduce to ${previous}.`,
            averageScore: avg,
            sampleSize: history.length
        };
    }
    return {
        recommendedComplexity: latest,
        reason: `Average score ${avg.toFixed(1)} supports keeping ${latest}.`,
        averageScore: avg,
        sampleSize: history.length
    };
}
|
|
39
|
+
//# sourceMappingURL=adaptive-calibration.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"adaptive-calibration.js","sourceRoot":"","sources":["../src/adaptive-calibration.ts"],"names":[],"mappings":"AAcA,MAAM,KAAK,GAAiB,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC;AAErD,MAAM,UAAU,mBAAmB,CAAC,OAA2B;IAC7D,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO;YACL,qBAAqB,EAAE,IAAI;YAC3B,MAAM,EAAE,6CAA6C;YACrD,YAAY,EAAE,CAAC;YACf,UAAU,EAAE,CAAC;SACd,CAAC;IACJ,CAAC;IAED,MAAM,GAAG,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;IAC9E,MAAM,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,UAAU,IAAI,IAAI,CAAC;IAC/D,MAAM,WAAW,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IAE1C,IAAI,GAAG,IAAI,EAAE,IAAI,WAAW,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAChD,MAAM,IAAI,GAAG,KAAK,CAAC,WAAW,GAAG,CAAC,CAAC,CAAC;QACpC,OAAO;YACL,qBAAqB,EAAE,IAAI;YAC3B,MAAM,EAAE,iBAAiB,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,yBAAyB,IAAI,GAAG;YACvE,YAAY,EAAE,GAAG;YACjB,UAAU,EAAE,OAAO,CAAC,MAAM;SAC3B,CAAC;IACJ,CAAC;IAED,IAAI,GAAG,IAAI,EAAE,IAAI,WAAW,GAAG,CAAC,EAAE,CAAC;QACjC,MAAM,QAAQ,GAAG,KAAK,CAAC,WAAW,GAAG,CAAC,CAAC,CAAC;QACxC,OAAO;YACL,qBAAqB,EAAE,QAAQ;YAC/B,MAAM,EAAE,iBAAiB,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,sBAAsB,QAAQ,GAAG;YACxE,YAAY,EAAE,GAAG;YACjB,UAAU,EAAE,OAAO,CAAC,MAAM;SAC3B,CAAC;IACJ,CAAC;IAED,OAAO;QACL,qBAAqB,EAAE,MAAM;QAC7B,MAAM,EAAE,iBAAiB,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,qBAAqB,MAAM,GAAG;QACrE,YAAY,EAAE,GAAG;QACjB,UAAU,EAAE,OAAO,CAAC,MAAM;KAC3B,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"adaptive-calibration.test.d.ts","sourceRoot":"","sources":["../src/adaptive-calibration.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { calibrateComplexity } from "./adaptive-calibration.js";
|
|
3
|
+
// Checks that calibrateComplexity steps one level up on strong scores and one level down on weak ones.
describe("calibrateComplexity", () => {
    it("raises complexity for strong history", () => {
        const strongHistory = [90, 92].map((score) => ({ score, complexity: "C2" }));
        const { recommendedComplexity, sampleSize } = calibrateComplexity(strongHistory);
        expect(recommendedComplexity).toBe("C3");
        expect(sampleSize).toBe(2);
    });
    it("lowers complexity for weak history", () => {
        const weakHistory = [40, 55].map((score) => ({ score, complexity: "C3" }));
        const outcome = calibrateComplexity(weakHistory);
        expect(outcome.recommendedComplexity).toBe("C2");
    });
});
|
|
20
|
+
//# sourceMappingURL=adaptive-calibration.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"adaptive-calibration.test.js","sourceRoot":"","sources":["../src/adaptive-calibration.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,mBAAmB,EAAE,MAAM,2BAA2B,CAAC;AAEhE,QAAQ,CAAC,qBAAqB,EAAE,GAAG,EAAE;IACnC,EAAE,CAAC,sCAAsC,EAAE,GAAG,EAAE;QAC9C,MAAM,MAAM,GAAG,mBAAmB,CAAC;YACjC,EAAE,KAAK,EAAE,EAAE,EAAE,UAAU,EAAE,IAAI,EAAE;YAC/B,EAAE,KAAK,EAAE,EAAE,EAAE,UAAU,EAAE,IAAI,EAAE;SAChC,CAAC,CAAC;QACH,MAAM,CAAC,MAAM,CAAC,qBAAqB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAChD,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,GAAG,EAAE;QAC5C,MAAM,MAAM,GAAG,mBAAmB,CAAC;YACjC,EAAE,KAAK,EAAE,EAAE,EAAE,UAAU,EAAE,IAAI,EAAE;YAC/B,EAAE,KAAK,EAAE,EAAE,EAAE,UAAU,EAAE,IAAI,EAAE;SAChC,CAAC,CAAC;QACH,MAAM,CAAC,MAAM,CAAC,qBAAqB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"checkpoint-key.d.ts","sourceRoot":"","sources":["../src/checkpoint-key.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAE7C,wBAAgB,2BAA2B,CAAC,KAAK,EAAE,KAAK,GAAG,SAAS,EAAE,MAAM,EAAE,cAAc,GAAG,MAAM,CA2BpG"}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
export function createPipelineCheckpointKey(scope, config) {
|
|
3
|
+
const signaturePayload = {
|
|
4
|
+
target: {
|
|
5
|
+
provider: config.target.provider,
|
|
6
|
+
model: config.target.model,
|
|
7
|
+
baseUrl: config.target.baseUrl ?? null
|
|
8
|
+
},
|
|
9
|
+
systemModel: {
|
|
10
|
+
provider: config.systemModel.provider,
|
|
11
|
+
model: config.systemModel.model,
|
|
12
|
+
baseUrl: config.systemModel.baseUrl ?? null
|
|
13
|
+
},
|
|
14
|
+
judges: config.judges.map((judge) => ({
|
|
15
|
+
provider: judge.provider,
|
|
16
|
+
model: judge.model,
|
|
17
|
+
baseUrl: judge.baseUrl ?? null,
|
|
18
|
+
weight: judge.weight
|
|
19
|
+
})),
|
|
20
|
+
test: {
|
|
21
|
+
complexity: config.test.complexity,
|
|
22
|
+
rounds: config.test.rounds,
|
|
23
|
+
concurrency: config.test.concurrency
|
|
24
|
+
}
|
|
25
|
+
};
|
|
26
|
+
const hash = createHash("sha256").update(JSON.stringify(signaturePayload)).digest("hex").slice(0, 16);
|
|
27
|
+
return `${scope}:${config.target.provider}/${config.target.model}:${hash}`;
|
|
28
|
+
}
|
|
29
|
+
//# sourceMappingURL=checkpoint-key.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"checkpoint-key.js","sourceRoot":"","sources":["../src/checkpoint-key.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAGzC,MAAM,UAAU,2BAA2B,CAAC,KAAwB,EAAE,MAAsB;IAC1F,MAAM,gBAAgB,GAAG;QACvB,MAAM,EAAE;YACN,QAAQ,EAAE,MAAM,CAAC,MAAM,CAAC,QAAQ;YAChC,KAAK,EAAE,MAAM,CAAC,MAAM,CAAC,KAAK;YAC1B,OAAO,EAAE,MAAM,CAAC,MAAM,CAAC,OAAO,IAAI,IAAI;SACvC;QACD,WAAW,EAAE;YACX,QAAQ,EAAE,MAAM,CAAC,WAAW,CAAC,QAAQ;YACrC,KAAK,EAAE,MAAM,CAAC,WAAW,CAAC,KAAK;YAC/B,OAAO,EAAE,MAAM,CAAC,WAAW,CAAC,OAAO,IAAI,IAAI;SAC5C;QACD,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;YACpC,QAAQ,EAAE,KAAK,CAAC,QAAQ;YACxB,KAAK,EAAE,KAAK,CAAC,KAAK;YAClB,OAAO,EAAE,KAAK,CAAC,OAAO,IAAI,IAAI;YAC9B,MAAM,EAAE,KAAK,CAAC,MAAM;SACrB,CAAC,CAAC;QACH,IAAI,EAAE;YACJ,UAAU,EAAE,MAAM,CAAC,IAAI,CAAC,UAAU;YAClC,MAAM,EAAE,MAAM,CAAC,IAAI,CAAC,MAAM;YAC1B,WAAW,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW;SACrC;KACF,CAAC;IAEF,MAAM,IAAI,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,gBAAgB,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IACtG,OAAO,GAAG,KAAK,IAAI,MAAM,CAAC,MAAM,CAAC,QAAQ,IAAI,MAAM,CAAC,MAAM,CAAC,KAAK,IAAI,IAAI,EAAE,CAAC;AAC7E,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"checkpoint-key.test.d.ts","sourceRoot":"","sources":["../src/checkpoint-key.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { defaultConfig } from "./config.js";
|
|
3
|
+
import { createPipelineCheckpointKey } from "./checkpoint-key.js";
|
|
4
|
+
/** Returns an independent copy of `defaultConfig` via a JSON round-trip so tests can mutate it freely. */
function cloneConfig() {
    const serialized = JSON.stringify(defaultConfig);
    return JSON.parse(serialized);
}
|
|
7
|
+
// Checks that the checkpoint key is stable for equal configs and changes with judge or target settings.
describe("createPipelineCheckpointKey", () => {
    const keyFor = (config) => createPipelineCheckpointKey("run", config);
    it("returns deterministic key for identical config", () => {
        const config = cloneConfig();
        const first = keyFor(config);
        const second = keyFor(config);
        expect(first).toBe(second);
        expect(first).toContain("run:openai/gpt-4o-mini:");
    });
    it("changes key when judge model changes", () => {
        const baseline = cloneConfig();
        const modified = cloneConfig();
        modified.judges[0].model = "gpt-4.1";
        const baselineKey = keyFor(baseline);
        const modifiedKey = keyFor(modified);
        expect(baselineKey).not.toBe(modifiedKey);
    });
    it("changes key when target provider protocol changes", () => {
        const baseline = cloneConfig();
        const modified = cloneConfig();
        modified.target.provider = "openai-response";
        const baselineKey = keyFor(baseline);
        const modifiedKey = keyFor(modified);
        expect(baselineKey).not.toBe(modifiedKey);
    });
});
|
|
32
|
+
//# sourceMappingURL=checkpoint-key.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"checkpoint-key.test.js","sourceRoot":"","sources":["../src/checkpoint-key.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAkB,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5D,OAAO,EAAE,2BAA2B,EAAE,MAAM,qBAAqB,CAAC;AAElE,SAAS,WAAW;IAClB,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,CAAmB,CAAC;AACrE,CAAC;AAED,QAAQ,CAAC,6BAA6B,EAAE,GAAG,EAAE;IAC3C,EAAE,CAAC,gDAAgD,EAAE,GAAG,EAAE;QACxD,MAAM,MAAM,GAAG,WAAW,EAAE,CAAC;QAC7B,MAAM,KAAK,GAAG,2BAA2B,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;QACzD,MAAM,MAAM,GAAG,2BAA2B,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;QAE1D,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC3B,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,yBAAyB,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sCAAsC,EAAE,GAAG,EAAE;QAC9C,MAAM,IAAI,GAAG,WAAW,EAAE,CAAC;QAC3B,MAAM,KAAK,GAAG,WAAW,EAAE,CAAC;QAC5B,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,SAAS,CAAC;QAElC,MAAM,OAAO,GAAG,2BAA2B,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;QACzD,MAAM,QAAQ,GAAG,2BAA2B,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;QAC3D,MAAM,CAAC,OAAO,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACrC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mDAAmD,EAAE,GAAG,EAAE;QAC3D,MAAM,IAAI,GAAG,WAAW,EAAE,CAAC;QAC3B,MAAM,KAAK,GAAG,WAAW,EAAE,CAAC;QAC5B,KAAK,CAAC,MAAM,CAAC,QAAQ,GAAG,iBAAiB,CAAC;QAE1C,MAAM,OAAO,GAAG,2BAA2B,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;QACzD,MAAM,QAAQ,GAAG,2BAA2B,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;QAC3D,MAAM,CAAC,OAAO,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACrC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
package/dist/config.d.ts
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { Complexity } from "./types.js";
|
|
3
|
+
import { EvidenceChain } from "./submitter-types.js";
|
|
4
|
+
export declare const req2rankConfigSchema: z.ZodObject<{
|
|
5
|
+
target: z.ZodEffects<z.ZodObject<{
|
|
6
|
+
provider: z.ZodEnum<["openai", "openai-response", "gemini", "anthropic", "azure-openai", "newapi", "google", "custom"]>;
|
|
7
|
+
model: z.ZodString;
|
|
8
|
+
apiKey: z.ZodOptional<z.ZodString>;
|
|
9
|
+
baseUrl: z.ZodOptional<z.ZodNullable<z.ZodString>>;
|
|
10
|
+
}, z.UnknownKeysParam, z.ZodTypeAny, {
|
|
11
|
+
provider: "custom" | "openai" | "openai-response" | "gemini" | "anthropic" | "azure-openai" | "newapi" | "google";
|
|
12
|
+
model: string;
|
|
13
|
+
apiKey?: string | undefined;
|
|
14
|
+
baseUrl?: string | null | undefined;
|
|
15
|
+
}, {
|
|
16
|
+
provider: "custom" | "openai" | "openai-response" | "gemini" | "anthropic" | "azure-openai" | "newapi" | "google";
|
|
17
|
+
model: string;
|
|
18
|
+
apiKey?: string | undefined;
|
|
19
|
+
baseUrl?: string | null | undefined;
|
|
20
|
+
}>, {
|
|
21
|
+
provider: "custom" | "openai" | "openai-response" | "gemini" | "anthropic" | "azure-openai" | "newapi" | "google";
|
|
22
|
+
model: string;
|
|
23
|
+
apiKey?: string | undefined;
|
|
24
|
+
baseUrl?: string | null | undefined;
|
|
25
|
+
}, {
|
|
26
|
+
provider: "custom" | "openai" | "openai-response" | "gemini" | "anthropic" | "azure-openai" | "newapi" | "google";
|
|
27
|
+
model: string;
|
|
28
|
+
apiKey?: string | undefined;
|
|
29
|
+
baseUrl?: string | null | undefined;
|
|
30
|
+
}>;
|
|
31
|
+
systemModel: z.ZodEffects<z.ZodObject<{
|
|
32
|
+
provider: z.ZodEnum<["openai", "openai-response", "gemini", "anthropic", "azure-openai", "newapi", "google", "custom"]>;
|
|
33
|
+
model: z.ZodString;
|
|
34
|
+
apiKey: z.ZodOptional<z.ZodString>;
|
|
35
|
+
baseUrl: z.ZodOptional<z.ZodNullable<z.ZodString>>;
|
|
36
|
+
}, z.UnknownKeysParam, z.ZodTypeAny, {
|
|
37
|
+
provider: "custom" | "openai" | "openai-response" | "gemini" | "anthropic" | "azure-openai" | "newapi" | "google";
|
|
38
|
+
model: string;
|
|
39
|
+
apiKey?: string | undefined;
|
|
40
|
+
baseUrl?: string | null | undefined;
|
|
41
|
+
}, {
|
|
42
|
+
provider: "custom" | "openai" | "openai-response" | "gemini" | "anthropic" | "azure-openai" | "newapi" | "google";
|
|
43
|
+
model: string;
|
|
44
|
+
apiKey?: string | undefined;
|
|
45
|
+
baseUrl?: string | null | undefined;
|
|
46
|
+
}>, {
|
|
47
|
+
provider: "custom" | "openai" | "openai-response" | "gemini" | "anthropic" | "azure-openai" | "newapi" | "google";
|
|
48
|
+
model: string;
|
|
49
|
+
apiKey?: string | undefined;
|
|
50
|
+
baseUrl?: string | null | undefined;
|
|
51
|
+
}, {
|
|
52
|
+
provider: "custom" | "openai" | "openai-response" | "gemini" | "anthropic" | "azure-openai" | "newapi" | "google";
|
|
53
|
+
model: string;
|
|
54
|
+
apiKey?: string | undefined;
|
|
55
|
+
baseUrl?: string | null | undefined;
|
|
56
|
+
}>;
|
|
57
|
+
judges: z.ZodArray<z.ZodEffects<z.ZodObject<{
|
|
58
|
+
provider: z.ZodEnum<["openai", "openai-response", "gemini", "anthropic", "azure-openai", "newapi", "google", "custom"]>;
|
|
59
|
+
model: z.ZodString;
|
|
60
|
+
apiKey: z.ZodOptional<z.ZodString>;
|
|
61
|
+
baseUrl: z.ZodOptional<z.ZodNullable<z.ZodString>>;
|
|
62
|
+
} & {
|
|
63
|
+
weight: z.ZodDefault<z.ZodNumber>;
|
|
64
|
+
}, z.UnknownKeysParam, z.ZodTypeAny, {
|
|
65
|
+
provider: "custom" | "openai" | "openai-response" | "gemini" | "anthropic" | "azure-openai" | "newapi" | "google";
|
|
66
|
+
model: string;
|
|
67
|
+
weight: number;
|
|
68
|
+
apiKey?: string | undefined;
|
|
69
|
+
baseUrl?: string | null | undefined;
|
|
70
|
+
}, {
|
|
71
|
+
provider: "custom" | "openai" | "openai-response" | "gemini" | "anthropic" | "azure-openai" | "newapi" | "google";
|
|
72
|
+
model: string;
|
|
73
|
+
apiKey?: string | undefined;
|
|
74
|
+
baseUrl?: string | null | undefined;
|
|
75
|
+
weight?: number | undefined;
|
|
76
|
+
}>, {
|
|
77
|
+
provider: "custom" | "openai" | "openai-response" | "gemini" | "anthropic" | "azure-openai" | "newapi" | "google";
|
|
78
|
+
model: string;
|
|
79
|
+
weight: number;
|
|
80
|
+
apiKey?: string | undefined;
|
|
81
|
+
baseUrl?: string | null | undefined;
|
|
82
|
+
}, {
|
|
83
|
+
provider: "custom" | "openai" | "openai-response" | "gemini" | "anthropic" | "azure-openai" | "newapi" | "google";
|
|
84
|
+
model: string;
|
|
85
|
+
apiKey?: string | undefined;
|
|
86
|
+
baseUrl?: string | null | undefined;
|
|
87
|
+
weight?: number | undefined;
|
|
88
|
+
}>, "many">;
|
|
89
|
+
test: z.ZodObject<{
|
|
90
|
+
complexity: z.ZodUnion<[z.ZodLiteral<"C1">, z.ZodLiteral<"C2">, z.ZodLiteral<"C3">, z.ZodLiteral<"C4">, z.ZodLiteral<"mixed">]>;
|
|
91
|
+
rounds: z.ZodNumber;
|
|
92
|
+
concurrency: z.ZodNumber;
|
|
93
|
+
}, "strip", z.ZodTypeAny, {
|
|
94
|
+
complexity: "C1" | "C2" | "C3" | "C4" | "mixed";
|
|
95
|
+
rounds: number;
|
|
96
|
+
concurrency: number;
|
|
97
|
+
}, {
|
|
98
|
+
complexity: "C1" | "C2" | "C3" | "C4" | "mixed";
|
|
99
|
+
rounds: number;
|
|
100
|
+
concurrency: number;
|
|
101
|
+
}>;
|
|
102
|
+
hub: z.ZodOptional<z.ZodObject<{
|
|
103
|
+
enabled: z.ZodDefault<z.ZodBoolean>;
|
|
104
|
+
serverUrl: z.ZodOptional<z.ZodString>;
|
|
105
|
+
token: z.ZodOptional<z.ZodString>;
|
|
106
|
+
}, "strip", z.ZodTypeAny, {
|
|
107
|
+
enabled: boolean;
|
|
108
|
+
serverUrl?: string | undefined;
|
|
109
|
+
token?: string | undefined;
|
|
110
|
+
}, {
|
|
111
|
+
enabled?: boolean | undefined;
|
|
112
|
+
serverUrl?: string | undefined;
|
|
113
|
+
token?: string | undefined;
|
|
114
|
+
}>>;
|
|
115
|
+
}, "strip", z.ZodTypeAny, {
|
|
116
|
+
target: {
|
|
117
|
+
provider: "custom" | "openai" | "openai-response" | "gemini" | "anthropic" | "azure-openai" | "newapi" | "google";
|
|
118
|
+
model: string;
|
|
119
|
+
apiKey?: string | undefined;
|
|
120
|
+
baseUrl?: string | null | undefined;
|
|
121
|
+
};
|
|
122
|
+
systemModel: {
|
|
123
|
+
provider: "custom" | "openai" | "openai-response" | "gemini" | "anthropic" | "azure-openai" | "newapi" | "google";
|
|
124
|
+
model: string;
|
|
125
|
+
apiKey?: string | undefined;
|
|
126
|
+
baseUrl?: string | null | undefined;
|
|
127
|
+
};
|
|
128
|
+
judges: {
|
|
129
|
+
provider: "custom" | "openai" | "openai-response" | "gemini" | "anthropic" | "azure-openai" | "newapi" | "google";
|
|
130
|
+
model: string;
|
|
131
|
+
weight: number;
|
|
132
|
+
apiKey?: string | undefined;
|
|
133
|
+
baseUrl?: string | null | undefined;
|
|
134
|
+
}[];
|
|
135
|
+
test: {
|
|
136
|
+
complexity: "C1" | "C2" | "C3" | "C4" | "mixed";
|
|
137
|
+
rounds: number;
|
|
138
|
+
concurrency: number;
|
|
139
|
+
};
|
|
140
|
+
hub?: {
|
|
141
|
+
enabled: boolean;
|
|
142
|
+
serverUrl?: string | undefined;
|
|
143
|
+
token?: string | undefined;
|
|
144
|
+
} | undefined;
|
|
145
|
+
}, {
|
|
146
|
+
target: {
|
|
147
|
+
provider: "custom" | "openai" | "openai-response" | "gemini" | "anthropic" | "azure-openai" | "newapi" | "google";
|
|
148
|
+
model: string;
|
|
149
|
+
apiKey?: string | undefined;
|
|
150
|
+
baseUrl?: string | null | undefined;
|
|
151
|
+
};
|
|
152
|
+
systemModel: {
|
|
153
|
+
provider: "custom" | "openai" | "openai-response" | "gemini" | "anthropic" | "azure-openai" | "newapi" | "google";
|
|
154
|
+
model: string;
|
|
155
|
+
apiKey?: string | undefined;
|
|
156
|
+
baseUrl?: string | null | undefined;
|
|
157
|
+
};
|
|
158
|
+
judges: {
|
|
159
|
+
provider: "custom" | "openai" | "openai-response" | "gemini" | "anthropic" | "azure-openai" | "newapi" | "google";
|
|
160
|
+
model: string;
|
|
161
|
+
apiKey?: string | undefined;
|
|
162
|
+
baseUrl?: string | null | undefined;
|
|
163
|
+
weight?: number | undefined;
|
|
164
|
+
}[];
|
|
165
|
+
test: {
|
|
166
|
+
complexity: "C1" | "C2" | "C3" | "C4" | "mixed";
|
|
167
|
+
rounds: number;
|
|
168
|
+
concurrency: number;
|
|
169
|
+
};
|
|
170
|
+
hub?: {
|
|
171
|
+
enabled?: boolean | undefined;
|
|
172
|
+
serverUrl?: string | undefined;
|
|
173
|
+
token?: string | undefined;
|
|
174
|
+
} | undefined;
|
|
175
|
+
}>;
|
|
176
|
+
/** Validated configuration shape, i.e. the output type inferred from `req2rankConfigSchema`. */
export type Req2RankConfig = z.infer<typeof req2rankConfigSchema>;
|
|
177
|
+
/**
 * Summary of a single run as held in `LocalStoreShape.runs`.
 * NOTE(review): field semantics below are inferred from names and types only; the producer is not visible here.
 */
export interface RunRecord {
|
|
178
|
+
id: string;
|
|
179
|
+
/** Creation time as a string; presumably an ISO-8601 timestamp (TODO confirm). */
createdAt: string;
|
|
180
|
+
targetProvider: string;
|
|
181
|
+
targetModel: string;
|
|
182
|
+
/** Unlike calibration results, a run may also be recorded as "mixed". */
complexity: Complexity | "mixed";
|
|
183
|
+
rounds: number;
|
|
184
|
+
requirementTitle: string;
|
|
185
|
+
overallScore: number;
|
|
186
|
+
/** Scores keyed by dimension name. */
dimensionScores: Record<string, number>;
|
|
187
|
+
/** Two-number tuple; presumably the lower and upper bound of a 95% confidence interval (TODO confirm). */
ci95: [number, number];
|
|
188
|
+
agreementLevel: "high" | "moderate" | "low";
|
|
189
|
+
ijaScore?: number;
|
|
190
|
+
evidenceChain?: EvidenceChain;
|
|
191
|
+
}
|
|
192
|
+
/** Top-level shape of the local store: recorded runs plus optional calibration snapshots. */
export interface LocalStoreShape {
|
|
193
|
+
runs: RunRecord[];
|
|
194
|
+
/** Optional; may be absent from the store. */
calibrations?: CalibrationSnapshot[];
|
|
195
|
+
}
|
|
196
|
+
/**
 * Stored calibration entry: an `id` and `createdAt` plus the same four fields
 * that `CalibrationResult` carries (recommendedComplexity, reason, averageScore, sampleSize).
 */
export interface CalibrationSnapshot {
|
|
197
|
+
id: string;
|
|
198
|
+
/** Creation time as a string; presumably an ISO-8601 timestamp (TODO confirm). */
createdAt: string;
|
|
199
|
+
recommendedComplexity: Complexity;
|
|
200
|
+
reason: string;
|
|
201
|
+
averageScore: number;
|
|
202
|
+
sampleSize: number;
|
|
203
|
+
}
|
|
204
|
+
/** Built-in starting configuration, typed as the validated `Req2RankConfig` shape. */
export declare const defaultConfig: Req2RankConfig;
|
|
205
|
+
//# sourceMappingURL=config.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AACxC,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AA+CrD,eAAO,MAAM,oBAAoB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAgB/B,CAAC;AAEH,MAAM,MAAM,cAAc,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,oBAAoB,CAAC,CAAC;AAElE,MAAM,WAAW,SAAS;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,cAAc,EAAE,MAAM,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,UAAU,GAAG,OAAO,CAAC;IACjC,MAAM,EAAE,MAAM,CAAC;IACf,gBAAgB,EAAE,MAAM,CAAC;IACzB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACxC,IAAI,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACvB,cAAc,EAAE,MAAM,GAAG,UAAU,GAAG,KAAK,CAAC;IAC5C,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,aAAa,CAAC,EAAE,aAAa,CAAC;CAC/B;AAED,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,SAAS,EAAE,CAAC;IAClB,YAAY,CAAC,EAAE,mBAAmB,EAAE,CAAC;CACtC;AAED,MAAM,WAAW,mBAAmB;IAClC,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,qBAAqB,EAAE,UAAU,CAAC;IAClC,MAAM,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,eAAO,MAAM,aAAa,EAAE,cA2B3B,CAAC"}
|
package/dist/config.js
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
// Accepted values for `test.complexity`: the four fixed levels plus "mixed".
const complexityLiterals = [
    z.literal("C1"),
    z.literal("C2"),
    z.literal("C3"),
    z.literal("C4"),
    z.literal("mixed")
];
const complexitySchema = z.union(complexityLiterals);
// Provider identifiers accepted for any model endpoint.
const providerNames = [
    "openai",
    "openai-response",
    "gemini",
    "anthropic",
    "azure-openai",
    "newapi",
    "google",
    "custom"
];
const providerTypeSchema = z.enum(providerNames);
// Fields shared by every model endpoint: provider, non-empty model name,
// optional API key and an optional (or null) base URL that must be a valid URL.
const nonEmptyModelName = z.string().min(1);
const optionalApiKey = z.string().optional();
const optionalNullableUrl = z.string().url().nullable().optional();
const modelEndpointBaseSchema = z.object({
    provider: providerTypeSchema,
    model: nonEmptyModelName,
    apiKey: optionalApiKey,
    baseUrl: optionalNullableUrl
});
|
|
25
|
+
function withProviderBaseUrlRules(schema) {
|
|
26
|
+
return schema.superRefine((value, context) => {
|
|
27
|
+
if ((value.provider === "azure-openai" || value.provider === "newapi") && !value.baseUrl) {
|
|
28
|
+
context.addIssue({
|
|
29
|
+
code: z.ZodIssueCode.custom,
|
|
30
|
+
message: `baseUrl is required for ${value.provider}`,
|
|
31
|
+
path: ["baseUrl"]
|
|
32
|
+
});
|
|
33
|
+
}
|
|
34
|
+
});
|
|
35
|
+
}
|
|
36
|
+
/** Endpoint schema: the shared base fields plus the provider/baseUrl rule. */
const modelEndpointSchema = withProviderBaseUrlRules(modelEndpointBaseSchema);

/**
 * Judge endpoint schema: the base fields extended with a positive numeric
 * `weight` (defaulting to 1), wrapped in the same provider/baseUrl rule.
 */
const judgeEndpointSchema = withProviderBaseUrlRules(
    modelEndpointBaseSchema.extend({
        weight: z.number().positive().default(1)
    })
);
|
|
40
|
+
/**
 * Top-level configuration schema.
 *
 * - `target` and `systemModel` are single model endpoints.
 * - `judges` is an array of judge endpoints (each carrying a weight).
 * - `test` holds the complexity setting plus positive-integer `rounds`
 *   and `concurrency`.
 * - `hub` is optional; when given, `enabled` defaults to false and
 *   `serverUrl` / `token` may be omitted.
 */
export const req2rankConfigSchema = z.object({
    target: modelEndpointSchema,
    systemModel: modelEndpointSchema,
    judges: z.array(judgeEndpointSchema),
    test: z.object({
        complexity: complexitySchema,
        rounds: z.number().int().positive(),
        concurrency: z.number().int().positive()
    }),
    hub: z.object({
        enabled: z.boolean().default(false),
        serverUrl: z.string().url().optional(),
        token: z.string().optional()
    }).optional()
});
|
|
57
|
+
/**
 * Built-in default configuration: one target, one system model and a single
 * judge with weight 1, a single mixed-complexity round at concurrency 1,
 * and the hub switched off. All API keys are empty strings.
 */
export const defaultConfig = {
    target: { provider: "openai", model: "gpt-4o-mini", apiKey: "" },
    systemModel: { provider: "anthropic", model: "claude-sonnet-4-20250514", apiKey: "" },
    judges: [
        { provider: "openai", model: "gpt-4o", apiKey: "", weight: 1 }
    ],
    test: { complexity: "mixed", rounds: 1, concurrency: 1 },
    hub: { enabled: false }
};
|
|
85
|
+
//# sourceMappingURL=config.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAIxB,MAAM,gBAAgB,GAAG,CAAC,CAAC,KAAK,CAAC;IAC/B,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC;IACf,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC;IACf,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC;IACf,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC;IACf,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC;CACnB,CAAC,CAAC;AAEH,MAAM,kBAAkB,GAAG,CAAC,CAAC,IAAI,CAAC;IAChC,QAAQ;IACR,iBAAiB;IACjB,QAAQ;IACR,WAAW;IACX,cAAc;IACd,QAAQ;IACR,QAAQ;IACR,QAAQ;CACT,CAAC,CAAC;AAEH,MAAM,uBAAuB,GAAG,CAAC,CAAC,MAAM,CAAC;IACvC,QAAQ,EAAE,kBAAkB;IAC5B,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;IACxB,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAC7B,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE;CAChD,CAAC,CAAC;AAEH,SAAS,wBAAwB,CAA0B,MAAsB;IAC/E,OAAO,MAAM,CAAC,WAAW,CAAC,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE;QAC3C,IAAI,CAAC,KAAK,CAAC,QAAQ,KAAK,cAAc,IAAI,KAAK,CAAC,QAAQ,KAAK,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;YACzF,OAAO,CAAC,QAAQ,CAAC;gBACf,IAAI,EAAE,CAAC,CAAC,YAAY,CAAC,MAAM;gBAC3B,OAAO,EAAE,2BAA2B,KAAK,CAAC,QAAQ,EAAE;gBACpD,IAAI,EAAE,CAAC,SAAS,CAAC;aAClB,CAAC,CAAC;QACL,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,mBAAmB,GAAG,wBAAwB,CAAC,uBAAuB,CAAC,CAAC;AAC9E,MAAM,mBAAmB,GAAG,wBAAwB,CAClD,uBAAuB,CAAC,MAAM,CAAC;IAC7B,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;CACzC,CAAC,CACH,CAAC;AAEF,MAAM,CAAC,MAAM,oBAAoB,GAAG,CAAC,CAAC,MAAM,CAAC;IAC3C,MAAM,EAAE,mBAAmB;IAC3B,WAAW,EAAE,mBAAmB;IAChC,MAAM,EAAE,CAAC,CAAC,KAAK,CAAC,mBAAmB,CAAC;IACpC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;QACb,UAAU,EAAE,gBAAgB;QAC5B,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE;QACnC,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE;KACzC,CAAC;IACF,GAAG,EAAE,CAAC;SACH,MAAM,CAAC;QACN,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC;QACnC,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE;QACtC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;KAC7B,CAAC;SACD,QAAQ,EAAE;CACd,CAAC,CAAC;AA
kCH,MAAM,CAAC,MAAM,aAAa,GAAmB;IAC3C,MAAM,EAAE;QACN,QAAQ,EAAE,QAAQ;QAClB,KAAK,EAAE,aAAa;QACpB,MAAM,EAAE,EAAE;KACX;IACD,WAAW,EAAE;QACX,QAAQ,EAAE,WAAW;QACrB,KAAK,EAAE,0BAA0B;QACjC,MAAM,EAAE,EAAE;KACX;IACD,MAAM,EAAE;QACN;YACE,QAAQ,EAAE,QAAQ;YAClB,KAAK,EAAE,QAAQ;YACf,MAAM,EAAE,EAAE;YACV,MAAM,EAAE,CAAC;SACV;KACF;IACD,IAAI,EAAE;QACJ,UAAU,EAAE,OAAO;QACnB,MAAM,EAAE,CAAC;QACT,WAAW,EAAE,CAAC;KACf;IACD,GAAG,EAAE;QACH,OAAO,EAAE,KAAK;KACf;CACF,CAAC"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
 * Read-only map from a domain name to a fixed five-element tuple of
 * topic identifiers belonging to that domain.
 */
export declare const DOMAIN_TAXONOMY: {
    readonly ecommerce: readonly [
        "product-search", "cart", "coupon-pricing", "inventory", "order-state-machine"
    ];
    readonly social: readonly [
        "feed", "comment-thread", "notification-fanout", "friend-graph", "content-moderation"
    ];
    readonly finance: readonly [
        "ledger", "fx-conversion", "risk-rules", "statement-reporting", "reconciliation"
    ];
    readonly developerTools: readonly [
        "cli-tool", "config-parser", "log-analysis", "code-formatting", "mock-server"
    ];
    readonly dataAnalytics: readonly [
        "data-cleaning", "aggregation", "viz-prep", "etl-pipeline", "anomaly-detection"
    ];
    readonly iot: readonly [
        "sensor-ingestion", "protocol-parser", "alert-engine", "device-state", "firmware-rollout"
    ];
    readonly gaming: readonly [
        "game-loop", "collision-check", "scoreboard", "save-system", "level-generation"
    ];
    readonly utilities: readonly [
        "file-converter", "cache-layer", "task-scheduler", "markdown-rendering", "rule-engine"
    ];
};
/** Union of the property names of `DOMAIN_TAXONOMY`. */
export type DomainName = keyof typeof DOMAIN_TAXONOMY;
/** Array of domain names exported alongside the taxonomy. */
export declare const DOMAIN_NAMES: DomainName[];
|
|
13
|
+
//# sourceMappingURL=domain-taxonomy.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"domain-taxonomy.d.ts","sourceRoot":"","sources":["../src/domain-taxonomy.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,eAAe;;;;;;;;;CASlB,CAAC;AAEX,MAAM,MAAM,UAAU,GAAG,MAAM,OAAO,eAAe,CAAC;AAEtD,eAAO,MAAM,YAAY,EAAmC,UAAU,EAAE,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
 * Maps each domain name to its list of five topic identifiers.
 */
export const DOMAIN_TAXONOMY = {
    ecommerce: [
        "product-search", "cart", "coupon-pricing", "inventory", "order-state-machine"
    ],
    social: [
        "feed", "comment-thread", "notification-fanout", "friend-graph", "content-moderation"
    ],
    finance: [
        "ledger", "fx-conversion", "risk-rules", "statement-reporting", "reconciliation"
    ],
    developerTools: [
        "cli-tool", "config-parser", "log-analysis", "code-formatting", "mock-server"
    ],
    dataAnalytics: [
        "data-cleaning", "aggregation", "viz-prep", "etl-pipeline", "anomaly-detection"
    ],
    iot: [
        "sensor-ingestion", "protocol-parser", "alert-engine", "device-state", "firmware-rollout"
    ],
    gaming: [
        "game-loop", "collision-check", "scoreboard", "save-system", "level-generation"
    ],
    utilities: [
        "file-converter", "cache-layer", "task-scheduler", "markdown-rendering", "rule-engine"
    ]
};

/** Domain names, taken from the taxonomy's own keys in declaration order. */
export const DOMAIN_NAMES = Object.keys(DOMAIN_TAXONOMY);
|
|
12
|
+
//# sourceMappingURL=domain-taxonomy.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"domain-taxonomy.js","sourceRoot":"","sources":["../src/domain-taxonomy.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,MAAM,eAAe,GAAG;IAC7B,SAAS,EAAE,CAAC,gBAAgB,EAAE,MAAM,EAAE,gBAAgB,EAAE,WAAW,EAAE,qBAAqB,CAAC;IAC3F,MAAM,EAAE,CAAC,MAAM,EAAE,gBAAgB,EAAE,qBAAqB,EAAE,cAAc,EAAE,oBAAoB,CAAC;IAC/F,OAAO,EAAE,CAAC,QAAQ,EAAE,eAAe,EAAE,YAAY,EAAE,qBAAqB,EAAE,gBAAgB,CAAC;IAC3F,cAAc,EAAE,CAAC,UAAU,EAAE,eAAe,EAAE,cAAc,EAAE,iBAAiB,EAAE,aAAa,CAAC;IAC/F,aAAa,EAAE,CAAC,eAAe,EAAE,aAAa,EAAE,UAAU,EAAE,cAAc,EAAE,mBAAmB,CAAC;IAChG,GAAG,EAAE,CAAC,kBAAkB,EAAE,iBAAiB,EAAE,cAAc,EAAE,cAAc,EAAE,kBAAkB,CAAC;IAChG,MAAM,EAAE,CAAC,WAAW,EAAE,iBAAiB,EAAE,YAAY,EAAE,aAAa,EAAE,kBAAkB,CAAC;IACzF,SAAS,EAAE,CAAC,gBAAgB,EAAE,aAAa,EAAE,gBAAgB,EAAE,oBAAoB,EAAE,aAAa,CAAC;CAC3F,CAAC;AAIX,MAAM,CAAC,MAAM,YAAY,GAAG,MAAM,CAAC,IAAI,CAAC,eAAe,CAAiB,CAAC"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { ExecutionResult } from "./execution-engine.js";
|
|
2
|
+
import { LLMProvider } from "./providers/base.js";
|
|
3
|
+
import { ProjectRequirement } from "./types.js";
|
|
4
|
+
/** Names of the dimensions a judge assigns a score to. */
export type ScoreDimension =
    | "functionalCompleteness"
    | "codeQuality"
    | "logicAccuracy"
    | "security"
    | "engineeringPractice";
/** One numeric score for every `ScoreDimension`. */
export type DimensionScoreMap = Record<ScoreDimension, number>;
|
|
6
|
+
/** Scores produced by a single judge. */
export interface EvaluationResult {
    /** Identifier of the judge that produced these scores. */
    judgeId: string;
    /** Numeric score for each score dimension. */
    dimensions: DimensionScoreMap;
}
|
|
10
|
+
/** Result shape resolved by `EvaluationPanel.evaluateWithIja`. */
export interface EvaluationPanelOutput {
    /** Per-judge evaluation results. */
    results: EvaluationResult[];
    /**
     * Numeric "IJA" value for the results.
     * NOTE(review): presumably an inter-judge agreement score; confirm
     * against the implementation of `calculateIja`.
     */
    ija: number;
    /**
     * Judges listed as dropped.
     * NOTE(review): the drop criteria and the exact string format are not
     * visible in this declaration file.
     */
    droppedJudges: string[];
}
|
|
15
|
+
/** Description of one judge passed to the evaluation panel. */
export interface JudgeConfig {
    /** Provider name for the judge model. */
    provider: string;
    /** Model name for the judge. */
    model: string;
    /** Numeric weight attached to this judge. */
    weight: number;
}
|
|
20
|
+
/**
 * Computes a single numeric "IJA" value from a list of judge results.
 * NOTE(review): the formula and value range live in the implementation,
 * which is not visible from this declaration.
 *
 * @param results Per-judge evaluation results.
 * @returns The computed number.
 */
export declare function calculateIja(
    results: EvaluationResult[]
): number;
|
|
21
|
+
/**
 * Runs a set of judges over an execution result for a requirement.
 * Only the public signatures are declared here; behaviour is defined by
 * the implementation module.
 */
export declare class EvaluationPanel {
    private latestDroppedJudges;
    private collectEvaluations;
    /**
     * Evaluates an execution with the given judges.
     *
     * @param requirement The requirement the execution was produced for.
     * @param execution The execution result to be judged.
     * @param judges Judge configurations to use.
     * @param providerForJudge Callback returning the LLM provider for a judge.
     * @returns A promise of the per-judge evaluation results.
     */
    evaluate(
        requirement: ProjectRequirement,
        execution: ExecutionResult,
        judges: JudgeConfig[],
        providerForJudge: (judge: JudgeConfig) => LLMProvider
    ): Promise<EvaluationResult[]>;
    /**
     * Same inputs as `evaluate`, but resolves to an object that also carries
     * the `ija` number and the `droppedJudges` list.
     *
     * @param requirement The requirement the execution was produced for.
     * @param execution The execution result to be judged.
     * @param judges Judge configurations to use.
     * @param providerForJudge Callback returning the LLM provider for a judge.
     * @returns A promise of the combined panel output.
     */
    evaluateWithIja(
        requirement: ProjectRequirement,
        execution: ExecutionResult,
        judges: JudgeConfig[],
        providerForJudge: (judge: JudgeConfig) => LLMProvider
    ): Promise<EvaluationPanelOutput>;
}
|
|
27
|
+
//# sourceMappingURL=evaluation-panel.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluation-panel.d.ts","sourceRoot":"","sources":["../src/evaluation-panel.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AACxD,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAClD,OAAO,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AAEhD,MAAM,MAAM,cAAc,GACtB,wBAAwB,GACxB,aAAa,GACb,eAAe,GACf,UAAU,GACV,qBAAqB,CAAC;AAE1B,MAAM,MAAM,iBAAiB,GAAG,MAAM,CAAC,cAAc,EAAE,MAAM,CAAC,CAAC;AAE/D,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,iBAAiB,CAAC;CAC/B;AAED,MAAM,WAAW,qBAAqB;IACpC,OAAO,EAAE,gBAAgB,EAAE,CAAC;IAC5B,GAAG,EAAE,MAAM,CAAC;IACZ,aAAa,EAAE,MAAM,EAAE,CAAC;CACzB;AAED,MAAM,WAAW,WAAW;IAC1B,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;CAChB;AA+ED,wBAAgB,YAAY,CAAC,OAAO,EAAE,gBAAgB,EAAE,GAAG,MAAM,CAYhE;AAED,qBAAa,eAAe;IAC1B,OAAO,CAAC,mBAAmB,CAAgB;YAE7B,kBAAkB;IAwE1B,QAAQ,CACZ,WAAW,EAAE,kBAAkB,EAC/B,SAAS,EAAE,eAAe,EAC1B,MAAM,EAAE,WAAW,EAAE,EACrB,gBAAgB,EAAE,CAAC,KAAK,EAAE,WAAW,KAAK,WAAW,GACpD,OAAO,CAAC,gBAAgB,EAAE,CAAC;IASxB,eAAe,CACnB,WAAW,EAAE,kBAAkB,EAC/B,SAAS,EAAE,eAAe,EAC1B,MAAM,EAAE,WAAW,EAAE,EACrB,gBAAgB,EAAE,CAAC,KAAK,EAAE,WAAW,KAAK,WAAW,GACpD,OAAO,CAAC,qBAAqB,CAAC;CAUlC"}
|