@req2rank/core 0.1.0-r7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/adaptive-calibration.d.ts +13 -0
- package/dist/adaptive-calibration.d.ts.map +1 -0
- package/dist/adaptive-calibration.js +39 -0
- package/dist/adaptive-calibration.js.map +1 -0
- package/dist/adaptive-calibration.test.d.ts +2 -0
- package/dist/adaptive-calibration.test.d.ts.map +1 -0
- package/dist/adaptive-calibration.test.js +20 -0
- package/dist/adaptive-calibration.test.js.map +1 -0
- package/dist/checkpoint-key.d.ts +3 -0
- package/dist/checkpoint-key.d.ts.map +1 -0
- package/dist/checkpoint-key.js +29 -0
- package/dist/checkpoint-key.js.map +1 -0
- package/dist/checkpoint-key.test.d.ts +2 -0
- package/dist/checkpoint-key.test.d.ts.map +1 -0
- package/dist/checkpoint-key.test.js +32 -0
- package/dist/checkpoint-key.test.js.map +1 -0
- package/dist/config.d.ts +205 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +85 -0
- package/dist/config.js.map +1 -0
- package/dist/domain-taxonomy.d.ts +13 -0
- package/dist/domain-taxonomy.d.ts.map +1 -0
- package/dist/domain-taxonomy.js +12 -0
- package/dist/domain-taxonomy.js.map +1 -0
- package/dist/evaluation-panel.d.ts +27 -0
- package/dist/evaluation-panel.d.ts.map +1 -0
- package/dist/evaluation-panel.js +158 -0
- package/dist/evaluation-panel.js.map +1 -0
- package/dist/evaluation-panel.test.d.ts +2 -0
- package/dist/evaluation-panel.test.d.ts.map +1 -0
- package/dist/evaluation-panel.test.js +185 -0
- package/dist/evaluation-panel.test.js.map +1 -0
- package/dist/evidence-chain.d.ts +11 -0
- package/dist/evidence-chain.d.ts.map +1 -0
- package/dist/evidence-chain.js +33 -0
- package/dist/evidence-chain.js.map +1 -0
- package/dist/evidence-chain.test.d.ts +2 -0
- package/dist/evidence-chain.test.d.ts.map +1 -0
- package/dist/evidence-chain.test.js +16 -0
- package/dist/evidence-chain.test.js.map +1 -0
- package/dist/execution-engine.d.ts +29 -0
- package/dist/execution-engine.d.ts.map +1 -0
- package/dist/execution-engine.js +102 -0
- package/dist/execution-engine.js.map +1 -0
- package/dist/execution-engine.test.d.ts +2 -0
- package/dist/execution-engine.test.d.ts.map +1 -0
- package/dist/execution-engine.test.js +86 -0
- package/dist/execution-engine.test.js.map +1 -0
- package/dist/hub-client.d.ts +21 -0
- package/dist/hub-client.d.ts.map +1 -0
- package/dist/hub-client.js +99 -0
- package/dist/hub-client.js.map +1 -0
- package/dist/hub-client.test.d.ts +2 -0
- package/dist/hub-client.test.d.ts.map +1 -0
- package/dist/hub-client.test.js +129 -0
- package/dist/hub-client.test.js.map +1 -0
- package/dist/index.d.ts +19 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +19 -0
- package/dist/index.js.map +1 -0
- package/dist/leaderboard-query.d.ts +29 -0
- package/dist/leaderboard-query.d.ts.map +1 -0
- package/dist/leaderboard-query.js +59 -0
- package/dist/leaderboard-query.js.map +1 -0
- package/dist/leaderboard-query.test.d.ts +2 -0
- package/dist/leaderboard-query.test.d.ts.map +1 -0
- package/dist/leaderboard-query.test.js +34 -0
- package/dist/leaderboard-query.test.js.map +1 -0
- package/dist/local-store.d.ts +18 -0
- package/dist/local-store.d.ts.map +1 -0
- package/dist/local-store.js +181 -0
- package/dist/local-store.js.map +1 -0
- package/dist/local-store.test.d.ts +2 -0
- package/dist/local-store.test.d.ts.map +1 -0
- package/dist/local-store.test.js +153 -0
- package/dist/local-store.test.js.map +1 -0
- package/dist/pipeline-stage-handoff.test.d.ts +2 -0
- package/dist/pipeline-stage-handoff.test.d.ts.map +1 -0
- package/dist/pipeline-stage-handoff.test.js +290 -0
- package/dist/pipeline-stage-handoff.test.js.map +1 -0
- package/dist/pipeline.d.ts +67 -0
- package/dist/pipeline.d.ts.map +1 -0
- package/dist/pipeline.js +493 -0
- package/dist/pipeline.js.map +1 -0
- package/dist/providers/anthropic.d.ts +8 -0
- package/dist/providers/anthropic.d.ts.map +1 -0
- package/dist/providers/anthropic.js +45 -0
- package/dist/providers/anthropic.js.map +1 -0
- package/dist/providers/base.d.ts +36 -0
- package/dist/providers/base.d.ts.map +1 -0
- package/dist/providers/base.js +47 -0
- package/dist/providers/base.js.map +1 -0
- package/dist/providers/custom.d.ts +6 -0
- package/dist/providers/custom.d.ts.map +1 -0
- package/dist/providers/custom.js +6 -0
- package/dist/providers/custom.js.map +1 -0
- package/dist/providers/google.d.ts +8 -0
- package/dist/providers/google.d.ts.map +1 -0
- package/dist/providers/google.js +48 -0
- package/dist/providers/google.js.map +1 -0
- package/dist/providers/index.d.ts +31 -0
- package/dist/providers/index.d.ts.map +1 -0
- package/dist/providers/index.js +63 -0
- package/dist/providers/index.js.map +1 -0
- package/dist/providers/openai.d.ts +18 -0
- package/dist/providers/openai.d.ts.map +1 -0
- package/dist/providers/openai.js +111 -0
- package/dist/providers/openai.js.map +1 -0
- package/dist/providers/providers.test.d.ts +2 -0
- package/dist/providers/providers.test.d.ts.map +1 -0
- package/dist/providers/providers.test.js +71 -0
- package/dist/providers/providers.test.js.map +1 -0
- package/dist/requirement-generator.d.ts +29 -0
- package/dist/requirement-generator.d.ts.map +1 -0
- package/dist/requirement-generator.js +358 -0
- package/dist/requirement-generator.js.map +1 -0
- package/dist/requirement-generator.test.d.ts +2 -0
- package/dist/requirement-generator.test.d.ts.map +1 -0
- package/dist/requirement-generator.test.js +182 -0
- package/dist/requirement-generator.test.js.map +1 -0
- package/dist/sandbox.d.ts +32 -0
- package/dist/sandbox.d.ts.map +1 -0
- package/dist/sandbox.js +124 -0
- package/dist/sandbox.js.map +1 -0
- package/dist/sandbox.test.d.ts +2 -0
- package/dist/sandbox.test.d.ts.map +1 -0
- package/dist/sandbox.test.js +20 -0
- package/dist/sandbox.test.js.map +1 -0
- package/dist/scoring-engine.d.ts +15 -0
- package/dist/scoring-engine.d.ts.map +1 -0
- package/dist/scoring-engine.js +109 -0
- package/dist/scoring-engine.js.map +1 -0
- package/dist/scoring-engine.test.d.ts +2 -0
- package/dist/scoring-engine.test.d.ts.map +1 -0
- package/dist/scoring-engine.test.js +137 -0
- package/dist/scoring-engine.test.js.map +1 -0
- package/dist/submit-payload-builder.d.ts +9 -0
- package/dist/submit-payload-builder.d.ts.map +1 -0
- package/dist/submit-payload-builder.js +23 -0
- package/dist/submit-payload-builder.js.map +1 -0
- package/dist/submit-payload-builder.test.d.ts +2 -0
- package/dist/submit-payload-builder.test.d.ts.map +1 -0
- package/dist/submit-payload-builder.test.js +75 -0
- package/dist/submit-payload-builder.test.js.map +1 -0
- package/dist/submitter-types.d.ts +54 -0
- package/dist/submitter-types.d.ts.map +1 -0
- package/dist/submitter-types.js +2 -0
- package/dist/submitter-types.js.map +1 -0
- package/dist/types.d.ts +40 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/package.json +36 -0
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
import { mkdir } from "node:fs/promises";
|
|
2
|
+
import { dirname } from "node:path";
|
|
3
|
+
import Database from "better-sqlite3";
|
|
4
|
+
/**
 * SQLite-backed persistence for run records and calibration snapshots.
 *
 * The database file is opened lazily on first use: the parent directory is
 * created, the `runs` and `calibrations` tables are created if missing, and
 * older `runs` tables are migrated to include the optional columns.
 */
export class LocalStore {
    /** Column list shared by every query whose rows are fed to mapRun(). */
    static #RUN_COLUMNS = "id, created_at, target_provider, target_model, complexity, rounds, requirement_title, overall_score, dimension_scores, ci95, agreement_level, ija_score, evidence_chain";

    filePath;
    db;
    /** In-flight open operation, shared by concurrent getDb() callers. */
    #opening;

    /**
     * @param {string} filePath - Location of the SQLite database file.
     */
    constructor(filePath) {
        this.filePath = filePath;
    }

    /**
     * Returns the open database handle, opening and initialising it on first use.
     *
     * Concurrent callers share a single open operation so only one connection
     * is ever created; a failed open is not cached and is retried on the next call.
     *
     * @returns {Promise<object>} The better-sqlite3 database handle.
     */
    async getDb() {
        if (this.db) {
            return this.db;
        }
        this.#opening ??= this.#open().finally(() => {
            this.#opening = undefined;
        });
        return this.#opening;
    }

    /**
     * Opens the database file, creates the schema and applies column migrations.
     * The handle is published on `this.db` only after initialisation succeeded;
     * on failure the connection is closed and the error is rethrown.
     */
    async #open() {
        await mkdir(dirname(this.filePath), { recursive: true });
        const db = new Database(this.filePath);
        try {
            db.exec(`
                CREATE TABLE IF NOT EXISTS runs (
                    id TEXT PRIMARY KEY,
                    created_at TEXT NOT NULL,
                    target_provider TEXT NOT NULL,
                    target_model TEXT NOT NULL,
                    complexity TEXT NOT NULL,
                    rounds INTEGER NOT NULL,
                    requirement_title TEXT NOT NULL,
                    overall_score REAL NOT NULL,
                    dimension_scores TEXT NOT NULL,
                    ci95 TEXT NOT NULL,
                    agreement_level TEXT NOT NULL,
                    ija_score REAL,
                    evidence_chain TEXT
                );

                CREATE TABLE IF NOT EXISTS calibrations (
                    id TEXT PRIMARY KEY,
                    created_at TEXT NOT NULL,
                    recommended_complexity TEXT NOT NULL,
                    reason TEXT NOT NULL,
                    average_score REAL NOT NULL,
                    sample_size INTEGER NOT NULL
                );
            `);
            this.ensureColumns(db);
        }
        catch (error) {
            // Do not leak (or cache) a connection whose schema could not be set up.
            db.close();
            throw error;
        }
        this.db = db;
        return db;
    }

    /**
     * Adds the optional `runs` columns to databases created before they existed.
     *
     * @param {object} db - Open database handle.
     */
    ensureColumns(db) {
        const rows = db.prepare("PRAGMA table_info(runs)").all();
        const columnNames = new Set(rows.map((row) => row.name));
        const optionalColumns = [
            ["ija_score", "REAL"],
            ["evidence_chain", "TEXT"]
        ];
        for (const [name, type] of optionalColumns) {
            if (!columnNames.has(name)) {
                db.exec(`ALTER TABLE runs ADD COLUMN ${name} ${type}`);
            }
        }
    }

    /**
     * Converts a raw `runs` row into a run record.
     *
     * @param {object} row - Row with the snake_case columns of the `runs` table.
     * @returns {object} Run record with camelCase fields; `ijaScore` and
     *   `evidenceChain` are `undefined` when the row does not carry them.
     * @throws {Error} When `dimension_scores` is not a JSON object, `ci95` is not
     *   a two-element JSON array, or a present `evidence_chain` cannot be parsed.
     */
    mapRun(row) {
        const dimensionScores = this.parseJson(row.dimension_scores);
        const ci95 = this.parseJson(row.ci95);
        // Valid JSON is not enough: a bare number, string or array is not a score record.
        const hasScoreRecord = dimensionScores !== null
            && typeof dimensionScores === "object"
            && !Array.isArray(dimensionScores);
        if (!hasScoreRecord || !Array.isArray(ci95) || ci95.length !== 2) {
            throw new Error("Invalid run record JSON payload");
        }
        let evidenceChain;
        if (row.evidence_chain) {
            evidenceChain = this.parseJson(row.evidence_chain);
            if (!evidenceChain) {
                throw new Error("Invalid run evidenceChain payload");
            }
        }
        return {
            id: String(row.id),
            createdAt: String(row.created_at),
            targetProvider: String(row.target_provider),
            targetModel: String(row.target_model),
            complexity: String(row.complexity),
            rounds: Number(row.rounds),
            requirementTitle: String(row.requirement_title),
            overallScore: Number(row.overall_score),
            dimensionScores,
            ci95,
            agreementLevel: String(row.agreement_level),
            // NULL columns come back as null; expose them as an absent value.
            ijaScore: typeof row.ija_score === "number" ? Number(row.ija_score) : undefined,
            evidenceChain
        };
    }

    /**
     * Parses a JSON column value.
     *
     * @param {unknown} value - Value to stringify and parse.
     * @returns {unknown} The parsed value, or `undefined` when it is not valid JSON.
     */
    parseJson(value) {
        try {
            return JSON.parse(String(value));
        }
        catch {
            return undefined;
        }
    }

    /** Closes the database handle if one is open; safe to call repeatedly. */
    close() {
        if (this.db) {
            this.db.close();
            this.db = undefined;
        }
    }

    /**
     * Reads everything in the store.
     *
     * @returns {Promise<{runs: object[], calibrations: object[]}>}
     */
    async read() {
        const runs = await this.listRuns();
        const calibrations = await this.listCalibrations();
        return { runs, calibrations };
    }

    /**
     * Inserts a run record. Uses a plain INSERT, so a duplicate `id` is rejected
     * by the primary-key constraint.
     *
     * @param {object} run - Run record with camelCase fields (see mapRun()).
     */
    async appendRun(run) {
        const db = await this.getDb();
        const statement = db.prepare(`
            INSERT INTO runs (
                id,
                created_at,
                target_provider,
                target_model,
                complexity,
                rounds,
                requirement_title,
                overall_score,
                dimension_scores,
                ci95,
                agreement_level,
                ija_score,
                evidence_chain
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        `);
        statement.run(run.id, run.createdAt, run.targetProvider, run.targetModel, run.complexity, run.rounds, run.requirementTitle, run.overallScore, JSON.stringify(run.dimensionScores), JSON.stringify(run.ci95), run.agreementLevel, run.ijaScore ?? null, run.evidenceChain ? JSON.stringify(run.evidenceChain) : null);
    }

    /**
     * Lists all readable runs, newest `created_at` first.
     *
     * @returns {Promise<object[]>} Run records; rows that fail mapRun() are omitted.
     */
    async listRuns() {
        const db = await this.getDb();
        const rows = db.prepare(`SELECT ${LocalStore.#RUN_COLUMNS} FROM runs ORDER BY created_at DESC`).all();
        const runs = [];
        for (const row of rows) {
            try {
                runs.push(this.mapRun(row));
            }
            catch {
                // Deliberate best effort: one corrupt row must not hide the other runs.
                continue;
            }
        }
        return runs;
    }

    /**
     * Looks up a single run.
     *
     * @param {string} runId - Primary key of the run.
     * @returns {Promise<object | undefined>} The run, or `undefined` when it does
     *   not exist or its stored payload fails mapRun().
     */
    async findRunById(runId) {
        const db = await this.getDb();
        const row = db.prepare(`SELECT ${LocalStore.#RUN_COLUMNS} FROM runs WHERE id = ?`).get(runId);
        if (!row) {
            return undefined;
        }
        try {
            return this.mapRun(row);
        }
        catch {
            return undefined;
        }
    }

    /**
     * Stores a calibration snapshot, replacing any existing snapshot with the same `id`.
     *
     * @param {object} snapshot - Snapshot with `id`, `createdAt`,
     *   `recommendedComplexity`, `reason`, `averageScore` and `sampleSize`.
     */
    async appendCalibration(snapshot) {
        const db = await this.getDb();
        const statement = db.prepare(`
            INSERT OR REPLACE INTO calibrations (
                id,
                created_at,
                recommended_complexity,
                reason,
                average_score,
                sample_size
            ) VALUES (?, ?, ?, ?, ?, ?)
        `);
        statement.run(snapshot.id, snapshot.createdAt, snapshot.recommendedComplexity, snapshot.reason, snapshot.averageScore, snapshot.sampleSize);
    }

    /**
     * Lists all calibration snapshots, newest `created_at` first.
     *
     * @returns {Promise<object[]>} Snapshots with camelCase fields.
     */
    async listCalibrations() {
        const db = await this.getDb();
        const statement = db.prepare("SELECT id, created_at, recommended_complexity, reason, average_score, sample_size FROM calibrations ORDER BY created_at DESC");
        const rows = statement.all();
        return rows.map((row) => ({
            id: String(row.id),
            createdAt: String(row.created_at),
            recommendedComplexity: String(row.recommended_complexity),
            reason: String(row.reason),
            averageScore: Number(row.average_score),
            sampleSize: Number(row.sample_size)
        }));
    }
}
|
|
181
|
+
//# sourceMappingURL=local-store.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"local-store.js","sourceRoot":"","sources":["../src/local-store.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AACzC,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,QAAQ,MAAM,gBAAgB,CAAC;AAGtC,MAAM,OAAO,UAAU;IACJ,QAAQ,CAAS;IAC1B,EAAE,CAAqB;IAE/B,YAAY,QAAgB;QAC1B,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;IAC3B,CAAC;IAEO,KAAK,CAAC,KAAK;QACjB,IAAI,IAAI,CAAC,EAAE,EAAE,CAAC;YACZ,OAAO,IAAI,CAAC,EAAE,CAAC;QACjB,CAAC;QAED,MAAM,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACzD,IAAI,CAAC,EAAE,GAAG,IAAI,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACtC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;KAyBZ,CAAC,CAAC;QACH,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC5B,OAAO,IAAI,CAAC,EAAE,CAAC;IACjB,CAAC;IAEO,aAAa,CAAC,EAAqB;QACzC,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,CAAC,yBAAyB,CAAC,CAAC,GAAG,EAA6B,CAAC;QACpF,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC;QAEzD,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,WAAW,CAAC,EAAE,CAAC;YAClC,EAAE,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;QACxD,CAAC;QAED,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,gBAAgB,CAAC,EAAE,CAAC;YACvC,EAAE,CAAC,IAAI,CAAC,iDAAiD,CAAC,CAAC;QAC7D,CAAC;IACH,CAAC;IAEO,MAAM,CAAC,GAA4B;QACzC,MAAM,eAAe,GAAG,IAAI,CAAC,SAAS,CAAyB,GAAG,CAAC,gBAAgB,CAAC,CAAC;QACrF,MAAM,IAAI,GAAG,IAAI,CAAC,SAAS,CAAmB,GAAG,CAAC,IAAI,CAAC,CAAC;QACxD,IAAI,CAAC,eAAe,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAClE,MAAM,IAAI,KAAK,CAAC,iCAAiC,CAAC,CAAC;QACrD,CAAC;QAED,IAAI,aAAqD,CAAC;QAC1D,IAAI,GAAG,CAAC,cAAc,EAAE,CAAC;YACvB,aAAa,GAAG,IAAI,CAAC,SAAS,CAA6B,GAAG,CAAC,cAAc,CAAC,CAAC;YAC/E,IAAI,CAAC,aAAa,EAAE,CAAC;gBACnB,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;YACvD,CAAC;QACH,CAAC;QAED,OAAO;YACL,EAAE,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC;YAClB,SAAS,EAAE,MAAM,CAAC,GAAG,CAAC,UAAU,CAAC;YACjC,cAAc,EAAE,MAAM,CAAC,GAAG,CAAC,eAAe,CAAC;YAC3C,WAAW,EAAE,MAAM,CAAC,GAAG,CAAC,YAAY,CAAC;YACrC,UAAU,EAAE,MAAM,CAAC,GAAG,CAAC,UAAU,CAA4B;YAC7D,MAAM,EAAE,MAAM,CAAC,GAAG
,CAAC,MAAM,CAAC;YAC1B,gBAAgB,EAAE,MAAM,CAAC,GAAG,CAAC,iBAAiB,CAAC;YAC/C,YAAY,EAAE,MAAM,CAAC,GAAG,CAAC,aAAa,CAAC;YACvC,eAAe;YACf,IAAI;YACJ,cAAc,EAAE,MAAM,CAAC,GAAG,CAAC,eAAe,CAAgC;YAC1E,QAAQ,EAAE,OAAO,GAAG,CAAC,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS;YAC/E,aAAa;SACd,CAAC;IACJ,CAAC;IAEO,SAAS,CAAI,KAAc;QACjC,IAAI,CAAC;YACH,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAM,CAAC;QACxC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,SAAS,CAAC;QACnB,CAAC;IACH,CAAC;IAED,KAAK;QACH,IAAI,IAAI,CAAC,EAAE,EAAE,CAAC;YACZ,IAAI,CAAC,EAAE,CAAC,KAAK,EAAE,CAAC;YAChB,IAAI,CAAC,EAAE,GAAG,SAAS,CAAC;QACtB,CAAC;IACH,CAAC;IAED,KAAK,CAAC,IAAI;QACR,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC;QACnC,MAAM,YAAY,GAAG,MAAM,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACnD,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC;IAChC,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,GAAc;QAC5B,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;QAC9B,MAAM,SAAS,GAAG,EAAE,CAAC,OAAO,CAAC;;;;;;;;;;;;;;;;KAgB5B,CAAC,CAAC;QACH,SAAS,CAAC,GAAG,CACX,GAAG,CAAC,EAAE,EACN,GAAG,CAAC,SAAS,EACb,GAAG,CAAC,cAAc,EAClB,GAAG,CAAC,WAAW,EACf,GAAG,CAAC,UAAU,EACd,GAAG,CAAC,MAAM,EACV,GAAG,CAAC,gBAAgB,EACpB,GAAG,CAAC,YAAY,EAChB,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,eAAe,CAAC,EACnC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,EACxB,GAAG,CAAC,cAAc,EAClB,GAAG,CAAC,QAAQ,IAAI,IAAI,EACpB,GAAG,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,IAAI,CAC7D,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,QAAQ;QACZ,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;QAC9B,MAAM,SAAS,GAAG,EAAE,CAAC,OAAO,CAC1B,mNAAmN,CACpN,CAAC;QACF,MAAM,IAAI,GAAG,SAAS,CAAC,GAAG,EAAoC,CAAC;QAC/D,MAAM,IAAI,GAAgB,EAAE,CAAC;QAC7B,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,IAAI,CAAC;gBACH,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;YAC9B,CAAC;YAAC,MAAM,CAAC;gBACP,SAAS;YACX,CAAC;QACH,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,KAAa;QAC7B,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;QAC9B,MAAM,SAAS,GAAG,EAAE,CAAC,OAAO,CAC1B,uMAAuM,CACxM,CAAC;QACF,MAAM,GAAG,GAAG,SAAS,CAAC,GAAG,CAAC,K
AAK,CAAwC,CAAC;QACxE,IAAI,CAAC,GAAG,EAAE,CAAC;YACT,OAAO,SAAS,CAAC;QACnB,CAAC;QAED,IAAI,CAAC;YACH,OAAO,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QAC1B,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,SAAS,CAAC;QACnB,CAAC;IACH,CAAC;IAED,KAAK,CAAC,iBAAiB,CAAC,QAA6B;QACnD,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;QAC9B,MAAM,SAAS,GAAG,EAAE,CAAC,OAAO,CAAC;;;;;;;;;KAS5B,CAAC,CAAC;QACH,SAAS,CAAC,GAAG,CACX,QAAQ,CAAC,EAAE,EACX,QAAQ,CAAC,SAAS,EAClB,QAAQ,CAAC,qBAAqB,EAC9B,QAAQ,CAAC,MAAM,EACf,QAAQ,CAAC,YAAY,EACrB,QAAQ,CAAC,UAAU,CACpB,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,gBAAgB;QACpB,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;QAC9B,MAAM,SAAS,GAAG,EAAE,CAAC,OAAO,CAC1B,8HAA8H,CAC/H,CAAC;QACF,MAAM,IAAI,GAAG,SAAS,CAAC,GAAG,EAAoC,CAAC;QAC/D,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;YACxB,EAAE,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC;YAClB,SAAS,EAAE,MAAM,CAAC,GAAG,CAAC,UAAU,CAAC;YACjC,qBAAqB,EAAE,MAAM,CAAC,GAAG,CAAC,sBAAsB,CAAiD;YACzG,MAAM,EAAE,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC;YAC1B,YAAY,EAAE,MAAM,CAAC,GAAG,CAAC,aAAa,CAAC;YACvC,UAAU,EAAE,MAAM,CAAC,GAAG,CAAC,WAAW,CAAC;SACpC,CAAC,CAAC,CAAC;IACN,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"local-store.test.d.ts","sourceRoot":"","sources":["../src/local-store.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
import { mkdtemp, readFile, rm } from "node:fs/promises";
|
|
2
|
+
import { tmpdir } from "node:os";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import Database from "better-sqlite3";
|
|
5
|
+
import { afterEach, describe, expect, it } from "vitest";
|
|
6
|
+
import { LocalStore } from "./local-store.js";
|
|
7
|
+
// Temp directories and stores created by the current test; both are released in afterEach
// so that a failing assertion cannot leave an open SQLite handle behind.
const tempDirs = [];
const openStores = [];

afterEach(async () => {
    // Close database handles before deleting their files; an open handle can make
    // directory removal fail on some platforms (e.g. Windows).
    for (const store of openStores.splice(0, openStores.length)) {
        store.close();
    }
    await Promise.all(tempDirs.splice(0, tempDirs.length).map((dir) => rm(dir, { recursive: true, force: true })));
});

/** Creates a LocalStore backed by a fresh temp directory and registers both for cleanup. */
async function createStore() {
    const dir = await mkdtemp(join(tmpdir(), "req2rank-store-"));
    tempDirs.push(dir);
    const dbPath = join(dir, "runs.db");
    const store = new LocalStore(dbPath);
    openStores.push(store);
    return { store, dbPath };
}

/** Builds a run record whose five dimension scores all equal `score`; `overrides` win. */
function buildRun(score, overrides) {
    return {
        id: "run-1",
        createdAt: new Date("2026-01-01T00:00:00.000Z").toISOString(),
        targetProvider: "openai",
        targetModel: "gpt-4o-mini",
        complexity: "C1",
        rounds: 1,
        requirementTitle: "demo",
        overallScore: score,
        dimensionScores: {
            functionalCompleteness: score,
            codeQuality: score,
            logicAccuracy: score,
            security: score,
            engineeringPractice: score
        },
        ci95: [score - 2, score + 2],
        agreementLevel: "high",
        ...overrides
    };
}

describe("LocalStore", () => {
    it("uses sqlite file format for persistence", async () => {
        const { store, dbPath } = await createStore();
        await store.appendRun(buildRun(80, {
            ijaScore: 0.88,
            evidenceChain: {
                timeline: [
                    {
                        phase: "generate",
                        startedAt: "2026-01-01T00:00:00.000Z",
                        completedAt: "2026-01-01T00:00:01.000Z",
                        model: "system"
                    }
                ],
                samples: [{ roundIndex: 0, requirement: "demo", codeSubmission: "ok" }],
                environment: { os: "win32", nodeVersion: "v22", timezone: "UTC" }
            }
        }));
        const content = await readFile(dbPath);
        expect(content.subarray(0, 15).toString("utf-8")).toBe("SQLite format 3");
    });

    it("supports append list and find", async () => {
        const { store } = await createStore();
        await store.appendRun(buildRun(80, {
            requirementTitle: "demo-1",
            ijaScore: 0.75
        }));
        await store.appendRun(buildRun(88, {
            id: "run-2",
            createdAt: new Date("2026-01-02T00:00:00.000Z").toISOString(),
            targetProvider: "anthropic",
            targetModel: "claude-sonnet-4-20250514",
            complexity: "C2",
            rounds: 2,
            requirementTitle: "demo-2",
            ijaScore: 0.92
        }));
        const runs = await store.listRuns();
        expect(runs).toHaveLength(2);
        expect(runs[0].id).toBe("run-2");
        const found = await store.findRunById("run-1");
        expect(found?.targetProvider).toBe("openai");
        expect(found?.ijaScore).toBe(0.75);
        await store.appendCalibration({
            id: "cal-1",
            createdAt: new Date("2026-01-03T00:00:00.000Z").toISOString(),
            recommendedComplexity: "C2",
            reason: "Average score supports C2",
            averageScore: 84,
            sampleSize: 2
        });
        const calibrations = await store.listCalibrations();
        expect(calibrations).toHaveLength(1);
        expect(calibrations[0]?.recommendedComplexity).toBe("C2");
    });

    it("skips malformed json rows instead of crashing run queries", async () => {
        const { store, dbPath } = await createStore();
        await store.appendRun(buildRun(80, {
            createdAt: new Date("2026-01-02T00:00:00.000Z").toISOString(),
            requirementTitle: "healthy"
        }));
        // Bypass LocalStore to plant a row whose dimension_scores is not valid JSON.
        const raw = new Database(dbPath);
        try {
            raw
                .prepare("INSERT INTO runs (id, created_at, target_provider, target_model, complexity, rounds, requirement_title, overall_score, dimension_scores, ci95, agreement_level) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")
                .run("run-bad", new Date("2026-01-01T00:00:00.000Z").toISOString(), "openai", "gpt-4o-mini", "C1", 1, "bad", 60, "{bad-json", "[60,61]", "low");
        }
        finally {
            raw.close();
        }
        const runs = await store.listRuns();
        expect(runs).toHaveLength(1);
        expect(runs[0].id).toBe("run-1");
        const foundBad = await store.findRunById("run-bad");
        expect(foundBad).toBeUndefined();
    });
});
|
|
153
|
+
//# sourceMappingURL=local-store.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"local-store.test.js","sourceRoot":"","sources":["../src/local-store.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,kBAAkB,CAAC;AACzD,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACjC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,QAAQ,MAAM,gBAAgB,CAAC;AACtC,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AACzD,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAE9C,MAAM,QAAQ,GAAa,EAAE,CAAC;AAE9B,SAAS,CAAC,KAAK,IAAI,EAAE;IACnB,MAAM,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;AACjH,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;IAC1B,EAAE,CAAC,yCAAyC,EAAE,KAAK,IAAI,EAAE;QACvD,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,iBAAiB,CAAC,CAAC,CAAC;QAC7D,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACnB,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;QAEpC,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,MAAM,CAAC,CAAC;QACrC,MAAM,KAAK,CAAC,SAAS,CAAC;YACpB,EAAE,EAAE,OAAO;YACX,SAAS,EAAE,IAAI,IAAI,CAAC,0BAA0B,CAAC,CAAC,WAAW,EAAE;YAC7D,cAAc,EAAE,QAAQ;YACxB,WAAW,EAAE,aAAa;YAC1B,UAAU,EAAE,IAAI;YAChB,MAAM,EAAE,CAAC;YACT,gBAAgB,EAAE,MAAM;YACxB,YAAY,EAAE,EAAE;YAChB,eAAe,EAAE;gBACf,sBAAsB,EAAE,EAAE;gBAC1B,WAAW,EAAE,EAAE;gBACf,aAAa,EAAE,EAAE;gBACjB,QAAQ,EAAE,EAAE;gBACZ,mBAAmB,EAAE,EAAE;aACxB;YACD,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC;YACd,cAAc,EAAE,MAAM;YACtB,QAAQ,EAAE,IAAI;YACd,aAAa,EAAE;gBACb,QAAQ,EAAE;oBACR;wBACE,KAAK,EAAE,UAAU;wBACjB,SAAS,EAAE,0BAA0B;wBACrC,WAAW,EAAE,0BAA0B;wBACvC,KAAK,EAAE,QAAQ;qBAChB;iBACF;gBACD,OAAO,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC,EAAE,WAAW,EAAE,MAAM,EAAE,cAAc,EAAE,IAAI,EAAE,CAAC;gBACvE,WAAW,EAAE,EAAE,EAAE,EAAE,OAAO,EAAE,WAAW,EAAE,KAAK,EAAE,QAAQ,EAAE,KAAK,EAAE;aAClE;SACF,CAAC,CAAC;QAEH,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC,CAAC;QACvC,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;QAC1E,KAAK,CAAC,KAAK,EAAE,CAAC;IAChB
,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+BAA+B,EAAE,KAAK,IAAI,EAAE;QAC7C,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,iBAAiB,CAAC,CAAC,CAAC;QAC7D,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAEnB,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,IAAI,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC,CAAC;QACnD,MAAM,KAAK,CAAC,SAAS,CAAC;YACpB,EAAE,EAAE,OAAO;YACX,SAAS,EAAE,IAAI,IAAI,CAAC,0BAA0B,CAAC,CAAC,WAAW,EAAE;YAC7D,cAAc,EAAE,QAAQ;YACxB,WAAW,EAAE,aAAa;YAC1B,UAAU,EAAE,IAAI;YAChB,MAAM,EAAE,CAAC;YACT,gBAAgB,EAAE,QAAQ;YAC1B,YAAY,EAAE,EAAE;YAChB,eAAe,EAAE;gBACf,sBAAsB,EAAE,EAAE;gBAC1B,WAAW,EAAE,EAAE;gBACf,aAAa,EAAE,EAAE;gBACjB,QAAQ,EAAE,EAAE;gBACZ,mBAAmB,EAAE,EAAE;aACxB;YACD,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC;YACd,cAAc,EAAE,MAAM;YACtB,QAAQ,EAAE,IAAI;SACf,CAAC,CAAC;QAEH,MAAM,KAAK,CAAC,SAAS,CAAC;YACpB,EAAE,EAAE,OAAO;YACX,SAAS,EAAE,IAAI,IAAI,CAAC,0BAA0B,CAAC,CAAC,WAAW,EAAE;YAC7D,cAAc,EAAE,WAAW;YAC3B,WAAW,EAAE,0BAA0B;YACvC,UAAU,EAAE,IAAI;YAChB,MAAM,EAAE,CAAC;YACT,gBAAgB,EAAE,QAAQ;YAC1B,YAAY,EAAE,EAAE;YAChB,eAAe,EAAE;gBACf,sBAAsB,EAAE,EAAE;gBAC1B,WAAW,EAAE,EAAE;gBACf,aAAa,EAAE,EAAE;gBACjB,QAAQ,EAAE,EAAE;gBACZ,mBAAmB,EAAE,EAAE;aACxB;YACD,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC;YACd,cAAc,EAAE,MAAM;YACtB,QAAQ,EAAE,IAAI;SACf,CAAC,CAAC;QAEH,MAAM,IAAI,GAAG,MAAM,KAAK,CAAC,QAAQ,EAAE,CAAC;QACpC,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAEjC,MAAM,KAAK,GAAG,MAAM,KAAK,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;QAC/C,MAAM,CAAC,KAAK,EAAE,cAAc,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC7C,MAAM,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEnC,MAAM,KAAK,CAAC,iBAAiB,CAAC;YAC5B,EAAE,EAAE,OAAO;YACX,SAAS,EAAE,IAAI,IAAI,CAAC,0BAA0B,CAAC,CAAC,WAAW,EAAE;YAC7D,qBAAqB,EAAE,IAAI;YAC3B,MAAM,EAAE,2BAA2B;YACnC,YAAY,EAAE,EAAE;YAChB,UAAU,EAAE,CAAC;SACd,CAAC,CAAC;QACH,MAAM,YAAY,GAAG,MAAM,KAAK,CAAC,gBAAgB,EAAE,CAAC;QACpD,MAAM,CAAC,YAAY,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,qBAAqB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC1D,KAAK,CAAC,KAAK,EAAE,
CAAC;IAChB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2DAA2D,EAAE,KAAK,IAAI,EAAE;QACzE,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,iBAAiB,CAAC,CAAC,CAAC;QAC7D,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAEnB,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;QACpC,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,MAAM,CAAC,CAAC;QACrC,MAAM,KAAK,CAAC,SAAS,CAAC;YACpB,EAAE,EAAE,OAAO;YACX,SAAS,EAAE,IAAI,IAAI,CAAC,0BAA0B,CAAC,CAAC,WAAW,EAAE;YAC7D,cAAc,EAAE,QAAQ;YACxB,WAAW,EAAE,aAAa;YAC1B,UAAU,EAAE,IAAI;YAChB,MAAM,EAAE,CAAC;YACT,gBAAgB,EAAE,SAAS;YAC3B,YAAY,EAAE,EAAE;YAChB,eAAe,EAAE;gBACf,sBAAsB,EAAE,EAAE;gBAC1B,WAAW,EAAE,EAAE;gBACf,aAAa,EAAE,EAAE;gBACjB,QAAQ,EAAE,EAAE;gBACZ,mBAAmB,EAAE,EAAE;aACxB;YACD,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC;YACd,cAAc,EAAE,MAAM;SACvB,CAAC,CAAC;QAEH,MAAM,GAAG,GAAG,IAAI,QAAQ,CAAC,MAAM,CAAC,CAAC;QACjC,GAAG;aACA,OAAO,CACN,0MAA0M,CAC3M;aACA,GAAG,CACF,SAAS,EACT,IAAI,IAAI,CAAC,0BAA0B,CAAC,CAAC,WAAW,EAAE,EAClD,QAAQ,EACR,aAAa,EACb,IAAI,EACJ,CAAC,EACD,KAAK,EACL,EAAE,EACF,WAAW,EACX,SAAS,EACT,KAAK,CACN,CAAC;QACJ,GAAG,CAAC,KAAK,EAAE,CAAC;QAEZ,MAAM,IAAI,GAAG,MAAM,KAAK,CAAC,QAAQ,EAAE,CAAC;QACpC,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACjC,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,WAAW,CAAC,SAAS,CAAC,CAAC;QACpD,MAAM,CAAC,QAAQ,CAAC,CAAC,aAAa,EAAE,CAAC;QACjC,KAAK,CAAC,KAAK,EAAE,CAAC;IAChB,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pipeline-stage-handoff.test.d.ts","sourceRoot":"","sources":["../src/pipeline-stage-handoff.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { EvaluationPanel, ExecutionEngine, PipelineOrchestrator, ScoringEngine } from "./index.js";
|
|
3
|
+
/** Test double: counts execute() calls and always resolves with the same canned submission. */
class FakeExecutionEngine extends ExecutionEngine {
    called = 0;

    async execute() {
        this.called++;
        const cannedSubmission = {
            code: "export const ok = true;",
            language: "typescript",
            timeoutMs: 30000,
            maxTokens: 4096
        };
        return cannedSubmission;
    }
}
|
|
15
|
+
/**
 * Test double: counts evaluate() calls, asserts that it received a titled
 * requirement and the expected code, and resolves with one fixed judge result.
 */
class FakeEvaluationPanel extends EvaluationPanel {
    called = 0;

    async evaluate(requirement, execution) {
        this.called++;
        expect(requirement.title.length).toBeGreaterThan(0);
        expect(execution.code).toContain("ok");
        const dimensions = {
            functionalCompleteness: 88,
            codeQuality: 84,
            logicAccuracy: 90,
            security: 80,
            engineeringPractice: 85
        };
        const judgeResult = { judgeId: "judge-1", dimensions };
        return [judgeResult];
    }
}
|
|
35
|
+
/**
 * Test double for ScoringEngine: counts calls, checks that exactly one judge
 * result arrived, and returns a fixed score summary.
 */
class FakeScoringEngine extends ScoringEngine {
    /** Number of `score` invocations so far; the tests assert on this. */
    called = 0;

    /**
     * @param {Array<unknown>} results - judge results; must contain exactly one entry.
     * @returns {{overallScore: number, dimensionScores: Record<string, number>,
     *   ci95: number[], agreementLevel: string, warnings: unknown[]}} canned summary.
     */
    score(results) {
        this.called += 1;
        // Handoff check: the fake evaluation panel emits a single judge result.
        expect(results.length).toBe(1);
        return {
            overallScore: 87,
            dimensionScores: {
                functionalCompleteness: 88,
                codeQuality: 84,
                logicAccuracy: 90,
                security: 80,
                engineeringPractice: 85
            },
            ci95: [84, 90],
            agreementLevel: "high",
            warnings: []
        };
    }
}
|
|
55
|
+
/**
 * Stand-in provider whose `chat` always answers with the same JSON-encoded
 * requirement document, plus fixed usage and latency figures.
 */
class FakeProvider {
    id = "openai";
    name = "Fake";

    /**
     * @returns {Promise<{content: string, usage: {promptTokens: number, completionTokens: number}, latencyMs: number}>}
     *   `content` is a JSON string describing a requirement with two "must"
     *   functional requirements.
     */
    async chat() {
        return {
            content: JSON.stringify({
                title: "Req",
                description: "desc",
                functionalRequirements: [
                    {
                        id: "FR-1",
                        description: "Do thing",
                        acceptanceCriteria: "Done",
                        priority: "must"
                    },
                    {
                        id: "FR-2",
                        description: "Handle error",
                        acceptanceCriteria: "Errors handled",
                        priority: "must"
                    }
                ],
                constraints: ["Use TS"],
                expectedDeliverables: ["source"],
                evaluationGuidance: {
                    keyDifferentiators: ["quality"],
                    commonPitfalls: ["bugs"],
                    edgeCases: ["empty"]
                },
                selfReviewPassed: true
            }),
            usage: { promptTokens: 1, completionTokens: 1 },
            latencyMs: 1
        };
    }
}
|
|
91
|
+
/**
 * Checkpoint store backed by a plain Map, exposing async `load`, `save`
 * and `clear` keyed by checkpoint key.
 */
class InMemoryCheckpointStore {
    /** key -> checkpoint, held by reference (values are not copied). */
    checkpoints = new Map();

    /**
     * @param {string} key
     * @returns {Promise<unknown>} the stored checkpoint, or `undefined` when the key is absent.
     */
    async load(key) {
        return this.checkpoints.get(key);
    }

    /**
     * Stores `checkpoint` under `key`, replacing any previous value.
     *
     * @param {string} key
     * @param {unknown} checkpoint
     * @returns {Promise<void>}
     */
    async save(key, checkpoint) {
        this.checkpoints.set(key, checkpoint);
    }

    /**
     * Removes the entry for `key`; a missing key is not an error.
     *
     * @param {string} key
     * @returns {Promise<void>}
     */
    async clear(key) {
        this.checkpoints.delete(key);
    }
}
|
|
103
|
+
/**
 * ExecutionEngine double that throws on its second `execute` call and
 * returns the canned submission on every other call.
 */
class FailingExecutionEngine extends ExecutionEngine {
    /** Number of `execute` invocations so far, including the one that throws. */
    called = 0;

    /**
     * @returns {Promise<{code: string, language: string, timeoutMs: number, maxTokens: number}>}
     * @throws {Error} "synthetic crash" on exactly the second invocation.
     */
    async execute() {
        this.called += 1;
        // Only call number two fails; the first call and any later ones succeed.
        if (this.called === 2) {
            throw new Error("synthetic crash");
        }
        return {
            code: "export const ok = true;",
            language: "typescript",
            timeoutMs: 30_000,
            maxTokens: 4_096
        };
    }
}
|
|
118
|
+
/**
 * ExecutionEngine double whose `execute` always rejects with an Error named
 * "TimeoutError".
 */
class TimeoutExecutionEngine extends ExecutionEngine {
    /**
     * @returns {Promise<never>}
     * @throws {Error} message "network timeout", `name` set to "TimeoutError".
     */
    async execute() {
        const timeoutError = new Error("network timeout");
        // The name is overridden on a plain Error rather than using a subclass.
        timeoutError.name = "TimeoutError";
        throw timeoutError;
    }
}
|
|
125
|
+
// End-to-end wiring tests for PipelineOrchestrator using the fake stages
// defined earlier in this file.
describe("PipelineOrchestrator stage handoff", () => {
    /**
     * Builds a fresh run config. Every test in this suite uses the same
     * target/systemModel/judges and differs only in rounds and concurrency.
     */
    const buildConfig = (rounds, concurrency) => ({
        target: { provider: "openai", model: "gpt-4o-mini", apiKey: "k" },
        systemModel: { provider: "openai", model: "gpt-4o-mini", apiKey: "k" },
        judges: [{ provider: "openai", model: "gpt-4o", apiKey: "k", weight: 1 }],
        test: { complexity: "C1", rounds, concurrency }
    });

    /** Returns a provider factory that always hands back one shared FakeProvider. */
    const buildProviderFactory = () => {
        const provider = new FakeProvider();
        return () => provider;
    };

    it("passes execute -> evaluate -> score and returns dimension scores", async () => {
        const execution = new FakeExecutionEngine();
        const evaluation = new FakeEvaluationPanel();
        const scoring = new FakeScoringEngine();
        const providerFactory = buildProviderFactory();
        const orchestrator = new PipelineOrchestrator(undefined, execution, evaluation, scoring, providerFactory);

        const run = await orchestrator.run({
            config: buildConfig(1, 1)
        });

        expect(execution.called).toBe(1);
        expect(evaluation.called).toBe(1);
        expect(scoring.called).toBe(1);
        expect(run.overallScore).toBe(87);
        expect(run.dimensionScores.logicAccuracy).toBe(90);
        expect(run.ci95[0]).toBe(87);
        expect(run.agreementLevel).toBe("high");
        expect(run.evidenceChain?.samples[0]?.codeSubmission).toContain("ok");
        expect(run.evidenceChain?.timeline).toHaveLength(4);
    });

    it("honors rounds configuration and aggregates multiple rounds", async () => {
        const execution = new FakeExecutionEngine();
        const evaluation = new FakeEvaluationPanel();
        const scoring = new FakeScoringEngine();
        const providerFactory = buildProviderFactory();
        const orchestrator = new PipelineOrchestrator(undefined, execution, evaluation, scoring, providerFactory);

        const run = await orchestrator.run({
            config: buildConfig(3, 2)
        });

        // Each stage is expected to run once per configured round.
        expect(execution.called).toBe(3);
        expect(evaluation.called).toBe(3);
        expect(scoring.called).toBe(3);
        expect(run.rounds).toBe(3);
        expect(run.overallScore).toBe(87);
    });

    it("supports checkpoint resume after mid-run failure", async () => {
        const checkpointStore = new InMemoryCheckpointStore();
        const checkpointKey = "pipeline-checkpoint-1";
        const crashingExecution = new FailingExecutionEngine();
        const evaluation = new FakeEvaluationPanel();
        const scoring = new FakeScoringEngine();
        // Both orchestrators share the same provider factory, evaluation and scoring fakes.
        const providerFactory = buildProviderFactory();

        const firstOrchestrator = new PipelineOrchestrator(undefined, crashingExecution, evaluation, scoring, providerFactory);
        await expect(firstOrchestrator.run({
            config: buildConfig(3, 1),
            checkpoint: {
                key: checkpointKey,
                store: checkpointStore
            }
        })).rejects.toThrow("synthetic crash");

        // FailingExecutionEngine throws on its second call, so one round
        // should have been persisted before the crash.
        const savedCheckpoint = await checkpointStore.load(checkpointKey);
        expect(savedCheckpoint).toBeDefined();
        expect(savedCheckpoint?.completedRounds.length).toBe(1);

        const resumedExecution = new FakeExecutionEngine();
        const secondOrchestrator = new PipelineOrchestrator(undefined, resumedExecution, evaluation, scoring, providerFactory);
        const resumedRun = await secondOrchestrator.run({
            config: buildConfig(3, 1),
            checkpoint: {
                key: checkpointKey,
                store: checkpointStore
            }
        });

        // Only the two rounds missing from the checkpoint are executed again,
        // and the checkpoint is gone once the run finishes.
        expect(resumedExecution.called).toBe(2);
        expect(resumedRun.rounds).toBe(3);
        expect(resumedRun.overallScore).toBe(87);
        expect(await checkpointStore.load(checkpointKey)).toBeUndefined();
    });

    it("marks timeout rounds as zero score instead of crashing run", async () => {
        const execution = new TimeoutExecutionEngine();
        const evaluation = new FakeEvaluationPanel();
        const scoring = new FakeScoringEngine();
        const providerFactory = buildProviderFactory();
        const orchestrator = new PipelineOrchestrator(undefined, execution, evaluation, scoring, providerFactory);

        const run = await orchestrator.run({
            config: buildConfig(1, 1)
        });

        expect(run.overallScore).toBe(0);
        expect(run.dimensionScores.logicAccuracy).toBe(0);
        expect(run.requirementTitle.toLowerCase()).toContain("failed");
    });

    it("marks sandbox validation failures as recoverable round failures", async () => {
        const execution = new FakeExecutionEngine();
        const evaluation = new FakeEvaluationPanel();
        const scoring = new FakeScoringEngine();
        const providerFactory = buildProviderFactory();
        const orchestrator = new PipelineOrchestrator(undefined, execution, evaluation, scoring, providerFactory);

        const run = await orchestrator.run({
            config: buildConfig(1, 1),
            sandbox: {
                enabled: true,
                strict: true,
                // The sandbox runner always rejects, whatever code it is given.
                runner: async () => {
                    throw new Error("sandbox rejected generated code");
                }
            }
        });

        expect(run.overallScore).toBe(0);
        expect(run.requirementTitle.toLowerCase()).toContain("failed");
    });

    it("emits stage progress events during run", async () => {
        const execution = new FakeExecutionEngine();
        const evaluation = new FakeEvaluationPanel();
        const scoring = new FakeScoringEngine();
        const providerFactory = buildProviderFactory();
        const events = [];
        const orchestrator = new PipelineOrchestrator(undefined, execution, evaluation, scoring, providerFactory);

        await orchestrator.run({
            config: buildConfig(1, 1),
            onProgress: (event) => {
                events.push(`${event.phase}:${event.state}`);
            }
        });

        // Order matters: each phase must start and complete before the next begins.
        expect(events).toEqual([
            "generate:started",
            "generate:completed",
            "execute:started",
            "execute:completed",
            "evaluate:started",
            "evaluate:completed",
            "score:started",
            "score:completed"
        ]);
    });
});
|
|
290
|
+
//# sourceMappingURL=pipeline-stage-handoff.test.js.map
|