@contractspec/module.provider-ranking 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -0
- package/dist/browser/entities/index.js +107 -0
- package/dist/browser/index.js +538 -0
- package/dist/browser/pipeline/index.js +134 -0
- package/dist/browser/pipeline/ingestion-pipeline.js +86 -0
- package/dist/browser/pipeline/ranking-pipeline.js +49 -0
- package/dist/browser/storage/index.js +299 -0
- package/dist/entities/index.d.ts +60 -0
- package/dist/entities/index.js +108 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.js +539 -0
- package/dist/node/entities/index.js +107 -0
- package/dist/node/index.js +538 -0
- package/dist/node/pipeline/index.js +134 -0
- package/dist/node/pipeline/ingestion-pipeline.js +86 -0
- package/dist/node/pipeline/ranking-pipeline.js +49 -0
- package/dist/node/storage/index.js +299 -0
- package/dist/pipeline/index.d.ts +2 -0
- package/dist/pipeline/index.js +135 -0
- package/dist/pipeline/ingestion-pipeline.d.ts +38 -0
- package/dist/pipeline/ingestion-pipeline.js +87 -0
- package/dist/pipeline/ranking-pipeline.d.ts +31 -0
- package/dist/pipeline/ranking-pipeline.js +50 -0
- package/dist/storage/index.d.ts +30 -0
- package/dist/storage/index.js +300 -0
- package/package.json +183 -0
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
// @bun
|
|
2
|
+
// src/entities/index.ts
|
|
3
|
+
import { defineEntity, field, index } from "@contractspec/lib.schema";
|
|
4
|
+
/**
 * Entity definition for one benchmark score of a model, as reported by a
 * single benchmark source for a single ranking dimension.
 *
 * Declared with schema "lssm_ranking" and map "benchmark_result".
 * `score` is described as normalized to 0-100, while `rawScore` keeps the
 * value as delivered by the source. `metadata` is the only optional field.
 */
const BenchmarkResultEntity = defineEntity({
  name: "BenchmarkResult",
  description: "Individual benchmark score for a model from a specific source.",
  schema: "lssm_ranking",
  map: "benchmark_result",
  fields: {
    id: field.id({
      description: "Benchmark result ID"
    }),
    modelId: field.string({
      description: "Model identifier"
    }),
    providerKey: field.string({
      description: "Provider key (e.g. openai, anthropic)"
    }),
    source: field.string({
      description: "Benchmark source (e.g. swe-bench, chatbot-arena)"
    }),
    dimension: field.string({
      description: "Ranking dimension (coding, reasoning, etc.)"
    }),
    score: field.float({
      description: "Normalized score 0-100"
    }),
    rawScore: field.json({
      description: "Original score from source"
    }),
    metadata: field.json({
      isOptional: true
    }),
    measuredAt: field.dateTime({
      description: "When the benchmark was measured"
    }),
    ingestedAt: field.dateTime({
      description: "When the result was ingested"
    })
  },
  indexes: [
    index.unique(["id"]),
    // One single-column index per commonly filtered column, in declaration order.
    ...["modelId", "providerKey", "source", "dimension"].map((column) => index.on([column])),
    // Composite index for looking up a model's score per source and dimension.
    index.on(["modelId", "source", "dimension"])
  ]
});
|
|
38
|
+
/**
 * Entity definition for the computed composite ranking of a model.
 *
 * Declared with schema "lssm_ranking" and map "model_ranking". The model
 * identifier is the id field, so there is one ranking record per model.
 * `previousRank` is optional.
 */
const ModelRankingEntity = defineEntity({
  name: "ModelRanking",
  description: "Computed composite ranking for a model.",
  schema: "lssm_ranking",
  map: "model_ranking",
  fields: {
    modelId: field.id({
      description: "Model identifier (primary key)"
    }),
    providerKey: field.string({
      description: "Provider key"
    }),
    compositeScore: field.float({
      description: "Weighted composite score 0-100"
    }),
    dimensionScores: field.json({
      description: "Per-dimension score breakdown"
    }),
    rank: field.int({
      description: "Current rank position"
    }),
    previousRank: field.int({
      isOptional: true,
      description: "Previous rank position"
    }),
    updatedAt: field.updatedAt()
  },
  // Single-column indexes, in declaration order.
  indexes: ["providerKey", "rank", "compositeScore"].map((column) => index.on([column]))
});
|
|
65
|
+
/**
 * Entity definition that tracks one benchmark data ingestion run.
 *
 * Declared with schema "lssm_ranking" and map "ingestion_run".
 * `completedAt` and `error` are optional; every other field is required.
 */
const IngestionRunEntity = defineEntity({
  name: "IngestionRun",
  description: "Tracks a benchmark data ingestion run.",
  schema: "lssm_ranking",
  map: "ingestion_run",
  fields: {
    id: field.id({
      description: "Ingestion run ID"
    }),
    source: field.string({
      description: "Benchmark source"
    }),
    status: field.string({
      description: "Run status: pending, running, completed, failed"
    }),
    resultsCount: field.int({
      description: "Number of results ingested"
    }),
    startedAt: field.dateTime({
      description: "When the run started"
    }),
    completedAt: field.dateTime({
      isOptional: true,
      description: "When the run completed"
    }),
    error: field.string({
      isOptional: true,
      description: "Error message if failed"
    })
  },
  // Single-column indexes, in declaration order.
  indexes: ["source", "status", "startedAt"].map((column) => index.on([column]))
});
|
|
93
|
+
/** Every entity defined by this module, in registration order. */
const providerRankingEntities = [BenchmarkResultEntity, ModelRankingEntity, IngestionRunEntity];

/** Schema contribution that pairs the module id with the entities it owns. */
const providerRankingSchemaContribution = {
  moduleId: "@contractspec/module.provider-ranking",
  entities: providerRankingEntities
};
|
|
102
|
+
export {
|
|
103
|
+
providerRankingSchemaContribution,
|
|
104
|
+
providerRankingEntities,
|
|
105
|
+
ModelRankingEntity,
|
|
106
|
+
IngestionRunEntity,
|
|
107
|
+
BenchmarkResultEntity
|
|
108
|
+
};
|
package/dist/index.d.ts
ADDED
package/dist/index.js
ADDED
|
@@ -0,0 +1,539 @@
|
|
|
1
|
+
// @bun
|
|
2
|
+
// src/entities/index.ts
|
|
3
|
+
import { defineEntity, field, index } from "@contractspec/lib.schema";
|
|
4
|
+
/**
 * Entity definition for one benchmark score of a model, as reported by a
 * single benchmark source for a single ranking dimension.
 *
 * Declared with schema "lssm_ranking" and map "benchmark_result".
 * `score` is described as normalized to 0-100, while `rawScore` keeps the
 * value as delivered by the source. `metadata` is the only optional field.
 */
const BenchmarkResultEntity = defineEntity({
  name: "BenchmarkResult",
  description: "Individual benchmark score for a model from a specific source.",
  schema: "lssm_ranking",
  map: "benchmark_result",
  fields: {
    id: field.id({
      description: "Benchmark result ID"
    }),
    modelId: field.string({
      description: "Model identifier"
    }),
    providerKey: field.string({
      description: "Provider key (e.g. openai, anthropic)"
    }),
    source: field.string({
      description: "Benchmark source (e.g. swe-bench, chatbot-arena)"
    }),
    dimension: field.string({
      description: "Ranking dimension (coding, reasoning, etc.)"
    }),
    score: field.float({
      description: "Normalized score 0-100"
    }),
    rawScore: field.json({
      description: "Original score from source"
    }),
    metadata: field.json({
      isOptional: true
    }),
    measuredAt: field.dateTime({
      description: "When the benchmark was measured"
    }),
    ingestedAt: field.dateTime({
      description: "When the result was ingested"
    })
  },
  indexes: [
    index.unique(["id"]),
    // One single-column index per commonly filtered column, in declaration order.
    ...["modelId", "providerKey", "source", "dimension"].map((column) => index.on([column])),
    // Composite index for looking up a model's score per source and dimension.
    index.on(["modelId", "source", "dimension"])
  ]
});
|
|
38
|
+
/**
 * Entity definition for the computed composite ranking of a model.
 *
 * Declared with schema "lssm_ranking" and map "model_ranking". The model
 * identifier is the id field, so there is one ranking record per model.
 * `previousRank` is optional.
 */
const ModelRankingEntity = defineEntity({
  name: "ModelRanking",
  description: "Computed composite ranking for a model.",
  schema: "lssm_ranking",
  map: "model_ranking",
  fields: {
    modelId: field.id({
      description: "Model identifier (primary key)"
    }),
    providerKey: field.string({
      description: "Provider key"
    }),
    compositeScore: field.float({
      description: "Weighted composite score 0-100"
    }),
    dimensionScores: field.json({
      description: "Per-dimension score breakdown"
    }),
    rank: field.int({
      description: "Current rank position"
    }),
    previousRank: field.int({
      isOptional: true,
      description: "Previous rank position"
    }),
    updatedAt: field.updatedAt()
  },
  // Single-column indexes, in declaration order.
  indexes: ["providerKey", "rank", "compositeScore"].map((column) => index.on([column]))
});
|
|
65
|
+
/**
 * Entity definition that tracks one benchmark data ingestion run.
 *
 * Declared with schema "lssm_ranking" and map "ingestion_run".
 * `completedAt` and `error` are optional; every other field is required.
 */
const IngestionRunEntity = defineEntity({
  name: "IngestionRun",
  description: "Tracks a benchmark data ingestion run.",
  schema: "lssm_ranking",
  map: "ingestion_run",
  fields: {
    id: field.id({
      description: "Ingestion run ID"
    }),
    source: field.string({
      description: "Benchmark source"
    }),
    status: field.string({
      description: "Run status: pending, running, completed, failed"
    }),
    resultsCount: field.int({
      description: "Number of results ingested"
    }),
    startedAt: field.dateTime({
      description: "When the run started"
    }),
    completedAt: field.dateTime({
      isOptional: true,
      description: "When the run completed"
    }),
    error: field.string({
      isOptional: true,
      description: "Error message if failed"
    })
  },
  // Single-column indexes, in declaration order.
  indexes: ["source", "status", "startedAt"].map((column) => index.on([column]))
});
|
|
93
|
+
/** Every entity defined by this module, in registration order. */
const providerRankingEntities = [BenchmarkResultEntity, ModelRankingEntity, IngestionRunEntity];

/** Schema contribution that pairs the module id with the entities it owns. */
const providerRankingSchemaContribution = {
  moduleId: "@contractspec/module.provider-ranking",
  entities: providerRankingEntities
};
|
|
102
|
+
|
|
103
|
+
// src/storage/index.ts
|
|
104
|
+
/**
 * PostgreSQL-backed persistence for benchmark results, model rankings and
 * ingestion runs.
 *
 * The `database` option is used through two methods only:
 * `execute(sql, params)` for statements and `query(sql, params)`, whose
 * resolved value must expose a `rows` array. Rows use snake_case column names
 * and are converted to camelCase objects by the `map*` helpers.
 *
 * NOTE(review): `schema` is interpolated into SQL as an unquoted identifier,
 * so it must come from trusted configuration, never from user input.
 */
class PostgresProviderRankingStore {
  database;
  schema;
  createTablesIfMissing;
  ensured = false;

  /**
   * @param {object} options
   * @param {object} options.database - Client exposing `query` and `execute`.
   * @param {string} [options.schema="lssm_ranking"] - Schema holding the tables.
   * @param {boolean} [options.createTablesIfMissing=true] - Whether to run the
   *   idempotent DDL in `ensureTables` before the first operation.
   */
  constructor(options) {
    this.database = options.database;
    this.schema = options.schema ?? "lssm_ranking";
    this.createTablesIfMissing = options.createTablesIfMissing ?? true;
  }

  /**
   * Inserts a benchmark result, or refreshes an existing row with the same id.
   * On conflict only score, raw_score, metadata, measured_at and ingested_at
   * are overwritten; the identifying columns keep their stored values.
   *
   * @param {object} result - Benchmark result with `Date` values in
   *   `measuredAt` and `ingestedAt`.
   * @returns {Promise<void>}
   */
  async upsertBenchmarkResult(result) {
    await this.ensureTables();
    await this.database.execute(`INSERT INTO ${this.table("benchmark_result")}
       (id, model_id, provider_key, source, dimension, score, raw_score, metadata, measured_at, ingested_at)
       VALUES ($1, $2, $3, $4, $5, $6, $7::jsonb, $8::jsonb, $9, $10)
       ON CONFLICT (id)
       DO UPDATE SET
         score = EXCLUDED.score,
         raw_score = EXCLUDED.raw_score,
         metadata = EXCLUDED.metadata,
         measured_at = EXCLUDED.measured_at,
         ingested_at = EXCLUDED.ingested_at;`, [
      result.id,
      result.modelId,
      result.providerKey,
      result.source,
      result.dimension,
      result.score,
      JSON.stringify(result.rawScore),
      result.metadata ? JSON.stringify(result.metadata) : null,
      result.measuredAt.toISOString(),
      result.ingestedAt.toISOString()
    ]);
  }

  /**
   * Loads one benchmark result by id.
   *
   * @param {string} id
   * @returns {Promise<object|null>} The mapped result, or null when absent.
   */
  async getBenchmarkResult(id) {
    await this.ensureTables();
    const rows = await this.database.query(`SELECT * FROM ${this.table("benchmark_result")} WHERE id = $1;`, [id]);
    return rows.rows[0] ? this.mapBenchmarkResult(rows.rows[0]) : null;
  }

  /**
   * Lists benchmark results, newest ingestion first, with optional equality
   * filters on source, modelId, dimension and providerKey.
   *
   * @param {object} query - Filters plus `limit` (default 50) and `offset`
   *   (default 0).
   * @returns {Promise<{results: object[], total: number, nextOffset: (number|undefined)}>}
   *   `nextOffset` is undefined once the last page has been reached.
   */
  async listBenchmarkResults(query) {
    await this.ensureTables();
    const limit = query.limit ?? 50;
    const offset = query.offset ?? 0;
    const conditions = [
      ["source", query.source],
      ["model_id", query.modelId],
      ["dimension", query.dimension],
      ["provider_key", query.providerKey]
    ];
    const count = this.buildWhere(conditions, 1);
    const countResult = await this.database.query(`SELECT COUNT(*)::int as total FROM ${this.table("benchmark_result")} ${count.where};`, count.params);
    const total = Number(countResult.rows[0]?.total ?? 0);
    // $1/$2 are reserved for LIMIT/OFFSET, so filter placeholders start at $3.
    const data = this.buildWhere(conditions, 3);
    // `id` breaks ties between rows ingested at the same instant; without it
    // offset pagination could repeat or skip rows between pages.
    const rows = await this.database.query(`SELECT * FROM ${this.table("benchmark_result")}
       ${data.where}
       ORDER BY ingested_at DESC, id ASC
       LIMIT $1 OFFSET $2;`, [limit, offset, ...data.params]);
    const results = rows.rows.map((row) => this.mapBenchmarkResult(row));
    const nextOffset = offset + results.length < total ? offset + results.length : undefined;
    return { results, total, nextOffset };
  }

  /**
   * Inserts a model ranking, or overwrites the existing row for the model.
   *
   * @param {object} ranking - Ranking with a `Date` in `updatedAt`.
   * @returns {Promise<void>}
   */
  async upsertModelRanking(ranking) {
    await this.ensureTables();
    await this.database.execute(`INSERT INTO ${this.table("model_ranking")}
       (model_id, provider_key, composite_score, dimension_scores, rank, previous_rank, updated_at)
       VALUES ($1, $2, $3, $4::jsonb, $5, $6, $7)
       ON CONFLICT (model_id)
       DO UPDATE SET
         provider_key = EXCLUDED.provider_key,
         composite_score = EXCLUDED.composite_score,
         dimension_scores = EXCLUDED.dimension_scores,
         rank = EXCLUDED.rank,
         previous_rank = EXCLUDED.previous_rank,
         updated_at = EXCLUDED.updated_at;`, [
      ranking.modelId,
      ranking.providerKey,
      ranking.compositeScore,
      JSON.stringify(ranking.dimensionScores),
      ranking.rank,
      // A missing previous rank must reach the driver as SQL NULL, not undefined.
      ranking.previousRank ?? null,
      ranking.updatedAt.toISOString()
    ]);
  }

  /**
   * Loads the ranking of one model.
   *
   * @param {string} modelId
   * @returns {Promise<object|null>} The mapped ranking, or null when absent.
   */
  async getModelRanking(modelId) {
    await this.ensureTables();
    const rows = await this.database.query(`SELECT * FROM ${this.table("model_ranking")} WHERE model_id = $1;`, [modelId]);
    return rows.rows[0] ? this.mapModelRanking(rows.rows[0]) : null;
  }

  /**
   * Lists model rankings, optionally filtered by providerKey.
   *
   * Without `query.dimension` rows are ordered by rank. With it, rows are
   * ordered by that dimension's `score` inside `dimension_scores`, highest
   * first, and models lacking the dimension come last.
   *
   * @param {object} query - `providerKey`, `dimension`, `limit` (default 50)
   *   and `offset` (default 0).
   * @returns {Promise<{rankings: object[], total: number, nextOffset: (number|undefined)}>}
   */
  async listModelRankings(query) {
    await this.ensureTables();
    const limit = query.limit ?? 50;
    const offset = query.offset ?? 0;
    const conditions = [["provider_key", query.providerKey]];
    const count = this.buildWhere(conditions, 1);
    const countResult = await this.database.query(`SELECT COUNT(*)::int as total FROM ${this.table("model_ranking")} ${count.where};`, count.params);
    const total = Number(countResult.rows[0]?.total ?? 0);
    // $1/$2 are reserved for LIMIT/OFFSET, so filter placeholders start at $3.
    const data = this.buildWhere(conditions, 3);
    const dataParams = [limit, offset, ...data.params];
    let orderBy = "rank ASC";
    if (query.dimension) {
      // The dimension name is caller-supplied, so it is bound as a parameter
      // rather than spliced into the SQL text. The score is cast to a number
      // so that 100 sorts above 95 instead of comparing as text.
      dataParams.push(String(query.dimension));
      orderBy = `(dimension_scores -> $${dataParams.length}::text ->> 'score')::double precision DESC NULLS LAST`;
    }
    // `model_id` is the primary key and makes the page order deterministic.
    const rows = await this.database.query(`SELECT * FROM ${this.table("model_ranking")}
       ${data.where}
       ORDER BY ${orderBy}, model_id ASC
       LIMIT $1 OFFSET $2;`, dataParams);
    const rankings = rows.rows.map((row) => this.mapModelRanking(row));
    const nextOffset = offset + rankings.length < total ? offset + rankings.length : undefined;
    return { rankings, total, nextOffset };
  }

  /**
   * Builds a profile for a model from its ranking and all of its benchmark
   * results (newest ingestion first).
   *
   * `displayName`, `contextWindow`, `costPerMillion` and `capabilities` are
   * filled with fixed placeholder values because this store has no source
   * for them.
   *
   * @param {string} modelId
   * @returns {Promise<object|null>} Null when the model has neither a ranking
   *   nor any benchmark result.
   */
  async getModelProfile(modelId) {
    await this.ensureTables();
    const ranking = await this.getModelRanking(modelId);
    const benchResults = await this.database.query(`SELECT * FROM ${this.table("benchmark_result")}
       WHERE model_id = $1
       ORDER BY ingested_at DESC;`, [modelId]);
    if (!ranking && benchResults.rows.length === 0)
      return null;
    return {
      modelId,
      providerKey: ranking?.providerKey ?? String(benchResults.rows[0]?.provider_key ?? "unknown"),
      displayName: modelId,
      contextWindow: 0,
      costPerMillion: null,
      capabilities: [],
      ranking: ranking ?? null,
      benchmarkResults: benchResults.rows.map((row) => this.mapBenchmarkResult(row))
    };
  }

  /**
   * Persists a new ingestion run record.
   *
   * @param {object} run - Run with a `Date` in `startedAt` and an optional
   *   `Date` in `completedAt`.
   * @returns {Promise<void>}
   */
  async createIngestionRun(run) {
    await this.ensureTables();
    await this.database.execute(`INSERT INTO ${this.table("ingestion_run")}
       (id, source, status, results_count, started_at, completed_at, error)
       VALUES ($1, $2, $3, $4, $5, $6, $7);`, [
      run.id,
      run.source,
      run.status,
      run.resultsCount,
      run.startedAt.toISOString(),
      run.completedAt?.toISOString() ?? null,
      // A missing error must reach the driver as SQL NULL, not undefined.
      run.error ?? null
    ]);
  }

  /**
   * Applies a partial update to an ingestion run. Only the keys of `update`
   * that are not `undefined` are written; an empty update issues no query.
   *
   * @param {string} id
   * @param {object} update - Any of `status`, `resultsCount`, `completedAt`,
   *   `error`.
   * @returns {Promise<void>}
   */
  async updateIngestionRun(id, update) {
    await this.ensureTables();
    const sets = [];
    const params = [id];
    if (update.status !== undefined) {
      params.push(update.status);
      sets.push(`status = $${params.length}`);
    }
    if (update.resultsCount !== undefined) {
      params.push(update.resultsCount);
      sets.push(`results_count = $${params.length}`);
    }
    if (update.completedAt !== undefined) {
      // An explicit null clears the column.
      params.push(update.completedAt?.toISOString() ?? null);
      sets.push(`completed_at = $${params.length}`);
    }
    if (update.error !== undefined) {
      params.push(update.error);
      sets.push(`error = $${params.length}`);
    }
    if (sets.length === 0)
      return;
    await this.database.execute(`UPDATE ${this.table("ingestion_run")} SET ${sets.join(", ")} WHERE id = $1;`, params);
  }

  /**
   * Loads one ingestion run by id.
   *
   * @param {string} id
   * @returns {Promise<object|null>} The mapped run, or null when absent.
   */
  async getIngestionRun(id) {
    await this.ensureTables();
    const rows = await this.database.query(`SELECT * FROM ${this.table("ingestion_run")} WHERE id = $1;`, [id]);
    return rows.rows[0] ? this.mapIngestionRun(rows.rows[0]) : null;
  }

  /**
   * Creates the schema, tables and indexes when they do not exist yet.
   * Does nothing after the first successful run on this instance, or when
   * `createTablesIfMissing` is false.
   *
   * @returns {Promise<void>}
   */
  async ensureTables() {
    if (this.ensured || !this.createTablesIfMissing)
      return;
    await this.database.execute(`CREATE SCHEMA IF NOT EXISTS ${this.schema};`);
    await this.database.execute(`CREATE TABLE IF NOT EXISTS ${this.table("benchmark_result")} (
        id text PRIMARY KEY,
        model_id text NOT NULL,
        provider_key text NOT NULL,
        source text NOT NULL,
        dimension text NOT NULL,
        score double precision NOT NULL,
        raw_score jsonb,
        metadata jsonb,
        measured_at timestamptz NOT NULL,
        ingested_at timestamptz NOT NULL
      );`);
    await this.database.execute(`CREATE INDEX IF NOT EXISTS benchmark_result_model_idx
       ON ${this.table("benchmark_result")} (model_id);`);
    await this.database.execute(`CREATE INDEX IF NOT EXISTS benchmark_result_source_idx
       ON ${this.table("benchmark_result")} (source);`);
    await this.database.execute(`CREATE INDEX IF NOT EXISTS benchmark_result_dimension_idx
       ON ${this.table("benchmark_result")} (dimension);`);
    await this.database.execute(`CREATE TABLE IF NOT EXISTS ${this.table("model_ranking")} (
        model_id text PRIMARY KEY,
        provider_key text NOT NULL,
        composite_score double precision NOT NULL,
        dimension_scores jsonb NOT NULL,
        rank int NOT NULL,
        previous_rank int,
        updated_at timestamptz NOT NULL
      );`);
    await this.database.execute(`CREATE INDEX IF NOT EXISTS model_ranking_rank_idx
       ON ${this.table("model_ranking")} (rank);`);
    await this.database.execute(`CREATE TABLE IF NOT EXISTS ${this.table("ingestion_run")} (
        id text PRIMARY KEY,
        source text NOT NULL,
        status text NOT NULL,
        results_count int NOT NULL DEFAULT 0,
        started_at timestamptz NOT NULL,
        completed_at timestamptz,
        error text
      );`);
    // Only flagged after every statement succeeded, so a failed attempt is retried.
    this.ensured = true;
  }

  /**
   * @param {string} name - Unqualified table name.
   * @returns {string} The schema-qualified table name.
   */
  table(name) {
    return `${this.schema}.${name}`;
  }

  /**
   * Builds a WHERE clause of AND-ed equality tests for the conditions whose
   * value is truthy; falsy values (undefined, null, "") mean "no filter".
   *
   * @param {Array<[string, unknown]>} conditions - `[column, value]` pairs
   *   with trusted column names.
   * @param {number} firstPlaceholder - Number used for the first `$n`.
   * @returns {{where: string, params: unknown[]}} `where` is "" when no
   *   condition applies.
   */
  buildWhere(conditions, firstPlaceholder) {
    const clauses = [];
    const params = [];
    for (const [column, value] of conditions) {
      if (!value)
        continue;
      clauses.push(`${column} = $${firstPlaceholder + params.length}`);
      params.push(value);
    }
    return {
      where: clauses.length ? `WHERE ${clauses.join(" AND ")}` : "",
      params
    };
  }

  /**
   * Converts a timestamp column value into a fresh `Date`.
   * `Date` instances are copied by epoch value because a round trip through
   * `String(date)` would drop the milliseconds.
   *
   * @param {unknown} value - A `Date` or a date string from the driver.
   * @returns {Date}
   */
  toDate(value) {
    return value instanceof Date ? new Date(value.getTime()) : new Date(String(value));
  }

  /**
   * @param {object} row - Row of the benchmark_result table.
   * @returns {object} camelCase benchmark result; `metadata` defaults to `{}`.
   */
  mapBenchmarkResult(row) {
    return {
      id: String(row.id),
      modelId: String(row.model_id),
      providerKey: String(row.provider_key),
      source: String(row.source),
      dimension: String(row.dimension),
      score: Number(row.score),
      rawScore: parseJson(row.raw_score),
      metadata: parseJson(row.metadata) ?? {},
      measuredAt: this.toDate(row.measured_at),
      ingestedAt: this.toDate(row.ingested_at)
    };
  }

  /**
   * @param {object} row - Row of the model_ranking table.
   * @returns {object} camelCase ranking; `dimensionScores` defaults to `{}`
   *   and `previousRank` to null.
   */
  mapModelRanking(row) {
    return {
      modelId: String(row.model_id),
      providerKey: String(row.provider_key),
      compositeScore: Number(row.composite_score),
      dimensionScores: parseJson(row.dimension_scores) ?? {},
      rank: Number(row.rank),
      previousRank: row.previous_rank != null ? Number(row.previous_rank) : null,
      updatedAt: this.toDate(row.updated_at)
    };
  }

  /**
   * @param {object} row - Row of the ingestion_run table.
   * @returns {object} camelCase run; `completedAt` and `error` are null when
   *   the column is empty.
   */
  mapIngestionRun(row) {
    return {
      id: String(row.id),
      source: String(row.source),
      status: String(row.status),
      resultsCount: Number(row.results_count),
      startedAt: this.toDate(row.started_at),
      completedAt: row.completed_at ? this.toDate(row.completed_at) : null,
      error: row.error ? String(row.error) : null
    };
  }
}
|
|
385
|
+
/**
 * Normalizes a JSON column value returned by the database driver.
 *
 * @param {unknown} value - Raw column value.
 * @returns {unknown} null for null/undefined; the parsed value for a string
 *   holding valid JSON; null for a string that is not valid JSON; any other
 *   value (already-decoded object, number, boolean) unchanged.
 */
function parseJson(value) {
  if (value === null || value === undefined) {
    return null;
  }
  // Objects and non-string primitives are passed through untouched.
  if (typeof value !== "string") {
    return value;
  }
  let parsed = null;
  try {
    parsed = JSON.parse(value);
  } catch {
    // Malformed JSON text is reported as "no value".
    parsed = null;
  }
  return parsed;
}
|
|
399
|
+
|
|
400
|
+
// src/pipeline/ingestion-pipeline.ts
|
|
401
|
+
import { normalizeBenchmarkResults } from "@contractspec/lib.provider-ranking/scoring";
|
|
402
|
+
|
|
403
|
+
/**
 * Runs benchmark ingesters and persists what they produce.
 *
 * Each run is recorded in the store as an ingestion run that starts in
 * status "running" and ends as "completed" or "failed". Results returned by
 * an ingester are passed through `normalizeBenchmarkResults` and upserted one
 * at a time.
 */
class IngestionPipeline {
  store;
  registry;
  ingesterOptions;

  /**
   * @param {object} options
   * @param {object} options.store - Store used for runs and results.
   * @param {object} options.ingesterRegistry - Registry exposing `get(source)`
   *   and `list()`.
   * @param {object} [options.ingesterOptions] - Base options handed to every
   *   ingester.
   */
  constructor(options) {
    const { store, ingesterRegistry, ingesterOptions } = options;
    this.store = store;
    this.registry = ingesterRegistry;
    this.ingesterOptions = ingesterOptions;
  }

  /**
   * Runs the ingester registered for one source.
   *
   * @param {string} source
   * @param {object} [params] - Optional `fromDate`, `toDate`, `dimensions`.
   * @returns {Promise<object>} Summary of the run.
   * @throws {Error} When no ingester is registered for `source`.
   */
  async ingest(source, params) {
    const ingester = this.registry.get(source);
    if (ingester) {
      return this.runIngester(ingester, params);
    }
    throw new Error(`No ingester registered for source: ${source}`);
  }

  /**
   * Runs every registered ingester, one after another.
   *
   * @param {object} [params] - Same shape as for `ingest`.
   * @returns {Promise<object[]>} One summary per ingester, in registry order.
   */
  async ingestAll(params) {
    const summaries = [];
    for (const ingester of this.registry.list()) {
      summaries.push(await this.runIngester(ingester, params));
    }
    return summaries;
  }

  /**
   * Combines the base ingester options with per-call overrides.
   *
   * @param {object} [params] - `fromDate` and `toDate` are converted with
   *   `new Date(...)`; `dimensions` overrides only when non-empty.
   * @returns {object} A new options object.
   */
  mergeOptions(params) {
    const fromDate = params?.fromDate;
    const toDate = params?.toDate;
    const dimensions = params?.dimensions;
    return {
      ...this.ingesterOptions,
      ...(fromDate ? { fromDate: new Date(fromDate) } : {}),
      ...(toDate ? { toDate: new Date(toDate) } : {}),
      ...(dimensions?.length ? { dimensions } : {})
    };
  }

  /**
   * Executes one ingester and records the outcome.
   *
   * A failure while ingesting, normalizing or storing does not reject: the
   * run is marked "failed" with the error message and a summary with
   * `resultsCount: 0` is returned instead.
   *
   * @param {object} ingester - Object exposing `source` and `ingest(options)`.
   * @param {object} [params]
   * @returns {Promise<{ingestionId: string, source: string, resultsCount: number, status: string}>}
   */
  async runIngester(ingester, params) {
    const { source } = ingester;
    const ingestionId = `ingest-${source}-${Date.now()}`;
    await this.store.createIngestionRun({
      id: ingestionId,
      source,
      status: "running",
      resultsCount: 0,
      startedAt: new Date(),
      completedAt: null,
      error: null
    });
    let resultsCount = 0;
    let status = "completed";
    try {
      const rawResults = await ingester.ingest(this.mergeOptions(params));
      const normalized = normalizeBenchmarkResults(rawResults);
      for (const entry of normalized) {
        await this.store.upsertBenchmarkResult(entry);
      }
      await this.store.updateIngestionRun(ingestionId, {
        status: "completed",
        resultsCount: normalized.length,
        completedAt: new Date()
      });
      resultsCount = normalized.length;
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      await this.store.updateIngestionRun(ingestionId, {
        status: "failed",
        completedAt: new Date(),
        error: errorMessage
      });
      status = "failed";
    }
    return {
      ingestionId,
      source: ingester.source,
      resultsCount,
      status
    };
  }
}
|
|
483
|
+
|
|
484
|
+
// src/pipeline/ranking-pipeline.ts
|
|
485
|
+
import { computeModelRankings } from "@contractspec/lib.provider-ranking/scoring";
|
|
486
|
+
|
|
487
|
+
/**
 * Recomputes model rankings from the benchmark results held in the store.
 */
class RankingPipeline {
  store;

  /**
   * @param {object} options
   * @param {object} options.store - Store used to read benchmark results and
   *   to read and write model rankings.
   */
  constructor(options) {
    this.store = options.store;
  }

  /**
   * Loads all benchmark results, computes rankings with
   * `computeModelRankings` and upserts every computed ranking.
   *
   * @param {object} [params]
   * @param {string[]} [params.dimensions] - When non-empty, only results of
   *   these dimensions are used.
   * @param {boolean} [params.forceRecalculate] - When truthy, existing
   *   rankings are not loaded and an empty map is passed instead.
   * @param {object} [params.weightOverrides] - Forwarded to the computation.
   * @param {*} [params.requiredTransport] - Forwarded to `listModelRankings`.
   * @param {*} [params.requiredAuthMethod] - Forwarded to `listModelRankings`.
   * @returns {Promise<{modelsRanked: number, updatedAt: Date}>}
   */
  async refresh(params) {
    const loaded = await this.loadAllBenchmarkResults();
    const wanted = params?.dimensions?.length ? new Set(params.dimensions) : null;
    const benchmarkResults = wanted ? loaded.filter((entry) => wanted.has(entry.dimension)) : loaded;

    let existingRankings;
    if (params?.forceRecalculate) {
      existingRankings = new Map();
    } else {
      // Existing rankings are fetched in a single page of up to 10000 rows.
      const current = await this.store.listModelRankings({
        limit: 10000,
        requiredTransport: params?.requiredTransport,
        requiredAuthMethod: params?.requiredAuthMethod
      });
      existingRankings = new Map(current.rankings.map((entry) => [entry.modelId, entry]));
    }

    const computeOptions = params?.weightOverrides
      ? { weightOverrides: params.weightOverrides }
      : undefined;
    const newRankings = computeModelRankings(benchmarkResults, computeOptions, existingRankings);
    for (const entry of newRankings) {
      await this.store.upsertModelRanking(entry);
    }
    return {
      modelsRanked: newRankings.length,
      updatedAt: new Date()
    };
  }

  /**
   * Reads every benchmark result from the store in pages of 500.
   * Stops when the reported total has been collected or a short page arrives.
   *
   * @returns {Promise<object[]>} All results, in the order the store returned them.
   */
  async loadAllBenchmarkResults() {
    const pageSize = 500;
    const collected = [];
    for (let offset = 0; ; offset += pageSize) {
      const page = await this.store.listBenchmarkResults({
        limit: pageSize,
        offset
      });
      for (const entry of page.results) {
        collected.push(entry);
      }
      const reachedTotal = collected.length >= page.total;
      const shortPage = page.results.length < pageSize;
      if (reachedTotal || shortPage) {
        return collected;
      }
    }
  }
}
|
|
530
|
+
export {
|
|
531
|
+
providerRankingSchemaContribution,
|
|
532
|
+
providerRankingEntities,
|
|
533
|
+
RankingPipeline,
|
|
534
|
+
PostgresProviderRankingStore,
|
|
535
|
+
ModelRankingEntity,
|
|
536
|
+
IngestionRunEntity,
|
|
537
|
+
IngestionPipeline,
|
|
538
|
+
BenchmarkResultEntity
|
|
539
|
+
};
|