@contractspec/lib.provider-ranking 0.7.6 → 0.7.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,44 +1,77 @@
1
1
  # @contractspec/lib.provider-ranking
2
2
 
3
- Website: https://contractspec.io/
3
+ Website: https://contractspec.io
4
4
 
5
5
  **AI provider ranking: benchmark ingestion, scoring, and model comparison.**
6
6
 
7
- Ingests benchmark data from multiple sources (Chatbot Arena, SWE-bench, Artificial Analysis, Open LLM Leaderboard), normalizes scores to a 0-100 scale, and computes composite rankings across dimensions like coding, reasoning, cost, and latency.
7
+ ## What It Provides
8
+
9
+ - **Layer**: lib.
10
+ - **Consumers**: module.provider-ranking.
11
+ - Related ContractSpec packages include `@contractspec/tool.bun`, `@contractspec/tool.typescript`.
12
+ - Related ContractSpec packages include `@contractspec/tool.bun`, `@contractspec/tool.typescript`.
8
13
 
9
14
  ## Installation
10
15
 
11
- ```bash
12
- bun add @contractspec/lib.provider-ranking
13
- ```
16
+ `npm install @contractspec/lib.provider-ranking`
14
17
 
15
- ## Exports
18
+ or
16
19
 
17
- - `.` -- Core types, store interface, and in-memory store
18
- - `./types` -- `BenchmarkResult`, `ModelRanking`, `ModelProfile`, `BenchmarkDimension`, `DimensionWeightConfig`
19
- - `./store` -- `ProviderRankingStore` interface
20
- - `./in-memory-store` -- `InMemoryProviderRankingStore` class
21
- - `./scoring` -- `computeModelRankings()`, `normalizeScore()`, `DEFAULT_DIMENSION_WEIGHTS`
22
- - `./ingesters` -- `chatbotArenaIngester`, `sweBenchIngester`, `artificialAnalysisIngester`, `IngesterRegistry`
23
- - `./eval` -- `EvalRunner`, `EvalSuite`, `EvalCase` for custom evaluation
20
+ `bun add @contractspec/lib.provider-ranking`
24
21
 
25
22
  ## Usage
26
23
 
27
- ```ts
28
- import { InMemoryProviderRankingStore } from "@contractspec/lib.provider-ranking/in-memory-store";
29
- import { createDefaultIngesterRegistry } from "@contractspec/lib.provider-ranking/ingesters";
30
- import { computeModelRankings } from "@contractspec/lib.provider-ranking/scoring";
24
+ Import the root entrypoint from `@contractspec/lib.provider-ranking`, or choose a documented subpath when you only need one part of the package surface.
25
+
26
+ ## Architecture
27
+
28
+ - `src/eval` is part of the package's public or composition surface.
29
+ - `src/in-memory-store.ts` is part of the package's public or composition surface.
30
+ - `src/index.ts` is the root public barrel and package entrypoint.
31
+ - `src/ingesters` is part of the package's public or composition surface.
32
+ - `src/scoring` is part of the package's public or composition surface.
33
+ - `src/store.ts` is part of the package's public or composition surface.
34
+ - `src/types.ts` is shared public type definitions.
35
+
36
+ ## Public Entry Points
37
+
38
+ - Export `.` resolves through `./src/index.ts`.
39
+ - Export `./eval` resolves through `./src/eval/index.ts`.
40
+ - Export `./eval/runner` resolves through `./src/eval/runner.ts`.
41
+ - Export `./eval/types` resolves through `./src/eval/types.ts`.
42
+ - Export `./in-memory-store` resolves through `./src/in-memory-store.ts`.
43
+ - Export `./ingesters` resolves through `./src/ingesters/index.ts`.
44
+ - Export `./ingesters/artificial-analysis` resolves through `./src/ingesters/artificial-analysis.ts`.
45
+ - Export `./ingesters/chatbot-arena` resolves through `./src/ingesters/chatbot-arena.ts`.
46
+ - Export `./ingesters/fetch-utils` resolves through `./src/ingesters/fetch-utils.ts`.
47
+ - Export `./ingesters/open-llm-leaderboard` resolves through `./src/ingesters/open-llm-leaderboard.ts`.
48
+ - The package publishes 19 total export subpaths; keep docs aligned with `package.json`.
49
+
50
+ ## Local Commands
51
+
52
+ - `bun run dev` — contractspec-bun-build dev
53
+ - `bun run build` — bun run prebuild && bun run build:bundle && bun run build:types
54
+ - `bun run test` — bun test --pass-with-no-tests
55
+ - `bun run lint` — bun lint:fix
56
+ - `bun run lint:check` — biome check .
57
+ - `bun run lint:fix` — biome check --write --unsafe --only=nursery/useSortedClasses . && biome check --write .
58
+ - `bun run typecheck` — tsc --noEmit
59
+ - `bun run publish:pkg` — bun publish --tolerate-republish --ignore-scripts --verbose
60
+ - `bun run publish:pkg:canary` — bun publish:pkg --tag canary
61
+ - `bun run clean` — rimraf dist .turbo
62
+ - `bun run build:bundle` — contractspec-bun-build transpile
63
+ - `bun run build:types` — contractspec-bun-build types
64
+ - `bun run prebuild` — contractspec-bun-build prebuild
31
65
 
32
- const store = new InMemoryProviderRankingStore();
33
- const registry = createDefaultIngesterRegistry();
66
+ ## Recent Updates
34
67
 
35
- const ingester = registry.get("swe-bench");
36
- const results = await ingester.ingest();
68
+ - Replace eslint+prettier by biomejs to optimize speed.
69
+ - Resolve lint, build, and type errors across nine packages.
70
+ - Add first-class transport, auth, versioning, and BYOK support across all integrations.
71
+ - Add AI provider ranking system with ranking-driven model selection.
37
72
 
38
- for (const result of results) {
39
- await store.addBenchmarkResult(result);
40
- }
73
+ ## Notes
41
74
 
42
- const rankings = computeModelRankings(await store.listBenchmarkResults({}));
43
- console.log(rankings);
44
- ```
75
+ - Store interface is the adapter boundary — do not leak implementation details.
76
+ - Scoring algorithms must stay deterministic (no randomness, no side effects).
77
+ - Benchmark dimension enum is shared across ingesters and scoring — keep in sync.
@@ -249,73 +249,6 @@ function mapOrganizationToProvider(org) {
249
249
  return org;
250
250
  }
251
251
 
252
- // src/ingesters/swe-bench.ts
253
- var DEFAULT_SWE_BENCH_URL = "https://raw.githubusercontent.com/princeton-nlp/SWE-bench/main/docs/leaderboard.json";
254
- var sweBenchIngester = {
255
- source: "swe-bench",
256
- displayName: "SWE-bench",
257
- description: "Software engineering task completion rates from SWE-bench.",
258
- async ingest(options) {
259
- if (options?.dimensions?.length && !options.dimensions.includes("coding")) {
260
- return [];
261
- }
262
- const url = options?.sourceUrl ?? DEFAULT_SWE_BENCH_URL;
263
- const response = await fetchWithRetry(url, { fetch: options?.fetch });
264
- const text = await response.text();
265
- const data = parseJsonSafe(text, "SWE-bench");
266
- const now = new Date;
267
- let entries = data.filter((entry) => entry.model && entry.resolved_rate != null);
268
- if (options?.modelFilter?.length) {
269
- const filterSet = new Set(options.modelFilter);
270
- entries = entries.filter((e) => filterSet.has(e.model.toLowerCase().replace(/\s+/g, "-")));
271
- }
272
- if (options?.maxResults) {
273
- entries = entries.slice(0, options.maxResults);
274
- }
275
- let results = entries.map((entry) => {
276
- const modelId = entry.model.toLowerCase().replace(/\s+/g, "-");
277
- const org = entry.organization?.toLowerCase() ?? "unknown";
278
- return {
279
- id: `swe-bench:${modelId}:coding`,
280
- modelId,
281
- providerKey: mapOrganizationToProvider2(org),
282
- source: "swe-bench",
283
- dimension: "coding",
284
- score: Math.max(0, Math.min(100, entry.resolved_rate)),
285
- rawScore: entry.resolved_rate,
286
- metadata: {
287
- organization: entry.organization,
288
- date: entry.date
289
- },
290
- measuredAt: entry.date ? new Date(entry.date) : now,
291
- ingestedAt: now
292
- };
293
- });
294
- const { fromDate, toDate } = options ?? {};
295
- if (fromDate) {
296
- results = results.filter((r) => r.measuredAt >= fromDate);
297
- }
298
- if (toDate) {
299
- results = results.filter((r) => r.measuredAt <= toDate);
300
- }
301
- return results;
302
- }
303
- };
304
- function mapOrganizationToProvider2(org) {
305
- const normalized = org.toLowerCase();
306
- if (normalized.includes("openai"))
307
- return "openai";
308
- if (normalized.includes("anthropic"))
309
- return "anthropic";
310
- if (normalized.includes("google") || normalized.includes("deepmind"))
311
- return "gemini";
312
- if (normalized.includes("mistral"))
313
- return "mistral";
314
- if (normalized.includes("meta"))
315
- return "meta";
316
- return org;
317
- }
318
-
319
252
  // src/ingesters/open-llm-leaderboard.ts
320
253
  var DEFAULT_HF_URL = "https://huggingface.co/api/spaces/open-llm-leaderboard/open_llm_leaderboard/results";
321
254
  var BENCHMARK_MAPPINGS = [
@@ -344,7 +277,7 @@ var openLlmLeaderboardIngester = {
344
277
  for (const entry of entries) {
345
278
  const modelId = entry.model_name.toLowerCase().replace(/\s+/g, "-");
346
279
  const org = entry.organization?.toLowerCase() ?? "unknown";
347
- const providerKey = mapOrganizationToProvider3(org);
280
+ const providerKey = mapOrganizationToProvider2(org);
348
281
  for (const mapping of BENCHMARK_MAPPINGS) {
349
282
  if (dims && !dims.has(mapping.dimension))
350
283
  continue;
@@ -371,6 +304,73 @@ var openLlmLeaderboardIngester = {
371
304
  return options?.maxResults ? results.slice(0, options.maxResults) : results;
372
305
  }
373
306
  };
307
+ function mapOrganizationToProvider2(org) {
308
+ const normalized = org.toLowerCase();
309
+ if (normalized.includes("openai"))
310
+ return "openai";
311
+ if (normalized.includes("anthropic"))
312
+ return "anthropic";
313
+ if (normalized.includes("google") || normalized.includes("deepmind"))
314
+ return "gemini";
315
+ if (normalized.includes("mistral"))
316
+ return "mistral";
317
+ if (normalized.includes("meta"))
318
+ return "meta";
319
+ return org;
320
+ }
321
+
322
+ // src/ingesters/swe-bench.ts
323
+ var DEFAULT_SWE_BENCH_URL = "https://raw.githubusercontent.com/princeton-nlp/SWE-bench/main/docs/leaderboard.json";
324
+ var sweBenchIngester = {
325
+ source: "swe-bench",
326
+ displayName: "SWE-bench",
327
+ description: "Software engineering task completion rates from SWE-bench.",
328
+ async ingest(options) {
329
+ if (options?.dimensions?.length && !options.dimensions.includes("coding")) {
330
+ return [];
331
+ }
332
+ const url = options?.sourceUrl ?? DEFAULT_SWE_BENCH_URL;
333
+ const response = await fetchWithRetry(url, { fetch: options?.fetch });
334
+ const text = await response.text();
335
+ const data = parseJsonSafe(text, "SWE-bench");
336
+ const now = new Date;
337
+ let entries = data.filter((entry) => entry.model && entry.resolved_rate != null);
338
+ if (options?.modelFilter?.length) {
339
+ const filterSet = new Set(options.modelFilter);
340
+ entries = entries.filter((e) => filterSet.has(e.model.toLowerCase().replace(/\s+/g, "-")));
341
+ }
342
+ if (options?.maxResults) {
343
+ entries = entries.slice(0, options.maxResults);
344
+ }
345
+ let results = entries.map((entry) => {
346
+ const modelId = entry.model.toLowerCase().replace(/\s+/g, "-");
347
+ const org = entry.organization?.toLowerCase() ?? "unknown";
348
+ return {
349
+ id: `swe-bench:${modelId}:coding`,
350
+ modelId,
351
+ providerKey: mapOrganizationToProvider3(org),
352
+ source: "swe-bench",
353
+ dimension: "coding",
354
+ score: Math.max(0, Math.min(100, entry.resolved_rate)),
355
+ rawScore: entry.resolved_rate,
356
+ metadata: {
357
+ organization: entry.organization,
358
+ date: entry.date
359
+ },
360
+ measuredAt: entry.date ? new Date(entry.date) : now,
361
+ ingestedAt: now
362
+ };
363
+ });
364
+ const { fromDate, toDate } = options ?? {};
365
+ if (fromDate) {
366
+ results = results.filter((r) => r.measuredAt >= fromDate);
367
+ }
368
+ if (toDate) {
369
+ results = results.filter((r) => r.measuredAt <= toDate);
370
+ }
371
+ return results;
372
+ }
373
+ };
374
374
  function mapOrganizationToProvider3(org) {
375
375
  const normalized = org.toLowerCase();
376
376
  if (normalized.includes("openai"))
@@ -249,73 +249,6 @@ function mapOrganizationToProvider(org) {
249
249
  return org;
250
250
  }
251
251
 
252
- // src/ingesters/swe-bench.ts
253
- var DEFAULT_SWE_BENCH_URL = "https://raw.githubusercontent.com/princeton-nlp/SWE-bench/main/docs/leaderboard.json";
254
- var sweBenchIngester = {
255
- source: "swe-bench",
256
- displayName: "SWE-bench",
257
- description: "Software engineering task completion rates from SWE-bench.",
258
- async ingest(options) {
259
- if (options?.dimensions?.length && !options.dimensions.includes("coding")) {
260
- return [];
261
- }
262
- const url = options?.sourceUrl ?? DEFAULT_SWE_BENCH_URL;
263
- const response = await fetchWithRetry(url, { fetch: options?.fetch });
264
- const text = await response.text();
265
- const data = parseJsonSafe(text, "SWE-bench");
266
- const now = new Date;
267
- let entries = data.filter((entry) => entry.model && entry.resolved_rate != null);
268
- if (options?.modelFilter?.length) {
269
- const filterSet = new Set(options.modelFilter);
270
- entries = entries.filter((e) => filterSet.has(e.model.toLowerCase().replace(/\s+/g, "-")));
271
- }
272
- if (options?.maxResults) {
273
- entries = entries.slice(0, options.maxResults);
274
- }
275
- let results = entries.map((entry) => {
276
- const modelId = entry.model.toLowerCase().replace(/\s+/g, "-");
277
- const org = entry.organization?.toLowerCase() ?? "unknown";
278
- return {
279
- id: `swe-bench:${modelId}:coding`,
280
- modelId,
281
- providerKey: mapOrganizationToProvider2(org),
282
- source: "swe-bench",
283
- dimension: "coding",
284
- score: Math.max(0, Math.min(100, entry.resolved_rate)),
285
- rawScore: entry.resolved_rate,
286
- metadata: {
287
- organization: entry.organization,
288
- date: entry.date
289
- },
290
- measuredAt: entry.date ? new Date(entry.date) : now,
291
- ingestedAt: now
292
- };
293
- });
294
- const { fromDate, toDate } = options ?? {};
295
- if (fromDate) {
296
- results = results.filter((r) => r.measuredAt >= fromDate);
297
- }
298
- if (toDate) {
299
- results = results.filter((r) => r.measuredAt <= toDate);
300
- }
301
- return results;
302
- }
303
- };
304
- function mapOrganizationToProvider2(org) {
305
- const normalized = org.toLowerCase();
306
- if (normalized.includes("openai"))
307
- return "openai";
308
- if (normalized.includes("anthropic"))
309
- return "anthropic";
310
- if (normalized.includes("google") || normalized.includes("deepmind"))
311
- return "gemini";
312
- if (normalized.includes("mistral"))
313
- return "mistral";
314
- if (normalized.includes("meta"))
315
- return "meta";
316
- return org;
317
- }
318
-
319
252
  // src/ingesters/open-llm-leaderboard.ts
320
253
  var DEFAULT_HF_URL = "https://huggingface.co/api/spaces/open-llm-leaderboard/open_llm_leaderboard/results";
321
254
  var BENCHMARK_MAPPINGS = [
@@ -344,7 +277,7 @@ var openLlmLeaderboardIngester = {
344
277
  for (const entry of entries) {
345
278
  const modelId = entry.model_name.toLowerCase().replace(/\s+/g, "-");
346
279
  const org = entry.organization?.toLowerCase() ?? "unknown";
347
- const providerKey = mapOrganizationToProvider3(org);
280
+ const providerKey = mapOrganizationToProvider2(org);
348
281
  for (const mapping of BENCHMARK_MAPPINGS) {
349
282
  if (dims && !dims.has(mapping.dimension))
350
283
  continue;
@@ -371,6 +304,73 @@ var openLlmLeaderboardIngester = {
371
304
  return options?.maxResults ? results.slice(0, options.maxResults) : results;
372
305
  }
373
306
  };
307
+ function mapOrganizationToProvider2(org) {
308
+ const normalized = org.toLowerCase();
309
+ if (normalized.includes("openai"))
310
+ return "openai";
311
+ if (normalized.includes("anthropic"))
312
+ return "anthropic";
313
+ if (normalized.includes("google") || normalized.includes("deepmind"))
314
+ return "gemini";
315
+ if (normalized.includes("mistral"))
316
+ return "mistral";
317
+ if (normalized.includes("meta"))
318
+ return "meta";
319
+ return org;
320
+ }
321
+
322
+ // src/ingesters/swe-bench.ts
323
+ var DEFAULT_SWE_BENCH_URL = "https://raw.githubusercontent.com/princeton-nlp/SWE-bench/main/docs/leaderboard.json";
324
+ var sweBenchIngester = {
325
+ source: "swe-bench",
326
+ displayName: "SWE-bench",
327
+ description: "Software engineering task completion rates from SWE-bench.",
328
+ async ingest(options) {
329
+ if (options?.dimensions?.length && !options.dimensions.includes("coding")) {
330
+ return [];
331
+ }
332
+ const url = options?.sourceUrl ?? DEFAULT_SWE_BENCH_URL;
333
+ const response = await fetchWithRetry(url, { fetch: options?.fetch });
334
+ const text = await response.text();
335
+ const data = parseJsonSafe(text, "SWE-bench");
336
+ const now = new Date;
337
+ let entries = data.filter((entry) => entry.model && entry.resolved_rate != null);
338
+ if (options?.modelFilter?.length) {
339
+ const filterSet = new Set(options.modelFilter);
340
+ entries = entries.filter((e) => filterSet.has(e.model.toLowerCase().replace(/\s+/g, "-")));
341
+ }
342
+ if (options?.maxResults) {
343
+ entries = entries.slice(0, options.maxResults);
344
+ }
345
+ let results = entries.map((entry) => {
346
+ const modelId = entry.model.toLowerCase().replace(/\s+/g, "-");
347
+ const org = entry.organization?.toLowerCase() ?? "unknown";
348
+ return {
349
+ id: `swe-bench:${modelId}:coding`,
350
+ modelId,
351
+ providerKey: mapOrganizationToProvider3(org),
352
+ source: "swe-bench",
353
+ dimension: "coding",
354
+ score: Math.max(0, Math.min(100, entry.resolved_rate)),
355
+ rawScore: entry.resolved_rate,
356
+ metadata: {
357
+ organization: entry.organization,
358
+ date: entry.date
359
+ },
360
+ measuredAt: entry.date ? new Date(entry.date) : now,
361
+ ingestedAt: now
362
+ };
363
+ });
364
+ const { fromDate, toDate } = options ?? {};
365
+ if (fromDate) {
366
+ results = results.filter((r) => r.measuredAt >= fromDate);
367
+ }
368
+ if (toDate) {
369
+ results = results.filter((r) => r.measuredAt <= toDate);
370
+ }
371
+ return results;
372
+ }
373
+ };
374
374
  function mapOrganizationToProvider3(org) {
375
375
  const normalized = org.toLowerCase();
376
376
  if (normalized.includes("openai"))
@@ -1,2 +1,2 @@
1
- export type { EvalCase, EvalSuite, EvalCaseResult, EvalRunResult, EvalLLMAdapter, } from './types';
2
1
  export { EvalRunner } from './runner';
2
+ export type { EvalCase, EvalCaseResult, EvalLLMAdapter, EvalRunResult, EvalSuite, } from './types';
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- export type { BenchmarkDimension, BenchmarkSource, BenchmarkResult, DimensionScore, ModelRanking, ModelProfile, BenchmarkResultQuery, BenchmarkResultListResult, RankingQuery, RankingListResult, IngestionRun, DimensionWeightConfig, } from './types';
2
- export { BENCHMARK_DIMENSIONS } from './types';
3
- export type { ProviderRankingStore } from './store';
4
1
  export { InMemoryProviderRankingStore } from './in-memory-store';
2
+ export type { ProviderRankingStore } from './store';
3
+ export type { BenchmarkDimension, BenchmarkResult, BenchmarkResultListResult, BenchmarkResultQuery, BenchmarkSource, DimensionScore, DimensionWeightConfig, IngestionRun, ModelProfile, ModelRanking, RankingListResult, RankingQuery, } from './types';
4
+ export { BENCHMARK_DIMENSIONS } from './types';
@@ -1,7 +1,7 @@
1
- export type { BenchmarkIngester, IngesterOptions } from './types';
2
- export { chatbotArenaIngester } from './chatbot-arena';
3
1
  export { artificialAnalysisIngester } from './artificial-analysis';
4
- export { sweBenchIngester } from './swe-bench';
5
- export { openLlmLeaderboardIngester } from './open-llm-leaderboard';
6
- export { IngesterRegistry, createDefaultIngesterRegistry } from './registry';
2
+ export { chatbotArenaIngester } from './chatbot-arena';
7
3
  export { fetchWithRetry, parseJsonSafe } from './fetch-utils';
4
+ export { openLlmLeaderboardIngester } from './open-llm-leaderboard';
5
+ export { createDefaultIngesterRegistry, IngesterRegistry } from './registry';
6
+ export { sweBenchIngester } from './swe-bench';
7
+ export type { BenchmarkIngester, IngesterOptions } from './types';
@@ -250,73 +250,6 @@ function mapOrganizationToProvider(org) {
250
250
  return org;
251
251
  }
252
252
 
253
- // src/ingesters/swe-bench.ts
254
- var DEFAULT_SWE_BENCH_URL = "https://raw.githubusercontent.com/princeton-nlp/SWE-bench/main/docs/leaderboard.json";
255
- var sweBenchIngester = {
256
- source: "swe-bench",
257
- displayName: "SWE-bench",
258
- description: "Software engineering task completion rates from SWE-bench.",
259
- async ingest(options) {
260
- if (options?.dimensions?.length && !options.dimensions.includes("coding")) {
261
- return [];
262
- }
263
- const url = options?.sourceUrl ?? DEFAULT_SWE_BENCH_URL;
264
- const response = await fetchWithRetry(url, { fetch: options?.fetch });
265
- const text = await response.text();
266
- const data = parseJsonSafe(text, "SWE-bench");
267
- const now = new Date;
268
- let entries = data.filter((entry) => entry.model && entry.resolved_rate != null);
269
- if (options?.modelFilter?.length) {
270
- const filterSet = new Set(options.modelFilter);
271
- entries = entries.filter((e) => filterSet.has(e.model.toLowerCase().replace(/\s+/g, "-")));
272
- }
273
- if (options?.maxResults) {
274
- entries = entries.slice(0, options.maxResults);
275
- }
276
- let results = entries.map((entry) => {
277
- const modelId = entry.model.toLowerCase().replace(/\s+/g, "-");
278
- const org = entry.organization?.toLowerCase() ?? "unknown";
279
- return {
280
- id: `swe-bench:${modelId}:coding`,
281
- modelId,
282
- providerKey: mapOrganizationToProvider2(org),
283
- source: "swe-bench",
284
- dimension: "coding",
285
- score: Math.max(0, Math.min(100, entry.resolved_rate)),
286
- rawScore: entry.resolved_rate,
287
- metadata: {
288
- organization: entry.organization,
289
- date: entry.date
290
- },
291
- measuredAt: entry.date ? new Date(entry.date) : now,
292
- ingestedAt: now
293
- };
294
- });
295
- const { fromDate, toDate } = options ?? {};
296
- if (fromDate) {
297
- results = results.filter((r) => r.measuredAt >= fromDate);
298
- }
299
- if (toDate) {
300
- results = results.filter((r) => r.measuredAt <= toDate);
301
- }
302
- return results;
303
- }
304
- };
305
- function mapOrganizationToProvider2(org) {
306
- const normalized = org.toLowerCase();
307
- if (normalized.includes("openai"))
308
- return "openai";
309
- if (normalized.includes("anthropic"))
310
- return "anthropic";
311
- if (normalized.includes("google") || normalized.includes("deepmind"))
312
- return "gemini";
313
- if (normalized.includes("mistral"))
314
- return "mistral";
315
- if (normalized.includes("meta"))
316
- return "meta";
317
- return org;
318
- }
319
-
320
253
  // src/ingesters/open-llm-leaderboard.ts
321
254
  var DEFAULT_HF_URL = "https://huggingface.co/api/spaces/open-llm-leaderboard/open_llm_leaderboard/results";
322
255
  var BENCHMARK_MAPPINGS = [
@@ -345,7 +278,7 @@ var openLlmLeaderboardIngester = {
345
278
  for (const entry of entries) {
346
279
  const modelId = entry.model_name.toLowerCase().replace(/\s+/g, "-");
347
280
  const org = entry.organization?.toLowerCase() ?? "unknown";
348
- const providerKey = mapOrganizationToProvider3(org);
281
+ const providerKey = mapOrganizationToProvider2(org);
349
282
  for (const mapping of BENCHMARK_MAPPINGS) {
350
283
  if (dims && !dims.has(mapping.dimension))
351
284
  continue;
@@ -372,6 +305,73 @@ var openLlmLeaderboardIngester = {
372
305
  return options?.maxResults ? results.slice(0, options.maxResults) : results;
373
306
  }
374
307
  };
308
+ function mapOrganizationToProvider2(org) {
309
+ const normalized = org.toLowerCase();
310
+ if (normalized.includes("openai"))
311
+ return "openai";
312
+ if (normalized.includes("anthropic"))
313
+ return "anthropic";
314
+ if (normalized.includes("google") || normalized.includes("deepmind"))
315
+ return "gemini";
316
+ if (normalized.includes("mistral"))
317
+ return "mistral";
318
+ if (normalized.includes("meta"))
319
+ return "meta";
320
+ return org;
321
+ }
322
+
323
+ // src/ingesters/swe-bench.ts
324
+ var DEFAULT_SWE_BENCH_URL = "https://raw.githubusercontent.com/princeton-nlp/SWE-bench/main/docs/leaderboard.json";
325
+ var sweBenchIngester = {
326
+ source: "swe-bench",
327
+ displayName: "SWE-bench",
328
+ description: "Software engineering task completion rates from SWE-bench.",
329
+ async ingest(options) {
330
+ if (options?.dimensions?.length && !options.dimensions.includes("coding")) {
331
+ return [];
332
+ }
333
+ const url = options?.sourceUrl ?? DEFAULT_SWE_BENCH_URL;
334
+ const response = await fetchWithRetry(url, { fetch: options?.fetch });
335
+ const text = await response.text();
336
+ const data = parseJsonSafe(text, "SWE-bench");
337
+ const now = new Date;
338
+ let entries = data.filter((entry) => entry.model && entry.resolved_rate != null);
339
+ if (options?.modelFilter?.length) {
340
+ const filterSet = new Set(options.modelFilter);
341
+ entries = entries.filter((e) => filterSet.has(e.model.toLowerCase().replace(/\s+/g, "-")));
342
+ }
343
+ if (options?.maxResults) {
344
+ entries = entries.slice(0, options.maxResults);
345
+ }
346
+ let results = entries.map((entry) => {
347
+ const modelId = entry.model.toLowerCase().replace(/\s+/g, "-");
348
+ const org = entry.organization?.toLowerCase() ?? "unknown";
349
+ return {
350
+ id: `swe-bench:${modelId}:coding`,
351
+ modelId,
352
+ providerKey: mapOrganizationToProvider3(org),
353
+ source: "swe-bench",
354
+ dimension: "coding",
355
+ score: Math.max(0, Math.min(100, entry.resolved_rate)),
356
+ rawScore: entry.resolved_rate,
357
+ metadata: {
358
+ organization: entry.organization,
359
+ date: entry.date
360
+ },
361
+ measuredAt: entry.date ? new Date(entry.date) : now,
362
+ ingestedAt: now
363
+ };
364
+ });
365
+ const { fromDate, toDate } = options ?? {};
366
+ if (fromDate) {
367
+ results = results.filter((r) => r.measuredAt >= fromDate);
368
+ }
369
+ if (toDate) {
370
+ results = results.filter((r) => r.measuredAt <= toDate);
371
+ }
372
+ return results;
373
+ }
374
+ };
375
375
  function mapOrganizationToProvider3(org) {
376
376
  const normalized = org.toLowerCase();
377
377
  if (normalized.includes("openai"))
@@ -250,73 +250,6 @@ function mapOrganizationToProvider(org) {
250
250
  return org;
251
251
  }
252
252
 
253
- // src/ingesters/swe-bench.ts
254
- var DEFAULT_SWE_BENCH_URL = "https://raw.githubusercontent.com/princeton-nlp/SWE-bench/main/docs/leaderboard.json";
255
- var sweBenchIngester = {
256
- source: "swe-bench",
257
- displayName: "SWE-bench",
258
- description: "Software engineering task completion rates from SWE-bench.",
259
- async ingest(options) {
260
- if (options?.dimensions?.length && !options.dimensions.includes("coding")) {
261
- return [];
262
- }
263
- const url = options?.sourceUrl ?? DEFAULT_SWE_BENCH_URL;
264
- const response = await fetchWithRetry(url, { fetch: options?.fetch });
265
- const text = await response.text();
266
- const data = parseJsonSafe(text, "SWE-bench");
267
- const now = new Date;
268
- let entries = data.filter((entry) => entry.model && entry.resolved_rate != null);
269
- if (options?.modelFilter?.length) {
270
- const filterSet = new Set(options.modelFilter);
271
- entries = entries.filter((e) => filterSet.has(e.model.toLowerCase().replace(/\s+/g, "-")));
272
- }
273
- if (options?.maxResults) {
274
- entries = entries.slice(0, options.maxResults);
275
- }
276
- let results = entries.map((entry) => {
277
- const modelId = entry.model.toLowerCase().replace(/\s+/g, "-");
278
- const org = entry.organization?.toLowerCase() ?? "unknown";
279
- return {
280
- id: `swe-bench:${modelId}:coding`,
281
- modelId,
282
- providerKey: mapOrganizationToProvider2(org),
283
- source: "swe-bench",
284
- dimension: "coding",
285
- score: Math.max(0, Math.min(100, entry.resolved_rate)),
286
- rawScore: entry.resolved_rate,
287
- metadata: {
288
- organization: entry.organization,
289
- date: entry.date
290
- },
291
- measuredAt: entry.date ? new Date(entry.date) : now,
292
- ingestedAt: now
293
- };
294
- });
295
- const { fromDate, toDate } = options ?? {};
296
- if (fromDate) {
297
- results = results.filter((r) => r.measuredAt >= fromDate);
298
- }
299
- if (toDate) {
300
- results = results.filter((r) => r.measuredAt <= toDate);
301
- }
302
- return results;
303
- }
304
- };
305
- function mapOrganizationToProvider2(org) {
306
- const normalized = org.toLowerCase();
307
- if (normalized.includes("openai"))
308
- return "openai";
309
- if (normalized.includes("anthropic"))
310
- return "anthropic";
311
- if (normalized.includes("google") || normalized.includes("deepmind"))
312
- return "gemini";
313
- if (normalized.includes("mistral"))
314
- return "mistral";
315
- if (normalized.includes("meta"))
316
- return "meta";
317
- return org;
318
- }
319
-
320
253
  // src/ingesters/open-llm-leaderboard.ts
321
254
  var DEFAULT_HF_URL = "https://huggingface.co/api/spaces/open-llm-leaderboard/open_llm_leaderboard/results";
322
255
  var BENCHMARK_MAPPINGS = [
@@ -345,7 +278,7 @@ var openLlmLeaderboardIngester = {
345
278
  for (const entry of entries) {
346
279
  const modelId = entry.model_name.toLowerCase().replace(/\s+/g, "-");
347
280
  const org = entry.organization?.toLowerCase() ?? "unknown";
348
- const providerKey = mapOrganizationToProvider3(org);
281
+ const providerKey = mapOrganizationToProvider2(org);
349
282
  for (const mapping of BENCHMARK_MAPPINGS) {
350
283
  if (dims && !dims.has(mapping.dimension))
351
284
  continue;
@@ -372,6 +305,73 @@ var openLlmLeaderboardIngester = {
372
305
  return options?.maxResults ? results.slice(0, options.maxResults) : results;
373
306
  }
374
307
  };
308
+ function mapOrganizationToProvider2(org) {
309
+ const normalized = org.toLowerCase();
310
+ if (normalized.includes("openai"))
311
+ return "openai";
312
+ if (normalized.includes("anthropic"))
313
+ return "anthropic";
314
+ if (normalized.includes("google") || normalized.includes("deepmind"))
315
+ return "gemini";
316
+ if (normalized.includes("mistral"))
317
+ return "mistral";
318
+ if (normalized.includes("meta"))
319
+ return "meta";
320
+ return org;
321
+ }
322
+
323
+ // src/ingesters/swe-bench.ts
324
+ var DEFAULT_SWE_BENCH_URL = "https://raw.githubusercontent.com/princeton-nlp/SWE-bench/main/docs/leaderboard.json";
325
+ var sweBenchIngester = {
326
+ source: "swe-bench",
327
+ displayName: "SWE-bench",
328
+ description: "Software engineering task completion rates from SWE-bench.",
329
+ async ingest(options) {
330
+ if (options?.dimensions?.length && !options.dimensions.includes("coding")) {
331
+ return [];
332
+ }
333
+ const url = options?.sourceUrl ?? DEFAULT_SWE_BENCH_URL;
334
+ const response = await fetchWithRetry(url, { fetch: options?.fetch });
335
+ const text = await response.text();
336
+ const data = parseJsonSafe(text, "SWE-bench");
337
+ const now = new Date;
338
+ let entries = data.filter((entry) => entry.model && entry.resolved_rate != null);
339
+ if (options?.modelFilter?.length) {
340
+ const filterSet = new Set(options.modelFilter);
341
+ entries = entries.filter((e) => filterSet.has(e.model.toLowerCase().replace(/\s+/g, "-")));
342
+ }
343
+ if (options?.maxResults) {
344
+ entries = entries.slice(0, options.maxResults);
345
+ }
346
+ let results = entries.map((entry) => {
347
+ const modelId = entry.model.toLowerCase().replace(/\s+/g, "-");
348
+ const org = entry.organization?.toLowerCase() ?? "unknown";
349
+ return {
350
+ id: `swe-bench:${modelId}:coding`,
351
+ modelId,
352
+ providerKey: mapOrganizationToProvider3(org),
353
+ source: "swe-bench",
354
+ dimension: "coding",
355
+ score: Math.max(0, Math.min(100, entry.resolved_rate)),
356
+ rawScore: entry.resolved_rate,
357
+ metadata: {
358
+ organization: entry.organization,
359
+ date: entry.date
360
+ },
361
+ measuredAt: entry.date ? new Date(entry.date) : now,
362
+ ingestedAt: now
363
+ };
364
+ });
365
+ const { fromDate, toDate } = options ?? {};
366
+ if (fromDate) {
367
+ results = results.filter((r) => r.measuredAt >= fromDate);
368
+ }
369
+ if (toDate) {
370
+ results = results.filter((r) => r.measuredAt <= toDate);
371
+ }
372
+ return results;
373
+ }
374
+ };
375
375
  function mapOrganizationToProvider3(org) {
376
376
  const normalized = org.toLowerCase();
377
377
  if (normalized.includes("openai"))
@@ -249,73 +249,6 @@ function mapOrganizationToProvider(org) {
249
249
  return org;
250
250
  }
251
251
 
252
- // src/ingesters/swe-bench.ts
253
- var DEFAULT_SWE_BENCH_URL = "https://raw.githubusercontent.com/princeton-nlp/SWE-bench/main/docs/leaderboard.json";
254
- var sweBenchIngester = {
255
- source: "swe-bench",
256
- displayName: "SWE-bench",
257
- description: "Software engineering task completion rates from SWE-bench.",
258
- async ingest(options) {
259
- if (options?.dimensions?.length && !options.dimensions.includes("coding")) {
260
- return [];
261
- }
262
- const url = options?.sourceUrl ?? DEFAULT_SWE_BENCH_URL;
263
- const response = await fetchWithRetry(url, { fetch: options?.fetch });
264
- const text = await response.text();
265
- const data = parseJsonSafe(text, "SWE-bench");
266
- const now = new Date;
267
- let entries = data.filter((entry) => entry.model && entry.resolved_rate != null);
268
- if (options?.modelFilter?.length) {
269
- const filterSet = new Set(options.modelFilter);
270
- entries = entries.filter((e) => filterSet.has(e.model.toLowerCase().replace(/\s+/g, "-")));
271
- }
272
- if (options?.maxResults) {
273
- entries = entries.slice(0, options.maxResults);
274
- }
275
- let results = entries.map((entry) => {
276
- const modelId = entry.model.toLowerCase().replace(/\s+/g, "-");
277
- const org = entry.organization?.toLowerCase() ?? "unknown";
278
- return {
279
- id: `swe-bench:${modelId}:coding`,
280
- modelId,
281
- providerKey: mapOrganizationToProvider2(org),
282
- source: "swe-bench",
283
- dimension: "coding",
284
- score: Math.max(0, Math.min(100, entry.resolved_rate)),
285
- rawScore: entry.resolved_rate,
286
- metadata: {
287
- organization: entry.organization,
288
- date: entry.date
289
- },
290
- measuredAt: entry.date ? new Date(entry.date) : now,
291
- ingestedAt: now
292
- };
293
- });
294
- const { fromDate, toDate } = options ?? {};
295
- if (fromDate) {
296
- results = results.filter((r) => r.measuredAt >= fromDate);
297
- }
298
- if (toDate) {
299
- results = results.filter((r) => r.measuredAt <= toDate);
300
- }
301
- return results;
302
- }
303
- };
304
- function mapOrganizationToProvider2(org) {
305
- const normalized = org.toLowerCase();
306
- if (normalized.includes("openai"))
307
- return "openai";
308
- if (normalized.includes("anthropic"))
309
- return "anthropic";
310
- if (normalized.includes("google") || normalized.includes("deepmind"))
311
- return "gemini";
312
- if (normalized.includes("mistral"))
313
- return "mistral";
314
- if (normalized.includes("meta"))
315
- return "meta";
316
- return org;
317
- }
318
-
319
252
  // src/ingesters/open-llm-leaderboard.ts
320
253
  var DEFAULT_HF_URL = "https://huggingface.co/api/spaces/open-llm-leaderboard/open_llm_leaderboard/results";
321
254
  var BENCHMARK_MAPPINGS = [
@@ -344,7 +277,7 @@ var openLlmLeaderboardIngester = {
344
277
  for (const entry of entries) {
345
278
  const modelId = entry.model_name.toLowerCase().replace(/\s+/g, "-");
346
279
  const org = entry.organization?.toLowerCase() ?? "unknown";
347
- const providerKey = mapOrganizationToProvider3(org);
280
+ const providerKey = mapOrganizationToProvider2(org);
348
281
  for (const mapping of BENCHMARK_MAPPINGS) {
349
282
  if (dims && !dims.has(mapping.dimension))
350
283
  continue;
@@ -371,6 +304,73 @@ var openLlmLeaderboardIngester = {
371
304
  return options?.maxResults ? results.slice(0, options.maxResults) : results;
372
305
  }
373
306
  };
307
+ function mapOrganizationToProvider2(org) {
308
+ const normalized = org.toLowerCase();
309
+ if (normalized.includes("openai"))
310
+ return "openai";
311
+ if (normalized.includes("anthropic"))
312
+ return "anthropic";
313
+ if (normalized.includes("google") || normalized.includes("deepmind"))
314
+ return "gemini";
315
+ if (normalized.includes("mistral"))
316
+ return "mistral";
317
+ if (normalized.includes("meta"))
318
+ return "meta";
319
+ return org;
320
+ }
321
+
322
+ // src/ingesters/swe-bench.ts
323
+ var DEFAULT_SWE_BENCH_URL = "https://raw.githubusercontent.com/princeton-nlp/SWE-bench/main/docs/leaderboard.json";
324
+ var sweBenchIngester = {
325
+ source: "swe-bench",
326
+ displayName: "SWE-bench",
327
+ description: "Software engineering task completion rates from SWE-bench.",
328
+ async ingest(options) {
329
+ if (options?.dimensions?.length && !options.dimensions.includes("coding")) {
330
+ return [];
331
+ }
332
+ const url = options?.sourceUrl ?? DEFAULT_SWE_BENCH_URL;
333
+ const response = await fetchWithRetry(url, { fetch: options?.fetch });
334
+ const text = await response.text();
335
+ const data = parseJsonSafe(text, "SWE-bench");
336
+ const now = new Date;
337
+ let entries = data.filter((entry) => entry.model && entry.resolved_rate != null);
338
+ if (options?.modelFilter?.length) {
339
+ const filterSet = new Set(options.modelFilter);
340
+ entries = entries.filter((e) => filterSet.has(e.model.toLowerCase().replace(/\s+/g, "-")));
341
+ }
342
+ if (options?.maxResults) {
343
+ entries = entries.slice(0, options.maxResults);
344
+ }
345
+ let results = entries.map((entry) => {
346
+ const modelId = entry.model.toLowerCase().replace(/\s+/g, "-");
347
+ const org = entry.organization?.toLowerCase() ?? "unknown";
348
+ return {
349
+ id: `swe-bench:${modelId}:coding`,
350
+ modelId,
351
+ providerKey: mapOrganizationToProvider3(org),
352
+ source: "swe-bench",
353
+ dimension: "coding",
354
+ score: Math.max(0, Math.min(100, entry.resolved_rate)),
355
+ rawScore: entry.resolved_rate,
356
+ metadata: {
357
+ organization: entry.organization,
358
+ date: entry.date
359
+ },
360
+ measuredAt: entry.date ? new Date(entry.date) : now,
361
+ ingestedAt: now
362
+ };
363
+ });
364
+ const { fromDate, toDate } = options ?? {};
365
+ if (fromDate) {
366
+ results = results.filter((r) => r.measuredAt >= fromDate);
367
+ }
368
+ if (toDate) {
369
+ results = results.filter((r) => r.measuredAt <= toDate);
370
+ }
371
+ return results;
372
+ }
373
+ };
374
374
  function mapOrganizationToProvider3(org) {
375
375
  const normalized = org.toLowerCase();
376
376
  if (normalized.includes("openai"))
@@ -249,73 +249,6 @@ function mapOrganizationToProvider(org) {
249
249
  return org;
250
250
  }
251
251
 
252
- // src/ingesters/swe-bench.ts
253
- var DEFAULT_SWE_BENCH_URL = "https://raw.githubusercontent.com/princeton-nlp/SWE-bench/main/docs/leaderboard.json";
254
- var sweBenchIngester = {
255
- source: "swe-bench",
256
- displayName: "SWE-bench",
257
- description: "Software engineering task completion rates from SWE-bench.",
258
- async ingest(options) {
259
- if (options?.dimensions?.length && !options.dimensions.includes("coding")) {
260
- return [];
261
- }
262
- const url = options?.sourceUrl ?? DEFAULT_SWE_BENCH_URL;
263
- const response = await fetchWithRetry(url, { fetch: options?.fetch });
264
- const text = await response.text();
265
- const data = parseJsonSafe(text, "SWE-bench");
266
- const now = new Date;
267
- let entries = data.filter((entry) => entry.model && entry.resolved_rate != null);
268
- if (options?.modelFilter?.length) {
269
- const filterSet = new Set(options.modelFilter);
270
- entries = entries.filter((e) => filterSet.has(e.model.toLowerCase().replace(/\s+/g, "-")));
271
- }
272
- if (options?.maxResults) {
273
- entries = entries.slice(0, options.maxResults);
274
- }
275
- let results = entries.map((entry) => {
276
- const modelId = entry.model.toLowerCase().replace(/\s+/g, "-");
277
- const org = entry.organization?.toLowerCase() ?? "unknown";
278
- return {
279
- id: `swe-bench:${modelId}:coding`,
280
- modelId,
281
- providerKey: mapOrganizationToProvider2(org),
282
- source: "swe-bench",
283
- dimension: "coding",
284
- score: Math.max(0, Math.min(100, entry.resolved_rate)),
285
- rawScore: entry.resolved_rate,
286
- metadata: {
287
- organization: entry.organization,
288
- date: entry.date
289
- },
290
- measuredAt: entry.date ? new Date(entry.date) : now,
291
- ingestedAt: now
292
- };
293
- });
294
- const { fromDate, toDate } = options ?? {};
295
- if (fromDate) {
296
- results = results.filter((r) => r.measuredAt >= fromDate);
297
- }
298
- if (toDate) {
299
- results = results.filter((r) => r.measuredAt <= toDate);
300
- }
301
- return results;
302
- }
303
- };
304
- function mapOrganizationToProvider2(org) {
305
- const normalized = org.toLowerCase();
306
- if (normalized.includes("openai"))
307
- return "openai";
308
- if (normalized.includes("anthropic"))
309
- return "anthropic";
310
- if (normalized.includes("google") || normalized.includes("deepmind"))
311
- return "gemini";
312
- if (normalized.includes("mistral"))
313
- return "mistral";
314
- if (normalized.includes("meta"))
315
- return "meta";
316
- return org;
317
- }
318
-
319
252
  // src/ingesters/open-llm-leaderboard.ts
320
253
  var DEFAULT_HF_URL = "https://huggingface.co/api/spaces/open-llm-leaderboard/open_llm_leaderboard/results";
321
254
  var BENCHMARK_MAPPINGS = [
@@ -344,7 +277,7 @@ var openLlmLeaderboardIngester = {
344
277
  for (const entry of entries) {
345
278
  const modelId = entry.model_name.toLowerCase().replace(/\s+/g, "-");
346
279
  const org = entry.organization?.toLowerCase() ?? "unknown";
347
- const providerKey = mapOrganizationToProvider3(org);
280
+ const providerKey = mapOrganizationToProvider2(org);
348
281
  for (const mapping of BENCHMARK_MAPPINGS) {
349
282
  if (dims && !dims.has(mapping.dimension))
350
283
  continue;
@@ -371,6 +304,73 @@ var openLlmLeaderboardIngester = {
371
304
  return options?.maxResults ? results.slice(0, options.maxResults) : results;
372
305
  }
373
306
  };
307
+ function mapOrganizationToProvider2(org) {
308
+ const normalized = org.toLowerCase();
309
+ if (normalized.includes("openai"))
310
+ return "openai";
311
+ if (normalized.includes("anthropic"))
312
+ return "anthropic";
313
+ if (normalized.includes("google") || normalized.includes("deepmind"))
314
+ return "gemini";
315
+ if (normalized.includes("mistral"))
316
+ return "mistral";
317
+ if (normalized.includes("meta"))
318
+ return "meta";
319
+ return org;
320
+ }
321
+
322
+ // src/ingesters/swe-bench.ts
323
+ var DEFAULT_SWE_BENCH_URL = "https://raw.githubusercontent.com/princeton-nlp/SWE-bench/main/docs/leaderboard.json";
324
+ var sweBenchIngester = {
325
+ source: "swe-bench",
326
+ displayName: "SWE-bench",
327
+ description: "Software engineering task completion rates from SWE-bench.",
328
+ async ingest(options) {
329
+ if (options?.dimensions?.length && !options.dimensions.includes("coding")) {
330
+ return [];
331
+ }
332
+ const url = options?.sourceUrl ?? DEFAULT_SWE_BENCH_URL;
333
+ const response = await fetchWithRetry(url, { fetch: options?.fetch });
334
+ const text = await response.text();
335
+ const data = parseJsonSafe(text, "SWE-bench");
336
+ const now = new Date;
337
+ let entries = data.filter((entry) => entry.model && entry.resolved_rate != null);
338
+ if (options?.modelFilter?.length) {
339
+ const filterSet = new Set(options.modelFilter);
340
+ entries = entries.filter((e) => filterSet.has(e.model.toLowerCase().replace(/\s+/g, "-")));
341
+ }
342
+ if (options?.maxResults) {
343
+ entries = entries.slice(0, options.maxResults);
344
+ }
345
+ let results = entries.map((entry) => {
346
+ const modelId = entry.model.toLowerCase().replace(/\s+/g, "-");
347
+ const org = entry.organization?.toLowerCase() ?? "unknown";
348
+ return {
349
+ id: `swe-bench:${modelId}:coding`,
350
+ modelId,
351
+ providerKey: mapOrganizationToProvider3(org),
352
+ source: "swe-bench",
353
+ dimension: "coding",
354
+ score: Math.max(0, Math.min(100, entry.resolved_rate)),
355
+ rawScore: entry.resolved_rate,
356
+ metadata: {
357
+ organization: entry.organization,
358
+ date: entry.date
359
+ },
360
+ measuredAt: entry.date ? new Date(entry.date) : now,
361
+ ingestedAt: now
362
+ };
363
+ });
364
+ const { fromDate, toDate } = options ?? {};
365
+ if (fromDate) {
366
+ results = results.filter((r) => r.measuredAt >= fromDate);
367
+ }
368
+ if (toDate) {
369
+ results = results.filter((r) => r.measuredAt <= toDate);
370
+ }
371
+ return results;
372
+ }
373
+ };
374
374
  function mapOrganizationToProvider3(org) {
375
375
  const normalized = org.toLowerCase();
376
376
  if (normalized.includes("openai"))
@@ -1,3 +1,3 @@
1
1
  export { computeModelRankings } from './composite-scorer';
2
2
  export { DEFAULT_DIMENSION_WEIGHTS, getWeightMap, normalizeWeights, } from './dimension-weights';
3
- export { normalizeScore, normalizeBenchmarkResults } from './normalizer';
3
+ export { normalizeBenchmarkResults, normalizeScore } from './normalizer';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@contractspec/lib.provider-ranking",
3
- "version": "0.7.6",
3
+ "version": "0.7.9",
4
4
  "description": "AI provider ranking: benchmark ingestion, scoring, and model comparison",
5
5
  "keywords": [
6
6
  "contractspec",
@@ -24,16 +24,16 @@
24
24
  "dev": "contractspec-bun-build dev",
25
25
  "clean": "rimraf dist .turbo",
26
26
  "lint": "bun lint:fix",
27
- "lint:fix": "eslint src --fix",
28
- "lint:check": "eslint src",
27
+ "lint:fix": "biome check --write --unsafe --only=nursery/useSortedClasses . && biome check --write .",
28
+ "lint:check": "biome check .",
29
29
  "test": "bun test --pass-with-no-tests",
30
30
  "prebuild": "contractspec-bun-build prebuild",
31
31
  "typecheck": "tsc --noEmit"
32
32
  },
33
33
  "dependencies": {},
34
34
  "devDependencies": {
35
- "@contractspec/tool.typescript": "3.7.6",
36
- "@contractspec/tool.bun": "3.7.6",
35
+ "@contractspec/tool.typescript": "3.7.9",
36
+ "@contractspec/tool.bun": "3.7.9",
37
37
  "typescript": "^5.9.3"
38
38
  },
39
39
  "exports": {