@contractspec/lib.provider-ranking 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/README.md +44 -0
  2. package/dist/browser/eval/index.js +101 -0
  3. package/dist/browser/eval/runner.js +101 -0
  4. package/dist/browser/eval/types.js +0 -0
  5. package/dist/browser/in-memory-store.js +92 -0
  6. package/dist/browser/index.js +105 -0
  7. package/dist/browser/ingesters/artificial-analysis.js +149 -0
  8. package/dist/browser/ingesters/chatbot-arena.js +142 -0
  9. package/dist/browser/ingesters/fetch-utils.js +39 -0
  10. package/dist/browser/ingesters/index.js +418 -0
  11. package/dist/browser/ingesters/open-llm-leaderboard.js +108 -0
  12. package/dist/browser/ingesters/registry.js +412 -0
  13. package/dist/browser/ingesters/swe-bench.js +105 -0
  14. package/dist/browser/ingesters/types.js +0 -0
  15. package/dist/browser/scoring/composite-scorer.js +122 -0
  16. package/dist/browser/scoring/dimension-weights.js +39 -0
  17. package/dist/browser/scoring/index.js +161 -0
  18. package/dist/browser/scoring/normalizer.js +37 -0
  19. package/dist/browser/store.js +0 -0
  20. package/dist/browser/types.js +14 -0
  21. package/dist/eval/index.d.ts +2 -0
  22. package/dist/eval/index.js +102 -0
  23. package/dist/eval/runner.d.ts +18 -0
  24. package/dist/eval/runner.js +102 -0
  25. package/dist/eval/types.d.ts +51 -0
  26. package/dist/eval/types.js +1 -0
  27. package/dist/in-memory-store.d.ts +17 -0
  28. package/dist/in-memory-store.js +93 -0
  29. package/dist/index.d.ts +4 -0
  30. package/dist/index.js +106 -0
  31. package/dist/ingesters/artificial-analysis.d.ts +8 -0
  32. package/dist/ingesters/artificial-analysis.js +150 -0
  33. package/dist/ingesters/chatbot-arena.d.ts +8 -0
  34. package/dist/ingesters/chatbot-arena.js +143 -0
  35. package/dist/ingesters/fetch-utils.d.ts +11 -0
  36. package/dist/ingesters/fetch-utils.js +40 -0
  37. package/dist/ingesters/index.d.ts +7 -0
  38. package/dist/ingesters/index.js +419 -0
  39. package/dist/ingesters/open-llm-leaderboard.d.ts +8 -0
  40. package/dist/ingesters/open-llm-leaderboard.js +109 -0
  41. package/dist/ingesters/registry.d.ts +17 -0
  42. package/dist/ingesters/registry.js +413 -0
  43. package/dist/ingesters/swe-bench.d.ts +8 -0
  44. package/dist/ingesters/swe-bench.js +106 -0
  45. package/dist/ingesters/types.d.ts +31 -0
  46. package/dist/ingesters/types.js +1 -0
  47. package/dist/node/eval/index.js +101 -0
  48. package/dist/node/eval/runner.js +101 -0
  49. package/dist/node/eval/types.js +0 -0
  50. package/dist/node/in-memory-store.js +92 -0
  51. package/dist/node/index.js +105 -0
  52. package/dist/node/ingesters/artificial-analysis.js +149 -0
  53. package/dist/node/ingesters/chatbot-arena.js +142 -0
  54. package/dist/node/ingesters/fetch-utils.js +39 -0
  55. package/dist/node/ingesters/index.js +418 -0
  56. package/dist/node/ingesters/open-llm-leaderboard.js +108 -0
  57. package/dist/node/ingesters/registry.js +412 -0
  58. package/dist/node/ingesters/swe-bench.js +105 -0
  59. package/dist/node/ingesters/types.js +0 -0
  60. package/dist/node/scoring/composite-scorer.js +122 -0
  61. package/dist/node/scoring/dimension-weights.js +39 -0
  62. package/dist/node/scoring/index.js +161 -0
  63. package/dist/node/scoring/normalizer.js +37 -0
  64. package/dist/node/store.js +0 -0
  65. package/dist/node/types.js +14 -0
  66. package/dist/scoring/composite-scorer.d.ts +10 -0
  67. package/dist/scoring/composite-scorer.js +123 -0
  68. package/dist/scoring/dimension-weights.d.ts +8 -0
  69. package/dist/scoring/dimension-weights.js +40 -0
  70. package/dist/scoring/index.d.ts +3 -0
  71. package/dist/scoring/index.js +162 -0
  72. package/dist/scoring/normalizer.d.ts +20 -0
  73. package/dist/scoring/normalizer.js +38 -0
  74. package/dist/store.d.ts +19 -0
  75. package/dist/store.js +1 -0
  76. package/dist/types.d.ts +100 -0
  77. package/dist/types.js +15 -0
  78. package/package.json +362 -0
package/README.md ADDED
@@ -0,0 +1,44 @@
1
+ # @contractspec/lib.provider-ranking
2
+
3
+ Website: https://contractspec.io/
4
+
5
+ **AI provider ranking: benchmark ingestion, scoring, and model comparison.**
6
+
7
+ Ingests benchmark data from multiple sources (Chatbot Arena, SWE-bench, Artificial Analysis, Open LLM Leaderboard), normalizes scores to a 0-100 scale, and computes composite rankings across dimensions like coding, reasoning, cost, and latency.
8
+
9
+ ## Installation
10
+
11
+ ```bash
12
+ bun add @contractspec/lib.provider-ranking
13
+ ```
14
+
15
+ ## Exports
16
+
17
+ - `.` -- Core types, store interface, and in-memory store
18
+ - `./types` -- `BenchmarkResult`, `ModelRanking`, `ModelProfile`, `BenchmarkDimension`, `DimensionWeightConfig`
19
+ - `./store` -- `ProviderRankingStore` interface
20
+ - `./in-memory-store` -- `InMemoryProviderRankingStore` class
21
+ - `./scoring` -- `computeModelRankings()`, `normalizeScore()`, `DEFAULT_DIMENSION_WEIGHTS`
22
+ - `./ingesters` -- `chatbotArenaIngester`, `sweBenchIngester`, `artificialAnalysisIngester`, `IngesterRegistry`
23
+ - `./eval` -- `EvalRunner`, `EvalSuite`, `EvalCase` for custom evaluation
24
+
25
+ ## Usage
26
+
27
+ ```ts
28
+ import { InMemoryProviderRankingStore } from "@contractspec/lib.provider-ranking/in-memory-store";
29
+ import { createDefaultIngesterRegistry } from "@contractspec/lib.provider-ranking/ingesters";
30
+ import { computeModelRankings } from "@contractspec/lib.provider-ranking/scoring";
31
+
32
+ const store = new InMemoryProviderRankingStore();
33
+ const registry = createDefaultIngesterRegistry();
34
+
35
+ const ingester = registry.get("swe-bench");
36
+ const results = await ingester.ingest();
37
+
38
+ for (const result of results) {
39
+ await store.upsertBenchmarkResult(result);
40
+ }
41
+
42
+ const rankings = computeModelRankings((await store.listBenchmarkResults({})).results);
43
+ console.log(rankings);
44
+ ```
@@ -0,0 +1,101 @@
// src/eval/runner.ts
/**
 * Runs an evaluation suite against a chat adapter and grades each case.
 *
 * The adapter must expose `chat(prompt) -> Promise<{ text, latencyMs }>`.
 * Options: `maxConcurrency` (default 5) bounds parallel case execution.
 */
class EvalRunner {
  adapter;
  options;
  constructor(adapter, options = {}) {
    this.adapter = adapter;
    this.options = options;
  }
  /**
   * Executes every case in `suite` and returns an aggregate run report.
   * @param suite - { key, cases, defaultGrader }
   * @param modelId - model under test (recorded in the report)
   * @param providerKey - provider of the model (recorded in the report)
   * @returns report with per-case results plus pass/score/latency aggregates
   */
  async run(suite, modelId, providerKey) {
    const runId = `eval-${suite.key}-${modelId}-${Date.now()}`;
    const startedAt = new Date();
    const concurrency = this.options.maxConcurrency ?? 5;
    const caseResults = await this.runCasesWithConcurrency(suite.cases, suite.defaultGrader, concurrency);
    const passedCases = caseResults.filter((r) => r.passed).length;
    const averageScore = caseResults.length > 0
      ? caseResults.reduce((sum, r) => sum + r.score, 0) / caseResults.length
      : 0;
    const averageLatencyMs = caseResults.length > 0
      ? caseResults.reduce((sum, r) => sum + r.latencyMs, 0) / caseResults.length
      : 0;
    return {
      runId,
      evalSuiteKey: suite.key,
      modelId,
      providerKey,
      totalCases: suite.cases.length,
      passedCases,
      averageScore: Math.round(averageScore * 100) / 100, // 2-decimal rounding
      averageLatencyMs: Math.round(averageLatencyMs),
      caseResults,
      startedAt,
      completedAt: new Date()
    };
  }
  // Drains `cases` with at most `concurrency` workers pulling from a shared
  // queue. Note: results are appended in completion order, not case order.
  async runCasesWithConcurrency(cases, defaultGrader, concurrency) {
    const results = [];
    const queue = [...cases];
    const workers = Array.from({ length: Math.min(concurrency, queue.length) }, async () => {
      while (queue.length > 0) {
        const evalCase = queue.shift();
        if (!evalCase) {
          break;
        }
        const result = await this.runSingleCase(evalCase, defaultGrader);
        results.push(result);
      }
    });
    await Promise.all(workers);
    return results;
  }
  // Runs one case; adapter failures become a failed result, never a throw.
  async runSingleCase(evalCase, defaultGrader) {
    try {
      const { text, latencyMs } = await this.adapter.chat(evalCase.prompt);
      const { passed, score } = this.grade(evalCase, text, defaultGrader);
      return {
        caseId: evalCase.id,
        passed,
        score,
        response: text,
        latencyMs
      };
    } catch (error) {
      return {
        caseId: evalCase.id,
        passed: false,
        score: 0,
        response: "",
        latencyMs: 0,
        error: error instanceof Error ? error.message : String(error)
      };
    }
  }
  // Grades `response` with the case's grader (falling back to the suite
  // default). Cases missing an expectation auto-pass; "llm-judge" is not
  // implemented here and returns a neutral 0.5, as does any unknown grader.
  grade(evalCase, response, defaultGrader) {
    const grader = evalCase.graderKey ?? defaultGrader;
    switch (grader) {
      case "exact": {
        if (!evalCase.expectedOutput) {
          return { passed: true, score: 1 };
        }
        // Compute the comparison once instead of twice per case.
        const matches = response.trim() === evalCase.expectedOutput.trim();
        return { passed: matches, score: matches ? 1 : 0 };
      }
      case "contains": {
        if (!evalCase.expectedOutput) {
          return { passed: true, score: 1 };
        }
        const contains = response.includes(evalCase.expectedOutput);
        return { passed: contains, score: contains ? 1 : 0 };
      }
      case "regex": {
        if (!evalCase.expectedPattern) {
          return { passed: true, score: 1 };
        }
        const matches = new RegExp(evalCase.expectedPattern).test(response);
        return { passed: matches, score: matches ? 1 : 0 };
      }
      case "llm-judge":
        return { passed: true, score: 0.5 };
      default:
        return { passed: true, score: 0.5 };
    }
  }
}
export {
  EvalRunner
};
@@ -0,0 +1,101 @@
// src/eval/runner.ts
/** Executes eval suites against a chat adapter and grades the responses. */
class EvalRunner {
  adapter;
  options;
  constructor(adapter, options = {}) {
    this.adapter = adapter;
    this.options = options;
  }

  /** Runs every case in the suite and aggregates pass/score/latency stats. */
  async run(suite, modelId, providerKey) {
    const startedAt = new Date();
    const runId = `eval-${suite.key}-${modelId}-${Date.now()}`;
    const limit = this.options.maxConcurrency ?? 5;
    const caseResults = await this.runCasesWithConcurrency(suite.cases, suite.defaultGrader, limit);
    let scoreSum = 0;
    let latencySum = 0;
    let passedCases = 0;
    for (const outcome of caseResults) {
      scoreSum += outcome.score;
      latencySum += outcome.latencyMs;
      if (outcome.passed) {
        passedCases += 1;
      }
    }
    const count = caseResults.length;
    const averageScore = count === 0 ? 0 : scoreSum / count;
    const averageLatencyMs = count === 0 ? 0 : latencySum / count;
    return {
      runId,
      evalSuiteKey: suite.key,
      modelId,
      providerKey,
      totalCases: suite.cases.length,
      passedCases,
      averageScore: Math.round(averageScore * 100) / 100,
      averageLatencyMs: Math.round(averageLatencyMs),
      caseResults,
      startedAt,
      completedAt: new Date()
    };
  }

  /** Worker-pool execution: up to `concurrency` workers drain a shared queue. */
  async runCasesWithConcurrency(cases, defaultGrader, concurrency) {
    const pending = [...cases];
    const collected = [];
    const workerCount = Math.min(concurrency, pending.length);
    const worker = async () => {
      for (let next = pending.shift(); next; next = pending.shift()) {
        collected.push(await this.runSingleCase(next, defaultGrader));
      }
    };
    await Promise.all(Array.from({ length: workerCount }, worker));
    return collected;
  }

  /** Runs one case, converting adapter errors into failed results. */
  async runSingleCase(evalCase, defaultGrader) {
    try {
      const reply = await this.adapter.chat(evalCase.prompt);
      const verdict = this.grade(evalCase, reply.text, defaultGrader);
      return {
        caseId: evalCase.id,
        passed: verdict.passed,
        score: verdict.score,
        response: reply.text,
        latencyMs: reply.latencyMs
      };
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      return {
        caseId: evalCase.id,
        passed: false,
        score: 0,
        response: "",
        latencyMs: 0,
        error: message
      };
    }
  }

  /** Grades a response with the case grader or the suite default. */
  grade(evalCase, response, defaultGrader) {
    const grader = evalCase.graderKey ?? defaultGrader;
    if (grader === "exact") {
      if (!evalCase.expectedOutput) {
        return { passed: true, score: 1 };
      }
      return {
        passed: response.trim() === evalCase.expectedOutput.trim(),
        score: response.trim() === evalCase.expectedOutput.trim() ? 1 : 0
      };
    }
    if (grader === "contains") {
      if (!evalCase.expectedOutput) {
        return { passed: true, score: 1 };
      }
      return {
        passed: response.includes(evalCase.expectedOutput),
        score: response.includes(evalCase.expectedOutput) ? 1 : 0
      };
    }
    if (grader === "regex") {
      if (!evalCase.expectedPattern) {
        return { passed: true, score: 1 };
      }
      const hit = new RegExp(evalCase.expectedPattern).test(response);
      return { passed: hit, score: hit ? 1 : 0 };
    }
    // "llm-judge" and unknown graders fall back to a neutral half score.
    return { passed: true, score: 0.5 };
  }
}
export {
  EvalRunner
};
File without changes
@@ -0,0 +1,92 @@
// src/in-memory-store.ts
/** Map-backed, non-persistent implementation of the provider-ranking store. */
class InMemoryProviderRankingStore {
  benchmarkResults = new Map();
  modelRankings = new Map();
  ingestionRuns = new Map();

  // Slices `items` by query.offset/limit (defaults 0/50) and reports the
  // next offset when more items remain.
  #paginate(items, query) {
    const offset = query.offset ?? 0;
    const limit = query.limit ?? 50;
    const page = items.slice(offset, offset + limit);
    const consumed = offset + page.length;
    return { page, nextOffset: consumed < items.length ? consumed : undefined };
  }

  async upsertBenchmarkResult(result) {
    this.benchmarkResults.set(result.id, result);
  }

  async getBenchmarkResult(id) {
    return this.benchmarkResults.get(id) ?? null;
  }

  /** Filters stored benchmark results by the query's fields, then paginates. */
  async listBenchmarkResults(query) {
    const matches = [...this.benchmarkResults.values()].filter(
      (r) =>
        (!query.source || r.source === query.source) &&
        (!query.modelId || r.modelId === query.modelId) &&
        (!query.dimension || r.dimension === query.dimension) &&
        (!query.providerKey || r.providerKey === query.providerKey)
    );
    const { page, nextOffset } = this.#paginate(matches, query);
    return { results: page, total: matches.length, nextOffset };
  }

  async upsertModelRanking(ranking) {
    this.modelRankings.set(ranking.modelId, ranking);
  }

  async getModelRanking(modelId) {
    return this.modelRankings.get(modelId) ?? null;
  }

  /** Lists rankings sorted by a dimension score (desc, missing scores last)
   *  or, with no dimension, by overall rank ascending. */
  async listModelRankings(query) {
    let candidates = [...this.modelRankings.values()];
    if (query.providerKey) {
      candidates = candidates.filter((r) => r.providerKey === query.providerKey);
    }
    const dim = query.dimension;
    if (dim) {
      const scoreOf = (r) => r.dimensionScores[dim]?.score ?? -1;
      candidates.sort((a, b) => scoreOf(b) - scoreOf(a));
    } else {
      candidates.sort((a, b) => a.rank - b.rank);
    }
    const { page, nextOffset } = this.#paginate(candidates, query);
    return { rankings: page, total: candidates.length, nextOffset };
  }

  /** Builds a profile for a ranked model; returns null when unranked.
   *  displayName/contextWindow/costPerMillion/capabilities are placeholders. */
  async getModelProfile(modelId) {
    const ranking = this.modelRankings.get(modelId);
    if (!ranking) {
      return null;
    }
    const benchmarkResults = [...this.benchmarkResults.values()].filter((r) => r.modelId === modelId);
    return {
      modelId,
      providerKey: ranking.providerKey,
      displayName: modelId,
      contextWindow: 0,
      costPerMillion: null,
      capabilities: [],
      ranking,
      benchmarkResults
    };
  }

  async createIngestionRun(run) {
    this.ingestionRuns.set(run.id, run);
  }

  /** Shallow-merges `update` into an existing run; no-op for unknown ids. */
  async updateIngestionRun(id, update) {
    const current = this.ingestionRuns.get(id);
    if (!current) {
      return;
    }
    this.ingestionRuns.set(id, { ...current, ...update });
  }

  async getIngestionRun(id) {
    return this.ingestionRuns.get(id) ?? null;
  }
}
export {
  InMemoryProviderRankingStore
};
@@ -0,0 +1,105 @@
// src/in-memory-store.ts
/** In-memory store keyed by result id / model id / ingestion-run id. */
class InMemoryProviderRankingStore {
  benchmarkResults = new Map();
  modelRankings = new Map();
  ingestionRuns = new Map();

  async upsertBenchmarkResult(result) {
    this.benchmarkResults.set(result.id, result);
  }

  async getBenchmarkResult(id) {
    return this.benchmarkResults.get(id) ?? null;
  }

  /** Applies the query's equality filters, then offset/limit pagination
   *  (defaults 0/50). */
  async listBenchmarkResults(query) {
    const predicates = [
      query.source ? (r) => r.source === query.source : null,
      query.modelId ? (r) => r.modelId === query.modelId : null,
      query.dimension ? (r) => r.dimension === query.dimension : null,
      query.providerKey ? (r) => r.providerKey === query.providerKey : null
    ].filter(Boolean);
    const all = [...this.benchmarkResults.values()].filter((r) => predicates.every((p) => p(r)));
    const offset = query.offset ?? 0;
    const limit = query.limit ?? 50;
    const results = all.slice(offset, offset + limit);
    const consumed = offset + results.length;
    return {
      results,
      total: all.length,
      nextOffset: consumed < all.length ? consumed : undefined
    };
  }

  async upsertModelRanking(ranking) {
    this.modelRankings.set(ranking.modelId, ranking);
  }

  async getModelRanking(modelId) {
    return this.modelRankings.get(modelId) ?? null;
  }

  /** Sorted by a dimension score (desc, missing scored as -1) or by
   *  overall rank (asc), then paginated like listBenchmarkResults. */
  async listModelRankings(query) {
    let pool = [...this.modelRankings.values()];
    if (query.providerKey) {
      pool = pool.filter((r) => r.providerKey === query.providerKey);
    }
    if (query.dimension) {
      const dim = query.dimension;
      pool.sort((a, b) => (b.dimensionScores[dim]?.score ?? -1) - (a.dimensionScores[dim]?.score ?? -1));
    } else {
      pool.sort((a, b) => a.rank - b.rank);
    }
    const offset = query.offset ?? 0;
    const limit = query.limit ?? 50;
    const rankings = pool.slice(offset, offset + limit);
    const consumed = offset + rankings.length;
    return {
      rankings,
      total: pool.length,
      nextOffset: consumed < pool.length ? consumed : undefined
    };
  }

  /** Profile for a ranked model; placeholder metadata fields, null when
   *  the model has no ranking. */
  async getModelProfile(modelId) {
    const ranking = this.modelRankings.get(modelId);
    if (!ranking) {
      return null;
    }
    return {
      modelId,
      providerKey: ranking.providerKey,
      displayName: modelId,
      contextWindow: 0,
      costPerMillion: null,
      capabilities: [],
      ranking,
      benchmarkResults: [...this.benchmarkResults.values()].filter((r) => r.modelId === modelId)
    };
  }

  async createIngestionRun(run) {
    this.ingestionRuns.set(run.id, run);
  }

  /** Shallow-merges `update` over an existing run; ignores unknown ids. */
  async updateIngestionRun(id, update) {
    const existing = this.ingestionRuns.get(id);
    if (existing) {
      this.ingestionRuns.set(id, { ...existing, ...update });
    }
  }

  async getIngestionRun(id) {
    return this.ingestionRuns.get(id) ?? null;
  }
}

// src/types.ts
/** Every dimension a benchmark result may be scored on. */
var BENCHMARK_DIMENSIONS = [
  "coding",
  "reasoning",
  "agentic",
  "cost",
  "latency",
  "context",
  "safety",
  "custom"
];
export {
  InMemoryProviderRankingStore,
  BENCHMARK_DIMENSIONS
};
@@ -0,0 +1,149 @@
// src/ingesters/fetch-utils.ts
/**
 * Fetches `url` with exponential backoff.
 *
 * Retries network errors and 5xx responses up to `maxRetries` times
 * (default 2, i.e. up to 3 attempts total), sleeping baseDelayMs * 2^attempt
 * between attempts. Client errors (4xx) are treated as permanent and thrown
 * immediately — the previous implementation threw its HTTP error inside the
 * same `try`, so its own `catch` re-caught it and retried 4xx responses
 * despite the explicit `status >= 500` gate.
 *
 * @param url - target URL
 * @param options - { fetch?, maxRetries?, baseDelayMs? }
 * @returns the first OK response
 * @throws Error with the last failure when retries are exhausted, or
 *         immediately for non-retryable statuses
 */
async function fetchWithRetry(url, options) {
  const fetchFn = options?.fetch ?? globalThis.fetch;
  const maxRetries = options?.maxRetries ?? 2;
  const baseDelay = options?.baseDelayMs ?? 500;
  let lastError;
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    let response;
    try {
      response = await fetchFn(url);
    } catch (error) {
      // Network-level failure: retry with exponential backoff.
      lastError = error instanceof Error ? error : new Error(String(error));
      if (attempt < maxRetries) {
        await sleep(baseDelay * Math.pow(2, attempt));
      }
      continue;
    }
    if (response.ok) {
      return response;
    }
    const httpError = new Error(`Fetch failed: ${response.status} ${response.statusText} (${url})`);
    if (response.status >= 500) {
      // Server errors are treated as transient: retry until attempts run out.
      lastError = httpError;
      if (attempt < maxRetries) {
        await sleep(baseDelay * Math.pow(2, attempt));
      }
      continue;
    }
    // 4xx and other non-OK, non-5xx statuses will not improve on retry.
    throw httpError;
  }
  throw lastError ?? new Error(`Fetch failed after ${maxRetries + 1} attempts: ${url}`);
}

/**
 * Parses `text` as JSON, rethrowing with the source `label` and a 200-char
 * snippet of the body so ingestion failures identify their origin.
 */
function parseJsonSafe(text, label) {
  try {
    return JSON.parse(text);
  } catch {
    throw new Error(`Failed to parse JSON response from ${label}: ${text.slice(0, 200)}`);
  }
}

/** Promise-based delay used for retry backoff. */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
// src/ingesters/artificial-analysis.ts
var DEFAULT_AA_URL = "https://artificialanalysis.ai/api/models";

/** Ingester for Artificial Analysis quality/speed/cost model benchmarks. */
var artificialAnalysisIngester = {
  source: "artificial-analysis",
  displayName: "Artificial Analysis",
  description: "Quality, speed, and cost benchmarks from Artificial Analysis.",
  /**
   * Fetches the model list and emits one benchmark result per available
   * dimension (reasoning / latency / cost / context) per model, optionally
   * restricted by options.dimensions, options.modelFilter, options.maxResults.
   */
  async ingest(options) {
    const url = options?.sourceUrl ?? DEFAULT_AA_URL;
    const response = await fetchWithRetry(url, { fetch: options?.fetch });
    const payload = parseJsonSafe(await response.text(), "Artificial Analysis");
    const now = new Date();
    const wanted = options?.dimensions ? new Set(options.dimensions) : null;
    const includeDim = (dimension) => !wanted || wanted.has(dimension);

    // Drop rows lacking the identifying fields, then apply the model filter.
    let entries = payload.filter((e) => e.model_id && e.provider);
    if (options?.modelFilter?.length) {
      const allowed = new Set(options.modelFilter);
      entries = entries.filter((e) => allowed.has(e.model_id));
    }

    const results = [];
    for (const entry of entries) {
      // Shared record shape for every dimension of this entry.
      const makeResult = (dimension, score, rawScore) => ({
        id: `artificial-analysis:${entry.model_id}:${dimension}`,
        modelId: entry.model_id,
        providerKey: entry.provider.toLowerCase(),
        source: "artificial-analysis",
        dimension,
        score,
        rawScore,
        metadata: { model_name: entry.model_name },
        measuredAt: now,
        ingestedAt: now
      });
      if (entry.quality_score != null && includeDim("reasoning")) {
        results.push(makeResult("reasoning", Math.max(0, Math.min(100, entry.quality_score)), entry.quality_score));
      }
      if ((entry.tokens_per_second != null || entry.ttft_ms != null) && includeDim("latency")) {
        results.push(makeResult("latency", computeLatencyScore(entry.tokens_per_second, entry.ttft_ms), {
          tokens_per_second: entry.tokens_per_second,
          ttft_ms: entry.ttft_ms
        }));
      }
      if ((entry.price_per_million_input_tokens != null || entry.price_per_million_output_tokens != null) && includeDim("cost")) {
        results.push(makeResult("cost", computeCostScore(entry.price_per_million_input_tokens, entry.price_per_million_output_tokens), {
          input: entry.price_per_million_input_tokens,
          output: entry.price_per_million_output_tokens
        }));
      }
      if (entry.context_window != null && includeDim("context")) {
        results.push(makeResult("context", computeContextScore(entry.context_window), entry.context_window));
      }
    }
    return options?.maxResults ? results.slice(0, options.maxResults) : results;
  }
};

/** Throughput score (200 tok/s -> 100, capped) minus a TTFT penalty of up to
 *  30 points for time-to-first-token beyond 200 ms; defaults to 50 when
 *  throughput is unknown. Rounded to 2 decimals. */
function computeLatencyScore(tokensPerSec, ttftMs) {
  let score = 50;
  if (tokensPerSec != null) {
    score = Math.min(100, (tokensPerSec / 200) * 100);
  }
  if (ttftMs != null) {
    const penalty = Math.max(0, Math.min(30, ((ttftMs - 200) / 100) * 10));
    score = Math.max(0, score - penalty);
  }
  return Math.round(score * 100) / 100;
}

/** Cheaper is better: a $30/M average of input/output price maps to 0,
 *  free maps to 100. Missing prices count as 0 in the average. */
function computeCostScore(inputCost, outputCost) {
  const averagePrice = ((inputCost ?? 0) + (outputCost ?? 0)) / 2;
  return Math.round(Math.max(0, 100 - (averagePrice / 30) * 100) * 100) / 100;
}

/** Linear context score: a 1M-token window maps to 100, capped there. */
function computeContextScore(contextWindow) {
  return Math.round(Math.min(100, (contextWindow / 1e6) * 100) * 100) / 100;
}

export {
  artificialAnalysisIngester
};