@contractspec/lib.provider-ranking 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/README.md +44 -0
  2. package/dist/browser/eval/index.js +101 -0
  3. package/dist/browser/eval/runner.js +101 -0
  4. package/dist/browser/eval/types.js +0 -0
  5. package/dist/browser/in-memory-store.js +92 -0
  6. package/dist/browser/index.js +105 -0
  7. package/dist/browser/ingesters/artificial-analysis.js +149 -0
  8. package/dist/browser/ingesters/chatbot-arena.js +142 -0
  9. package/dist/browser/ingesters/fetch-utils.js +39 -0
  10. package/dist/browser/ingesters/index.js +418 -0
  11. package/dist/browser/ingesters/open-llm-leaderboard.js +108 -0
  12. package/dist/browser/ingesters/registry.js +412 -0
  13. package/dist/browser/ingesters/swe-bench.js +105 -0
  14. package/dist/browser/ingesters/types.js +0 -0
  15. package/dist/browser/scoring/composite-scorer.js +122 -0
  16. package/dist/browser/scoring/dimension-weights.js +39 -0
  17. package/dist/browser/scoring/index.js +161 -0
  18. package/dist/browser/scoring/normalizer.js +37 -0
  19. package/dist/browser/store.js +0 -0
  20. package/dist/browser/types.js +14 -0
  21. package/dist/eval/index.d.ts +2 -0
  22. package/dist/eval/index.js +102 -0
  23. package/dist/eval/runner.d.ts +18 -0
  24. package/dist/eval/runner.js +102 -0
  25. package/dist/eval/types.d.ts +51 -0
  26. package/dist/eval/types.js +1 -0
  27. package/dist/in-memory-store.d.ts +17 -0
  28. package/dist/in-memory-store.js +93 -0
  29. package/dist/index.d.ts +4 -0
  30. package/dist/index.js +106 -0
  31. package/dist/ingesters/artificial-analysis.d.ts +8 -0
  32. package/dist/ingesters/artificial-analysis.js +150 -0
  33. package/dist/ingesters/chatbot-arena.d.ts +8 -0
  34. package/dist/ingesters/chatbot-arena.js +143 -0
  35. package/dist/ingesters/fetch-utils.d.ts +11 -0
  36. package/dist/ingesters/fetch-utils.js +40 -0
  37. package/dist/ingesters/index.d.ts +7 -0
  38. package/dist/ingesters/index.js +419 -0
  39. package/dist/ingesters/open-llm-leaderboard.d.ts +8 -0
  40. package/dist/ingesters/open-llm-leaderboard.js +109 -0
  41. package/dist/ingesters/registry.d.ts +17 -0
  42. package/dist/ingesters/registry.js +413 -0
  43. package/dist/ingesters/swe-bench.d.ts +8 -0
  44. package/dist/ingesters/swe-bench.js +106 -0
  45. package/dist/ingesters/types.d.ts +31 -0
  46. package/dist/ingesters/types.js +1 -0
  47. package/dist/node/eval/index.js +101 -0
  48. package/dist/node/eval/runner.js +101 -0
  49. package/dist/node/eval/types.js +0 -0
  50. package/dist/node/in-memory-store.js +92 -0
  51. package/dist/node/index.js +105 -0
  52. package/dist/node/ingesters/artificial-analysis.js +149 -0
  53. package/dist/node/ingesters/chatbot-arena.js +142 -0
  54. package/dist/node/ingesters/fetch-utils.js +39 -0
  55. package/dist/node/ingesters/index.js +418 -0
  56. package/dist/node/ingesters/open-llm-leaderboard.js +108 -0
  57. package/dist/node/ingesters/registry.js +412 -0
  58. package/dist/node/ingesters/swe-bench.js +105 -0
  59. package/dist/node/ingesters/types.js +0 -0
  60. package/dist/node/scoring/composite-scorer.js +122 -0
  61. package/dist/node/scoring/dimension-weights.js +39 -0
  62. package/dist/node/scoring/index.js +161 -0
  63. package/dist/node/scoring/normalizer.js +37 -0
  64. package/dist/node/store.js +0 -0
  65. package/dist/node/types.js +14 -0
  66. package/dist/scoring/composite-scorer.d.ts +10 -0
  67. package/dist/scoring/composite-scorer.js +123 -0
  68. package/dist/scoring/dimension-weights.d.ts +8 -0
  69. package/dist/scoring/dimension-weights.js +40 -0
  70. package/dist/scoring/index.d.ts +3 -0
  71. package/dist/scoring/index.js +162 -0
  72. package/dist/scoring/normalizer.d.ts +20 -0
  73. package/dist/scoring/normalizer.js +38 -0
  74. package/dist/store.d.ts +19 -0
  75. package/dist/store.js +1 -0
  76. package/dist/types.d.ts +100 -0
  77. package/dist/types.js +15 -0
  78. package/package.json +362 -0
@@ -0,0 +1,161 @@
// src/scoring/normalizer.ts
// Known raw-score ranges per benchmark source, used to rescale onto 0-100.
var SOURCE_NORMALIZATION = {
  "chatbot-arena": { min: 800, max: 1400, invertScale: false },
  "swe-bench": { min: 0, max: 100, invertScale: false },
  "human-eval": { min: 0, max: 100, invertScale: false },
  mmlu: { min: 0, max: 100, invertScale: false },
  gpqa: { min: 0, max: 100, invertScale: false },
  arc: { min: 0, max: 100, invertScale: false },
  truthfulqa: { min: 0, max: 100, invertScale: false },
  "tau-bench": { min: 0, max: 100, invertScale: false },
  "artificial-analysis": { min: 0, max: 100, invertScale: false }
};
// Clamp a value into [0, 100].
function clampToScale(value) {
  return Math.max(0, Math.min(100, value));
}
/**
 * Map a raw benchmark score onto the 0-100 scale using the source's
 * known range. Unknown sources fall back to simple clamping.
 */
function normalizeScore(rawScore, source, configOverride) {
  const config = configOverride ?? SOURCE_NORMALIZATION[source];
  if (!config) {
    return clampToScale(rawScore);
  }
  const { min, max, invertScale } = config;
  const span = max - min;
  if (span === 0) {
    return 50; // degenerate range: every raw score maps to the midpoint
  }
  const scaled = ((rawScore - min) / span) * 100;
  return clampToScale(invertScale ? 100 - scaled : scaled);
}
/**
 * Return copies of the given results with `score` recomputed from
 * `rawScore` (falling back to the existing `score` when no numeric
 * `rawScore` is present). The input array is not mutated.
 */
function normalizeBenchmarkResults(results) {
  return results.map((result) => {
    const raw = typeof result.rawScore === "number" ? result.rawScore : result.score;
    return { ...result, score: normalizeScore(raw, result.source) };
  });
}

// src/scoring/dimension-weights.ts
// Default per-dimension weights (relative units; rescaled at scoring time).
var DEFAULT_DIMENSION_WEIGHTS = [
  { dimension: "coding", weight: 20 },
  { dimension: "reasoning", weight: 20 },
  { dimension: "agentic", weight: 15 },
  { dimension: "cost", weight: 10 },
  { dimension: "latency", weight: 10 },
  { dimension: "context", weight: 10 },
  { dimension: "safety", weight: 10 },
  { dimension: "custom", weight: 5 }
];
// Build a dimension -> weight lookup from the defaults, applying overrides last.
function getWeightMap(overrides) {
  const merged = [...DEFAULT_DIMENSION_WEIGHTS, ...(overrides ?? [])];
  return new Map(merged.map((entry) => [entry.dimension, entry.weight]));
}
/**
 * Rescale the weights of `activeDimensions` so they sum to 1.
 * Returns an empty map when the active dimensions carry no weight at all.
 */
function normalizeWeights(weights, activeDimensions) {
  let total = 0;
  for (const dim of activeDimensions) {
    total += weights.get(dim) ?? 0;
  }
  const normalized = new Map();
  if (total === 0) {
    return normalized;
  }
  for (const dim of activeDimensions) {
    normalized.set(dim, (weights.get(dim) ?? 0) / total);
  }
  return normalized;
}

// src/scoring/composite-scorer.ts
/**
 * Groups benchmark results by model, scores each dimension, and returns
 * rankings ordered by weighted composite score (rank starts at 1).
 */
function computeModelRankings(results, options, existingRankings) {
  const weights = getWeightMap(options?.weightOverrides);
  const rankings = [];
  for (const [modelId, modelResults] of groupByModel(results)) {
    const dimensionScores = computeDimensionScores(modelResults);
    const activeDimensions = Object.keys(dimensionScores);
    const normalizedWeights = normalizeWeights(weights, activeDimensions);
    let compositeScore = 0;
    for (const dim of activeDimensions) {
      const entry = dimensionScores[dim];
      if (entry) {
        compositeScore += entry.score * (normalizedWeights.get(dim) ?? 0);
      }
    }
    rankings.push({
      modelId,
      providerKey: modelResults[0]?.providerKey ?? "unknown",
      compositeScore: Math.round(compositeScore * 100) / 100,
      dimensionScores,
      rank: 0, // assigned after sorting
      previousRank: existingRankings?.get(modelId)?.rank ?? null,
      updatedAt: new Date()
    });
  }
  rankings.sort((a, b) => b.compositeScore - a.compositeScore);
  return rankings.map((ranking, index) => ({ ...ranking, rank: index + 1 }));
}
// Bucket results by modelId.
function groupByModel(results) {
  const byModel = new Map();
  for (const result of results) {
    const bucket = byModel.get(result.modelId);
    if (bucket) {
      bucket.push(result);
    } else {
      byModel.set(result.modelId, [result]);
    }
  }
  return byModel;
}
// Average scores per dimension and attach a confidence estimate
// (half recency, half source diversity capped at 3 sources).
function computeDimensionScores(results) {
  const byDimension = new Map();
  for (const result of results) {
    const bucket = byDimension.get(result.dimension);
    if (bucket) {
      bucket.push(result);
    } else {
      byDimension.set(result.dimension, [result]);
    }
  }
  const scores = {};
  for (const [dimension, dimResults] of byDimension) {
    let total = 0;
    for (const r of dimResults) {
      total += r.score;
    }
    const avgScore = total / dimResults.length;
    const sources = [...new Set(dimResults.map((r) => r.source))];
    const recencyFactor = computeRecencyFactor(dimResults);
    const sourceDiversity = Math.min(sources.length / 3, 1);
    scores[dimension] = {
      score: Math.round(avgScore * 100) / 100,
      confidence: Math.round((recencyFactor * 0.5 + sourceDiversity * 0.5) * 100) / 100,
      sources
    };
  }
  return scores;
}
// 1.0 for data at most 30 days old, 0.3 beyond 180 days, linear in between.
function computeRecencyFactor(results) {
  if (results.length === 0) {
    return 0;
  }
  const mostRecent = Math.max(...results.map((r) => r.measuredAt.getTime()));
  const ageDays = (Date.now() - mostRecent) / (1000 * 60 * 60 * 24);
  if (ageDays <= 30) {
    return 1;
  }
  if (ageDays >= 180) {
    return 0.3;
  }
  return 1 - ((ageDays - 30) / (180 - 30)) * 0.7;
}
export {
  normalizeWeights,
  normalizeScore,
  normalizeBenchmarkResults,
  getWeightMap,
  computeModelRankings,
  DEFAULT_DIMENSION_WEIGHTS
};
@@ -0,0 +1,37 @@
// src/scoring/normalizer.ts
// Known raw-score ranges per benchmark source, used to rescale onto 0-100.
var SOURCE_NORMALIZATION = {
  "chatbot-arena": { min: 800, max: 1400, invertScale: false },
  "swe-bench": { min: 0, max: 100, invertScale: false },
  "human-eval": { min: 0, max: 100, invertScale: false },
  mmlu: { min: 0, max: 100, invertScale: false },
  gpqa: { min: 0, max: 100, invertScale: false },
  arc: { min: 0, max: 100, invertScale: false },
  truthfulqa: { min: 0, max: 100, invertScale: false },
  "tau-bench": { min: 0, max: 100, invertScale: false },
  "artificial-analysis": { min: 0, max: 100, invertScale: false }
};
// Clamp a value into [0, 100].
function clampToScale(value) {
  return Math.max(0, Math.min(100, value));
}
/**
 * Map a raw benchmark score onto the 0-100 scale using the source's
 * known range. Unknown sources fall back to simple clamping.
 */
function normalizeScore(rawScore, source, configOverride) {
  const config = configOverride ?? SOURCE_NORMALIZATION[source];
  if (!config) {
    return clampToScale(rawScore);
  }
  const { min, max, invertScale } = config;
  const span = max - min;
  if (span === 0) {
    return 50; // degenerate range: every raw score maps to the midpoint
  }
  const scaled = ((rawScore - min) / span) * 100;
  return clampToScale(invertScale ? 100 - scaled : scaled);
}
/**
 * Return copies of the given results with `score` recomputed from
 * `rawScore` (falling back to the existing `score` when no numeric
 * `rawScore` is present). The input array is not mutated.
 */
function normalizeBenchmarkResults(results) {
  return results.map((result) => {
    const raw = typeof result.rawScore === "number" ? result.rawScore : result.score;
    return { ...result, score: normalizeScore(raw, result.source) };
  });
}
export {
  normalizeScore,
  normalizeBenchmarkResults
};
File without changes
@@ -0,0 +1,14 @@
// src/types.ts
// Canonical list of benchmark dimensions used by the scoring layer.
var BENCHMARK_DIMENSIONS = [
  "coding", "reasoning", "agentic", "cost",
  "latency", "context", "safety", "custom"
];
export {
  BENCHMARK_DIMENSIONS
};
@@ -0,0 +1,10 @@
1
+ import type { BenchmarkResult, DimensionWeightConfig, ModelRanking } from '../types';
2
+ interface ScorerOptions {
3
+ weightOverrides?: DimensionWeightConfig[];
4
+ }
5
+ /**
6
+ * Groups benchmark results by model, computes per-dimension scores,
7
+ * and produces a weighted composite ranking.
8
+ */
9
+ export declare function computeModelRankings(results: BenchmarkResult[], options?: ScorerOptions, existingRankings?: Map<string, ModelRanking>): ModelRanking[];
10
+ export {};
@@ -0,0 +1,123 @@
// @bun
// src/scoring/dimension-weights.ts
// Default per-dimension weights (relative units; rescaled at scoring time).
var DEFAULT_DIMENSION_WEIGHTS = [
  { dimension: "coding", weight: 20 },
  { dimension: "reasoning", weight: 20 },
  { dimension: "agentic", weight: 15 },
  { dimension: "cost", weight: 10 },
  { dimension: "latency", weight: 10 },
  { dimension: "context", weight: 10 },
  { dimension: "safety", weight: 10 },
  { dimension: "custom", weight: 5 }
];
// Build a dimension -> weight lookup from the defaults, applying overrides last.
function getWeightMap(overrides) {
  const merged = [...DEFAULT_DIMENSION_WEIGHTS, ...(overrides ?? [])];
  return new Map(merged.map((entry) => [entry.dimension, entry.weight]));
}
/**
 * Rescale the weights of `activeDimensions` so they sum to 1.
 * Returns an empty map when the active dimensions carry no weight at all.
 */
function normalizeWeights(weights, activeDimensions) {
  let total = 0;
  for (const dim of activeDimensions) {
    total += weights.get(dim) ?? 0;
  }
  const normalized = new Map();
  if (total === 0) {
    return normalized;
  }
  for (const dim of activeDimensions) {
    normalized.set(dim, (weights.get(dim) ?? 0) / total);
  }
  return normalized;
}

// src/scoring/composite-scorer.ts
/**
 * Groups benchmark results by model, scores each dimension, and returns
 * rankings ordered by weighted composite score (rank starts at 1).
 */
function computeModelRankings(results, options, existingRankings) {
  const weights = getWeightMap(options?.weightOverrides);
  const rankings = [];
  for (const [modelId, modelResults] of groupByModel(results)) {
    const dimensionScores = computeDimensionScores(modelResults);
    const activeDimensions = Object.keys(dimensionScores);
    const normalizedWeights = normalizeWeights(weights, activeDimensions);
    let compositeScore = 0;
    for (const dim of activeDimensions) {
      const entry = dimensionScores[dim];
      if (entry) {
        compositeScore += entry.score * (normalizedWeights.get(dim) ?? 0);
      }
    }
    rankings.push({
      modelId,
      providerKey: modelResults[0]?.providerKey ?? "unknown",
      compositeScore: Math.round(compositeScore * 100) / 100,
      dimensionScores,
      rank: 0, // assigned after sorting
      previousRank: existingRankings?.get(modelId)?.rank ?? null,
      updatedAt: new Date()
    });
  }
  rankings.sort((a, b) => b.compositeScore - a.compositeScore);
  return rankings.map((ranking, index) => ({ ...ranking, rank: index + 1 }));
}
// Bucket results by modelId.
function groupByModel(results) {
  const byModel = new Map();
  for (const result of results) {
    const bucket = byModel.get(result.modelId);
    if (bucket) {
      bucket.push(result);
    } else {
      byModel.set(result.modelId, [result]);
    }
  }
  return byModel;
}
// Average scores per dimension and attach a confidence estimate
// (half recency, half source diversity capped at 3 sources).
function computeDimensionScores(results) {
  const byDimension = new Map();
  for (const result of results) {
    const bucket = byDimension.get(result.dimension);
    if (bucket) {
      bucket.push(result);
    } else {
      byDimension.set(result.dimension, [result]);
    }
  }
  const scores = {};
  for (const [dimension, dimResults] of byDimension) {
    let total = 0;
    for (const r of dimResults) {
      total += r.score;
    }
    const avgScore = total / dimResults.length;
    const sources = [...new Set(dimResults.map((r) => r.source))];
    const recencyFactor = computeRecencyFactor(dimResults);
    const sourceDiversity = Math.min(sources.length / 3, 1);
    scores[dimension] = {
      score: Math.round(avgScore * 100) / 100,
      confidence: Math.round((recencyFactor * 0.5 + sourceDiversity * 0.5) * 100) / 100,
      sources
    };
  }
  return scores;
}
// 1.0 for data at most 30 days old, 0.3 beyond 180 days, linear in between.
function computeRecencyFactor(results) {
  if (results.length === 0) {
    return 0;
  }
  const mostRecent = Math.max(...results.map((r) => r.measuredAt.getTime()));
  const ageDays = (Date.now() - mostRecent) / (1000 * 60 * 60 * 24);
  if (ageDays <= 30) {
    return 1;
  }
  if (ageDays >= 180) {
    return 0.3;
  }
  return 1 - ((ageDays - 30) / (180 - 30)) * 0.7;
}
export {
  computeModelRankings
};
@@ -0,0 +1,8 @@
1
+ import type { BenchmarkDimension, DimensionWeightConfig } from '../types';
2
+ /**
3
+ * Default weights for composite score calculation.
4
+ * Weights are normalized to sum to 1.0 at scoring time.
5
+ */
6
+ export declare const DEFAULT_DIMENSION_WEIGHTS: DimensionWeightConfig[];
7
+ export declare function getWeightMap(overrides?: DimensionWeightConfig[]): Map<BenchmarkDimension, number>;
8
+ export declare function normalizeWeights(weights: Map<BenchmarkDimension, number>, activeDimensions: BenchmarkDimension[]): Map<BenchmarkDimension, number>;
@@ -0,0 +1,40 @@
// @bun
// src/scoring/dimension-weights.ts
// Default per-dimension weights (relative units; rescaled at scoring time).
var DEFAULT_DIMENSION_WEIGHTS = [
  { dimension: "coding", weight: 20 },
  { dimension: "reasoning", weight: 20 },
  { dimension: "agentic", weight: 15 },
  { dimension: "cost", weight: 10 },
  { dimension: "latency", weight: 10 },
  { dimension: "context", weight: 10 },
  { dimension: "safety", weight: 10 },
  { dimension: "custom", weight: 5 }
];
// Build a dimension -> weight lookup from the defaults, applying overrides last.
function getWeightMap(overrides) {
  const merged = [...DEFAULT_DIMENSION_WEIGHTS, ...(overrides ?? [])];
  return new Map(merged.map((entry) => [entry.dimension, entry.weight]));
}
/**
 * Rescale the weights of `activeDimensions` so they sum to 1.
 * Returns an empty map when the active dimensions carry no weight at all.
 */
function normalizeWeights(weights, activeDimensions) {
  let total = 0;
  for (const dim of activeDimensions) {
    total += weights.get(dim) ?? 0;
  }
  const normalized = new Map();
  if (total === 0) {
    return normalized;
  }
  for (const dim of activeDimensions) {
    normalized.set(dim, (weights.get(dim) ?? 0) / total);
  }
  return normalized;
}
export {
  normalizeWeights,
  getWeightMap,
  DEFAULT_DIMENSION_WEIGHTS
};
@@ -0,0 +1,3 @@
1
+ export { computeModelRankings } from './composite-scorer';
2
+ export { DEFAULT_DIMENSION_WEIGHTS, getWeightMap, normalizeWeights, } from './dimension-weights';
3
+ export { normalizeScore, normalizeBenchmarkResults } from './normalizer';
@@ -0,0 +1,162 @@
// @bun
// src/scoring/normalizer.ts
// Known raw-score ranges per benchmark source, used to rescale onto 0-100.
var SOURCE_NORMALIZATION = {
  "chatbot-arena": { min: 800, max: 1400, invertScale: false },
  "swe-bench": { min: 0, max: 100, invertScale: false },
  "human-eval": { min: 0, max: 100, invertScale: false },
  mmlu: { min: 0, max: 100, invertScale: false },
  gpqa: { min: 0, max: 100, invertScale: false },
  arc: { min: 0, max: 100, invertScale: false },
  truthfulqa: { min: 0, max: 100, invertScale: false },
  "tau-bench": { min: 0, max: 100, invertScale: false },
  "artificial-analysis": { min: 0, max: 100, invertScale: false }
};
// Clamp a value into [0, 100].
function clampToScale(value) {
  return Math.max(0, Math.min(100, value));
}
/**
 * Map a raw benchmark score onto the 0-100 scale using the source's
 * known range. Unknown sources fall back to simple clamping.
 */
function normalizeScore(rawScore, source, configOverride) {
  const config = configOverride ?? SOURCE_NORMALIZATION[source];
  if (!config) {
    return clampToScale(rawScore);
  }
  const { min, max, invertScale } = config;
  const span = max - min;
  if (span === 0) {
    return 50; // degenerate range: every raw score maps to the midpoint
  }
  const scaled = ((rawScore - min) / span) * 100;
  return clampToScale(invertScale ? 100 - scaled : scaled);
}
/**
 * Return copies of the given results with `score` recomputed from
 * `rawScore` (falling back to the existing `score` when no numeric
 * `rawScore` is present). The input array is not mutated.
 */
function normalizeBenchmarkResults(results) {
  return results.map((result) => {
    const raw = typeof result.rawScore === "number" ? result.rawScore : result.score;
    return { ...result, score: normalizeScore(raw, result.source) };
  });
}

// src/scoring/dimension-weights.ts
// Default per-dimension weights (relative units; rescaled at scoring time).
var DEFAULT_DIMENSION_WEIGHTS = [
  { dimension: "coding", weight: 20 },
  { dimension: "reasoning", weight: 20 },
  { dimension: "agentic", weight: 15 },
  { dimension: "cost", weight: 10 },
  { dimension: "latency", weight: 10 },
  { dimension: "context", weight: 10 },
  { dimension: "safety", weight: 10 },
  { dimension: "custom", weight: 5 }
];
// Build a dimension -> weight lookup from the defaults, applying overrides last.
function getWeightMap(overrides) {
  const merged = [...DEFAULT_DIMENSION_WEIGHTS, ...(overrides ?? [])];
  return new Map(merged.map((entry) => [entry.dimension, entry.weight]));
}
/**
 * Rescale the weights of `activeDimensions` so they sum to 1.
 * Returns an empty map when the active dimensions carry no weight at all.
 */
function normalizeWeights(weights, activeDimensions) {
  let total = 0;
  for (const dim of activeDimensions) {
    total += weights.get(dim) ?? 0;
  }
  const normalized = new Map();
  if (total === 0) {
    return normalized;
  }
  for (const dim of activeDimensions) {
    normalized.set(dim, (weights.get(dim) ?? 0) / total);
  }
  return normalized;
}

// src/scoring/composite-scorer.ts
/**
 * Groups benchmark results by model, scores each dimension, and returns
 * rankings ordered by weighted composite score (rank starts at 1).
 */
function computeModelRankings(results, options, existingRankings) {
  const weights = getWeightMap(options?.weightOverrides);
  const rankings = [];
  for (const [modelId, modelResults] of groupByModel(results)) {
    const dimensionScores = computeDimensionScores(modelResults);
    const activeDimensions = Object.keys(dimensionScores);
    const normalizedWeights = normalizeWeights(weights, activeDimensions);
    let compositeScore = 0;
    for (const dim of activeDimensions) {
      const entry = dimensionScores[dim];
      if (entry) {
        compositeScore += entry.score * (normalizedWeights.get(dim) ?? 0);
      }
    }
    rankings.push({
      modelId,
      providerKey: modelResults[0]?.providerKey ?? "unknown",
      compositeScore: Math.round(compositeScore * 100) / 100,
      dimensionScores,
      rank: 0, // assigned after sorting
      previousRank: existingRankings?.get(modelId)?.rank ?? null,
      updatedAt: new Date()
    });
  }
  rankings.sort((a, b) => b.compositeScore - a.compositeScore);
  return rankings.map((ranking, index) => ({ ...ranking, rank: index + 1 }));
}
// Bucket results by modelId.
function groupByModel(results) {
  const byModel = new Map();
  for (const result of results) {
    const bucket = byModel.get(result.modelId);
    if (bucket) {
      bucket.push(result);
    } else {
      byModel.set(result.modelId, [result]);
    }
  }
  return byModel;
}
// Average scores per dimension and attach a confidence estimate
// (half recency, half source diversity capped at 3 sources).
function computeDimensionScores(results) {
  const byDimension = new Map();
  for (const result of results) {
    const bucket = byDimension.get(result.dimension);
    if (bucket) {
      bucket.push(result);
    } else {
      byDimension.set(result.dimension, [result]);
    }
  }
  const scores = {};
  for (const [dimension, dimResults] of byDimension) {
    let total = 0;
    for (const r of dimResults) {
      total += r.score;
    }
    const avgScore = total / dimResults.length;
    const sources = [...new Set(dimResults.map((r) => r.source))];
    const recencyFactor = computeRecencyFactor(dimResults);
    const sourceDiversity = Math.min(sources.length / 3, 1);
    scores[dimension] = {
      score: Math.round(avgScore * 100) / 100,
      confidence: Math.round((recencyFactor * 0.5 + sourceDiversity * 0.5) * 100) / 100,
      sources
    };
  }
  return scores;
}
// 1.0 for data at most 30 days old, 0.3 beyond 180 days, linear in between.
function computeRecencyFactor(results) {
  if (results.length === 0) {
    return 0;
  }
  const mostRecent = Math.max(...results.map((r) => r.measuredAt.getTime()));
  const ageDays = (Date.now() - mostRecent) / (1000 * 60 * 60 * 24);
  if (ageDays <= 30) {
    return 1;
  }
  if (ageDays >= 180) {
    return 0.3;
  }
  return 1 - ((ageDays - 30) / (180 - 30)) * 0.7;
}
export {
  normalizeWeights,
  normalizeScore,
  normalizeBenchmarkResults,
  getWeightMap,
  computeModelRankings,
  DEFAULT_DIMENSION_WEIGHTS
};
@@ -0,0 +1,20 @@
1
+ import type { BenchmarkResult, BenchmarkSource } from '../types';
2
+ interface NormalizationConfig {
3
+ /** Minimum possible raw score for the source. */
4
+ min: number;
5
+ /** Maximum possible raw score for the source. */
6
+ max: number;
7
+ /** If true, lower raw scores are better (e.g. latency, cost). */
8
+ invertScale: boolean;
9
+ }
10
+ /**
11
+ * Normalize a raw score to the 0-100 scale.
12
+ * Falls back to clamping if no source config is known.
13
+ */
14
+ export declare function normalizeScore(rawScore: number, source: BenchmarkSource, configOverride?: NormalizationConfig): number;
15
+ /**
16
+ * Normalize an array of benchmark results in place,
17
+ * setting the `score` field from `rawScore`.
18
+ */
19
+ export declare function normalizeBenchmarkResults(results: BenchmarkResult[]): BenchmarkResult[];
20
+ export {};
@@ -0,0 +1,38 @@
// @bun
// src/scoring/normalizer.ts
// Known raw-score ranges per benchmark source, used to rescale onto 0-100.
var SOURCE_NORMALIZATION = {
  "chatbot-arena": { min: 800, max: 1400, invertScale: false },
  "swe-bench": { min: 0, max: 100, invertScale: false },
  "human-eval": { min: 0, max: 100, invertScale: false },
  mmlu: { min: 0, max: 100, invertScale: false },
  gpqa: { min: 0, max: 100, invertScale: false },
  arc: { min: 0, max: 100, invertScale: false },
  truthfulqa: { min: 0, max: 100, invertScale: false },
  "tau-bench": { min: 0, max: 100, invertScale: false },
  "artificial-analysis": { min: 0, max: 100, invertScale: false }
};
// Clamp a value into [0, 100].
function clampToScale(value) {
  return Math.max(0, Math.min(100, value));
}
/**
 * Map a raw benchmark score onto the 0-100 scale using the source's
 * known range. Unknown sources fall back to simple clamping.
 */
function normalizeScore(rawScore, source, configOverride) {
  const config = configOverride ?? SOURCE_NORMALIZATION[source];
  if (!config) {
    return clampToScale(rawScore);
  }
  const { min, max, invertScale } = config;
  const span = max - min;
  if (span === 0) {
    return 50; // degenerate range: every raw score maps to the midpoint
  }
  const scaled = ((rawScore - min) / span) * 100;
  return clampToScale(invertScale ? 100 - scaled : scaled);
}
/**
 * Return copies of the given results with `score` recomputed from
 * `rawScore` (falling back to the existing `score` when no numeric
 * `rawScore` is present). The input array is not mutated.
 */
function normalizeBenchmarkResults(results) {
  return results.map((result) => {
    const raw = typeof result.rawScore === "number" ? result.rawScore : result.score;
    return { ...result, score: normalizeScore(raw, result.source) };
  });
}
export {
  normalizeScore,
  normalizeBenchmarkResults
};
@@ -0,0 +1,19 @@
1
+ import type { BenchmarkResult, BenchmarkResultListResult, BenchmarkResultQuery, IngestionRun, ModelProfile, ModelRanking, RankingListResult, RankingQuery } from './types';
2
+ /**
3
+ * Storage interface for the provider ranking system.
4
+ *
5
+ * Lib provides an in-memory implementation; the module layer
6
+ * adds a Postgres-backed implementation.
7
+ */
8
+ export interface ProviderRankingStore {
9
+ upsertBenchmarkResult(result: BenchmarkResult): Promise<void>;
10
+ getBenchmarkResult(id: string): Promise<BenchmarkResult | null>;
11
+ listBenchmarkResults(query: BenchmarkResultQuery): Promise<BenchmarkResultListResult>;
12
+ upsertModelRanking(ranking: ModelRanking): Promise<void>;
13
+ getModelRanking(modelId: string): Promise<ModelRanking | null>;
14
+ listModelRankings(query: RankingQuery): Promise<RankingListResult>;
15
+ getModelProfile(modelId: string): Promise<ModelProfile | null>;
16
+ createIngestionRun(run: IngestionRun): Promise<void>;
17
+ updateIngestionRun(id: string, update: Partial<IngestionRun>): Promise<void>;
18
+ getIngestionRun(id: string): Promise<IngestionRun | null>;
19
+ }
package/dist/store.js ADDED
@@ -0,0 +1 @@
1
+ // @bun