@contractspec/lib.provider-ranking 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/README.md +44 -0
  2. package/dist/browser/eval/index.js +101 -0
  3. package/dist/browser/eval/runner.js +101 -0
  4. package/dist/browser/eval/types.js +0 -0
  5. package/dist/browser/in-memory-store.js +92 -0
  6. package/dist/browser/index.js +105 -0
  7. package/dist/browser/ingesters/artificial-analysis.js +149 -0
  8. package/dist/browser/ingesters/chatbot-arena.js +142 -0
  9. package/dist/browser/ingesters/fetch-utils.js +39 -0
  10. package/dist/browser/ingesters/index.js +418 -0
  11. package/dist/browser/ingesters/open-llm-leaderboard.js +108 -0
  12. package/dist/browser/ingesters/registry.js +412 -0
  13. package/dist/browser/ingesters/swe-bench.js +105 -0
  14. package/dist/browser/ingesters/types.js +0 -0
  15. package/dist/browser/scoring/composite-scorer.js +122 -0
  16. package/dist/browser/scoring/dimension-weights.js +39 -0
  17. package/dist/browser/scoring/index.js +161 -0
  18. package/dist/browser/scoring/normalizer.js +37 -0
  19. package/dist/browser/store.js +0 -0
  20. package/dist/browser/types.js +14 -0
  21. package/dist/eval/index.d.ts +2 -0
  22. package/dist/eval/index.js +102 -0
  23. package/dist/eval/runner.d.ts +18 -0
  24. package/dist/eval/runner.js +102 -0
  25. package/dist/eval/types.d.ts +51 -0
  26. package/dist/eval/types.js +1 -0
  27. package/dist/in-memory-store.d.ts +17 -0
  28. package/dist/in-memory-store.js +93 -0
  29. package/dist/index.d.ts +4 -0
  30. package/dist/index.js +106 -0
  31. package/dist/ingesters/artificial-analysis.d.ts +8 -0
  32. package/dist/ingesters/artificial-analysis.js +150 -0
  33. package/dist/ingesters/chatbot-arena.d.ts +8 -0
  34. package/dist/ingesters/chatbot-arena.js +143 -0
  35. package/dist/ingesters/fetch-utils.d.ts +11 -0
  36. package/dist/ingesters/fetch-utils.js +40 -0
  37. package/dist/ingesters/index.d.ts +7 -0
  38. package/dist/ingesters/index.js +419 -0
  39. package/dist/ingesters/open-llm-leaderboard.d.ts +8 -0
  40. package/dist/ingesters/open-llm-leaderboard.js +109 -0
  41. package/dist/ingesters/registry.d.ts +17 -0
  42. package/dist/ingesters/registry.js +413 -0
  43. package/dist/ingesters/swe-bench.d.ts +8 -0
  44. package/dist/ingesters/swe-bench.js +106 -0
  45. package/dist/ingesters/types.d.ts +31 -0
  46. package/dist/ingesters/types.js +1 -0
  47. package/dist/node/eval/index.js +101 -0
  48. package/dist/node/eval/runner.js +101 -0
  49. package/dist/node/eval/types.js +0 -0
  50. package/dist/node/in-memory-store.js +92 -0
  51. package/dist/node/index.js +105 -0
  52. package/dist/node/ingesters/artificial-analysis.js +149 -0
  53. package/dist/node/ingesters/chatbot-arena.js +142 -0
  54. package/dist/node/ingesters/fetch-utils.js +39 -0
  55. package/dist/node/ingesters/index.js +418 -0
  56. package/dist/node/ingesters/open-llm-leaderboard.js +108 -0
  57. package/dist/node/ingesters/registry.js +412 -0
  58. package/dist/node/ingesters/swe-bench.js +105 -0
  59. package/dist/node/ingesters/types.js +0 -0
  60. package/dist/node/scoring/composite-scorer.js +122 -0
  61. package/dist/node/scoring/dimension-weights.js +39 -0
  62. package/dist/node/scoring/index.js +161 -0
  63. package/dist/node/scoring/normalizer.js +37 -0
  64. package/dist/node/store.js +0 -0
  65. package/dist/node/types.js +14 -0
  66. package/dist/scoring/composite-scorer.d.ts +10 -0
  67. package/dist/scoring/composite-scorer.js +123 -0
  68. package/dist/scoring/dimension-weights.d.ts +8 -0
  69. package/dist/scoring/dimension-weights.js +40 -0
  70. package/dist/scoring/index.d.ts +3 -0
  71. package/dist/scoring/index.js +162 -0
  72. package/dist/scoring/normalizer.d.ts +20 -0
  73. package/dist/scoring/normalizer.js +38 -0
  74. package/dist/store.d.ts +19 -0
  75. package/dist/store.js +1 -0
  76. package/dist/types.d.ts +100 -0
  77. package/dist/types.js +15 -0
  78. package/package.json +362 -0
@@ -0,0 +1,108 @@
1
// src/ingesters/fetch-utils.ts

/**
 * Fetch `url`, retrying transient failures with exponential backoff.
 *
 * Transient means network-level failures (the fetch rejected) or HTTP 5xx
 * responses. Non-5xx HTTP errors (e.g. 404) are permanent, so they are
 * thrown immediately instead of burning retries on a request that cannot
 * succeed. (Previously the 4xx error was thrown inside the `try`, caught
 * by this function's own `catch`, and retried.)
 *
 * @param {string} url - Target URL.
 * @param {{ fetch?: typeof fetch, maxRetries?: number, baseDelayMs?: number }} [options]
 *   `fetch` defaults to globalThis.fetch; `maxRetries` (extra attempts)
 *   defaults to 2; `baseDelayMs` defaults to 500 and doubles per attempt.
 * @returns {Promise<Response>} The first OK response.
 * @throws {Error} Immediately on a non-5xx HTTP error, or the last error
 *   once all attempts are exhausted.
 */
async function fetchWithRetry(url, options) {
  const fetchFn = options?.fetch ?? globalThis.fetch;
  const maxRetries = options?.maxRetries ?? 2;
  const baseDelay = options?.baseDelayMs ?? 500;
  let lastError;
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    let response;
    try {
      response = await fetchFn(url);
    } catch (error) {
      // Network failure: remember it and retry if attempts remain.
      lastError = error instanceof Error ? error : new Error(String(error));
      if (attempt < maxRetries) {
        await sleep(baseDelay * Math.pow(2, attempt));
      }
      continue;
    }
    if (response.ok) {
      return response;
    }
    const httpError = new Error(`Fetch failed: ${response.status} ${response.statusText} (${url})`);
    if (response.status < 500) {
      // Client errors will not change on retry; fail fast.
      throw httpError;
    }
    lastError = httpError;
    if (attempt < maxRetries) {
      await sleep(baseDelay * Math.pow(2, attempt));
    }
  }
  throw lastError ?? new Error(`Fetch failed after ${maxRetries + 1} attempts: ${url}`);
}

/**
 * Parse `text` as JSON, rethrowing with a message that identifies the
 * source (`label`) and a 200-char preview of the offending payload.
 */
function parseJsonSafe(text, label) {
  try {
    return JSON.parse(text);
  } catch {
    throw new Error(`Failed to parse JSON response from ${label}: ${text.slice(0, 200)}`);
  }
}

/** Resolve after `ms` milliseconds. */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
36
+
37
// src/ingesters/open-llm-leaderboard.ts
var DEFAULT_HF_URL = "https://huggingface.co/api/spaces/open-llm-leaderboard/open_llm_leaderboard/results";

// Leaderboard columns → ranking dimensions. Each mapping emits one
// result per model, tagged with its own source key.
var BENCHMARK_MAPPINGS = [
  { field: "mmlu", dimension: "reasoning", sourceKey: "mmlu" },
  { field: "arc", dimension: "reasoning", sourceKey: "arc" },
  { field: "gpqa", dimension: "reasoning", sourceKey: "gpqa" },
  { field: "truthfulqa", dimension: "safety", sourceKey: "truthfulqa" }
];

// Ingester for the HuggingFace Open LLM Leaderboard.
// NOTE(review): assumes the endpoint returns an array of rows with
// model_name / organization / average plus the benchmark fields above —
// confirm against the live API shape.
var openLlmLeaderboardIngester = {
  source: "mmlu",
  displayName: "Open LLM Leaderboard",
  description: "Aggregated benchmark scores from the HuggingFace Open LLM Leaderboard.",
  async ingest(options) {
    const response = await fetchWithRetry(options?.sourceUrl ?? DEFAULT_HF_URL, { fetch: options?.fetch });
    const payload = parseJsonSafe(await response.text(), "Open LLM Leaderboard");
    const timestamp = new Date();
    const wantedDims = options?.dimensions ? new Set(options.dimensions) : null;
    const slugOf = (name) => name.toLowerCase().replace(/\s+/g, "-");

    let rows = payload.filter((row) => row.model_name);
    if (options?.modelFilter?.length) {
      const allowed = new Set(options.modelFilter);
      rows = rows.filter((row) => allowed.has(slugOf(row.model_name)));
    }

    const results = rows.flatMap((row) => {
      const modelId = slugOf(row.model_name);
      const providerKey = mapOrganizationToProvider(row.organization?.toLowerCase() ?? "unknown");
      return BENCHMARK_MAPPINGS.flatMap((mapping) => {
        if (wantedDims && !wantedDims.has(mapping.dimension)) {
          return [];
        }
        const value = row[mapping.field];
        if (typeof value !== "number") {
          return [];
        }
        return [{
          id: `open-llm:${modelId}:${mapping.sourceKey}`,
          modelId,
          providerKey,
          source: mapping.sourceKey,
          dimension: mapping.dimension,
          score: Math.max(0, Math.min(100, value)),
          rawScore: value,
          metadata: {
            organization: row.organization,
            leaderboard_average: row.average
          },
          measuredAt: timestamp,
          ingestedAt: timestamp
        }];
      });
    });
    return options?.maxResults ? results.slice(0, options.maxResults) : results;
  }
};
92
/**
 * Canonicalize a leaderboard organization name to a provider key.
 * Unrecognized organizations pass through unchanged.
 */
function mapOrganizationToProvider(org) {
  // Substring → canonical provider key; first match wins.
  const rules = [
    ["openai", "openai"],
    ["anthropic", "anthropic"],
    ["google", "gemini"],
    ["deepmind", "gemini"],
    ["mistral", "mistral"],
    ["meta", "meta"]
  ];
  const needle = org.toLowerCase();
  for (const [fragment, provider] of rules) {
    if (needle.includes(fragment)) {
      return provider;
    }
  }
  return org;
}
106
+ export {
107
+ openLlmLeaderboardIngester
108
+ };
@@ -0,0 +1,412 @@
1
// src/ingesters/fetch-utils.ts

/**
 * Fetch `url`, retrying transient failures with exponential backoff.
 *
 * Transient means network-level failures (the fetch rejected) or HTTP 5xx
 * responses. Non-5xx HTTP errors (e.g. 404) are permanent, so they are
 * thrown immediately instead of burning retries on a request that cannot
 * succeed. (Previously the 4xx error was thrown inside the `try`, caught
 * by this function's own `catch`, and retried.)
 *
 * @param {string} url - Target URL.
 * @param {{ fetch?: typeof fetch, maxRetries?: number, baseDelayMs?: number }} [options]
 *   `fetch` defaults to globalThis.fetch; `maxRetries` (extra attempts)
 *   defaults to 2; `baseDelayMs` defaults to 500 and doubles per attempt.
 * @returns {Promise<Response>} The first OK response.
 * @throws {Error} Immediately on a non-5xx HTTP error, or the last error
 *   once all attempts are exhausted.
 */
async function fetchWithRetry(url, options) {
  const fetchFn = options?.fetch ?? globalThis.fetch;
  const maxRetries = options?.maxRetries ?? 2;
  const baseDelay = options?.baseDelayMs ?? 500;
  let lastError;
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    let response;
    try {
      response = await fetchFn(url);
    } catch (error) {
      // Network failure: remember it and retry if attempts remain.
      lastError = error instanceof Error ? error : new Error(String(error));
      if (attempt < maxRetries) {
        await sleep(baseDelay * Math.pow(2, attempt));
      }
      continue;
    }
    if (response.ok) {
      return response;
    }
    const httpError = new Error(`Fetch failed: ${response.status} ${response.statusText} (${url})`);
    if (response.status < 500) {
      // Client errors will not change on retry; fail fast.
      throw httpError;
    }
    lastError = httpError;
    if (attempt < maxRetries) {
      await sleep(baseDelay * Math.pow(2, attempt));
    }
  }
  throw lastError ?? new Error(`Fetch failed after ${maxRetries + 1} attempts: ${url}`);
}

/**
 * Parse `text` as JSON, rethrowing with a message that identifies the
 * source (`label`) and a 200-char preview of the offending payload.
 */
function parseJsonSafe(text, label) {
  try {
    return JSON.parse(text);
  } catch {
    throw new Error(`Failed to parse JSON response from ${label}: ${text.slice(0, 200)}`);
  }
}

/** Resolve after `ms` milliseconds. */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
36
+
37
// src/ingesters/artificial-analysis.ts
var DEFAULT_AA_URL = "https://artificialanalysis.ai/api/models";

// Ingester for the Artificial Analysis model API; emits up to four
// results per model (reasoning, latency, cost, context).
// NOTE(review): assumes the endpoint returns an array of rows with
// model_id / provider / model_name plus the metric fields read below —
// confirm against the live API shape.
var artificialAnalysisIngester = {
  source: "artificial-analysis",
  displayName: "Artificial Analysis",
  description: "Quality, speed, and cost benchmarks from Artificial Analysis.",
  async ingest(options) {
    const response = await fetchWithRetry(options?.sourceUrl ?? DEFAULT_AA_URL, { fetch: options?.fetch });
    const payload = parseJsonSafe(await response.text(), "Artificial Analysis");
    const timestamp = new Date();
    const wantedDims = options?.dimensions ? new Set(options.dimensions) : null;
    const wants = (dimension) => !wantedDims || wantedDims.has(dimension);

    let rows = payload.filter((row) => row.model_id && row.provider);
    if (options?.modelFilter?.length) {
      const allowed = new Set(options.modelFilter);
      rows = rows.filter((row) => allowed.has(row.model_id));
    }

    const results = [];
    for (const row of rows) {
      // Shared shape for every dimension emitted for this model.
      const emit = (dimension, score, rawScore) => {
        results.push({
          id: `artificial-analysis:${row.model_id}:${dimension}`,
          modelId: row.model_id,
          providerKey: row.provider.toLowerCase(),
          source: "artificial-analysis",
          dimension,
          score,
          rawScore,
          metadata: { model_name: row.model_name },
          measuredAt: timestamp,
          ingestedAt: timestamp
        });
      };
      if (row.quality_score != null && wants("reasoning")) {
        emit("reasoning", Math.max(0, Math.min(100, row.quality_score)), row.quality_score);
      }
      if ((row.tokens_per_second != null || row.ttft_ms != null) && wants("latency")) {
        emit("latency", computeLatencyScore(row.tokens_per_second, row.ttft_ms), {
          tokens_per_second: row.tokens_per_second,
          ttft_ms: row.ttft_ms
        });
      }
      if ((row.price_per_million_input_tokens != null || row.price_per_million_output_tokens != null) && wants("cost")) {
        emit("cost", computeCostScore(row.price_per_million_input_tokens, row.price_per_million_output_tokens), {
          input: row.price_per_million_input_tokens,
          output: row.price_per_million_output_tokens
        });
      }
      if (row.context_window != null && wants("context")) {
        emit("context", computeContextScore(row.context_window), row.context_window);
      }
    }
    return options?.maxResults ? results.slice(0, options.maxResults) : results;
  }
};
127
/**
 * Score latency on 0–100 from throughput and time-to-first-token.
 * 200 tokens/s maps to a full throughput score (unknown → neutral 50);
 * each 100 ms of TTFT beyond 200 ms costs 10 points, capped at 30.
 */
function computeLatencyScore(tokensPerSec, ttftMs) {
  const throughputScore =
    tokensPerSec == null ? 50 : Math.min(100, (tokensPerSec / 200) * 100);
  const ttftPenalty =
    ttftMs == null ? 0 : Math.max(0, Math.min(30, ((ttftMs - 200) / 100) * 10));
  const score = Math.max(0, throughputScore - ttftPenalty);
  return Math.round(score * 100) / 100;
}
138
/**
 * Score cost on 0–100: $0 average per-million-token price → 100,
 * a $30+ average → 0, linear in between. Missing prices count as $0.
 */
function computeCostScore(inputCost, outputCost) {
  const input = inputCost ?? 0;
  const output = outputCost ?? 0;
  const average = (input + output) / 2;
  const raw = 100 - (average / 30) * 100;
  const clamped = raw > 0 ? raw : 0;
  return Math.round(clamped * 100) / 100;
}
143
/**
 * Score context window on 0–100: 1M tokens (or more) → 100, linear below.
 */
function computeContextScore(contextWindow) {
  const scaled = (contextWindow / 1e6) * 100;
  const capped = scaled > 100 ? 100 : scaled;
  return Math.round(capped * 100) / 100;
}
147
+
148
// src/scoring/normalizer.ts

// Known raw-score ranges per source, used to project scores onto 0–100.
// `invertScale` would flip the scale for lower-is-better sources (none yet).
var SOURCE_NORMALIZATION = {
  "chatbot-arena": { min: 800, max: 1400, invertScale: false },
  "swe-bench": { min: 0, max: 100, invertScale: false },
  "human-eval": { min: 0, max: 100, invertScale: false },
  mmlu: { min: 0, max: 100, invertScale: false },
  gpqa: { min: 0, max: 100, invertScale: false },
  arc: { min: 0, max: 100, invertScale: false },
  truthfulqa: { min: 0, max: 100, invertScale: false },
  "tau-bench": { min: 0, max: 100, invertScale: false },
  "artificial-analysis": { min: 0, max: 100, invertScale: false }
};

/**
 * Project `rawScore` onto 0–100 using the source's known range (or
 * `configOverride` when given). Sources without calibration data are
 * simply clamped to [0, 100].
 */
function normalizeScore(rawScore, source, configOverride) {
  const config = configOverride ?? SOURCE_NORMALIZATION[source];
  if (!config) {
    return Math.min(100, Math.max(0, rawScore));
  }
  const span = config.max - config.min;
  if (span === 0) {
    return 50; // degenerate range: treat every score as average
  }
  const scaled = ((rawScore - config.min) / span) * 100;
  const oriented = config.invertScale ? 100 - scaled : scaled;
  return Math.min(100, Math.max(0, oriented));
}

/**
 * Re-derive `score` for each result from its numeric rawScore (falling
 * back to the existing score when rawScore is not a number).
 */
function normalizeBenchmarkResults(results) {
  return results.map((result) => {
    const raw = typeof result.rawScore === "number" ? result.rawScore : result.score;
    return { ...result, score: normalizeScore(raw, result.source) };
  });
}
181
+
182
// src/ingesters/chatbot-arena.ts
var DEFAULT_ARENA_URL = "https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard/resolve/main/results.json";

// Human-preference Elo leaderboard; contributes only to "reasoning".
// NOTE(review): assumes rows expose "Arena Elo rating", Model,
// Organization, License, and optionally `key` — confirm payload shape.
var chatbotArenaIngester = {
  source: "chatbot-arena",
  displayName: "Chatbot Arena (LMSYS)",
  description: "Elo ratings from the LMSYS Chatbot Arena human preference leaderboard.",
  async ingest(options) {
    // Every arena score lands on "reasoning": bail out early when the
    // caller asked only for other dimensions.
    if (options?.dimensions?.length && !options.dimensions.includes("reasoning")) {
      return [];
    }
    const response = await fetchWithRetry(options?.sourceUrl ?? DEFAULT_ARENA_URL, { fetch: options?.fetch });
    const payload = parseJsonSafe(await response.text(), "Chatbot Arena");
    const timestamp = new Date();

    let rows = payload.filter((row) => row["Arena Elo rating"] != null && row.Model);
    if (options?.modelFilter?.length) {
      const allowed = new Set(options.modelFilter);
      rows = rows.filter((row) => allowed.has(row.key ?? row.Model));
    }
    if (options?.maxResults) {
      rows = rows.slice(0, options.maxResults);
    }

    let results = rows.map((row) => {
      const elo = row["Arena Elo rating"];
      const modelId = row.key ?? row.Model.toLowerCase().replace(/\s+/g, "-");
      return {
        id: `chatbot-arena:${modelId}:reasoning`,
        modelId,
        providerKey: mapOrganizationToProvider(row.Organization?.toLowerCase() ?? "unknown"),
        source: "chatbot-arena",
        dimension: "reasoning",
        score: normalizeScore(elo, "chatbot-arena"),
        rawScore: elo,
        metadata: {
          organization: row.Organization,
          license: row.License
        },
        measuredAt: timestamp,
        ingestedAt: timestamp
      };
    });

    // NOTE(review): measuredAt is always "now" here, so these date filters
    // can only keep everything or drop everything — verify intent.
    const { fromDate, toDate } = options ?? {};
    if (fromDate) {
      results = results.filter((r) => r.measuredAt >= fromDate);
    }
    if (toDate) {
      results = results.filter((r) => r.measuredAt <= toDate);
    }
    return results;
  }
};
235
/**
 * Canonicalize a leaderboard organization name to a provider key.
 * Unrecognized organizations pass through unchanged.
 */
function mapOrganizationToProvider(org) {
  // Substring → canonical provider key; first match wins.
  const rules = [
    ["openai", "openai"],
    ["anthropic", "anthropic"],
    ["google", "gemini"],
    ["deepmind", "gemini"],
    ["mistral", "mistral"],
    ["meta", "meta"],
    ["cohere", "cohere"]
  ];
  const needle = org.toLowerCase();
  for (const [fragment, provider] of rules) {
    if (needle.includes(fragment)) {
      return provider;
    }
  }
  return org;
}
251
+
252
// src/ingesters/swe-bench.ts
var DEFAULT_SWE_BENCH_URL = "https://raw.githubusercontent.com/princeton-nlp/SWE-bench/main/docs/leaderboard.json";

// Coding-dimension ingester fed by SWE-bench resolved rates.
// NOTE(review): assumes rows expose model / resolved_rate and optionally
// organization / date — confirm leaderboard JSON shape.
var sweBenchIngester = {
  source: "swe-bench",
  displayName: "SWE-bench",
  description: "Software engineering task completion rates from SWE-bench.",
  async ingest(options) {
    // SWE-bench only informs "coding"; skip entirely when not requested.
    if (options?.dimensions?.length && !options.dimensions.includes("coding")) {
      return [];
    }
    const response = await fetchWithRetry(options?.sourceUrl ?? DEFAULT_SWE_BENCH_URL, { fetch: options?.fetch });
    const payload = parseJsonSafe(await response.text(), "SWE-bench");
    const timestamp = new Date();
    const slugOf = (name) => name.toLowerCase().replace(/\s+/g, "-");

    let rows = payload.filter((row) => row.model && row.resolved_rate != null);
    if (options?.modelFilter?.length) {
      const allowed = new Set(options.modelFilter);
      rows = rows.filter((row) => allowed.has(slugOf(row.model)));
    }
    if (options?.maxResults) {
      rows = rows.slice(0, options.maxResults);
    }

    let results = rows.map((row) => {
      const modelId = slugOf(row.model);
      return {
        id: `swe-bench:${modelId}:coding`,
        modelId,
        providerKey: mapOrganizationToProvider2(row.organization?.toLowerCase() ?? "unknown"),
        source: "swe-bench",
        dimension: "coding",
        score: Math.max(0, Math.min(100, row.resolved_rate)),
        rawScore: row.resolved_rate,
        metadata: {
          organization: row.organization,
          date: row.date
        },
        // Prefer the leaderboard's own submission date when present.
        measuredAt: row.date ? new Date(row.date) : timestamp,
        ingestedAt: timestamp
      };
    });

    const { fromDate, toDate } = options ?? {};
    if (fromDate) {
      results = results.filter((r) => r.measuredAt >= fromDate);
    }
    if (toDate) {
      results = results.filter((r) => r.measuredAt <= toDate);
    }
    return results;
  }
};
304
/**
 * Canonicalize a leaderboard organization name to a provider key.
 * Unrecognized organizations pass through unchanged.
 */
function mapOrganizationToProvider2(org) {
  // Substring → canonical provider key; first match wins.
  const rules = [
    ["openai", "openai"],
    ["anthropic", "anthropic"],
    ["google", "gemini"],
    ["deepmind", "gemini"],
    ["mistral", "mistral"],
    ["meta", "meta"]
  ];
  const needle = org.toLowerCase();
  for (const [fragment, provider] of rules) {
    if (needle.includes(fragment)) {
      return provider;
    }
  }
  return org;
}
318
+
319
// src/ingesters/open-llm-leaderboard.ts
var DEFAULT_HF_URL = "https://huggingface.co/api/spaces/open-llm-leaderboard/open_llm_leaderboard/results";

// Leaderboard columns → ranking dimensions. Each mapping emits one
// result per model, tagged with its own source key.
var BENCHMARK_MAPPINGS = [
  { field: "mmlu", dimension: "reasoning", sourceKey: "mmlu" },
  { field: "arc", dimension: "reasoning", sourceKey: "arc" },
  { field: "gpqa", dimension: "reasoning", sourceKey: "gpqa" },
  { field: "truthfulqa", dimension: "safety", sourceKey: "truthfulqa" }
];

// Ingester for the HuggingFace Open LLM Leaderboard.
// NOTE(review): assumes the endpoint returns an array of rows with
// model_name / organization / average plus the benchmark fields above —
// confirm against the live API shape.
var openLlmLeaderboardIngester = {
  source: "mmlu",
  displayName: "Open LLM Leaderboard",
  description: "Aggregated benchmark scores from the HuggingFace Open LLM Leaderboard.",
  async ingest(options) {
    const response = await fetchWithRetry(options?.sourceUrl ?? DEFAULT_HF_URL, { fetch: options?.fetch });
    const payload = parseJsonSafe(await response.text(), "Open LLM Leaderboard");
    const timestamp = new Date();
    const wantedDims = options?.dimensions ? new Set(options.dimensions) : null;
    const slugOf = (name) => name.toLowerCase().replace(/\s+/g, "-");

    let rows = payload.filter((row) => row.model_name);
    if (options?.modelFilter?.length) {
      const allowed = new Set(options.modelFilter);
      rows = rows.filter((row) => allowed.has(slugOf(row.model_name)));
    }

    const results = rows.flatMap((row) => {
      const modelId = slugOf(row.model_name);
      const providerKey = mapOrganizationToProvider3(row.organization?.toLowerCase() ?? "unknown");
      return BENCHMARK_MAPPINGS.flatMap((mapping) => {
        if (wantedDims && !wantedDims.has(mapping.dimension)) {
          return [];
        }
        const value = row[mapping.field];
        if (typeof value !== "number") {
          return [];
        }
        return [{
          id: `open-llm:${modelId}:${mapping.sourceKey}`,
          modelId,
          providerKey,
          source: mapping.sourceKey,
          dimension: mapping.dimension,
          score: Math.max(0, Math.min(100, value)),
          rawScore: value,
          metadata: {
            organization: row.organization,
            leaderboard_average: row.average
          },
          measuredAt: timestamp,
          ingestedAt: timestamp
        }];
      });
    });
    return options?.maxResults ? results.slice(0, options.maxResults) : results;
  }
};
374
/**
 * Canonicalize a leaderboard organization name to a provider key.
 * Unrecognized organizations pass through unchanged.
 */
function mapOrganizationToProvider3(org) {
  // Substring → canonical provider key; first match wins.
  const rules = [
    ["openai", "openai"],
    ["anthropic", "anthropic"],
    ["google", "gemini"],
    ["deepmind", "gemini"],
    ["mistral", "mistral"],
    ["meta", "meta"]
  ];
  const needle = org.toLowerCase();
  for (const [fragment, provider] of rules) {
    if (needle.includes(fragment)) {
      return provider;
    }
  }
  return org;
}
388
+
389
// src/ingesters/registry.ts

/** Collection of benchmark ingesters keyed by their `source` string. */
class IngesterRegistry {
  constructor() {
    // source → ingester; Map preserves registration order for list().
    this.ingesters = new Map();
  }

  /** Add (or replace) an ingester under its `source` key; chainable. */
  register(ingester) {
    this.ingesters.set(ingester.source, ingester);
    return this;
  }

  /** Look up an ingester by source key; `undefined` when absent. */
  get(source) {
    return this.ingesters.get(source);
  }

  /** All registered ingesters, in registration order. */
  list() {
    return [...this.ingesters.values()];
  }

  /** Whether an ingester is registered for `source`. */
  has(source) {
    return this.ingesters.has(source);
  }
}
406
/** Build a registry preloaded with every built-in leaderboard ingester. */
function createDefaultIngesterRegistry() {
  const registry = new IngesterRegistry();
  for (const ingester of [
    chatbotArenaIngester,
    artificialAnalysisIngester,
    sweBenchIngester,
    openLlmLeaderboardIngester
  ]) {
    registry.register(ingester);
  }
  return registry;
}
409
+ export {
410
+ createDefaultIngesterRegistry,
411
+ IngesterRegistry
412
+ };
@@ -0,0 +1,105 @@
1
// src/ingesters/fetch-utils.ts

/**
 * Fetch `url`, retrying transient failures with exponential backoff.
 *
 * Transient means network-level failures (the fetch rejected) or HTTP 5xx
 * responses. Non-5xx HTTP errors (e.g. 404) are permanent, so they are
 * thrown immediately instead of burning retries on a request that cannot
 * succeed. (Previously the 4xx error was thrown inside the `try`, caught
 * by this function's own `catch`, and retried.)
 *
 * @param {string} url - Target URL.
 * @param {{ fetch?: typeof fetch, maxRetries?: number, baseDelayMs?: number }} [options]
 *   `fetch` defaults to globalThis.fetch; `maxRetries` (extra attempts)
 *   defaults to 2; `baseDelayMs` defaults to 500 and doubles per attempt.
 * @returns {Promise<Response>} The first OK response.
 * @throws {Error} Immediately on a non-5xx HTTP error, or the last error
 *   once all attempts are exhausted.
 */
async function fetchWithRetry(url, options) {
  const fetchFn = options?.fetch ?? globalThis.fetch;
  const maxRetries = options?.maxRetries ?? 2;
  const baseDelay = options?.baseDelayMs ?? 500;
  let lastError;
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    let response;
    try {
      response = await fetchFn(url);
    } catch (error) {
      // Network failure: remember it and retry if attempts remain.
      lastError = error instanceof Error ? error : new Error(String(error));
      if (attempt < maxRetries) {
        await sleep(baseDelay * Math.pow(2, attempt));
      }
      continue;
    }
    if (response.ok) {
      return response;
    }
    const httpError = new Error(`Fetch failed: ${response.status} ${response.statusText} (${url})`);
    if (response.status < 500) {
      // Client errors will not change on retry; fail fast.
      throw httpError;
    }
    lastError = httpError;
    if (attempt < maxRetries) {
      await sleep(baseDelay * Math.pow(2, attempt));
    }
  }
  throw lastError ?? new Error(`Fetch failed after ${maxRetries + 1} attempts: ${url}`);
}

/**
 * Parse `text` as JSON, rethrowing with a message that identifies the
 * source (`label`) and a 200-char preview of the offending payload.
 */
function parseJsonSafe(text, label) {
  try {
    return JSON.parse(text);
  } catch {
    throw new Error(`Failed to parse JSON response from ${label}: ${text.slice(0, 200)}`);
  }
}

/** Resolve after `ms` milliseconds. */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
36
+
37
// src/ingesters/swe-bench.ts
var DEFAULT_SWE_BENCH_URL = "https://raw.githubusercontent.com/princeton-nlp/SWE-bench/main/docs/leaderboard.json";

// Coding-dimension ingester fed by SWE-bench resolved rates.
// NOTE(review): assumes rows expose model / resolved_rate and optionally
// organization / date — confirm leaderboard JSON shape.
var sweBenchIngester = {
  source: "swe-bench",
  displayName: "SWE-bench",
  description: "Software engineering task completion rates from SWE-bench.",
  async ingest(options) {
    // SWE-bench only informs "coding"; skip entirely when not requested.
    if (options?.dimensions?.length && !options.dimensions.includes("coding")) {
      return [];
    }
    const response = await fetchWithRetry(options?.sourceUrl ?? DEFAULT_SWE_BENCH_URL, { fetch: options?.fetch });
    const payload = parseJsonSafe(await response.text(), "SWE-bench");
    const timestamp = new Date();
    const slugOf = (name) => name.toLowerCase().replace(/\s+/g, "-");

    let rows = payload.filter((row) => row.model && row.resolved_rate != null);
    if (options?.modelFilter?.length) {
      const allowed = new Set(options.modelFilter);
      rows = rows.filter((row) => allowed.has(slugOf(row.model)));
    }
    if (options?.maxResults) {
      rows = rows.slice(0, options.maxResults);
    }

    let results = rows.map((row) => {
      const modelId = slugOf(row.model);
      return {
        id: `swe-bench:${modelId}:coding`,
        modelId,
        providerKey: mapOrganizationToProvider(row.organization?.toLowerCase() ?? "unknown"),
        source: "swe-bench",
        dimension: "coding",
        score: Math.max(0, Math.min(100, row.resolved_rate)),
        rawScore: row.resolved_rate,
        metadata: {
          organization: row.organization,
          date: row.date
        },
        // Prefer the leaderboard's own submission date when present.
        measuredAt: row.date ? new Date(row.date) : timestamp,
        ingestedAt: timestamp
      };
    });

    const { fromDate, toDate } = options ?? {};
    if (fromDate) {
      results = results.filter((r) => r.measuredAt >= fromDate);
    }
    if (toDate) {
      results = results.filter((r) => r.measuredAt <= toDate);
    }
    return results;
  }
};
89
/**
 * Canonicalize a leaderboard organization name to a provider key.
 * Unrecognized organizations pass through unchanged.
 */
function mapOrganizationToProvider(org) {
  // Substring → canonical provider key; first match wins.
  const rules = [
    ["openai", "openai"],
    ["anthropic", "anthropic"],
    ["google", "gemini"],
    ["deepmind", "gemini"],
    ["mistral", "mistral"],
    ["meta", "meta"]
  ];
  const needle = org.toLowerCase();
  for (const [fragment, provider] of rules) {
    if (needle.includes(fragment)) {
      return provider;
    }
  }
  return org;
}
103
+ export {
104
+ sweBenchIngester
105
+ };
File without changes