npm - @aggc/or-info - Versions diffs - 0.2.12 → 0.2.14 - Mend

@aggc/or-info 0.2.12 → 0.2.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/README.md CHANGED Viewed

@@ -154,17 +154,38 @@ or-info compare openai/gpt-4o deepseek/deepseek-chat-v3-0324 --json
 ### Top models for a task
 ```bash
-or-info top --task coding             # Best coding models
-or-info top --task reasoning          # Best reasoning models
-or-info top --task general            # Best all-rounders
-or-info top --task vision             # Best vision models
-or-info top --task cheap              # Best value for money
-or-info top --task coding --budget 2  # Best coders under $2/M output
+or-info top --task coding                          # Best coding models
+or-info top --task reasoning                       # Best reasoning models
+or-info top --task general                         # Best all-rounders
+or-info top --task vision                          # Best vision models (requires image input)
+or-info top --task cheap                           # Best value for money
+or-info top --task premium                         # Highest quality, ignoring price
+or-info top --task coding --pricing premium        # Best coder regardless of price
+or-info top --task coding --budget 2               # Best coders under $2/M output
 or-info top --task general --limit 10
 ```
-Ranking combines LMArena ELO with price. `--task vision` and `--task coding` additionally
-filter for models that support the required capability (image input / tool use).
+Ranking combines LMArena ELO with price and context window size.
+`--task` controls which ELO category and capability filter to apply.
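+`--pricing` overrides the price-penalty strategy independently of the ELO category, except that `--task cheap` and `--task premium` always force their own pricing mode: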
+| `--pricing` | Effect |
+|-------------|--------|
+| `standard` (default) | Moderate penalty for expensive models |
+| `cheap` | Steep penalty; strongly favours free/low-cost models |
+| `premium` | No penalty; ranks by quality alone |
+Task defaults (when `--pricing` is not set):
+| Task | Default pricing | Capability filter |
+|------|----------------|-------------------|
+| `general` | standard | none |
+| `coding` | standard | soft penalty (−15%) if no tool support |
+| `reasoning` | standard | none |
+| `vision` | standard | hard filter: image input required |
+| `cheap` | cheap | none |
+| `premium` | premium | none |
 ### Cache management
@@ -184,7 +205,7 @@ or-info refresh         # Force-refresh OpenRouter catalog + LMArena ELO
 | `models.get` | Pricing, context, architecture, features and LMArena ELO for a model |
 | `models.list` | List models with optional filter, sort and limit |
 | `models.compare` | Side-by-side comparison of two models |
-| `models.top` | Ranked top models for coding/reasoning/general/vision/cheap |
+| `models.top` | Ranked top models for coding/reasoning/general/vision/cheap/premium; accepts optional `pricing` override |
 | `benchmarks.get` | LMArena ELO score, global rank, vote count and confidence interval for a model |
 | `cache.refresh` | Force-refresh OpenRouter catalog + LMArena ELO |

package/bin/or-info.mjs CHANGED Viewed

@@ -22,7 +22,7 @@ function die(msg) {
   process.exit(1);
 }
-const TOP_TASKS = new Set(['coding', 'reasoning', 'general', 'vision', 'cheap']);
+const TOP_TASKS = new Set(['coding', 'reasoning', 'general', 'vision', 'cheap', 'premium']);
 function parsePositiveInteger(value) {
   const n = Number.parseInt(value, 10);
@@ -173,7 +173,8 @@ program
 program
   .command('top')
   .description('Best models for a task')
-  .option('--task <task>', 'Task: coding, reasoning, general, vision, cheap', 'general')
+  .option('--task <task>', 'Task: coding, reasoning, general, vision, cheap, premium', 'general')
+  .option('--pricing <mode>', 'Price scoring override: standard, cheap, premium', v => { const s = new Set(['standard', 'cheap', 'premium']); if (!s.has(v)) throw new InvalidArgumentError('must be standard, cheap, or premium'); return v; })
   .option('--budget <usd>', 'Max price per 1M output tokens (e.g. 1.00)', parseFloat)
   .option('--limit <n>', 'Number of results', parsePositiveInteger, 5)
   .option('--json', 'Output raw JSON')
@@ -190,6 +191,7 @@ program
     const ranked = rankModels(models, allElo, {
       task: opts.task,
+      pricing: opts.pricing,
       maxPricePerMOutput: opts.budget,
       limit: opts.limit,
     });

package/lib/scorer.mjs CHANGED Viewed

@@ -1,8 +1,8 @@
-import { pricePerMillion, supportsFeature } from './openrouter.mjs';
+import { pricePerMillion, supportsFeature, contextLength } from './openrouter.mjs';
-// ELO range observed on LMArena (2026): ~1050 (weak) to ~1500 (best)
-const ELO_MIN = 1050;
-const ELO_MAX = 1500;
+// ELO range observed on LMArena (2026): ~1000 (weak) to ~1540+ (best)
+const ELO_MIN = 1000;
+const ELO_MAX = 1600;
 function normaliseElo(elo) {
   return Math.max(0, Math.min(100, ((elo - ELO_MIN) / (ELO_MAX - ELO_MIN)) * 100));
@@ -18,40 +18,83 @@ function pricePenalty(outputPerM) {
   return Math.max(0.1, 1 - Math.log10(outputPerM + 1) * 0.15);
 }
-function requiresCapability(task) {
-  if (task === 'vision') return 'vision';
-  if (task === 'coding') return 'tools';
-  return null;
+// Steeper penalty for cheap task — more spread between free and expensive.
+// Free → 1.0, $1/M → 0.92, $5/M → 0.81, $20/M → 0.67
+function cheapPenalty(outputPerM) {
+  if (outputPerM === null || outputPerM === 0) return 1.0;
+  return Math.max(0.1, 1 - Math.log10(outputPerM + 1) * 0.25);
+}
+// Context window bonus [0.9, 1.0]. Larger context is more useful,
+// especially for coding where long files are common.
+function contextBonus(ctx, task) {
+  if (!ctx) return 0.9;
+  if (ctx >= 128_000) return 1.0;
+  if (ctx >= 64_000) return 0.97;
+  if (ctx >= 32_000) return 0.93;
+  return 0.9;
 }
-// Score a model for a task.
+// Score a model for a task with optional pricing override.
 // Returns { score, qualityScore } or null if not eligible.
-export function scoreForTask(model, eloEntry, task = 'general') {
-  const cap = requiresCapability(task);
-  if (cap && !supportsFeature(model, cap)) return null;
+export function scoreForTask(model, eloEntry, task = 'general', pricing) {
+  const { pricingMode, capability } = parseTaskSpec(task, pricing);
+  // Hard filter: vision requires vision capability
+  if (capability === 'vision' && !supportsFeature(model, 'vision')) return null;
   if (!eloEntry?.elo) return null;
   const quality = normaliseElo(eloEntry.elo);
   const price = pricePerMillion(model);
-  const penalty = task === 'cheap'
-    ? pricePenalty(price.output) * 1.4   // aggressively favour cheap
-    : pricePenalty(price.output);
+  const ctx = contextLength(model);
+  // Price penalty: premium ignores price, cheap uses steep curve, others standard
+  let penalty;
+  if (pricingMode === 'premium') {
+    penalty = 1.0;
+  } else if (pricingMode === 'cheap') {
+    penalty = cheapPenalty(price.output);
+  } else {
+    penalty = pricePenalty(price.output);
+  }
+  const ctxB = contextBonus(ctx, task);
+  // Soft penalty for coding without tools (still eligible, just less ideal)
+  const capPenalty = (task === 'coding' && !supportsFeature(model, 'tools')) ? 0.85 : 1.0;
+  const rawScore = quality * penalty * ctxB * capPenalty;
   return {
-    score: Math.round(quality * penalty * 10) / 10,
+    score: Math.round(Math.min(100, rawScore) * 10) / 10,
     qualityScore: Math.round(quality * 10) / 10,
   };
 }
-const CATEGORY_FOR_TASK = { coding: 'coding', reasoning: 'math', vision: null };
+// Parse task into { eloCategory, pricingMode, capability }.
+// This decouples the ELO category from the price penalty strategy.
+// task='coding' → coding ELO, standard pricing
+// task='coding', pricing='premium' → coding ELO, no price penalty
+const TASK_ELO = { coding: 'coding', reasoning: 'math', vision: null, general: null, cheap: null, premium: null };
+const TASK_CAP = { vision: 'vision', coding: 'tools' };
+const PRICING_MODES = new Set(['standard', 'cheap', 'premium']);
+function parseTaskSpec(task, pricing) {
+  const eloCategory = TASK_ELO[task] ?? 'overall';
+  const capability = TASK_CAP[task] ?? null;
+  let pricingMode = pricing ?? 'standard';
+  // Legacy: 'cheap' and 'premium' as task names set pricing mode
+  if (task === 'cheap') pricingMode = 'cheap';
+  else if (task === 'premium') pricingMode = 'premium';
+  if (!PRICING_MODES.has(pricingMode)) pricingMode = 'standard';
+  return { eloCategory, pricingMode, capability };
+}
-export function rankModels(models, allElo, { task = 'general', maxPricePerMOutput, limit = 5 } = {}) {
-  // allElo may be a by-category map { overall: [...], coding: [...], ... }
-  // or a plain array (legacy). Select the right category for this task.
-  const category = CATEGORY_FOR_TASK[task] ?? 'overall';
+export function rankModels(models, allElo, { task = 'general', pricing, maxPricePerMOutput, limit = 5 } = {}) {
+  const { eloCategory } = parseTaskSpec(task, pricing);
   const entries = Array.isArray(allElo)
     ? allElo
-    : (allElo[category] ?? allElo.overall ?? []);
+    : (allElo[eloCategory] ?? allElo.overall ?? []);
   const scored = [];
@@ -60,7 +103,7 @@ export function rankModels(models, allElo, { task = 'general', maxPricePerMOutpu
       ? entries.find((e) => _matchName(e.lmarenaName, model.id))
       : null;
-    const result = scoreForTask(model, eloEntry, task);
+    const result = scoreForTask(model, eloEntry, task, pricing);
     if (!result) continue;
     const price = pricePerMillion(model);
@@ -69,7 +112,25 @@ export function rankModels(models, allElo, { task = 'general', maxPricePerMOutpu
     scored.push({ model, score: result.score, qualityScore: result.qualityScore, eloEntry });
   }
-  return scored.sort((a, b) => b.score - a.score).slice(0, limit);
+  // Dedup 1: :free variants — keep highest-scoring variant per base model
+  const byBase = new Map();
+  for (const entry of scored) {
+    const baseId = entry.model.id.replace(/:free$/, '');
+    const prev = byBase.get(baseId);
+    if (!prev || entry.score > prev.score) byBase.set(baseId, entry);
+  }
+  // Dedup 2: same ELO entry — multiple OR models can match one LMArena name
+  // (e.g. gpt-5.4-nano and gpt-5.4 both match "gpt-5.4-high").
+  // Keep only the best-scoring OR model per ELO entry.
+  const byElo = new Map();
+  for (const entry of byBase.values()) {
+    const eloKey = entry.eloEntry?.lmarenaName ?? entry.model.id;
+    const prev = byElo.get(eloKey);
+    if (!prev || entry.score > prev.score) byElo.set(eloKey, entry);
+  }
+  return [...byElo.values()].sort((a, b) => b.score - a.score).slice(0, limit);
 }
 // Inline minimal name matching (mirrors lmarena.mjs logic without importing it)

package/mcp/server.mjs CHANGED Viewed

@@ -147,9 +147,14 @@ const CANONICAL_TOOLS = [
       properties: {
         task: {
           type: 'string',
-          enum: ['coding', 'reasoning', 'general', 'vision', 'cheap'],
+          enum: ['coding', 'reasoning', 'general', 'vision', 'cheap', 'premium'],
           description: 'Task type to optimise for',
         },
+        pricing: {
+          type: 'string',
+          enum: ['standard', 'cheap', 'premium'],
+          description: 'Price scoring override. Set to "premium" with task="coding" for best coding model regardless of price',
+        },
         max_price_per_m_output: {
           type: 'number',
           description: 'Maximum price per 1M output tokens in USD (e.g. 1.0)',
@@ -307,11 +312,12 @@ async function handleTool(name, args) {
   if (name === 'models.top') {
     const task = args.task ?? 'general';
+    const pricing = args.pricing ?? undefined;
     const limit = Math.min(20, Math.max(1, args.limit ?? 5));
     const maxPrice = args.max_price_per_m_output ?? undefined;
     const [models, allElo] = await Promise.all([fetchModels({ apiKey: key }), getAllElo()]);
-    const ranked = rankModels(models, allElo, { task, maxPricePerMOutput: maxPrice, limit });
+    const ranked = rankModels(models, allElo, { task, pricing, maxPricePerMOutput: maxPrice, limit });
     return result({ task, results: ranked.map((r) => ({ ...safeModelSummary(r.model), score: r.score, lmarena_elo: r.eloEntry })) });
   }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@aggc/or-info",
-  "version": "0.2.12",
+  "version": "0.2.14",
   "description": "CLI + MCP server for OpenRouter models: prices, benchmarks, context and comparisons",
   "type": "module",
   "engines": {