npm - @aggc/or-info - Versions diffs - 0.2.12 → 0.2.13 - Mend

@aggc/or-info 0.2.12 → 0.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/README.md CHANGED

@@ -157,14 +157,23 @@ or-info compare openai/gpt-4o deepseek/deepseek-chat-v3-0324 --json
 or-info top --task coding             # Best coding models
 or-info top --task reasoning          # Best reasoning models
 or-info top --task general            # Best all-rounders
-or-info top --task vision             # Best vision models
+or-info top --task vision             # Best vision models (requires image input)
 or-info top --task cheap              # Best value for money
+or-info top --task premium            # Highest quality, ignoring price
 or-info top --task coding --budget 2  # Best coders under $2/M output
 or-info top --task general --limit 10
 ```
-Ranking combines LMArena ELO with price. `--task vision` and `--task coding` additionally
-filter for models that support the required capability (image input / tool use).
+Ranking combines LMArena ELO with price and context window size. Task behaviour:
+| Task | Price weight | Capability filter |
+|------|-------------|-------------------|
+| `general` | standard penalty | none |
+| `coding` | standard penalty | soft penalty (−15%) if no tool support |
+| `reasoning` | standard penalty | none |
+| `vision` | standard penalty | hard filter: image input required |
+| `cheap` | steep penalty | none |
+| `premium` | ignored | none |
 ### Cache management
@@ -184,7 +193,7 @@ or-info refresh         # Force-refresh OpenRouter catalog + LMArena ELO
 | `models.get` | Pricing, context, architecture, features and LMArena ELO for a model |
 | `models.list` | List models with optional filter, sort and limit |
 | `models.compare` | Side-by-side comparison of two models |
-| `models.top` | Ranked top models for coding/reasoning/general/vision/cheap |
+| `models.top` | Ranked top models for coding/reasoning/general/vision/cheap/premium |
 | `benchmarks.get` | LMArena ELO score, global rank, vote count and confidence interval for a model |
 | `cache.refresh` | Force-refresh OpenRouter catalog + LMArena ELO |

package/bin/or-info.mjs CHANGED

@@ -22,7 +22,7 @@ function die(msg) {
   process.exit(1);
 }
-const TOP_TASKS = new Set(['coding', 'reasoning', 'general', 'vision', 'cheap']);
+const TOP_TASKS = new Set(['coding', 'reasoning', 'general', 'vision', 'cheap', 'premium']);
 function parsePositiveInteger(value) {
   const n = Number.parseInt(value, 10);

package/lib/scorer.mjs CHANGED

@@ -1,8 +1,8 @@
-import { pricePerMillion, supportsFeature } from './openrouter.mjs';
+import { pricePerMillion, supportsFeature, contextLength } from './openrouter.mjs';
-// ELO range observed on LMArena (2026): ~1050 (weak) to ~1500 (best)
-const ELO_MIN = 1050;
-const ELO_MAX = 1500;
+// ELO range observed on LMArena (2026): ~1000 (weak) to ~1540+ (best)
+const ELO_MIN = 1000;
+const ELO_MAX = 1600;
 function normaliseElo(elo) {
   return Math.max(0, Math.min(100, ((elo - ELO_MIN) / (ELO_MAX - ELO_MIN)) * 100));
@@ -18,32 +18,58 @@ function pricePenalty(outputPerM) {
   return Math.max(0.1, 1 - Math.log10(outputPerM + 1) * 0.15);
 }
-function requiresCapability(task) {
-  if (task === 'vision') return 'vision';
-  if (task === 'coding') return 'tools';
-  return null;
+// Steeper penalty for cheap task — more spread between free and expensive.
+// Free → 1.0, $1/M → 0.88, $5/M → 0.72, $20/M → 0.59
+function cheapPenalty(outputPerM) {
+  if (outputPerM === null || outputPerM === 0) return 1.0;
+  return Math.max(0.1, 1 - Math.log10(outputPerM + 1) * 0.25);
+}
+// Context window bonus [0.9, 1.0]. Larger context is more useful,
+// especially for coding where long files are common.
+function contextBonus(ctx, task) {
+  if (!ctx) return 0.9;
+  if (ctx >= 128_000) return 1.0;
+  if (ctx >= 64_000) return 0.97;
+  if (ctx >= 32_000) return 0.93;
+  return 0.9;
 }
 // Score a model for a task.
 // Returns { score, qualityScore } or null if not eligible.
 export function scoreForTask(model, eloEntry, task = 'general') {
-  const cap = requiresCapability(task);
-  if (cap && !supportsFeature(model, cap)) return null;
+  // Hard filter: vision requires vision capability
+  if (task === 'vision' && !supportsFeature(model, 'vision')) return null;
   if (!eloEntry?.elo) return null;
   const quality = normaliseElo(eloEntry.elo);
   const price = pricePerMillion(model);
-  const penalty = task === 'cheap'
-    ? pricePenalty(price.output) * 1.4   // aggressively favour cheap
-    : pricePenalty(price.output);
+  const ctx = contextLength(model);
+  // Price penalty: premium ignores price, cheap uses steep curve, others standard
+  let penalty;
+  if (task === 'premium') {
+    penalty = 1.0;
+  } else if (task === 'cheap') {
+    penalty = cheapPenalty(price.output);
+  } else {
+    penalty = pricePenalty(price.output);
+  }
+  const ctxB = contextBonus(ctx, task);
+  // Soft penalty for coding without tools (still eligible, just less ideal)
+  const capPenalty = (task === 'coding' && !supportsFeature(model, 'tools')) ? 0.85 : 1.0;
+  const rawScore = quality * penalty * ctxB * capPenalty;
   return {
-    score: Math.round(quality * penalty * 10) / 10,
+    score: Math.round(Math.min(100, rawScore) * 10) / 10,
     qualityScore: Math.round(quality * 10) / 10,
   };
 }
-const CATEGORY_FOR_TASK = { coding: 'coding', reasoning: 'math', vision: null };
+const CATEGORY_FOR_TASK = { coding: 'coding', reasoning: 'math', vision: null, premium: null };
 export function rankModels(models, allElo, { task = 'general', maxPricePerMOutput, limit = 5 } = {}) {
   // allElo may be a by-category map { overall: [...], coding: [...], ... }
@@ -69,7 +95,15 @@ export function rankModels(models, allElo, { task = 'general', maxPricePerMOutpu
     scored.push({ model, score: result.score, qualityScore: result.qualityScore, eloEntry });
   }
-  return scored.sort((a, b) => b.score - a.score).slice(0, limit);
+  // Dedup :free variants — keep highest-scoring variant per base model
+  const byBase = new Map();
+  for (const entry of scored) {
+    const baseId = entry.model.id.replace(/:free$/, '');
+    const prev = byBase.get(baseId);
+    if (!prev || entry.score > prev.score) byBase.set(baseId, entry);
+  }
+  return [...byBase.values()].sort((a, b) => b.score - a.score).slice(0, limit);
 }
 // Inline minimal name matching (mirrors lmarena.mjs logic without importing it)

package/mcp/server.mjs CHANGED

@@ -147,7 +147,7 @@ const CANONICAL_TOOLS = [
       properties: {
         task: {
           type: 'string',
-          enum: ['coding', 'reasoning', 'general', 'vision', 'cheap'],
+          enum: ['coding', 'reasoning', 'general', 'vision', 'cheap', 'premium'],
           description: 'Task type to optimise for',
         },
         max_price_per_m_output: {

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@aggc/or-info",
-  "version": "0.2.12",
+  "version": "0.2.13",
   "description": "CLI + MCP server for OpenRouter models: prices, benchmarks, context and comparisons",
   "type": "module",
   "engines": {