npm - @aggc/or-info - Versions diffs - 0.2.13 → 0.2.15 - Mend

@aggc/or-info 0.2.13 → 0.2.15

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (5)

package/README.md CHANGED Viewed

@@ -154,26 +154,38 @@ or-info compare openai/gpt-4o deepseek/deepseek-chat-v3-0324 --json
 ### Top models for a task
 ```bash
-or-info top --task coding             # Best coding models
-or-info top --task reasoning          # Best reasoning models
-or-info top --task general            # Best all-rounders
-or-info top --task vision             # Best vision models (requires image input)
-or-info top --task cheap              # Best value for money
-or-info top --task premium            # Highest quality, ignoring price
-or-info top --task coding --budget 2  # Best coders under $2/M output
+or-info top --task coding                          # Best coding models
+or-info top --task reasoning                       # Best reasoning models
+or-info top --task general                         # Best all-rounders
+or-info top --task vision                          # Best vision models (requires image input)
+or-info top --task cheap                           # Best value for money
+or-info top --task premium                         # Highest quality, ignoring price
+or-info top --task coding --pricing premium        # Best coder regardless of price
+or-info top --task coding --budget 2               # Best coders under $2/M output
 or-info top --task general --limit 10
 ```
-Ranking combines LMArena ELO with price and context window size. Task behaviour:
+Ranking combines LMArena ELO with price and context window size.
-| Task | Price weight | Capability filter |
-|------|-------------|-------------------|
-| `general` | standard penalty | none |
-| `coding` | standard penalty | soft penalty (−15%) if no tool support |
-| `reasoning` | standard penalty | none |
-| `vision` | standard penalty | hard filter: image input required |
-| `cheap` | steep penalty | none |
-| `premium` | ignored | none |
+`--task` controls which ELO category and capability filter to apply.
+`--pricing` overrides the price-penalty strategy independently:
+| `--pricing` | Effect |
+|-------------|--------|
+| `standard` (default) | Moderate penalty for expensive models |
+| `cheap` | Steep penalty; strongly favours free/low-cost models |
+| `premium` | No penalty; ranks by quality alone |
+Task defaults (when `--pricing` is not set):
+| Task | Default pricing | Capability filter |
+|------|----------------|-------------------|
+| `general` | standard | none |
+| `coding` | standard | soft penalty (−15%) if no tool support |
+| `reasoning` | standard | none |
+| `vision` | standard | hard filter: image input required |
+| `cheap` | cheap | none |
+| `premium` | premium | none |
 ### Cache management
@@ -193,7 +205,7 @@ or-info refresh         # Force-refresh OpenRouter catalog + LMArena ELO
 | `models.get` | Pricing, context, architecture, features and LMArena ELO for a model |
 | `models.list` | List models with optional filter, sort and limit |
 | `models.compare` | Side-by-side comparison of two models |
-| `models.top` | Ranked top models for coding/reasoning/general/vision/cheap/premium |
+| `models.top` | Ranked top models for coding/reasoning/general/vision/cheap/premium; accepts optional `pricing` override |
 | `benchmarks.get` | LMArena ELO score, global rank, vote count and confidence interval for a model |
 | `cache.refresh` | Force-refresh OpenRouter catalog + LMArena ELO |

package/bin/or-info.mjs CHANGED Viewed

@@ -173,7 +173,8 @@ program
 program
   .command('top')
   .description('Best models for a task')
-  .option('--task <task>', 'Task: coding, reasoning, general, vision, cheap', 'general')
+  .option('--task <task>', 'Task: coding, reasoning, general, vision, cheap, premium', 'general')
+  .option('--pricing <mode>', 'Price scoring override: standard, cheap, premium', v => { const s = new Set(['standard', 'cheap', 'premium']); if (!s.has(v)) throw new InvalidArgumentError('must be standard, cheap, or premium'); return v; })
   .option('--budget <usd>', 'Max price per 1M output tokens (e.g. 1.00)', parseFloat)
   .option('--limit <n>', 'Number of results', parsePositiveInteger, 5)
   .option('--json', 'Output raw JSON')
@@ -190,6 +191,7 @@ program
     const ranked = rankModels(models, allElo, {
       task: opts.task,
+      pricing: opts.pricing,
       maxPricePerMOutput: opts.budget,
       limit: opts.limit,
     });

package/lib/scorer.mjs CHANGED Viewed

@@ -35,11 +35,13 @@ function contextBonus(ctx, task) {
   return 0.9;
 }
-// Score a model for a task.
+// Score a model for a task with optional pricing override.
 // Returns { score, qualityScore } or null if not eligible.
-export function scoreForTask(model, eloEntry, task = 'general') {
+export function scoreForTask(model, eloEntry, task = 'general', pricing) {
+  const { pricingMode, capability } = parseTaskSpec(task, pricing);
   // Hard filter: vision requires vision capability
-  if (task === 'vision' && !supportsFeature(model, 'vision')) return null;
+  if (capability === 'vision' && !supportsFeature(model, 'vision')) return null;
   if (!eloEntry?.elo) return null;
   const quality = normaliseElo(eloEntry.elo);
@@ -48,9 +50,9 @@ export function scoreForTask(model, eloEntry, task = 'general') {
   // Price penalty: premium ignores price, cheap uses steep curve, others standard
   let penalty;
-  if (task === 'premium') {
+  if (pricingMode === 'premium') {
     penalty = 1.0;
-  } else if (task === 'cheap') {
+  } else if (pricingMode === 'cheap') {
     penalty = cheapPenalty(price.output);
   } else {
     penalty = pricePenalty(price.output);
@@ -69,15 +71,30 @@ export function scoreForTask(model, eloEntry, task = 'general') {
   };
 }
-const CATEGORY_FOR_TASK = { coding: 'coding', reasoning: 'math', vision: null, premium: null };
+// Parse task into { eloCategory, pricingMode, capability }.
+// This decouples the ELO category from the price penalty strategy.
+// task='coding' → coding ELO, standard pricing
+// task='coding', pricing='premium' → coding ELO, no price penalty
+const TASK_ELO = { coding: 'coding', reasoning: 'math', vision: null, general: null, cheap: null, premium: null };
+const TASK_CAP = { vision: 'vision', coding: 'tools' };
+const PRICING_MODES = new Set(['standard', 'cheap', 'premium']);
+function parseTaskSpec(task, pricing) {
+  const eloCategory = TASK_ELO[task] ?? 'overall';
+  const capability = TASK_CAP[task] ?? null;
+  let pricingMode = pricing ?? 'standard';
+  // Legacy: 'cheap' and 'premium' as task names set pricing mode
+  if (task === 'cheap') pricingMode = 'cheap';
+  else if (task === 'premium') pricingMode = 'premium';
+  if (!PRICING_MODES.has(pricingMode)) pricingMode = 'standard';
+  return { eloCategory, pricingMode, capability };
+}
-export function rankModels(models, allElo, { task = 'general', maxPricePerMOutput, limit = 5 } = {}) {
-  // allElo may be a by-category map { overall: [...], coding: [...], ... }
-  // or a plain array (legacy). Select the right category for this task.
-  const category = CATEGORY_FOR_TASK[task] ?? 'overall';
+export function rankModels(models, allElo, { task = 'general', pricing, maxPricePerMOutput, limit = 5 } = {}) {
+  const { eloCategory } = parseTaskSpec(task, pricing);
   const entries = Array.isArray(allElo)
     ? allElo
-    : (allElo[category] ?? allElo.overall ?? []);
+    : (allElo[eloCategory] ?? allElo.overall ?? []);
   const scored = [];
@@ -86,7 +103,7 @@ export function rankModels(models, allElo, { task = 'general', maxPricePerMOutpu
       ? entries.find((e) => _matchName(e.lmarenaName, model.id))
       : null;
-    const result = scoreForTask(model, eloEntry, task);
+    const result = scoreForTask(model, eloEntry, task, pricing);
     if (!result) continue;
     const price = pricePerMillion(model);
@@ -95,7 +112,7 @@ export function rankModels(models, allElo, { task = 'general', maxPricePerMOutpu
     scored.push({ model, score: result.score, qualityScore: result.qualityScore, eloEntry });
   }
-  // Dedup :free variants — keep highest-scoring variant per base model
+  // Dedup 1: :free variants — keep highest-scoring variant per base model
   const byBase = new Map();
   for (const entry of scored) {
     const baseId = entry.model.id.replace(/:free$/, '');
@@ -103,7 +120,17 @@ export function rankModels(models, allElo, { task = 'general', maxPricePerMOutpu
     if (!prev || entry.score > prev.score) byBase.set(baseId, entry);
   }
-  return [...byBase.values()].sort((a, b) => b.score - a.score).slice(0, limit);
+  // Dedup 2: same ELO entry — multiple OR models can match one LMArena name
+  // (e.g. gpt-5.4-nano and gpt-5.4 both match "gpt-5.4-high").
+  // Keep only the best-scoring OR model per ELO entry.
+  const byElo = new Map();
+  for (const entry of byBase.values()) {
+    const eloKey = entry.eloEntry?.lmarenaName ?? entry.model.id;
+    const prev = byElo.get(eloKey);
+    if (!prev || entry.score > prev.score) byElo.set(eloKey, entry);
+  }
+  return [...byElo.values()].sort((a, b) => b.score - a.score).slice(0, limit);
 }
 // Inline minimal name matching (mirrors lmarena.mjs logic without importing it)

package/mcp/server.mjs CHANGED Viewed

@@ -150,6 +150,11 @@ const CANONICAL_TOOLS = [
           enum: ['coding', 'reasoning', 'general', 'vision', 'cheap', 'premium'],
           description: 'Task type to optimise for',
         },
+        pricing: {
+          type: 'string',
+          enum: ['standard', 'cheap', 'premium'],
+          description: 'Price scoring override. Set to "premium" with task="coding" for best coding model regardless of price',
+        },
         max_price_per_m_output: {
           type: 'number',
           description: 'Maximum price per 1M output tokens in USD (e.g. 1.0)',
@@ -249,6 +254,22 @@ function errorContent(msg) {
   return { content: [{ type: 'text', text: `Error: ${msg}` }], isError: true };
 }
+async function safeGetElo(modelId, opts) {
+  try {
+    return await getElo(modelId, opts);
+  } catch {
+    return null;
+  }
+}
+async function safeGetAllElo(opts) {
+  try {
+    return await getAllElo(opts);
+  } catch {
+    return {};
+  }
+}
 async function handleTool(name, args) {
   // Accept legacy flat names (get_model_info, list_models, ...) by mapping
   // them to the dot-notation canonical names exposed in tools/list.
@@ -261,7 +282,7 @@ async function handleTool(name, args) {
     const models = await fetchModels({ apiKey: key });
     const model = findModel(models, model_id);
     if (!model) return errorContent(`Model not found: ${model_id}`);
-    const elo = await getElo(model_id);
+    const elo = await safeGetElo(model_id);
     return result({ ...safeModelSummary(model), lmarena_elo: elo ?? null });
   }
@@ -286,7 +307,7 @@ async function handleTool(name, args) {
   if (name === 'benchmarks.get') {
     const { model_id } = args;
     if (!model_id || typeof model_id !== 'string') return errorContent('model_id is required');
-    const elo = await getElo(model_id);
+    const elo = await safeGetElo(model_id);
     return result({ model_id, lmarena_elo: elo ?? null });
   }
@@ -295,8 +316,8 @@ async function handleTool(name, args) {
     if (!model_a || !model_b) return errorContent('model_a and model_b are required');
     const [models, eloA, eloB] = await Promise.all([
       fetchModels({ apiKey: key }),
-      getElo(model_a),
-      getElo(model_b),
+      safeGetElo(model_a),
+      safeGetElo(model_b),
     ]);
     const mA = findModel(models, model_a);
     const mB = findModel(models, model_b);
@@ -307,11 +328,12 @@ async function handleTool(name, args) {
   if (name === 'models.top') {
     const task = args.task ?? 'general';
+    const pricing = args.pricing ?? undefined;
     const limit = Math.min(20, Math.max(1, args.limit ?? 5));
     const maxPrice = args.max_price_per_m_output ?? undefined;
-    const [models, allElo] = await Promise.all([fetchModels({ apiKey: key }), getAllElo()]);
-    const ranked = rankModels(models, allElo, { task, maxPricePerMOutput: maxPrice, limit });
+    const [models, allElo] = await Promise.all([fetchModels({ apiKey: key }), safeGetAllElo()]);
+    const ranked = rankModels(models, allElo, { task, pricing, maxPricePerMOutput: maxPrice, limit });
     return result({ task, results: ranked.map((r) => ({ ...safeModelSummary(r.model), score: r.score, lmarena_elo: r.eloEntry })) });
   }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@aggc/or-info",
-  "version": "0.2.13",
+  "version": "0.2.15",
   "description": "CLI + MCP server for OpenRouter models: prices, benchmarks, context and comparisons",
   "type": "module",
   "engines": {