@aggc/or-info 0.2.12 → 0.2.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -154,17 +154,38 @@ or-info compare openai/gpt-4o deepseek/deepseek-chat-v3-0324 --json
154
154
  ### Top models for a task
155
155
 
156
156
  ```bash
157
- or-info top --task coding # Best coding models
158
- or-info top --task reasoning # Best reasoning models
159
- or-info top --task general # Best all-rounders
160
- or-info top --task vision # Best vision models
161
- or-info top --task cheap # Best value for money
162
- or-info top --task coding --budget 2 # Best coders under $2/M output
157
+ or-info top --task coding # Best coding models
158
+ or-info top --task reasoning # Best reasoning models
159
+ or-info top --task general # Best all-rounders
160
+ or-info top --task vision # Best vision models (requires image input)
161
+ or-info top --task cheap # Best value for money
162
+ or-info top --task premium # Highest quality, ignoring price
163
+ or-info top --task coding --pricing premium # Best coder regardless of price
164
+ or-info top --task coding --budget 2 # Best coders under $2/M output
163
165
  or-info top --task general --limit 10
164
166
  ```
165
167
 
166
- Ranking combines LMArena ELO with price. `--task vision` and `--task coding` additionally
167
- filter for models that support the required capability (image input / tool use).
168
+ Ranking combines LMArena ELO with price and context window size.
169
+
170
+ `--task` controls which ELO category and capability filter to apply.
171
+ `--pricing` overrides the price-penalty strategy independently (except with `--task cheap` or `--task premium`, which always use their own pricing mode):
172
+
173
+ | `--pricing` | Effect |
174
+ |-------------|--------|
175
+ | `standard` (default) | Moderate penalty for expensive models |
176
+ | `cheap` | Steep penalty; strongly favours free/low-cost models |
177
+ | `premium` | No penalty; ranks by quality alone |
178
+
179
+ Task defaults (when `--pricing` is not set):
180
+
181
+ | Task | Default pricing | Capability filter |
182
+ |------|----------------|-------------------|
183
+ | `general` | standard | none |
184
+ | `coding` | standard | soft penalty (−15%) if no tool support |
185
+ | `reasoning` | standard | none |
186
+ | `vision` | standard | hard filter: image input required |
187
+ | `cheap` | cheap | none |
188
+ | `premium` | premium | none |
168
189
 
169
190
  ### Cache management
170
191
 
@@ -184,7 +205,7 @@ or-info refresh # Force-refresh OpenRouter catalog + LMArena ELO
184
205
  | `models.get` | Pricing, context, architecture, features and LMArena ELO for a model |
185
206
  | `models.list` | List models with optional filter, sort and limit |
186
207
  | `models.compare` | Side-by-side comparison of two models |
187
- | `models.top` | Ranked top models for coding/reasoning/general/vision/cheap |
208
+ | `models.top` | Ranked top models for coding/reasoning/general/vision/cheap/premium; accepts optional `pricing` override |
188
209
  | `benchmarks.get` | LMArena ELO score, global rank, vote count and confidence interval for a model |
189
210
  | `cache.refresh` | Force-refresh OpenRouter catalog + LMArena ELO |
190
211
 
package/bin/or-info.mjs CHANGED
@@ -22,7 +22,7 @@ function die(msg) {
22
22
  process.exit(1);
23
23
  }
24
24
 
25
- const TOP_TASKS = new Set(['coding', 'reasoning', 'general', 'vision', 'cheap']);
25
+ const TOP_TASKS = new Set(['coding', 'reasoning', 'general', 'vision', 'cheap', 'premium']);
26
26
 
27
27
  function parsePositiveInteger(value) {
28
28
  const n = Number.parseInt(value, 10);
@@ -173,7 +173,8 @@ program
173
173
  program
174
174
  .command('top')
175
175
  .description('Best models for a task')
176
- .option('--task <task>', 'Task: coding, reasoning, general, vision, cheap', 'general')
176
+ .option('--task <task>', 'Task: coding, reasoning, general, vision, cheap, premium', 'general')
177
+ .option('--pricing <mode>', 'Price scoring override: standard, cheap, premium', v => { const s = new Set(['standard', 'cheap', 'premium']); if (!s.has(v)) throw new InvalidArgumentError('must be standard, cheap, or premium'); return v; })
177
178
  .option('--budget <usd>', 'Max price per 1M output tokens (e.g. 1.00)', parseFloat)
178
179
  .option('--limit <n>', 'Number of results', parsePositiveInteger, 5)
179
180
  .option('--json', 'Output raw JSON')
@@ -190,6 +191,7 @@ program
190
191
 
191
192
  const ranked = rankModels(models, allElo, {
192
193
  task: opts.task,
194
+ pricing: opts.pricing,
193
195
  maxPricePerMOutput: opts.budget,
194
196
  limit: opts.limit,
195
197
  });
package/lib/scorer.mjs CHANGED
@@ -1,8 +1,8 @@
1
- import { pricePerMillion, supportsFeature } from './openrouter.mjs';
1
+ import { pricePerMillion, supportsFeature, contextLength } from './openrouter.mjs';
2
2
 
3
- // ELO range observed on LMArena (2026): ~1050 (weak) to ~1500 (best)
4
- const ELO_MIN = 1050;
5
- const ELO_MAX = 1500;
3
+ // ELO range observed on LMArena (2026): ~1000 (weak) to ~1540+ (best)
4
+ const ELO_MIN = 1000;
5
+ const ELO_MAX = 1600;
6
6
 
7
7
  function normaliseElo(elo) {
8
8
  return Math.max(0, Math.min(100, ((elo - ELO_MIN) / (ELO_MAX - ELO_MIN)) * 100));
@@ -18,40 +18,83 @@ function pricePenalty(outputPerM) {
18
18
  return Math.max(0.1, 1 - Math.log10(outputPerM + 1) * 0.15);
19
19
  }
20
20
 
21
- function requiresCapability(task) {
22
- if (task === 'vision') return 'vision';
23
- if (task === 'coding') return 'tools';
24
- return null;
21
+ // Steeper penalty for cheap task — more spread between free and expensive.
22
+ // Free 1.0, $1/M → 0.92, $5/M → 0.81, $20/M → 0.67
23
+ function cheapPenalty(outputPerM) {
24
+ if (outputPerM === null || outputPerM === 0) return 1.0;
25
+ return Math.max(0.1, 1 - Math.log10(outputPerM + 1) * 0.25);
26
+ }
27
+
28
+ // Context window bonus [0.9, 1.0]. Larger context is more useful,
29
+ // especially for coding where long files are common. Note: `task` is currently unused, so the bonus is the same for every task.
30
+ function contextBonus(ctx, task) {
31
+ if (!ctx) return 0.9;
32
+ if (ctx >= 128_000) return 1.0;
33
+ if (ctx >= 64_000) return 0.97;
34
+ if (ctx >= 32_000) return 0.93;
35
+ return 0.9;
25
36
  }
26
37
 
27
- // Score a model for a task.
38
+ // Score a model for a task with optional pricing override.
28
39
  // Returns { score, qualityScore } or null if not eligible.
29
- export function scoreForTask(model, eloEntry, task = 'general') {
30
- const cap = requiresCapability(task);
31
- if (cap && !supportsFeature(model, cap)) return null;
40
+ export function scoreForTask(model, eloEntry, task = 'general', pricing) {
41
+ const { pricingMode, capability } = parseTaskSpec(task, pricing);
42
+
43
+ // Hard filter: vision requires vision capability
44
+ if (capability === 'vision' && !supportsFeature(model, 'vision')) return null;
32
45
  if (!eloEntry?.elo) return null;
33
46
 
34
47
  const quality = normaliseElo(eloEntry.elo);
35
48
  const price = pricePerMillion(model);
36
- const penalty = task === 'cheap'
37
- ? pricePenalty(price.output) * 1.4 // aggressively favour cheap
38
- : pricePenalty(price.output);
49
+ const ctx = contextLength(model);
50
+
51
+ // Price penalty: premium ignores price, cheap uses steep curve, others standard
52
+ let penalty;
53
+ if (pricingMode === 'premium') {
54
+ penalty = 1.0;
55
+ } else if (pricingMode === 'cheap') {
56
+ penalty = cheapPenalty(price.output);
57
+ } else {
58
+ penalty = pricePenalty(price.output);
59
+ }
60
+
61
+ const ctxB = contextBonus(ctx, task);
62
+
63
+ // Soft penalty for coding without tools (still eligible, just less ideal)
64
+ const capPenalty = (task === 'coding' && !supportsFeature(model, 'tools')) ? 0.85 : 1.0;
65
+
66
+ const rawScore = quality * penalty * ctxB * capPenalty;
39
67
 
40
68
  return {
41
- score: Math.round(quality * penalty * 10) / 10,
69
+ score: Math.round(Math.min(100, rawScore) * 10) / 10,
42
70
  qualityScore: Math.round(quality * 10) / 10,
43
71
  };
44
72
  }
45
73
 
46
- const CATEGORY_FOR_TASK = { coding: 'coding', reasoning: 'math', vision: null };
74
+ // Parse task into { eloCategory, pricingMode, capability }.
75
+ // This decouples the ELO category from the price penalty strategy.
76
+ // task='coding' → coding ELO, standard pricing
77
+ // task='coding', pricing='premium' → coding ELO, no price penalty
78
+ const TASK_ELO = { coding: 'coding', reasoning: 'math', vision: null, general: null, cheap: null, premium: null };
79
+ const TASK_CAP = { vision: 'vision', coding: 'tools' };
80
+ const PRICING_MODES = new Set(['standard', 'cheap', 'premium']);
81
+
82
+ function parseTaskSpec(task, pricing) {
83
+ const eloCategory = TASK_ELO[task] ?? 'overall';
84
+ const capability = TASK_CAP[task] ?? null;
85
+ let pricingMode = pricing ?? 'standard';
86
+ // Legacy: 'cheap' and 'premium' as task names force the pricing mode, taking precedence over an explicit `pricing` argument
87
+ if (task === 'cheap') pricingMode = 'cheap';
88
+ else if (task === 'premium') pricingMode = 'premium';
89
+ if (!PRICING_MODES.has(pricingMode)) pricingMode = 'standard';
90
+ return { eloCategory, pricingMode, capability };
91
+ }
47
92
 
48
- export function rankModels(models, allElo, { task = 'general', maxPricePerMOutput, limit = 5 } = {}) {
49
- // allElo may be a by-category map { overall: [...], coding: [...], ... }
50
- // or a plain array (legacy). Select the right category for this task.
51
- const category = CATEGORY_FOR_TASK[task] ?? 'overall';
93
+ export function rankModels(models, allElo, { task = 'general', pricing, maxPricePerMOutput, limit = 5 } = {}) {
94
+ const { eloCategory } = parseTaskSpec(task, pricing);
52
95
  const entries = Array.isArray(allElo)
53
96
  ? allElo
54
- : (allElo[category] ?? allElo.overall ?? []);
97
+ : (allElo[eloCategory] ?? allElo.overall ?? []);
55
98
 
56
99
  const scored = [];
57
100
 
@@ -60,7 +103,7 @@ export function rankModels(models, allElo, { task = 'general', maxPricePerMOutpu
60
103
  ? entries.find((e) => _matchName(e.lmarenaName, model.id))
61
104
  : null;
62
105
 
63
- const result = scoreForTask(model, eloEntry, task);
106
+ const result = scoreForTask(model, eloEntry, task, pricing);
64
107
  if (!result) continue;
65
108
 
66
109
  const price = pricePerMillion(model);
@@ -69,7 +112,25 @@ export function rankModels(models, allElo, { task = 'general', maxPricePerMOutpu
69
112
  scored.push({ model, score: result.score, qualityScore: result.qualityScore, eloEntry });
70
113
  }
71
114
 
72
- return scored.sort((a, b) => b.score - a.score).slice(0, limit);
115
+ // Dedup 1: :free variants — keep highest-scoring variant per base model
116
+ const byBase = new Map();
117
+ for (const entry of scored) {
118
+ const baseId = entry.model.id.replace(/:free$/, '');
119
+ const prev = byBase.get(baseId);
120
+ if (!prev || entry.score > prev.score) byBase.set(baseId, entry);
121
+ }
122
+
123
+ // Dedup 2: same ELO entry — multiple OR models can match one LMArena name
124
+ // (e.g. gpt-5.4-nano and gpt-5.4 both match "gpt-5.4-high").
125
+ // Keep only the best-scoring OR model per ELO entry.
126
+ const byElo = new Map();
127
+ for (const entry of byBase.values()) {
128
+ const eloKey = entry.eloEntry?.lmarenaName ?? entry.model.id;
129
+ const prev = byElo.get(eloKey);
130
+ if (!prev || entry.score > prev.score) byElo.set(eloKey, entry);
131
+ }
132
+
133
+ return [...byElo.values()].sort((a, b) => b.score - a.score).slice(0, limit);
73
134
  }
74
135
 
75
136
  // Inline minimal name matching (mirrors lmarena.mjs logic without importing it)
package/mcp/server.mjs CHANGED
@@ -147,9 +147,14 @@ const CANONICAL_TOOLS = [
147
147
  properties: {
148
148
  task: {
149
149
  type: 'string',
150
- enum: ['coding', 'reasoning', 'general', 'vision', 'cheap'],
150
+ enum: ['coding', 'reasoning', 'general', 'vision', 'cheap', 'premium'],
151
151
  description: 'Task type to optimise for',
152
152
  },
153
+ pricing: {
154
+ type: 'string',
155
+ enum: ['standard', 'cheap', 'premium'],
156
+ description: 'Price scoring override. Set to "premium" with task="coding" for best coding model regardless of price',
157
+ },
153
158
  max_price_per_m_output: {
154
159
  type: 'number',
155
160
  description: 'Maximum price per 1M output tokens in USD (e.g. 1.0)',
@@ -307,11 +312,12 @@ async function handleTool(name, args) {
307
312
 
308
313
  if (name === 'models.top') {
309
314
  const task = args.task ?? 'general';
315
+ const pricing = args.pricing ?? undefined;
310
316
  const limit = Math.min(20, Math.max(1, args.limit ?? 5));
311
317
  const maxPrice = args.max_price_per_m_output ?? undefined;
312
318
 
313
319
  const [models, allElo] = await Promise.all([fetchModels({ apiKey: key }), getAllElo()]);
314
- const ranked = rankModels(models, allElo, { task, maxPricePerMOutput: maxPrice, limit });
320
+ const ranked = rankModels(models, allElo, { task, pricing, maxPricePerMOutput: maxPrice, limit });
315
321
  return result({ task, results: ranked.map((r) => ({ ...safeModelSummary(r.model), score: r.score, lmarena_elo: r.eloEntry })) });
316
322
  }
317
323
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aggc/or-info",
3
- "version": "0.2.12",
3
+ "version": "0.2.14",
4
4
  "description": "CLI + MCP server for OpenRouter models: prices, benchmarks, context and comparisons",
5
5
  "type": "module",
6
6
  "engines": {