@aggc/or-info 0.2.12 → 0.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -157,14 +157,23 @@ or-info compare openai/gpt-4o deepseek/deepseek-chat-v3-0324 --json
157
157
  or-info top --task coding # Best coding models
158
158
  or-info top --task reasoning # Best reasoning models
159
159
  or-info top --task general # Best all-rounders
160
- or-info top --task vision # Best vision models
160
+ or-info top --task vision # Best vision models (requires image input)
161
161
  or-info top --task cheap # Best value for money
162
+ or-info top --task premium # Highest quality, ignoring price
162
163
  or-info top --task coding --budget 2 # Best coders under $2/M output
163
164
  or-info top --task general --limit 10
164
165
  ```
165
166
 
166
- Ranking combines LMArena ELO with price. `--task vision` and `--task coding` additionally
167
- filter for models that support the required capability (image input / tool use).
167
+ Ranking combines LMArena ELO with price and context window size. Task behaviour:
168
+
169
+ | Task | Price weight | Capability filter |
170
+ |------|-------------|-------------------|
171
+ | `general` | standard penalty | none |
172
+ | `coding` | standard penalty | soft penalty (−15%) if no tool support |
173
+ | `reasoning` | standard penalty | none |
174
+ | `vision` | standard penalty | hard filter: image input required |
175
+ | `cheap` | steep penalty | none |
176
+ | `premium` | ignored | none |
168
177
 
169
178
  ### Cache management
170
179
 
@@ -184,7 +193,7 @@ or-info refresh # Force-refresh OpenRouter catalog + LMArena ELO
184
193
  | `models.get` | Pricing, context, architecture, features and LMArena ELO for a model |
185
194
  | `models.list` | List models with optional filter, sort and limit |
186
195
  | `models.compare` | Side-by-side comparison of two models |
187
- | `models.top` | Ranked top models for coding/reasoning/general/vision/cheap |
196
+ | `models.top` | Ranked top models for coding/reasoning/general/vision/cheap/premium |
188
197
  | `benchmarks.get` | LMArena ELO score, global rank, vote count and confidence interval for a model |
189
198
  | `cache.refresh` | Force-refresh OpenRouter catalog + LMArena ELO |
190
199
 
package/bin/or-info.mjs CHANGED
@@ -22,7 +22,7 @@ function die(msg) {
22
22
  process.exit(1);
23
23
  }
24
24
 
25
- const TOP_TASKS = new Set(['coding', 'reasoning', 'general', 'vision', 'cheap']);
25
+ const TOP_TASKS = new Set(['coding', 'reasoning', 'general', 'vision', 'cheap', 'premium']);
26
26
 
27
27
  function parsePositiveInteger(value) {
28
28
  const n = Number.parseInt(value, 10);
package/lib/scorer.mjs CHANGED
@@ -1,8 +1,8 @@
1
- import { pricePerMillion, supportsFeature } from './openrouter.mjs';
1
+ import { pricePerMillion, supportsFeature, contextLength } from './openrouter.mjs';
2
2
 
3
- // ELO range observed on LMArena (2026): ~1050 (weak) to ~1500 (best)
4
- const ELO_MIN = 1050;
5
- const ELO_MAX = 1500;
3
+ // ELO range observed on LMArena (2026): ~1000 (weak) to ~1540+ (best)
4
+ const ELO_MIN = 1000;
5
+ const ELO_MAX = 1600;
6
6
 
7
7
  function normaliseElo(elo) {
8
8
  return Math.max(0, Math.min(100, ((elo - ELO_MIN) / (ELO_MAX - ELO_MIN)) * 100));
@@ -18,32 +18,58 @@ function pricePenalty(outputPerM) {
18
18
  return Math.max(0.1, 1 - Math.log10(outputPerM + 1) * 0.15);
19
19
  }
20
20
 
21
- function requiresCapability(task) {
22
- if (task === 'vision') return 'vision';
23
- if (task === 'coding') return 'tools';
24
- return null;
21
+ // Steeper penalty for cheap task — more spread between free and expensive.
22
+ // Free → 1.0, $1/M → 0.92, $5/M → 0.81, $20/M → 0.67
23
+ function cheapPenalty(outputPerM) {
24
+ if (outputPerM === null || outputPerM === 0) return 1.0;
25
+ return Math.max(0.1, 1 - Math.log10(outputPerM + 1) * 0.25);
26
+ }
27
+
28
+ // Context window bonus [0.9, 1.0]. Larger context is more useful,
29
+ // especially for coding where long files are common (the bonus is currently identical for every task).
30
+ function contextBonus(ctx, task) {
31
+ if (!ctx) return 0.9;
32
+ if (ctx >= 128_000) return 1.0;
33
+ if (ctx >= 64_000) return 0.97;
34
+ if (ctx >= 32_000) return 0.93;
35
+ return 0.9;
25
36
  }
26
37
 
27
38
  // Score a model for a task.
28
39
  // Returns { score, qualityScore } or null if not eligible.
29
40
  export function scoreForTask(model, eloEntry, task = 'general') {
30
- const cap = requiresCapability(task);
31
- if (cap && !supportsFeature(model, cap)) return null;
41
+ // Hard filter: vision requires vision capability
42
+ if (task === 'vision' && !supportsFeature(model, 'vision')) return null;
32
43
  if (!eloEntry?.elo) return null;
33
44
 
34
45
  const quality = normaliseElo(eloEntry.elo);
35
46
  const price = pricePerMillion(model);
36
- const penalty = task === 'cheap'
37
- ? pricePenalty(price.output) * 1.4 // aggressively favour cheap
38
- : pricePenalty(price.output);
47
+ const ctx = contextLength(model);
48
+
49
+ // Price penalty: premium ignores price, cheap uses steep curve, others standard
50
+ let penalty;
51
+ if (task === 'premium') {
52
+ penalty = 1.0;
53
+ } else if (task === 'cheap') {
54
+ penalty = cheapPenalty(price.output);
55
+ } else {
56
+ penalty = pricePenalty(price.output);
57
+ }
58
+
59
+ const ctxB = contextBonus(ctx, task);
60
+
61
+ // Soft penalty for coding without tools (still eligible, just less ideal)
62
+ const capPenalty = (task === 'coding' && !supportsFeature(model, 'tools')) ? 0.85 : 1.0;
63
+
64
+ const rawScore = quality * penalty * ctxB * capPenalty;
39
65
 
40
66
  return {
41
- score: Math.round(quality * penalty * 10) / 10,
67
+ score: Math.round(Math.min(100, rawScore) * 10) / 10,
42
68
  qualityScore: Math.round(quality * 10) / 10,
43
69
  };
44
70
  }
45
71
 
46
- const CATEGORY_FOR_TASK = { coding: 'coding', reasoning: 'math', vision: null };
72
+ const CATEGORY_FOR_TASK = { coding: 'coding', reasoning: 'math', vision: null, premium: null };
47
73
 
48
74
  export function rankModels(models, allElo, { task = 'general', maxPricePerMOutput, limit = 5 } = {}) {
49
75
  // allElo may be a by-category map { overall: [...], coding: [...], ... }
@@ -69,7 +95,15 @@ export function rankModels(models, allElo, { task = 'general', maxPricePerMOutpu
69
95
  scored.push({ model, score: result.score, qualityScore: result.qualityScore, eloEntry });
70
96
  }
71
97
 
72
- return scored.sort((a, b) => b.score - a.score).slice(0, limit);
98
+ // Dedup :free variants: keep the highest-scoring variant per base model
99
+ const byBase = new Map();
100
+ for (const entry of scored) {
101
+ const baseId = entry.model.id.replace(/:free$/, '');
102
+ const prev = byBase.get(baseId);
103
+ if (!prev || entry.score > prev.score) byBase.set(baseId, entry);
104
+ }
105
+
106
+ return [...byBase.values()].sort((a, b) => b.score - a.score).slice(0, limit);
73
107
  }
74
108
 
75
109
  // Inline minimal name matching (mirrors lmarena.mjs logic without importing it)
package/mcp/server.mjs CHANGED
@@ -147,7 +147,7 @@ const CANONICAL_TOOLS = [
147
147
  properties: {
148
148
  task: {
149
149
  type: 'string',
150
- enum: ['coding', 'reasoning', 'general', 'vision', 'cheap'],
150
+ enum: ['coding', 'reasoning', 'general', 'vision', 'cheap', 'premium'],
151
151
  description: 'Task type to optimise for',
152
152
  },
153
153
  max_price_per_m_output: {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aggc/or-info",
3
- "version": "0.2.12",
3
+ "version": "0.2.13",
4
4
  "description": "CLI + MCP server for OpenRouter models: prices, benchmarks, context and comparisons",
5
5
  "type": "module",
6
6
  "engines": {