@aggc/or-info 0.2.11 → 0.2.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -4
- package/bin/or-info.mjs +1 -1
- package/lib/openrouter.mjs +5 -1
- package/lib/scorer.mjs +50 -16
- package/mcp/server.mjs +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -157,14 +157,23 @@ or-info compare openai/gpt-4o deepseek/deepseek-chat-v3-0324 --json
|
|
|
157
157
|
or-info top --task coding # Best coding models
|
|
158
158
|
or-info top --task reasoning # Best reasoning models
|
|
159
159
|
or-info top --task general # Best all-rounders
|
|
160
|
-
or-info top --task vision # Best vision models
|
|
160
|
+
or-info top --task vision # Best vision models (requires image input)
|
|
161
161
|
or-info top --task cheap # Best value for money
|
|
162
|
+
or-info top --task premium # Highest quality, ignoring price
|
|
162
163
|
or-info top --task coding --budget 2 # Best coders under $2/M output
|
|
163
164
|
or-info top --task general --limit 10
|
|
164
165
|
```
|
|
165
166
|
|
|
166
|
-
Ranking combines LMArena ELO with price
|
|
167
|
-
|
|
167
|
+
Ranking combines LMArena ELO with price and context window size. Task behaviour:
|
|
168
|
+
|
|
169
|
+
| Task | Price weight | Capability filter |
|
|
170
|
+
|------|-------------|-------------------|
|
|
171
|
+
| `general` | standard penalty | none |
|
|
172
|
+
| `coding` | standard penalty | soft penalty (−15%) if no tool support |
|
|
173
|
+
| `reasoning` | standard penalty | none |
|
|
174
|
+
| `vision` | standard penalty | hard filter: image input required |
|
|
175
|
+
| `cheap` | steep penalty | none |
|
|
176
|
+
| `premium` | ignored | none |
|
|
168
177
|
|
|
169
178
|
### Cache management
|
|
170
179
|
|
|
@@ -184,7 +193,7 @@ or-info refresh # Force-refresh OpenRouter catalog + LMArena ELO
|
|
|
184
193
|
| `models.get` | Pricing, context, architecture, features and LMArena ELO for a model |
|
|
185
194
|
| `models.list` | List models with optional filter, sort and limit |
|
|
186
195
|
| `models.compare` | Side-by-side comparison of two models |
|
|
187
|
-
| `models.top` | Ranked top models for coding/reasoning/general/vision/cheap |
|
|
196
|
+
| `models.top` | Ranked top models for coding/reasoning/general/vision/cheap/premium |
|
|
188
197
|
| `benchmarks.get` | LMArena ELO score, global rank, vote count and confidence interval for a model |
|
|
189
198
|
| `cache.refresh` | Force-refresh OpenRouter catalog + LMArena ELO |
|
|
190
199
|
|
package/bin/or-info.mjs
CHANGED
|
@@ -22,7 +22,7 @@ function die(msg) {
|
|
|
22
22
|
process.exit(1);
|
|
23
23
|
}
|
|
24
24
|
|
|
25
|
-
const TOP_TASKS = new Set(['coding', 'reasoning', 'general', 'vision', 'cheap']);
|
|
25
|
+
const TOP_TASKS = new Set(['coding', 'reasoning', 'general', 'vision', 'cheap', 'premium']);
|
|
26
26
|
|
|
27
27
|
function parsePositiveInteger(value) {
|
|
28
28
|
const n = Number.parseInt(value, 10);
|
package/lib/openrouter.mjs
CHANGED
|
@@ -66,7 +66,11 @@ export async function fetchModels({ apiKey, force = false } = {}) {
|
|
|
66
66
|
}
|
|
67
67
|
|
|
68
68
|
// Find a model in `models` by id.
// 1. Exact id match wins.
// 2. Otherwise '.' and '-' are treated as interchangeable, so
//    "vendor/model-3.5" and "vendor/model-3-5" resolve to the same entry.
// Returns the matching model object, or null when nothing matches.
// A non-string `id` can never match and yields null rather than throwing;
// catalog entries that are missing or carry a non-string id are skipped.
export function findModel(models, id) {
  if (typeof id !== 'string') return null;

  const exact = models.find((m) => m?.id === id);
  if (exact) return exact;

  // Normalise dots to dashes on both sides before comparing.
  const norm = (s) => s.replace(/\./g, '-');
  const needle = norm(id);
  return models.find((m) => typeof m?.id === 'string' && norm(m.id) === needle) ?? null;
}
|
|
71
75
|
|
|
72
76
|
export function pricePerMillion(model) {
|
package/lib/scorer.mjs
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { pricePerMillion, supportsFeature } from './openrouter.mjs';
|
|
1
|
+
import { pricePerMillion, supportsFeature, contextLength } from './openrouter.mjs';
|
|
2
2
|
|
|
3
|
-
// ELO range observed on LMArena (2026): ~
|
|
4
|
-
const ELO_MIN =
|
|
5
|
-
const ELO_MAX =
|
|
3
|
+
// ELO range observed on LMArena (2026): ~1000 (weak) to ~1540+ (best)
|
|
4
|
+
const ELO_MIN = 1000;
|
|
5
|
+
const ELO_MAX = 1600;
|
|
6
6
|
|
|
7
7
|
function normaliseElo(elo) {
|
|
8
8
|
return Math.max(0, Math.min(100, ((elo - ELO_MIN) / (ELO_MAX - ELO_MIN)) * 100));
|
|
@@ -18,32 +18,58 @@ function pricePenalty(outputPerM) {
|
|
|
18
18
|
return Math.max(0.1, 1 - Math.log10(outputPerM + 1) * 0.15);
|
|
19
19
|
}
|
|
20
20
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
return
|
|
21
|
+
// Steeper price penalty for the `cheap` task — more spread between free and
// expensive models than the standard curve.
// Multiplier = 1 - 0.25 * log10(outputPerM + 1), floored at 0.1:
//   free → 1.0, $1/M → 0.92, $5/M → 0.81, $20/M → 0.67
// Any price that is not a positive number (null, undefined, NaN, 0, negative)
// gets no penalty (1.0). This keeps log10 away from arguments <= 0, so the
// multiplier can never be NaN or exceed 1.0.
function cheapPenalty(outputPerM) {
  if (!(outputPerM > 0)) return 1.0;
  return Math.max(0.1, 1 - Math.log10(outputPerM + 1) * 0.25);
}
|
|
27
|
+
|
|
28
|
+
// Context window multiplier in [0.9, 1.0]; a larger window scores higher.
//   >= 128k → 1.0, >= 64k → 0.97, >= 32k → 0.93, otherwise (or unknown) → 0.9
// `task` is accepted but not consulted: every task shares the same tiers.
function contextBonus(ctx, task) {
  if (!ctx) return 0.9;

  // Tiers are checked from largest to smallest; the first floor reached wins.
  const tiers = [
    [128_000, 1.0],
    [64_000, 0.97],
    [32_000, 0.93],
  ];
  const hit = tiers.find(([floor]) => ctx >= floor);
  return hit ? hit[1] : 0.9;
}
|
|
26
37
|
|
|
27
38
|
// Compute a ranking score for `model` under `task`.
// Returns { score, qualityScore } (both rounded to one decimal), or null when
// the model is not eligible: the `vision` task needs vision support, and every
// task needs an ELO entry with a truthy `elo`.
export function scoreForTask(model, eloEntry, task = 'general') {
  // Hard filter first: a vision ranking only considers vision-capable models.
  if (task === 'vision') {
    if (!supportsFeature(model, 'vision')) return null;
  }
  if (!eloEntry?.elo) return null;

  const quality = normaliseElo(eloEntry.elo);
  const price = pricePerMillion(model);
  const ctx = contextLength(model);

  // Price multiplier: `premium` ignores price, `cheap` uses the steep curve,
  // every other task uses the standard curve.
  let priceFactor;
  switch (task) {
    case 'premium':
      priceFactor = 1.0;
      break;
    case 'cheap':
      priceFactor = cheapPenalty(price.output);
      break;
    default:
      priceFactor = pricePenalty(price.output);
  }

  const ctxFactor = contextBonus(ctx, task);

  // Soft penalty: a coding model without tool support stays eligible but
  // is scaled by 0.85.
  let toolFactor = 1.0;
  if (task === 'coding' && !supportsFeature(model, 'tools')) {
    toolFactor = 0.85;
  }

  const rawScore = quality * priceFactor * ctxFactor * toolFactor;
  const cappedScore = Math.min(100, rawScore);

  return {
    score: Math.round(cappedScore * 10) / 10,
    qualityScore: Math.round(quality * 10) / 10,
  };
}
|
|
45
71
|
|
|
46
|
-
const CATEGORY_FOR_TASK = { coding: 'coding', reasoning: 'math', vision: null };
|
|
72
|
+
const CATEGORY_FOR_TASK = { coding: 'coding', reasoning: 'math', vision: null, premium: null };
|
|
47
73
|
|
|
48
74
|
export function rankModels(models, allElo, { task = 'general', maxPricePerMOutput, limit = 5 } = {}) {
|
|
49
75
|
// allElo may be a by-category map { overall: [...], coding: [...], ... }
|
|
@@ -69,7 +95,15 @@ export function rankModels(models, allElo, { task = 'general', maxPricePerMOutpu
|
|
|
69
95
|
scored.push({ model, score: result.score, qualityScore: result.qualityScore, eloEntry });
|
|
70
96
|
}
|
|
71
97
|
|
|
72
|
-
|
|
98
|
+
// Dedup :free variants — keep highest-scoring variant per base model
|
|
99
|
+
const byBase = new Map();
|
|
100
|
+
for (const entry of scored) {
|
|
101
|
+
const baseId = entry.model.id.replace(/:free$/, '');
|
|
102
|
+
const prev = byBase.get(baseId);
|
|
103
|
+
if (!prev || entry.score > prev.score) byBase.set(baseId, entry);
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
return [...byBase.values()].sort((a, b) => b.score - a.score).slice(0, limit);
|
|
73
107
|
}
|
|
74
108
|
|
|
75
109
|
// Inline minimal name matching (mirrors lmarena.mjs logic without importing it)
|
package/mcp/server.mjs
CHANGED
|
@@ -147,7 +147,7 @@ const CANONICAL_TOOLS = [
|
|
|
147
147
|
properties: {
|
|
148
148
|
task: {
|
|
149
149
|
type: 'string',
|
|
150
|
-
enum: ['coding', 'reasoning', 'general', 'vision', 'cheap'],
|
|
150
|
+
enum: ['coding', 'reasoning', 'general', 'vision', 'cheap', 'premium'],
|
|
151
151
|
description: 'Task type to optimise for',
|
|
152
152
|
},
|
|
153
153
|
max_price_per_m_output: {
|