@aggc/or-info 0.2.13 → 0.2.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -17
- package/bin/or-info.mjs +3 -1
- package/lib/scorer.mjs +41 -14
- package/mcp/server.mjs +28 -6
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -154,26 +154,38 @@ or-info compare openai/gpt-4o deepseek/deepseek-chat-v3-0324 --json
|
|
|
154
154
|
### Top models for a task
|
|
155
155
|
|
|
156
156
|
```bash
|
|
157
|
-
or-info top --task coding
|
|
158
|
-
or-info top --task reasoning
|
|
159
|
-
or-info top --task general
|
|
160
|
-
or-info top --task vision
|
|
161
|
-
or-info top --task cheap
|
|
162
|
-
or-info top --task premium
|
|
163
|
-
or-info top --task coding --
|
|
157
|
+
or-info top --task coding # Best coding models
|
|
158
|
+
or-info top --task reasoning # Best reasoning models
|
|
159
|
+
or-info top --task general # Best all-rounders
|
|
160
|
+
or-info top --task vision # Best vision models (requires image input)
|
|
161
|
+
or-info top --task cheap # Best value for money
|
|
162
|
+
or-info top --task premium # Highest quality, ignoring price
|
|
163
|
+
or-info top --task coding --pricing premium # Best coder regardless of price
|
|
164
|
+
or-info top --task coding --budget 2 # Best coders under $2/M output
|
|
164
165
|
or-info top --task general --limit 10
|
|
165
166
|
```
|
|
166
167
|
|
|
167
|
-
Ranking combines LMArena ELO with price and context window size.
|
|
168
|
+
Ranking combines LMArena ELO with price and context window size.
|
|
168
169
|
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
| `
|
|
173
|
-
|
|
174
|
-
| `
|
|
175
|
-
| `cheap` |
|
|
176
|
-
| `premium` |
|
|
170
|
+
`--task` controls which ELO category and capability filter to apply.
|
|
171
|
+
`--pricing` overrides the price-penalty strategy independently:
|
|
172
|
+
|
|
173
|
+
| `--pricing` | Effect |
|
|
174
|
+
|-------------|--------|
|
|
175
|
+
| `standard` (default) | Moderate penalty for expensive models |
|
|
176
|
+
| `cheap` | Steep penalty; strongly favours free/low-cost models |
|
|
177
|
+
| `premium` | No penalty; ranks by quality alone |
|
|
178
|
+
|
|
179
|
+
Task defaults (when `--pricing` is not set):
|
|
180
|
+
|
|
181
|
+
| Task | Default pricing | Capability filter |
|
|
182
|
+
|------|----------------|-------------------|
|
|
183
|
+
| `general` | standard | none |
|
|
184
|
+
| `coding` | standard | soft penalty (−15%) if no tool support |
|
|
185
|
+
| `reasoning` | standard | none |
|
|
186
|
+
| `vision` | standard | hard filter: image input required |
|
|
187
|
+
| `cheap` | cheap | none |
|
|
188
|
+
| `premium` | premium | none |
|
|
177
189
|
|
|
178
190
|
### Cache management
|
|
179
191
|
|
|
@@ -193,7 +205,7 @@ or-info refresh # Force-refresh OpenRouter catalog + LMArena ELO
|
|
|
193
205
|
| `models.get` | Pricing, context, architecture, features and LMArena ELO for a model |
|
|
194
206
|
| `models.list` | List models with optional filter, sort and limit |
|
|
195
207
|
| `models.compare` | Side-by-side comparison of two models |
|
|
196
|
-
| `models.top` | Ranked top models for coding/reasoning/general/vision/cheap/premium |
|
|
208
|
+
| `models.top` | Ranked top models for coding/reasoning/general/vision/cheap/premium; accepts optional `pricing` override |
|
|
197
209
|
| `benchmarks.get` | LMArena ELO score, global rank, vote count and confidence interval for a model |
|
|
198
210
|
| `cache.refresh` | Force-refresh OpenRouter catalog + LMArena ELO |
|
|
199
211
|
|
package/bin/or-info.mjs
CHANGED
|
@@ -173,7 +173,8 @@ program
|
|
|
173
173
|
program
|
|
174
174
|
.command('top')
|
|
175
175
|
.description('Best models for a task')
|
|
176
|
-
.option('--task <task>', 'Task: coding, reasoning, general, vision, cheap', 'general')
|
|
176
|
+
.option('--task <task>', 'Task: coding, reasoning, general, vision, cheap, premium', 'general')
|
|
177
|
+
.option('--pricing <mode>', 'Price scoring override: standard, cheap, premium', v => { const s = new Set(['standard', 'cheap', 'premium']); if (!s.has(v)) throw new InvalidArgumentError('must be standard, cheap, or premium'); return v; })
|
|
177
178
|
.option('--budget <usd>', 'Max price per 1M output tokens (e.g. 1.00)', parseFloat)
|
|
178
179
|
.option('--limit <n>', 'Number of results', parsePositiveInteger, 5)
|
|
179
180
|
.option('--json', 'Output raw JSON')
|
|
@@ -190,6 +191,7 @@ program
|
|
|
190
191
|
|
|
191
192
|
const ranked = rankModels(models, allElo, {
|
|
192
193
|
task: opts.task,
|
|
194
|
+
pricing: opts.pricing,
|
|
193
195
|
maxPricePerMOutput: opts.budget,
|
|
194
196
|
limit: opts.limit,
|
|
195
197
|
});
|
package/lib/scorer.mjs
CHANGED
|
@@ -35,11 +35,13 @@ function contextBonus(ctx, task) {
|
|
|
35
35
|
return 0.9;
|
|
36
36
|
}
|
|
37
37
|
|
|
38
|
-
// Score a model for a task.
|
|
38
|
+
// Score a model for a task with optional pricing override.
|
|
39
39
|
// Returns { score, qualityScore } or null if not eligible.
|
|
40
|
-
export function scoreForTask(model, eloEntry, task = 'general') {
|
|
40
|
+
export function scoreForTask(model, eloEntry, task = 'general', pricing) {
|
|
41
|
+
const { pricingMode, capability } = parseTaskSpec(task, pricing);
|
|
42
|
+
|
|
41
43
|
// Hard filter: vision requires vision capability
|
|
42
|
-
if (
|
|
44
|
+
if (capability === 'vision' && !supportsFeature(model, 'vision')) return null;
|
|
43
45
|
if (!eloEntry?.elo) return null;
|
|
44
46
|
|
|
45
47
|
const quality = normaliseElo(eloEntry.elo);
|
|
@@ -48,9 +50,9 @@ export function scoreForTask(model, eloEntry, task = 'general') {
|
|
|
48
50
|
|
|
49
51
|
// Price penalty: premium ignores price, cheap uses steep curve, others standard
|
|
50
52
|
let penalty;
|
|
51
|
-
if (
|
|
53
|
+
if (pricingMode === 'premium') {
|
|
52
54
|
penalty = 1.0;
|
|
53
|
-
} else if (
|
|
55
|
+
} else if (pricingMode === 'cheap') {
|
|
54
56
|
penalty = cheapPenalty(price.output);
|
|
55
57
|
} else {
|
|
56
58
|
penalty = pricePenalty(price.output);
|
|
@@ -69,15 +71,30 @@ export function scoreForTask(model, eloEntry, task = 'general') {
|
|
|
69
71
|
};
|
|
70
72
|
}
|
|
71
73
|
|
|
72
|
-
|
|
74
|
+
// Parse a task name (plus optional pricing override) into
// { eloCategory, pricingMode, capability }.
// This decouples the ELO category from the price penalty strategy.
//   task='coding'                     → coding ELO, standard pricing
//   task='coding', pricing='premium'  → coding ELO, no price penalty
//   task='cheap'                      → overall ELO, cheap pricing (legacy task name)
//   task='cheap',  pricing='premium'  → overall ELO, no price penalty (override wins)
const TASK_ELO = { coding: 'coding', reasoning: 'math', vision: null, general: null, cheap: null, premium: null };
const TASK_CAP = { vision: 'vision', coding: 'tools' };
const PRICING_MODES = new Set(['standard', 'cheap', 'premium']);

/**
 * Resolve the scoring configuration for a task.
 *
 * @param {string} task - Task name; tasks without a dedicated ELO category
 *   (including unknown names) fall back to the 'overall' category.
 * @param {string} [pricing] - Explicit pricing mode ('standard', 'cheap' or
 *   'premium'). When it is a valid mode it always wins; otherwise the task's
 *   default pricing is used.
 * @returns {{ eloCategory: string, pricingMode: string, capability: (string|null) }}
 */
function parseTaskSpec(task, pricing) {
  const eloCategory = TASK_ELO[task] ?? 'overall';
  const capability = TASK_CAP[task] ?? null;

  // Legacy: 'cheap' and 'premium' used as task names only provide the DEFAULT
  // pricing mode. They must not clobber an explicit, valid override, otherwise
  // `--task cheap --pricing premium` would silently ignore `--pricing`.
  const taskDefault = task === 'cheap' || task === 'premium' ? task : 'standard';

  // Missing or unrecognised pricing values fall back to the task default.
  const pricingMode = PRICING_MODES.has(pricing) ? pricing : taskDefault;

  return { eloCategory, pricingMode, capability };
}
|
|
73
92
|
|
|
74
|
-
export function rankModels(models, allElo, { task = 'general', maxPricePerMOutput, limit = 5 } = {}) {
|
|
75
|
-
|
|
76
|
-
// or a plain array (legacy). Select the right category for this task.
|
|
77
|
-
const category = CATEGORY_FOR_TASK[task] ?? 'overall';
|
|
93
|
+
export function rankModels(models, allElo, { task = 'general', pricing, maxPricePerMOutput, limit = 5 } = {}) {
|
|
94
|
+
const { eloCategory } = parseTaskSpec(task, pricing);
|
|
78
95
|
const entries = Array.isArray(allElo)
|
|
79
96
|
? allElo
|
|
80
|
-
: (allElo[
|
|
97
|
+
: (allElo[eloCategory] ?? allElo.overall ?? []);
|
|
81
98
|
|
|
82
99
|
const scored = [];
|
|
83
100
|
|
|
@@ -86,7 +103,7 @@ export function rankModels(models, allElo, { task = 'general', maxPricePerMOutpu
|
|
|
86
103
|
? entries.find((e) => _matchName(e.lmarenaName, model.id))
|
|
87
104
|
: null;
|
|
88
105
|
|
|
89
|
-
const result = scoreForTask(model, eloEntry, task);
|
|
106
|
+
const result = scoreForTask(model, eloEntry, task, pricing);
|
|
90
107
|
if (!result) continue;
|
|
91
108
|
|
|
92
109
|
const price = pricePerMillion(model);
|
|
@@ -95,7 +112,7 @@ export function rankModels(models, allElo, { task = 'general', maxPricePerMOutpu
|
|
|
95
112
|
scored.push({ model, score: result.score, qualityScore: result.qualityScore, eloEntry });
|
|
96
113
|
}
|
|
97
114
|
|
|
98
|
-
// Dedup :free variants — keep highest-scoring variant per base model
|
|
115
|
+
// Dedup 1: :free variants — keep highest-scoring variant per base model
|
|
99
116
|
const byBase = new Map();
|
|
100
117
|
for (const entry of scored) {
|
|
101
118
|
const baseId = entry.model.id.replace(/:free$/, '');
|
|
@@ -103,7 +120,17 @@ export function rankModels(models, allElo, { task = 'general', maxPricePerMOutpu
|
|
|
103
120
|
if (!prev || entry.score > prev.score) byBase.set(baseId, entry);
|
|
104
121
|
}
|
|
105
122
|
|
|
106
|
-
|
|
123
|
+
// Dedup 2: same ELO entry — multiple OR models can match one LMArena name
|
|
124
|
+
// (e.g. gpt-5.4-nano and gpt-5.4 both match "gpt-5.4-high").
|
|
125
|
+
// Keep only the best-scoring OR model per ELO entry.
|
|
126
|
+
const byElo = new Map();
|
|
127
|
+
for (const entry of byBase.values()) {
|
|
128
|
+
const eloKey = entry.eloEntry?.lmarenaName ?? entry.model.id;
|
|
129
|
+
const prev = byElo.get(eloKey);
|
|
130
|
+
if (!prev || entry.score > prev.score) byElo.set(eloKey, entry);
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
return [...byElo.values()].sort((a, b) => b.score - a.score).slice(0, limit);
|
|
107
134
|
}
|
|
108
135
|
|
|
109
136
|
// Inline minimal name matching (mirrors lmarena.mjs logic without importing it)
|
package/mcp/server.mjs
CHANGED
|
@@ -150,6 +150,11 @@ const CANONICAL_TOOLS = [
|
|
|
150
150
|
enum: ['coding', 'reasoning', 'general', 'vision', 'cheap', 'premium'],
|
|
151
151
|
description: 'Task type to optimise for',
|
|
152
152
|
},
|
|
153
|
+
pricing: {
|
|
154
|
+
type: 'string',
|
|
155
|
+
enum: ['standard', 'cheap', 'premium'],
|
|
156
|
+
description: 'Price scoring override. Set to "premium" with task="coding" for best coding model regardless of price',
|
|
157
|
+
},
|
|
153
158
|
max_price_per_m_output: {
|
|
154
159
|
type: 'number',
|
|
155
160
|
description: 'Maximum price per 1M output tokens in USD (e.g. 1.0)',
|
|
@@ -249,6 +254,22 @@ function errorContent(msg) {
|
|
|
249
254
|
return { content: [{ type: 'text', text: `Error: ${msg}` }], isError: true };
|
|
250
255
|
}
|
|
251
256
|
|
|
257
|
+
/**
 * Best-effort wrapper around `getElo`.
 *
 * ELO data is treated as optional enrichment: if the lookup throws or rejects,
 * the failure is reported on stderr and `null` is returned so the calling tool
 * can still answer with the remaining model data.
 *
 * @param {string} modelId - Model identifier forwarded to `getElo`.
 * @param {object} [opts] - Options forwarded unchanged to `getElo`.
 * @returns {Promise<*>} Whatever `getElo` resolves to, or `null` on failure.
 */
async function safeGetElo(modelId, opts) {
  try {
    return await getElo(modelId, opts);
  } catch (err) {
    // Do not swallow silently: without a trace, a failing lookup looks the same
    // as "this model has no ELO entry". stderr keeps stdout free for responses.
    console.error(`[or-info] ELO lookup failed for ${modelId}: ${err?.message ?? err}`);
    return null;
  }
}
|
|
264
|
+
|
|
265
|
+
/**
 * Best-effort wrapper around `getAllElo`.
 *
 * If the lookup throws or rejects, the failure is reported on stderr and an
 * empty object is returned, so callers receive a defined value instead of an
 * exception.
 *
 * @param {object} [opts] - Options forwarded unchanged to `getAllElo`.
 * @returns {Promise<*>} Whatever `getAllElo` resolves to, or `{}` on failure.
 */
async function safeGetAllElo(opts) {
  try {
    return await getAllElo(opts);
  } catch (err) {
    // Do not swallow silently: an empty ELO set changes results downstream, so
    // leave a diagnostic. stderr keeps stdout free for responses.
    console.error(`[or-info] ELO table fetch failed: ${err?.message ?? err}`);
    return {};
  }
}
|
|
272
|
+
|
|
252
273
|
async function handleTool(name, args) {
|
|
253
274
|
// Accept legacy flat names (get_model_info, list_models, ...) by mapping
|
|
254
275
|
// them to the dot-notation canonical names exposed in tools/list.
|
|
@@ -261,7 +282,7 @@ async function handleTool(name, args) {
|
|
|
261
282
|
const models = await fetchModels({ apiKey: key });
|
|
262
283
|
const model = findModel(models, model_id);
|
|
263
284
|
if (!model) return errorContent(`Model not found: ${model_id}`);
|
|
264
|
-
const elo = await
|
|
285
|
+
const elo = await safeGetElo(model_id);
|
|
265
286
|
return result({ ...safeModelSummary(model), lmarena_elo: elo ?? null });
|
|
266
287
|
}
|
|
267
288
|
|
|
@@ -286,7 +307,7 @@ async function handleTool(name, args) {
|
|
|
286
307
|
if (name === 'benchmarks.get') {
|
|
287
308
|
const { model_id } = args;
|
|
288
309
|
if (!model_id || typeof model_id !== 'string') return errorContent('model_id is required');
|
|
289
|
-
const elo = await
|
|
310
|
+
const elo = await safeGetElo(model_id);
|
|
290
311
|
return result({ model_id, lmarena_elo: elo ?? null });
|
|
291
312
|
}
|
|
292
313
|
|
|
@@ -295,8 +316,8 @@ async function handleTool(name, args) {
|
|
|
295
316
|
if (!model_a || !model_b) return errorContent('model_a and model_b are required');
|
|
296
317
|
const [models, eloA, eloB] = await Promise.all([
|
|
297
318
|
fetchModels({ apiKey: key }),
|
|
298
|
-
|
|
299
|
-
|
|
319
|
+
safeGetElo(model_a),
|
|
320
|
+
safeGetElo(model_b),
|
|
300
321
|
]);
|
|
301
322
|
const mA = findModel(models, model_a);
|
|
302
323
|
const mB = findModel(models, model_b);
|
|
@@ -307,11 +328,12 @@ async function handleTool(name, args) {
|
|
|
307
328
|
|
|
308
329
|
if (name === 'models.top') {
|
|
309
330
|
const task = args.task ?? 'general';
|
|
331
|
+
const pricing = args.pricing ?? undefined;
|
|
310
332
|
const limit = Math.min(20, Math.max(1, args.limit ?? 5));
|
|
311
333
|
const maxPrice = args.max_price_per_m_output ?? undefined;
|
|
312
334
|
|
|
313
|
-
const [models, allElo] = await Promise.all([fetchModels({ apiKey: key }),
|
|
314
|
-
const ranked = rankModels(models, allElo, { task, maxPricePerMOutput: maxPrice, limit });
|
|
335
|
+
const [models, allElo] = await Promise.all([fetchModels({ apiKey: key }), safeGetAllElo()]);
|
|
336
|
+
const ranked = rankModels(models, allElo, { task, pricing, maxPricePerMOutput: maxPrice, limit });
|
|
315
337
|
return result({ task, results: ranked.map((r) => ({ ...safeModelSummary(r.model), score: r.score, lmarena_elo: r.eloEntry })) });
|
|
316
338
|
}
|
|
317
339
|
|