@aggc/or-info 0.2.13 → 0.2.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -154,26 +154,38 @@ or-info compare openai/gpt-4o deepseek/deepseek-chat-v3-0324 --json
154
154
  ### Top models for a task
155
155
 
156
156
  ```bash
157
- or-info top --task coding # Best coding models
158
- or-info top --task reasoning # Best reasoning models
159
- or-info top --task general # Best all-rounders
160
- or-info top --task vision # Best vision models (requires image input)
161
- or-info top --task cheap # Best value for money
162
- or-info top --task premium # Highest quality, ignoring price
163
- or-info top --task coding --budget 2 # Best coders under $2/M output
157
+ or-info top --task coding # Best coding models
158
+ or-info top --task reasoning # Best reasoning models
159
+ or-info top --task general # Best all-rounders
160
+ or-info top --task vision # Best vision models (requires image input)
161
+ or-info top --task cheap # Best value for money
162
+ or-info top --task premium # Highest quality, ignoring price
163
+ or-info top --task coding --pricing premium # Best coder regardless of price
164
+ or-info top --task coding --budget 2 # Best coders under $2/M output
164
165
  or-info top --task general --limit 10
165
166
  ```
166
167
 
167
- Ranking combines LMArena ELO with price and context window size. Task behaviour:
168
+ Ranking combines LMArena ELO with price and context window size.
168
169
 
169
- | Task | Price weight | Capability filter |
170
- |------|-------------|-------------------|
171
- | `general` | standard penalty | none |
172
- | `coding` | standard penalty | soft penalty (−15%) if no tool support |
173
- | `reasoning` | standard penalty | none |
174
- | `vision` | standard penalty | hard filter: image input required |
175
- | `cheap` | steep penalty | none |
176
- | `premium` | ignored | none |
170
+ `--task` controls which ELO category and capability filter to apply.
171
+ `--pricing` overrides the price-penalty strategy independently:
172
+
173
+ | `--pricing` | Effect |
174
+ |-------------|--------|
175
+ | `standard` (default) | Moderate penalty for expensive models |
176
+ | `cheap` | Steep penalty; strongly favours free/low-cost models |
177
+ | `premium` | No penalty; ranks by quality alone |
178
+
179
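+ Task defaults (applied when `--pricing` is not set; the `cheap` and `premium` tasks always use their own pricing mode, even if `--pricing` is given):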
180
+
181
+ | Task | Default pricing | Capability filter |
182
+ |------|----------------|-------------------|
183
+ | `general` | standard | none |
184
+ | `coding` | standard | soft penalty (−15%) if no tool support |
185
+ | `reasoning` | standard | none |
186
+ | `vision` | standard | hard filter: image input required |
187
+ | `cheap` | cheap | none |
188
+ | `premium` | premium | none |
177
189
 
178
190
  ### Cache management
179
191
 
@@ -193,7 +205,7 @@ or-info refresh # Force-refresh OpenRouter catalog + LMArena ELO
193
205
  | `models.get` | Pricing, context, architecture, features and LMArena ELO for a model |
194
206
  | `models.list` | List models with optional filter, sort and limit |
195
207
  | `models.compare` | Side-by-side comparison of two models |
196
- | `models.top` | Ranked top models for coding/reasoning/general/vision/cheap/premium |
208
+ | `models.top` | Ranked top models for coding/reasoning/general/vision/cheap/premium; accepts optional `pricing` override |
197
209
  | `benchmarks.get` | LMArena ELO score, global rank, vote count and confidence interval for a model |
198
210
  | `cache.refresh` | Force-refresh OpenRouter catalog + LMArena ELO |
199
211
 
package/bin/or-info.mjs CHANGED
@@ -173,7 +173,8 @@ program
173
173
  program
174
174
  .command('top')
175
175
  .description('Best models for a task')
176
- .option('--task <task>', 'Task: coding, reasoning, general, vision, cheap', 'general')
176
+ .option('--task <task>', 'Task: coding, reasoning, general, vision, cheap, premium', 'general')
177
+ .option('--pricing <mode>', 'Price scoring override: standard, cheap, premium', v => { const s = new Set(['standard', 'cheap', 'premium']); if (!s.has(v)) throw new InvalidArgumentError('must be standard, cheap, or premium'); return v; })
177
178
  .option('--budget <usd>', 'Max price per 1M output tokens (e.g. 1.00)', parseFloat)
178
179
  .option('--limit <n>', 'Number of results', parsePositiveInteger, 5)
179
180
  .option('--json', 'Output raw JSON')
@@ -190,6 +191,7 @@ program
190
191
 
191
192
  const ranked = rankModels(models, allElo, {
192
193
  task: opts.task,
194
+ pricing: opts.pricing,
193
195
  maxPricePerMOutput: opts.budget,
194
196
  limit: opts.limit,
195
197
  });
package/lib/scorer.mjs CHANGED
@@ -35,11 +35,13 @@ function contextBonus(ctx, task) {
35
35
  return 0.9;
36
36
  }
37
37
 
38
- // Score a model for a task.
38
+ // Score a model for a task with optional pricing override.
39
39
  // Returns { score, qualityScore } or null if not eligible.
40
- export function scoreForTask(model, eloEntry, task = 'general') {
40
+ export function scoreForTask(model, eloEntry, task = 'general', pricing) {
41
+ const { pricingMode, capability } = parseTaskSpec(task, pricing);
42
+
41
43
  // Hard filter: vision requires vision capability
42
- if (task === 'vision' && !supportsFeature(model, 'vision')) return null;
44
+ if (capability === 'vision' && !supportsFeature(model, 'vision')) return null;
43
45
  if (!eloEntry?.elo) return null;
44
46
 
45
47
  const quality = normaliseElo(eloEntry.elo);
@@ -48,9 +50,9 @@ export function scoreForTask(model, eloEntry, task = 'general') {
48
50
 
49
51
  // Price penalty: premium ignores price, cheap uses steep curve, others standard
50
52
  let penalty;
51
- if (task === 'premium') {
53
+ if (pricingMode === 'premium') {
52
54
  penalty = 1.0;
53
- } else if (task === 'cheap') {
55
+ } else if (pricingMode === 'cheap') {
54
56
  penalty = cheapPenalty(price.output);
55
57
  } else {
56
58
  penalty = pricePenalty(price.output);
@@ -69,15 +71,30 @@ export function scoreForTask(model, eloEntry, task = 'general') {
69
71
  };
70
72
  }
71
73
 
72
- const CATEGORY_FOR_TASK = { coding: 'coding', reasoning: 'math', vision: null, premium: null };
74
+ // Parse task into { eloCategory, pricingMode, capability }.
75
+ // This decouples the ELO category from the price penalty strategy.
76
+ // task='coding' → coding ELO, standard pricing
77
+ // task='coding', pricing='premium' → coding ELO, no price penalty
78
+ const TASK_ELO = { coding: 'coding', reasoning: 'math', vision: null, general: null, cheap: null, premium: null };
79
+ const TASK_CAP = { vision: 'vision', coding: 'tools' };
80
+ const PRICING_MODES = new Set(['standard', 'cheap', 'premium']);
81
+
82
+ function parseTaskSpec(task, pricing) {
83
+ const eloCategory = TASK_ELO[task] ?? 'overall';
84
+ const capability = TASK_CAP[task] ?? null;
85
+ let pricingMode = pricing ?? 'standard';
86
+ // Legacy: 'cheap' and 'premium' as task names set pricing mode
87
+ if (task === 'cheap') pricingMode = 'cheap';
88
+ else if (task === 'premium') pricingMode = 'premium';
89
+ if (!PRICING_MODES.has(pricingMode)) pricingMode = 'standard';
90
+ return { eloCategory, pricingMode, capability };
91
+ }
73
92
 
74
- export function rankModels(models, allElo, { task = 'general', maxPricePerMOutput, limit = 5 } = {}) {
75
- // allElo may be a by-category map { overall: [...], coding: [...], ... }
76
- // or a plain array (legacy). Select the right category for this task.
77
- const category = CATEGORY_FOR_TASK[task] ?? 'overall';
93
+ export function rankModels(models, allElo, { task = 'general', pricing, maxPricePerMOutput, limit = 5 } = {}) {
94
+ const { eloCategory } = parseTaskSpec(task, pricing);
78
95
  const entries = Array.isArray(allElo)
79
96
  ? allElo
80
- : (allElo[category] ?? allElo.overall ?? []);
97
+ : (allElo[eloCategory] ?? allElo.overall ?? []);
81
98
 
82
99
  const scored = [];
83
100
 
@@ -86,7 +103,7 @@ export function rankModels(models, allElo, { task = 'general', maxPricePerMOutpu
86
103
  ? entries.find((e) => _matchName(e.lmarenaName, model.id))
87
104
  : null;
88
105
 
89
- const result = scoreForTask(model, eloEntry, task);
106
+ const result = scoreForTask(model, eloEntry, task, pricing);
90
107
  if (!result) continue;
91
108
 
92
109
  const price = pricePerMillion(model);
@@ -95,7 +112,7 @@ export function rankModels(models, allElo, { task = 'general', maxPricePerMOutpu
95
112
  scored.push({ model, score: result.score, qualityScore: result.qualityScore, eloEntry });
96
113
  }
97
114
 
98
- // Dedup :free variants — keep highest-scoring variant per base model
115
+ // Dedup 1: :free variants — keep highest-scoring variant per base model
99
116
  const byBase = new Map();
100
117
  for (const entry of scored) {
101
118
  const baseId = entry.model.id.replace(/:free$/, '');
@@ -103,7 +120,17 @@ export function rankModels(models, allElo, { task = 'general', maxPricePerMOutpu
103
120
  if (!prev || entry.score > prev.score) byBase.set(baseId, entry);
104
121
  }
105
122
 
106
- return [...byBase.values()].sort((a, b) => b.score - a.score).slice(0, limit);
123
+ // Dedup 2: same ELO entry — multiple OR models can match one LMArena name
124
+ // (e.g. gpt-5.4-nano and gpt-5.4 both match "gpt-5.4-high").
125
+ // Keep only the best-scoring OR model per ELO entry.
126
+ const byElo = new Map();
127
+ for (const entry of byBase.values()) {
128
+ const eloKey = entry.eloEntry?.lmarenaName ?? entry.model.id;
129
+ const prev = byElo.get(eloKey);
130
+ if (!prev || entry.score > prev.score) byElo.set(eloKey, entry);
131
+ }
132
+
133
+ return [...byElo.values()].sort((a, b) => b.score - a.score).slice(0, limit);
107
134
  }
108
135
 
109
136
  // Inline minimal name matching (mirrors lmarena.mjs logic without importing it)
package/mcp/server.mjs CHANGED
@@ -150,6 +150,11 @@ const CANONICAL_TOOLS = [
150
150
  enum: ['coding', 'reasoning', 'general', 'vision', 'cheap', 'premium'],
151
151
  description: 'Task type to optimise for',
152
152
  },
153
+ pricing: {
154
+ type: 'string',
155
+ enum: ['standard', 'cheap', 'premium'],
156
+ description: 'Price scoring override. Set to "premium" with task="coding" for best coding model regardless of price',
157
+ },
153
158
  max_price_per_m_output: {
154
159
  type: 'number',
155
160
  description: 'Maximum price per 1M output tokens in USD (e.g. 1.0)',
@@ -249,6 +254,22 @@ function errorContent(msg) {
249
254
  return { content: [{ type: 'text', text: `Error: ${msg}` }], isError: true };
250
255
  }
251
256
 
257
+ async function safeGetElo(modelId, opts) {
258
+ try {
259
+ return await getElo(modelId, opts);
260
+ } catch {
261
+ return null;
262
+ }
263
+ }
264
+
265
+ async function safeGetAllElo(opts) {
266
+ try {
267
+ return await getAllElo(opts);
268
+ } catch {
269
+ return {};
270
+ }
271
+ }
272
+
252
273
  async function handleTool(name, args) {
253
274
  // Accept legacy flat names (get_model_info, list_models, ...) by mapping
254
275
  // them to the dot-notation canonical names exposed in tools/list.
@@ -261,7 +282,7 @@ async function handleTool(name, args) {
261
282
  const models = await fetchModels({ apiKey: key });
262
283
  const model = findModel(models, model_id);
263
284
  if (!model) return errorContent(`Model not found: ${model_id}`);
264
- const elo = await getElo(model_id);
285
+ const elo = await safeGetElo(model_id);
265
286
  return result({ ...safeModelSummary(model), lmarena_elo: elo ?? null });
266
287
  }
267
288
 
@@ -286,7 +307,7 @@ async function handleTool(name, args) {
286
307
  if (name === 'benchmarks.get') {
287
308
  const { model_id } = args;
288
309
  if (!model_id || typeof model_id !== 'string') return errorContent('model_id is required');
289
- const elo = await getElo(model_id);
310
+ const elo = await safeGetElo(model_id);
290
311
  return result({ model_id, lmarena_elo: elo ?? null });
291
312
  }
292
313
 
@@ -295,8 +316,8 @@ async function handleTool(name, args) {
295
316
  if (!model_a || !model_b) return errorContent('model_a and model_b are required');
296
317
  const [models, eloA, eloB] = await Promise.all([
297
318
  fetchModels({ apiKey: key }),
298
- getElo(model_a),
299
- getElo(model_b),
319
+ safeGetElo(model_a),
320
+ safeGetElo(model_b),
300
321
  ]);
301
322
  const mA = findModel(models, model_a);
302
323
  const mB = findModel(models, model_b);
@@ -307,11 +328,12 @@ async function handleTool(name, args) {
307
328
 
308
329
  if (name === 'models.top') {
309
330
  const task = args.task ?? 'general';
331
+ const pricing = args.pricing ?? undefined;
310
332
  const limit = Math.min(20, Math.max(1, args.limit ?? 5));
311
333
  const maxPrice = args.max_price_per_m_output ?? undefined;
312
334
 
313
- const [models, allElo] = await Promise.all([fetchModels({ apiKey: key }), getAllElo()]);
314
- const ranked = rankModels(models, allElo, { task, maxPricePerMOutput: maxPrice, limit });
335
+ const [models, allElo] = await Promise.all([fetchModels({ apiKey: key }), safeGetAllElo()]);
336
+ const ranked = rankModels(models, allElo, { task, pricing, maxPricePerMOutput: maxPrice, limit });
315
337
  return result({ task, results: ranked.map((r) => ({ ...safeModelSummary(r.model), score: r.score, lmarena_elo: r.eloEntry })) });
316
338
  }
317
339
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aggc/or-info",
3
- "version": "0.2.13",
3
+ "version": "0.2.15",
4
4
  "description": "CLI + MCP server for OpenRouter models: prices, benchmarks, context and comparisons",
5
5
  "type": "module",
6
6
  "engines": {