@aggc/or-info 0.2.13 → 0.2.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -154,26 +154,38 @@ or-info compare openai/gpt-4o deepseek/deepseek-chat-v3-0324 --json
154
154
  ### Top models for a task
155
155
 
156
156
  ```bash
157
- or-info top --task coding # Best coding models
158
- or-info top --task reasoning # Best reasoning models
159
- or-info top --task general # Best all-rounders
160
- or-info top --task vision # Best vision models (requires image input)
161
- or-info top --task cheap # Best value for money
162
- or-info top --task premium # Highest quality, ignoring price
163
- or-info top --task coding --budget 2 # Best coders under $2/M output
157
+ or-info top --task coding # Best coding models
158
+ or-info top --task reasoning # Best reasoning models
159
+ or-info top --task general # Best all-rounders
160
+ or-info top --task vision # Best vision models (requires image input)
161
+ or-info top --task cheap # Best value for money
162
+ or-info top --task premium # Highest quality, ignoring price
163
+ or-info top --task coding --pricing premium # Best coder regardless of price
164
+ or-info top --task coding --budget 2 # Best coders under $2/M output
164
165
  or-info top --task general --limit 10
165
166
  ```
166
167
 
167
- Ranking combines LMArena ELO with price and context window size. Task behaviour:
168
+ Ranking combines LMArena ELO with price and context window size.
168
169
 
169
- | Task | Price weight | Capability filter |
170
- |------|-------------|-------------------|
171
- | `general` | standard penalty | none |
172
- | `coding` | standard penalty | soft penalty (−15%) if no tool support |
173
- | `reasoning` | standard penalty | none |
174
- | `vision` | standard penalty | hard filter: image input required |
175
- | `cheap` | steep penalty | none |
176
- | `premium` | ignored | none |
170
+ `--task` controls which ELO category and capability filter to apply.
171
+ `--pricing` overrides the price-penalty strategy independently:
172
+
173
+ | `--pricing` | Effect |
174
+ |-------------|--------|
175
+ | `standard` (default) | Moderate penalty for expensive models |
176
+ | `cheap` | Steep penalty; strongly favours free/low-cost models |
177
+ | `premium` | No penalty; ranks by quality alone |
178
+
179
+ Task defaults (when `--pricing` is not set; the `cheap` and `premium` tasks always use their own pricing mode, even if `--pricing` is given):
180
+
181
+ | Task | Default pricing | Capability filter |
182
+ |------|----------------|-------------------|
183
+ | `general` | standard | none |
184
+ | `coding` | standard | soft penalty (−15%) if no tool support |
185
+ | `reasoning` | standard | none |
186
+ | `vision` | standard | hard filter: image input required |
187
+ | `cheap` | cheap | none |
188
+ | `premium` | premium | none |
177
189
 
178
190
  ### Cache management
179
191
 
@@ -193,7 +205,7 @@ or-info refresh # Force-refresh OpenRouter catalog + LMArena ELO
193
205
  | `models.get` | Pricing, context, architecture, features and LMArena ELO for a model |
194
206
  | `models.list` | List models with optional filter, sort and limit |
195
207
  | `models.compare` | Side-by-side comparison of two models |
196
- | `models.top` | Ranked top models for coding/reasoning/general/vision/cheap/premium |
208
+ | `models.top` | Ranked top models for coding/reasoning/general/vision/cheap/premium; accepts an optional `pricing` override |
197
209
  | `benchmarks.get` | LMArena ELO score, global rank, vote count and confidence interval for a model |
198
210
  | `cache.refresh` | Force-refresh OpenRouter catalog + LMArena ELO |
199
211
 
package/bin/or-info.mjs CHANGED
@@ -173,7 +173,8 @@ program
173
173
  program
174
174
  .command('top')
175
175
  .description('Best models for a task')
176
- .option('--task <task>', 'Task: coding, reasoning, general, vision, cheap', 'general')
176
+ .option('--task <task>', 'Task: coding, reasoning, general, vision, cheap, premium', 'general')
177
+ .option('--pricing <mode>', 'Price scoring override: standard, cheap, premium', v => { const s = new Set(['standard', 'cheap', 'premium']); if (!s.has(v)) throw new InvalidArgumentError('must be standard, cheap, or premium'); return v; })
177
178
  .option('--budget <usd>', 'Max price per 1M output tokens (e.g. 1.00)', parseFloat)
178
179
  .option('--limit <n>', 'Number of results', parsePositiveInteger, 5)
179
180
  .option('--json', 'Output raw JSON')
@@ -190,6 +191,7 @@ program
190
191
 
191
192
  const ranked = rankModels(models, allElo, {
192
193
  task: opts.task,
194
+ pricing: opts.pricing,
193
195
  maxPricePerMOutput: opts.budget,
194
196
  limit: opts.limit,
195
197
  });
package/lib/scorer.mjs CHANGED
@@ -35,11 +35,13 @@ function contextBonus(ctx, task) {
35
35
  return 0.9;
36
36
  }
37
37
 
38
- // Score a model for a task.
38
+ // Score a model for a task with optional pricing override.
39
39
  // Returns { score, qualityScore } or null if not eligible.
40
- export function scoreForTask(model, eloEntry, task = 'general') {
40
+ export function scoreForTask(model, eloEntry, task = 'general', pricing) {
41
+ const { pricingMode, capability } = parseTaskSpec(task, pricing);
42
+
41
43
  // Hard filter: vision requires vision capability
42
- if (task === 'vision' && !supportsFeature(model, 'vision')) return null;
44
+ if (capability === 'vision' && !supportsFeature(model, 'vision')) return null;
43
45
  if (!eloEntry?.elo) return null;
44
46
 
45
47
  const quality = normaliseElo(eloEntry.elo);
@@ -48,9 +50,9 @@ export function scoreForTask(model, eloEntry, task = 'general') {
48
50
 
49
51
  // Price penalty: premium ignores price, cheap uses steep curve, others standard
50
52
  let penalty;
51
- if (task === 'premium') {
53
+ if (pricingMode === 'premium') {
52
54
  penalty = 1.0;
53
- } else if (task === 'cheap') {
55
+ } else if (pricingMode === 'cheap') {
54
56
  penalty = cheapPenalty(price.output);
55
57
  } else {
56
58
  penalty = pricePenalty(price.output);
@@ -69,15 +71,30 @@ export function scoreForTask(model, eloEntry, task = 'general') {
69
71
  };
70
72
  }
71
73
 
72
- const CATEGORY_FOR_TASK = { coding: 'coding', reasoning: 'math', vision: null, premium: null };
74
+ // Parse task into { eloCategory, pricingMode, capability }.
75
+ // This decouples the ELO category from the price penalty strategy.
76
+ // task='coding' → coding ELO, standard pricing
77
+ // task='coding', pricing='premium' → coding ELO, no price penalty
78
+ const TASK_ELO = { coding: 'coding', reasoning: 'math', vision: null, general: null, cheap: null, premium: null };
79
+ const TASK_CAP = { vision: 'vision', coding: 'tools' };
80
+ const PRICING_MODES = new Set(['standard', 'cheap', 'premium']);
81
+
82
+ function parseTaskSpec(task, pricing) {
83
+ const eloCategory = TASK_ELO[task] ?? 'overall';
84
+ const capability = TASK_CAP[task] ?? null;
85
+ let pricingMode = pricing ?? 'standard';
86
+ // Legacy: 'cheap' and 'premium' as task names set pricing mode
87
+ if (task === 'cheap') pricingMode = 'cheap';
88
+ else if (task === 'premium') pricingMode = 'premium';
89
+ if (!PRICING_MODES.has(pricingMode)) pricingMode = 'standard';
90
+ return { eloCategory, pricingMode, capability };
91
+ }
73
92
 
74
- export function rankModels(models, allElo, { task = 'general', maxPricePerMOutput, limit = 5 } = {}) {
75
- // allElo may be a by-category map { overall: [...], coding: [...], ... }
76
- // or a plain array (legacy). Select the right category for this task.
77
- const category = CATEGORY_FOR_TASK[task] ?? 'overall';
93
+ export function rankModels(models, allElo, { task = 'general', pricing, maxPricePerMOutput, limit = 5 } = {}) {
94
+ const { eloCategory } = parseTaskSpec(task, pricing);
78
95
  const entries = Array.isArray(allElo)
79
96
  ? allElo
80
- : (allElo[category] ?? allElo.overall ?? []);
97
+ : (allElo[eloCategory] ?? allElo.overall ?? []);
81
98
 
82
99
  const scored = [];
83
100
 
@@ -86,7 +103,7 @@ export function rankModels(models, allElo, { task = 'general', maxPricePerMOutpu
86
103
  ? entries.find((e) => _matchName(e.lmarenaName, model.id))
87
104
  : null;
88
105
 
89
- const result = scoreForTask(model, eloEntry, task);
106
+ const result = scoreForTask(model, eloEntry, task, pricing);
90
107
  if (!result) continue;
91
108
 
92
109
  const price = pricePerMillion(model);
@@ -95,7 +112,7 @@ export function rankModels(models, allElo, { task = 'general', maxPricePerMOutpu
95
112
  scored.push({ model, score: result.score, qualityScore: result.qualityScore, eloEntry });
96
113
  }
97
114
 
98
- // Dedup :free variants — keep highest-scoring variant per base model
115
+ // Dedup 1: :free variants — keep highest-scoring variant per base model
99
116
  const byBase = new Map();
100
117
  for (const entry of scored) {
101
118
  const baseId = entry.model.id.replace(/:free$/, '');
@@ -103,7 +120,17 @@ export function rankModels(models, allElo, { task = 'general', maxPricePerMOutpu
103
120
  if (!prev || entry.score > prev.score) byBase.set(baseId, entry);
104
121
  }
105
122
 
106
- return [...byBase.values()].sort((a, b) => b.score - a.score).slice(0, limit);
123
+ // Dedup 2: same ELO entry — multiple OR models can match one LMArena name
124
+ // (e.g. gpt-5.4-nano and gpt-5.4 both match "gpt-5.4-high").
125
+ // Keep only the best-scoring OR model per ELO entry.
126
+ const byElo = new Map();
127
+ for (const entry of byBase.values()) {
128
+ const eloKey = entry.eloEntry?.lmarenaName ?? entry.model.id;
129
+ const prev = byElo.get(eloKey);
130
+ if (!prev || entry.score > prev.score) byElo.set(eloKey, entry);
131
+ }
132
+
133
+ return [...byElo.values()].sort((a, b) => b.score - a.score).slice(0, limit);
107
134
  }
108
135
 
109
136
  // Inline minimal name matching (mirrors lmarena.mjs logic without importing it)
package/mcp/server.mjs CHANGED
@@ -150,6 +150,11 @@ const CANONICAL_TOOLS = [
150
150
  enum: ['coding', 'reasoning', 'general', 'vision', 'cheap', 'premium'],
151
151
  description: 'Task type to optimise for',
152
152
  },
153
+ pricing: {
154
+ type: 'string',
155
+ enum: ['standard', 'cheap', 'premium'],
156
+ description: 'Price scoring override. Set to "premium" with task="coding" for best coding model regardless of price',
157
+ },
153
158
  max_price_per_m_output: {
154
159
  type: 'number',
155
160
  description: 'Maximum price per 1M output tokens in USD (e.g. 1.0)',
@@ -307,11 +312,12 @@ async function handleTool(name, args) {
307
312
 
308
313
  if (name === 'models.top') {
309
314
  const task = args.task ?? 'general';
315
+ const pricing = args.pricing ?? undefined;
310
316
  const limit = Math.min(20, Math.max(1, args.limit ?? 5));
311
317
  const maxPrice = args.max_price_per_m_output ?? undefined;
312
318
 
313
319
  const [models, allElo] = await Promise.all([fetchModels({ apiKey: key }), getAllElo()]);
314
- const ranked = rankModels(models, allElo, { task, maxPricePerMOutput: maxPrice, limit });
320
+ const ranked = rankModels(models, allElo, { task, pricing, maxPricePerMOutput: maxPrice, limit });
315
321
  return result({ task, results: ranked.map((r) => ({ ...safeModelSummary(r.model), score: r.score, lmarena_elo: r.eloEntry })) });
316
322
  }
317
323
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aggc/or-info",
3
- "version": "0.2.13",
3
+ "version": "0.2.14",
4
4
  "description": "CLI + MCP server for OpenRouter models: prices, benchmarks, context and comparisons",
5
5
  "type": "module",
6
6
  "engines": {