@aggc/or-info 0.2.10 → 0.2.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/or-info.mjs +6 -1
- package/lib/lmarena.mjs +62 -34
- package/lib/scorer.mjs +11 -5
- package/mcp/server.mjs +1 -1
- package/package.json +1 -1
package/bin/or-info.mjs
CHANGED
|
@@ -158,6 +158,10 @@ program
|
|
|
158
158
|
if (!mA) die(`Model not found: ${idA}`);
|
|
159
159
|
if (!mB) die(`Model not found: ${idB}`);
|
|
160
160
|
|
|
161
|
+
if (mA.id === mB.id) {
|
|
162
|
+
console.log(chalk.yellow('Both model IDs resolve to the same model:') + ` ${mA.id}`);
|
|
163
|
+
}
|
|
164
|
+
|
|
161
165
|
if (opts.json) {
|
|
162
166
|
console.log(JSON.stringify({ a: { model: mA, elo: eloA }, b: { model: mB, elo: eloB } }, null, 2));
|
|
163
167
|
return;
|
|
@@ -213,7 +217,8 @@ program
|
|
|
213
217
|
|
|
214
218
|
process.stdout.write(chalk.dim('Refreshing LMArena ELO…'));
|
|
215
219
|
const elo = await loadLeaderboard({ force: true });
|
|
216
|
-
|
|
220
|
+
const eloCount = (elo.overall ?? []).length;
|
|
221
|
+
console.log(chalk.green(` ✓ ${eloCount} entries`));
|
|
217
222
|
});
|
|
218
223
|
|
|
219
224
|
// ── status ─────────────────────────────────────────────────────────────────
|
package/lib/lmarena.mjs
CHANGED
|
@@ -52,36 +52,52 @@ async function fetchPage(offset) {
|
|
|
52
52
|
throw new Error('LMArena request failed: exhausted retries');
|
|
53
53
|
}
|
|
54
54
|
|
|
55
|
-
//
|
|
56
|
-
//
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
55
|
+
// Categories we actually use. LMArena has ~25 but we only need 3.
|
|
56
|
+
// This cuts pages from ~89 to ~12 and avoids HuggingFace 429s.
|
|
57
|
+
const WANTED_CATEGORIES = new Set(['overall', 'coding', 'math']);
|
|
58
|
+
|
|
59
|
+
// Fetch only the wanted category rows from the dataset.
|
|
60
|
+
// The dataset is sorted by category so once we've moved past all
|
|
61
|
+
// wanted categories we stop early.
|
|
62
|
+
// Returns { overall: [...], coding: [...], math: [...] }
|
|
63
|
+
async function fetchAllByCategory() {
|
|
64
|
+
const byCategory = {};
|
|
60
65
|
let offset = 0;
|
|
66
|
+
let lastCat = null;
|
|
67
|
+
let passedAllWanted = false;
|
|
61
68
|
|
|
62
|
-
while (
|
|
69
|
+
while (!passedAllWanted) {
|
|
63
70
|
const page = await fetchPage(offset);
|
|
64
71
|
const rows = page.rows ?? [];
|
|
65
72
|
if (!rows.length) break;
|
|
66
73
|
|
|
67
|
-
let sawOther = false;
|
|
68
74
|
for (const { row } of rows) {
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
75
|
+
const cat = row.category;
|
|
76
|
+
lastCat = cat;
|
|
77
|
+
if (WANTED_CATEGORIES.has(cat)) {
|
|
78
|
+
if (!byCategory[cat]) byCategory[cat] = [];
|
|
79
|
+
byCategory[cat].push({
|
|
80
|
+
lmarenaName: row.model_name,
|
|
81
|
+
elo: Math.round(row.rating),
|
|
82
|
+
eloLower: Math.round(row.rating_lower),
|
|
83
|
+
eloUpper: Math.round(row.rating_upper),
|
|
84
|
+
votes: Math.round(row.vote_count),
|
|
85
|
+
rank: Math.round(row.rank),
|
|
86
|
+
updatedAt: row.leaderboard_publish_date,
|
|
87
|
+
});
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
// Stop early: all wanted categories found AND current page moved past them
|
|
92
|
+
const found = Object.keys(byCategory);
|
|
93
|
+
if (found.length === WANTED_CATEGORIES.size && !WANTED_CATEGORIES.has(lastCat)) {
|
|
94
|
+
passedAllWanted = true;
|
|
79
95
|
}
|
|
80
|
-
|
|
96
|
+
|
|
81
97
|
offset += PAGE;
|
|
82
98
|
if (offset >= (page.num_rows_total ?? Infinity)) break;
|
|
83
99
|
}
|
|
84
|
-
return
|
|
100
|
+
return byCategory;
|
|
85
101
|
}
|
|
86
102
|
|
|
87
103
|
// ── Name normalisation ─────────────────────────────────────────────────────
|
|
@@ -109,8 +125,6 @@ function orSlug(id) {
|
|
|
109
125
|
}
|
|
110
126
|
|
|
111
127
|
// Build a lookup Map from normalised LMArena name → entry.
|
|
112
|
-
// Also index by the last "segment" after the last '-' number group
|
|
113
|
-
// to help with partial matches.
|
|
114
128
|
function buildIndex(entries) {
|
|
115
129
|
const exact = new Map();
|
|
116
130
|
for (const e of entries) {
|
|
@@ -119,6 +133,15 @@ function buildIndex(entries) {
|
|
|
119
133
|
return exact;
|
|
120
134
|
}
|
|
121
135
|
|
|
136
|
+
// Build a per-category index: { overall: Map<name, entry>, coding: Map<name, entry>, ... }
|
|
137
|
+
function buildCategoryIndex(byCategory) {
|
|
138
|
+
const result = {};
|
|
139
|
+
for (const [cat, entries] of Object.entries(byCategory)) {
|
|
140
|
+
result[cat] = buildIndex(entries);
|
|
141
|
+
}
|
|
142
|
+
return result;
|
|
143
|
+
}
|
|
144
|
+
|
|
122
145
|
// Find the best matching LMArena entry for an OpenRouter model ID.
|
|
123
146
|
// Returns the entry or null.
|
|
124
147
|
function match(orId, index) {
|
|
@@ -147,29 +170,34 @@ function match(orId, index) {
|
|
|
147
170
|
|
|
148
171
|
// ── Public API ─────────────────────────────────────────────────────────────
|
|
149
172
|
|
|
150
|
-
let
|
|
173
|
+
let _categoryIndex = null; // { overall: Map<name, entry>, coding: Map<name, entry>, ... }
|
|
174
|
+
let _byCategory = null; // { overall: entry[], coding: entry[], ... }
|
|
151
175
|
|
|
152
176
|
export async function loadLeaderboard({ force = false } = {}) {
|
|
153
177
|
if (!force) {
|
|
154
178
|
const cached = await get(BENCHMARKS_CACHE, TTL.BENCHMARKS);
|
|
155
|
-
if (cached?.
|
|
156
|
-
|
|
157
|
-
|
|
179
|
+
if (cached?.byCategory) {
|
|
180
|
+
_byCategory = cached.byCategory;
|
|
181
|
+
_categoryIndex = buildCategoryIndex(cached.byCategory);
|
|
182
|
+
return cached.byCategory;
|
|
158
183
|
}
|
|
159
184
|
}
|
|
160
185
|
|
|
161
|
-
const
|
|
162
|
-
await set(BENCHMARKS_CACHE, {
|
|
163
|
-
|
|
164
|
-
|
|
186
|
+
const byCategory = await fetchAllByCategory();
|
|
187
|
+
await set(BENCHMARKS_CACHE, { byCategory, fetchedAt: Date.now() });
|
|
188
|
+
_byCategory = byCategory;
|
|
189
|
+
_categoryIndex = buildCategoryIndex(byCategory);
|
|
190
|
+
return byCategory;
|
|
165
191
|
}
|
|
166
192
|
|
|
167
|
-
export async function getElo(orModelId, { force = false } = {}) {
|
|
168
|
-
if (!
|
|
169
|
-
|
|
193
|
+
export async function getElo(orModelId, { force = false, category = 'overall' } = {}) {
|
|
194
|
+
if (!_categoryIndex || force) await loadLeaderboard({ force });
|
|
195
|
+
const idx = _categoryIndex[category] ?? _categoryIndex.overall;
|
|
196
|
+
if (!idx) return null;
|
|
197
|
+
return match(orModelId, idx);
|
|
170
198
|
}
|
|
171
199
|
|
|
172
200
|
export async function getAllElo({ force = false } = {}) {
|
|
173
|
-
const
|
|
174
|
-
return
|
|
201
|
+
const byCategory = await loadLeaderboard({ force });
|
|
202
|
+
return byCategory; // { overall: entry[], coding: entry[], ... }
|
|
175
203
|
}
|
package/lib/scorer.mjs
CHANGED
|
@@ -43,15 +43,21 @@ export function scoreForTask(model, eloEntry, task = 'general') {
|
|
|
43
43
|
};
|
|
44
44
|
}
|
|
45
45
|
|
|
46
|
+
const CATEGORY_FOR_TASK = { coding: 'coding', reasoning: 'math', vision: null };
|
|
47
|
+
|
|
46
48
|
export function rankModels(models, allElo, { task = 'general', maxPricePerMOutput, limit = 5 } = {}) {
|
|
47
|
-
//
|
|
48
|
-
//
|
|
49
|
+
// allElo may be a by-category map { overall: [...], coding: [...], ... }
|
|
50
|
+
// or a plain array (legacy). Select the right category for this task.
|
|
51
|
+
const category = CATEGORY_FOR_TASK[task] ?? 'overall';
|
|
52
|
+
const entries = Array.isArray(allElo)
|
|
53
|
+
? allElo
|
|
54
|
+
: (allElo[category] ?? allElo.overall ?? []);
|
|
55
|
+
|
|
49
56
|
const scored = [];
|
|
50
57
|
|
|
51
58
|
for (const model of models) {
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
? allElo.find((e) => _matchName(e.lmarenaName, model.id))
|
|
59
|
+
const eloEntry = entries.find
|
|
60
|
+
? entries.find((e) => _matchName(e.lmarenaName, model.id))
|
|
55
61
|
: null;
|
|
56
62
|
|
|
57
63
|
const result = scoreForTask(model, eloEntry, task);
|
package/mcp/server.mjs
CHANGED
|
@@ -320,7 +320,7 @@ async function handleTool(name, args) {
|
|
|
320
320
|
fetchModels({ force: true, apiKey: key }),
|
|
321
321
|
loadLeaderboard({ force: true }),
|
|
322
322
|
]);
|
|
323
|
-
return result({ refreshed: true, models_count: models.length, elo_entries: elo.length });
|
|
323
|
+
return result({ refreshed: true, models_count: models.length, elo_entries: (elo.overall ?? []).length });
|
|
324
324
|
}
|
|
325
325
|
|
|
326
326
|
return errorContent(`Unknown tool: ${name}`);
|