@aggc/or-info 0.2.9 → 0.2.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/or-info.mjs +7 -2
- package/lib/lmarena.mjs +62 -34
- package/lib/openrouter.mjs +11 -1
- package/lib/scorer.mjs +11 -5
- package/mcp/server.mjs +33 -8
- package/package.json +1 -1
package/bin/or-info.mjs
CHANGED
|
@@ -108,7 +108,7 @@ program
|
|
|
108
108
|
|
|
109
109
|
if (opts.json) {
|
|
110
110
|
console.log(JSON.stringify({
|
|
111
|
-
|
|
111
|
+
id: model.id,
|
|
112
112
|
pricing: model.pricing,
|
|
113
113
|
context_length: contextLength(model),
|
|
114
114
|
}, null, 2));
|
|
@@ -158,6 +158,10 @@ program
|
|
|
158
158
|
if (!mA) die(`Model not found: ${idA}`);
|
|
159
159
|
if (!mB) die(`Model not found: ${idB}`);
|
|
160
160
|
|
|
161
|
+
if (mA.id === mB.id) {
|
|
162
|
+
console.log(chalk.yellow('Both model IDs resolve to the same model:') + ` ${mA.id}`);
|
|
163
|
+
}
|
|
164
|
+
|
|
161
165
|
if (opts.json) {
|
|
162
166
|
console.log(JSON.stringify({ a: { model: mA, elo: eloA }, b: { model: mB, elo: eloB } }, null, 2));
|
|
163
167
|
return;
|
|
@@ -213,7 +217,8 @@ program
|
|
|
213
217
|
|
|
214
218
|
process.stdout.write(chalk.dim('Refreshing LMArena ELO…'));
|
|
215
219
|
const elo = await loadLeaderboard({ force: true });
|
|
216
|
-
|
|
220
|
+
const eloCount = (elo.overall ?? []).length;
|
|
221
|
+
console.log(chalk.green(` ✓ ${eloCount} entries`));
|
|
217
222
|
});
|
|
218
223
|
|
|
219
224
|
// ── status ─────────────────────────────────────────────────────────────────
|
package/lib/lmarena.mjs
CHANGED
|
@@ -52,36 +52,52 @@ async function fetchPage(offset) {
|
|
|
52
52
|
throw new Error('LMArena request failed: exhausted retries');
|
|
53
53
|
}
|
|
54
54
|
|
|
55
|
-
//
|
|
56
|
-
//
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
55
|
+
// Categories we actually use. LMArena has ~25 but we only need 3.
|
|
56
|
+
// This cuts pages from ~89 to ~12 and avoids HuggingFace 429s.
|
|
57
|
+
const WANTED_CATEGORIES = new Set(['overall', 'coding', 'math']);
|
|
58
|
+
|
|
59
|
+
// Fetch only the wanted category rows from the dataset.
|
|
60
|
+
// The dataset is sorted by category so once we've moved past all
|
|
61
|
+
// wanted categories we stop early.
|
|
62
|
+
// Returns { overall: [...], coding: [...], math: [...] }
|
|
63
|
+
async function fetchAllByCategory() {
|
|
64
|
+
const byCategory = {};
|
|
60
65
|
let offset = 0;
|
|
66
|
+
let lastCat = null;
|
|
67
|
+
let passedAllWanted = false;
|
|
61
68
|
|
|
62
|
-
while (
|
|
69
|
+
while (!passedAllWanted) {
|
|
63
70
|
const page = await fetchPage(offset);
|
|
64
71
|
const rows = page.rows ?? [];
|
|
65
72
|
if (!rows.length) break;
|
|
66
73
|
|
|
67
|
-
let sawOther = false;
|
|
68
74
|
for (const { row } of rows) {
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
75
|
+
const cat = row.category;
|
|
76
|
+
lastCat = cat;
|
|
77
|
+
if (WANTED_CATEGORIES.has(cat)) {
|
|
78
|
+
if (!byCategory[cat]) byCategory[cat] = [];
|
|
79
|
+
byCategory[cat].push({
|
|
80
|
+
lmarenaName: row.model_name,
|
|
81
|
+
elo: Math.round(row.rating),
|
|
82
|
+
eloLower: Math.round(row.rating_lower),
|
|
83
|
+
eloUpper: Math.round(row.rating_upper),
|
|
84
|
+
votes: Math.round(row.vote_count),
|
|
85
|
+
rank: Math.round(row.rank),
|
|
86
|
+
updatedAt: row.leaderboard_publish_date,
|
|
87
|
+
});
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
// Stop early: all wanted categories found AND current page moved past them
|
|
92
|
+
const found = Object.keys(byCategory);
|
|
93
|
+
if (found.length === WANTED_CATEGORIES.size && !WANTED_CATEGORIES.has(lastCat)) {
|
|
94
|
+
passedAllWanted = true;
|
|
79
95
|
}
|
|
80
|
-
|
|
96
|
+
|
|
81
97
|
offset += PAGE;
|
|
82
98
|
if (offset >= (page.num_rows_total ?? Infinity)) break;
|
|
83
99
|
}
|
|
84
|
-
return
|
|
100
|
+
return byCategory;
|
|
85
101
|
}
|
|
86
102
|
|
|
87
103
|
// ── Name normalisation ─────────────────────────────────────────────────────
|
|
@@ -109,8 +125,6 @@ function orSlug(id) {
|
|
|
109
125
|
}
|
|
110
126
|
|
|
111
127
|
// Build a lookup Map from normalised LMArena name → entry.
|
|
112
|
-
// Also index by the last "segment" after the last '-' number group
|
|
113
|
-
// to help with partial matches.
|
|
114
128
|
function buildIndex(entries) {
|
|
115
129
|
const exact = new Map();
|
|
116
130
|
for (const e of entries) {
|
|
@@ -119,6 +133,15 @@ function buildIndex(entries) {
|
|
|
119
133
|
return exact;
|
|
120
134
|
}
|
|
121
135
|
|
|
136
|
+
// Build a per-category index: { overall: Map<name, entry>, coding: Map<name, entry>, ... }
|
|
137
|
+
function buildCategoryIndex(byCategory) {
|
|
138
|
+
const result = {};
|
|
139
|
+
for (const [cat, entries] of Object.entries(byCategory)) {
|
|
140
|
+
result[cat] = buildIndex(entries);
|
|
141
|
+
}
|
|
142
|
+
return result;
|
|
143
|
+
}
|
|
144
|
+
|
|
122
145
|
// Find the best matching LMArena entry for an OpenRouter model ID.
|
|
123
146
|
// Returns the entry or null.
|
|
124
147
|
function match(orId, index) {
|
|
@@ -147,29 +170,34 @@ function match(orId, index) {
|
|
|
147
170
|
|
|
148
171
|
// ── Public API ─────────────────────────────────────────────────────────────
|
|
149
172
|
|
|
150
|
-
let
|
|
173
|
+
let _categoryIndex = null; // { overall: Map<name, entry>, coding: Map<name, entry>, ... }
|
|
174
|
+
let _byCategory = null; // { overall: entry[], coding: entry[], ... }
|
|
151
175
|
|
|
152
176
|
export async function loadLeaderboard({ force = false } = {}) {
|
|
153
177
|
if (!force) {
|
|
154
178
|
const cached = await get(BENCHMARKS_CACHE, TTL.BENCHMARKS);
|
|
155
|
-
if (cached?.
|
|
156
|
-
|
|
157
|
-
|
|
179
|
+
if (cached?.byCategory) {
|
|
180
|
+
_byCategory = cached.byCategory;
|
|
181
|
+
_categoryIndex = buildCategoryIndex(cached.byCategory);
|
|
182
|
+
return cached.byCategory;
|
|
158
183
|
}
|
|
159
184
|
}
|
|
160
185
|
|
|
161
|
-
const
|
|
162
|
-
await set(BENCHMARKS_CACHE, {
|
|
163
|
-
|
|
164
|
-
|
|
186
|
+
const byCategory = await fetchAllByCategory();
|
|
187
|
+
await set(BENCHMARKS_CACHE, { byCategory, fetchedAt: Date.now() });
|
|
188
|
+
_byCategory = byCategory;
|
|
189
|
+
_categoryIndex = buildCategoryIndex(byCategory);
|
|
190
|
+
return byCategory;
|
|
165
191
|
}
|
|
166
192
|
|
|
167
|
-
export async function getElo(orModelId, { force = false } = {}) {
|
|
168
|
-
if (!
|
|
169
|
-
|
|
193
|
+
export async function getElo(orModelId, { force = false, category = 'overall' } = {}) {
|
|
194
|
+
if (!_categoryIndex || force) await loadLeaderboard({ force });
|
|
195
|
+
const idx = _categoryIndex[category] ?? _categoryIndex.overall;
|
|
196
|
+
if (!idx) return null;
|
|
197
|
+
return match(orModelId, idx);
|
|
170
198
|
}
|
|
171
199
|
|
|
172
200
|
export async function getAllElo({ force = false } = {}) {
|
|
173
|
-
const
|
|
174
|
-
return
|
|
201
|
+
const byCategory = await loadLeaderboard({ force });
|
|
202
|
+
return byCategory; // { overall: entry[], coding: entry[], ... }
|
|
175
203
|
}
|
package/lib/openrouter.mjs
CHANGED
|
@@ -100,7 +100,17 @@ export function supportsFeature(model, feature) {
|
|
|
100
100
|
const featureMap = {
|
|
101
101
|
reasoning: ['include_reasoning', 'reasoning'],
|
|
102
102
|
tools: ['tools', 'tool_choice'],
|
|
103
|
-
vision: () =>
|
|
103
|
+
vision: () => {
|
|
104
|
+
// Prefer the canonical modality string (e.g. "text+image->text") because
|
|
105
|
+
// input_modalities is inconsistently populated by OpenRouter providers.
|
|
106
|
+
const modality = model?.architecture?.modality ?? '';
|
|
107
|
+
if (modality) {
|
|
108
|
+
return modality.split('->')[0].split('+').map((s) => s.trim()).includes('image');
|
|
109
|
+
}
|
|
110
|
+
const inputMods = model?.architecture?.input_modalities;
|
|
111
|
+
if (Array.isArray(inputMods)) return inputMods.includes('image');
|
|
112
|
+
return false;
|
|
113
|
+
},
|
|
104
114
|
structured: ['structured_outputs'],
|
|
105
115
|
};
|
|
106
116
|
const check = featureMap[feature];
|
package/lib/scorer.mjs
CHANGED
|
@@ -43,15 +43,21 @@ export function scoreForTask(model, eloEntry, task = 'general') {
|
|
|
43
43
|
};
|
|
44
44
|
}
|
|
45
45
|
|
|
46
|
+
const CATEGORY_FOR_TASK = { coding: 'coding', reasoning: 'math', vision: null };
|
|
47
|
+
|
|
46
48
|
export function rankModels(models, allElo, { task = 'general', maxPricePerMOutput, limit = 5 } = {}) {
|
|
47
|
-
//
|
|
48
|
-
//
|
|
49
|
+
// allElo may be a by-category map { overall: [...], coding: [...], ... }
|
|
50
|
+
// or a plain array (legacy). Select the right category for this task.
|
|
51
|
+
const category = CATEGORY_FOR_TASK[task] ?? 'overall';
|
|
52
|
+
const entries = Array.isArray(allElo)
|
|
53
|
+
? allElo
|
|
54
|
+
: (allElo[category] ?? allElo.overall ?? []);
|
|
55
|
+
|
|
49
56
|
const scored = [];
|
|
50
57
|
|
|
51
58
|
for (const model of models) {
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
? allElo.find((e) => _matchName(e.lmarenaName, model.id))
|
|
59
|
+
const eloEntry = entries.find
|
|
60
|
+
? entries.find((e) => _matchName(e.lmarenaName, model.id))
|
|
55
61
|
: null;
|
|
56
62
|
|
|
57
63
|
const result = scoreForTask(model, eloEntry, task);
|
package/mcp/server.mjs
CHANGED
|
@@ -320,7 +320,7 @@ async function handleTool(name, args) {
|
|
|
320
320
|
fetchModels({ force: true, apiKey: key }),
|
|
321
321
|
loadLeaderboard({ force: true }),
|
|
322
322
|
]);
|
|
323
|
-
return result({ refreshed: true, models_count: models.length, elo_entries: elo.length });
|
|
323
|
+
return result({ refreshed: true, models_count: models.length, elo_entries: (elo.overall ?? []).length });
|
|
324
324
|
}
|
|
325
325
|
|
|
326
326
|
return errorContent(`Unknown tool: ${name}`);
|
|
@@ -347,20 +347,45 @@ function wireHandlers(server) {
|
|
|
347
347
|
}
|
|
348
348
|
|
|
349
349
|
export async function startMcp() {
|
|
350
|
+
// Track in-flight tool calls so we don't exit while a response is still being written.
|
|
351
|
+
// Race condition: stdin EOF fires before the async handleTool completes, causing
|
|
352
|
+
// process.exit(0) to kill the process before the MCP SDK writes the response to stdout.
|
|
353
|
+
let pending = 0;
|
|
354
|
+
let stdinEnded = false;
|
|
355
|
+
let resolveWhenDone;
|
|
356
|
+
const donePromise = new Promise((res) => { resolveWhenDone = res; });
|
|
357
|
+
|
|
358
|
+
function checkDone() {
|
|
359
|
+
if (stdinEnded && pending === 0) resolveWhenDone();
|
|
360
|
+
}
|
|
361
|
+
|
|
350
362
|
const server = makeServer();
|
|
351
|
-
|
|
363
|
+
server.setRequestHandler(ListToolsRequestSchema, async () => ({ tools: TOOLS }));
|
|
364
|
+
server.setRequestHandler(CallToolRequestSchema, async (req) => {
|
|
365
|
+
const { name, arguments: args } = req.params;
|
|
366
|
+
pending++;
|
|
367
|
+
try {
|
|
368
|
+
return await handleTool(name, args ?? {});
|
|
369
|
+
} catch (err) {
|
|
370
|
+
const safe = err.message?.replace(/sk-[a-zA-Z0-9-]+/g, '[REDACTED]') ?? 'Unexpected error';
|
|
371
|
+
return errorContent(safe);
|
|
372
|
+
} finally {
|
|
373
|
+
pending--;
|
|
374
|
+
// Defer checkDone by one tick so the SDK's response-write microtask runs first.
|
|
375
|
+
setImmediate(checkDone);
|
|
376
|
+
}
|
|
377
|
+
});
|
|
352
378
|
|
|
353
379
|
const transport = new StdioServerTransport();
|
|
354
380
|
await server.connect(transport);
|
|
355
381
|
|
|
356
|
-
// server.connect() returns immediately after wiring up the transport.
|
|
357
|
-
// Block here until stdin closes so the process stays alive while serving.
|
|
358
382
|
if (!process.stdin.destroyed) {
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
});
|
|
383
|
+
process.stdin.once('close', () => { stdinEnded = true; checkDone(); });
|
|
384
|
+
process.stdin.once('end', () => { stdinEnded = true; checkDone(); });
|
|
385
|
+
await donePromise;
|
|
363
386
|
}
|
|
387
|
+
// One extra tick for any buffered stdout writes before the caller calls process.exit().
|
|
388
|
+
await new Promise((resolve) => setImmediate(resolve));
|
|
364
389
|
}
|
|
365
390
|
|
|
366
391
|
export async function startHttpMcp() {
|