@aggc/or-info 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +294 -0
- package/bin/or-info.mjs +240 -0
- package/lib/cache.mjs +117 -0
- package/lib/formatter.mjs +178 -0
- package/lib/lmarena.mjs +174 -0
- package/lib/openrouter.mjs +125 -0
- package/lib/paths.mjs +53 -0
- package/lib/scorer.mjs +81 -0
- package/lib/secrets.mjs +41 -0
- package/mcp/server.mjs +213 -0
- package/package.json +51 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 or-info contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
# or-info
|
|
2
|
+
|
|
3
|
+
> CLI + MCP server to query OpenRouter model info: prices, ELO rankings, context and comparisons.
|
|
4
|
+
|
|
5
|
+
Any person or AI agent (Claude Code, Cursor, pi, etc.) can install it and use it
|
|
6
|
+
to make informed decisions about which model to use.
|
|
7
|
+
|
|
8
|
+
[npm package](https://www.npmjs.com/package/@aggc/or-info)
|
|
9
|
+
[CI status](https://github.com/jmtrs/or-info/actions/workflows/ci.yml)
|
|
10
|
+
[MIT license](LICENSE)
|
|
11
|
+
|
|
12
|
+
## Install
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
npm install -g @aggc/or-info
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
Requires Node.js 22 or later.
|
|
19
|
+
|
|
20
|
+
Supported runtimes and platforms:
|
|
21
|
+
|
|
22
|
+
- Node.js 22+
|
|
23
|
+
- macOS, Linux, Windows
|
|
24
|
+
|
|
25
|
+
## Config and cache paths
|
|
26
|
+
|
|
27
|
+
`or-info` resolves config and cache natively per platform:
|
|
28
|
+
|
|
29
|
+
| Platform | Config directory | Cache directory |
|
|
30
|
+
|----------|------------------|-----------------|
|
|
31
|
+
| macOS / Linux | `$XDG_CONFIG_HOME/or-info` or `~/.config/or-info` | `$XDG_CACHE_HOME/or-info` or `~/.cache/or-info` |
|
|
32
|
+
| Windows | `%APPDATA%\or-info` | `%LOCALAPPDATA%\or-info` |
|
|
33
|
+
| Any platform | `OR_INFO_CONFIG_DIR` override | `OR_INFO_CACHE_DIR` override |
|
|
34
|
+
|
|
35
|
+
Files of interest:
|
|
36
|
+
|
|
37
|
+
- Config file: `<config-dir>/.env`
|
|
38
|
+
- Model cache: `<cache-dir>/models.json`
|
|
39
|
+
- LMArena cache: `<cache-dir>/benchmarks.json`
|
|
40
|
+
|
|
41
|
+
## API key (optional)
|
|
42
|
+
|
|
43
|
+
Without an API key the CLI works with OpenRouter's public catalog.
|
|
44
|
+
With a key you also see private/pay-gated models.
|
|
45
|
+
|
|
46
|
+
### Bash / Zsh
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
export OPENROUTER_API_KEY=sk-or-...
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### PowerShell
|
|
53
|
+
|
|
54
|
+
```powershell
|
|
55
|
+
$env:OPENROUTER_API_KEY = "sk-or-..."
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### CMD
|
|
59
|
+
|
|
60
|
+
```cmd
|
|
61
|
+
set OPENROUTER_API_KEY=sk-or-...
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
### Config file
|
|
65
|
+
|
|
66
|
+
macOS / Linux:
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
mkdir -p ~/.config/or-info
|
|
70
|
+
echo 'OPENROUTER_API_KEY=sk-or-...' >> ~/.config/or-info/.env
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Windows PowerShell:
|
|
74
|
+
|
|
75
|
+
```powershell
|
|
76
|
+
New-Item -ItemType Directory -Force "$env:APPDATA\or-info" | Out-Null
|
|
77
|
+
Add-Content -Path "$env:APPDATA\or-info\.env" -Value "OPENROUTER_API_KEY=sk-or-..."
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Windows CMD:
|
|
81
|
+
|
|
82
|
+
```cmd
|
|
83
|
+
if not exist "%APPDATA%\or-info" mkdir "%APPDATA%\or-info"
|
|
84
|
+
echo OPENROUTER_API_KEY=sk-or-...>> "%APPDATA%\or-info\.env"
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
For tests and debugging you can redirect storage without touching your real machine state:
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
OR_INFO_CONFIG_DIR=/tmp/or-info-config OR_INFO_CACHE_DIR=/tmp/or-info-cache or-info status
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## CLI usage
|
|
94
|
+
|
|
95
|
+
### List models
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
or-info models # All models sorted by name
|
|
99
|
+
or-info models --sort price # Cheapest output first
|
|
100
|
+
or-info models --sort context # Largest context first
|
|
101
|
+
or-info models --filter coding # Models whose ID/name contains "coding"
|
|
102
|
+
or-info models --free # Free models only
|
|
103
|
+
or-info models --limit 20 # Limit the number of results
|
|
104
|
+
or-info models --tags # Show feature tags (vision, tools, reasoning…)
|
|
105
|
+
or-info models --json # Raw JSON
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### Pricing and details
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
or-info price anthropic/claude-sonnet-4.5
|
|
112
|
+
or-info price google/gemini-2.5-flash --json
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### ELO ranking (LMArena)
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
or-info benchmark openai/gpt-4o
|
|
119
|
+
or-info benchmark deepseek/deepseek-r1 --json
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Shows the model's ELO score from [LMArena](https://lmarena.ai) (human preference votes),
|
|
123
|
+
confidence interval, global rank and vote count. Data is fetched live from HuggingFace
|
|
124
|
+
and cached locally for 24 hours — no API key required.
|
|
125
|
+
|
|
126
|
+
### Compare two models
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
or-info compare anthropic/claude-sonnet-4.5 google/gemini-2.5-flash
|
|
130
|
+
or-info compare openai/gpt-4o deepseek/deepseek-chat-v3-0324 --json
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
### Top models for a task
|
|
134
|
+
|
|
135
|
+
```bash
|
|
136
|
+
or-info top --task coding # Best coding models
|
|
137
|
+
or-info top --task reasoning # Best reasoning models
|
|
138
|
+
or-info top --task general # Best all-rounders
|
|
139
|
+
or-info top --task vision # Best vision models
|
|
140
|
+
or-info top --task cheap # Best value for money
|
|
141
|
+
or-info top --task coding --budget 2 # Best coders under $2/M output
|
|
142
|
+
or-info top --task general --limit 10
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
Ranking combines LMArena ELO with price. `--task vision` and `--task coding` additionally
|
|
146
|
+
filter for models that support the required capability (image input / tool use).
|
|
147
|
+
|
|
148
|
+
### Cache management
|
|
149
|
+
|
|
150
|
+
```bash
|
|
151
|
+
or-info status # Show cache age and TTL for each data source
|
|
152
|
+
or-info refresh # Force-refresh OpenRouter catalog + LMArena ELO
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
## MCP server
|
|
156
|
+
|
|
157
|
+
`or-info` can run as an MCP server for AI agents.
|
|
158
|
+
|
|
159
|
+
### Tools available
|
|
160
|
+
|
|
161
|
+
| Tool | Description |
|
|
162
|
+
|------|-------------|
|
|
163
|
+
| `get_model_info` | Pricing, context, architecture, features and LMArena ELO for a model |
|
|
164
|
+
| `list_models` | List models with optional filter, sort and limit |
|
|
165
|
+
| `get_benchmarks` | LMArena ELO, rank, votes and confidence interval for a model |
|
|
166
|
+
| `compare_models` | Side-by-side comparison of two models |
|
|
167
|
+
| `best_for_task` | Ranked top models for coding/reasoning/general/vision/cheap |
|
|
168
|
+
| `refresh_cache` | Force-refresh OpenRouter catalog + LMArena ELO |
|
|
169
|
+
|
|
170
|
+
### Register in Claude Code
|
|
171
|
+
|
|
172
|
+
Add to `~/.claude/settings.json`:
|
|
173
|
+
|
|
174
|
+
```json
|
|
175
|
+
{
|
|
176
|
+
"mcpServers": {
|
|
177
|
+
"or-info": {
|
|
178
|
+
"command": "or-info",
|
|
179
|
+
"args": ["--mcp"],
|
|
180
|
+
"env": {
|
|
181
|
+
"OPENROUTER_API_KEY": "sk-or-..."
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
Windows global install:
|
|
189
|
+
|
|
190
|
+
```json
|
|
191
|
+
{
|
|
192
|
+
"mcpServers": {
|
|
193
|
+
"or-info": {
|
|
194
|
+
"command": "or-info.cmd",
|
|
195
|
+
"args": ["--mcp"],
|
|
196
|
+
"env": {
|
|
197
|
+
"OPENROUTER_API_KEY": "sk-or-..."
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
If you run it through `npx` (without a global install) on macOS/Linux:
|
|
205
|
+
|
|
206
|
+
```json
|
|
207
|
+
{
|
|
208
|
+
"mcpServers": {
|
|
209
|
+
"or-info": {
|
|
210
|
+
"command": "npx",
|
|
211
|
+
"args": ["-y", "@aggc/or-info", "--mcp"]
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
If you run it through `npx` (without a global install) on Windows:
|
|
218
|
+
|
|
219
|
+
```json
|
|
220
|
+
{
|
|
221
|
+
"mcpServers": {
|
|
222
|
+
"or-info": {
|
|
223
|
+
"command": "npx.cmd",
|
|
224
|
+
"args": ["-y", "@aggc/or-info", "--mcp"]
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
Then verify:
|
|
231
|
+
|
|
232
|
+
```bash
|
|
233
|
+
claude mcp list
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
### Use from Pi
|
|
237
|
+
|
|
238
|
+
Pi does not use an `mcpServers` settings schema. The recommended integration is a Pi skill
|
|
239
|
+
that calls the installed `or-info` CLI, for example `~/.pi/agent/skills/or-info/SKILL.md`.
|
|
240
|
+
|
|
241
|
+
### Test the MCP server
|
|
242
|
+
|
|
243
|
+
macOS / Linux:
|
|
244
|
+
|
|
245
|
+
```bash
|
|
246
|
+
echo '{"jsonrpc":"2.0","id":1,"method":"tools/list"}' | or-info --mcp
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
Windows PowerShell:
|
|
250
|
+
|
|
251
|
+
```powershell
|
|
252
|
+
'{"jsonrpc":"2.0","id":1,"method":"tools/list"}' | or-info.cmd --mcp
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
Windows CMD:
|
|
256
|
+
|
|
257
|
+
```cmd
|
|
258
|
+
echo {"jsonrpc":"2.0","id":1,"method":"tools/list"} | or-info.cmd --mcp
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
## Data sources
|
|
262
|
+
|
|
263
|
+
| Data | Source | Refresh |
|
|
264
|
+
|------|--------|---------|
|
|
265
|
+
| Model catalog and pricing | [OpenRouter API](https://openrouter.ai/api/v1/models) | Every 30 min |
|
|
266
|
+
| ELO rankings | [LMArena](https://lmarena.ai) via [HuggingFace dataset](https://huggingface.co/datasets/lmarena-ai/leaderboard-dataset) | Every 24 h |
|
|
267
|
+
|
|
268
|
+
ELO data is fetched directly from the `lmarena-ai/leaderboard-dataset` dataset on HuggingFace
|
|
269
|
+
using their public Datasets Server API — no API key required. Coverage: ~350 models
|
|
270
|
+
including all major commercial and open-source models tracked by LMArena.
|
|
271
|
+
|
|
272
|
+
## Testing
|
|
273
|
+
|
|
274
|
+
```bash
|
|
275
|
+
npm test
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
`npm test` runs the deterministic local suite and is the release gate used before publishing.
|
|
279
|
+
Live integration tests are available separately because they depend on OpenRouter and
|
|
280
|
+
HuggingFace availability and can occasionally hit third-party rate limits.
|
|
281
|
+
|
|
282
|
+
Additional entry points:
|
|
283
|
+
|
|
284
|
+
- `npm run test:local` for deterministic no-network coverage
|
|
285
|
+
- `npm run test:online:smoke` for the live smoke subset used by CI as a non-blocking signal
|
|
286
|
+
- `npm run test:online` for the full live CLI/MCP suite, including edge cases
|
|
287
|
+
|
|
288
|
+
## Contributing
|
|
289
|
+
|
|
290
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) — adding new CLI commands or new MCP tools.
|
|
291
|
+
|
|
292
|
+
## License
|
|
293
|
+
|
|
294
|
+
[MIT](LICENSE)
|
package/bin/or-info.mjs
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { InvalidArgumentError, program } from 'commander';
|
|
3
|
+
import chalk from 'chalk';
|
|
4
|
+
import { fetchModels, findModel, pricePerMillion, contextLength } from '../lib/openrouter.mjs';
|
|
5
|
+
import { getElo, getAllElo, loadLeaderboard } from '../lib/lmarena.mjs';
|
|
6
|
+
import { rankModels } from '../lib/scorer.mjs';
|
|
7
|
+
import { clearAll, status } from '../lib/cache.mjs';
|
|
8
|
+
import { getApiKey } from '../lib/secrets.mjs';
|
|
9
|
+
import {
|
|
10
|
+
modelTable,
|
|
11
|
+
modelDetail,
|
|
12
|
+
comparison,
|
|
13
|
+
topList,
|
|
14
|
+
statusReport,
|
|
15
|
+
} from '../lib/formatter.mjs';
|
|
16
|
+
|
|
17
|
+
/**
 * Report a fatal error and stop the process.
 *
 * Writes a red "Error: " prefix followed by the message to stderr, then
 * exits with status code 1, so control never returns to the caller.
 *
 * @param {string} msg - Text shown after the "Error: " prefix.
 */
function die(msg) {
  const prefix = chalk.red('Error: ');
  console.error(prefix + msg);
  process.exit(1);
}
|
|
21
|
+
|
|
22
|
+
// Task names accepted by `top --task`; the `top` action rejects any other value before fetching data.
const TOP_TASKS = new Set(['coding', 'reasoning', 'general', 'vision', 'cheap']);
|
|
23
|
+
|
|
24
|
+
/**
 * Commander option parser that accepts only canonical positive decimal integers.
 *
 * The parsed number is converted back to text and compared with the trimmed
 * input, so anything that does not round-trip exactly ("1.5", "007", "+3",
 * "12abc", "1e3") is rejected along with zero and negative values.
 *
 * @param {string} value - Raw option value from the command line.
 * @returns {number} The parsed integer (>= 1).
 * @throws {InvalidArgumentError} When the value is not a positive integer.
 */
function parsePositiveInteger(value) {
  const parsed = Number.parseInt(value, 10);
  const roundTrips = String(parsed) === String(value).trim();

  if (Number.isInteger(parsed) && parsed >= 1 && roundTrips) {
    return parsed;
  }
  throw new InvalidArgumentError('must be a positive integer');
}
|
|
31
|
+
|
|
32
|
+
/**
 * Parse a positive-integer option value and cap it at `max`.
 *
 * @param {string} value - Raw option value from the command line.
 * @param {number} max - Upper bound applied to the parsed value.
 * @returns {number} The parsed integer, or `max` when the request exceeds it.
 * @throws {InvalidArgumentError} Propagated from parsePositiveInteger for invalid input.
 */
function parseLimit(value, max) {
  const requested = parsePositiveInteger(value);
  return Math.min(max, requested);
}
|
|
35
|
+
|
|
36
|
+
/**
 * Resolve the API key used for OpenRouter requests by delegating to
 * `getApiKey()` from lib/secrets.mjs.
 *
 * @returns {Promise<*>} Whatever `getApiKey()` resolves to.
 */
async function apiKey() {
  const key = await getApiKey();
  return key;
}
|
|
39
|
+
|
|
40
|
+
// Root command metadata. `--mcp` is declared as a root option here; the flag
// itself is detected directly from process.argv further down in this file.
program.name('or-info');
program.description('OpenRouter model info: prices, benchmarks, context and comparisons');
program.version('0.1.0');
program.option('--mcp', 'Start MCP server (stdio transport)');
|
|
45
|
+
|
|
46
|
+
// ── models ─────────────────────────────────────────────────────────────────
|
|
47
|
+
// Sort fields accepted by `models --sort`.
const MODEL_SORT_FIELDS = new Set(['price', 'context', 'name']);

// `models`: list catalog models, optionally filtered, sorted and truncated.
program
  .command('models')
  .description('List all models with pricing')
  .option('--sort <field>', 'Sort by: price, context, name', 'name')
  .option('--filter <text>', 'Filter by model ID or name (case-insensitive)')
  .option('--free', 'Show only free models')
  .option('--limit <n>', 'Maximum number of models to return (max 200)', (v) => parseLimit(v, 200))
  .option('--tags', 'Show feature tags')
  .option('--json', 'Output raw JSON')
  .action(async (opts) => {
    // Reject unknown sort fields up front (mirrors the --task check in `top`)
    // instead of silently falling back to name ordering.
    if (!MODEL_SORT_FIELDS.has(opts.sort)) {
      die(`Invalid sort field: ${opts.sort}. Expected one of: ${[...MODEL_SORT_FIELDS].join(', ')}`);
    }

    const key = await apiKey();
    let models = await fetchModels({ apiKey: key });

    if (opts.filter) {
      const q = opts.filter.toLowerCase();
      models = models.filter(
        (m) => m.id.toLowerCase().includes(q) || (m.name ?? '').toLowerCase().includes(q)
      );
    }
    if (opts.free) {
      models = models.filter((m) => {
        const p = pricePerMillion(m);
        return p.input === 0 && p.output === 0;
      });
    }

    // Sort a copy so the array returned by fetchModels() is never reordered in place.
    models = [...models];
    if (opts.sort === 'price') {
      models.sort((a, b) => {
        // Models without an output price sort last.
        const pa = pricePerMillion(a).output ?? Infinity;
        const pb = pricePerMillion(b).output ?? Infinity;
        // Equal prices (including two unpriced models, where Infinity - Infinity
        // would be NaN) compare as ties.
        return pa === pb ? 0 : pa - pb;
      });
    } else if (opts.sort === 'context') {
      // Largest context window first; unknown context counts as 0.
      models.sort((a, b) => (contextLength(b) ?? 0) - (contextLength(a) ?? 0));
    } else {
      models.sort((a, b) => a.id.localeCompare(b.id));
    }

    if (opts.limit) models = models.slice(0, opts.limit);

    if (opts.json) {
      console.log(JSON.stringify(models, null, 2));
      return;
    }
    console.log(modelTable(models, { showTags: opts.tags }));
    console.log(chalk.dim(` ${models.length} models`));
  });
|
|
94
|
+
|
|
95
|
+
// ── price ──────────────────────────────────────────────────────────────────
|
|
96
|
+
// `price <model-id>`: pricing and context for one model. The human-readable
// view also includes the LMArena ELO entry; the JSON view does not look it up.
program
  .command('price <model-id>')
  .description('Detailed pricing and context for a model')
  .option('--json', 'Output raw JSON')
  .action(async (modelId, opts) => {
    const key = await apiKey();
    const catalog = await fetchModels({ apiKey: key });
    const match = findModel(catalog, modelId);
    if (!match) die(`Model not found: ${modelId}`);

    if (!opts.json) {
      const elo = await getElo(modelId);
      console.log(modelDetail(match, elo));
      return;
    }

    const payload = {
      model: match.id,
      pricing: match.pricing,
      context_length: contextLength(match),
    };
    console.log(JSON.stringify(payload, null, 2));
  });
|
|
117
|
+
|
|
118
|
+
// ── benchmark ──────────────────────────────────────────────────────────────
|
|
119
|
+
// `benchmark <model-id>`: LMArena ELO for one model. JSON mode reports only the
// ELO lookup; the human-readable view also needs the model's catalog entry.
program
  .command('benchmark <model-id>')
  .description('LMArena ELO and pricing details for a model')
  .option('--json', 'Output raw JSON')
  .action(async (modelId, opts) => {
    const [key, elo] = await Promise.all([apiKey(), getElo(modelId)]);

    if (opts.json) {
      const payload = { model: modelId, elo: elo ?? null };
      console.log(JSON.stringify(payload, null, 2));
      return;
    }

    const catalog = await fetchModels({ apiKey: key });
    const match = findModel(catalog, modelId);
    if (!match) die(`Model not found: ${modelId}`);

    console.log(modelDetail(match, elo ? elo : null));
    if (!elo) {
      // Tell the user why the ELO section is missing.
      console.log(chalk.yellow(' No LMArena ELO data for this model.'));
      console.log(chalk.dim(' Data refreshes every 24h from lmarena.ai via HuggingFace.'));
    }
  });
|
|
140
|
+
|
|
141
|
+
// ── compare ────────────────────────────────────────────────────────────────
|
|
142
|
+
// `compare <model-a> <model-b>`: side-by-side view of two catalog models with
// their LMArena ELO entries.
program
  .command('compare <model-a> <model-b>')
  .description('Side-by-side comparison of two models')
  .option('--json', 'Output raw JSON')
  .action(async (idA, idB, opts) => {
    const key = await apiKey();
    // The catalog and both ELO lookups are independent, so run them together.
    const [models, eloA, eloB] = await Promise.all([
      fetchModels({ apiKey: key }),
      getElo(idA),
      getElo(idB),
    ]);
    const mA = findModel(models, idA);
    const mB = findModel(models, idB);
    if (!mA) die(`Model not found: ${idA}`);
    if (!mB) die(`Model not found: ${idB}`);

    if (opts.json) {
      // Normalise a missing ELO to null (as `benchmark --json` does) so the
      // "elo" key is always present instead of being dropped by JSON.stringify.
      const payload = {
        a: { model: mA, elo: eloA ?? null },
        b: { model: mB, elo: eloB ?? null },
      };
      console.log(JSON.stringify(payload, null, 2));
      return;
    }
    console.log(comparison(mA, eloA, mB, eloB));
  });
|
|
164
|
+
|
|
165
|
+
// ── top ────────────────────────────────────────────────────────────────────
|
|
166
|
+
// `top`: rank catalog models for a task, optionally capped by output price.
program
  .command('top')
  .description('Best models for a task')
  .option('--task <task>', 'Task: coding, reasoning, general, vision, cheap', 'general')
  .option('--budget <usd>', 'Max price per 1M output tokens (e.g. 1.00)', (value) => {
    // Bare parseFloat would let NaN (e.g. "--budget abc") or a negative budget
    // reach the scorer; reject those at parse time.
    const usd = Number.parseFloat(value);
    if (!Number.isFinite(usd) || usd < 0) {
      throw new InvalidArgumentError('must be a non-negative number');
    }
    return usd;
  })
  .option('--limit <n>', 'Number of results', parsePositiveInteger, 5)
  .option('--json', 'Output raw JSON')
  .action(async (opts) => {
    if (!TOP_TASKS.has(opts.task)) {
      die(`Invalid task: ${opts.task}. Expected one of: ${[...TOP_TASKS].join(', ')}`);
    }

    const key = await apiKey();
    const [models, allElo] = await Promise.all([
      fetchModels({ apiKey: key }),
      getAllElo(),
    ]);

    const ranked = rankModels(models, allElo, {
      task: opts.task,
      maxPricePerMOutput: opts.budget,
      limit: opts.limit,
    });

    if (opts.json) {
      // Always emit valid JSON in --json mode: an empty ranking prints `[]`
      // rather than the human-readable hint below.
      console.log(JSON.stringify(ranked.map((r) => ({ id: r.model.id, score: r.score, elo: r.eloEntry }))));
      return;
    }
    if (!ranked.length) {
      console.log(chalk.yellow('No results. Try a different task or increase --budget.'));
      return;
    }
    console.log(topList(ranked));
  });
|
|
200
|
+
|
|
201
|
+
// ── refresh ────────────────────────────────────────────────────────────────
|
|
202
|
+
// `refresh`: force-reload both data sources, printing progress as each finishes.
program
  .command('refresh')
  .description('Force-refresh the cache (models + LMArena ELO)')
  .action(async () => {
    const key = await apiKey();
    const out = process.stdout;

    // The progress label is written without a newline so the result lands on the same line.
    out.write(chalk.dim('Refreshing models…'));
    const { length: modelCount } = await fetchModels({ apiKey: key, force: true });
    out.write(chalk.green(` ✓ ${modelCount} models\n`));

    out.write(chalk.dim('Refreshing LMArena ELO…'));
    const { length: entryCount } = await loadLeaderboard({ force: true });
    console.log(chalk.green(` ✓ ${entryCount} entries`));
  });
|
|
215
|
+
|
|
216
|
+
// ── status ─────────────────────────────────────────────────────────────────
|
|
217
|
+
// `status`: print the formatted cache status report.
program
  .command('status')
  .description('Show cache status')
  .action(async () => {
    const report = statusReport(await status());
    console.log(report);
  });
|
|
224
|
+
|
|
225
|
+
// ── root --mcp ─────────────────────────────────────────────────────────────
|
|
226
|
+
// Handle `--mcp` by scanning argv directly, before program.parseAsync() runs:
// with no subcommand Commander prints help and exits with code 1, so the
// --mcp branch would never be reached if it relied on normal parsing.
// NOTE(review): process.exit(0) runs as soon as startMcp() settles — this
// assumes startMcp() only resolves once the server is done; confirm in
// mcp/server.mjs.
if (process.argv.includes('--mcp')) {
  const mcpModule = await import('../mcp/server.mjs');
  await mcpModule.startMcp();
  process.exit(0);
}
|
|
233
|
+
|
|
234
|
+
/**
 * Replace anything shaped like an API key ("sk-" followed by letters, digits
 * or hyphens) with a placeholder. Only keys are redacted; other details such
 * as file paths are left untouched.
 *
 * @param {string} text - Error text that may embed a key.
 * @returns {string} The text with key-like tokens replaced by "[REDACTED]".
 */
function redactSecrets(text) {
  return text.replace(/sk-[a-zA-Z0-9-]+/g, '[REDACTED]');
}

// Parse argv and run the selected command. Any rejection is reported through
// die() with key-like tokens redacted from the message.
program.parseAsync(process.argv).catch((err) => {
  // The rejection value is not guaranteed to be an Error: it may be
  // null/undefined or a thrown string, so read the message defensively.
  const message = typeof err === 'string' ? err : err?.message;
  const usable = typeof message === 'string' && message !== '';
  die(usable ? redactSecrets(message) : 'Unexpected error');
});
|
package/lib/cache.mjs
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import fs from 'node:fs/promises';
|
|
2
|
+
import { randomUUID } from 'node:crypto';
|
|
3
|
+
import { basename, dirname, join } from 'node:path';
|
|
4
|
+
import { CACHE_DIR, MODELS_CACHE, BENCHMARKS_CACHE } from './paths.mjs';
|
|
5
|
+
|
|
6
|
+
const MINUTE_MS = 60 * 1000;
const HOUR_MS = 60 * MINUTE_MS;

/** Cache lifetimes in milliseconds, keyed by data source. */
export const TTL = {
  MODELS: 30 * MINUTE_MS, // 30 min
  BENCHMARKS: 24 * HOUR_MS, // 24 h
};
|
|
10
|
+
|
|
11
|
+
// chmod is skipped entirely when running on Windows.
const IS_WINDOWS = process.platform === 'win32';
// chmod error codes that are tolerated instead of being rethrown.
const PERMISSION_ERRORS = new Set(['EINVAL', 'ENOSYS', 'EPERM']);

/**
 * Apply `mode` to `path`, tolerating environments where chmod is unsupported.
 *
 * Does nothing on Windows. Elsewhere, failures whose code is listed in
 * PERMISSION_ERRORS are ignored; any other failure (for example a missing
 * path) is rethrown.
 *
 * @param {string} path - File or directory to change.
 * @param {number} mode - Permission bits, e.g. 0o600.
 * @returns {Promise<void>}
 */
async function chmodBestEffort(path, mode) {
  if (!IS_WINDOWS) {
    await fs.chmod(path, mode).catch((err) => {
      if (PERMISSION_ERRORS.has(err?.code)) return;
      throw err;
    });
  }
}
|
|
22
|
+
|
|
23
|
+
/**
 * Create `dir` (and any missing parents) and set its mode to 0o700.
 *
 * @param {string} dir - Directory path to create.
 * @returns {Promise<void>}
 */
async function ensureDir(dir) {
  const ownerOnly = 0o700;
  await fs.mkdir(dir, { recursive: true, mode: ownerOnly });
  // Re-apply the mode explicitly — presumably because mkdir's mode does not
  // affect a directory that already exists.
  await chmodBestEffort(dir, ownerOnly);
}
|
|
27
|
+
|
|
28
|
+
/**
 * Read and parse a UTF-8 JSON file.
 *
 * @param {string} file - Path of the file to read.
 * @returns {Promise<*>} The parsed value, or null when the file cannot be
 *   read or does not contain valid JSON.
 */
async function readJson(file) {
  let parsed = null;
  try {
    parsed = JSON.parse(await fs.readFile(file, 'utf8'));
  } catch {
    // Any read or parse failure yields null.
  }
  return parsed;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Build a unique hidden temp-file path in the same directory as `file`.
 *
 * The name is `.<basename>.<pid>.<timestamp>.<uuid>.tmp`.
 *
 * @param {string} file - Final destination path.
 * @returns {string} Sibling temp path.
 */
function tempFileFor(file) {
  const uniqueSuffix = [process.pid, Date.now(), randomUUID()].join('.');
  const hiddenName = `.${basename(file)}.${uniqueSuffix}.tmp`;
  return join(dirname(file), hiddenName);
}
|
|
43
|
+
|
|
44
|
+
/**
 * Serialise `data` as JSON and store it at `file`.
 *
 * The payload is first written to a sibling temp file (mode 0o600) and then
 * renamed onto the destination, after which the mode is re-applied. If any
 * step fails, the temp file is removed (ignoring removal errors) and the
 * original error is rethrown.
 *
 * @param {string} file - Destination path.
 * @param {*} data - JSON-serialisable value.
 * @returns {Promise<void>}
 */
async function writeJson(file, data) {
  await ensureDir(dirname(file));
  const staging = tempFileFor(file);
  const ownerReadWrite = 0o600;

  try {
    const payload = JSON.stringify(data);
    await fs.writeFile(staging, payload, { mode: ownerReadWrite });
    await fs.rename(staging, file);
    await chmodBestEffort(file, ownerReadWrite);
  } catch (err) {
    try {
      await fs.unlink(staging);
    } catch {
      // The temp file may already be gone (e.g. after a successful rename).
    }
    throw err;
  }
}
|
|
57
|
+
|
|
58
|
+
/**
 * Last-modified time of `file` in milliseconds.
 *
 * @param {string} file - Path to inspect.
 * @returns {Promise<number>} `stat.mtimeMs`, or 0 when the file cannot be stat'ed.
 */
async function mtimeMs(file) {
  const stats = await fs.stat(file).catch(() => null);
  return stats === null ? 0 : stats.mtimeMs;
}
|
|
66
|
+
|
|
67
|
+
/**
 * Read a cached JSON file if it is still within its time-to-live.
 *
 * @param {string} file - Cache file path.
 * @param {number} ttlMs - Maximum accepted age in milliseconds.
 * @returns {Promise<*>} The parsed content, or null when the file is older
 *   than `ttlMs`, missing, or unreadable.
 */
export async function get(file, ttlMs) {
  // Sample the clock before the stat call, then compare against the mtime.
  const startedAt = Date.now();
  const modifiedAt = await mtimeMs(file);
  const isExpired = startedAt - modifiedAt > ttlMs;
  return isExpired ? null : readJson(file);
}
|
|
72
|
+
|
|
73
|
+
/**
 * Store `data` as JSON in the cache file `file` by delegating to writeJson().
 *
 * @param {string} file - Cache file path.
 * @param {*} data - JSON-serialisable value.
 * @returns {Promise<void>}
 */
export async function set(file, data) {
  return writeJson(file, data);
}
|
|
76
|
+
|
|
77
|
+
/**
 * Delete a single cache file.
 *
 * A file that does not exist is not an error; any other failure is rethrown.
 *
 * @param {string} file - Path to remove.
 * @returns {Promise<void>}
 */
export async function clear(file) {
  await fs.unlink(file).catch((err) => {
    if (err.code === 'ENOENT') return;
    throw err;
  });
}
|
|
84
|
+
|
|
85
|
+
/**
 * Delete every `*.json` file directly inside CACHE_DIR.
 *
 * A missing cache directory is treated as "nothing to clear". Failures to
 * delete individual files are ignored; other directory-listing errors are
 * rethrown.
 *
 * @returns {Promise<void>}
 */
export async function clearAll() {
  let names;
  try {
    names = await fs.readdir(CACHE_DIR);
  } catch (err) {
    if (err.code !== 'ENOENT') throw err;
    return;
  }

  const removals = [];
  for (const name of names) {
    if (!name.endsWith('.json')) continue;
    removals.push(fs.unlink(join(CACHE_DIR, name)).catch(() => {}));
  }
  await Promise.all(removals);
}
|
|
97
|
+
|
|
98
|
+
/**
 * Describe the state of each cache file.
 *
 * @returns {Promise<Array<{key: string, exists: boolean, ageMs: (number|null), fresh: boolean, ttlMs: number}>>}
 *   One entry per data source. `ageMs` is null when the file is missing (its
 *   mtime reads as 0). `fresh` uses the same boundary as get(): an entry is
 *   fresh while its age does not exceed the TTL.
 */
export async function status() {
  const files = [
    { key: 'models', file: MODELS_CACHE, ttl: TTL.MODELS },
    { key: 'benchmarks', file: BENCHMARKS_CACHE, ttl: TTL.BENCHMARKS },
  ];

  return Promise.all(
    files.map(async ({ key, file, ttl }) => {
      const mtime = await mtimeMs(file);
      const age = mtime ? Date.now() - mtime : null;
      return {
        key,
        exists: mtime > 0,
        ageMs: age,
        // get() only discards entries with age > ttl, so age === ttl still
        // counts as fresh here; the report must match what get() would serve.
        fresh: age !== null && age <= ttl,
        ttlMs: ttl,
      };
    })
  );
}
|