@xynogen/pix-data 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +94 -8
- package/package.json +1 -1
- package/src/data.test.ts +135 -155
- package/src/data.ts +243 -79
- package/src/index.ts +5 -6
package/README.md
CHANGED
|
@@ -1,20 +1,101 @@
|
|
|
1
1
|
# pix-data
|
|
2
2
|
|
|
3
|
-
Pi coding agent extension — shared model data layer. Fetches and caches
|
|
3
|
+
Pi coding agent extension — shared model data layer. Fetches and caches the
|
|
4
|
+
[modelgrep](https://modelgrep.com) model catalog to `~/.cache/pi/` on session
|
|
5
|
+
start, so other extensions (model picker, footer, subagent resolver) can read
|
|
6
|
+
context window, pricing, and a coding-focused score/rank synchronously without
|
|
7
|
+
redundant network calls.
|
|
8
|
+
|
|
9
|
+
## Data source
|
|
10
|
+
|
|
11
|
+
All data comes from a **single source**: [modelgrep.com](https://modelgrep.com)
|
|
12
|
+
(`/api/v1/models?benchmarked=1&sort=coding`). Free, no API key, ~190 benchmarked
|
|
13
|
+
models with real model ids. modelgrep aggregates benchmark numbers from
|
|
14
|
+
[Artificial Analysis](https://artificialanalysis.ai).
|
|
15
|
+
|
|
16
|
+
- **Context window + pricing** — taken verbatim from modelgrep.
|
|
17
|
+
- **Score** — computed locally from the raw benchmark fields (see below).
|
|
18
|
+
- **Rank** — the model's position once the whole catalog is sorted by that score
|
|
19
|
+
(best = `#1`). Unscored models sink to the bottom.
|
|
20
|
+
|
|
21
|
+
Cached 24h → `~/.cache/pi/modelgrep.json`. On outage the stale cache keeps the
|
|
22
|
+
picker working until it can refresh.
|
|
23
|
+
|
|
24
|
+
## Scoring methodology
|
|
25
|
+
|
|
26
|
+
**Primary score = [Artificial Analysis Intelligence Index](https://artificialanalysis.ai/methodology/intelligence-benchmarking)**
|
|
27
|
+
when available — AA's authoritative composite of 9 independent evals (agents,
|
|
28
|
+
coding, scientific reasoning, general), already weighted toward agentic work.
|
|
29
|
+
It is rescaled to 0–100 (`intelligence / 65 × 100`; the current leader scores
|
|
30
|
+
~65).
|
|
31
|
+
|
|
32
|
+
**Fallback = a coding-and-agentic heuristic** for the ~84% of models AA has not
|
|
33
|
+
index-scored, computed from the raw benchmarks below, then mapped onto the index
|
|
34
|
+
scale by a least-squares line. Both the heuristic weights *and* the line were
|
|
35
|
+
jointly tuned against the index on the models that carry *both* it and the raw
|
|
36
|
+
benches (`index100 ≈ 120.6·heuristic − 10.6`, deduped n=29, R²=0.901,
|
|
37
|
+
leave-one-out RMSE 6.55pt) — a data calibration, not a guessed penalty. The
|
|
38
|
+
picker exists to choose a model *for coding work in an agent*, so the heuristic
|
|
39
|
+
is weighted toward exactly that:
|
|
40
|
+
|
|
41
|
+
| bench | range | measures |
|
|
42
|
+
|---|---|---|
|
|
43
|
+
| `coding` | 0–100 | code generation index |
|
|
44
|
+
| `scicode` | 0–1 | scientific coding |
|
|
45
|
+
| `tau2` | 0–1 | agentic tool-use |
|
|
46
|
+
| `agentic` | 0–100 | agentic index |
|
|
47
|
+
| `gpqa` | 0–1 | graduate-level reasoning |
|
|
48
|
+
| `hle` | 0–1 | hard-exam reasoning |
|
|
49
|
+
|
|
50
|
+
When the index is absent, three sub-scores combine, each a weighted blend of
|
|
51
|
+
its benches (all normalized to 0–1):
|
|
52
|
+
|
|
53
|
+
```
|
|
54
|
+
coding_score = 0.60·(coding/100) + 0.40·scicode
|
|
55
|
+
agentic_score = 0.70·tau2 + 0.30·(agentic/100)
|
|
56
|
+
reasoning_score = 0.60·gpqa + 0.40·hle
|
|
57
|
+
|
|
58
|
+
heuristic = 0.30·coding_score + 0.60·agentic_score + 0.10·reasoning_score
|
|
59
|
+
score = round(clamp(120.6·heuristic − 10.6, 0, 100)) // fitted to the index, clamped to 0–100
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
**Why a heuristic at all, and why these raw evals only:** the AA Intelligence
|
|
63
|
+
Index *is* the ideal number — but only ~16% of the catalog has it. For the rest
|
|
64
|
+
we rebuild a comparable score from the same family of raw evals. Crucially we
|
|
65
|
+
use each raw eval **once** and never feed `intelligence` *and* its components
|
|
66
|
+
together, nor any `_pct` field (which is just a percentile-rank of a raw field)
|
|
67
|
+
— doing so would double-count the same measurement and silently inflate weights
|
|
68
|
+
you can't see. Independent inputs only → honest weighted average.
|
|
69
|
+
|
|
70
|
+
**Why these weights:** an agentic coding model lives or dies on *tool-calling*
|
|
71
|
+
and *code generation*, so `agentic_score` (0.60) and `coding_score` (0.30)
|
|
72
|
+
carry the score; pure reasoning (0.10) is a tiebreaker, not the headline. The
|
|
73
|
+
split is not arbitrary — a grid search over weight combinations, scored by how
|
|
74
|
+
well the heuristic predicts the AA index (leave-one-out cross-validation),
|
|
75
|
+
landed on this agentic-heavy mix. Within each group the dominant bench (`tau2`
|
|
76
|
+
for agentic, raw `coding` for coding, `gpqa` for reasoning) carries most of the weight and a secondary
|
|
77
|
+
bench refines it.
|
|
78
|
+
|
|
79
|
+
**Missing benchmarks:** every blend renormalizes over the fields actually
|
|
80
|
+
present, so a model missing one bench is diluted only *within its own group* —
|
|
81
|
+
it is never zero-penalized or dropped. A model with no benchmarks at all gets a
|
|
82
|
+
`null` score (shown as a bare row) and sorts to the bottom.
|
|
83
|
+
|
|
84
|
+
The exact implementation is `codingScore()` in
|
|
85
|
+
[`src/data.ts`](src/data.ts); the weights are intentionally easy to tune in one
|
|
86
|
+
place if your priorities differ.
|
|
4
87
|
|
|
5
88
|
## What's included
|
|
6
89
|
|
|
7
90
|
| Export | Description |
|
|
8
91
|
|---|---|
|
|
9
|
-
| `
|
|
10
|
-
| `benchmark` | `DataSource<BenchmarkEntry[]>` — BenchLM leaderboard (rank, score, pricing). TTL 24h → `~/.cache/pi/benchlm.json` |
|
|
92
|
+
| `modelgrep` | `DataSource<ModelGrepModel[]>` — the catalog. TTL 24h → `~/.cache/pi/modelgrep.json` |
|
|
11
93
|
| `DataSource` | Generic cached data source class |
|
|
12
94
|
| `CACHE_DIR` | Resolved cache directory (`~/.cache/pi`) |
|
|
13
|
-
| `buildModelsDevIndex` | Build a lookup `Map` from
|
|
14
|
-
| `lookupInIndex` | Fuzzy-match a router model id against
|
|
95
|
+
| `buildModelsDevIndex` | Build a lookup `Map` from the catalog (context/cost/modalities) |
|
|
96
|
+
| `lookupInIndex` | Fuzzy-match a router model id against an index |
|
|
15
97
|
| `lookupModelsDev` | Sync lookup by provider + id from in-memory cache |
|
|
16
|
-
| `lookupBenchmark` |
|
|
17
|
-
| `fetchModelsDevIndex` | Async — fetch models.dev and return built index |
|
|
98
|
+
| `lookupBenchmark` | Sync lookup of a model by id — returns score + rank + pricing |
|
|
18
99
|
|
|
19
100
|
## Install
|
|
20
101
|
|
|
@@ -24,10 +105,15 @@ pi install npm:@xynogen/pix-data
|
|
|
24
105
|
|
|
25
106
|
## How it works
|
|
26
107
|
|
|
27
|
-
On session start the extension fires
|
|
108
|
+
On session start the extension fires a background fetch (`modelgrep.get()`),
|
|
109
|
+
paginating the API until the full benchmarked catalog is retrieved. If the cache
|
|
110
|
+
is fresh the fetch is skipped. The cache file lives in `~/.cache/pi/` — any Pi
|
|
111
|
+
extension using the same `DataSource` shares it automatically.
|
|
28
112
|
|
|
29
113
|
## Full distro
|
|
30
114
|
|
|
115
|
+
Source: [github.com/xynogen/pix-mono](https://github.com/xynogen/pix-mono)
|
|
116
|
+
|
|
31
117
|
To install the complete pix suite (all packages + Pi itself):
|
|
32
118
|
|
|
33
119
|
```bash
|
package/package.json
CHANGED
package/src/data.test.ts
CHANGED
|
@@ -1,107 +1,102 @@
|
|
|
1
1
|
import { afterEach, beforeEach, describe, expect, it } from "bun:test";
|
|
2
2
|
import {
|
|
3
|
-
type BenchmarkEntry,
|
|
4
|
-
benchmark,
|
|
5
3
|
buildModelsDevIndex,
|
|
6
4
|
lookupBenchmark,
|
|
7
5
|
lookupInIndex,
|
|
8
6
|
lookupModelsDev,
|
|
9
|
-
type
|
|
10
|
-
|
|
11
|
-
modelsDev,
|
|
7
|
+
type ModelGrepModel,
|
|
8
|
+
modelgrep,
|
|
12
9
|
} from "./data.ts";
|
|
13
10
|
|
|
11
|
+
// Compact modelgrep-shaped fixture builder.
|
|
12
|
+
function mg(
|
|
13
|
+
id: string,
|
|
14
|
+
opts: {
|
|
15
|
+
name?: string;
|
|
16
|
+
ctx?: number;
|
|
17
|
+
in?: number;
|
|
18
|
+
out?: number;
|
|
19
|
+
reasoning?: boolean;
|
|
20
|
+
input?: string[];
|
|
21
|
+
// Raw benchmark inputs to codingScore. intelligence (~0–65) wins when
|
|
22
|
+
// present; otherwise coding/agentic (0–100) + rest (0–1) feed the heuristic.
|
|
23
|
+
bench?: {
|
|
24
|
+
intelligence?: number;
|
|
25
|
+
coding?: number;
|
|
26
|
+
agentic?: number;
|
|
27
|
+
gpqa?: number;
|
|
28
|
+
scicode?: number;
|
|
29
|
+
tau2?: number;
|
|
30
|
+
hle?: number;
|
|
31
|
+
};
|
|
32
|
+
} = {},
|
|
33
|
+
): ModelGrepModel {
|
|
34
|
+
return {
|
|
35
|
+
id,
|
|
36
|
+
name: opts.name ?? id,
|
|
37
|
+
context_length: opts.ctx,
|
|
38
|
+
pricing: { input: opts.in, output: opts.out },
|
|
39
|
+
modality: { input: opts.input },
|
|
40
|
+
capabilities: { reasoning: opts.reasoning },
|
|
41
|
+
benchmarks: { artificial_analysis: { ...opts.bench } },
|
|
42
|
+
};
|
|
43
|
+
}
|
|
44
|
+
|
|
14
45
|
// ── buildModelsDevIndex ──────────────────────────────────────────────────────
|
|
15
46
|
|
|
16
47
|
describe("buildModelsDevIndex", () => {
|
|
17
|
-
const
|
|
18
|
-
anthropic:
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
name: "Claude Sonnet 4.5",
|
|
23
|
-
},
|
|
24
|
-
"claude-opus-4": {
|
|
25
|
-
id: "claude-opus-4",
|
|
26
|
-
name: "Claude Opus 4",
|
|
27
|
-
reasoning: true,
|
|
28
|
-
},
|
|
29
|
-
},
|
|
30
|
-
},
|
|
31
|
-
openai: {
|
|
32
|
-
models: {
|
|
33
|
-
"gpt-4o": {
|
|
34
|
-
id: "gpt-4o",
|
|
35
|
-
name: "GPT-4o",
|
|
36
|
-
modalities: { input: ["text", "image"] },
|
|
37
|
-
},
|
|
38
|
-
},
|
|
39
|
-
},
|
|
40
|
-
};
|
|
48
|
+
const catalog: ModelGrepModel[] = [
|
|
49
|
+
mg("anthropic/claude-sonnet-4-5", { name: "Claude Sonnet 4.5" }),
|
|
50
|
+
mg("anthropic/claude-opus-4", { name: "Claude Opus 4", reasoning: true }),
|
|
51
|
+
mg("openai/gpt-4o", { name: "GPT-4o", input: ["text", "image"] }),
|
|
52
|
+
];
|
|
41
53
|
|
|
42
|
-
it("indexes all models by
|
|
43
|
-
const idx = buildModelsDevIndex(
|
|
54
|
+
it("indexes all models by slug", () => {
|
|
55
|
+
const idx = buildModelsDevIndex(catalog);
|
|
44
56
|
expect(idx.has("claude-sonnet-4-5")).toBe(true);
|
|
45
57
|
expect(idx.has("claude-opus-4")).toBe(true);
|
|
46
58
|
expect(idx.has("gpt-4o")).toBe(true);
|
|
47
59
|
});
|
|
48
60
|
|
|
49
|
-
it("indexes normalized
|
|
50
|
-
const
|
|
51
|
-
anthropic:
|
|
52
|
-
|
|
53
|
-
"claude-sonnet-4-5-20250514": {
|
|
54
|
-
id: "claude-sonnet-4-5-20250514",
|
|
55
|
-
name: "Claude Sonnet 4.5",
|
|
56
|
-
},
|
|
57
|
-
},
|
|
58
|
-
},
|
|
59
|
-
};
|
|
60
|
-
const idx = buildModelsDevIndex(a);
|
|
61
|
+
it("indexes normalized slug (strip date suffix)", () => {
|
|
62
|
+
const idx = buildModelsDevIndex([
|
|
63
|
+
mg("anthropic/claude-sonnet-4-5-20250514", { name: "Claude Sonnet 4.5" }),
|
|
64
|
+
]);
|
|
61
65
|
expect(idx.has("claude-sonnet-4-5")).toBe(true);
|
|
62
66
|
});
|
|
63
67
|
|
|
64
|
-
it("handles empty
|
|
65
|
-
expect(buildModelsDevIndex(
|
|
68
|
+
it("handles empty catalog", () => {
|
|
69
|
+
expect(buildModelsDevIndex([]).size).toBe(0);
|
|
66
70
|
});
|
|
67
71
|
|
|
68
|
-
it("
|
|
69
|
-
|
|
72
|
+
it("maps fields onto ModelsDevModel shape", () => {
|
|
73
|
+
const m = buildModelsDevIndex([
|
|
74
|
+
mg("openai/gpt-4o", { ctx: 128000, in: 5, out: 15, input: ["text"] }),
|
|
75
|
+
]).get("gpt-4o");
|
|
76
|
+
expect(m?.limit?.context).toBe(128000);
|
|
77
|
+
expect(m?.cost?.input).toBe(5);
|
|
78
|
+
expect(m?.cost?.output).toBe(15);
|
|
79
|
+
expect(m?.modalities?.input).toEqual(["text"]);
|
|
70
80
|
});
|
|
71
81
|
|
|
72
|
-
it("preserves first-seen on
|
|
73
|
-
const
|
|
74
|
-
a
|
|
75
|
-
b
|
|
76
|
-
|
|
77
|
-
expect(
|
|
82
|
+
it("preserves first-seen on slug collision", () => {
|
|
83
|
+
const idx = buildModelsDevIndex([
|
|
84
|
+
mg("a/gpt-4o", { name: "First" }),
|
|
85
|
+
mg("b/gpt-4o", { name: "Second" }),
|
|
86
|
+
]);
|
|
87
|
+
expect(idx.get("gpt-4o")?.name).toBe("First");
|
|
78
88
|
});
|
|
79
89
|
});
|
|
80
90
|
|
|
81
91
|
// ── lookupInIndex ────────────────────────────────────────────────────────────
|
|
82
92
|
|
|
83
93
|
describe("lookupInIndex", () => {
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
"claude-sonnet-4-5": {
|
|
91
|
-
id: "claude-sonnet-4-5",
|
|
92
|
-
name: "Claude Sonnet 4.5",
|
|
93
|
-
},
|
|
94
|
-
"claude-opus-4": { id: "claude-opus-4", name: "Claude Opus 4" },
|
|
95
|
-
},
|
|
96
|
-
},
|
|
97
|
-
openai: {
|
|
98
|
-
models: {
|
|
99
|
-
"gpt-4o": { id: "gpt-4o", name: "GPT-4o" },
|
|
100
|
-
"o3-mini": { id: "o3-mini", name: "o3 mini" },
|
|
101
|
-
},
|
|
102
|
-
},
|
|
103
|
-
});
|
|
104
|
-
});
|
|
94
|
+
const index = buildModelsDevIndex([
|
|
95
|
+
mg("anthropic/claude-sonnet-4-5", { name: "Claude Sonnet 4.5" }),
|
|
96
|
+
mg("anthropic/claude-opus-4", { name: "Claude Opus 4" }),
|
|
97
|
+
mg("openai/gpt-4o", { name: "GPT-4o" }),
|
|
98
|
+
mg("openai/o3-mini", { name: "o3 mini" }),
|
|
99
|
+
]);
|
|
105
100
|
|
|
106
101
|
it("finds exact match", () => {
|
|
107
102
|
expect(lookupInIndex("claude-sonnet-4-5", index)?.name).toBe(
|
|
@@ -142,108 +137,93 @@ describe("lookupInIndex", () => {
|
|
|
142
137
|
});
|
|
143
138
|
});
|
|
144
139
|
|
|
145
|
-
// ── lookupModelsDev
|
|
140
|
+
// ── modelgrep adapters (lookupModelsDev + lookupBenchmark) ────────────────────
|
|
141
|
+
|
|
142
|
+
describe("modelgrep adapters", () => {
|
|
143
|
+
const catalog: ModelGrepModel[] = [
|
|
144
|
+
mg("anthropic/claude-haiku-4.5", {
|
|
145
|
+
name: "Anthropic: Claude Haiku 4.5",
|
|
146
|
+
ctx: 200000,
|
|
147
|
+
in: 1,
|
|
148
|
+
out: 5,
|
|
149
|
+
input: ["text", "image"],
|
|
150
|
+
bench: {
|
|
151
|
+
coding: 43.9,
|
|
152
|
+
agentic: 16.4,
|
|
153
|
+
gpqa: 0.672,
|
|
154
|
+
scicode: 0.433,
|
|
155
|
+
tau2: 0.547,
|
|
156
|
+
hle: 0.097,
|
|
157
|
+
},
|
|
158
|
+
}),
|
|
159
|
+
mg("tencent/hy3-preview", {
|
|
160
|
+
name: "Tencent: hy3 preview",
|
|
161
|
+
ctx: 256000,
|
|
162
|
+
in: 0,
|
|
163
|
+
out: 0,
|
|
164
|
+
reasoning: true,
|
|
165
|
+
// coding/agentic absent — only raw benches → renormalized over present
|
|
166
|
+
bench: { gpqa: 0.732, scicode: 0.394, tau2: 0.675, hle: 0.063 },
|
|
167
|
+
}),
|
|
168
|
+
mg("ghost/unbenched", { name: "Ghost" }), // no signal at all
|
|
169
|
+
];
|
|
146
170
|
|
|
147
|
-
describe("lookupModelsDev", () => {
|
|
148
171
|
beforeEach(() => {
|
|
149
|
-
|
|
150
|
-
(modelsDev as any)._mem = {
|
|
151
|
-
anthropic: {
|
|
152
|
-
models: {
|
|
153
|
-
"claude-sonnet-4-5": {
|
|
154
|
-
id: "claude-sonnet-4-5",
|
|
155
|
-
name: "Claude Sonnet 4.5",
|
|
156
|
-
},
|
|
157
|
-
},
|
|
158
|
-
},
|
|
159
|
-
openai: {
|
|
160
|
-
models: {
|
|
161
|
-
"gpt-4o": { id: "gpt-4o", name: "GPT-4o" },
|
|
162
|
-
},
|
|
163
|
-
},
|
|
164
|
-
};
|
|
172
|
+
(modelgrep as unknown as { _mem: ModelGrepModel[] })._mem = catalog;
|
|
165
173
|
});
|
|
166
|
-
|
|
167
174
|
afterEach(() => {
|
|
168
|
-
(
|
|
175
|
+
(modelgrep as unknown as { _mem: ModelGrepModel[] | null })._mem = null;
|
|
169
176
|
});
|
|
170
177
|
|
|
171
|
-
it("finds
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
);
|
|
178
|
+
it("lookupModelsDev finds haiku via slug, ignoring routing prefix + date", () => {
|
|
179
|
+
const m = lookupModelsDev("cc", "claude-haiku-4-5-20251001");
|
|
180
|
+
expect(m?.limit?.context).toBe(200000);
|
|
181
|
+
expect(m?.cost?.input).toBe(1);
|
|
175
182
|
});
|
|
176
183
|
|
|
177
|
-
it("
|
|
178
|
-
expect(lookupModelsDev("unknown-provider", "gpt-4o")?.name).toBe("GPT-4o");
|
|
179
|
-
});
|
|
180
|
-
|
|
181
|
-
it("strips path prefix from id", () => {
|
|
184
|
+
it("lookupModelsDev finds hy3 via prefix + suffix strip", () => {
|
|
182
185
|
expect(
|
|
183
|
-
lookupModelsDev("
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
it("returns undefined for unknown model", () => {
|
|
188
|
-
expect(lookupModelsDev("anthropic", "nonexistent-xyz")).toBeUndefined();
|
|
186
|
+
lookupModelsDev("openrouter", "tencent/hy3-preview:nitro")?.limit
|
|
187
|
+
?.context,
|
|
188
|
+
).toBe(256000);
|
|
189
189
|
});
|
|
190
|
-
});
|
|
191
190
|
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
describe("lookupBenchmark", () => {
|
|
195
|
-
const entries: BenchmarkEntry[] = [
|
|
196
|
-
{
|
|
197
|
-
rank: 1,
|
|
198
|
-
model: "Claude Sonnet 4.5",
|
|
199
|
-
creator: "Anthropic",
|
|
200
|
-
overallScore: 95,
|
|
201
|
-
inputPrice: 3,
|
|
202
|
-
outputPrice: 15,
|
|
203
|
-
},
|
|
204
|
-
{
|
|
205
|
-
rank: 2,
|
|
206
|
-
model: "GPT-4o",
|
|
207
|
-
creator: "OpenAI",
|
|
208
|
-
overallScore: 90,
|
|
209
|
-
inputPrice: 5,
|
|
210
|
-
outputPrice: 15,
|
|
211
|
-
},
|
|
212
|
-
{
|
|
213
|
-
rank: 3,
|
|
214
|
-
model: "Gemini 1.5 Pro",
|
|
215
|
-
creator: "Google",
|
|
216
|
-
overallScore: 88,
|
|
217
|
-
inputPrice: 3.5,
|
|
218
|
-
outputPrice: 10.5,
|
|
219
|
-
},
|
|
220
|
-
];
|
|
221
|
-
|
|
222
|
-
beforeEach(() => {
|
|
223
|
-
(benchmark as any)._mem = entries;
|
|
191
|
+
it("lookupModelsDev returns undefined for unknown model", () => {
|
|
192
|
+
expect(lookupModelsDev("cc", "nonexistent-xyz")).toBeUndefined();
|
|
224
193
|
});
|
|
225
194
|
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
expect(
|
|
195
|
+
it("lookupBenchmark falls back to fitted heuristic (no intelligence)", () => {
|
|
196
|
+
const b = lookupBenchmark("claude-haiku-4-5-20251001");
|
|
197
|
+
// no intelligence → 120.6·heur − 10.6 → 42
|
|
198
|
+
expect(b?.overallScore).toBe(42);
|
|
199
|
+
expect(b?.rank).toBe(2); // ranked by score: hy3 (58) > haiku (42)
|
|
200
|
+
expect(b?.inputPrice).toBe(1);
|
|
201
|
+
expect(b?.outputPrice).toBe(5);
|
|
232
202
|
});
|
|
233
203
|
|
|
234
|
-
it("
|
|
235
|
-
|
|
204
|
+
it("lookupBenchmark renormalizes heuristic over present benches", () => {
|
|
205
|
+
const b = lookupBenchmark("tencent/hy3-preview:nitro");
|
|
206
|
+
// coding/agentic indices absent → heuristic renormalizes → fitted → 58
|
|
207
|
+
expect(b?.overallScore).toBe(58);
|
|
208
|
+
expect(b?.rank).toBe(1);
|
|
236
209
|
});
|
|
237
210
|
|
|
238
|
-
it("
|
|
239
|
-
|
|
211
|
+
it("lookupBenchmark returns null score when no benches at all", () => {
|
|
212
|
+
const b = lookupBenchmark("ghost/unbenched");
|
|
213
|
+
expect(b?.overallScore).toBeNull();
|
|
214
|
+
expect(b?.rank).toBe(3); // unscored sinks to the bottom
|
|
240
215
|
});
|
|
241
216
|
|
|
242
|
-
it("
|
|
243
|
-
|
|
217
|
+
it("lookupBenchmark prefers AA intelligence index over heuristic", () => {
|
|
218
|
+
(modelgrep as unknown as { _mem: ModelGrepModel[] })._mem = [
|
|
219
|
+
mg("openai/gpt-5", { bench: { intelligence: 52, coding: 10 } }),
|
|
220
|
+
];
|
|
221
|
+
const b = lookupBenchmark("gpt-5");
|
|
222
|
+
// intelligence present → round(52 / 65 * 100) = 80, ignores low coding
|
|
223
|
+
expect(b?.overallScore).toBe(80);
|
|
244
224
|
});
|
|
245
225
|
|
|
246
|
-
it("returns undefined for unknown model", () => {
|
|
226
|
+
it("lookupBenchmark returns undefined for unknown model", () => {
|
|
247
227
|
expect(lookupBenchmark("nonexistent-model-xyz")).toBeUndefined();
|
|
248
228
|
});
|
|
249
229
|
});
|
package/src/data.ts
CHANGED
|
@@ -1,21 +1,18 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* data.ts — shared Pi model data layer
|
|
3
3
|
*
|
|
4
|
-
* Single source of truth
|
|
5
|
-
*
|
|
6
|
-
*
|
|
4
|
+
* Single source of truth, sourced from modelgrep (coding-sorted), cached at
|
|
5
|
+
* ~/.cache/pi/modelgrep.json (TTL 24h). Provides context, cost, modalities,
|
|
6
|
+
* capabilities, a coding-focused score (AA Intelligence Index or fitted heuristic), and rank.
|
|
7
7
|
*
|
|
8
8
|
* Cache files are shared across all Pi extensions — whichever extension loads
|
|
9
9
|
* first populates the cache; subsequent extensions read from disk.
|
|
10
10
|
*
|
|
11
11
|
* Usage:
|
|
12
|
-
* import {
|
|
12
|
+
* import { modelgrep } from "./data.ts";
|
|
13
13
|
*
|
|
14
|
-
* const
|
|
15
|
-
* const
|
|
16
|
-
*
|
|
17
|
-
* const models = modelsDev.getCached(); // sync, disk-only, no fetch
|
|
18
|
-
* const entries = benchmark.getCached();
|
|
14
|
+
* const catalog = await modelgrep.get(); // async, fetches if stale
|
|
15
|
+
* const catalog = modelgrep.getCached(); // sync, disk-only, no fetch
|
|
19
16
|
*
|
|
20
17
|
* import { lookupModelsDev, lookupBenchmark } from "./data.ts";
|
|
21
18
|
*/
|
|
@@ -57,10 +54,29 @@ export interface BenchmarkEntry {
|
|
|
57
54
|
outputPrice: number | null;
|
|
58
55
|
}
|
|
59
56
|
|
|
60
|
-
interface
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
57
|
+
export interface ModelGrepModel {
|
|
58
|
+
id: string;
|
|
59
|
+
name?: string;
|
|
60
|
+
context_length?: number;
|
|
61
|
+
pricing?: { input?: number; output?: number };
|
|
62
|
+
modality?: { input?: string[]; output?: string[] };
|
|
63
|
+
capabilities?: { reasoning?: boolean };
|
|
64
|
+
benchmarks?: {
|
|
65
|
+
artificial_analysis?: {
|
|
66
|
+
// AA Intelligence Index — authoritative 9-eval composite (~0–65 range).
|
|
67
|
+
intelligence?: number | null;
|
|
68
|
+
coding?: number | null; // 0–100 index
|
|
69
|
+
agentic?: number | null; // 0–100 index
|
|
70
|
+
gpqa?: number | null; // 0–1
|
|
71
|
+
scicode?: number | null; // 0–1
|
|
72
|
+
tau2?: number | null; // 0–1
|
|
73
|
+
hle?: number | null; // 0–1
|
|
74
|
+
};
|
|
75
|
+
};
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
interface ModelGrepResponse {
|
|
79
|
+
data: ModelGrepModel[];
|
|
64
80
|
}
|
|
65
81
|
|
|
66
82
|
// ── DataSource ───────────────────────────────────────────────────────────────
|
|
@@ -76,6 +92,16 @@ interface DataSourceOptions<T> {
|
|
|
76
92
|
empty: T;
|
|
77
93
|
label: string;
|
|
78
94
|
skip?: () => boolean;
|
|
95
|
+
/**
|
|
96
|
+
* Optional override for sources that need multiple requests (pagination).
|
|
97
|
+
* Returns the merged raw payload, which is then handed to `parse`/cached
|
|
98
|
+
* exactly as a single response would be.
|
|
99
|
+
*/
|
|
100
|
+
fetchRaw?: (
|
|
101
|
+
url: string,
|
|
102
|
+
headers: Record<string, string> | undefined,
|
|
103
|
+
timeoutMs: number,
|
|
104
|
+
) => Promise<unknown>;
|
|
79
105
|
}
|
|
80
106
|
|
|
81
107
|
export class DataSource<T> {
|
|
@@ -89,6 +115,7 @@ export class DataSource<T> {
|
|
|
89
115
|
timeoutMs: 10_000,
|
|
90
116
|
headers: () => undefined,
|
|
91
117
|
skip: () => false,
|
|
118
|
+
fetchRaw: defaultFetchRaw,
|
|
92
119
|
...opts,
|
|
93
120
|
};
|
|
94
121
|
}
|
|
@@ -131,14 +158,11 @@ export class DataSource<T> {
|
|
|
131
158
|
try {
|
|
132
159
|
const url =
|
|
133
160
|
typeof this.opts.url === "function" ? this.opts.url() : this.opts.url;
|
|
134
|
-
const
|
|
161
|
+
const raw = await this.opts.fetchRaw(
|
|
135
162
|
url,
|
|
136
|
-
this.opts.timeoutMs,
|
|
137
163
|
this.opts.headers(),
|
|
164
|
+
this.opts.timeoutMs,
|
|
138
165
|
);
|
|
139
|
-
if (!response.ok)
|
|
140
|
-
throw new Error(`${this.opts.label} fetch failed: ${response.status}`);
|
|
141
|
-
const raw = await response.json();
|
|
142
166
|
const val = this.opts.parse(raw);
|
|
143
167
|
this._mem = val;
|
|
144
168
|
void this._writeCache(raw);
|
|
@@ -196,6 +220,52 @@ function fetchWithTimeout(
|
|
|
196
220
|
);
|
|
197
221
|
}
|
|
198
222
|
|
|
223
|
+
/** Single-request raw fetch — the default DataSource fetch strategy. */
|
|
224
|
+
async function defaultFetchRaw(
|
|
225
|
+
url: string,
|
|
226
|
+
headers: Record<string, string> | undefined,
|
|
227
|
+
timeoutMs: number,
|
|
228
|
+
): Promise<unknown> {
|
|
229
|
+
const response = await fetchWithTimeout(url, timeoutMs, headers);
|
|
230
|
+
if (!response.ok) throw new Error(`fetch failed: ${response.status}`);
|
|
231
|
+
return response.json();
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
const MODELGREP_PAGE = 200; // modelgrep hard page-size cap
|
|
235
|
+
const MODELGREP_MAX_PAGES = 10; // safety bound (~2000 models)
|
|
236
|
+
|
|
237
|
+
interface ModelGrepPage {
|
|
238
|
+
data?: ModelGrepModel[];
|
|
239
|
+
meta?: { has_more?: boolean; next_offset?: number };
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
/**
|
|
243
|
+
* Paginating fetch for modelgrep: walks `meta.has_more`/`next_offset` and
|
|
244
|
+
* merges every page into one `{ data }` payload so `parse` and the cache see
|
|
245
|
+
* the full catalog as a single response. `url` already carries the query
|
|
246
|
+
* (sort/limit); we only append `offset=` (joined with `&`, or `?` if the URL has no query).
|
|
247
|
+
*/
|
|
248
|
+
async function fetchModelGrepAll(
|
|
249
|
+
url: string,
|
|
250
|
+
headers: Record<string, string> | undefined,
|
|
251
|
+
timeoutMs: number,
|
|
252
|
+
): Promise<{ data: ModelGrepModel[] }> {
|
|
253
|
+
const all: ModelGrepModel[] = [];
|
|
254
|
+
let offset = 0;
|
|
255
|
+
for (let page = 0; page < MODELGREP_MAX_PAGES; page++) {
|
|
256
|
+
const sep = url.includes("?") ? "&" : "?";
|
|
257
|
+
const res = (await defaultFetchRaw(
|
|
258
|
+
`${url}${sep}offset=${offset}`,
|
|
259
|
+
headers,
|
|
260
|
+
timeoutMs,
|
|
261
|
+
)) as ModelGrepPage;
|
|
262
|
+
if (res.data?.length) all.push(...res.data);
|
|
263
|
+
if (!res.meta?.has_more) break;
|
|
264
|
+
offset = res.meta.next_offset ?? offset + MODELGREP_PAGE;
|
|
265
|
+
}
|
|
266
|
+
return { data: all };
|
|
267
|
+
}
|
|
268
|
+
|
|
199
269
|
// ── Cache dir ─────────────────────────────────────────────────────────────────
|
|
200
270
|
|
|
201
271
|
export const CACHE_DIR = join(
|
|
@@ -205,21 +275,13 @@ export const CACHE_DIR = join(
|
|
|
205
275
|
|
|
206
276
|
// ── Data sources ──────────────────────────────────────────────────────────────
|
|
207
277
|
|
|
208
|
-
export const
|
|
209
|
-
label: "
|
|
210
|
-
url:
|
|
211
|
-
cachePath: join(CACHE_DIR, "
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
});
|
|
216
|
-
|
|
217
|
-
export const benchmark = new DataSource<BenchmarkEntry[]>({
|
|
218
|
-
label: "benchlm",
|
|
219
|
-
url: "https://benchlm.ai/api/data/leaderboard",
|
|
220
|
-
cachePath: join(CACHE_DIR, "benchlm.json"),
|
|
221
|
-
parse: (raw) => (raw as BenchmarkResponse).models ?? [],
|
|
222
|
-
parseCache: (data) => (data as BenchmarkResponse)?.models ?? [],
|
|
278
|
+
export const modelgrep = new DataSource<ModelGrepModel[]>({
|
|
279
|
+
label: "modelgrep",
|
|
280
|
+
url: `https://modelgrep.com/api/v1/models?benchmarked=1&sort=coding&order=desc&limit=${MODELGREP_PAGE}`,
|
|
281
|
+
cachePath: join(CACHE_DIR, "modelgrep.json"),
|
|
282
|
+
fetchRaw: fetchModelGrepAll,
|
|
283
|
+
parse: (raw) => (raw as ModelGrepResponse).data ?? [],
|
|
284
|
+
parseCache: (data) => (data as ModelGrepResponse)?.data ?? [],
|
|
223
285
|
empty: [],
|
|
224
286
|
});
|
|
225
287
|
|
|
@@ -228,8 +290,9 @@ export const benchmark = new DataSource<BenchmarkEntry[]>({
|
|
|
228
290
|
function normalize(id: string): string {
|
|
229
291
|
return id
|
|
230
292
|
.toLowerCase()
|
|
231
|
-
.replace(/[:@].*$/, "")
|
|
232
|
-
.replace(
|
|
293
|
+
.replace(/[:@].*$/, "") // routing suffix (:nitro, @date)
|
|
294
|
+
.replace(/[._]/g, "-") // fold separators: modelgrep `4.5` ↔ Pi routing `4-5`
|
|
295
|
+
.replace(/-\d{8}$/, ""); // trailing -YYYYMMDD
|
|
233
296
|
}
|
|
234
297
|
|
|
235
298
|
function stripPrefix(id: string): string {
|
|
@@ -237,71 +300,172 @@ function stripPrefix(id: string): string {
|
|
|
237
300
|
return i >= 0 ? id.slice(i + 1) : id;
|
|
238
301
|
}
|
|
239
302
|
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
)
|
|
243
|
-
const index = new Map<string, ModelsDevModel>();
|
|
244
|
-
for (const provider of Object.values(api)) {
|
|
245
|
-
if (!provider?.models) continue;
|
|
246
|
-
for (const [modelId, model] of Object.entries(provider.models)) {
|
|
247
|
-
const m: ModelsDevModel = { ...model, id: modelId };
|
|
248
|
-
if (!index.has(modelId)) index.set(modelId, m);
|
|
249
|
-
const norm = normalize(modelId);
|
|
250
|
-
if (!index.has(norm)) index.set(norm, m);
|
|
251
|
-
}
|
|
252
|
-
}
|
|
253
|
-
return index;
|
|
303
|
+
/** Slug = model id without its maker/provider prefix. */
|
|
304
|
+
function slugOf(id: string): string {
|
|
305
|
+
return id.includes("/") ? id.slice(id.lastIndexOf("/") + 1) : id;
|
|
254
306
|
}
|
|
255
307
|
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
308
|
+
/**
|
|
309
|
+
* Generic normalized-index lookup: exact slug → normalized slug → fuzzy
|
|
310
|
+
* prefix overlap. Handles routing suffixes (`:nitro`, `@date`, `-YYYYMMDD`)
|
|
311
|
+
* and maker prefixes (e.g. `tencent/hy3-preview:nitro` → `hy3-preview`).
|
|
312
|
+
*/
|
|
313
|
+
function findInIndex<T>(id: string, index: Map<string, T>): T | undefined {
|
|
260
314
|
const stripped = stripPrefix(id);
|
|
261
315
|
const direct = index.get(stripped) ?? index.get(normalize(stripped));
|
|
262
316
|
if (direct) return direct;
|
|
263
317
|
const norm = normalize(stripped);
|
|
264
|
-
for (const [key,
|
|
265
|
-
if (key.startsWith(norm) || norm.startsWith(key)) return
|
|
318
|
+
for (const [key, value] of index) {
|
|
319
|
+
if (key.startsWith(norm) || norm.startsWith(key)) return value;
|
|
266
320
|
}
|
|
267
321
|
return undefined;
|
|
268
322
|
}
|
|
269
323
|
|
|
270
|
-
export function
|
|
271
|
-
provider: string,
|
|
324
|
+
export function lookupInIndex(
|
|
272
325
|
id: string,
|
|
326
|
+
index: Map<string, ModelsDevModel>,
|
|
273
327
|
): ModelsDevModel | undefined {
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
328
|
+
return findInIndex(id, index);
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
function toModelsDevModel(g: ModelGrepModel): ModelsDevModel {
|
|
332
|
+
return {
|
|
333
|
+
id: slugOf(g.id),
|
|
334
|
+
name: g.name,
|
|
335
|
+
reasoning: g.capabilities?.reasoning,
|
|
336
|
+
modalities: g.modality,
|
|
337
|
+
limit: { context: g.context_length },
|
|
338
|
+
cost: { input: g.pricing?.input, output: g.pricing?.output },
|
|
339
|
+
};
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
export function buildModelsDevIndex(
|
|
343
|
+
source: ModelGrepModel[],
|
|
344
|
+
): Map<string, ModelsDevModel> {
|
|
345
|
+
const index = new Map<string, ModelsDevModel>();
|
|
346
|
+
for (const g of source) {
|
|
347
|
+
const m = toModelsDevModel(g);
|
|
348
|
+
if (!index.has(m.id)) index.set(m.id, m);
|
|
349
|
+
const norm = normalize(m.id);
|
|
350
|
+
if (!index.has(norm)) index.set(norm, m);
|
|
281
351
|
}
|
|
282
|
-
return
|
|
352
|
+
return index;
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
export function lookupModelsDev(
|
|
356
|
+
_provider: string,
|
|
357
|
+
id: string,
|
|
358
|
+
): ModelsDevModel | undefined {
|
|
359
|
+
// Provider prefix differs between Pi routing (cc/ds/openrouter) and modelgrep
|
|
360
|
+
// (anthropic/tencent), so join on the model slug only via the normalized index.
|
|
361
|
+
return findInIndex(id, buildModelsDevIndex(modelgrep.getCached()));
|
|
283
362
|
}
|
|
284
363
|
|
|
285
364
|
export async function fetchModelsDevIndex(): Promise<
|
|
286
365
|
Map<string, ModelsDevModel>
|
|
287
366
|
> {
|
|
288
|
-
return buildModelsDevIndex(await
|
|
367
|
+
return buildModelsDevIndex(await modelgrep.get());
|
|
289
368
|
}
|
|
290
369
|
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
370
|
+
// Weighted blend, renormalized over present fields — a missing input dilutes
|
|
371
|
+
// only its own group, never zero-penalizes the whole score.
|
|
372
|
+
function blend(parts: [number, number | null | undefined][]): number | null {
|
|
373
|
+
let weighted = 0;
|
|
374
|
+
let present = 0;
|
|
375
|
+
for (const [w, v] of parts) {
|
|
376
|
+
if (v == null) continue;
|
|
377
|
+
weighted += w * v;
|
|
378
|
+
present += w;
|
|
379
|
+
}
|
|
380
|
+
return present === 0 ? null : weighted / present;
|
|
381
|
+
}
|
|
382
|
+
|
|
383
|
+
// Divides a value by 100; a missing (null/undefined) value stays null.
const frac = (v: number | null | undefined): number | null => {
  if (v == null) return null;
  return v / 100;
};

// Ceiling used to rescale the AA Intelligence Index. The current leader
// (Claude Fable 5) scores ~65, so dividing by 65 spreads the index over roughly
// 0–100 with headroom and without clipping.
const INTELLIGENCE_MAX = 65;

// Calibration for models that lack the index. Over the models that carry both
// the index and the raw benches (deduped overlap, n=29), the 0–1 heuristic was
// least-squares fitted to the rescaled index:
//   index100 ≈ FALLBACK_SLOPE · heuristic + FALLBACK_INTERCEPT
// The heuristic weights and this line were tuned together against the index
// (R²=0.901, LOOCV-RMSE 6.55pt). Applying the line to index-less models puts
// their heuristic on the same scale as real index scores — a data fit rather
// than a guessed penalty. Refit whenever the catalog or the weights change.
const FALLBACK_SLOPE = 120.6;
const FALLBACK_INTERCEPT = -10.6;

// Limits x to the closed range [0, 100].
const clamp01to100 = (x: number): number => Math.min(100, Math.max(0, x));
|
|
399
|
+
|
|
400
|
+
// Coding/agentic-weighted heuristic built from the raw evals. Each eval feeds
// exactly one group, so nothing is counted twice and nothing overlaps with the
// index itself. Group weights were tuned against the AA index: agentic .60
// (tool-calling matters most), coding .30, reasoning .10 as a tiebreaker. The
// sub-weights were fitted the same way, with tau2 dominating the agentic group.
//
// `coding` and `agentic` are divided by 100 via frac(); `scicode`, `tau2`,
// `gpqa` and `hle` are used as-is (presumably already 0–1 fractions — confirm
// against the modelgrep payload).
//
// Returns null when none of the six evals is present.
function heuristicScore(
  aa: NonNullable<
    NonNullable<ModelGrepModel["benchmarks"]>["artificial_analysis"]
  >,
): number | null {
  const codingGroup = blend([
    [0.6, frac(aa.coding)],
    [0.4, aa.scicode],
  ]);
  const agenticGroup = blend([
    [0.7, aa.tau2],
    [0.3, frac(aa.agentic)],
  ]);
  const reasoningGroup = blend([
    [0.6, aa.gpqa],
    [0.4, aa.hle],
  ]);

  // A group that came back null is skipped by blend(), so the remaining groups
  // are renormalized among themselves.
  const overall = blend([
    [0.3, codingGroup],
    [0.6, agenticGroup],
    [0.1, reasoningGroup],
  ]);
  return overall;
}
|
|
427
|
+
|
|
428
|
+
// Model score on a 0–100 scale. AA's Intelligence Index (authoritative 9-eval
// composite) wins whenever it is present and is rescaled by INTELLIGENCE_MAX.
// Without it, the heuristic is projected onto the index scale through the
// fitted line and clamped to [0, 100]. Null only when nothing is benchmarked.
//
// NOTE(review): only the fallback path is clamped; an index value above
// INTELLIGENCE_MAX yields a score above 100. Confirm this is intended.
function codingScore(
  bench: NonNullable<ModelGrepModel["benchmarks"]>,
): number | null {
  const aa = bench.artificial_analysis ?? {};

  if (aa.intelligence == null) {
    const heuristic = heuristicScore(aa);
    if (heuristic == null) return null;
    const onIndexScale = FALLBACK_SLOPE * heuristic + FALLBACK_INTERCEPT;
    return Math.round(clamp01to100(onIndexScale));
  }

  return Math.round((aa.intelligence / INTELLIGENCE_MAX) * 100);
}
|
|
443
|
+
|
|
444
|
+
// Builds the benchmark lookup table from the cached modelgrep catalog.
//
// Models are ranked by their computed score, highest first. Unscored models
// are treated as -1 for ordering, so they sink to the bottom while keeping
// their source order among themselves. Each entry is registered under the
// model's slug and its normalized slug; a key already taken by a better-ranked
// model is left alone.
function buildBenchIndex(): Map<string, BenchmarkEntry> {
  const sortKey = (score: number | null): number => score ?? -1;

  // map() yields a fresh array, so the in-place sort never touches the cache.
  const ranked = modelgrep.getCached().map((g) => {
    const score = g.benchmarks ? codingScore(g.benchmarks) : null;
    return { g, score };
  });
  ranked.sort((a, b) => sortKey(b.score) - sortKey(a.score));

  const index = new Map<string, BenchmarkEntry>();
  for (const [position, { g, score }] of ranked.entries()) {
    const slug = slugOf(g.id);
    const entry: BenchmarkEntry = {
      rank: position + 1, // ranks are 1-based
      model: g.name ?? g.id,
      creator: g.id.split("/")[0] ?? "",
      overallScore: score,
      inputPrice: g.pricing?.input ?? null,
      outputPrice: g.pricing?.output ?? null,
    };
    const keys = [slug, normalize(slug)];
    for (const key of keys) {
      if (index.has(key)) continue;
      index.set(key, entry);
    }
  }
  return index;
}
|
|
298
468
|
|
|
299
469
|
// Finds the benchmark entry for a model name in an index that is freshly
// built from the cached catalog on every call.
export function lookupBenchmark(modelName: string): BenchmarkEntry | undefined {
  const index = buildBenchIndex();
  return findInIndex(modelName, index);
}
|
package/src/index.ts
CHANGED
|
@@ -4,14 +4,15 @@
|
|
|
4
4
|
* Warms the shared model data cache on session start so other extensions
|
|
5
5
|
* (pix-9router, models picker, footer) can read from ~/.cache/pi/* synchronously.
|
|
6
6
|
*
|
|
7
|
-
*
|
|
7
|
+
* Single non-blocking fetch — Pi session starts immediately.
|
|
8
8
|
*/
|
|
9
9
|
|
|
10
10
|
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
11
|
-
import {
|
|
11
|
+
import { modelgrep } from "./data.ts";
|
|
12
12
|
|
|
13
13
|
export type {
|
|
14
14
|
BenchmarkEntry,
|
|
15
|
+
ModelGrepModel,
|
|
15
16
|
ModelsDevApi,
|
|
16
17
|
ModelsDevModel,
|
|
17
18
|
} from "./data.ts";
|
|
@@ -19,7 +20,6 @@ export type {
|
|
|
19
20
|
// Consumers (pix-core, pix-9router, …) import these instead of duplicating
|
|
20
21
|
// the DataSource implementation and models.dev/BenchLM lookups.
|
|
21
22
|
export {
|
|
22
|
-
benchmark,
|
|
23
23
|
buildModelsDevIndex,
|
|
24
24
|
CACHE_DIR,
|
|
25
25
|
DataSource,
|
|
@@ -27,10 +27,9 @@ export {
|
|
|
27
27
|
lookupBenchmark,
|
|
28
28
|
lookupInIndex,
|
|
29
29
|
lookupModelsDev,
|
|
30
|
-
|
|
30
|
+
modelgrep,
|
|
31
31
|
} from "./data.ts";
|
|
32
32
|
|
|
33
33
|
/**
 * Extension entry point: starts warming the shared modelgrep cache.
 *
 * The fetch is fire-and-forget so the session is never blocked on it.
 *
 * @param _pi - Extension API handle; unused.
 */
export default function (_pi: ExtensionAPI): void {
  // Best-effort warm-up: a failed fetch must not surface as an unhandled
  // promise rejection, so the rejection is deliberately absorbed here.
  void modelgrep.get().catch(() => undefined);
}
|