@xynogen/pix-data 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +60 -36
- package/package.json +1 -1
- package/src/data.test.ts +86 -0
- package/src/data.ts +104 -7
- package/src/index.ts +5 -2
package/README.md
CHANGED
|
@@ -1,42 +1,57 @@
|
|
|
1
1
|
# pix-data
|
|
2
2
|
|
|
3
|
-
Pi coding agent extension — shared model data layer.
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
redundant network calls.
|
|
3
|
+
Pi coding agent extension — shared model data layer. Warms two cached
|
|
4
|
+
data sources on session start so other extensions (model picker, footer,
|
|
5
|
+
subagent resolver) can read context window, pricing, and a coding-focused
|
|
6
|
+
score/rank synchronously without redundant network calls:
|
|
8
7
|
|
|
9
|
-
|
|
8
|
+
- **[modelgrep](https://modelgrep.com)** — the model catalog (context window,
|
|
9
|
+
pricing, modalities, capabilities, raw benchmark fields) used as the
|
|
10
|
+
authoritative source when present.
|
|
11
|
+
- **[benchlm.ai](https://benchlm.ai)** — a leaderboard of 0–100 coding scores
|
|
12
|
+
used as a fallback when modelgrep's `artificial_analysis` block is null
|
|
13
|
+
(currently the common case for the long tail of models).
|
|
10
14
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
[Artificial Analysis](https://artificialanalysis.ai).
|
|
15
|
+
Both caches live under `~/.cache/pi/` and are shared across every Pi
|
|
16
|
+
extension using the same `DataSource` class — whichever extension loads
|
|
17
|
+
first populates the cache; subsequent extensions read from disk.
|
|
15
18
|
|
|
16
|
-
|
|
17
|
-
- **Score** — computed locally from the raw benchmark fields (see below).
|
|
18
|
-
- **Rank** — the model's position once the whole catalog is sorted by that score
|
|
19
|
-
(best = `#1`). Unscored models sink to the bottom.
|
|
19
|
+
## Data sources
|
|
20
20
|
|
|
21
|
-
|
|
22
|
-
|
|
21
|
+
- **`modelgrep`** — `GET /api/v1/models?sort=coding&order=desc&limit=200`,
|
|
22
|
+
paginated up to 10 pages (`meta.has_more` / `next_offset`). Free, no API key.
|
|
23
|
+
modelgrep aggregates benchmark numbers from
|
|
24
|
+
[Artificial Analysis](https://artificialanalysis.ai). Context window, pricing,
|
|
25
|
+
and modalities are taken verbatim from the catalog.
|
|
26
|
+
- **`benchlm`** — `GET https://benchlm.ai/api/data/leaderboard`. Free, no API
|
|
27
|
+
key. Each entry has an `overallScore` (0–100) used as the fallback score
|
|
28
|
+
when modelgrep's `artificial_analysis` block is null.
|
|
29
|
+
|
|
30
|
+
Cache files:
|
|
31
|
+
|
|
32
|
+
- `~/.cache/pi/modelgrep.json` (TTL 24h)
|
|
33
|
+
- `~/.cache/pi/benchlm.json` (TTL 24h)
|
|
34
|
+
|
|
35
|
+
On outage the stale cache keeps the picker working until it can refresh.
|
|
23
36
|
|
|
24
37
|
## Scoring methodology
|
|
25
38
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
39
|
+
The score a model receives is the first of the following that succeeds, in
|
|
40
|
+
order:
|
|
41
|
+
|
|
42
|
+
1. **Primary = [Artificial Analysis Intelligence Index](https://artificialanalysis.ai/methodology/intelligence-benchmarking)**
|
|
43
|
+
when present on the modelgrep entry — AA's authoritative composite of 9
|
|
44
|
+
independent evals (agents, coding, scientific reasoning, general), already
|
|
45
|
+
weighted toward agentic work. Rescaled to 0–100
|
|
46
|
+
(`intelligence / 65 × 100`; the current leader scores ~65).
|
|
47
|
+
2. **Heuristic** from modelgrep's raw benchmark fields when the AA index is
|
|
48
|
+
absent. Weighted blend of the same family of evals AA uses, then mapped onto
|
|
49
|
+
the index scale by a least-squares line. Both the heuristic weights *and*
|
|
50
|
+
the line were jointly tuned against the index on the models that carry
|
|
51
|
+
*both* it and the raw benches (`index100 ≈ 120.6·heuristic − 10.6`, deduped
|
|
52
|
+
n=29, R²=0.901, leave-one-out RMSE 6.55pt) — a data calibration, not a
|
|
53
|
+
guessed penalty. The picker exists to choose a model *for coding work in an
|
|
54
|
+
agent*, so the heuristic is weighted toward exactly that:
|
|
40
55
|
|
|
41
56
|
| bench | range | measures |
|
|
42
57
|
|---|---|---|
|
|
@@ -59,6 +74,12 @@ heuristic = 0.30·coding_score + 0.60·agentic_score + 0.10·reasoning_score
|
|
|
59
74
|
score = round(clamp₀₁₀₀(120.6·heuristic − 10.6)) // fitted to the index
|
|
60
75
|
```
|
|
61
76
|
|
|
77
|
+
3. **benchlm.ai fallback** — if the model exists in benchlm but modelgrep has
|
|
78
|
+
no AA index and no raw benches, look up the benchlm `overallScore` (0–100)
|
|
79
|
+
and use it verbatim. Match strategy (in `lookupBenchlmScore`): exact
|
|
80
|
+
normalized slug, then prefix overlap either way, then take the
|
|
81
|
+
highest-scoring candidate overall (the shortest name wins a score tie).
|
|
82
|
+
|
|
62
83
|
**Why a heuristic at all, and why these raw evals only:** the AA Intelligence
|
|
63
84
|
Index *is* the ideal number — but only ~16% of the catalog has it. For the rest
|
|
64
85
|
we rebuild a comparable score from the same family of raw evals. Crucially we
|
|
@@ -89,13 +110,15 @@ place if your priorities differ.
|
|
|
89
110
|
|
|
90
111
|
| Export | Description |
|
|
91
112
|
|---|---|
|
|
92
|
-
| `modelgrep` | `DataSource<ModelGrepModel[]>` — the catalog. TTL 24h → `~/.cache/pi/modelgrep.json` |
|
|
113
|
+
| `modelgrep` | `DataSource<ModelGrepModel[]>` — the modelgrep catalog. TTL 24h → `~/.cache/pi/modelgrep.json` |
|
|
114
|
+
| `benchlm` | `DataSource<BenchLMRawEntry[]>` — the benchlm.ai leaderboard (fallback scores). TTL 24h → `~/.cache/pi/benchlm.json` |
|
|
93
115
|
| `DataSource` | Generic cached data source class |
|
|
94
116
|
| `CACHE_DIR` | Resolved cache directory (`~/.cache/pi`) |
|
|
95
117
|
| `buildModelsDevIndex` | Build a lookup `Map` from the catalog (context/cost/modalities) |
|
|
96
118
|
| `lookupInIndex` | Fuzzy-match a router model id against an index |
|
|
97
|
-
| `lookupModelsDev` | Sync lookup by
|
|
119
|
+
| `lookupModelsDev` | Sync lookup by id from in-memory cache (joined on slug) |
|
|
98
120
|
| `lookupBenchmark` | Sync lookup of a model by id — returns score + rank + pricing |
|
|
121
|
+
| `benchScoreColor` | Map a 0–100 score to a `success`/`warning`/`error`/`muted` token |
|
|
99
122
|
|
|
100
123
|
## Install
|
|
101
124
|
|
|
@@ -105,10 +128,11 @@ pi install npm:@xynogen/pix-data
|
|
|
105
128
|
|
|
106
129
|
## How it works
|
|
107
130
|
|
|
108
|
-
On session start the extension fires
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
extension using the same `DataSource` shares
|
|
131
|
+
On session start the extension fires two non-blocking fetches in parallel
|
|
132
|
+
(`modelgrep.get()` and `benchlm.get()`) — Pi session start is not gated on
|
|
133
|
+
either. Each fetch is skipped while its own cache file is still fresh. The cache files live
|
|
134
|
+
in `~/.cache/pi/` — any Pi extension using the same `DataSource` shares them
|
|
135
|
+
automatically.
|
|
112
136
|
|
|
113
137
|
## Full distro
|
|
114
138
|
|
package/package.json
CHANGED
package/src/data.test.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { afterEach, beforeEach, describe, expect, it } from "bun:test";
|
|
2
2
|
import {
|
|
3
|
+
benchlm,
|
|
3
4
|
buildModelsDevIndex,
|
|
4
5
|
lookupBenchmark,
|
|
5
6
|
lookupInIndex,
|
|
@@ -227,3 +228,88 @@ describe("modelgrep adapters", () => {
|
|
|
227
228
|
expect(lookupBenchmark("nonexistent-model-xyz")).toBeUndefined();
|
|
228
229
|
});
|
|
229
230
|
});
|
|
231
|
+
|
|
232
|
+
// ── benchlm fallback (modelgrep AA null → benchlm) ────────────────────────────
|
|
233
|
+
|
|
234
|
+
describe("benchlm fallback", () => {
|
|
235
|
+
// modelgrep catalog: every model has null benchmarks (real-world shape today)
|
|
236
|
+
const catalog: ModelGrepModel[] = [
|
|
237
|
+
mg("anthropic/claude-opus-4-8", { name: "Claude Opus 4.8" }),
|
|
238
|
+
mg("minimax/minimax-m3", { name: "MiniMax M3" }),
|
|
239
|
+
mg("deepseek/deepseek-v4-pro", { name: "DeepSeek V4 Pro" }),
|
|
240
|
+
mg("qwen/qwen3.7-max", { name: "Qwen3.7 Max" }),
|
|
241
|
+
mg("ghost/uncataloged", { name: "Ghost" }), // not in benchlm either
|
|
242
|
+
];
|
|
243
|
+
// benchlm: real shape (no benchmarks field, just overallScore 0-100)
|
|
244
|
+
const benchlmEntries = [
|
|
245
|
+
{ rank: 1, model: "Claude Opus 4.8 (Max)", overallScore: 95 },
|
|
246
|
+
{ rank: 2, model: "Claude Opus 4.8", overallScore: 93 },
|
|
247
|
+
{ rank: 25, model: "MiniMax M3", overallScore: 78 },
|
|
248
|
+
{ rank: 39, model: "DeepSeek V4 Pro", overallScore: 68 },
|
|
249
|
+
{ rank: 10, model: "Qwen3.7 Max", overallScore: 90 },
|
|
250
|
+
];
|
|
251
|
+
|
|
252
|
+
beforeEach(() => {
|
|
253
|
+
(modelgrep as unknown as { _mem: ModelGrepModel[] })._mem = catalog;
|
|
254
|
+
(benchlm as unknown as { _mem: typeof benchlmEntries })._mem =
|
|
255
|
+
benchlmEntries;
|
|
256
|
+
});
|
|
257
|
+
afterEach(() => {
|
|
258
|
+
(modelgrep as unknown as { _mem: ModelGrepModel[] | null })._mem = null;
|
|
259
|
+
(benchlm as unknown as { _mem: typeof benchlmEntries | null })._mem = null;
|
|
260
|
+
});
|
|
261
|
+
|
|
262
|
+
it("falls back to benchlm when modelgrep benchmarks are null", () => {
|
|
263
|
+
const b = lookupBenchmark("claude-opus-4-8");
|
|
264
|
+
// Two candidates: (Max)=95, base=93 → pick higher
|
|
265
|
+
expect(b?.overallScore).toBe(95);
|
|
266
|
+
});
|
|
267
|
+
|
|
268
|
+
it("prefers the higher-scoring benchlm variant when multiple match", () => {
|
|
269
|
+
const b = lookupBenchmark("minimax-m3");
|
|
270
|
+
expect(b?.overallScore).toBe(78);
|
|
271
|
+
});
|
|
272
|
+
|
|
273
|
+
it("returns null when both modelgrep and benchlm lack the model", () => {
|
|
274
|
+
const b = lookupBenchmark("uncataloged");
|
|
275
|
+
expect(b?.overallScore).toBeNull();
|
|
276
|
+
});
|
|
277
|
+
|
|
278
|
+
it("ranks scored models above unscored when only some have benchlm data", () => {
|
|
279
|
+
// catalog has 5 models, 4 in benchlm → uncataloged sinks to last
|
|
280
|
+
const b = lookupBenchmark("uncataloged");
|
|
281
|
+
expect(b?.rank).toBe(5); // 4 scored + 1 unscored at bottom
|
|
282
|
+
});
|
|
283
|
+
|
|
284
|
+
it("normalizes dots and parens: qwen3.7-max ↔ Qwen3.7 Max", () => {
|
|
285
|
+
const b = lookupBenchmark("qwen3.7-max");
|
|
286
|
+
expect(b?.overallScore).toBe(90);
|
|
287
|
+
});
|
|
288
|
+
});
|
|
289
|
+
|
|
290
|
+
describe("modelgrep AA primary wins over benchlm", () => {
|
|
291
|
+
const catalog: ModelGrepModel[] = [
|
|
292
|
+
mg("anthropic/claude-opus-4-8", {
|
|
293
|
+
bench: { intelligence: 60 }, // AA index: 60/65 → 92
|
|
294
|
+
}),
|
|
295
|
+
];
|
|
296
|
+
const benchlmEntries = [
|
|
297
|
+
{ rank: 1, model: "Claude Opus 4.8", overallScore: 50 },
|
|
298
|
+
];
|
|
299
|
+
|
|
300
|
+
beforeEach(() => {
|
|
301
|
+
(modelgrep as unknown as { _mem: ModelGrepModel[] })._mem = catalog;
|
|
302
|
+
(benchlm as unknown as { _mem: typeof benchlmEntries })._mem =
|
|
303
|
+
benchlmEntries;
|
|
304
|
+
});
|
|
305
|
+
afterEach(() => {
|
|
306
|
+
(modelgrep as unknown as { _mem: ModelGrepModel[] | null })._mem = null;
|
|
307
|
+
(benchlm as unknown as { _mem: typeof benchlmEntries | null })._mem = null;
|
|
308
|
+
});
|
|
309
|
+
|
|
310
|
+
it("uses AA intelligence when present, ignores benchlm", () => {
|
|
311
|
+
const b = lookupBenchmark("claude-opus-4-8");
|
|
312
|
+
// 60/65 * 100 = 92.23 → 92, not benchlm's 50
|
|
313
|
+
expect(b?.overallScore).toBe(92);
|
|
314
|
+
});
|
|
315
|
+
});
|
package/src/data.ts
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* data.ts — shared Pi model data layer
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
* ~/.cache/pi/modelgrep.json (TTL 24h)
|
|
6
|
-
* capabilities, coding-percentile score,
|
|
4
|
+
* Two data sources, each its own cached DataSource:
|
|
5
|
+
* - modelgrep (coding-sorted catalog) — ~/.cache/pi/modelgrep.json (TTL 24h):
|
|
6
|
+
* context, cost, modalities, capabilities, coding-percentile score, rank.
|
|
7
|
+
* - BenchLM — ~/.cache/pi/benchlm.json: fallback overall score when modelgrep
|
|
8
|
+
* has no benchmark for a model (see lookupBenchmark).
|
|
7
9
|
*
|
|
8
10
|
* Cache files are shared across all Pi extensions — whichever extension loads
|
|
9
11
|
* first populates the cache; subsequent extensions read from disk.
|
|
@@ -285,6 +287,40 @@ export const modelgrep = new DataSource<ModelGrepModel[]>({
|
|
|
285
287
|
empty: [],
|
|
286
288
|
});
|
|
287
289
|
|
|
290
|
+
// ── BenchLM (fallback coding-score source) ────────────────────────────────────
|
|
291
|
+
// Upstream `benchlm.ai` ships a 0–100 `overallScore` per model with category
|
|
292
|
+
// breakdown (coding/agentic/reasoning/…). Used as a fallback when modelgrep's
|
|
293
|
+
// `benchmarks.artificial_analysis` is null (current state). Same name as
|
|
294
|
+
// before the 4dfb443 swap.
|
|
295
|
+
interface BenchLMCategoryScores {
|
|
296
|
+
coding?: number | null;
|
|
297
|
+
agentic?: number | null;
|
|
298
|
+
reasoning?: number | null;
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
interface BenchLMRawEntry {
|
|
302
|
+
rank: number;
|
|
303
|
+
model: string;
|
|
304
|
+
creator?: string;
|
|
305
|
+
overallScore: number | null;
|
|
306
|
+
categoryScores?: BenchLMCategoryScores;
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
interface BenchLMResponse {
|
|
310
|
+
lastUpdated?: string;
|
|
311
|
+
mode?: string;
|
|
312
|
+
models?: BenchLMRawEntry[];
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
export const benchlm = new DataSource<BenchLMRawEntry[]>({
|
|
316
|
+
label: "benchlm",
|
|
317
|
+
url: "https://benchlm.ai/api/data/leaderboard",
|
|
318
|
+
cachePath: join(CACHE_DIR, "benchlm.json"),
|
|
319
|
+
parse: (raw) => (raw as BenchLMResponse).models ?? [],
|
|
320
|
+
parseCache: (data) => (data as BenchLMResponse)?.models ?? [],
|
|
321
|
+
empty: [],
|
|
322
|
+
});
|
|
323
|
+
|
|
288
324
|
// ── Lookup helpers ─────────────────────────────────────────────────────────────
|
|
289
325
|
|
|
290
326
|
function normalize(id: string): string {
|
|
@@ -443,12 +479,24 @@ function codingScore(
|
|
|
443
479
|
|
|
444
480
|
function buildBenchIndex(): Map<string, BenchmarkEntry> {
|
|
445
481
|
const index = new Map<string, BenchmarkEntry>();
|
|
482
|
+
// BenchLM lookup table: normalized benchlm name → entries, indexed in source
|
|
483
|
+
// order (highest score first when ties exist). Built once per call.
|
|
484
|
+
const benchlmByNorm = new Map<string, BenchLMRawEntry[]>();
|
|
485
|
+
for (const b of benchlm.getCached()) {
|
|
486
|
+
const k = normalizeBenchlmName(b.model);
|
|
487
|
+
if (!k) continue;
|
|
488
|
+
const arr = benchlmByNorm.get(k) ?? [];
|
|
489
|
+
arr.push(b);
|
|
490
|
+
benchlmByNorm.set(k, arr);
|
|
491
|
+
}
|
|
492
|
+
|
|
446
493
|
// Rank by our computed score (desc); unscored sink to the bottom, holding
|
|
447
494
|
// source order among themselves.
|
|
448
|
-
const scored = modelgrep.getCached().map((g) =>
|
|
449
|
-
g
|
|
450
|
-
score
|
|
451
|
-
|
|
495
|
+
const scored = modelgrep.getCached().map((g) => {
|
|
496
|
+
const fromAA = g.benchmarks ? codingScore(g.benchmarks) : null;
|
|
497
|
+
const score = fromAA ?? lookupBenchlmScore(g, benchlmByNorm);
|
|
498
|
+
return { g, score };
|
|
499
|
+
});
|
|
452
500
|
scored.sort((a, b) => (b.score ?? -1) - (a.score ?? -1));
|
|
453
501
|
scored.forEach(({ g, score }, i) => {
|
|
454
502
|
const slug = slugOf(g.id);
|
|
@@ -466,6 +514,55 @@ function buildBenchIndex(): Map<string, BenchmarkEntry> {
|
|
|
466
514
|
return index;
|
|
467
515
|
}
|
|
468
516
|
|
|
517
|
+
// Normalize a benchlm `model` field (e.g. "Claude Opus 4.8 (Max)") to a slug
|
|
518
|
+
// comparable to modelgrep ids (e.g. "claude-opus-4-8"). Drops parenthesized
|
|
519
|
+
// variants, lowercases, folds . _ space → -, strips leading/trailing dashes.
|
|
520
|
+
function normalizeBenchlmName(name: string): string {
|
|
521
|
+
return name
|
|
522
|
+
.replace(/\s*\([^)]*\)\s*/g, " ") // drop "(Max)", "(High)", etc.
|
|
523
|
+
.toLowerCase()
|
|
524
|
+
.replace(/[._\s]+/g, "-")
|
|
525
|
+
.replace(/-+/g, "-")
|
|
526
|
+
.replace(/^-|-$/g, "");
|
|
527
|
+
}
|
|
528
|
+
|
|
529
|
+
// Try to find a benchlm score for a modelgrep model. Match strategy:
|
|
530
|
+
// 1. exact normalized match of modelgrep slug
|
|
531
|
+
// 2. prefix overlap (claude-opus-4-8 ↔ claude-opus-4-8-thinking) — benchlm
|
|
532
|
+
// may list a long-form name; prefer the shortest match on tie (base > variants)
|
|
533
|
+
// 3. if multiple benchlm entries match, return the highest score
|
|
534
|
+
function lookupBenchlmScore(
|
|
535
|
+
g: ModelGrepModel,
|
|
536
|
+
benchlmByNorm: Map<string, BenchLMRawEntry[]>,
|
|
537
|
+
): number | null {
|
|
538
|
+
const slug = slugOf(g.id);
|
|
539
|
+
const norm = normalize(slug);
|
|
540
|
+
|
|
541
|
+
// Collect candidates: exact match + prefix matches (either side).
|
|
542
|
+
const candidates: BenchLMRawEntry[] = [];
|
|
543
|
+
const direct = benchlmByNorm.get(norm);
|
|
544
|
+
if (direct) candidates.push(...direct);
|
|
545
|
+
for (const [key, entries] of benchlmByNorm) {
|
|
546
|
+
if (key === norm) continue;
|
|
547
|
+
if (key.startsWith(norm) || norm.startsWith(key))
|
|
548
|
+
candidates.push(...entries);
|
|
549
|
+
}
|
|
550
|
+
if (candidates.length === 0) return null;
|
|
551
|
+
|
|
552
|
+
// Best entry = highest overallScore. Sort by score desc, then by slug
|
|
553
|
+
// length asc (prefer base name over suffix variants on a tie).
|
|
554
|
+
const best = [...candidates].sort((a, b) => {
|
|
555
|
+
const sa = a.overallScore ?? -Infinity;
|
|
556
|
+
const sb = b.overallScore ?? -Infinity;
|
|
557
|
+
if (sa !== sb) return sb - sa;
|
|
558
|
+
return (
|
|
559
|
+
normalizeBenchlmName(a.model).length -
|
|
560
|
+
normalizeBenchlmName(b.model).length
|
|
561
|
+
);
|
|
562
|
+
})[0];
|
|
563
|
+
return best.overallScore ?? null;
|
|
564
|
+
}
|
|
565
|
+
|
|
469
566
|
/** Map a benchmark score (0–100) to a semantic color token. */
|
|
470
567
|
export function benchScoreColor(
|
|
471
568
|
score: number | null | undefined,
|
package/src/index.ts
CHANGED
|
@@ -4,11 +4,12 @@
|
|
|
4
4
|
* Warms the shared model data cache on session start so other extensions
|
|
5
5
|
* (pix-9router, models picker, footer) can read from ~/.cache/pi/* synchronously.
|
|
6
6
|
*
|
|
7
|
-
*
|
|
7
|
+
* Two non-blocking fetches (modelgrep catalog + BenchLM scores) — Pi session
|
|
8
|
+
* starts immediately; consumers read whichever cache file they need.
|
|
8
9
|
*/
|
|
9
10
|
|
|
10
11
|
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
11
|
-
import { modelgrep } from "./data.ts";
|
|
12
|
+
import { benchlm, modelgrep } from "./data.ts";
|
|
12
13
|
|
|
13
14
|
export type {
|
|
14
15
|
BenchmarkEntry,
|
|
@@ -20,6 +21,7 @@ export type {
|
|
|
20
21
|
// Consumers (pix-core, pix-9router, …) import these instead of duplicating
|
|
21
22
|
// the DataSource implementation and models.dev/BenchLM lookups.
|
|
22
23
|
export {
|
|
24
|
+
benchlm,
|
|
23
25
|
benchScoreColor,
|
|
24
26
|
buildModelsDevIndex,
|
|
25
27
|
CACHE_DIR,
|
|
@@ -33,4 +35,5 @@ export {
|
|
|
33
35
|
|
|
34
36
|
export default function (_pi: ExtensionAPI): void {
|
|
35
37
|
void modelgrep.get();
|
|
38
|
+
void benchlm.get();
|
|
36
39
|
}
|