tryaii-dre 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +190 -0
- package/README.md +234 -0
- package/dist/banner.d.ts +24 -0
- package/dist/banner.d.ts.map +1 -0
- package/dist/banner.js +125 -0
- package/dist/banner.js.map +1 -0
- package/dist/benchmarks/index.d.ts +4 -0
- package/dist/benchmarks/index.d.ts.map +1 -0
- package/dist/benchmarks/index.js +3 -0
- package/dist/benchmarks/index.js.map +1 -0
- package/dist/benchmarks/registry.d.ts +69 -0
- package/dist/benchmarks/registry.d.ts.map +1 -0
- package/dist/benchmarks/registry.js +128 -0
- package/dist/benchmarks/registry.js.map +1 -0
- package/dist/benchmarks/standard.d.ts +6 -0
- package/dist/benchmarks/standard.d.ts.map +1 -0
- package/dist/benchmarks/standard.js +115 -0
- package/dist/benchmarks/standard.js.map +1 -0
- package/dist/budget.d.ts +65 -0
- package/dist/budget.d.ts.map +1 -0
- package/dist/budget.js +344 -0
- package/dist/budget.js.map +1 -0
- package/dist/cache/index.d.ts +27 -0
- package/dist/cache/index.d.ts.map +1 -0
- package/dist/cache/index.js +63 -0
- package/dist/cache/index.js.map +1 -0
- package/dist/centroids/data/centroids_all-MiniLM-L6-v2.json +1 -0
- package/dist/centroids/data/trainingQueries.json +246 -0
- package/dist/centroids/generator.d.ts +63 -0
- package/dist/centroids/generator.d.ts.map +1 -0
- package/dist/centroids/generator.js +120 -0
- package/dist/centroids/generator.js.map +1 -0
- package/dist/centroids/index.d.ts +3 -0
- package/dist/centroids/index.d.ts.map +1 -0
- package/dist/centroids/index.js +3 -0
- package/dist/centroids/index.js.map +1 -0
- package/dist/centroids/loader.d.ts +87 -0
- package/dist/centroids/loader.d.ts.map +1 -0
- package/dist/centroids/loader.js +236 -0
- package/dist/centroids/loader.js.map +1 -0
- package/dist/classifiers/base.d.ts +56 -0
- package/dist/classifiers/base.d.ts.map +1 -0
- package/dist/classifiers/base.js +42 -0
- package/dist/classifiers/base.js.map +1 -0
- package/dist/classifiers/embedding.d.ts +68 -0
- package/dist/classifiers/embedding.d.ts.map +1 -0
- package/dist/classifiers/embedding.js +0 -0
- package/dist/classifiers/embedding.js.map +1 -0
- package/dist/classifiers/hybrid.d.ts +31 -0
- package/dist/classifiers/hybrid.d.ts.map +1 -0
- package/dist/classifiers/hybrid.js +61 -0
- package/dist/classifiers/hybrid.js.map +1 -0
- package/dist/classifiers/index.d.ts +4 -0
- package/dist/classifiers/index.d.ts.map +1 -0
- package/dist/classifiers/index.js +3 -0
- package/dist/classifiers/index.js.map +1 -0
- package/dist/classifiers/keyword.d.ts +29 -0
- package/dist/classifiers/keyword.d.ts.map +1 -0
- package/dist/classifiers/keyword.js +264 -0
- package/dist/classifiers/keyword.js.map +1 -0
- package/dist/cli.d.ts +15 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +597 -0
- package/dist/cli.js.map +1 -0
- package/dist/client-types.d.ts +101 -0
- package/dist/client-types.d.ts.map +1 -0
- package/dist/client-types.js +5 -0
- package/dist/client-types.js.map +1 -0
- package/dist/client.d.ts +50 -0
- package/dist/client.d.ts.map +1 -0
- package/dist/client.js +279 -0
- package/dist/client.js.map +1 -0
- package/dist/config.d.ts +45 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +37 -0
- package/dist/config.js.map +1 -0
- package/dist/dashboard/index.d.ts +48 -0
- package/dist/dashboard/index.d.ts.map +1 -0
- package/dist/dashboard/index.js +166 -0
- package/dist/dashboard/index.js.map +1 -0
- package/dist/embeddings/base.d.ts +66 -0
- package/dist/embeddings/base.d.ts.map +1 -0
- package/dist/embeddings/base.js +77 -0
- package/dist/embeddings/base.js.map +1 -0
- package/dist/embeddings/index.d.ts +3 -0
- package/dist/embeddings/index.d.ts.map +1 -0
- package/dist/embeddings/index.js +3 -0
- package/dist/embeddings/index.js.map +1 -0
- package/dist/embeddings/local.d.ts +42 -0
- package/dist/embeddings/local.d.ts.map +1 -0
- package/dist/embeddings/local.js +89 -0
- package/dist/embeddings/local.js.map +1 -0
- package/dist/index.d.ts +44 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +45 -0
- package/dist/index.js.map +1 -0
- package/dist/integrations/index.d.ts +3 -0
- package/dist/integrations/index.d.ts.map +1 -0
- package/dist/integrations/index.js +2 -0
- package/dist/integrations/index.js.map +1 -0
- package/dist/integrations/openrouter.d.ts +84 -0
- package/dist/integrations/openrouter.d.ts.map +1 -0
- package/dist/integrations/openrouter.js +253 -0
- package/dist/integrations/openrouter.js.map +1 -0
- package/dist/registry/index.d.ts +2 -0
- package/dist/registry/index.d.ts.map +1 -0
- package/dist/registry/index.js +2 -0
- package/dist/registry/index.js.map +1 -0
- package/dist/registry/models.d.ts +76 -0
- package/dist/registry/models.d.ts.map +1 -0
- package/dist/registry/models.js +170 -0
- package/dist/registry/models.js.map +1 -0
- package/dist/registry/presets/defaultModels.json +435 -0
- package/dist/router.d.ts +178 -0
- package/dist/router.d.ts.map +1 -0
- package/dist/router.js +259 -0
- package/dist/router.js.map +1 -0
- package/dist/scoring/benchmarks.d.ts +35 -0
- package/dist/scoring/benchmarks.d.ts.map +1 -0
- package/dist/scoring/benchmarks.js +68 -0
- package/dist/scoring/benchmarks.js.map +1 -0
- package/dist/scoring/engine.d.ts +43 -0
- package/dist/scoring/engine.d.ts.map +1 -0
- package/dist/scoring/engine.js +267 -0
- package/dist/scoring/engine.js.map +1 -0
- package/dist/scoring/index.d.ts +6 -0
- package/dist/scoring/index.d.ts.map +1 -0
- package/dist/scoring/index.js +4 -0
- package/dist/scoring/index.js.map +1 -0
- package/dist/scoring/priorities.d.ts +41 -0
- package/dist/scoring/priorities.d.ts.map +1 -0
- package/dist/scoring/priorities.js +49 -0
- package/dist/scoring/priorities.js.map +1 -0
- package/dist/types.d.ts +47 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +5 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/cosine.d.ts +10 -0
- package/dist/utils/cosine.d.ts.map +1 -0
- package/dist/utils/cosine.js +18 -0
- package/dist/utils/cosine.js.map +1 -0
- package/dist/utils/math.d.ts +18 -0
- package/dist/utils/math.d.ts.map +1 -0
- package/dist/utils/math.js +54 -0
- package/dist/utils/math.js.map +1 -0
- package/package.json +65 -0
package/dist/router.js
ADDED
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Main Router -- the primary public API for TryAii-DRE.
|
|
3
|
+
*
|
|
4
|
+
* Usage:
|
|
5
|
+
* import { Router } from 'tryaii-dre';
|
|
6
|
+
*
|
|
7
|
+
* const router = new Router();
|
|
8
|
+
* const result = await router.route('Write a Python function to merge sorted arrays');
|
|
9
|
+
* console.log(result.bestModel); // e.g., "gpt-5.2"
|
|
10
|
+
* console.log(result.scores); // Top models with scores and reasoning
|
|
11
|
+
*
|
|
12
|
+
* The default `LocalEmbeddingProvider` is async-only, so `route()` itself is
|
|
13
|
+
* async. Callers that have injected a sync embedding provider can use the
|
|
14
|
+
* niche `routeSync()` method for a blocking call.
|
|
15
|
+
*/
|
|
16
|
+
import { BenchmarkRegistry } from './benchmarks/registry.js';
|
|
17
|
+
import { NormalizationRange } from './scoring/benchmarks.js';
|
|
18
|
+
import { CentroidLoader } from './centroids/loader.js';
|
|
19
|
+
import { EmbeddingClassifier } from './classifiers/embedding.js';
|
|
20
|
+
import { createDefaultConfig, centroidFilePath } from './config.js';
|
|
21
|
+
import { LocalEmbeddingProvider } from './embeddings/local.js';
|
|
22
|
+
import { ModelRegistry } from './registry/models.js';
|
|
23
|
+
import { ScoringEngine } from './scoring/engine.js';
|
|
24
|
+
import { DEFAULT_PRIORITIES } from './scoring/priorities.js';
|
|
25
|
+
/**
|
|
26
|
+
* Maximum allowed prompt length (characters). Prompts longer than this are
|
|
27
|
+
* truncated before classification to avoid OOM in the embedding model. Must
|
|
28
|
+
* stay in sync with the Python SDK's MAX_PROMPT_LENGTH (classifiers/base.py).
|
|
29
|
+
*/
|
|
30
|
+
// 100,000 -- compared against `prompt.length`, so the limit is counted in UTF-16 code units.
export const MAX_PROMPT_LENGTH = 1e5;
|
|
31
|
+
/**
 * List the model IDs of a RouteResult, best-ranked first.
 *
 * @param result - RouteResult whose `scores` array is read in order.
 * @returns The `modelId` of every entry in `result.scores`, same order.
 */
export function routeResultTopK(result) {
    const ranked = result.scores;
    return ranked.map(({ modelId }) => modelId);
}
|
|
35
|
+
/**
 * Final score of the top-ranked model.
 *
 * @param result - RouteResult whose first `scores` entry is inspected.
 * @returns `scores[0].finalScore`, or 0 when there is no first entry or its
 *   `finalScore` is null/undefined.
 */
export function routeResultBestScore(result) {
    const top = result.scores[0];
    if (top === undefined || top === null) {
        return 0;
    }
    return top.finalScore ?? 0;
}
|
|
39
|
+
/**
 * Explanation attached to the top-ranked model.
 *
 * @param result - RouteResult whose first `scores` entry is inspected.
 * @returns `scores[0].reasoning`, or an empty string when there is no first
 *   entry or its `reasoning` is null/undefined.
 */
export function routeResultBestReasoning(result) {
    const top = result.scores[0];
    if (top === undefined || top === null) {
        return '';
    }
    return top.reasoning ?? '';
}
|
|
43
|
+
/**
 * Semantic AI model router.
 *
 * Analyzes user prompts using embeddings, matches them against benchmark
 * centroids, and recommends the best AI model based on benchmark performance,
 * pricing, latency, and user priorities.
 */
export class Router {
    /**
     * @param opts - Optional overrides. Fields read here: `config` (forwarded
     *   to `createDefaultConfig`), `registry`, `benchmarkRegistry` and
     *   `embeddingProvider`. Omitted registries fall back to their `default()`
     *   factory; an omitted provider is created lazily on first use.
     */
    constructor(opts) {
        // Both are created lazily by the _ensure* helpers.
        this._centroidLoader = null;
        this._classifier = null;
        this._config = createDefaultConfig(opts?.config);
        // Model registry
        this._registry = opts?.registry ?? ModelRegistry.default();
        // Benchmark registry
        this._benchmarkRegistry = opts?.benchmarkRegistry ?? BenchmarkRegistry.default();
        // Scoring engine with normalizer from benchmark registry
        const normalizer = this._benchmarkRegistry.getNormalizer();
        this._scoringEngine = new ScoringEngine(normalizer);
        // Embedding provider (lazy -- only initialized when needed)
        this._embeddingProvider = opts?.embeddingProvider ?? null;
    }
    /**
     * Lazy-initialize the embedding provider + centroid loader.
     *
     * The loader is stored on `this` so that `addBenchmark()` mutates the
     * same instance the classifier reads from -- custom benchmarks added
     * via the Router become visible to subsequent `route()` calls without
     * re-instantiating anything.
     *
     * @returns The shared centroid loader. After this returns,
     *   `this._embeddingProvider` is guaranteed to be non-null.
     */
    _ensureCentroidLoader() {
        if (this._centroidLoader !== null)
            return this._centroidLoader;
        if (this._embeddingProvider === null) {
            this._embeddingProvider = new LocalEmbeddingProvider(`Xenova/${this._config.embeddingModel}`);
        }
        this._centroidLoader = new CentroidLoader(this._embeddingProvider, centroidFilePath(this._config));
        return this._centroidLoader;
    }
    /** Lazy-initialize the embedding classifier on first use. */
    _ensureClassifier() {
        if (this._classifier !== null)
            return this._classifier;
        const centroidLoader = this._ensureCentroidLoader();
        // `_embeddingProvider` is non-null after _ensureCentroidLoader returns.
        const provider = this._embeddingProvider;
        this._classifier = new EmbeddingClassifier(provider, centroidLoader, {
            embeddingCacheSize: this._config.cache.embeddingCacheSize,
            classificationCacheSize: this._config.cache.classificationCacheSize,
            ttlSeconds: this._config.cache.ttlSeconds,
        });
        return this._classifier;
    }
    /**
     * Throw unless the current embedding provider supports synchronous calls.
     *
     * Shared by `routeSync()` and `addBenchmarkSync()` so the check always
     * runs before either method mutates any state.
     *
     * @param syncMethod - Name of the calling sync method (without parens).
     * @param asyncMethod - Name of the async alternative to suggest.
     * @throws Error when a provider is set and its `supportsSync` is falsy.
     */
    _assertSyncProvider(syncMethod, asyncMethod) {
        const provider = this._embeddingProvider;
        if (provider !== null && !provider.supportsSync) {
            throw new Error(`${syncMethod}() requires an embedding provider that supports sync calls. ` +
                `${provider.constructor.name} is async-only -- ` +
                `use ${asyncMethod}() (async) instead, or inject a sync provider.`);
        }
    }
    /**
     * Route a prompt to the best AI model.
     *
     * Async by default -- works with any embedding provider, including the
     * default async `LocalEmbeddingProvider`. For a blocking call backed by a
     * sync provider, see `routeSync()`.
     *
     * @param prompt - The user's input text to classify and route.
     * @param opts - Routing options (priorities, filters, topK).
     * @returns RouteResult with the best model and full scoring breakdown.
     * @throws Error when `prompt` is not a non-empty string.
     */
    async route(prompt, opts) {
        prompt = Router._normalizePrompt(prompt);
        const classifier = this._ensureClassifier();
        const classification = await classifier.classifyAsync(prompt);
        return this._buildResult(classification, opts);
    }
    /**
     * Synchronous version of `route()`.
     *
     * Requires the injected embedding provider to support sync calls
     * (`supportsSync === true`). Throws otherwise -- the default
     * `LocalEmbeddingProvider` is async-only, so calling `routeSync()` on a
     * default `Router` will fail. Inject a sync provider (e.g. a custom
     * cached provider) to use this path.
     *
     * @throws Error when `prompt` is not a non-empty string, or when the
     *   provider is async-only.
     */
    routeSync(prompt, opts) {
        prompt = Router._normalizePrompt(prompt);
        // Materialize the provider, then validate it before building a
        // classifier that could never be used on this path.
        this._ensureCentroidLoader();
        this._assertSyncProvider('routeSync', 'route');
        const classifier = this._ensureClassifier();
        const classification = classifier.classify(prompt);
        return this._buildResult(classification, opts);
    }
    /**
     * Validate and normalize a prompt before classification.
     *
     * Rejects empty/non-string prompts and truncates to MAX_PROMPT_LENGTH,
     * matching the Python Router (which raises ValueError on empty/non-string
     * and slices to the same limit) so both SDKs behave identically.
     *
     * The limit is measured in UTF-16 code units. If the cut would land in
     * the middle of a surrogate pair, the dangling high surrogate is dropped
     * as well so the result never ends in half a character.
     *
     * @param prompt - Candidate prompt.
     * @returns The prompt, truncated when longer than MAX_PROMPT_LENGTH.
     * @throws Error when `prompt` is not a string or is empty.
     */
    static _normalizePrompt(prompt) {
        if (typeof prompt !== 'string' || prompt.length === 0) {
            throw new Error('prompt must be a non-empty string');
        }
        if (prompt.length <= MAX_PROMPT_LENGTH) {
            return prompt;
        }
        let end = MAX_PROMPT_LENGTH;
        const lastKept = prompt.charCodeAt(end - 1);
        const firstDropped = prompt.charCodeAt(end);
        const splitsPair = lastKept >= 0xd800 && lastKept <= 0xdbff &&
            firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
        if (splitsPair) {
            end -= 1;
        }
        return prompt.slice(0, end);
    }
    /**
     * Shared post-classification path: filter models, score, return RouteResult.
     *
     * @param classification - Classifier output; its `benchmarkScores` are
     *   handed to the scoring engine.
     * @param opts - Optional `priorities` (default `DEFAULT_PRIORITIES`),
     *   `topK` (default 5), `filterProvider` (case-insensitive match),
     *   `filterCapability`, and `filterMaxCost` (upper bound on
     *   `pricing.inputPer1k`; models without pricing are excluded).
     * @returns RouteResult; `bestModel` is `''` and `scores` is empty when
     *   no model survives the filters.
     */
    _buildResult(classification, opts) {
        const priorities = opts?.priorities ?? DEFAULT_PRIORITIES;
        const topK = opts?.topK ?? 5;
        let models = this._registry.allModels;
        if (opts?.filterProvider) {
            const providerLower = opts.filterProvider.toLowerCase();
            models = models.filter((m) => m.provider.toLowerCase() === providerLower);
        }
        if (opts?.filterCapability) {
            const cap = opts.filterCapability;
            models = models.filter((m) => m.capabilities.includes(cap));
        }
        // `!= null` on purpose: a max cost of 0 is a valid filter.
        if (opts?.filterMaxCost != null) {
            const maxCost = opts.filterMaxCost;
            models = models.filter((m) => m.pricing != null && m.pricing.inputPer1k <= maxCost);
        }
        if (models.length === 0) {
            return {
                bestModel: '',
                scores: [],
                classification,
                priorities,
            };
        }
        const scores = this._scoringEngine.scoreModels(models, classification.benchmarkScores, priorities, topK);
        const best = scores[0]?.modelId ?? '';
        return {
            bestModel: best,
            scores,
            classification,
            priorities,
        };
    }
    /**
     * Shortcut to add a model to the registry.
     *
     * @see ModelRegistry.add()
     */
    addModel(opts) {
        return this._registry.add(opts);
    }
    /**
     * Add a custom benchmark to the routing system.
     *
     * Async by default -- works with any embedding provider, including the
     * async-only `LocalEmbeddingProvider`. The new centroid is generated
     * immediately and added to the shared centroid loader, so subsequent
     * `route()` calls see it without any restart.
     *
     * For sync-provider callers who want a blocking setup step, use
     * `addBenchmarkSync()`.
     *
     * @param name - Benchmark name (e.g., "CustomerSupportQA").
     * @param queries - Representative prompts for this benchmark (10-20 recommended).
     * @param description - Human-readable description.
     * @param minScore - Minimum score for normalization.
     * @param maxScore - Maximum score for normalization.
     */
    async addBenchmark(name, queries, description = '', minScore = 0, maxScore = 100) {
        this._registerBenchmark(name, queries, description, minScore, maxScore);
        const loader = this._ensureCentroidLoader();
        await loader.addBenchmarkCentroidAsync(name, queries);
    }
    /**
     * Synchronous version of `addBenchmark()`.
     *
     * Requires the injected embedding provider to support sync calls
     * (`supportsSync === true`). The default `LocalEmbeddingProvider` is
     * async-only, so calling `addBenchmarkSync()` on a default `Router`
     * will throw.
     *
     * The provider check runs before anything is registered, so a rejected
     * call leaves the benchmark registry and scoring engine untouched.
     *
     * @throws Error when the provider is async-only.
     */
    addBenchmarkSync(name, queries, description = '', minScore = 0, maxScore = 100) {
        const loader = this._ensureCentroidLoader();
        this._assertSyncProvider('addBenchmarkSync', 'addBenchmark');
        this._registerBenchmark(name, queries, description, minScore, maxScore);
        loader.addBenchmarkCentroid(name, queries);
    }
    /**
     * Register a benchmark in the registry and rebuild the scoring normalizer.
     *
     * Custom benchmarks are always registered with `broadCategory`
     * `'TECHNICAL'`, no subcategories and empty metadata.
     */
    _registerBenchmark(name, queries, description, minScore, maxScore) {
        const benchmark = {
            name,
            description,
            trainingQueries: queries,
            normalization: new NormalizationRange(minScore, maxScore, description),
            broadCategory: 'TECHNICAL',
            subcategories: [],
            metadata: {},
        };
        this._benchmarkRegistry.register(benchmark);
        // Fetch the normalizer again so the scoring engine is built from the
        // registry's post-registration state.
        const normalizer = this._benchmarkRegistry.getNormalizer();
        this._scoringEngine = new ScoringEngine(normalizer);
    }
    /** Access the model registry. */
    get models() {
        return this._registry;
    }
    /** Access the benchmark registry. */
    get benchmarks() {
        return this._benchmarkRegistry;
    }
    /** Access the configuration. */
    get config() {
        return this._config;
    }
}
|
|
259
|
+
//# sourceMappingURL=router.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"router.js","sourceRoot":"","sources":["../src/router.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAEH,OAAO,EAAE,iBAAiB,EAAuB,MAAM,0BAA0B,CAAC;AAClF,OAAO,EAAE,kBAAkB,EAAE,MAAM,yBAAyB,CAAC;AAC7D,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAEvD,OAAO,EAAE,mBAAmB,EAAE,MAAM,4BAA4B,CAAC;AACjE,OAAO,EAAmB,mBAAmB,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAErF,OAAO,EAAE,sBAAsB,EAAE,MAAM,uBAAuB,CAAC;AAC/D,OAAO,EAAE,aAAa,EAAa,MAAM,sBAAsB,CAAC;AAChE,OAAO,EAAc,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAChE,OAAO,EAAE,kBAAkB,EAAc,MAAM,yBAAyB,CAAC;AAGzE;;;;GAIG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAG,MAAO,CAAC;AAqBzC,gEAAgE;AAChE,MAAM,UAAU,eAAe,CAAC,MAAmB;IACjD,OAAO,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;AAC7C,CAAC;AAED,sCAAsC;AACtC,MAAM,UAAU,oBAAoB,CAAC,MAAmB;IACtD,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,UAAU,IAAI,CAAC,CAAC;AAC3C,CAAC;AAED,0DAA0D;AAC1D,MAAM,UAAU,wBAAwB,CAAC,MAAmB;IAC1D,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,SAAS,IAAI,EAAE,CAAC;AAC3C,CAAC;AAgBD;;;;;;GAMG;AACH,MAAM,OAAO,MAAM;IASjB,YAAY,IAKX;QARO,oBAAe,GAA0B,IAAI,CAAC;QAC9C,gBAAW,GAA+B,IAAI,CAAC;QAQrD,IAAI,CAAC,OAAO,GAAG,mBAAmB,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;QAEjD,iBAAiB;QACjB,IAAI,CAAC,SAAS,GAAG,IAAI,EAAE,QAAQ,IAAI,aAAa,CAAC,OAAO,EAAE,CAAC;QAE3D,qBAAqB;QACrB,IAAI,CAAC,kBAAkB,GAAG,IAAI,EAAE,iBAAiB,IAAI,iBAAiB,CAAC,OAAO,EAAE,CAAC;QAEjF,yDAAyD;QACzD,MAAM,UAAU,GAAG,IAAI,CAAC,kBAAkB,CAAC,aAAa,EAAE,CAAC;QAC3D,IAAI,CAAC,cAAc,GAAG,IAAI,aAAa,CAAC,UAAU,CAAC,CAAC;QAEpD,4DAA4D;QAC5D,IAAI,CAAC,kBAAkB,GAAG,IAAI,EAAE,iBAAiB,IAAI,IAAI,CAAC;IAC5D,CAAC;IAED;;;;;;;OAOG;IACK,qBAAqB;QAC3B,IAAI,IAAI,CAAC,eAAe,KAAK,IAAI;YAAE,OAAO,IAAI,CAAC,eAAe,CAAC;QAE/D,IAAI,IAAI,CAAC,kBAAkB,KAAK,IAAI,EAAE,CAAC;YACrC,IAAI,CAAC,kBAAkB,GAAG,IAAI,sBAAsB,CAClD,UAAU,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE,CACxC,CAAC;QACJ,CAAC;QAED,IAAI,CAAC,eAAe,GAAG,IAAI,cAAc,CACvC,IAAI,CAAC,kBAAkB,EACvB,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,CAC/B,CAAC;QAEF,OAAO,IAAI,CAAC,eAAe,CAAC;IAC9B,CAAC;IAED,6DAA6D;IACrD,iBAAiB;QACvB,IAAI,IAAI,CAAC,WAAW,KAAK,IAAI;YAAE,OA
AO,IAAI,CAAC,WAAW,CAAC;QAEvD,MAAM,cAAc,GAAG,IAAI,CAAC,qBAAqB,EAAE,CAAC;QACpD,wEAAwE;QACxE,MAAM,QAAQ,GAAG,IAAI,CAAC,kBAA2C,CAAC;QAElE,IAAI,CAAC,WAAW,GAAG,IAAI,mBAAmB,CACxC,QAAQ,EACR,cAAc,EACd;YACE,kBAAkB,EAAE,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,kBAAkB;YACzD,uBAAuB,EAAE,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,uBAAuB;YACnE,UAAU,EAAE,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,UAAU;SAC1C,CACF,CAAC;QAEF,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;IAED;;;;;;;;;;OAUG;IACH,KAAK,CAAC,KAAK,CAAC,MAAc,EAAE,IAAmB;QAC7C,MAAM,GAAG,MAAM,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC;QACzC,MAAM,UAAU,GAAG,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAC5C,MAAM,cAAc,GAAG,MAAM,UAAU,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;QAC9D,OAAO,IAAI,CAAC,YAAY,CAAC,cAAc,EAAE,IAAI,CAAC,CAAC;IACjD,CAAC;IAED;;;;;;;;OAQG;IACH,SAAS,CAAC,MAAc,EAAE,IAAmB;QAC3C,MAAM,GAAG,MAAM,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC;QACzC,MAAM,UAAU,GAAG,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAC5C,IAAI,IAAI,CAAC,kBAAkB,KAAK,IAAI,IAAI,CAAC,IAAI,CAAC,kBAAkB,CAAC,YAAY,EAAE,CAAC;YAC9E,MAAM,IAAI,KAAK,CACb,uEAAuE;gBACrE,GAAG,IAAI,CAAC,kBAAkB,CAAC,WAAW,CAAC,IAAI,oBAAoB;gBAC/D,yDAAyD,CAC5D,CAAC;QACJ,CAAC;QACD,MAAM,cAAc,GAAG,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QACnD,OAAO,IAAI,CAAC,YAAY,CAAC,cAAc,EAAE,IAAI,CAAC,CAAC;IACjD,CAAC;IAED;;;;;;OAMG;IACK,MAAM,CAAC,gBAAgB,CAAC,MAAc;QAC5C,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtD,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;QACvD,CAAC;QACD,IAAI,MAAM,CAAC,MAAM,GAAG,iBAAiB,EAAE,CAAC;YACtC,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,iBAAiB,CAAC,CAAC;QAC5C,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,iFAAiF;IACzE,YAAY,CAClB,cAAoC,EACpC,IAAmB;QAEnB,MAAM,UAAU,GAAG,IAAI,EAAE,UAAU,IAAI,kBAAkB,CAAC;QAC1D,MAAM,IAAI,GAAG,IAAI,EAAE,IAAI,IAAI,CAAC,CAAC;QAE7B,IAAI,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC;QACtC,IAAI,IAAI,EAAE,cAAc,EAAE,CAAC;YACzB,MAAM,aAAa,GAAG,IAAI,CAAC,cAAc,CAAC,WAAW,EAAE,CAAC;YACxD,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,KAAK,aAAa,CAAC,CAAC;QAC5E,CAAC;QACD,IAAI,IAAI,EAAE,gBAAgB,EAAE,CAAC;YAC3B,MAAM,GAAG,GAAG,IAAI,CAAC,gBAAgB,CAAC;YAClC,MA
AM,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC;QAC9D,CAAC;QACD,IAAI,IAAI,EAAE,aAAa,IAAI,IAAI,EAAE,CAAC;YAChC,MAAM,OAAO,GAAG,IAAI,CAAC,aAAa,CAAC;YACnC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,IAAI,IAAI,IAAI,CAAC,CAAC,OAAO,CAAC,UAAU,IAAI,OAAO,CAAC,CAAC;QACtF,CAAC;QAED,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxB,OAAO;gBACL,SAAS,EAAE,EAAE;gBACb,MAAM,EAAE,EAAE;gBACV,cAAc;gBACd,UAAU;aACX,CAAC;QACJ,CAAC;QAED,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,WAAW,CAC5C,MAAM,EACN,cAAc,CAAC,eAAe,EAC9B,UAAU,EACV,IAAI,CACL,CAAC;QAEF,MAAM,IAAI,GAAG,MAAM,CAAC,CAAC,CAAC,EAAE,OAAO,IAAI,EAAE,CAAC;QAEtC,OAAO;YACL,SAAS,EAAE,IAAI;YACf,MAAM;YACN,cAAc;YACd,UAAU;SACX,CAAC;IACJ,CAAC;IAED;;;;OAIG;IACH,QAAQ,CAAC,IAQR;QACC,OAAO,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAClC,CAAC;IAED;;;;;;;;;;;;;;;;OAgBG;IACH,KAAK,CAAC,YAAY,CAChB,IAAY,EACZ,OAAiB,EACjB,WAAW,GAAG,EAAE,EAChB,QAAQ,GAAG,CAAC,EACZ,QAAQ,GAAG,GAAG;QAEd,IAAI,CAAC,kBAAkB,CAAC,IAAI,EAAE,OAAO,EAAE,WAAW,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;QACxE,MAAM,MAAM,GAAG,IAAI,CAAC,qBAAqB,EAAE,CAAC;QAC5C,MAAM,MAAM,CAAC,yBAAyB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IACxD,CAAC;IAED;;;;;;;OAOG;IACH,gBAAgB,CACd,IAAY,EACZ,OAAiB,EACjB,WAAW,GAAG,EAAE,EAChB,QAAQ,GAAG,CAAC,EACZ,QAAQ,GAAG,GAAG;QAEd,IAAI,CAAC,kBAAkB,CAAC,IAAI,EAAE,OAAO,EAAE,WAAW,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;QACxE,MAAM,MAAM,GAAG,IAAI,CAAC,qBAAqB,EAAE,CAAC;QAC5C,IAAI,IAAI,CAAC,kBAAkB,KAAK,IAAI,IAAI,CAAC,IAAI,CAAC,kBAAkB,CAAC,YAAY,EAAE,CAAC;YAC9E,MAAM,IAAI,KAAK,CACb,8EAA8E;gBAC5E,GAAG,IAAI,CAAC,kBAAkB,CAAC,WAAW,CAAC,IAAI,oBAAoB;gBAC/D,gEAAgE,CACnE,CAAC;QACJ,CAAC;QACD,MAAM,CAAC,oBAAoB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC7C,CAAC;IAED,+EAA+E;IACvE,kBAAkB,CACxB,IAAY,EACZ,OAAiB,EACjB,WAAmB,EACnB,QAAgB,EAChB,QAAgB;QAEhB,MAAM,SAAS,GAAwB;YACrC,IAAI;YACJ,WAAW;YACX,eAAe,EAAE,OAAO;YACxB,aAAa,EAAE,IAAI,kBAAkB,CAAC,QAAQ,EAAE,QAAQ,EAAE,WAAW,CAAC;YACtE,aAAa,EAAE,WAAW;YAC1B,aAAa,EAAE,EAAE;YACjB,QAAQ,EAAE,EAAE;SACb,CAAC;QACF,IAAI,CAAC,kBAAkB,CAAC,QAAQ,CAAC,
SAAS,CAAC,CAAC;QAE5C,MAAM,UAAU,GAAG,IAAI,CAAC,kBAAkB,CAAC,aAAa,EAAE,CAAC;QAC3D,IAAI,CAAC,cAAc,GAAG,IAAI,aAAa,CAAC,UAAU,CAAC,CAAC;IACtD,CAAC;IAED,iCAAiC;IACjC,IAAI,MAAM;QACR,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAED,qCAAqC;IACrC,IAAI,UAAU;QACZ,OAAO,IAAI,CAAC,kBAAkB,CAAC;IACjC,CAAC;IAED,gCAAgC;IAChC,IAAI,MAAM;QACR,OAAO,IAAI,CAAC,OAAO,CAAC;IACtB,CAAC;CACF"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Benchmark score normalization.
|
|
3
|
+
*
|
|
4
|
+
* Different benchmarks use different scales (0-100%, ELO ratings, etc.).
|
|
5
|
+
* This module normalizes them all to a 0-1 range for fair comparison.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Score range of a single benchmark, used to rescale raw scores to 0-1.
 */
export declare class NormalizationRange {
    /**
     * @param minScore - Raw score mapped to the bottom of the normalized scale.
     * @param maxScore - Raw score mapped to the top of the normalized scale.
     * @param description - Optional human-readable label for the benchmark.
     */
    constructor(minScore: number, maxScore: number, description?: string);
    /** Raw score mapped to the bottom of the normalized scale. */
    readonly minScore: number;
    /** Raw score mapped to the top of the normalized scale. */
    readonly maxScore: number;
    /** Human-readable label for the benchmark. */
    readonly description: string;
    /**
     * Normalize a raw benchmark score to 0-1.
     *
     * @param rawScore - Score on the benchmark's native scale.
     * @returns The rescaled score.
     */
    normalize(rawScore: number): number;
}
|
|
15
|
+
/**
 * Built-in normalization ranges, keyed by benchmark name.
 */
export declare const NORMALIZATION_RANGES: {
    [benchmark: string]: NormalizationRange;
};
|
|
17
|
+
/**
 * Rescales benchmark scores that live on different native scales.
 *
 * Ships with the standard benchmark ranges and accepts additional,
 * caller-registered ranges for custom benchmarks.
 */
export declare class BenchmarkNormalizer {
    private _ranges;
    constructor();
    /**
     * Normalize a raw benchmark score to 0-1.
     *
     * @param benchmark - Benchmark name used to look up the range.
     * @param rawScore - Score on that benchmark's native scale.
     */
    normalize(benchmark: string, rawScore: number): number;
    /**
     * Register (or replace) the normalization range of a benchmark.
     *
     * @param benchmark - Benchmark name.
     * @param minScore - Raw score mapped to the bottom of the scale.
     * @param maxScore - Raw score mapped to the top of the scale.
     * @param description - Optional human-readable label.
     */
    registerRange(benchmark: string, minScore: number, maxScore: number, description?: string): void;
    /**
     * Look up the normalization range of a benchmark.
     *
     * @returns The range, or `undefined` when none is registered.
     */
    getRange(benchmark: string): NormalizationRange | undefined;
    /** Names of all benchmarks that currently have a registered range. */
    get knownBenchmarks(): string[];
}
|
|
35
|
+
//# sourceMappingURL=benchmarks.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"benchmarks.d.ts","sourceRoot":"","sources":["../../src/scoring/benchmarks.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,qBAAa,kBAAkB;IAC7B,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;gBAEjB,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,WAAW,SAAK;IAMhE,8CAA8C;IAC9C,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM;CAKpC;AAED,+CAA+C;AAC/C,eAAO,MAAM,oBAAoB,EAAE,MAAM,CAAC,MAAM,EAAE,kBAAkB,CAanE,CAAC;AAEF;;;;;GAKG;AACH,qBAAa,mBAAmB;IAC9B,OAAO,CAAC,OAAO,CAAkC;;IAMjD,8CAA8C;IAC9C,SAAS,CAAC,SAAS,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,MAAM;IAStD,6DAA6D;IAC7D,aAAa,CACX,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,MAAM,EAChB,WAAW,SAAK,GACf,IAAI;IAIP,mDAAmD;IACnD,QAAQ,CAAC,SAAS,EAAE,MAAM,GAAG,kBAAkB,GAAG,SAAS;IAI3D,gEAAgE;IAChE,IAAI,eAAe,IAAI,MAAM,EAAE,CAE9B;CACF"}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Benchmark score normalization.
|
|
3
|
+
*
|
|
4
|
+
* Different benchmarks use different scales (0-100%, ELO ratings, etc.).
|
|
5
|
+
* This module normalizes them all to a 0-1 range for fair comparison.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Score range of a single benchmark, used to rescale raw scores to 0-1.
 */
export class NormalizationRange {
    /**
     * @param minScore - Raw score mapped to 0.
     * @param maxScore - Raw score mapped to 1.
     * @param description - Optional human-readable label (default `''`).
     */
    constructor(minScore, maxScore, description = '') {
        this.minScore = minScore;
        this.maxScore = maxScore;
        this.description = description;
    }
    /**
     * Normalize a raw benchmark score to 0-1.
     *
     * Linear rescale between `minScore` and `maxScore`, clamped to [0, 1].
     * A degenerate range (`minScore === maxScore`) always yields 0.5.
     *
     * @param rawScore - Score on the benchmark's native scale.
     * @returns A number in [0, 1]; 0 when the score cannot be computed
     *   (e.g. `rawScore` is NaN or undefined).
     */
    normalize(rawScore) {
        if (this.maxScore === this.minScore)
            return 0.5;
        const normalized = (rawScore - this.minScore) / (this.maxScore - this.minScore);
        // Math.min/Math.max propagate NaN, which would escape the clamp below
        // and poison any ranking built on the result -- treat it as "no score".
        if (Number.isNaN(normalized))
            return 0.0;
        return Math.max(0.0, Math.min(1.0, normalized));
    }
}
|
|
21
|
+
/**
 * Built-in normalization ranges, keyed by benchmark name.
 *
 * Each row is `[benchmark, minScore, maxScore, description]`.
 */
export const NORMALIZATION_RANGES = Object.fromEntries([
    ['MMLU', 25, 95, 'Academic knowledge across 57 subjects'],
    ['HellaSwag', 50, 98, 'Commonsense reasoning'],
    ['HumanEval', 20, 95, 'Code generation'],
    ['SWE-bench', 5, 85, 'Real-world software engineering'],
    ['TruthfulQA', 20, 85, 'Truthful question answering'],
    ['ARC', 0, 95, 'Science exam questions'],
    ['GSM8K', 20, 98, 'Grade school math'],
    ['DROP', 30, 90, 'Reading comprehension with arithmetic'],
    ['SuperGLUE', 40, 95, 'Natural language understanding'],
    ['Chatbot Arena (LMSys)', 1000, 1550, 'Human-rated chat quality'],
    ['MT-Bench', 5, 10, 'Multi-turn conversation quality'],
    ['LiveBench', 0, 100, 'Fresh, contamination-resistant evaluation'],
].map(([benchmark, lowest, highest, label]) => [
    benchmark,
    new NormalizationRange(lowest, highest, label),
]));
|
|
36
|
+
/**
 * Normalizes benchmark scores across different scales.
 *
 * Supports standard benchmarks out of the box and allows
 * registering custom normalization ranges.
 */
export class BenchmarkNormalizer {
    /** Start from a private copy of the standard ranges. */
    constructor() {
        // Copy into a Map so registerRange() never mutates NORMALIZATION_RANGES.
        this._ranges = new Map(Object.entries(NORMALIZATION_RANGES));
    }
    /**
     * Normalize a raw benchmark score to 0-1.
     *
     * Benchmarks with a registered range delegate to that range. Unknown
     * benchmarks are assumed to be on a 0-100 percentage scale.
     *
     * @param benchmark - Benchmark name used to look up the range.
     * @param rawScore - Score on that benchmark's native scale.
     * @returns The rescaled score; on the unknown-benchmark path this is
     *   clamped to [0, 1], with 0 when `rawScore` is NaN or undefined.
     */
    normalize(benchmark, rawScore) {
        const range = this._ranges.get(benchmark);
        if (range) {
            return range.normalize(rawScore);
        }
        // Unknown benchmark -- assume 0-100 percentage scale
        const fraction = rawScore / 100.0;
        // Math.min/Math.max propagate NaN, so guard before clamping.
        if (Number.isNaN(fraction)) {
            return 0.0;
        }
        return Math.max(0.0, Math.min(1.0, fraction));
    }
    /**
     * Register a custom normalization range for a benchmark.
     *
     * An existing range under the same name is replaced.
     */
    registerRange(benchmark, minScore, maxScore, description = '') {
        this._ranges.set(benchmark, new NormalizationRange(minScore, maxScore, description));
    }
    /**
     * Get the normalization range for a benchmark.
     *
     * @returns The range, or `undefined` when none is registered.
     */
    getRange(benchmark) {
        return this._ranges.get(benchmark);
    }
    /** List all benchmarks with registered normalization ranges. */
    get knownBenchmarks() {
        return [...this._ranges.keys()];
    }
}
|
|
68
|
+
//# sourceMappingURL=benchmarks.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"benchmarks.js","sourceRoot":"","sources":["../../src/scoring/benchmarks.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,MAAM,OAAO,kBAAkB;IAK7B,YAAY,QAAgB,EAAE,QAAgB,EAAE,WAAW,GAAG,EAAE;QAC9D,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;IACjC,CAAC;IAED,8CAA8C;IAC9C,SAAS,CAAC,QAAgB;QACxB,IAAI,IAAI,CAAC,QAAQ,KAAK,IAAI,CAAC,QAAQ;YAAE,OAAO,GAAG,CAAC;QAChD,MAAM,UAAU,GAAG,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC;QAChF,OAAO,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC,CAAC;IAClD,CAAC;CACF;AAED,+CAA+C;AAC/C,MAAM,CAAC,MAAM,oBAAoB,GAAuC;IACtE,MAAM,EAAE,IAAI,kBAAkB,CAAC,EAAE,EAAE,EAAE,EAAE,uCAAuC,CAAC;IAC/E,WAAW,EAAE,IAAI,kBAAkB,CAAC,EAAE,EAAE,EAAE,EAAE,uBAAuB,CAAC;IACpE,WAAW,EAAE,IAAI,kBAAkB,CAAC,EAAE,EAAE,EAAE,EAAE,iBAAiB,CAAC;IAC9D,WAAW,EAAE,IAAI,kBAAkB,CAAC,CAAC,EAAE,EAAE,EAAE,iCAAiC,CAAC;IAC7E,YAAY,EAAE,IAAI,kBAAkB,CAAC,EAAE,EAAE,EAAE,EAAE,6BAA6B,CAAC;IAC3E,KAAK,EAAE,IAAI,kBAAkB,CAAC,CAAC,EAAE,EAAE,EAAE,wBAAwB,CAAC;IAC9D,OAAO,EAAE,IAAI,kBAAkB,CAAC,EAAE,EAAE,EAAE,EAAE,mBAAmB,CAAC;IAC5D,MAAM,EAAE,IAAI,kBAAkB,CAAC,EAAE,EAAE,EAAE,EAAE,uCAAuC,CAAC;IAC/E,WAAW,EAAE,IAAI,kBAAkB,CAAC,EAAE,EAAE,EAAE,EAAE,gCAAgC,CAAC;IAC7E,uBAAuB,EAAE,IAAI,kBAAkB,CAAC,IAAI,EAAE,IAAI,EAAE,0BAA0B,CAAC;IACvF,UAAU,EAAE,IAAI,kBAAkB,CAAC,CAAC,EAAE,EAAE,EAAE,iCAAiC,CAAC;IAC5E,WAAW,EAAE,IAAI,kBAAkB,CAAC,CAAC,EAAE,GAAG,EAAE,2CAA2C,CAAC;CACzF,CAAC;AAEF;;;;;GAKG;AACH,MAAM,OAAO,mBAAmB;IAG9B;QACE,IAAI,CAAC,OAAO,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,oBAAoB,CAAC,CAAC,CAAC;IAC/D,CAAC;IAED,8CAA8C;IAC9C,SAAS,CAAC,SAAiB,EAAE,QAAgB;QAC3C,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QAC1C,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,qDAAqD;YACrD,OAAO,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,QAAQ,GAAG,KAAK,CAAC,CAAC,CAAC;QACxD,CAAC;QACD,OAAO,KAAK,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;IACnC,CAAC;IAED,6DAA6D;IAC7D,aAAa,CACX,SAAiB,EACjB,QAAgB,EAChB,QAAgB,EACh
B,WAAW,GAAG,EAAE;QAEhB,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,SAAS,EAAE,IAAI,kBAAkB,CAAC,QAAQ,EAAE,QAAQ,EAAE,WAAW,CAAC,CAAC,CAAC;IACvF,CAAC;IAED,mDAAmD;IACnD,QAAQ,CAAC,SAAiB;QACxB,OAAO,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;IACrC,CAAC;IAED,gEAAgE;IAChE,IAAI,eAAe;QACjB,OAAO,CAAC,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;IAClC,CAAC;CACF"}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Dynamic model scoring engine.
|
|
3
|
+
*
|
|
4
|
+
* Combines benchmark performance, cost, and speed into a single score
|
|
5
|
+
* weighted by user priorities. This is the heart of the routing logic.
|
|
6
|
+
*/
|
|
7
|
+
import { ModelInfo } from '../registry/models.js';
|
|
8
|
+
import { BenchmarkNormalizer } from './benchmarks.js';
|
|
9
|
+
import { Priorities } from './priorities.js';
|
|
10
|
+
export interface ModelScore {
|
|
11
|
+
modelId: string;
|
|
12
|
+
finalScore: number;
|
|
13
|
+
qualityScore: number;
|
|
14
|
+
costScore: number;
|
|
15
|
+
speedScore: number;
|
|
16
|
+
qualityContribution: number;
|
|
17
|
+
costContribution: number;
|
|
18
|
+
speedContribution: number;
|
|
19
|
+
topBenchmarks: Array<[string, number]>;
|
|
20
|
+
reasoning: string;
|
|
21
|
+
}
|
|
22
|
+
/** Speed tier -> numeric score. */
|
|
23
|
+
export declare const SPEED_SCORES: Record<string, number>;
|
|
24
|
+
/**
|
|
25
|
+
* Scores models against a classified prompt.
|
|
26
|
+
*
|
|
27
|
+
* Takes benchmark similarity scores (from the classifier) and user priorities,
|
|
28
|
+
* then ranks all available models using a three-factor weighted algorithm:
|
|
29
|
+
*
|
|
30
|
+
* final = (quality * qW + cost * cW + speed * sW) / (qW + cW + sW)
|
|
31
|
+
*
|
|
32
|
+
* Where weights are derived from user priorities (1-5 scale).
|
|
33
|
+
*/
|
|
34
|
+
export declare class ScoringEngine {
|
|
35
|
+
private _normalizer;
|
|
36
|
+
constructor(normalizer?: BenchmarkNormalizer);
|
|
37
|
+
/**
|
|
38
|
+
* Score and rank models based on benchmark similarities and priorities.
|
|
39
|
+
*/
|
|
40
|
+
scoreModels(models: ModelInfo[], benchmarkSimilarities: Record<string, number>, priorities?: Priorities, topK?: number): ModelScore[];
|
|
41
|
+
private _scoreSingleModel;
|
|
42
|
+
}
|
|
43
|
+
//# sourceMappingURL=engine.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"engine.d.ts","sourceRoot":"","sources":["../../src/scoring/engine.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AAClD,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC;AACtD,OAAO,EAAsB,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAEjE,MAAM,WAAW,UAAU;IACzB,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,gBAAgB,EAAE,MAAM,CAAC;IACzB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,aAAa,EAAE,KAAK,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IACvC,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,mCAAmC;AACnC,eAAO,MAAM,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAM/C,CAAC;AAwDF;;;;;;;;;GASG;AACH,qBAAa,aAAa;IACxB,OAAO,CAAC,WAAW,CAAsB;gBAE7B,UAAU,CAAC,EAAE,mBAAmB;IAI5C;;OAEG;IACH,WAAW,CACT,MAAM,EAAE,SAAS,EAAE,EACnB,qBAAqB,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,EAC7C,UAAU,GAAE,UAA+B,EAC3C,IAAI,SAAI,GACP,UAAU,EAAE;IA2Ff,OAAO,CAAC,iBAAiB;CAwH1B"}
|