tryaii-dre 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +190 -0
- package/README.md +234 -0
- package/dist/banner.d.ts +24 -0
- package/dist/banner.d.ts.map +1 -0
- package/dist/banner.js +125 -0
- package/dist/banner.js.map +1 -0
- package/dist/benchmarks/index.d.ts +4 -0
- package/dist/benchmarks/index.d.ts.map +1 -0
- package/dist/benchmarks/index.js +3 -0
- package/dist/benchmarks/index.js.map +1 -0
- package/dist/benchmarks/registry.d.ts +69 -0
- package/dist/benchmarks/registry.d.ts.map +1 -0
- package/dist/benchmarks/registry.js +128 -0
- package/dist/benchmarks/registry.js.map +1 -0
- package/dist/benchmarks/standard.d.ts +6 -0
- package/dist/benchmarks/standard.d.ts.map +1 -0
- package/dist/benchmarks/standard.js +115 -0
- package/dist/benchmarks/standard.js.map +1 -0
- package/dist/budget.d.ts +65 -0
- package/dist/budget.d.ts.map +1 -0
- package/dist/budget.js +344 -0
- package/dist/budget.js.map +1 -0
- package/dist/cache/index.d.ts +27 -0
- package/dist/cache/index.d.ts.map +1 -0
- package/dist/cache/index.js +63 -0
- package/dist/cache/index.js.map +1 -0
- package/dist/centroids/data/centroids_all-MiniLM-L6-v2.json +1 -0
- package/dist/centroids/data/trainingQueries.json +246 -0
- package/dist/centroids/generator.d.ts +63 -0
- package/dist/centroids/generator.d.ts.map +1 -0
- package/dist/centroids/generator.js +120 -0
- package/dist/centroids/generator.js.map +1 -0
- package/dist/centroids/index.d.ts +3 -0
- package/dist/centroids/index.d.ts.map +1 -0
- package/dist/centroids/index.js +3 -0
- package/dist/centroids/index.js.map +1 -0
- package/dist/centroids/loader.d.ts +87 -0
- package/dist/centroids/loader.d.ts.map +1 -0
- package/dist/centroids/loader.js +236 -0
- package/dist/centroids/loader.js.map +1 -0
- package/dist/classifiers/base.d.ts +56 -0
- package/dist/classifiers/base.d.ts.map +1 -0
- package/dist/classifiers/base.js +42 -0
- package/dist/classifiers/base.js.map +1 -0
- package/dist/classifiers/embedding.d.ts +68 -0
- package/dist/classifiers/embedding.d.ts.map +1 -0
- package/dist/classifiers/embedding.js +0 -0
- package/dist/classifiers/embedding.js.map +1 -0
- package/dist/classifiers/hybrid.d.ts +31 -0
- package/dist/classifiers/hybrid.d.ts.map +1 -0
- package/dist/classifiers/hybrid.js +61 -0
- package/dist/classifiers/hybrid.js.map +1 -0
- package/dist/classifiers/index.d.ts +4 -0
- package/dist/classifiers/index.d.ts.map +1 -0
- package/dist/classifiers/index.js +3 -0
- package/dist/classifiers/index.js.map +1 -0
- package/dist/classifiers/keyword.d.ts +29 -0
- package/dist/classifiers/keyword.d.ts.map +1 -0
- package/dist/classifiers/keyword.js +264 -0
- package/dist/classifiers/keyword.js.map +1 -0
- package/dist/cli.d.ts +15 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +597 -0
- package/dist/cli.js.map +1 -0
- package/dist/client-types.d.ts +101 -0
- package/dist/client-types.d.ts.map +1 -0
- package/dist/client-types.js +5 -0
- package/dist/client-types.js.map +1 -0
- package/dist/client.d.ts +50 -0
- package/dist/client.d.ts.map +1 -0
- package/dist/client.js +279 -0
- package/dist/client.js.map +1 -0
- package/dist/config.d.ts +45 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +37 -0
- package/dist/config.js.map +1 -0
- package/dist/dashboard/index.d.ts +48 -0
- package/dist/dashboard/index.d.ts.map +1 -0
- package/dist/dashboard/index.js +166 -0
- package/dist/dashboard/index.js.map +1 -0
- package/dist/embeddings/base.d.ts +66 -0
- package/dist/embeddings/base.d.ts.map +1 -0
- package/dist/embeddings/base.js +77 -0
- package/dist/embeddings/base.js.map +1 -0
- package/dist/embeddings/index.d.ts +3 -0
- package/dist/embeddings/index.d.ts.map +1 -0
- package/dist/embeddings/index.js +3 -0
- package/dist/embeddings/index.js.map +1 -0
- package/dist/embeddings/local.d.ts +42 -0
- package/dist/embeddings/local.d.ts.map +1 -0
- package/dist/embeddings/local.js +89 -0
- package/dist/embeddings/local.js.map +1 -0
- package/dist/index.d.ts +44 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +45 -0
- package/dist/index.js.map +1 -0
- package/dist/integrations/index.d.ts +3 -0
- package/dist/integrations/index.d.ts.map +1 -0
- package/dist/integrations/index.js +2 -0
- package/dist/integrations/index.js.map +1 -0
- package/dist/integrations/openrouter.d.ts +84 -0
- package/dist/integrations/openrouter.d.ts.map +1 -0
- package/dist/integrations/openrouter.js +253 -0
- package/dist/integrations/openrouter.js.map +1 -0
- package/dist/registry/index.d.ts +2 -0
- package/dist/registry/index.d.ts.map +1 -0
- package/dist/registry/index.js +2 -0
- package/dist/registry/index.js.map +1 -0
- package/dist/registry/models.d.ts +76 -0
- package/dist/registry/models.d.ts.map +1 -0
- package/dist/registry/models.js +170 -0
- package/dist/registry/models.js.map +1 -0
- package/dist/registry/presets/defaultModels.json +435 -0
- package/dist/router.d.ts +178 -0
- package/dist/router.d.ts.map +1 -0
- package/dist/router.js +259 -0
- package/dist/router.js.map +1 -0
- package/dist/scoring/benchmarks.d.ts +35 -0
- package/dist/scoring/benchmarks.d.ts.map +1 -0
- package/dist/scoring/benchmarks.js +68 -0
- package/dist/scoring/benchmarks.js.map +1 -0
- package/dist/scoring/engine.d.ts +43 -0
- package/dist/scoring/engine.d.ts.map +1 -0
- package/dist/scoring/engine.js +267 -0
- package/dist/scoring/engine.js.map +1 -0
- package/dist/scoring/index.d.ts +6 -0
- package/dist/scoring/index.d.ts.map +1 -0
- package/dist/scoring/index.js +4 -0
- package/dist/scoring/index.js.map +1 -0
- package/dist/scoring/priorities.d.ts +41 -0
- package/dist/scoring/priorities.d.ts.map +1 -0
- package/dist/scoring/priorities.js +49 -0
- package/dist/scoring/priorities.js.map +1 -0
- package/dist/types.d.ts +47 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +5 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/cosine.d.ts +10 -0
- package/dist/utils/cosine.d.ts.map +1 -0
- package/dist/utils/cosine.js +18 -0
- package/dist/utils/cosine.js.map +1 -0
- package/dist/utils/math.d.ts +18 -0
- package/dist/utils/math.d.ts.map +1 -0
- package/dist/utils/math.js +54 -0
- package/dist/utils/math.js.map +1 -0
- package/package.json +65 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"registry.d.ts","sourceRoot":"","sources":["../../src/benchmarks/registry.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAIH,OAAO,EAAE,mBAAmB,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAC;AAGnF,0CAA0C;AAC1C,MAAM,WAAW,mBAAmB;IAClC,qCAAqC;IACrC,IAAI,EAAE,MAAM,CAAC;IAEb,kCAAkC;IAClC,WAAW,EAAE,MAAM,CAAC;IAEpB,sDAAsD;IACtD,eAAe,EAAE,MAAM,EAAE,CAAC;IAE1B,0CAA0C;IAC1C,aAAa,EAAE,kBAAkB,CAAC;IAElC,kDAAkD;IAClD,aAAa,EAAE,MAAM,CAAC;IAEtB,2CAA2C;IAC3C,aAAa,EAAE,MAAM,EAAE,CAAC;IAExB,yBAAyB;IACzB,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACnC;AAED,gFAAgF;AAChF,wBAAgB,iBAAiB,CAAC,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,mBAAmB,CAcjF;AAED,yDAAyD;AACzD,wBAAgB,eAAe,CAAC,CAAC,EAAE,mBAAmB,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAa/E;AAED;;;;;;;;GAQG;AACH,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,WAAW,CAAmC;;IAMtD,uDAAuD;IACvD,MAAM,CAAC,OAAO,IAAI,iBAAiB;IAQnC,0DAA0D;IAC1D,QAAQ,CAAC,SAAS,EAAE,mBAAmB,GAAG,IAAI;IAI9C,sDAAsD;IACtD,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO;IAIjC,+BAA+B;IAC/B,GAAG,CAAC,IAAI,EAAE,MAAM,GAAG,mBAAmB,GAAG,SAAS;IAIlD,sCAAsC;IACtC,IAAI,KAAK,IAAI,MAAM,EAAE,CAEpB;IAED,iCAAiC;IACjC,IAAI,aAAa,IAAI,mBAAmB,EAAE,CAEzC;IAED,0DAA0D;IAC1D,kBAAkB,IAAI,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC;IAU9C,mEAAmE;IACnE,aAAa,IAAI,mBAAmB;IAapC;;;;OAIG;IACH,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM;IAalC,4CAA4C;IAC5C,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAOhC,IAAI,MAAM,IAAI,MAAM,CAEnB;IAED,GAAG,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO;CAG3B"}
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Extensible benchmark registry.
|
|
3
|
+
*
|
|
4
|
+
* Allows users to register custom benchmarks with their own training queries
|
|
5
|
+
* and normalization ranges. Designed for high connectivity with external
|
|
6
|
+
* benchmark-creation tools.
|
|
7
|
+
*/
|
|
8
|
+
import { readFileSync, writeFileSync } from 'node:fs';
|
|
9
|
+
import { BenchmarkNormalizer, NormalizationRange } from '../scoring/benchmarks.js';
|
|
10
|
+
import { STANDARD_BENCHMARKS } from './standard.js';
|
|
11
|
+
/**
 * Build a benchmark definition from a plain object (e.g. parsed JSON).
 *
 * Both snake_case and camelCase spellings are accepted for the multi-word
 * keys; the snake_case spelling wins when both are present. Missing fields
 * fall back to: empty strings, empty collections, a 0..100 normalization
 * range and the 'TECHNICAL' broad category.
 *
 * @param {object} d - Plain object describing the benchmark.
 * @returns {object} Definition with a `NormalizationRange` instance under `normalization`.
 */
export function benchmarkFromDict(d) {
  const range = d.normalization ?? {};
  const definition = {};

  definition.name = d.name ?? '';
  definition.description = d.description ?? '';
  definition.trainingQueries = d.training_queries ?? d.trainingQueries ?? [];

  const lowerBound = range.min_score ?? range.minScore ?? 0;
  const upperBound = range.max_score ?? range.maxScore ?? 100;
  definition.normalization = new NormalizationRange(lowerBound, upperBound);

  definition.broadCategory = d.broad_category ?? d.broadCategory ?? 'TECHNICAL';
  definition.subcategories = d.subcategories ?? [];
  definition.metadata = d.metadata ?? {};

  return definition;
}
|
|
24
|
+
/**
 * Convert a benchmark definition into its plain, snake_case object form.
 *
 * The arrays and the metadata object are passed through by reference, not
 * copied; only the normalization range is re-packed into a new object.
 *
 * @param {object} b - Benchmark definition (camelCase fields).
 * @returns {object} Plain object with snake_case keys.
 */
export function benchmarkToDict(b) {
  const {
    name,
    description,
    trainingQueries,
    normalization,
    broadCategory,
    subcategories,
    metadata,
  } = b;
  const { minScore, maxScore } = normalization;

  return {
    name,
    description,
    training_queries: trainingQueries,
    normalization: { min_score: minScore, max_score: maxScore },
    broad_category: broadCategory,
    subcategories,
    metadata,
  };
}
|
|
39
|
+
/**
 * Registry for benchmark definitions.
 *
 * Provides a clean interface for:
 * - Registering custom benchmarks
 * - Loading benchmarks from JSON files (for tool connectivity)
 * - Exporting benchmark definitions
 * - Integrating with the centroid generator and scoring engine
 */
export class BenchmarkRegistry {
  constructor() {
    // Definitions keyed by benchmark name; a Map keeps insertion order.
    this._benchmarks = new Map();
  }

  /**
   * Create a registry pre-populated with every entry of STANDARD_BENCHMARKS.
   *
   * @returns {BenchmarkRegistry} A new registry instance.
   */
  static default() {
    const registry = new BenchmarkRegistry();
    for (const benchmark of STANDARD_BENCHMARKS) {
      registry._benchmarks.set(benchmark.name, benchmark);
    }
    return registry;
  }

  /**
   * Register a new benchmark or replace the one stored under the same name.
   *
   * @param {object} benchmark - Definition; `benchmark.name` is the key.
   */
  register(benchmark) {
    this._benchmarks.set(benchmark.name, benchmark);
  }

  /**
   * Remove a benchmark.
   *
   * @param {string} name - Benchmark name.
   * @returns {boolean} True if it existed.
   */
  unregister(name) {
    return this._benchmarks.delete(name);
  }

  /**
   * Get a benchmark by name.
   *
   * @param {string} name - Benchmark name.
   * @returns {object|undefined} The definition, or undefined when unknown.
   */
  get(name) {
    return this._benchmarks.get(name);
  }

  /** All registered benchmark names, in registration order (fresh array). */
  get names() {
    return [...this._benchmarks.keys()];
  }

  /** All registered benchmarks, in registration order (fresh array). */
  get allBenchmarks() {
    return [...this._benchmarks.values()];
  }

  /**
   * Get all training queries grouped by benchmark name. Benchmarks whose
   * query list is empty are omitted.
   *
   * @returns {Object<string, string[]>} Plain object: name -> training queries.
   */
  getTrainingQueries() {
    const grouped = [];
    for (const [name, benchmark] of this._benchmarks) {
      if (benchmark.trainingQueries.length > 0) {
        grouped.push([name, benchmark.trainingQueries]);
      }
    }
    // Object.fromEntries defines own data properties, so a benchmark named
    // "__proto__" becomes a regular key. Plain `result[name] = ...`
    // assignment would instead replace the result object's prototype.
    return Object.fromEntries(grouped);
  }

  /**
   * Create a BenchmarkNormalizer and register one range per benchmark
   * (name, min score, max score, description).
   *
   * @returns {BenchmarkNormalizer} The populated normalizer.
   */
  getNormalizer() {
    const normalizer = new BenchmarkNormalizer();
    for (const [name, benchmark] of this._benchmarks) {
      const { minScore, maxScore } = benchmark.normalization;
      normalizer.registerRange(name, minScore, maxScore, benchmark.description);
    }
    return normalizer;
  }

  /**
   * Load benchmarks from a JSON file shaped like `{ "benchmarks": [ {...}, ... ] }`.
   * A missing `benchmarks` key loads nothing.
   *
   * The file is validated and converted in full before anything is
   * registered, so a malformed file never leaves the registry half-updated.
   *
   * @param {string} path - Path of the JSON file.
   * @returns {number} Number of benchmarks loaded.
   * @throws {SyntaxError} When the file is not valid JSON.
   * @throws {TypeError} When `benchmarks` is not an array or an entry is not an object.
   */
  loadFromFile(path) {
    const data = JSON.parse(readFileSync(path, 'utf-8'));
    if (data === null) {
      throw new TypeError(`${path}: expected a JSON object with a "benchmarks" array, got null`);
    }

    const items = data.benchmarks ?? [];
    if (!Array.isArray(items)) {
      // A string here would otherwise be iterated character by character.
      throw new TypeError(`${path}: "benchmarks" must be an array`);
    }
    items.forEach((item, index) => {
      if (item === null || typeof item !== 'object' || Array.isArray(item)) {
        throw new TypeError(`${path}: benchmarks[${index}] must be an object`);
      }
    });

    const loaded = [];
    for (const item of items) {
      loaded.push(benchmarkFromDict(item));
    }
    for (const benchmark of loaded) {
      this.register(benchmark);
    }
    return loaded.length;
  }

  /**
   * Export all benchmarks to a JSON file as `{ "benchmarks": [...] }`,
   * pretty-printed with a two-space indent.
   *
   * @param {string} path - Destination file path.
   */
  exportToFile(path) {
    const data = {
      benchmarks: [...this._benchmarks.values()].map(benchmarkToDict),
    };
    writeFileSync(path, JSON.stringify(data, null, 2));
  }

  /** Number of registered benchmarks. */
  get length() {
    return this._benchmarks.size;
  }

  /**
   * Check whether a benchmark is registered.
   *
   * @param {string} name - Benchmark name.
   * @returns {boolean} True when a benchmark with that name exists.
   */
  has(name) {
    return this._benchmarks.has(name);
  }
}
|
|
128
|
+
//# sourceMappingURL=registry.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"registry.js","sourceRoot":"","sources":["../../src/benchmarks/registry.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAEtD,OAAO,EAAE,mBAAmB,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAC;AACnF,OAAO,EAAE,mBAAmB,EAAE,MAAM,eAAe,CAAC;AA0BpD,gFAAgF;AAChF,MAAM,UAAU,iBAAiB,CAAC,CAA0B;IAC1D,MAAM,IAAI,GAAG,CAAC,CAAC,CAAC,aAAa,IAAI,EAAE,CAA2B,CAAC;IAC/D,OAAO;QACL,IAAI,EAAG,CAAC,CAAC,IAAe,IAAI,EAAE;QAC9B,WAAW,EAAG,CAAC,CAAC,WAAsB,IAAI,EAAE;QAC5C,eAAe,EAAG,CAAC,CAAC,gBAA6B,IAAK,CAAC,CAAC,eAA4B,IAAI,EAAE;QAC1F,aAAa,EAAE,IAAI,kBAAkB,CACnC,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,QAAQ,IAAI,CAAC,EACpC,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,QAAQ,IAAI,GAAG,CACvC;QACD,aAAa,EAAG,CAAC,CAAC,cAAyB,IAAK,CAAC,CAAC,aAAwB,IAAI,WAAW;QACzF,aAAa,EAAG,CAAC,CAAC,aAA0B,IAAI,EAAE;QAClD,QAAQ,EAAG,CAAC,CAAC,QAAoC,IAAI,EAAE;KACxD,CAAC;AACJ,CAAC;AAED,yDAAyD;AACzD,MAAM,UAAU,eAAe,CAAC,CAAsB;IACpD,OAAO;QACL,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,WAAW,EAAE,CAAC,CAAC,WAAW;QAC1B,gBAAgB,EAAE,CAAC,CAAC,eAAe;QACnC,aAAa,EAAE;YACb,SAAS,EAAE,CAAC,CAAC,aAAa,CAAC,QAAQ;YACnC,SAAS,EAAE,CAAC,CAAC,aAAa,CAAC,QAAQ;SACpC;QACD,cAAc,EAAE,CAAC,CAAC,aAAa;QAC/B,aAAa,EAAE,CAAC,CAAC,aAAa;QAC9B,QAAQ,EAAE,CAAC,CAAC,QAAQ;KACrB,CAAC;AACJ,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,OAAO,iBAAiB;IAG5B;QACE,IAAI,CAAC,WAAW,GAAG,IAAI,GAAG,EAAE,CAAC;IAC/B,CAAC;IAED,uDAAuD;IACvD,MAAM,CAAC,OAAO;QACZ,MAAM,QAAQ,GAAG,IAAI,iBAAiB,EAAE,CAAC;QACzC,KAAK,MAAM,SAAS,IAAI,mBAAmB,EAAE,CAAC;YAC5C,QAAQ,CAAC,WAAW,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;QACtD,CAAC;QACD,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,0DAA0D;IAC1D,QAAQ,CAAC,SAA8B;QACrC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;IAClD,CAAC;IAED,sDAAsD;IACtD,UAAU,CAAC,IAAY;QACrB,OAAO,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IACvC,CAAC;IAED,+BAA+B;IAC/B,GAAG,CAAC,IAAY;QACd,OAAO,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IACpC,CAAC;IAED,sCAAsC;IACtC,IAAI,KAAK;QACP,OAAO,CAAC,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,CAAC;IACtC,CAAC;IAED,iCAAiC;IACjC,IAAI,aAAa;QACf,OA
AO,CAAC,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,CAAC,CAAC;IACxC,CAAC;IAED,0DAA0D;IAC1D,kBAAkB;QAChB,MAAM,MAAM,GAA6B,EAAE,CAAC;QAC5C,KAAK,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACzC,IAAI,CAAC,CAAC,eAAe,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACjC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,eAAe,CAAC;YACnC,CAAC;QACH,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,mEAAmE;IACnE,aAAa;QACX,MAAM,UAAU,GAAG,IAAI,mBAAmB,EAAE,CAAC;QAC7C,KAAK,MAAM,CAAC,IAAI,EAAE,SAAS,CAAC,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACjD,UAAU,CAAC,aAAa,CACtB,IAAI,EACJ,SAAS,CAAC,aAAa,CAAC,QAAQ,EAChC,SAAS,CAAC,aAAa,CAAC,QAAQ,EAChC,SAAS,CAAC,WAAW,CACtB,CAAC;QACJ,CAAC;QACD,OAAO,UAAU,CAAC;IACpB,CAAC;IAED;;;;OAIG;IACH,YAAY,CAAC,IAAY;QACvB,MAAM,GAAG,GAAG,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QACxC,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAE7B,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,UAAU,IAAI,EAAE,EAAE,CAAC;YACzC,MAAM,SAAS,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAC;YAC1C,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;YACzB,KAAK,EAAE,CAAC;QACV,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC;IAED,4CAA4C;IAC5C,YAAY,CAAC,IAAY;QACvB,MAAM,IAAI,GAAG;YACX,UAAU,EAAE,CAAC,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,CAAC,CAAC,GAAG,CAAC,eAAe,CAAC;SAChE,CAAC;QACF,aAAa,CAAC,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IACrD,CAAC;IAED,IAAI,MAAM;QACR,OAAO,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC;IAC/B,CAAC;IAED,GAAG,CAAC,IAAY;QACd,OAAO,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IACpC,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"standard.d.ts","sourceRoot":"","sources":["../../src/benchmarks/standard.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,eAAe,CAAC;AAEzD,eAAO,MAAM,mBAAmB,EAAE,mBAAmB,EA6GpD,CAAC"}
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Standard benchmark definitions -- the 12 benchmarks shipped with TryAii-DRE.
|
|
3
|
+
*/
|
|
4
|
+
import { NormalizationRange } from '../scoring/benchmarks.js';
|
|
5
|
+
/**
 * Built-in benchmark definitions, written as compact rows:
 * [name, description, minScore, maxScore, broadCategory, subcategories].
 *
 * Each row is expanded into a full definition with its own empty
 * `trainingQueries` array and `metadata` object. Per the original note,
 * training queries are loaded from trainingQueries.json at runtime.
 */
export const STANDARD_BENCHMARKS = [
  ['MMLU', 'Academic knowledge across 57 subjects', 25, 95, 'EDUCATIONAL', ['ACADEMIC_INSTRUCTION', 'RESEARCH_METHODOLOGY']],
  ['HellaSwag', 'Commonsense reasoning about everyday situations', 50, 98, 'CONVERSATIONAL', ['PERSONAL_ADVICE']],
  ['HumanEval', 'Code generation and programming tasks', 20, 95, 'TECHNICAL', ['CODE_TECHNICAL']],
  ['SWE-bench', 'Real-world software engineering and debugging', 5, 85, 'TECHNICAL', ['CODE_TECHNICAL', 'DATA_SCIENCE']],
  ['TruthfulQA', 'Truthful and accurate question answering', 20, 85, 'CONVERSATIONAL', ['PERSONAL_ADVICE']],
  ['ARC', 'Science exam questions requiring reasoning', 0, 95, 'EDUCATIONAL', ['ACADEMIC_INSTRUCTION', 'STUDY_ASSISTANCE']],
  ['GSM8K', 'Grade school math word problems', 20, 98, 'TECHNICAL', ['MATHEMATICAL_SCIENTIFIC']],
  ['DROP', 'Reading comprehension requiring arithmetic and reasoning', 30, 90, 'TECHNICAL', ['MATHEMATICAL_SCIENTIFIC', 'DATA_SCIENCE']],
  ['SuperGLUE', 'Natural language understanding tasks', 40, 95, 'BUSINESS', ['PROFESSIONAL_COMMUNICATION']],
  ['Chatbot Arena (LMSys)', 'Human-rated conversational quality', 1000, 1550, 'CONVERSATIONAL', ['PERSONAL_ADVICE', 'RECOMMENDATIONS']],
  ['MT-Bench', 'Multi-turn conversation and instruction following', 5, 10, 'CREATIVE', ['WRITING_LITERARY']],
  ['LiveBench', 'Fresh, contamination-resistant evaluation tasks', 0, 100, 'TECHNICAL', ['CODE_TECHNICAL', 'MATHEMATICAL_SCIENTIFIC']],
].map(([name, description, minScore, maxScore, broadCategory, subcategories]) => ({
  name,
  description,
  trainingQueries: [],
  normalization: new NormalizationRange(minScore, maxScore),
  broadCategory,
  subcategories,
  metadata: {},
}));
|
|
115
|
+
//# sourceMappingURL=standard.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"standard.js","sourceRoot":"","sources":["../../src/benchmarks/standard.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAC;AAG9D,MAAM,CAAC,MAAM,mBAAmB,GAA0B;IACxD;QACE,IAAI,EAAE,MAAM;QACZ,WAAW,EAAE,uCAAuC;QACpD,eAAe,EAAE,EAAE,EAAE,8CAA8C;QACnE,aAAa,EAAE,IAAI,kBAAkB,CAAC,EAAE,EAAE,EAAE,CAAC;QAC7C,aAAa,EAAE,aAAa;QAC5B,aAAa,EAAE,CAAC,sBAAsB,EAAE,sBAAsB,CAAC;QAC/D,QAAQ,EAAE,EAAE;KACb;IACD;QACE,IAAI,EAAE,WAAW;QACjB,WAAW,EAAE,iDAAiD;QAC9D,eAAe,EAAE,EAAE;QACnB,aAAa,EAAE,IAAI,kBAAkB,CAAC,EAAE,EAAE,EAAE,CAAC;QAC7C,aAAa,EAAE,gBAAgB;QAC/B,aAAa,EAAE,CAAC,iBAAiB,CAAC;QAClC,QAAQ,EAAE,EAAE;KACb;IACD;QACE,IAAI,EAAE,WAAW;QACjB,WAAW,EAAE,uCAAuC;QACpD,eAAe,EAAE,EAAE;QACnB,aAAa,EAAE,IAAI,kBAAkB,CAAC,EAAE,EAAE,EAAE,CAAC;QAC7C,aAAa,EAAE,WAAW;QAC1B,aAAa,EAAE,CAAC,gBAAgB,CAAC;QACjC,QAAQ,EAAE,EAAE;KACb;IACD;QACE,IAAI,EAAE,WAAW;QACjB,WAAW,EAAE,+CAA+C;QAC5D,eAAe,EAAE,EAAE;QACnB,aAAa,EAAE,IAAI,kBAAkB,CAAC,CAAC,EAAE,EAAE,CAAC;QAC5C,aAAa,EAAE,WAAW;QAC1B,aAAa,EAAE,CAAC,gBAAgB,EAAE,cAAc,CAAC;QACjD,QAAQ,EAAE,EAAE;KACb;IACD;QACE,IAAI,EAAE,YAAY;QAClB,WAAW,EAAE,0CAA0C;QACvD,eAAe,EAAE,EAAE;QACnB,aAAa,EAAE,IAAI,kBAAkB,CAAC,EAAE,EAAE,EAAE,CAAC;QAC7C,aAAa,EAAE,gBAAgB;QAC/B,aAAa,EAAE,CAAC,iBAAiB,CAAC;QAClC,QAAQ,EAAE,EAAE;KACb;IACD;QACE,IAAI,EAAE,KAAK;QACX,WAAW,EAAE,4CAA4C;QACzD,eAAe,EAAE,EAAE;QACnB,aAAa,EAAE,IAAI,kBAAkB,CAAC,CAAC,EAAE,EAAE,CAAC;QAC5C,aAAa,EAAE,aAAa;QAC5B,aAAa,EAAE,CAAC,sBAAsB,EAAE,kBAAkB,CAAC;QAC3D,QAAQ,EAAE,EAAE;KACb;IACD;QACE,IAAI,EAAE,OAAO;QACb,WAAW,EAAE,iCAAiC;QAC9C,eAAe,EAAE,EAAE;QACnB,aAAa,EAAE,IAAI,kBAAkB,CAAC,EAAE,EAAE,EAAE,CAAC;QAC7C,aAAa,EAAE,WAAW;QAC1B,aAAa,EAAE,CAAC,yBAAyB,CAAC;QAC1C,QAAQ,EAAE,EAAE;KACb;IACD;QACE,IAAI,EAAE,MAAM;QACZ,WAAW,EAAE,0DAA0D;QACvE,eAAe,EAAE,EAAE;QACnB,aAAa,EAAE,IAAI,kBAAkB,CAAC,EAAE,EAAE,EAAE,CAAC;QAC7C,aAAa,EAAE,WAAW;QAC1B,aAAa,EAAE,CAAC,yBAAyB,EAAE,cAAc,CAAC;QAC1D,QAAQ,EAAE,EAAE;KACb;IACD;QACE,IAAI,EAAE,WAAW;QACjB,WAAW,EAAE,sCAAsC;QACnD,eAAe,EAAE,EAAE;QACnB,aAAa,EAAE,IAAI,kBAAkB,CAAC,EAAE,EAAE,EAAE,C
AAC;QAC7C,aAAa,EAAE,UAAU;QACzB,aAAa,EAAE,CAAC,4BAA4B,CAAC;QAC7C,QAAQ,EAAE,EAAE;KACb;IACD;QACE,IAAI,EAAE,uBAAuB;QAC7B,WAAW,EAAE,oCAAoC;QACjD,eAAe,EAAE,EAAE;QACnB,aAAa,EAAE,IAAI,kBAAkB,CAAC,IAAI,EAAE,IAAI,CAAC;QACjD,aAAa,EAAE,gBAAgB;QAC/B,aAAa,EAAE,CAAC,iBAAiB,EAAE,iBAAiB,CAAC;QACrD,QAAQ,EAAE,EAAE;KACb;IACD;QACE,IAAI,EAAE,UAAU;QAChB,WAAW,EAAE,mDAAmD;QAChE,eAAe,EAAE,EAAE;QACnB,aAAa,EAAE,IAAI,kBAAkB,CAAC,CAAC,EAAE,EAAE,CAAC;QAC5C,aAAa,EAAE,UAAU;QACzB,aAAa,EAAE,CAAC,kBAAkB,CAAC;QACnC,QAAQ,EAAE,EAAE;KACb;IACD;QACE,IAAI,EAAE,WAAW;QACjB,WAAW,EAAE,iDAAiD;QAC9D,eAAe,EAAE,EAAE;QACnB,aAAa,EAAE,IAAI,kBAAkB,CAAC,CAAC,EAAE,GAAG,CAAC;QAC7C,aAAa,EAAE,WAAW;QAC1B,aAAa,EAAE,CAAC,gBAAgB,EAAE,yBAAyB,CAAC;QAC5D,QAAQ,EAAE,EAAE;KACb;CACF,CAAC"}
|
package/dist/budget.d.ts
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Budget-aware dataset routing utilities.
|
|
3
|
+
*
|
|
4
|
+
* This solves the routing budget problem as a multiple-choice knapsack:
|
|
5
|
+
* for each prompt, choose one model candidate; maximize total utility while
|
|
6
|
+
* keeping estimated generation cost under one shared budget.
|
|
7
|
+
*/
|
|
8
|
+
import type { ModelInfo } from './registry/models.js';
|
|
9
|
+
import type { Router, RouteResult } from './router.js';
|
|
10
|
+
import { Priorities } from './scoring/priorities.js';
|
|
11
|
+
/**
 * Budget handling mode: one of the two string literals below.
 * NOTE(review): the exact behavior of each mode is defined by the
 * implementation in budget.js, not by this declaration — confirm there.
 */
export type BudgetMode = 'strict' | 'fit-output';
|
|
12
|
+
/**
 * One (prompt, model) option considered by the budget optimizer: per the
 * file header, exactly one candidate is chosen for each prompt.
 *
 * NOTE(review): field meanings below are inferred from names only; confirm
 * against budget.js before relying on them.
 */
export interface BudgetCandidate {
    /** Presumably the position of the prompt in the input list — confirm. */
    promptIndex: number;
    modelId: string;
    /** Value the optimizer maximizes in total (see file header). */
    utility: number;
    /** Estimated generation cost; presumably USD, as in estimateGenerationCost — confirm. */
    estimatedCost: number;
    /** Presumably estimatedCost expressed in integer multiples of the cost unit — confirm. */
    costUnits: number;
    inputTokens: number;
    outputTokens: number;
    finalScore: number;
    reasoning: string;
    /** Presumably the model routing would pick without a budget — confirm. */
    normalBestModel: string;
}
|
|
24
|
+
/**
 * Outcome returned by optimizeBudgetCandidates and, as `optimization`, by
 * routeDatasetWithBudget.
 *
 * NOTE(review): which optional fields are populated in which situation is
 * decided in budget.js — confirm there.
 */
export interface BudgetOptimizationResult {
    /** 'optimal' or 'infeasible'. */
    status: 'optimal' | 'infeasible';
    /** Chosen candidates. */
    selected: Array<BudgetCandidate>;
    totalEstimatedCost: number;
    minimumRequiredBudget: number;
    budget: number;
    costUnit: number;
    message?: string;
    budgetMode?: BudgetMode;
    requestedOutputTokens?: number;
    effectiveOutputTokens?: number;
    requestedMinimumRequiredBudget?: number;
    budgetShortfall?: number;
}
|
|
38
|
+
/**
 * Element type of the `results` array returned by routeDatasetWithBudget:
 * a router result paired with the budget candidate selected for it.
 *
 * NOTE(review): units of the numeric fields are not visible here — confirm
 * against budget.js.
 */
export interface BudgetedRouteResult {
    routeResult: RouteResult;
    selected: BudgetCandidate;
    cumulativeCost: number;
    remainingBudget: number;
    /** Presumably routing time in milliseconds — confirm. */
    routeMs: number;
}
|
|
45
|
+
/**
 * Options object accepted by routeDatasetWithBudget.
 *
 * NOTE(review): defaults for the optional fields are applied in budget.js,
 * not declared here — confirm there.
 */
export interface RouteDatasetWithBudgetOptions {
    router: Router;
    prompts: Array<string>;
    priorities: Priorities;
    /** Presumably the shared budget for the whole dataset (see file header) — confirm. */
    maxPrice: number;
    outputTokens: number;
    budgetMode?: BudgetMode;
    /** Optional callback receiving (done, total) counts. */
    progressCallback?: (done: number, total: number) => void;
}
|
|
54
|
+
/**
 * Approximate token count with a deterministic 4 chars ~= 1 token rule.
 *
 * @param text - Text to measure.
 * @returns Approximate number of tokens.
 */
export declare function estimateTokens(text: string): number;
|
|
56
|
+
/**
 * Estimate USD generation cost for a model, or null when pricing is missing.
 *
 * @param model - Model description; may be undefined.
 * @param inputTokens - Number of input tokens.
 * @param outputTokens - Number of output tokens.
 * @returns Estimated cost, or null.
 */
export declare function estimateGenerationCost(
    model: ModelInfo | undefined,
    inputTokens: number,
    outputTokens: number
): number | null;
|
|
58
|
+
/**
 * Derive a cost unit (a number) from the maximum price.
 * NOTE(review): presumably the granularity used to discretize costs for the
 * knapsack described in the file header — confirm against budget.js.
 */
export declare function costUnitForBudget(maxPrice: number): number;
|
|
59
|
+
/**
 * Take a list of candidates and return a list of candidates.
 * NOTE(review): presumably drops candidates that are dominated on cost and
 * utility, as the name suggests — confirm against budget.js.
 */
export declare function paretoPrune(candidates: BudgetCandidate[]): BudgetCandidate[];
|
|
60
|
+
/**
 * Run the budget optimization over groups of candidates and report the
 * outcome as a BudgetOptimizationResult.
 *
 * `costUnit` is optional; its default is computed in the implementation.
 * NOTE(review): presumably each inner array holds the candidates for one
 * prompt (one pick per group, per the file header) — confirm against budget.js.
 */
export declare function optimizeBudgetCandidates(candidateGroups: BudgetCandidate[][], maxPrice: number, costUnit?: number): BudgetOptimizationResult;
|
|
61
|
+
/**
 * Route a dataset of prompts under a shared budget.
 *
 * @param options - See RouteDatasetWithBudgetOptions.
 * @returns Promise resolving to the per-prompt `results` and the overall
 *          `optimization` outcome.
 */
export declare function routeDatasetWithBudget(
    options: RouteDatasetWithBudgetOptions
): Promise<{ results: BudgetedRouteResult[]; optimization: BudgetOptimizationResult }>;
|
|
65
|
+
//# sourceMappingURL=budget.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"budget.d.ts","sourceRoot":"","sources":["../src/budget.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,KAAK,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AACvD,OAAO,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AAErD,MAAM,MAAM,UAAU,GAAG,QAAQ,GAAG,YAAY,CAAC;AAEjD,MAAM,WAAW,eAAe;IAC9B,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,aAAa,EAAE,MAAM,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,eAAe,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,wBAAwB;IACvC,MAAM,EAAE,SAAS,GAAG,YAAY,CAAC;IACjC,QAAQ,EAAE,eAAe,EAAE,CAAC;IAC5B,kBAAkB,EAAE,MAAM,CAAC;IAC3B,qBAAqB,EAAE,MAAM,CAAC;IAC9B,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B,8BAA8B,CAAC,EAAE,MAAM,CAAC;IACxC,eAAe,CAAC,EAAE,MAAM,CAAC;CAC1B;AAED,MAAM,WAAW,mBAAmB;IAClC,WAAW,EAAE,WAAW,CAAC;IACzB,QAAQ,EAAE,eAAe,CAAC;IAC1B,cAAc,EAAE,MAAM,CAAC;IACvB,eAAe,EAAE,MAAM,CAAC;IACxB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,6BAA6B;IAC5C,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,UAAU,EAAE,UAAU,CAAC;IACvB,QAAQ,EAAE,MAAM,CAAC;IACjB,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,gBAAgB,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;CAC1D;AAQD,4EAA4E;AAC5E,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEnD;AAED,iFAAiF;AACjF,wBAAgB,sBAAsB,CACpC,KAAK,EAAE,SAAS,GAAG,SAAS,EAC5B,WAAW,EAAE,MAAM,EACnB,YAAY,EAAE,MAAM,GACnB,MAAM,GAAG,IAAI,CAWf;AAED,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAU1D;AAaD,wBAAgB,WAAW,CAAC,UAAU,EAAE,eAAe,EAAE,GAAG,eAAe,EAAE,CAW5E;AAED,wBAAgB,wBAAwB,CACtC,eAAe,EAAE,eAAe,EAAE,EAAE,EACpC,QAAQ,EAAE,MAAM,EAChB,QAAQ,SAA8B,GACrC,wBAAwB,CA2H1B;AAyID,wBAAsB,sBAAsB,CAC1C,OAAO,EAAE,6BAA6B,GACrC,OAAO,CAAC;IAAE,OAAO,EAAE,mBAAmB,EAAE,CAAC;IAAC,YAAY,EAAE,wBAAwB,CAAA;CAAE,CAAC,CAmGrF"}
|