tryaii-dre 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +190 -0
- package/README.md +234 -0
- package/dist/banner.d.ts +24 -0
- package/dist/banner.d.ts.map +1 -0
- package/dist/banner.js +125 -0
- package/dist/banner.js.map +1 -0
- package/dist/benchmarks/index.d.ts +4 -0
- package/dist/benchmarks/index.d.ts.map +1 -0
- package/dist/benchmarks/index.js +3 -0
- package/dist/benchmarks/index.js.map +1 -0
- package/dist/benchmarks/registry.d.ts +69 -0
- package/dist/benchmarks/registry.d.ts.map +1 -0
- package/dist/benchmarks/registry.js +128 -0
- package/dist/benchmarks/registry.js.map +1 -0
- package/dist/benchmarks/standard.d.ts +6 -0
- package/dist/benchmarks/standard.d.ts.map +1 -0
- package/dist/benchmarks/standard.js +115 -0
- package/dist/benchmarks/standard.js.map +1 -0
- package/dist/budget.d.ts +65 -0
- package/dist/budget.d.ts.map +1 -0
- package/dist/budget.js +344 -0
- package/dist/budget.js.map +1 -0
- package/dist/cache/index.d.ts +27 -0
- package/dist/cache/index.d.ts.map +1 -0
- package/dist/cache/index.js +63 -0
- package/dist/cache/index.js.map +1 -0
- package/dist/centroids/data/centroids_all-MiniLM-L6-v2.json +1 -0
- package/dist/centroids/data/trainingQueries.json +246 -0
- package/dist/centroids/generator.d.ts +63 -0
- package/dist/centroids/generator.d.ts.map +1 -0
- package/dist/centroids/generator.js +120 -0
- package/dist/centroids/generator.js.map +1 -0
- package/dist/centroids/index.d.ts +3 -0
- package/dist/centroids/index.d.ts.map +1 -0
- package/dist/centroids/index.js +3 -0
- package/dist/centroids/index.js.map +1 -0
- package/dist/centroids/loader.d.ts +87 -0
- package/dist/centroids/loader.d.ts.map +1 -0
- package/dist/centroids/loader.js +236 -0
- package/dist/centroids/loader.js.map +1 -0
- package/dist/classifiers/base.d.ts +56 -0
- package/dist/classifiers/base.d.ts.map +1 -0
- package/dist/classifiers/base.js +42 -0
- package/dist/classifiers/base.js.map +1 -0
- package/dist/classifiers/embedding.d.ts +68 -0
- package/dist/classifiers/embedding.d.ts.map +1 -0
- package/dist/classifiers/embedding.js +0 -0
- package/dist/classifiers/embedding.js.map +1 -0
- package/dist/classifiers/hybrid.d.ts +31 -0
- package/dist/classifiers/hybrid.d.ts.map +1 -0
- package/dist/classifiers/hybrid.js +61 -0
- package/dist/classifiers/hybrid.js.map +1 -0
- package/dist/classifiers/index.d.ts +4 -0
- package/dist/classifiers/index.d.ts.map +1 -0
- package/dist/classifiers/index.js +3 -0
- package/dist/classifiers/index.js.map +1 -0
- package/dist/classifiers/keyword.d.ts +29 -0
- package/dist/classifiers/keyword.d.ts.map +1 -0
- package/dist/classifiers/keyword.js +264 -0
- package/dist/classifiers/keyword.js.map +1 -0
- package/dist/cli.d.ts +15 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +597 -0
- package/dist/cli.js.map +1 -0
- package/dist/client-types.d.ts +101 -0
- package/dist/client-types.d.ts.map +1 -0
- package/dist/client-types.js +5 -0
- package/dist/client-types.js.map +1 -0
- package/dist/client.d.ts +50 -0
- package/dist/client.d.ts.map +1 -0
- package/dist/client.js +279 -0
- package/dist/client.js.map +1 -0
- package/dist/config.d.ts +45 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +37 -0
- package/dist/config.js.map +1 -0
- package/dist/dashboard/index.d.ts +48 -0
- package/dist/dashboard/index.d.ts.map +1 -0
- package/dist/dashboard/index.js +166 -0
- package/dist/dashboard/index.js.map +1 -0
- package/dist/embeddings/base.d.ts +66 -0
- package/dist/embeddings/base.d.ts.map +1 -0
- package/dist/embeddings/base.js +77 -0
- package/dist/embeddings/base.js.map +1 -0
- package/dist/embeddings/index.d.ts +3 -0
- package/dist/embeddings/index.d.ts.map +1 -0
- package/dist/embeddings/index.js +3 -0
- package/dist/embeddings/index.js.map +1 -0
- package/dist/embeddings/local.d.ts +42 -0
- package/dist/embeddings/local.d.ts.map +1 -0
- package/dist/embeddings/local.js +89 -0
- package/dist/embeddings/local.js.map +1 -0
- package/dist/index.d.ts +44 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +45 -0
- package/dist/index.js.map +1 -0
- package/dist/integrations/index.d.ts +3 -0
- package/dist/integrations/index.d.ts.map +1 -0
- package/dist/integrations/index.js +2 -0
- package/dist/integrations/index.js.map +1 -0
- package/dist/integrations/openrouter.d.ts +84 -0
- package/dist/integrations/openrouter.d.ts.map +1 -0
- package/dist/integrations/openrouter.js +253 -0
- package/dist/integrations/openrouter.js.map +1 -0
- package/dist/registry/index.d.ts +2 -0
- package/dist/registry/index.d.ts.map +1 -0
- package/dist/registry/index.js +2 -0
- package/dist/registry/index.js.map +1 -0
- package/dist/registry/models.d.ts +76 -0
- package/dist/registry/models.d.ts.map +1 -0
- package/dist/registry/models.js +170 -0
- package/dist/registry/models.js.map +1 -0
- package/dist/registry/presets/defaultModels.json +435 -0
- package/dist/router.d.ts +178 -0
- package/dist/router.d.ts.map +1 -0
- package/dist/router.js +259 -0
- package/dist/router.js.map +1 -0
- package/dist/scoring/benchmarks.d.ts +35 -0
- package/dist/scoring/benchmarks.d.ts.map +1 -0
- package/dist/scoring/benchmarks.js +68 -0
- package/dist/scoring/benchmarks.js.map +1 -0
- package/dist/scoring/engine.d.ts +43 -0
- package/dist/scoring/engine.d.ts.map +1 -0
- package/dist/scoring/engine.js +267 -0
- package/dist/scoring/engine.js.map +1 -0
- package/dist/scoring/index.d.ts +6 -0
- package/dist/scoring/index.d.ts.map +1 -0
- package/dist/scoring/index.js +4 -0
- package/dist/scoring/index.js.map +1 -0
- package/dist/scoring/priorities.d.ts +41 -0
- package/dist/scoring/priorities.d.ts.map +1 -0
- package/dist/scoring/priorities.js +49 -0
- package/dist/scoring/priorities.js.map +1 -0
- package/dist/types.d.ts +47 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +5 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/cosine.d.ts +10 -0
- package/dist/utils/cosine.d.ts.map +1 -0
- package/dist/utils/cosine.js +18 -0
- package/dist/utils/cosine.js.map +1 -0
- package/dist/utils/math.d.ts +18 -0
- package/dist/utils/math.d.ts.map +1 -0
- package/dist/utils/math.js +54 -0
- package/dist/utils/math.js.map +1 -0
- package/package.json +65 -0
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Centroid generator -- creates benchmark centroids from training queries.
|
|
3
|
+
*
|
|
4
|
+
* Centroids are the average embedding of all training queries for a benchmark.
|
|
5
|
+
* They are used by the EmbeddingClassifier to measure how similar a user's
|
|
6
|
+
* prompt is to each benchmark category.
|
|
7
|
+
*
|
|
8
|
+
* Centroids are regenerated when the embedding model changes, because different
|
|
9
|
+
* models produce different vector spaces.
|
|
10
|
+
*/
|
|
11
|
+
import { readFileSync, writeFileSync, mkdirSync } from 'node:fs';
|
|
12
|
+
import { dirname, join } from 'node:path';
|
|
13
|
+
import { fileURLToPath } from 'node:url';
|
|
14
|
+
import { vectorMean, vectorNormalize } from '../utils/math.js';
|
|
15
|
+
// Directory containing this module, derived from the module's own file URL.
const currentDir = dirname(fileURLToPath(import.meta.url));
|
|
16
|
+
/** Path to bundled training queries. */
|
|
17
|
+
// Built from this module's directory: <module dir>/data/trainingQueries.json.
export const TRAINING_QUERIES_PATH = join(currentDir, 'data', 'trainingQueries.json');
|
|
18
|
+
/**
|
|
19
|
+
* Generates and manages benchmark centroids.
|
|
20
|
+
*
|
|
21
|
+
* Centroids are the average embedding vector of representative queries
|
|
22
|
+
* for each benchmark. When a user sends a prompt, we compute cosine
|
|
23
|
+
* similarity between their prompt's embedding and each centroid to
|
|
24
|
+
* determine what kind of task they're asking about.
|
|
25
|
+
*/
|
|
26
|
+
export class CentroidGenerator {
    /**
     * @param embeddingProvider - Object used to embed queries. The methods of
     *   this class call its `embedBatch` / `embedBatchAsync` and read its
     *   `modelName` and `dimension` members.
     */
    constructor(embeddingProvider) {
        this._provider = embeddingProvider;
    }

    /**
     * Build one centroid per benchmark through the provider's synchronous
     * `embedBatch`.
     *
     * @param trainingQueries - Map of benchmark name -> list of queries.
     *   When falsy, the bundled training queries are loaded instead.
     * @returns Map of benchmark name -> centroid vector.
     */
    generate(trainingQueries) {
        const querySets = trainingQueries || this._loadDefaultQueries();
        const result = {};
        for (const [name, texts] of Object.entries(querySets)) {
            // Centroid = mean of the benchmark's embeddings, then normalized.
            const vectors = this._provider.embedBatch(texts);
            result[name] = vectorNormalize(vectorMean(vectors));
        }
        return result;
    }

    /**
     * Same as `generate`, but embeds through the provider's
     * `embedBatchAsync`. Benchmarks are awaited one at a time, in key order.
     *
     * @param trainingQueries - Map of benchmark name -> list of queries.
     *   When falsy, the bundled training queries are loaded instead.
     * @returns Promise of a map of benchmark name -> centroid vector.
     */
    async generateAsync(trainingQueries) {
        const querySets = trainingQueries || this._loadDefaultQueries();
        const result = {};
        for (const [name, texts] of Object.entries(querySets)) {
            const vectors = await this._provider.embedBatchAsync(texts);
            result[name] = vectorNormalize(vectorMean(vectors));
        }
        return result;
    }

    /**
     * Compute the centroid of a single, caller-supplied benchmark.
     *
     * @param benchmarkName - Name of the benchmark (not used in the computation).
     * @param queries - Representative queries for this benchmark.
     * @returns The normalized mean of the queries' embeddings.
     */
    generateFromCustom(benchmarkName, queries) {
        return vectorNormalize(vectorMean(this._provider.embedBatch(queries)));
    }

    /**
     * Async counterpart of `generateFromCustom`; embeds through
     * `embedBatchAsync`.
     *
     * @param benchmarkName - Name of the benchmark (not used in the computation).
     * @param queries - Representative queries for this benchmark.
     * @returns Promise of the normalized mean of the queries' embeddings.
     */
    async generateFromCustomAsync(benchmarkName, queries) {
        const vectors = await this._provider.embedBatchAsync(queries);
        return vectorNormalize(vectorMean(vectors));
    }

    /**
     * Write centroids to `path` as JSON, creating parent directories first.
     * The file carries a `metadata` object (provider model name, provider
     * dimension, number of benchmarks) next to the `centroids` map.
     *
     * @param centroids - Map of benchmark name -> centroid vector.
     * @param path - Destination file path.
     */
    save(centroids, path) {
        mkdirSync(dirname(path), { recursive: true });
        const { modelName, dimension } = this._provider;
        const payload = {
            metadata: {
                model: modelName,
                dimension,
                benchmark_count: Object.keys(centroids).length,
            },
            centroids,
        };
        writeFileSync(path, JSON.stringify(payload));
    }

    /**
     * Read a centroid JSON file.
     *
     * @param path - File to read (decoded as UTF-8).
     * @returns `{ centroids, metadata }` exactly as stored; no validation is
     *   performed here.
     */
    static load(path) {
        const { centroids, metadata } = JSON.parse(readFileSync(path, 'utf-8'));
        return { centroids, metadata };
    }

    /**
     * Read the bundled training-queries file and flatten it to
     * benchmark name -> `queries` list.
     */
    _loadDefaultQueries() {
        const { benchmarks } = JSON.parse(readFileSync(TRAINING_QUERIES_PATH, 'utf-8'));
        const byBenchmark = {};
        Object.entries(benchmarks).forEach(([name, entry]) => {
            byBenchmark[name] = entry.queries;
        });
        return byBenchmark;
    }
}
|
|
120
|
+
//# sourceMappingURL=generator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"generator.js","sourceRoot":"","sources":["../../src/centroids/generator.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,YAAY,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AACjE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAIzC,OAAO,EAAE,UAAU,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AAE/D,MAAM,UAAU,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAE3D,wCAAwC;AACxC,MAAM,CAAC,MAAM,qBAAqB,GAAG,IAAI,CAAC,UAAU,EAAE,MAAM,EAAE,sBAAsB,CAAC,CAAC;AAEtF;;;;;;;GAOG;AACH,MAAM,OAAO,iBAAiB;IAG5B,YAAY,iBAAwC;QAClD,IAAI,CAAC,SAAS,GAAG,iBAAiB,CAAC;IACrC,CAAC;IAED;;;;;;OAMG;IACH,QAAQ,CAAC,eAA0C;QACjD,IAAI,CAAC,eAAe,EAAE,CAAC;YACrB,eAAe,GAAG,IAAI,CAAC,mBAAmB,EAAE,CAAC;QAC/C,CAAC;QAED,MAAM,SAAS,GAA6B,EAAE,CAAC;QAE/C,KAAK,MAAM,CAAC,SAAS,EAAE,OAAO,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,eAAe,CAAC,EAAE,CAAC;YACnE,uCAAuC;YACvC,MAAM,UAAU,GAAG,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;YAEtD,uDAAuD;YACvD,MAAM,QAAQ,GAAG,eAAe,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC,CAAC;YACzD,SAAS,CAAC,SAAS,CAAC,GAAG,QAAQ,CAAC;QAClC,CAAC;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,aAAa,CAAC,eAA0C;QAC5D,IAAI,CAAC,eAAe,EAAE,CAAC;YACrB,eAAe,GAAG,IAAI,CAAC,mBAAmB,EAAE,CAAC;QAC/C,CAAC;QAED,MAAM,SAAS,GAA6B,EAAE,CAAC;QAE/C,KAAK,MAAM,CAAC,SAAS,EAAE,OAAO,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,eAAe,CAAC,EAAE,CAAC;YACnE,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC;YACjE,MAAM,QAAQ,GAAG,eAAe,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC,CAAC;YACzD,SAAS,CAAC,SAAS,CAAC,GAAG,QAAQ,CAAC;QAClC,CAAC;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;;OAMG;IACH,kBAAkB,CAAC,aAAqB,EAAE,OAAiB;QACzD,MAAM,UAAU,GAAG,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;QACtD,OAAO,eAAe,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC,CAAC;IACjD,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,uBAAuB,CAAC,aAAqB,EAAE,OAAiB;QACpE,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC;QACjE,OAAO,eAAe,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC,CAAC;IACjD,CAAC;IAED,qCAAqC;IACrC,IAAI,CAAC,SAAmC,EAAE,IAAY;QACpD,SAAS,CAAC,OAAO,CAAC,IAAI,
CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAE9C,MAAM,IAAI,GAAkB;YAC1B,QAAQ,EAAE;gBACR,KAAK,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS;gBAC/B,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS;gBACnC,eAAe,EAAE,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,MAAM;aAC/C;YACD,SAAS;SACV,CAAC;QAEF,aAAa,CAAC,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC;IAC5C,CAAC;IAED,uCAAuC;IACvC,MAAM,CAAC,IAAI,CAAC,IAAY;QACtB,MAAM,GAAG,GAAG,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QACxC,MAAM,IAAI,GAAkB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAE5C,OAAO;YACL,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,QAAQ,EAAE,IAAI,CAAC,QAAQ;SACxB,CAAC;IACJ,CAAC;IAED,qCAAqC;IAC7B,mBAAmB;QACzB,MAAM,GAAG,GAAG,YAAY,CAAC,qBAAqB,EAAE,OAAO,CAAC,CAAC;QACzD,MAAM,IAAI,GAAwB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAElD,MAAM,OAAO,GAA6B,EAAE,CAAC;QAC7C,KAAK,MAAM,CAAC,IAAI,EAAE,SAAS,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC;YAChE,OAAO,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,OAAO,CAAC;QACpC,CAAC;QACD,OAAO,OAAO,CAAC;IACjB,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/centroids/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AAC1E,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/centroids/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AAC1E,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC"}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Centroid loader -- handles lazy initialization and model compatibility.
|
|
3
|
+
*
|
|
4
|
+
* Loading priority:
|
|
5
|
+
* 1. In-memory cache (already loaded)
|
|
6
|
+
* 2. User's cache directory (previously generated for their model)
|
|
7
|
+
* 3. Bundled static file (ships with package for default model -- zero delay)
|
|
8
|
+
* 4. Generate from training queries (only if using a non-default model)
|
|
9
|
+
*/
|
|
10
|
+
import { BaseEmbeddingProvider } from '../embeddings/base.js';
|
|
11
|
+
/**
|
|
12
|
+
* Stable fingerprint of the benchmark set present in a centroid file.
|
|
13
|
+
*
|
|
14
|
+
* Defined as the sorted benchmark names joined by "|". Used to detect when a
|
|
15
|
+
* cached/bundled centroid file was generated against a different benchmark set
|
|
16
|
+
* and must be regenerated. Must stay identical to the Python SDK's
|
|
17
|
+
* `benchmark_fingerprint` (centroids/generator.py).
|
|
18
|
+
*/
|
|
19
|
+
// Accepts any iterable of names (array, Set, ...) and returns a single "|"-joined string.
export declare function benchmarkFingerprint(benchmarkNames: Iterable<string>): string;
|
|
20
|
+
/**
|
|
21
|
+
* Manages centroid lifecycle: load, validate, regenerate.
|
|
22
|
+
*
|
|
23
|
+
* For the default embedding model (all-MiniLM-L6-v2), centroids are
|
|
24
|
+
* bundled with the package -- zero first-run delay. For other models,
|
|
25
|
+
* centroids are generated on first use and cached to disk.
|
|
26
|
+
*/
|
|
27
|
+
export declare class CentroidLoader {
    /** Embedding provider passed to the constructor. */
    private _provider;
    /** Centroid map held in memory once loaded or generated. */
    private _centroids;
    /** Generator used when centroids must be (re)built or saved. */
    private _generator;
    /** Optional file used to persist centroids between runs. */
    private _userCachePath;
    /**
     * @param embeddingProvider - Provider the centroids belong to.
     * @param userCachePath - Optional cache file for generated centroids.
     */
    constructor(embeddingProvider: BaseEmbeddingProvider, userCachePath?: string);
    /**
     * Return the centroids from the best available source.
     *
     * Lookup order: memory, then the user cache, then the bundled static
     * file, and finally fresh generation.
     *
     * This is the synchronous path. If generation is required and the
     * provider only supports async embedding, this will throw; such callers
     * should use `getCentroidsAsync()`.
     */
    getCentroids(): Record<string, number[]>;
    /**
     * Promise-returning variant of `getCentroids`. Usable with any provider:
     * when generation is required it goes through the provider's async path.
     */
    getCentroidsAsync(): Promise<Record<string, number[]>>;
    /**
     * Look in memory, the user cache and the bundled file, in that order.
     * Yields null when none of them holds a valid file for the current
     * provider's model. Used by both the sync and the async load paths.
     */
    private _tryLoadFromFiles;
    /** Load and validate a single centroid file. */
    private _tryLoad;
    /** Synchronous generation fallback. */
    private _regenerate;
    /** Asynchronous generation fallback. */
    private _regenerateAsync;
    /**
     * Rebuild the centroids unconditionally.
     *
     * @param customQueries - Training queries to use instead of the defaults.
     *   Defaults are used when this is undefined.
     */
    regenerate(customQueries?: Record<string, string[]>): Record<string, number[]>;
    /**
     * Synchronously add a centroid for a custom benchmark to the current set.
     *
     * Needs a provider that can embed synchronously; with an async-only
     * provider (e.g. LocalEmbeddingProvider) call
     * `addBenchmarkCentroidAsync()` instead.
     *
     * @param benchmarkName - Name of the benchmark being added.
     * @param queries - Queries that are representative of the benchmark.
     * @returns The centroid vector that was generated.
     */
    addBenchmarkCentroid(benchmarkName: string, queries: string[]): number[];
    /**
     * Promise-returning variant of `addBenchmarkCentroid`; usable with any
     * provider. The in-memory centroid map is mutated, so later
     * `getCentroids()` calls (including those made by classifiers sharing
     * this loader) include the new benchmark.
     */
    addBenchmarkCentroidAsync(benchmarkName: string, queries: string[]): Promise<number[]>;
    /** Delete a benchmark centroid; the result is true when it was present. */
    removeBenchmark(benchmarkName: string): boolean;
    /** Names of all benchmarks that currently have a centroid. */
    get availableBenchmarks(): string[];
}
|
|
87
|
+
//# sourceMappingURL=loader.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"loader.d.ts","sourceRoot":"","sources":["../../src/centroids/loader.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAMH,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAc9D;;;;;;;GAOG;AACH,wBAAgB,oBAAoB,CAAC,cAAc,EAAE,QAAQ,CAAC,MAAM,CAAC,GAAG,MAAM,CAE7E;AA6BD;;;;;;GAMG;AACH,qBAAa,cAAc;IACzB,OAAO,CAAC,SAAS,CAAwB;IACzC,OAAO,CAAC,UAAU,CAAyC;IAC3D,OAAO,CAAC,UAAU,CAAoB;IACtC,OAAO,CAAC,cAAc,CAAgB;gBAGpC,iBAAiB,EAAE,qBAAqB,EACxC,aAAa,CAAC,EAAE,MAAM;IAOxB;;;;;;;;OAQG;IACH,YAAY,IAAI,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC;IAMxC;;;;OAIG;IACG,iBAAiB,IAAI,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;IAM5D;;;;OAIG;IACH,OAAO,CAAC,iBAAiB;IAuBzB,OAAO,CAAC,QAAQ;IAoChB,OAAO,CAAC,WAAW;YAYL,gBAAgB;IAW9B;;;;OAIG;IACH,UAAU,CAAC,aAAa,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC;IAW9E;;;;;;;;;;OAUG;IACH,oBAAoB,CAAC,aAAa,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE;IAaxE;;;;OAIG;IACG,yBAAyB,CAAC,aAAa,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAY5F,+DAA+D;IAC/D,eAAe,CAAC,aAAa,EAAE,MAAM,GAAG,OAAO;IAY/C,0CAA0C;IAC1C,IAAI,mBAAmB,IAAI,MAAM,EAAE,CAElC;CACF"}
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Centroid loader -- handles lazy initialization and model compatibility.
|
|
3
|
+
*
|
|
4
|
+
* Loading priority:
|
|
5
|
+
* 1. In-memory cache (already loaded)
|
|
6
|
+
* 2. User's cache directory (previously generated for their model)
|
|
7
|
+
* 3. Bundled static file (ships with package for default model -- zero delay)
|
|
8
|
+
* 4. Generate from training queries (only if using a non-default model)
|
|
9
|
+
*/
|
|
10
|
+
import { existsSync, readFileSync } from 'node:fs';
|
|
11
|
+
import { dirname, join } from 'node:path';
|
|
12
|
+
import { fileURLToPath } from 'node:url';
|
|
13
|
+
import { CentroidGenerator, TRAINING_QUERIES_PATH } from './generator.js';
|
|
14
|
+
// Directory containing this module, derived from the module's own file URL.
const currentDir = dirname(fileURLToPath(import.meta.url));
|
|
15
|
+
/** Path to bundled centroids (ships with the package). */
|
|
16
|
+
// This is a directory (<module dir>/data), not a single centroid file.
const BUNDLED_CENTROIDS_DIR = join(currentDir, 'data');
|
|
17
|
+
/** Get path to the bundled centroid file for a given model. */
|
|
18
|
+
/**
 * Build the path of the bundled centroid file for `modelName`.
 * Every "/" in the model name becomes "__" so the name fits in one file name.
 */
function bundledCentroidPath(modelName) {
    const fileStem = modelName.split('/').join('__');
    return join(BUNDLED_CENTROIDS_DIR, `centroids_${fileStem}.json`);
}
|
|
22
|
+
/**
|
|
23
|
+
* Stable fingerprint of the benchmark set present in a centroid file.
|
|
24
|
+
*
|
|
25
|
+
* Defined as the sorted benchmark names joined by "|". Used to detect when a
|
|
26
|
+
* cached/bundled centroid file was generated against a different benchmark set
|
|
27
|
+
* and must be regenerated. Must stay identical to the Python SDK's
|
|
28
|
+
* `benchmark_fingerprint` (centroids/generator.py).
|
|
29
|
+
*/
|
|
30
|
+
/**
 * Fingerprint a set of benchmark names: copy them into a new array, sort it
 * with the default string ordering, and join with "|".
 * The iterable passed in is not modified.
 */
export function benchmarkFingerprint(benchmarkNames) {
    const ordered = Array.from(benchmarkNames);
    ordered.sort();
    return ordered.join('|');
}
|
|
33
|
+
/**
|
|
34
|
+
* Fingerprint of the bundled default benchmark set, derived from the shipped
|
|
35
|
+
* training queries. A centroid file whose benchmark set doesn't match this was
|
|
36
|
+
* built against a different benchmark set and must be regenerated. Mirrors the
|
|
37
|
+
* Python SDK's `CentroidGenerator.default_benchmark_fingerprint()`.
|
|
38
|
+
*/
|
|
39
|
+
/**
 * Fingerprint of the benchmark names found under `benchmarks` in the bundled
 * training-queries file. The file is read and parsed on every call.
 */
function defaultBenchmarkFingerprint() {
    const { benchmarks } = JSON.parse(readFileSync(TRAINING_QUERIES_PATH, 'utf-8'));
    return benchmarkFingerprint(Object.keys(benchmarks));
}
|
|
44
|
+
/**
|
|
45
|
+
* Whether the provider has reported a real embedding dimension yet.
|
|
46
|
+
*
|
|
47
|
+
* The default `LocalEmbeddingProvider` returns a hardcoded fallback (384)
|
|
48
|
+
* before its model is initialized, so comparing against it would wrongly
|
|
49
|
+
* reject/accept centroid files for non-default-dimension models. We detect the
|
|
50
|
+
* uninitialized state via its private `_dimension` marker; providers that
|
|
51
|
+
* always report a real dimension are treated as known.
|
|
52
|
+
*/
|
|
53
|
+
/**
 * Report whether `provider` is treated as having a known embedding dimension.
 *
 * Only an explicit `null` in `provider._dimension` counts as "not known yet".
 * A missing (`undefined`) marker, as on providers that do not define the
 * field, and any other value both count as known.
 */
function providerDimensionKnown(provider) {
    return provider._dimension !== null;
}
|
|
58
|
+
/**
|
|
59
|
+
* Manages centroid lifecycle: load, validate, regenerate.
|
|
60
|
+
*
|
|
61
|
+
* For the default embedding model (all-MiniLM-L6-v2), centroids are
|
|
62
|
+
* bundled with the package -- zero first-run delay. For other models,
|
|
63
|
+
* centroids are generated on first use and cached to disk.
|
|
64
|
+
*/
|
|
65
|
+
export class CentroidLoader {
    /**
     * @param embeddingProvider - Provider whose `modelName` / `dimension`
     *   centroid files are validated against, and which is handed to the
     *   internal CentroidGenerator.
     * @param userCachePath - Optional JSON file used to persist centroids.
     *   When omitted (or empty), nothing is written to disk.
     */
    constructor(embeddingProvider, userCachePath) {
        this._centroids = null;
        this._provider = embeddingProvider;
        this._generator = new CentroidGenerator(embeddingProvider);
        this._userCachePath = userCachePath ?? null;
    }

    /**
     * Get centroids, loading from best available source.
     *
     * Priority: memory > user cache > bundled static > generate fresh.
     *
     * Synchronous path -- if regeneration is needed (non-default model,
     * first run) and the provider is async-only, this will throw. Async
     * callers should use `getCentroidsAsync()` instead.
     *
     * @returns The live in-memory centroid map (not a copy).
     */
    getCentroids() {
        const cached = this._tryLoadFromFiles();
        if (cached !== null) {
            return cached;
        }
        return this._regenerate();
    }

    /**
     * Async version of `getCentroids`. If centroid regeneration is needed it
     * goes through the generator's async path.
     *
     * @returns Promise of the live in-memory centroid map (not a copy).
     */
    async getCentroidsAsync() {
        const cached = this._tryLoadFromFiles();
        if (cached !== null) {
            return cached;
        }
        return this._regenerateAsync();
    }

    /**
     * Try memory / user cache / bundled centroid file, in that order.
     * The first source that yields centroids is remembered in memory.
     * Shared between the sync and async load paths.
     *
     * @returns The centroid map, or null if no source produced a valid one.
     */
    _tryLoadFromFiles() {
        if (this._centroids !== null) {
            return this._centroids;
        }
        // 1. User's cached centroids, when a cache path was configured.
        if (this._userCachePath) {
            const fromCache = this._tryLoad(this._userCachePath);
            if (fromCache !== null) {
                this._centroids = fromCache;
                return this._centroids;
            }
        }
        // 2. Bundled static centroids for the provider's model name.
        const fromBundle = this._tryLoad(bundledCentroidPath(this._provider.modelName));
        if (fromBundle !== null) {
            this._centroids = fromBundle;
            return this._centroids;
        }
        return null;
    }

    /**
     * Load one centroid file and validate it against the current provider.
     *
     * @param path - Centroid JSON file to read.
     * @returns The file's centroid map, or null when the file is missing,
     *   fails any check below, or reading/parsing throws.
     */
    _tryLoad(path) {
        if (!existsSync(path)) {
            return null;
        }
        try {
            const { centroids, metadata } = CentroidGenerator.load(path);
            const savedModel = metadata.model ?? '';
            const savedDim = metadata.dimension ?? 0;
            // Model name must always match.
            if (savedModel !== this._provider.modelName) {
                return null;
            }
            // Dimension check: skipped while the provider hasn't reported a
            // real dimension (it may still be returning a hardcoded fallback).
            if (providerDimensionKnown(this._provider) && savedDim !== this._provider.dimension) {
                return null;
            }
            // Benchmark-set check: fingerprint the benchmarks actually present
            // in the file (older/bundled files carry no stored fingerprint) and
            // compare against the bundled default set.
            if (benchmarkFingerprint(Object.keys(centroids)) !== defaultBenchmarkFingerprint()) {
                return null;
            }
            return centroids;
        }
        catch {
            // Best effort: an unreadable or malformed file is treated as absent
            // so the caller falls through to the next source.
            return null;
        }
    }

    /**
     * Write `centroids` to the user cache file, if one was configured.
     * Does nothing when no cache path is set.
     */
    _persist(centroids) {
        if (this._userCachePath) {
            this._generator.save(centroids, this._userCachePath);
        }
    }

    /** Generate from the default queries (sync), persist, and keep in memory. */
    _regenerate() {
        const centroids = this._generator.generate();
        this._persist(centroids);
        this._centroids = centroids;
        return centroids;
    }

    /** Generate from the default queries (async), persist, and keep in memory. */
    async _regenerateAsync() {
        const centroids = await this._generator.generateAsync();
        this._persist(centroids);
        this._centroids = centroids;
        return centroids;
    }

    /**
     * Force regeneration of centroids, replacing the in-memory set.
     *
     * @param customQueries - Optional custom training queries. If undefined, uses defaults.
     * @returns The newly generated centroid map.
     */
    regenerate(customQueries) {
        const centroids = this._generator.generate(customQueries);
        this._persist(centroids);
        this._centroids = centroids;
        return centroids;
    }

    /**
     * Add a custom benchmark centroid to the existing set (sync).
     *
     * Requires a sync-capable embedding provider. Callers with async-only
     * providers (e.g. LocalEmbeddingProvider) should use
     * `addBenchmarkCentroidAsync()`.
     *
     * NOTE(review): once a custom benchmark is saved, the cache file's
     * benchmark set no longer equals the default fingerprint that `_tryLoad`
     * requires, so that file is rejected on the next cold load -- confirm
     * this is intended.
     *
     * @param benchmarkName - Name of the new benchmark.
     * @param queries - Representative queries for this benchmark.
     * @returns The generated centroid vector.
     */
    addBenchmarkCentroid(benchmarkName, queries) {
        const centroids = this.getCentroids();
        const newCentroid = this._generator.generateFromCustom(benchmarkName, queries);
        centroids[benchmarkName] = newCentroid;
        this._persist(centroids);
        return newCentroid;
    }

    /**
     * Async version of `addBenchmarkCentroid`.
     * Mutates the in-memory centroid map so subsequent `getCentroids()` calls
     * (including from classifiers sharing this loader) see the new benchmark.
     *
     * @param benchmarkName - Name of the new benchmark.
     * @param queries - Representative queries for this benchmark.
     * @returns Promise of the generated centroid vector.
     */
    async addBenchmarkCentroidAsync(benchmarkName, queries) {
        const centroids = await this.getCentroidsAsync();
        const newCentroid = await this._generator.generateFromCustomAsync(benchmarkName, queries);
        centroids[benchmarkName] = newCentroid;
        this._persist(centroids);
        return newCentroid;
    }

    /**
     * Remove a benchmark centroid.
     *
     * Only own keys of the centroid map count. The `in` operator would also
     * match inherited names such as "toString" or "constructor", reporting a
     * removal (and rewriting the cache) for a benchmark that never existed.
     *
     * @param benchmarkName - Benchmark to remove.
     * @returns true if the benchmark existed and was removed, false otherwise.
     */
    removeBenchmark(benchmarkName) {
        const centroids = this.getCentroids();
        if (!Object.prototype.hasOwnProperty.call(centroids, benchmarkName)) {
            return false;
        }
        delete centroids[benchmarkName];
        this._persist(centroids);
        return true;
    }

    /** List all available benchmark names (loads centroids synchronously if needed). */
    get availableBenchmarks() {
        return Object.keys(this.getCentroids());
    }
}
|
|
236
|
+
//# sourceMappingURL=loader.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"loader.js","sourceRoot":"","sources":["../../src/centroids/loader.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACnD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAGzC,OAAO,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AAE1E,MAAM,UAAU,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAE3D,0DAA0D;AAC1D,MAAM,qBAAqB,GAAG,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;AAEvD,+DAA+D;AAC/D,SAAS,mBAAmB,CAAC,SAAiB;IAC5C,MAAM,QAAQ,GAAG,SAAS,CAAC,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;IAChD,OAAO,IAAI,CAAC,qBAAqB,EAAE,aAAa,QAAQ,OAAO,CAAC,CAAC;AACnE,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,oBAAoB,CAAC,cAAgC;IACnE,OAAO,CAAC,GAAG,cAAc,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC9C,CAAC;AAED;;;;;GAKG;AACH,SAAS,2BAA2B;IAClC,MAAM,GAAG,GAAG,YAAY,CAAC,qBAAqB,EAAE,OAAO,CAAC,CAAC;IACzD,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAA4C,CAAC;IACxE,OAAO,oBAAoB,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC;AAC5D,CAAC;AAED;;;;;;;;GAQG;AACH,SAAS,sBAAsB,CAAC,QAA+B;IAC7D,MAAM,GAAG,GAAI,QAAsD,CAAC,UAAU,CAAC;IAC/E,2EAA2E;IAC3E,OAAO,GAAG,KAAK,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,KAAK,IAAI,CAAC;AACjD,CAAC;AAED;;;;;;GAMG;AACH,MAAM,OAAO,cAAc;IAMzB,YACE,iBAAwC,EACxC,aAAsB;QANhB,eAAU,GAAoC,IAAI,CAAC;QAQzD,IAAI,CAAC,SAAS,GAAG,iBAAiB,CAAC;QACnC,IAAI,CAAC,UAAU,GAAG,IAAI,iBAAiB,CAAC,iBAAiB,CAAC,CAAC;QAC3D,IAAI,CAAC,cAAc,GAAG,aAAa,IAAI,IAAI,CAAC;IAC9C,CAAC;IAED;;;;;;;;OAQG;IACH,YAAY;QACV,MAAM,MAAM,GAAG,IAAI,CAAC,iBAAiB,EAAE,CAAC;QACxC,IAAI,MAAM,KAAK,IAAI;YAAE,OAAO,MAAM,CAAC;QACnC,OAAO,IAAI,CAAC,WAAW,EAAE,CAAC;IAC5B,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,iBAAiB;QACrB,MAAM,MAAM,GAAG,IAAI,CAAC,iBAAiB,EAAE,CAAC;QACxC,IAAI,MAAM,KAAK,IAAI;YAAE,OAAO,MAAM,CAAC;QACnC,OAAO,IAAI,CAAC,gBAAgB,EAAE,CAAC;IACjC,CAAC;IAED;;;;OAIG;IACK,iBAAiB;QACvB,IAAI,IAAI,CAAC,UAAU,KAAK,IAAI;YAAE,OAAO,IAAI,CAAC,UAAU,CAAC;QAErD,iCAAiC;QACjC,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACxB,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC
,cAAc,CAAC,CAAC;YAClD,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;gBACpB,IAAI,CAAC,UAAU,GAAG,MAAM,CAAC;gBACzB,OAAO,IAAI,CAAC,UAAU,CAAC;YACzB,CAAC;QACH,CAAC;QAED,uDAAuD;QACvD,MAAM,WAAW,GAAG,mBAAmB,CAAC,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;QAClE,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;QAC1C,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;YACpB,IAAI,CAAC,UAAU,GAAG,MAAM,CAAC;YACzB,OAAO,IAAI,CAAC,UAAU,CAAC;QACzB,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAEO,QAAQ,CAAC,IAAY;QAC3B,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC;YAAE,OAAO,IAAI,CAAC;QAEnC,IAAI,CAAC;YACH,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,GAAG,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAE7D,MAAM,UAAU,GAAG,QAAQ,CAAC,KAAK,IAAI,EAAE,CAAC;YACxC,MAAM,QAAQ,GAAG,QAAQ,CAAC,SAAS,IAAI,CAAC,CAAC;YAEzC,gCAAgC;YAChC,IAAI,UAAU,KAAK,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,CAAC;gBAC5C,OAAO,IAAI,CAAC;YACd,CAAC;YAED,kEAAkE;YAClE,wEAAwE;YACxE,mEAAmE;YACnE,IAAI,sBAAsB,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,QAAQ,KAAK,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,CAAC;gBACpF,OAAO,IAAI,CAAC;YACd,CAAC;YAED,0EAA0E;YAC1E,wEAAwE;YACxE,0EAA0E;YAC1E,kEAAkE;YAClE,+DAA+D;YAC/D,IAAI,oBAAoB,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,KAAK,2BAA2B,EAAE,EAAE,CAAC;gBACnF,OAAO,IAAI,CAAC;YACd,CAAC;YAED,OAAO,SAAS,CAAC;QACnB,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAEO,WAAW;QACjB,MAAM,SAAS,GAAG,IAAI,CAAC,UAAU,CAAC,QAAQ,EAAE,CAAC;QAE7C,qCAAqC;QACrC,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACxB,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;QACvD,CAAC;QAED,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;QAC5B,OAAO,SAAS,CAAC;IACnB,CAAC;IAEO,KAAK,CAAC,gBAAgB;QAC5B,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,aAAa,EAAE,CAAC;QAExD,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACxB,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;QACvD,CAAC;QAED,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;QAC5B,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;OAIG;IACH,UAAU,CAAC,aAAwC;QACjD,MAAM,SAAS,GAAG,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QAE1D,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACxB,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;QACvD,CAAC;QAED,IAAI,CAAC,UAAU,GAAG,S
AAS,CAAC;QAC5B,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;;;;;;OAUG;IACH,oBAAoB,CAAC,aAAqB,EAAE,OAAiB;QAC3D,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,EAAE,CAAC;QACtC,MAAM,WAAW,GAAG,IAAI,CAAC,UAAU,CAAC,kBAAkB,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;QAC/E,SAAS,CAAC,aAAa,CAAC,GAAG,WAAW,CAAC;QAEvC,uCAAuC;QACvC,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACxB,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;QACvD,CAAC;QAED,OAAO,WAAW,CAAC;IACrB,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,yBAAyB,CAAC,aAAqB,EAAE,OAAiB;QACtE,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;QACjD,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,uBAAuB,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;QAC1F,SAAS,CAAC,aAAa,CAAC,GAAG,WAAW,CAAC;QAEvC,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACxB,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;QACvD,CAAC;QAED,OAAO,WAAW,CAAC;IACrB,CAAC;IAED,+DAA+D;IAC/D,eAAe,CAAC,aAAqB;QACnC,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,EAAE,CAAC;QACtC,IAAI,aAAa,IAAI,SAAS,EAAE,CAAC;YAC/B,OAAO,SAAS,CAAC,aAAa,CAAC,CAAC;YAChC,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;gBACxB,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;YACvD,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC;IAED,0CAA0C;IAC1C,IAAI,mBAAmB;QACrB,OAAO,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC,CAAC;IAC1C,CAAC;CACF"}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Abstract base classifier.
|
|
3
|
+
*
|
|
4
|
+
* The embedding classifier implements this interface. The abstraction is
|
|
5
|
+
* kept so custom classifiers can be plugged in for tests or research.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Output of any classifier.
 *
 * This is the value returned by `BaseClassifier.classify()` and resolved by
 * `BaseClassifier.classifyAsync()`. `emptyClassificationResult()` creates a
 * default (empty) instance and `topBenchmarks()` ranks `benchmarkScores`.
 */
export interface ClassificationResult {
    /**
     * Cosine similarity to each benchmark centroid (0-1).
     * NOTE(review): keys are presumably benchmark names -- confirm against the classifier.
     */
    benchmarkScores: Record<string, number>;
    /** Broad category (for display / debugging). */
    broadCategory: string;
    /** Subcategory refinement. */
    subcategory: string;
    /** Confidence of the classification (0-1). */
    confidence: number;
    /** Which classifier produced this result (always "embedding" in the current system). */
    classifierUsed: string;
    /** Whether this result came from cache. */
    cacheHit: boolean;
    /** How long classification took in milliseconds. */
    processingTimeMs: number;
}
|
|
24
|
+
/**
 * Create a default (empty) ClassificationResult.
 *
 * @returns A blank result. The compiled implementation fills it with an empty
 *   `benchmarkScores` object, empty-string `broadCategory` / `subcategory` /
 *   `classifierUsed`, `confidence` and `processingTimeMs` of 0, and
 *   `cacheHit` set to false.
 */
export declare function emptyClassificationResult(): ClassificationResult;
|
|
26
|
+
/**
 * Get top benchmarks sorted by similarity score (descending).
 *
 * @param result - Classification result whose `benchmarkScores` are ranked.
 * @returns `[key, score]` pairs taken from `benchmarkScores`, highest score
 *   first. The compiled implementation sorts every entry; it does not
 *   truncate the list to a fixed number of "top" items.
 */
export declare function topBenchmarks(result: ClassificationResult): Array<[string, number]>;
|
|
28
|
+
/**
 * Abstract base class for prompt classifiers.
 *
 * A classifier takes a user prompt and returns benchmark similarity scores.
 * These scores tell us "what kind of task is this?" in terms of which
 * AI benchmarks it most resembles.
 *
 * Subclasses must implement `classify()` and `isReady()`; `classifyAsync()`
 * comes with a default implementation.
 */
export declare abstract class BaseClassifier {
    /**
     * Classify a prompt synchronously.
     *
     * Implementations whose embedding backend is async-only should throw
     * a clear error here -- callers should use `classifyAsync` instead.
     *
     * @param prompt - The user's input text.
     * @returns ClassificationResult with benchmarkScores populated.
     */
    abstract classify(prompt: string): ClassificationResult;
    /**
     * Classify a prompt asynchronously.
     *
     * Default implementation wraps `classify()` so sync classifiers work
     * out of the box on the async path. Async-only classifiers must override.
     *
     * @param prompt - The user's input text.
     * @returns Promise resolving to the ClassificationResult for the prompt.
     */
    classifyAsync(prompt: string): Promise<ClassificationResult>;
    /**
     * Check if the classifier is initialized and ready to use.
     *
     * @returns The readiness flag; what "ready" means is decided by each subclass.
     */
    abstract isReady(): boolean;
}
|
|
56
|
+
//# sourceMappingURL=base.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"base.d.ts","sourceRoot":"","sources":["../../src/classifiers/base.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,gCAAgC;AAChC,MAAM,WAAW,oBAAoB;IACnC,0DAA0D;IAC1D,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAExC,gDAAgD;IAChD,aAAa,EAAE,MAAM,CAAC;IAEtB,8BAA8B;IAC9B,WAAW,EAAE,MAAM,CAAC;IAEpB,8CAA8C;IAC9C,UAAU,EAAE,MAAM,CAAC;IAEnB,wFAAwF;IACxF,cAAc,EAAE,MAAM,CAAC;IAEvB,2CAA2C;IAC3C,QAAQ,EAAE,OAAO,CAAC;IAElB,oDAAoD;IACpD,gBAAgB,EAAE,MAAM,CAAC;CAC1B;AAED,qDAAqD;AACrD,wBAAgB,yBAAyB,IAAI,oBAAoB,CAUhE;AAED,kEAAkE;AAClE,wBAAgB,aAAa,CAAC,MAAM,EAAE,oBAAoB,GAAG,KAAK,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAGnF;AAED;;;;;;GAMG;AACH,8BAAsB,cAAc;IAClC;;;;;;;;OAQG;IACH,QAAQ,CAAC,QAAQ,CAAC,MAAM,EAAE,MAAM,GAAG,oBAAoB;IAEvD;;;;;OAKG;IACG,aAAa,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,oBAAoB,CAAC;IAIlE,+DAA+D;IAC/D,QAAQ,CAAC,OAAO,IAAI,OAAO;CAC5B"}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Abstract base classifier.
|
|
3
|
+
*
|
|
4
|
+
* The embedding classifier implements this interface. The abstraction is
|
|
5
|
+
* kept so custom classifiers can be plugged in for tests or research.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Build a blank ClassificationResult: no benchmark scores, empty labels,
 * zero confidence and timing, and not served from cache.
 *
 * @returns {object} A freshly allocated default result on every call.
 */
export function emptyClassificationResult() {
  const blank = {
    benchmarkScores: {},
    broadCategory: '',
    subcategory: '',
    confidence: 0,
    classifierUsed: '',
    cacheHit: false,
    processingTimeMs: 0,
  };
  return blank;
}
|
|
19
|
+
/**
 * Rank the benchmark scores of a classification result.
 *
 * @param {object} result - Classification result carrying `benchmarkScores`.
 * @returns {Array<[string, number]>} Every `[key, score]` pair, ordered from
 *   the highest score to the lowest.
 */
export function topBenchmarks(result) {
  // Object.entries yields a new array, so sorting in place never touches `result`.
  const ranked = Object.entries(result.benchmarkScores);
  ranked.sort(([, leftScore], [, rightScore]) => rightScore - leftScore);
  return ranked;
}
|
|
24
|
+
/**
 * Common parent for prompt classifiers.
 *
 * Given a user prompt, a classifier reports benchmark similarity scores,
 * i.e. which AI benchmarks the task most resembles. Subclasses supply
 * `classify()`; this class only contributes the async wrapper.
 */
export class BaseClassifier {
  /**
   * Asynchronous counterpart of `classify()`.
   *
   * Delegates to the subclass's synchronous `classify()`, so sync
   * classifiers get an async path without extra work. Classifiers that can
   * only run asynchronously must override this method.
   *
   * @param prompt - The user's input text.
   * @returns Promise for whatever `classify(prompt)` produces; an error
   *   thrown by `classify()` surfaces as a rejection.
   */
  async classifyAsync(prompt) {
    const outcome = this.classify(prompt);
    return outcome;
  }
}
|
|
42
|
+
//# sourceMappingURL=base.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"base.js","sourceRoot":"","sources":["../../src/classifiers/base.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AA0BH,qDAAqD;AACrD,MAAM,UAAU,yBAAyB;IACvC,OAAO;QACL,eAAe,EAAE,EAAE;QACnB,aAAa,EAAE,EAAE;QACjB,WAAW,EAAE,EAAE;QACf,UAAU,EAAE,CAAC;QACb,cAAc,EAAE,EAAE;QAClB,QAAQ,EAAE,KAAK;QACf,gBAAgB,EAAE,CAAC;KACpB,CAAC;AACJ,CAAC;AAED,kEAAkE;AAClE,MAAM,UAAU,aAAa,CAAC,MAA4B;IACxD,OAAO,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,eAAe,CAAC;SAC1C,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AACjC,CAAC;AAED;;;;;;GAMG;AACH,MAAM,OAAgB,cAAc;IAYlC;;;;;OAKG;IACH,KAAK,CAAC,aAAa,CAAC,MAAc;QAChC,OAAO,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;IAC/B,CAAC;CAIF"}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Neural embedding classifier.
|
|
3
|
+
*
|
|
4
|
+
* Classifies prompts by computing cosine similarity between the prompt's
|
|
5
|
+
* embedding vector and pre-computed benchmark centroids. This gives a
|
|
6
|
+
* semantic understanding of "what kind of task" a prompt represents.
|
|
7
|
+
*
|
|
8
|
+
* Exposes both sync (`classify`) and async (`classifyAsync`) entry points.
|
|
9
|
+
* The sync path requires an embedding provider whose `supportsSync` is true;
|
|
10
|
+
* the async path works with any provider.
|
|
11
|
+
*/
|
|
12
|
+
import { BaseClassifier, ClassificationResult } from './base.js';
|
|
13
|
+
import { BaseEmbeddingProvider } from '../embeddings/base.js';
|
|
14
|
+
import { CentroidLoader } from '../centroids/loader.js';
|
|
15
|
+
/**
 * Benchmark -> broad category mapping for display purposes.
 *
 * NOTE(review): each value is a two-string tuple, presumably
 * `[broadCategory, subcategory]` to line up with ClassificationResult --
 * confirm against embedding.ts.
 */
export declare const BENCHMARK_CATEGORIES: Record<string, [string, string]>;
|
|
17
|
+
/**
 * Semantic classifier using embedding cosine similarity.
 *
 * Flow:
 * 1. Embed the user prompt using the configured embedding provider
 * 2. Compute cosine similarity against each benchmark centroid
 * 3. Return similarity scores as the classification result
 *
 * Includes LRU caching for both embeddings and full classification results.
 */
export declare class EmbeddingClassifier extends BaseClassifier {
    private _provider;
    private _centroidLoader;
    private _embeddingCache;
    private _classificationCache;
    /**
     * @param embeddingProvider - Embedding provider used to embed prompts.
     * @param centroidLoader - Loader for the benchmark centroids that prompts are scored against.
     * @param opts - Optional cache tuning.
     *   NOTE(review): presumably `embeddingCacheSize` and `classificationCacheSize`
     *   are the capacities of the two LRU caches and `ttlSeconds` is the entry
     *   lifetime; the defaults are not visible in this declaration -- confirm
     *   in embedding.ts.
     */
    constructor(embeddingProvider: BaseEmbeddingProvider, centroidLoader: CentroidLoader, opts?: {
        embeddingCacheSize?: number;
        classificationCacheSize?: number;
        ttlSeconds?: number;
    });
    /**
     * Synchronous classification. Requires the underlying provider to support
     * `embed()` (`supportsSync === true`); otherwise the provider will throw.
     *
     * @param prompt - The user's input text.
     * @returns The ClassificationResult for the prompt.
     */
    classify(prompt: string): ClassificationResult;
    /**
     * Asynchronous classification. Works with any embedding provider; sync
     * providers route through their default async fallback in BaseEmbeddingProvider.
     *
     * @param prompt - The user's input text.
     * @returns Promise resolving to the ClassificationResult for the prompt.
     */
    classifyAsync(prompt: string): Promise<ClassificationResult>;
    /** Try to return a cached classification result, stamped with fresh timing. */
    private _readCache;
    /** Score an embedding against the given centroids, cache the result, and return it. */
    private _scoreAndCache;
    /** NOTE(review): presumably obtains the prompt embedding on the sync path, using the embedding cache -- confirm in embedding.ts. */
    private _getEmbeddingSync;
    /** NOTE(review): presumably the async counterpart of `_getEmbeddingSync` -- confirm in embedding.ts. */
    private _getEmbeddingAsync;
    /** Readiness check required by BaseClassifier; the readiness criteria are not visible in this declaration. */
    isReady(): boolean;
    /**
     * Embedding cache key. Includes the embedding model name and dimension so
     * vectors from different models/dimensions never collide for the same prompt.
     */
    private _embeddingCacheKey;
    /**
     * Classification cache key. Includes the embedding model name, dimension, and
     * benchmark-set fingerprint (not just md5(prompt)) so cached classifications
     * are invalidated when the model, dimension, or benchmark set changes. Must
     * stay identical to the Python SDK's classification cache key.
     */
    private _classificationCacheKey;
    /** NOTE(review): presumably the hash helper behind the cache keys (the classification-key comment mentions md5) -- confirm in embedding.ts. */
    private static _hash;
}
|
|
68
|
+
//# sourceMappingURL=embedding.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embedding.d.ts","sourceRoot":"","sources":["../../src/classifiers/embedding.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAKH,OAAO,EAAE,cAAc,EAAE,oBAAoB,EAAE,MAAM,WAAW,CAAC;AAEjE,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,EAAE,cAAc,EAAwB,MAAM,wBAAwB,CAAC;AAE9E,gEAAgE;AAChE,eAAO,MAAM,oBAAoB,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAajE,CAAC;AAEF;;;;;;;;;GASG;AACH,qBAAa,mBAAoB,SAAQ,cAAc;IACrD,OAAO,CAAC,SAAS,CAAwB;IACzC,OAAO,CAAC,eAAe,CAAiB;IACxC,OAAO,CAAC,eAAe,CAAqB;IAC5C,OAAO,CAAC,oBAAoB,CAAiC;gBAG3D,iBAAiB,EAAE,qBAAqB,EACxC,cAAc,EAAE,cAAc,EAC9B,IAAI,CAAC,EAAE;QACL,kBAAkB,CAAC,EAAE,MAAM,CAAC;QAC5B,uBAAuB,CAAC,EAAE,MAAM,CAAC;QACjC,UAAU,CAAC,EAAE,MAAM,CAAC;KACrB;IAgBH;;;OAGG;IACH,QAAQ,CAAC,MAAM,EAAE,MAAM,GAAG,oBAAoB;IAgB9C;;;OAGG;IACG,aAAa,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,oBAAoB,CAAC;IAelE,+EAA+E;IAC/E,OAAO,CAAC,UAAU;IAUlB,uFAAuF;IACvF,OAAO,CAAC,cAAc;IAwDtB,OAAO,CAAC,iBAAiB;YAUX,kBAAkB;IAUhC,OAAO,IAAI,OAAO;IAIlB;;;OAGG;IACH,OAAO,CAAC,kBAAkB;IAM1B;;;;;OAKG;IACH,OAAO,CAAC,uBAAuB;IAO/B,OAAO,CAAC,MAAM,CAAC,KAAK;CAGrB"}
|