tryaii-dre 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. package/LICENSE +190 -0
  2. package/README.md +234 -0
  3. package/dist/banner.d.ts +24 -0
  4. package/dist/banner.d.ts.map +1 -0
  5. package/dist/banner.js +125 -0
  6. package/dist/banner.js.map +1 -0
  7. package/dist/benchmarks/index.d.ts +4 -0
  8. package/dist/benchmarks/index.d.ts.map +1 -0
  9. package/dist/benchmarks/index.js +3 -0
  10. package/dist/benchmarks/index.js.map +1 -0
  11. package/dist/benchmarks/registry.d.ts +69 -0
  12. package/dist/benchmarks/registry.d.ts.map +1 -0
  13. package/dist/benchmarks/registry.js +128 -0
  14. package/dist/benchmarks/registry.js.map +1 -0
  15. package/dist/benchmarks/standard.d.ts +6 -0
  16. package/dist/benchmarks/standard.d.ts.map +1 -0
  17. package/dist/benchmarks/standard.js +115 -0
  18. package/dist/benchmarks/standard.js.map +1 -0
  19. package/dist/budget.d.ts +65 -0
  20. package/dist/budget.d.ts.map +1 -0
  21. package/dist/budget.js +344 -0
  22. package/dist/budget.js.map +1 -0
  23. package/dist/cache/index.d.ts +27 -0
  24. package/dist/cache/index.d.ts.map +1 -0
  25. package/dist/cache/index.js +63 -0
  26. package/dist/cache/index.js.map +1 -0
  27. package/dist/centroids/data/centroids_all-MiniLM-L6-v2.json +1 -0
  28. package/dist/centroids/data/trainingQueries.json +246 -0
  29. package/dist/centroids/generator.d.ts +63 -0
  30. package/dist/centroids/generator.d.ts.map +1 -0
  31. package/dist/centroids/generator.js +120 -0
  32. package/dist/centroids/generator.js.map +1 -0
  33. package/dist/centroids/index.d.ts +3 -0
  34. package/dist/centroids/index.d.ts.map +1 -0
  35. package/dist/centroids/index.js +3 -0
  36. package/dist/centroids/index.js.map +1 -0
  37. package/dist/centroids/loader.d.ts +87 -0
  38. package/dist/centroids/loader.d.ts.map +1 -0
  39. package/dist/centroids/loader.js +236 -0
  40. package/dist/centroids/loader.js.map +1 -0
  41. package/dist/classifiers/base.d.ts +56 -0
  42. package/dist/classifiers/base.d.ts.map +1 -0
  43. package/dist/classifiers/base.js +42 -0
  44. package/dist/classifiers/base.js.map +1 -0
  45. package/dist/classifiers/embedding.d.ts +68 -0
  46. package/dist/classifiers/embedding.d.ts.map +1 -0
  47. package/dist/classifiers/embedding.js +0 -0
  48. package/dist/classifiers/embedding.js.map +1 -0
  49. package/dist/classifiers/hybrid.d.ts +31 -0
  50. package/dist/classifiers/hybrid.d.ts.map +1 -0
  51. package/dist/classifiers/hybrid.js +61 -0
  52. package/dist/classifiers/hybrid.js.map +1 -0
  53. package/dist/classifiers/index.d.ts +4 -0
  54. package/dist/classifiers/index.d.ts.map +1 -0
  55. package/dist/classifiers/index.js +3 -0
  56. package/dist/classifiers/index.js.map +1 -0
  57. package/dist/classifiers/keyword.d.ts +29 -0
  58. package/dist/classifiers/keyword.d.ts.map +1 -0
  59. package/dist/classifiers/keyword.js +264 -0
  60. package/dist/classifiers/keyword.js.map +1 -0
  61. package/dist/cli.d.ts +15 -0
  62. package/dist/cli.d.ts.map +1 -0
  63. package/dist/cli.js +597 -0
  64. package/dist/cli.js.map +1 -0
  65. package/dist/client-types.d.ts +101 -0
  66. package/dist/client-types.d.ts.map +1 -0
  67. package/dist/client-types.js +5 -0
  68. package/dist/client-types.js.map +1 -0
  69. package/dist/client.d.ts +50 -0
  70. package/dist/client.d.ts.map +1 -0
  71. package/dist/client.js +279 -0
  72. package/dist/client.js.map +1 -0
  73. package/dist/config.d.ts +45 -0
  74. package/dist/config.d.ts.map +1 -0
  75. package/dist/config.js +37 -0
  76. package/dist/config.js.map +1 -0
  77. package/dist/dashboard/index.d.ts +48 -0
  78. package/dist/dashboard/index.d.ts.map +1 -0
  79. package/dist/dashboard/index.js +166 -0
  80. package/dist/dashboard/index.js.map +1 -0
  81. package/dist/embeddings/base.d.ts +66 -0
  82. package/dist/embeddings/base.d.ts.map +1 -0
  83. package/dist/embeddings/base.js +77 -0
  84. package/dist/embeddings/base.js.map +1 -0
  85. package/dist/embeddings/index.d.ts +3 -0
  86. package/dist/embeddings/index.d.ts.map +1 -0
  87. package/dist/embeddings/index.js +3 -0
  88. package/dist/embeddings/index.js.map +1 -0
  89. package/dist/embeddings/local.d.ts +42 -0
  90. package/dist/embeddings/local.d.ts.map +1 -0
  91. package/dist/embeddings/local.js +89 -0
  92. package/dist/embeddings/local.js.map +1 -0
  93. package/dist/index.d.ts +44 -0
  94. package/dist/index.d.ts.map +1 -0
  95. package/dist/index.js +45 -0
  96. package/dist/index.js.map +1 -0
  97. package/dist/integrations/index.d.ts +3 -0
  98. package/dist/integrations/index.d.ts.map +1 -0
  99. package/dist/integrations/index.js +2 -0
  100. package/dist/integrations/index.js.map +1 -0
  101. package/dist/integrations/openrouter.d.ts +84 -0
  102. package/dist/integrations/openrouter.d.ts.map +1 -0
  103. package/dist/integrations/openrouter.js +253 -0
  104. package/dist/integrations/openrouter.js.map +1 -0
  105. package/dist/registry/index.d.ts +2 -0
  106. package/dist/registry/index.d.ts.map +1 -0
  107. package/dist/registry/index.js +2 -0
  108. package/dist/registry/index.js.map +1 -0
  109. package/dist/registry/models.d.ts +76 -0
  110. package/dist/registry/models.d.ts.map +1 -0
  111. package/dist/registry/models.js +170 -0
  112. package/dist/registry/models.js.map +1 -0
  113. package/dist/registry/presets/defaultModels.json +435 -0
  114. package/dist/router.d.ts +178 -0
  115. package/dist/router.d.ts.map +1 -0
  116. package/dist/router.js +259 -0
  117. package/dist/router.js.map +1 -0
  118. package/dist/scoring/benchmarks.d.ts +35 -0
  119. package/dist/scoring/benchmarks.d.ts.map +1 -0
  120. package/dist/scoring/benchmarks.js +68 -0
  121. package/dist/scoring/benchmarks.js.map +1 -0
  122. package/dist/scoring/engine.d.ts +43 -0
  123. package/dist/scoring/engine.d.ts.map +1 -0
  124. package/dist/scoring/engine.js +267 -0
  125. package/dist/scoring/engine.js.map +1 -0
  126. package/dist/scoring/index.d.ts +6 -0
  127. package/dist/scoring/index.d.ts.map +1 -0
  128. package/dist/scoring/index.js +4 -0
  129. package/dist/scoring/index.js.map +1 -0
  130. package/dist/scoring/priorities.d.ts +41 -0
  131. package/dist/scoring/priorities.d.ts.map +1 -0
  132. package/dist/scoring/priorities.js +49 -0
  133. package/dist/scoring/priorities.js.map +1 -0
  134. package/dist/types.d.ts +47 -0
  135. package/dist/types.d.ts.map +1 -0
  136. package/dist/types.js +5 -0
  137. package/dist/types.js.map +1 -0
  138. package/dist/utils/cosine.d.ts +10 -0
  139. package/dist/utils/cosine.d.ts.map +1 -0
  140. package/dist/utils/cosine.js +18 -0
  141. package/dist/utils/cosine.js.map +1 -0
  142. package/dist/utils/math.d.ts +18 -0
  143. package/dist/utils/math.d.ts.map +1 -0
  144. package/dist/utils/math.js +54 -0
  145. package/dist/utils/math.js.map +1 -0
  146. package/package.json +65 -0
@@ -0,0 +1,120 @@
1
+ /**
2
+ * Centroid generator -- creates benchmark centroids from training queries.
3
+ *
4
+ * Centroids are the average embedding of all training queries for a benchmark.
5
+ * They are used by the EmbeddingClassifier to measure how similar a user's
6
+ * prompt is to each benchmark category.
7
+ *
8
+ * Centroids are regenerated when the embedding model changes, because different
9
+ * models produce different vector spaces.
10
+ */
11
+ import { readFileSync, writeFileSync, mkdirSync } from 'node:fs';
12
+ import { dirname, join } from 'node:path';
13
+ import { fileURLToPath } from 'node:url';
14
+ import { vectorMean, vectorNormalize } from '../utils/math.js';
15
+ const currentDir = dirname(fileURLToPath(import.meta.url));
16
+ /** Path to bundled training queries. */
17
+ export const TRAINING_QUERIES_PATH = join(currentDir, 'data', 'trainingQueries.json');
18
/**
 * Builds, persists and reloads benchmark centroids.
 *
 * A centroid is the normalized mean of the embedding vectors produced for a
 * benchmark's representative queries. Prompts are later compared against
 * these centroids (per the module notes, via cosine similarity) to decide
 * which benchmark a task most resembles.
 */
export class CentroidGenerator {
    /**
     * @param embeddingProvider - Provider used to embed queries. Its
     *   `modelName` and `dimension` are recorded in the metadata on `save()`.
     */
    constructor(embeddingProvider) {
        this._provider = embeddingProvider;
    }

    /**
     * Build one centroid per benchmark using the provider's sync batch API.
     *
     * @param trainingQueries - Map of benchmark name -> list of queries.
     *   Any falsy value selects the bundled default queries.
     * @returns Map of benchmark name -> centroid vector (number array).
     */
    generate(trainingQueries) {
        const source = trainingQueries ? trainingQueries : this._loadDefaultQueries();
        const result = {};
        Object.entries(source).forEach(([benchmark, queries]) => {
            result[benchmark] = this._centroidOf(this._provider.embedBatch(queries));
        });
        return result;
    }

    /**
     * Async counterpart of `generate`, routed through the provider's
     * `embedBatchAsync`. Benchmarks are embedded one after another, in the
     * order their keys appear in the input map.
     *
     * @param trainingQueries - Map of benchmark name -> list of queries.
     *   Any falsy value selects the bundled default queries.
     * @returns Promise of a map of benchmark name -> centroid vector.
     */
    async generateAsync(trainingQueries) {
        const source = trainingQueries ? trainingQueries : this._loadDefaultQueries();
        const pairs = Object.entries(source);
        const result = {};
        for (let i = 0; i < pairs.length; i += 1) {
            const [benchmark, queries] = pairs[i];
            const vectors = await this._provider.embedBatchAsync(queries);
            result[benchmark] = this._centroidOf(vectors);
        }
        return result;
    }

    /**
     * Build a single centroid for a custom benchmark (sync provider path).
     *
     * @param benchmarkName - Name of the benchmark (not used in the computation).
     * @param queries - Representative queries for this benchmark.
     * @returns Centroid vector (number array).
     */
    generateFromCustom(benchmarkName, queries) {
        return this._centroidOf(this._provider.embedBatch(queries));
    }

    /**
     * Async counterpart of `generateFromCustom`, using `embedBatchAsync`.
     *
     * @param benchmarkName - Name of the benchmark (not used in the computation).
     * @param queries - Representative queries for this benchmark.
     * @returns Promise of the centroid vector.
     */
    async generateFromCustomAsync(benchmarkName, queries) {
        const vectors = await this._provider.embedBatchAsync(queries);
        return this._centroidOf(vectors);
    }

    /**
     * Write centroids plus provider metadata to `path` as JSON, creating the
     * parent directory first if it does not exist.
     *
     * @param centroids - Map of benchmark name -> centroid vector.
     * @param path - Destination file path.
     */
    save(centroids, path) {
        mkdirSync(dirname(path), { recursive: true });
        const metadata = {
            model: this._provider.modelName,
            dimension: this._provider.dimension,
            benchmark_count: Object.keys(centroids).length,
        };
        // Key order (metadata, then centroids) is kept so the file layout is stable.
        writeFileSync(path, JSON.stringify({ metadata, centroids }));
    }

    /**
     * Read a centroid file previously written by `save()`.
     *
     * @param path - File to read.
     * @returns Object with the file's `centroids` and `metadata` entries;
     *   no validation is performed here.
     */
    static load(path) {
        const { centroids, metadata } = JSON.parse(readFileSync(path, 'utf-8'));
        return { centroids, metadata };
    }

    /** Read the bundled training-query file and flatten it to name -> queries. */
    _loadDefaultQueries() {
        const { benchmarks } = JSON.parse(readFileSync(TRAINING_QUERIES_PATH, 'utf-8'));
        const queries = {};
        Object.entries(benchmarks).forEach(([name, benchData]) => {
            queries[name] = benchData.queries;
        });
        return queries;
    }

    /** Centroid = mean of the embedding vectors, then normalized. */
    _centroidOf(embeddings) {
        return vectorNormalize(vectorMean(embeddings));
    }
}
120
+ //# sourceMappingURL=generator.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"generator.js","sourceRoot":"","sources":["../../src/centroids/generator.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,YAAY,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AACjE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAIzC,OAAO,EAAE,UAAU,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AAE/D,MAAM,UAAU,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAE3D,wCAAwC;AACxC,MAAM,CAAC,MAAM,qBAAqB,GAAG,IAAI,CAAC,UAAU,EAAE,MAAM,EAAE,sBAAsB,CAAC,CAAC;AAEtF;;;;;;;GAOG;AACH,MAAM,OAAO,iBAAiB;IAG5B,YAAY,iBAAwC;QAClD,IAAI,CAAC,SAAS,GAAG,iBAAiB,CAAC;IACrC,CAAC;IAED;;;;;;OAMG;IACH,QAAQ,CAAC,eAA0C;QACjD,IAAI,CAAC,eAAe,EAAE,CAAC;YACrB,eAAe,GAAG,IAAI,CAAC,mBAAmB,EAAE,CAAC;QAC/C,CAAC;QAED,MAAM,SAAS,GAA6B,EAAE,CAAC;QAE/C,KAAK,MAAM,CAAC,SAAS,EAAE,OAAO,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,eAAe,CAAC,EAAE,CAAC;YACnE,uCAAuC;YACvC,MAAM,UAAU,GAAG,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;YAEtD,uDAAuD;YACvD,MAAM,QAAQ,GAAG,eAAe,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC,CAAC;YACzD,SAAS,CAAC,SAAS,CAAC,GAAG,QAAQ,CAAC;QAClC,CAAC;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,aAAa,CAAC,eAA0C;QAC5D,IAAI,CAAC,eAAe,EAAE,CAAC;YACrB,eAAe,GAAG,IAAI,CAAC,mBAAmB,EAAE,CAAC;QAC/C,CAAC;QAED,MAAM,SAAS,GAA6B,EAAE,CAAC;QAE/C,KAAK,MAAM,CAAC,SAAS,EAAE,OAAO,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,eAAe,CAAC,EAAE,CAAC;YACnE,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC;YACjE,MAAM,QAAQ,GAAG,eAAe,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC,CAAC;YACzD,SAAS,CAAC,SAAS,CAAC,GAAG,QAAQ,CAAC;QAClC,CAAC;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;;OAMG;IACH,kBAAkB,CAAC,aAAqB,EAAE,OAAiB;QACzD,MAAM,UAAU,GAAG,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;QACtD,OAAO,eAAe,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC,CAAC;IACjD,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,uBAAuB,CAAC,aAAqB,EAAE,OAAiB;QACpE,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC;QACjE,OAAO,eAAe,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC,CAAC;IACjD,CAAC;IAED,qCAAqC;IACrC,IAAI,CAAC,SAAmC,EAAE,IAAY;QACpD,SAAS,CAAC,OAAO,CAAC,IAA
I,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAE9C,MAAM,IAAI,GAAkB;YAC1B,QAAQ,EAAE;gBACR,KAAK,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS;gBAC/B,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS;gBACnC,eAAe,EAAE,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,MAAM;aAC/C;YACD,SAAS;SACV,CAAC;QAEF,aAAa,CAAC,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC;IAC5C,CAAC;IAED,uCAAuC;IACvC,MAAM,CAAC,IAAI,CAAC,IAAY;QACtB,MAAM,GAAG,GAAG,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QACxC,MAAM,IAAI,GAAkB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAE5C,OAAO;YACL,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,QAAQ,EAAE,IAAI,CAAC,QAAQ;SACxB,CAAC;IACJ,CAAC;IAED,qCAAqC;IAC7B,mBAAmB;QACzB,MAAM,GAAG,GAAG,YAAY,CAAC,qBAAqB,EAAE,OAAO,CAAC,CAAC;QACzD,MAAM,IAAI,GAAwB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAElD,MAAM,OAAO,GAA6B,EAAE,CAAC;QAC7C,KAAK,MAAM,CAAC,IAAI,EAAE,SAAS,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC;YAChE,OAAO,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,OAAO,CAAC;QACpC,CAAC;QACD,OAAO,OAAO,CAAC;IACjB,CAAC;CACF"}
@@ -0,0 +1,3 @@
1
+ export { CentroidGenerator, TRAINING_QUERIES_PATH } from './generator.js';
2
+ export { CentroidLoader } from './loader.js';
3
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/centroids/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AAC1E,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC"}
@@ -0,0 +1,3 @@
1
+ export { CentroidGenerator, TRAINING_QUERIES_PATH } from './generator.js';
2
+ export { CentroidLoader } from './loader.js';
3
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/centroids/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AAC1E,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC"}
@@ -0,0 +1,87 @@
1
+ /**
2
+ * Centroid loader -- handles lazy initialization and model compatibility.
3
+ *
4
+ * Loading priority:
5
+ * 1. In-memory cache (already loaded)
6
+ * 2. User's cache directory (previously generated for their model)
7
+ * 3. Bundled static file (ships with package for default model -- zero delay)
8
+ * 4. Generate from training queries (only if using a non-default model)
9
+ */
10
+ import { BaseEmbeddingProvider } from '../embeddings/base.js';
11
+ /**
12
+ * Stable fingerprint of the benchmark set present in a centroid file.
13
+ *
14
+ * Defined as the sorted benchmark names joined by "|". Used to detect when a
15
+ * cached/bundled centroid file was generated against a different benchmark set
16
+ * and must be regenerated. Must stay identical to the Python SDK's
17
+ * `benchmark_fingerprint` (centroids/generator.py).
18
+ */
19
+ export declare function benchmarkFingerprint(benchmarkNames: Iterable<string>): string;
20
+ /**
21
+ * Manages centroid lifecycle: load, validate, regenerate.
22
+ *
23
+ * For the default embedding model (all-MiniLM-L6-v2), centroids are
24
+ * bundled with the package -- zero first-run delay. For other models,
25
+ * centroids are generated on first use and cached to disk.
26
+ */
27
+ export declare class CentroidLoader {
28
+ private _provider;
29
+ private _centroids;
30
+ private _generator;
31
+ private _userCachePath;
32
+ constructor(embeddingProvider: BaseEmbeddingProvider, userCachePath?: string);
33
+ /**
34
+ * Get centroids, loading from best available source.
35
+ *
36
+ * Priority: memory > user cache > bundled static > generate fresh.
37
+ *
38
+ * Synchronous path -- if regeneration is needed (non-default model,
39
+ * first run) and the provider is async-only, this will throw. Async
40
+ * callers should use `getCentroidsAsync()` instead.
41
+ */
42
+ getCentroids(): Record<string, number[]>;
43
+ /**
44
+ * Async version of `getCentroids`. Works with any embedding provider --
45
+ * if centroid regeneration is needed, routes through the provider's
46
+ * async path.
47
+ */
48
+ getCentroidsAsync(): Promise<Record<string, number[]>>;
49
+ /**
50
+ * Try memory / user cache / bundled centroid file. Returns null if none
51
+ * of these sources have a valid file for the current provider's model.
52
+ * Shared between the sync and async load paths.
53
+ */
54
+ private _tryLoadFromFiles;
55
+ private _tryLoad;
56
+ private _regenerate;
57
+ private _regenerateAsync;
58
+ /**
59
+ * Force regeneration of centroids.
60
+ *
61
+ * @param customQueries - Optional custom training queries. If undefined, uses defaults.
62
+ */
63
+ regenerate(customQueries?: Record<string, string[]>): Record<string, number[]>;
64
+ /**
65
+ * Add a custom benchmark centroid to the existing set (sync).
66
+ *
67
+ * Requires a sync-capable embedding provider. Callers with async-only
68
+ * providers (e.g. LocalEmbeddingProvider) should use
69
+ * `addBenchmarkCentroidAsync()`.
70
+ *
71
+ * @param benchmarkName - Name of the new benchmark.
72
+ * @param queries - Representative queries for this benchmark.
73
+ * @returns The generated centroid vector.
74
+ */
75
+ addBenchmarkCentroid(benchmarkName: string, queries: string[]): number[];
76
+ /**
77
+ * Async version of `addBenchmarkCentroid`. Works with any provider.
78
+ * Mutates the in-memory centroid map so subsequent `getCentroids()` calls
79
+ * (including from classifiers sharing this loader) see the new benchmark.
80
+ */
81
+ addBenchmarkCentroidAsync(benchmarkName: string, queries: string[]): Promise<number[]>;
82
+ /** Remove a benchmark centroid. Returns true if it existed. */
83
+ removeBenchmark(benchmarkName: string): boolean;
84
+ /** List all available benchmark names. */
85
+ get availableBenchmarks(): string[];
86
+ }
87
+ //# sourceMappingURL=loader.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"loader.d.ts","sourceRoot":"","sources":["../../src/centroids/loader.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAMH,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAc9D;;;;;;;GAOG;AACH,wBAAgB,oBAAoB,CAAC,cAAc,EAAE,QAAQ,CAAC,MAAM,CAAC,GAAG,MAAM,CAE7E;AA6BD;;;;;;GAMG;AACH,qBAAa,cAAc;IACzB,OAAO,CAAC,SAAS,CAAwB;IACzC,OAAO,CAAC,UAAU,CAAyC;IAC3D,OAAO,CAAC,UAAU,CAAoB;IACtC,OAAO,CAAC,cAAc,CAAgB;gBAGpC,iBAAiB,EAAE,qBAAqB,EACxC,aAAa,CAAC,EAAE,MAAM;IAOxB;;;;;;;;OAQG;IACH,YAAY,IAAI,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC;IAMxC;;;;OAIG;IACG,iBAAiB,IAAI,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;IAM5D;;;;OAIG;IACH,OAAO,CAAC,iBAAiB;IAuBzB,OAAO,CAAC,QAAQ;IAoChB,OAAO,CAAC,WAAW;YAYL,gBAAgB;IAW9B;;;;OAIG;IACH,UAAU,CAAC,aAAa,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC;IAW9E;;;;;;;;;;OAUG;IACH,oBAAoB,CAAC,aAAa,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE;IAaxE;;;;OAIG;IACG,yBAAyB,CAAC,aAAa,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAY5F,+DAA+D;IAC/D,eAAe,CAAC,aAAa,EAAE,MAAM,GAAG,OAAO;IAY/C,0CAA0C;IAC1C,IAAI,mBAAmB,IAAI,MAAM,EAAE,CAElC;CACF"}
@@ -0,0 +1,236 @@
1
+ /**
2
+ * Centroid loader -- handles lazy initialization and model compatibility.
3
+ *
4
+ * Loading priority:
5
+ * 1. In-memory cache (already loaded)
6
+ * 2. User's cache directory (previously generated for their model)
7
+ * 3. Bundled static file (ships with package for default model -- zero delay)
8
+ * 4. Generate from training queries (only if using a non-default model)
9
+ */
10
+ import { existsSync, readFileSync } from 'node:fs';
11
+ import { dirname, join } from 'node:path';
12
+ import { fileURLToPath } from 'node:url';
13
+ import { CentroidGenerator, TRAINING_QUERIES_PATH } from './generator.js';
14
+ const currentDir = dirname(fileURLToPath(import.meta.url));
15
+ /** Path to bundled centroids (ships with the package). */
16
+ const BUNDLED_CENTROIDS_DIR = join(currentDir, 'data');
17
/**
 * Resolve the location of the bundled centroid file for a given model.
 *
 * Every "/" in the model name is replaced by "__" so the name becomes a
 * single file-name segment: `centroids_<name>.json` inside the bundled
 * data directory.
 */
function bundledCentroidPath(modelName) {
    const fileStem = modelName.split('/').join('__');
    return join(BUNDLED_CENTROIDS_DIR, 'centroids_' + fileStem + '.json');
}
22
/**
 * Stable fingerprint of a set of benchmark names.
 *
 * The names are copied, sorted with the default `Array.prototype.sort`
 * ordering, and joined with "|". The loader compares fingerprints to detect
 * a cached/bundled centroid file that was generated against a different
 * benchmark set and therefore has to be regenerated. Must stay identical to
 * the Python SDK's `benchmark_fingerprint` (centroids/generator.py).
 *
 * @param benchmarkNames - Any iterable of benchmark names; it is not mutated.
 * @returns The sorted names joined by "|" (empty string for no names).
 */
export function benchmarkFingerprint(benchmarkNames) {
    const ordered = [...benchmarkNames];
    ordered.sort();
    return ordered.join('|');
}
33
/**
 * Fingerprint of the bundled default benchmark set.
 *
 * Reads the shipped training-query file on every call and fingerprints the
 * keys of its `benchmarks` object. A centroid file whose benchmark set does
 * not match this value was built against a different benchmark set and must
 * be regenerated. Mirrors the Python SDK's
 * `CentroidGenerator.default_benchmark_fingerprint()`.
 */
function defaultBenchmarkFingerprint() {
    const { benchmarks } = JSON.parse(readFileSync(TRAINING_QUERIES_PATH, 'utf-8'));
    const names = Object.keys(benchmarks);
    return benchmarkFingerprint(names);
}
44
/**
 * Whether the provider has reported a real embedding dimension yet.
 *
 * Per the original note, the default `LocalEmbeddingProvider` answers with a
 * hardcoded fallback (384) until its model is initialized, so comparing a
 * saved dimension against it too early would wrongly reject/accept centroid
 * files for non-default-dimension models. The uninitialized state is detected
 * through the provider's private `_dimension` marker being `null`.
 *
 * @param provider - Embedding provider to inspect.
 * @returns `false` only when `_dimension` is exactly `null`; `true` when the
 *   marker is absent (custom providers) or holds any other value.
 */
function providerDimensionKnown(provider) {
    const marker = provider._dimension;
    if (marker === undefined) {
        // No marker at all: custom providers are taken to report a real dimension.
        return true;
    }
    return marker !== null;
}
58
/**
 * Manages the centroid lifecycle: load, validate, regenerate, and edit.
 *
 * For the default embedding model (all-MiniLM-L6-v2), centroids are bundled
 * with the package -- zero first-run delay. For other models, centroids are
 * generated on first use and, when a user cache path was given, written to
 * disk.
 *
 * NOTE(review): `_tryLoad` only accepts a file whose benchmark set equals the
 * bundled default set. Centroid sets persisted after `addBenchmarkCentroid`,
 * `removeBenchmark` or `regenerate(customQueries)` therefore look like they
 * will be rejected on the next cold load -- confirm whether that is intended
 * (and how the Python SDK behaves) before relying on persisted custom sets.
 */
export class CentroidLoader {
    /**
     * @param embeddingProvider - Provider whose `modelName` / `dimension`
     *   centroid files are validated against, and which embeds queries when
     *   centroids must be generated.
     * @param userCachePath - Optional file path for persisted centroids.
     *   When omitted, nothing is read from or written to a user cache.
     */
    constructor(embeddingProvider, userCachePath) {
        this._centroids = null;
        this._provider = embeddingProvider;
        this._generator = new CentroidGenerator(embeddingProvider);
        this._userCachePath = userCachePath ?? null;
    }

    /**
     * Get centroids, loading from the best available source.
     *
     * Priority: memory > user cache > bundled static > generate fresh.
     *
     * Synchronous path -- if regeneration is needed (non-default model,
     * first run) and the provider is async-only, this will throw. Async
     * callers should use `getCentroidsAsync()` instead.
     *
     * @returns Map of benchmark name -> centroid vector.
     */
    getCentroids() {
        const cached = this._tryLoadFromFiles();
        return cached !== null ? cached : this._regenerate();
    }

    /**
     * Async version of `getCentroids`. If centroid regeneration is needed it
     * is routed through the generator's async path.
     *
     * @returns Promise of a map of benchmark name -> centroid vector.
     */
    async getCentroidsAsync() {
        const cached = this._tryLoadFromFiles();
        if (cached !== null) {
            return cached;
        }
        return this._regenerateAsync();
    }

    /**
     * Try memory / user cache / bundled centroid file, in that order.
     * Shared between the sync and async load paths.
     *
     * @returns The centroid map (also remembered in memory), or `null` when
     *   none of these sources holds a valid file for the current provider.
     */
    _tryLoadFromFiles() {
        if (this._centroids !== null) {
            return this._centroids;
        }
        // 1. User's cached centroids, if a cache path was configured.
        if (this._userCachePath) {
            const fromCache = this._tryLoad(this._userCachePath);
            if (fromCache !== null) {
                this._centroids = fromCache;
                return fromCache;
            }
        }
        // 2. Bundled static centroids that ship with the package.
        const fromBundle = this._tryLoad(bundledCentroidPath(this._provider.modelName));
        if (fromBundle !== null) {
            this._centroids = fromBundle;
            return fromBundle;
        }
        return null;
    }

    /**
     * Load and validate one centroid file.
     *
     * @param path - Candidate centroid file.
     * @returns The file's centroid map, or `null` when the file is missing,
     *   unreadable/malformed, or fails any of the compatibility checks.
     */
    _tryLoad(path) {
        if (!existsSync(path)) {
            return null;
        }
        try {
            const { centroids, metadata } = CentroidGenerator.load(path);
            const savedModel = metadata.model ?? '';
            const savedDim = metadata.dimension ?? 0;
            // Model name must always match.
            if (savedModel !== this._provider.modelName) {
                return null;
            }
            // Dimension check: skipped while the provider has not reported a
            // real dimension (it may still be returning a hardcoded fallback),
            // otherwise a non-default-dimension file would be wrongly
            // rejected/accepted.
            if (providerDimensionKnown(this._provider) && savedDim !== this._provider.dimension) {
                return null;
            }
            // Benchmark-set check: fingerprint the benchmarks actually present
            // in the file (not a stored metadata value that older/bundled
            // files lack) and compare against the expected default set.
            if (benchmarkFingerprint(Object.keys(centroids)) !== defaultBenchmarkFingerprint()) {
                return null;
            }
            return centroids;
        }
        catch {
            // Best-effort: any read/parse/shape failure means "no usable file here".
            return null;
        }
    }

    /** Generate default centroids via the sync provider path and adopt them. */
    _regenerate() {
        return this._adopt(this._generator.generate());
    }

    /** Generate default centroids via the async provider path and adopt them. */
    async _regenerateAsync() {
        return this._adopt(await this._generator.generateAsync());
    }

    /**
     * Force regeneration of centroids.
     *
     * @param customQueries - Optional custom training queries. If undefined, uses defaults.
     * @returns The newly generated centroid map.
     */
    regenerate(customQueries) {
        return this._adopt(this._generator.generate(customQueries));
    }

    /**
     * Add a custom benchmark centroid to the existing set (sync).
     *
     * Requires a sync-capable embedding provider. Callers with async-only
     * providers (e.g. LocalEmbeddingProvider) should use
     * `addBenchmarkCentroidAsync()`.
     *
     * @param benchmarkName - Name of the new benchmark.
     * @param queries - Representative queries for this benchmark.
     * @returns The generated centroid vector.
     */
    addBenchmarkCentroid(benchmarkName, queries) {
        const centroids = this.getCentroids();
        const newCentroid = this._generator.generateFromCustom(benchmarkName, queries);
        centroids[benchmarkName] = newCentroid;
        this._persist(centroids);
        return newCentroid;
    }

    /**
     * Async version of `addBenchmarkCentroid`.
     * Mutates the in-memory centroid map so subsequent `getCentroids()` calls
     * on this loader see the new benchmark.
     *
     * @param benchmarkName - Name of the new benchmark.
     * @param queries - Representative queries for this benchmark.
     * @returns Promise of the generated centroid vector.
     */
    async addBenchmarkCentroidAsync(benchmarkName, queries) {
        const centroids = await this.getCentroidsAsync();
        const newCentroid = await this._generator.generateFromCustomAsync(benchmarkName, queries);
        centroids[benchmarkName] = newCentroid;
        this._persist(centroids);
        return newCentroid;
    }

    /**
     * Remove a benchmark centroid.
     *
     * @param benchmarkName - Benchmark to remove.
     * @returns `true` if the benchmark existed and was removed, else `false`.
     */
    removeBenchmark(benchmarkName) {
        const centroids = this.getCentroids();
        // Own-property check: a plain `in` test would also match inherited
        // Object.prototype keys such as "toString" and report a removal
        // (and rewrite the cache) even though nothing was deleted.
        if (!Object.prototype.hasOwnProperty.call(centroids, benchmarkName)) {
            return false;
        }
        delete centroids[benchmarkName];
        this._persist(centroids);
        return true;
    }

    /** List all available benchmark names (loads centroids via the sync path). */
    get availableBenchmarks() {
        return Object.keys(this.getCentroids());
    }

    /**
     * Remember freshly generated centroids, then write them to the user cache.
     *
     * Memory is updated BEFORE the disk write so that a failing cache write
     * (which still throws to the caller) does not discard the computed
     * centroids; later `getCentroids()` calls are served from memory.
     */
    _adopt(centroids) {
        this._centroids = centroids;
        this._persist(centroids);
        return centroids;
    }

    /** Write `centroids` to the user cache file when a cache path is configured. */
    _persist(centroids) {
        if (this._userCachePath) {
            this._generator.save(centroids, this._userCachePath);
        }
    }
}
236
+ //# sourceMappingURL=loader.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"loader.js","sourceRoot":"","sources":["../../src/centroids/loader.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACnD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAGzC,OAAO,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AAE1E,MAAM,UAAU,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAE3D,0DAA0D;AAC1D,MAAM,qBAAqB,GAAG,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;AAEvD,+DAA+D;AAC/D,SAAS,mBAAmB,CAAC,SAAiB;IAC5C,MAAM,QAAQ,GAAG,SAAS,CAAC,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;IAChD,OAAO,IAAI,CAAC,qBAAqB,EAAE,aAAa,QAAQ,OAAO,CAAC,CAAC;AACnE,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,oBAAoB,CAAC,cAAgC;IACnE,OAAO,CAAC,GAAG,cAAc,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC9C,CAAC;AAED;;;;;GAKG;AACH,SAAS,2BAA2B;IAClC,MAAM,GAAG,GAAG,YAAY,CAAC,qBAAqB,EAAE,OAAO,CAAC,CAAC;IACzD,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAA4C,CAAC;IACxE,OAAO,oBAAoB,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC;AAC5D,CAAC;AAED;;;;;;;;GAQG;AACH,SAAS,sBAAsB,CAAC,QAA+B;IAC7D,MAAM,GAAG,GAAI,QAAsD,CAAC,UAAU,CAAC;IAC/E,2EAA2E;IAC3E,OAAO,GAAG,KAAK,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,KAAK,IAAI,CAAC;AACjD,CAAC;AAED;;;;;;GAMG;AACH,MAAM,OAAO,cAAc;IAMzB,YACE,iBAAwC,EACxC,aAAsB;QANhB,eAAU,GAAoC,IAAI,CAAC;QAQzD,IAAI,CAAC,SAAS,GAAG,iBAAiB,CAAC;QACnC,IAAI,CAAC,UAAU,GAAG,IAAI,iBAAiB,CAAC,iBAAiB,CAAC,CAAC;QAC3D,IAAI,CAAC,cAAc,GAAG,aAAa,IAAI,IAAI,CAAC;IAC9C,CAAC;IAED;;;;;;;;OAQG;IACH,YAAY;QACV,MAAM,MAAM,GAAG,IAAI,CAAC,iBAAiB,EAAE,CAAC;QACxC,IAAI,MAAM,KAAK,IAAI;YAAE,OAAO,MAAM,CAAC;QACnC,OAAO,IAAI,CAAC,WAAW,EAAE,CAAC;IAC5B,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,iBAAiB;QACrB,MAAM,MAAM,GAAG,IAAI,CAAC,iBAAiB,EAAE,CAAC;QACxC,IAAI,MAAM,KAAK,IAAI;YAAE,OAAO,MAAM,CAAC;QACnC,OAAO,IAAI,CAAC,gBAAgB,EAAE,CAAC;IACjC,CAAC;IAED;;;;OAIG;IACK,iBAAiB;QACvB,IAAI,IAAI,CAAC,UAAU,KAAK,IAAI;YAAE,OAAO,IAAI,CAAC,UAAU,CAAC;QAErD,iCAAiC;QACjC,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACxB,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,CA
AC,cAAc,CAAC,CAAC;YAClD,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;gBACpB,IAAI,CAAC,UAAU,GAAG,MAAM,CAAC;gBACzB,OAAO,IAAI,CAAC,UAAU,CAAC;YACzB,CAAC;QACH,CAAC;QAED,uDAAuD;QACvD,MAAM,WAAW,GAAG,mBAAmB,CAAC,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;QAClE,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;QAC1C,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;YACpB,IAAI,CAAC,UAAU,GAAG,MAAM,CAAC;YACzB,OAAO,IAAI,CAAC,UAAU,CAAC;QACzB,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAEO,QAAQ,CAAC,IAAY;QAC3B,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC;YAAE,OAAO,IAAI,CAAC;QAEnC,IAAI,CAAC;YACH,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,GAAG,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAE7D,MAAM,UAAU,GAAG,QAAQ,CAAC,KAAK,IAAI,EAAE,CAAC;YACxC,MAAM,QAAQ,GAAG,QAAQ,CAAC,SAAS,IAAI,CAAC,CAAC;YAEzC,gCAAgC;YAChC,IAAI,UAAU,KAAK,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,CAAC;gBAC5C,OAAO,IAAI,CAAC;YACd,CAAC;YAED,kEAAkE;YAClE,wEAAwE;YACxE,mEAAmE;YACnE,IAAI,sBAAsB,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,QAAQ,KAAK,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,CAAC;gBACpF,OAAO,IAAI,CAAC;YACd,CAAC;YAED,0EAA0E;YAC1E,wEAAwE;YACxE,0EAA0E;YAC1E,kEAAkE;YAClE,+DAA+D;YAC/D,IAAI,oBAAoB,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,KAAK,2BAA2B,EAAE,EAAE,CAAC;gBACnF,OAAO,IAAI,CAAC;YACd,CAAC;YAED,OAAO,SAAS,CAAC;QACnB,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAEO,WAAW;QACjB,MAAM,SAAS,GAAG,IAAI,CAAC,UAAU,CAAC,QAAQ,EAAE,CAAC;QAE7C,qCAAqC;QACrC,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACxB,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;QACvD,CAAC;QAED,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;QAC5B,OAAO,SAAS,CAAC;IACnB,CAAC;IAEO,KAAK,CAAC,gBAAgB;QAC5B,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,aAAa,EAAE,CAAC;QAExD,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACxB,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;QACvD,CAAC;QAED,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;QAC5B,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;OAIG;IACH,UAAU,CAAC,aAAwC;QACjD,MAAM,SAAS,GAAG,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QAE1D,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACxB,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;QACvD,CAAC;QAED,IAAI,CAAC,UAAU,GAAG
,SAAS,CAAC;QAC5B,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;;;;;;OAUG;IACH,oBAAoB,CAAC,aAAqB,EAAE,OAAiB;QAC3D,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,EAAE,CAAC;QACtC,MAAM,WAAW,GAAG,IAAI,CAAC,UAAU,CAAC,kBAAkB,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;QAC/E,SAAS,CAAC,aAAa,CAAC,GAAG,WAAW,CAAC;QAEvC,uCAAuC;QACvC,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACxB,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;QACvD,CAAC;QAED,OAAO,WAAW,CAAC;IACrB,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,yBAAyB,CAAC,aAAqB,EAAE,OAAiB;QACtE,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;QACjD,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,uBAAuB,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;QAC1F,SAAS,CAAC,aAAa,CAAC,GAAG,WAAW,CAAC;QAEvC,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACxB,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;QACvD,CAAC;QAED,OAAO,WAAW,CAAC;IACrB,CAAC;IAED,+DAA+D;IAC/D,eAAe,CAAC,aAAqB;QACnC,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,EAAE,CAAC;QACtC,IAAI,aAAa,IAAI,SAAS,EAAE,CAAC;YAC/B,OAAO,SAAS,CAAC,aAAa,CAAC,CAAC;YAChC,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;gBACxB,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;YACvD,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC;IAED,0CAA0C;IAC1C,IAAI,mBAAmB;QACrB,OAAO,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC,CAAC;IAC1C,CAAC;CACF"}
@@ -0,0 +1,56 @@
1
+ /**
2
+ * Abstract base classifier.
3
+ *
4
+ * The embedding classifier implements this interface. The abstraction is
5
+ * kept so custom classifiers can be plugged in for tests or research.
6
+ */
7
+ /** Output of any classifier. */
8
+ export interface ClassificationResult {
9
+ /** Cosine similarity to each benchmark centroid (0-1). */
10
+ benchmarkScores: Record<string, number>;
11
+ /** Broad category (for display / debugging). */
12
+ broadCategory: string;
13
+ /** Subcategory refinement. */
14
+ subcategory: string;
15
+ /** Confidence of the classification (0-1). */
16
+ confidence: number;
17
+ /** Which classifier produced this result (always "embedding" in the current system). */
18
+ classifierUsed: string;
19
+ /** Whether this result came from cache. */
20
+ cacheHit: boolean;
21
+ /** How long classification took in milliseconds. */
22
+ processingTimeMs: number;
23
+ }
24
+ /** Create a default (empty) ClassificationResult. */
25
+ export declare function emptyClassificationResult(): ClassificationResult;
26
+ /** Get top benchmarks sorted by similarity score (descending). */
27
+ export declare function topBenchmarks(result: ClassificationResult): Array<[string, number]>;
28
+ /**
29
+ * Abstract base class for prompt classifiers.
30
+ *
31
+ * A classifier takes a user prompt and returns benchmark similarity scores.
32
+ * These scores tell us "what kind of task is this?" in terms of which
33
+ * AI benchmarks it most resembles.
34
+ */
35
+ export declare abstract class BaseClassifier {
36
+ /**
37
+ * Classify a prompt synchronously.
38
+ *
39
+ * Implementations whose embedding backend is async-only should throw
40
+ * a clear error here -- callers should use `classifyAsync` instead.
41
+ *
42
+ * @param prompt - The user's input text.
43
+ * @returns ClassificationResult with benchmarkScores populated.
44
+ */
45
+ abstract classify(prompt: string): ClassificationResult;
46
+ /**
47
+ * Classify a prompt asynchronously.
48
+ *
49
+ * Default implementation wraps `classify()` so sync classifiers work
50
+ * out of the box on the async path. Async-only classifiers must override.
51
+ */
52
+ classifyAsync(prompt: string): Promise<ClassificationResult>;
53
+ /** Check if the classifier is initialized and ready to use. */
54
+ abstract isReady(): boolean;
55
+ }
56
+ //# sourceMappingURL=base.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"base.d.ts","sourceRoot":"","sources":["../../src/classifiers/base.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,gCAAgC;AAChC,MAAM,WAAW,oBAAoB;IACnC,0DAA0D;IAC1D,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAExC,gDAAgD;IAChD,aAAa,EAAE,MAAM,CAAC;IAEtB,8BAA8B;IAC9B,WAAW,EAAE,MAAM,CAAC;IAEpB,8CAA8C;IAC9C,UAAU,EAAE,MAAM,CAAC;IAEnB,wFAAwF;IACxF,cAAc,EAAE,MAAM,CAAC;IAEvB,2CAA2C;IAC3C,QAAQ,EAAE,OAAO,CAAC;IAElB,oDAAoD;IACpD,gBAAgB,EAAE,MAAM,CAAC;CAC1B;AAED,qDAAqD;AACrD,wBAAgB,yBAAyB,IAAI,oBAAoB,CAUhE;AAED,kEAAkE;AAClE,wBAAgB,aAAa,CAAC,MAAM,EAAE,oBAAoB,GAAG,KAAK,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAGnF;AAED;;;;;;GAMG;AACH,8BAAsB,cAAc;IAClC;;;;;;;;OAQG;IACH,QAAQ,CAAC,QAAQ,CAAC,MAAM,EAAE,MAAM,GAAG,oBAAoB;IAEvD;;;;;OAKG;IACG,aAAa,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,oBAAoB,CAAC;IAIlE,+DAA+D;IAC/D,QAAQ,CAAC,OAAO,IAAI,OAAO;CAC5B"}
@@ -0,0 +1,42 @@
1
+ /**
2
+ * Abstract base classifier.
3
+ *
4
+ * The embedding classifier implements this interface. The abstraction is
5
+ * kept so custom classifiers can be plugged in for tests or research.
6
+ */
7
+ /** Create a default (empty) ClassificationResult. */
8
+ export function emptyClassificationResult() {
9
+ return {
10
+ benchmarkScores: {},
11
+ broadCategory: '',
12
+ subcategory: '',
13
+ confidence: 0,
14
+ classifierUsed: '',
15
+ cacheHit: false,
16
+ processingTimeMs: 0,
17
+ };
18
+ }
19
+ /** Get top benchmarks sorted by similarity score (descending). */
20
+ export function topBenchmarks(result) {
21
+ return Object.entries(result.benchmarkScores)
22
+ .sort((a, b) => b[1] - a[1]);
23
+ }
24
+ /**
25
+ * Abstract base class for prompt classifiers.
26
+ *
27
+ * A classifier takes a user prompt and returns benchmark similarity scores.
28
+ * These scores tell us "what kind of task is this?" in terms of which
29
+ * AI benchmarks it most resembles.
30
+ */
31
+ export class BaseClassifier {
32
+ /**
33
+ * Classify a prompt asynchronously.
34
+ *
35
+ * Default implementation wraps `classify()` so sync classifiers work
36
+ * out of the box on the async path. Async-only classifiers must override.
37
+ */
38
+ async classifyAsync(prompt) {
39
+ return this.classify(prompt);
40
+ }
41
+ }
42
+ //# sourceMappingURL=base.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"base.js","sourceRoot":"","sources":["../../src/classifiers/base.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AA0BH,qDAAqD;AACrD,MAAM,UAAU,yBAAyB;IACvC,OAAO;QACL,eAAe,EAAE,EAAE;QACnB,aAAa,EAAE,EAAE;QACjB,WAAW,EAAE,EAAE;QACf,UAAU,EAAE,CAAC;QACb,cAAc,EAAE,EAAE;QAClB,QAAQ,EAAE,KAAK;QACf,gBAAgB,EAAE,CAAC;KACpB,CAAC;AACJ,CAAC;AAED,kEAAkE;AAClE,MAAM,UAAU,aAAa,CAAC,MAA4B;IACxD,OAAO,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,eAAe,CAAC;SAC1C,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AACjC,CAAC;AAED;;;;;;GAMG;AACH,MAAM,OAAgB,cAAc;IAYlC;;;;;OAKG;IACH,KAAK,CAAC,aAAa,CAAC,MAAc;QAChC,OAAO,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;IAC/B,CAAC;CAIF"}
@@ -0,0 +1,68 @@
1
+ /**
2
+ * Neural embedding classifier.
3
+ *
4
+ * Classifies prompts by computing cosine similarity between the prompt's
5
+ * embedding vector and pre-computed benchmark centroids. This gives a
6
+ * semantic understanding of "what kind of task" a prompt represents.
7
+ *
8
+ * Exposes both sync (`classify`) and async (`classifyAsync`) entry points.
9
+ * The sync path requires an embedding provider whose `supportsSync` is true;
10
+ * the async path works with any provider.
11
+ */
12
+ import { BaseClassifier, ClassificationResult } from './base.js';
13
+ import { BaseEmbeddingProvider } from '../embeddings/base.js';
14
+ import { CentroidLoader } from '../centroids/loader.js';
15
+ /** Benchmark -> broad category mapping for display purposes. */
16
+ export declare const BENCHMARK_CATEGORIES: Record<string, [string, string]>;
17
+ /**
18
+ * Semantic classifier using embedding cosine similarity.
19
+ *
20
+ * Flow:
21
+ * 1. Embed the user prompt using the configured embedding provider
22
+ * 2. Compute cosine similarity against each benchmark centroid
23
+ * 3. Return similarity scores as the classification result
24
+ *
25
+ * Includes LRU caching for both embeddings and full classification results.
26
+ */
27
+ export declare class EmbeddingClassifier extends BaseClassifier {
28
+ private _provider;
29
+ private _centroidLoader;
30
+ private _embeddingCache;
31
+ private _classificationCache;
32
+ constructor(embeddingProvider: BaseEmbeddingProvider, centroidLoader: CentroidLoader, opts?: {
33
+ embeddingCacheSize?: number;
34
+ classificationCacheSize?: number;
35
+ ttlSeconds?: number;
36
+ });
37
+ /**
38
+ * Synchronous classification. Requires the underlying provider to support
39
+ * `embed()` (`supportsSync === true`); otherwise the provider will throw.
40
+ */
41
+ classify(prompt: string): ClassificationResult;
42
+ /**
43
+ * Asynchronous classification. Works with any embedding provider; sync
44
+ * providers route through their default async fallback in BaseEmbeddingProvider.
45
+ */
46
+ classifyAsync(prompt: string): Promise<ClassificationResult>;
47
+ /** Try to return a cached classification result, stamped with fresh timing. */
48
+ private _readCache;
49
+ /** Score an embedding against the given centroids, cache the result, and return it. */
50
+ private _scoreAndCache;
51
+ private _getEmbeddingSync;
52
+ private _getEmbeddingAsync;
53
+ isReady(): boolean;
54
+ /**
55
+ * Embedding cache key. Includes the embedding model name and dimension so
56
+ * vectors from different models/dimensions never collide for the same prompt.
57
+ */
58
+ private _embeddingCacheKey;
59
+ /**
60
+ * Classification cache key. Includes the embedding model name, dimension, and
61
+ * benchmark-set fingerprint (not just md5(prompt)) so cached classifications
62
+ * are invalidated when the model, dimension, or benchmark set changes. Must
63
+ * stay identical to the Python SDK's classification cache key.
64
+ */
65
+ private _classificationCacheKey;
66
+ private static _hash;
67
+ }
68
+ //# sourceMappingURL=embedding.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embedding.d.ts","sourceRoot":"","sources":["../../src/classifiers/embedding.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAKH,OAAO,EAAE,cAAc,EAAE,oBAAoB,EAAE,MAAM,WAAW,CAAC;AAEjE,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,EAAE,cAAc,EAAwB,MAAM,wBAAwB,CAAC;AAE9E,gEAAgE;AAChE,eAAO,MAAM,oBAAoB,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAajE,CAAC;AAEF;;;;;;;;;GASG;AACH,qBAAa,mBAAoB,SAAQ,cAAc;IACrD,OAAO,CAAC,SAAS,CAAwB;IACzC,OAAO,CAAC,eAAe,CAAiB;IACxC,OAAO,CAAC,eAAe,CAAqB;IAC5C,OAAO,CAAC,oBAAoB,CAAiC;gBAG3D,iBAAiB,EAAE,qBAAqB,EACxC,cAAc,EAAE,cAAc,EAC9B,IAAI,CAAC,EAAE;QACL,kBAAkB,CAAC,EAAE,MAAM,CAAC;QAC5B,uBAAuB,CAAC,EAAE,MAAM,CAAC;QACjC,UAAU,CAAC,EAAE,MAAM,CAAC;KACrB;IAgBH;;;OAGG;IACH,QAAQ,CAAC,MAAM,EAAE,MAAM,GAAG,oBAAoB;IAgB9C;;;OAGG;IACG,aAAa,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,oBAAoB,CAAC;IAelE,+EAA+E;IAC/E,OAAO,CAAC,UAAU;IAUlB,uFAAuF;IACvF,OAAO,CAAC,cAAc;IAwDtB,OAAO,CAAC,iBAAiB;YAUX,kBAAkB;IAUhC,OAAO,IAAI,OAAO;IAIlB;;;OAGG;IACH,OAAO,CAAC,kBAAkB;IAM1B;;;;;OAKG;IACH,OAAO,CAAC,uBAAuB;IAO/B,OAAO,CAAC,MAAM,CAAC,KAAK;CAGrB"}