agentic-flow 2.0.1-alpha.13 → 2.0.1-alpha.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +63 -0
- package/dist/.tsbuildinfo +1 -1
- package/dist/intelligence/EmbeddingService.d.ts +168 -0
- package/dist/intelligence/EmbeddingService.d.ts.map +1 -0
- package/dist/intelligence/EmbeddingService.js +526 -0
- package/dist/intelligence/EmbeddingService.js.map +1 -0
- package/dist/intelligence/embedding-benchmark.d.ts +7 -0
- package/dist/intelligence/embedding-benchmark.d.ts.map +1 -0
- package/dist/intelligence/embedding-benchmark.js +155 -0
- package/dist/intelligence/embedding-benchmark.js.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/intelligence-bridge.d.ts.map +1 -1
- package/dist/mcp/fastmcp/tools/hooks/intelligence-bridge.js +13 -17
- package/dist/mcp/fastmcp/tools/hooks/intelligence-bridge.js.map +1 -1
- package/package.json +4 -2
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* EmbeddingService - Unified embedding interface for agentic-flow
|
|
3
|
+
*
|
|
4
|
+
* Uses ruvector@0.1.61+ for ONNX embeddings with:
|
|
5
|
+
* - SIMD128 acceleration (6x faster)
|
|
6
|
+
* - Parallel worker threads (7 workers)
|
|
7
|
+
* - all-MiniLM-L6-v2 model (384 dimensions)
|
|
8
|
+
*
|
|
9
|
+
* Configure via:
|
|
10
|
+
* - AGENTIC_FLOW_EMBEDDINGS=simple|onnx|auto (default: auto)
|
|
11
|
+
* - AGENTIC_FLOW_EMBEDDING_MODEL=all-MiniLM-L6-v2 (default)
|
|
12
|
+
*/
|
|
13
|
+
export type EmbeddingBackend = 'simple' | 'onnx' | 'auto';
|
|
14
|
+
export interface EmbeddingStats {
|
|
15
|
+
backend: EmbeddingBackend;
|
|
16
|
+
effectiveBackend: EmbeddingBackend;
|
|
17
|
+
dimension: number;
|
|
18
|
+
totalEmbeddings: number;
|
|
19
|
+
totalLatencyMs: number;
|
|
20
|
+
avgLatencyMs: number;
|
|
21
|
+
cacheHits: number;
|
|
22
|
+
modelLoaded: boolean;
|
|
23
|
+
modelName?: string;
|
|
24
|
+
simdAvailable?: boolean;
|
|
25
|
+
parallelWorkers?: number;
|
|
26
|
+
}
|
|
27
|
+
export interface SimilarityResult {
|
|
28
|
+
similarity: number;
|
|
29
|
+
timeMs: number;
|
|
30
|
+
}
|
|
31
|
+
export interface SearchResult {
|
|
32
|
+
text: string;
|
|
33
|
+
index: number;
|
|
34
|
+
similarity: number;
|
|
35
|
+
}
|
|
36
|
+
export interface DuplicateGroup {
|
|
37
|
+
indices: number[];
|
|
38
|
+
texts: string[];
|
|
39
|
+
similarity: number;
|
|
40
|
+
}
|
|
41
|
+
export declare class EmbeddingService {
|
|
42
|
+
private static instance;
|
|
43
|
+
private backend;
|
|
44
|
+
private effectiveBackend;
|
|
45
|
+
private dimension;
|
|
46
|
+
private modelName;
|
|
47
|
+
private modelLoaded;
|
|
48
|
+
private loadingPromise;
|
|
49
|
+
private totalEmbeddings;
|
|
50
|
+
private totalLatencyMs;
|
|
51
|
+
private cacheHits;
|
|
52
|
+
private cache;
|
|
53
|
+
private cacheEnabled;
|
|
54
|
+
private corpus;
|
|
55
|
+
private constructor();
|
|
56
|
+
static getInstance(): EmbeddingService;
|
|
57
|
+
/**
|
|
58
|
+
* Resolve the effective backend based on ONNX detection
|
|
59
|
+
*/
|
|
60
|
+
private resolveBackend;
|
|
61
|
+
/**
|
|
62
|
+
* Get configured backend (may be 'auto')
|
|
63
|
+
*/
|
|
64
|
+
getBackend(): EmbeddingBackend;
|
|
65
|
+
/**
|
|
66
|
+
* Get effective backend after detection
|
|
67
|
+
*/
|
|
68
|
+
getEffectiveBackend(): EmbeddingBackend;
|
|
69
|
+
/**
|
|
70
|
+
* Get embedding dimension
|
|
71
|
+
*/
|
|
72
|
+
getDimension(): number;
|
|
73
|
+
/**
|
|
74
|
+
* Check if ONNX model is loaded
|
|
75
|
+
*/
|
|
76
|
+
isModelLoaded(): boolean;
|
|
77
|
+
/**
|
|
78
|
+
* Generate embedding for text
|
|
79
|
+
* Auto-detects ONNX and uses it if available (default behavior)
|
|
80
|
+
*/
|
|
81
|
+
embed(text: string): Promise<Float32Array>;
|
|
82
|
+
/**
|
|
83
|
+
* Generate embeddings for multiple texts (batch processing with parallel workers)
|
|
84
|
+
* Batch processing provides significant speedup with parallel ONNX workers
|
|
85
|
+
*/
|
|
86
|
+
embedBatch(texts: string[]): Promise<Float32Array[]>;
|
|
87
|
+
/**
|
|
88
|
+
* Compute similarity between two texts
|
|
89
|
+
*/
|
|
90
|
+
similarity(text1: string, text2: string): Promise<number>;
|
|
91
|
+
/**
|
|
92
|
+
* Compute NxN similarity matrix for a list of texts
|
|
93
|
+
* Uses parallel workers for ONNX backend
|
|
94
|
+
*/
|
|
95
|
+
similarityMatrix(texts: string[]): Promise<number[][]>;
|
|
96
|
+
/**
|
|
97
|
+
* Build a corpus for semantic search
|
|
98
|
+
*/
|
|
99
|
+
buildCorpus(texts: string[]): Promise<void>;
|
|
100
|
+
/**
|
|
101
|
+
* Semantic search against the corpus
|
|
102
|
+
* Returns top-k most similar texts
|
|
103
|
+
*/
|
|
104
|
+
semanticSearch(query: string, topK?: number): Promise<SearchResult[]>;
|
|
105
|
+
/**
|
|
106
|
+
* Find near-duplicate texts in a list
|
|
107
|
+
* Groups texts with similarity above threshold
|
|
108
|
+
*/
|
|
109
|
+
findDuplicates(texts: string[], threshold?: number): Promise<DuplicateGroup[]>;
|
|
110
|
+
/**
|
|
111
|
+
* K-means clustering of texts
|
|
112
|
+
* Returns cluster assignments and centroids
|
|
113
|
+
*/
|
|
114
|
+
clusterTexts(texts: string[], k?: number, maxIterations?: number): Promise<{
|
|
115
|
+
clusters: number[];
|
|
116
|
+
centroids: Float32Array[];
|
|
117
|
+
}>;
|
|
118
|
+
/**
|
|
119
|
+
* Stream embeddings for large batches (memory efficient)
|
|
120
|
+
* Yields embeddings one at a time
|
|
121
|
+
*/
|
|
122
|
+
streamEmbed(texts: string[], batchSize?: number): AsyncGenerator<{
|
|
123
|
+
index: number;
|
|
124
|
+
text: string;
|
|
125
|
+
embedding: Float32Array;
|
|
126
|
+
}>;
|
|
127
|
+
/**
|
|
128
|
+
* Simple hash-based embedding (fast, not semantic)
|
|
129
|
+
*/
|
|
130
|
+
simpleEmbed(text: string, dim?: number): Float32Array;
|
|
131
|
+
/**
|
|
132
|
+
* Compute cosine similarity between two embeddings
|
|
133
|
+
*/
|
|
134
|
+
cosineSimilarity(a: Float32Array, b: Float32Array): number;
|
|
135
|
+
/**
|
|
136
|
+
* Get statistics
|
|
137
|
+
*/
|
|
138
|
+
getStats(): EmbeddingStats;
|
|
139
|
+
/**
|
|
140
|
+
* Clear cache
|
|
141
|
+
*/
|
|
142
|
+
clearCache(): void;
|
|
143
|
+
/**
|
|
144
|
+
* Clear corpus
|
|
145
|
+
*/
|
|
146
|
+
clearCorpus(): void;
|
|
147
|
+
/**
|
|
148
|
+
* Shutdown (cleanup workers)
|
|
149
|
+
*/
|
|
150
|
+
shutdown(): Promise<void>;
|
|
151
|
+
/**
|
|
152
|
+
* Reset instance (for testing)
|
|
153
|
+
*/
|
|
154
|
+
static reset(): Promise<void>;
|
|
155
|
+
}
|
|
156
|
+
export declare function getEmbeddingService(): EmbeddingService;
|
|
157
|
+
export declare function embed(text: string): Promise<Float32Array>;
|
|
158
|
+
export declare function embedBatch(texts: string[]): Promise<Float32Array[]>;
|
|
159
|
+
export declare function textSimilarity(text1: string, text2: string): Promise<number>;
|
|
160
|
+
export declare function simpleEmbed(text: string, dim?: number): Float32Array;
|
|
161
|
+
export declare function similarityMatrix(texts: string[]): Promise<number[][]>;
|
|
162
|
+
export declare function semanticSearch(query: string, topK?: number): Promise<SearchResult[]>;
|
|
163
|
+
export declare function findDuplicates(texts: string[], threshold?: number): Promise<DuplicateGroup[]>;
|
|
164
|
+
export declare function clusterTexts(texts: string[], k?: number): Promise<{
|
|
165
|
+
clusters: number[];
|
|
166
|
+
centroids: Float32Array[];
|
|
167
|
+
}>;
|
|
168
|
+
//# sourceMappingURL=EmbeddingService.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"EmbeddingService.d.ts","sourceRoot":"","sources":["../../src/intelligence/EmbeddingService.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,MAAM,MAAM,gBAAgB,GAAG,QAAQ,GAAG,MAAM,GAAG,MAAM,CAAC;AAE1D,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,gBAAgB,CAAC;IAC1B,gBAAgB,EAAE,gBAAgB,CAAC;IACnC,SAAS,EAAE,MAAM,CAAC;IAClB,eAAe,EAAE,MAAM,CAAC;IACxB,cAAc,EAAE,MAAM,CAAC;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,OAAO,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,eAAe,CAAC,EAAE,MAAM,CAAC;CAC1B;AAED,MAAM,WAAW,gBAAgB;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;CACpB;AAyFD,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAiC;IAExD,OAAO,CAAC,OAAO,CAAmB;IAClC,OAAO,CAAC,gBAAgB,CAAiC;IACzD,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,SAAS,CAAS;IAG1B,OAAO,CAAC,WAAW,CAAkB;IACrC,OAAO,CAAC,cAAc,CAA8B;IAGpD,OAAO,CAAC,eAAe,CAAa;IACpC,OAAO,CAAC,cAAc,CAAa;IACnC,OAAO,CAAC,SAAS,CAAa;IAG9B,OAAO,CAAC,KAAK,CAAiB;IAC9B,OAAO,CAAC,YAAY,CAAU;IAG9B,OAAO,CAAC,MAAM,CAAkF;IAEhG,OAAO;IASP,MAAM,CAAC,WAAW,IAAI,gBAAgB;IAOtC;;OAEG;YACW,cAAc;IAsB5B;;OAEG;IACH,UAAU,IAAI,gBAAgB;IAI9B;;OAEG;IACH,mBAAmB,IAAI,gBAAgB;IAIvC;;OAEG;IACH,YAAY,IAAI,MAAM;IAItB;;OAEG;IACH,aAAa,IAAI,OAAO;IAIxB;;;OAGG;IACG,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAwChD;;;OAGG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAyC1D;;OAEG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAa/D;;;OAGG;IACG,gBAAgB,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAiB5D;;OAEG;IACG,WAAW,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAKjD;;;OAGG;IACG,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,MAAU,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAsB9E;;;OAGG;IACG,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,GAAE,MAAY,GAAG,OAAO,CAAC,cAAc,EA
AE,CAAC;IAoCzF;;;OAGG;IACG,YAAY,CAChB,KAAK,EAAE,MAAM,EAAE,EACf,CAAC,GAAE,MAAU,EACb,aAAa,GAAE,MAAY,GAC1B,OAAO,CAAC;QAAE,QAAQ,EAAE,MAAM,EAAE,CAAC;QAAC,SAAS,EAAE,YAAY,EAAE,CAAA;KAAE,CAAC;IAyE7D;;;OAGG;IACI,WAAW,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,GAAE,MAAW,GAAG,cAAc,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,YAAY,CAAA;KAAE,CAAC;IAerI;;OAEG;IACH,WAAW,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,GAAE,MAAY,GAAG,YAAY;IAwB1D;;OAEG;IACH,gBAAgB,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,GAAG,MAAM;IAiB1D;;OAEG;IACH,QAAQ,IAAI,cAAc;IAmB1B;;OAEG;IACH,UAAU,IAAI,IAAI;IAIlB;;OAEG;IACH,WAAW,IAAI,IAAI;IAInB;;OAEG;IACG,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC;IAM/B;;OAEG;WACU,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAQpC;AAGD,wBAAgB,mBAAmB,IAAI,gBAAgB,CAEtD;AAGD,wBAAsB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC,CAE/D;AAED,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CAEzE;AAED,wBAAsB,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAElF;AAED,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,GAAE,MAAY,GAAG,YAAY,CAEzE;AAED,wBAAsB,gBAAgB,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAE3E;AAED,wBAAsB,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,MAAU,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CAE7F;AAED,wBAAsB,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,GAAE,MAAY,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC,CAExG;AAED,wBAAsB,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,CAAC,GAAE,MAAU,GAAG,OAAO,CAAC;IAAE,QAAQ,EAAE,MAAM,EAAE,CAAC;IAAC,SAAS,EAAE,YAAY,EAAE,CAAA;CAAE,CAAC,CAE7H"}
|
|
@@ -0,0 +1,526 @@
|
|
|
1
|
+
/**
 * EmbeddingService - Unified embedding interface for agentic-flow
 *
 * Uses ruvector@0.1.61+ for ONNX embeddings with:
 * - SIMD128 acceleration (6x faster)
 * - Parallel worker threads (7 workers)
 * - all-MiniLM-L6-v2 model (384 dimensions)
 *
 * Configure via:
 * - AGENTIC_FLOW_EMBEDDINGS=simple|onnx|auto (default: auto)
 * - AGENTIC_FLOW_EMBEDDING_MODEL=all-MiniLM-L6-v2 (default)
 */
// ONNX availability cache (null = not yet probed)
let onnxAvailable = null;
let ruvectorModule = null;
/**
 * Detect ONNX/SIMD support by loading ruvector.
 * The probe runs at most once; the outcome is memoized in `onnxAvailable`.
 */
async function detectOnnx() {
    // Already probed — reuse the cached answer.
    if (onnxAvailable !== null) {
        return onnxAvailable;
    }
    try {
        const loaded = await import('ruvector');
        ruvectorModule = loaded;
        // Older ruvector builds may not expose isOnnxAvailable; treat that as "no".
        onnxAvailable = loaded.isOnnxAvailable?.() ?? false;
    }
    catch {
        // Ruvector loading failed - fall back to simple embeddings
        onnxAvailable = false;
    }
    return onnxAvailable;
}
|
|
35
|
+
// Simple LRU cache for embeddings.
// A Map preserves insertion order, so the first key is always the
// least-recently-used entry; a hit re-inserts the key to mark it fresh.
class EmbeddingCache {
    cache = new Map();
    maxSize;

    constructor(maxSize = 1000) {
        this.maxSize = maxSize;
    }

    /** Look up a key; on a hit the entry is promoted to most-recently-used. */
    get(key) {
        const hit = this.cache.get(key);
        if (hit) {
            // Re-insert so the key moves to the end of the Map's order.
            this.cache.delete(key);
            this.cache.set(key, hit);
        }
        return hit;
    }

    /** Insert a value, evicting the least-recently-used entry when full. */
    set(key, value) {
        if (this.cache.size >= this.maxSize) {
            const oldest = this.cache.keys().next().value;
            if (oldest) {
                this.cache.delete(oldest);
            }
        }
        this.cache.set(key, value);
    }

    /** Drop all entries. */
    clear() {
        this.cache.clear();
    }

    /** Current number of cached entries. */
    get size() {
        return this.cache.size;
    }
}
|
|
68
|
+
export class EmbeddingService {
    static instance = null;
    // Configured backend: 'simple' | 'onnx' | 'auto'
    backend;
    // Backend actually in use after detection; null until resolveBackend() runs
    effectiveBackend = null;
    dimension;
    modelName;
    // ONNX state
    modelLoaded = false;
    loadingPromise = null;
    // Stats
    totalEmbeddings = 0;
    totalLatencyMs = 0;
    cacheHits = 0;
    // Cache
    cache;
    cacheEnabled;
    // Corpus for search operations
    corpus = { texts: [], embeddings: [] };
    constructor() {
        // Default to 'auto' which will detect ONNX and use it if available
        this.backend = process.env.AGENTIC_FLOW_EMBEDDINGS || 'auto';
        this.modelName = process.env.AGENTIC_FLOW_EMBEDDING_MODEL || 'all-MiniLM-L6-v2';
        this.dimension = 256; // Will be updated when ONNX loads (384)
        this.cacheEnabled = process.env.AGENTIC_FLOW_EMBEDDING_CACHE !== 'false';
        this.cache = new EmbeddingCache(1000);
    }
    /** Lazily create and return the process-wide singleton. */
    static getInstance() {
        if (!EmbeddingService.instance) {
            EmbeddingService.instance = new EmbeddingService();
        }
        return EmbeddingService.instance;
    }
    /**
     * Resolve the effective backend based on ONNX detection.
     * Memoized: detection runs once per instance.
     */
    async resolveBackend() {
        if (this.effectiveBackend) {
            return this.effectiveBackend;
        }
        if (this.backend === 'auto') {
            const hasOnnx = await detectOnnx();
            this.effectiveBackend = hasOnnx ? 'onnx' : 'simple';
            if (hasOnnx) {
                this.dimension = 384; // all-MiniLM-L6-v2 dimension
            }
        }
        else {
            this.effectiveBackend = this.backend;
            if (this.backend === 'onnx') {
                await detectOnnx(); // Ensure module is loaded
                this.dimension = 384;
            }
        }
        return this.effectiveBackend;
    }
    /**
     * Get configured backend (may be 'auto')
     */
    getBackend() {
        return this.backend;
    }
    /**
     * Get effective backend after detection
     */
    getEffectiveBackend() {
        return this.effectiveBackend || this.backend;
    }
    /**
     * Get embedding dimension
     */
    getDimension() {
        return this.dimension;
    }
    /**
     * Check if ONNX model is loaded
     */
    isModelLoaded() {
        return this.modelLoaded;
    }
    /**
     * Generate embedding for text.
     * Auto-detects ONNX and uses it if available (default behavior);
     * falls back to the hash-based simpleEmbed otherwise.
     * Cache hits bypass the backend and the latency stats entirely.
     */
    async embed(text) {
        const startTime = performance.now();
        // Check cache
        if (this.cacheEnabled) {
            const cached = this.cache.get(text);
            if (cached) {
                this.cacheHits++;
                return cached;
            }
        }
        // Resolve backend (handles 'auto' mode)
        const effectiveBackend = await this.resolveBackend();
        let embedding;
        if (effectiveBackend === 'onnx' && ruvectorModule) {
            const result = await ruvectorModule.embed(text);
            if (result?.embedding) {
                embedding = result.embedding;
                this.modelLoaded = true;
            }
            else {
                // ONNX returned nothing usable — degrade gracefully.
                embedding = this.simpleEmbed(text);
            }
        }
        else {
            embedding = this.simpleEmbed(text);
        }
        // Update stats
        this.totalEmbeddings++;
        this.totalLatencyMs += performance.now() - startTime;
        // Cache result
        if (this.cacheEnabled) {
            this.cache.set(text, embedding);
        }
        return embedding;
    }
    /**
     * Generate embeddings for multiple texts (batch processing with parallel workers).
     * Batch processing provides significant speedup with parallel ONNX workers;
     * the simple backend falls back to per-text embed() calls.
     */
    async embedBatch(texts) {
        const startTime = performance.now();
        // Check cache for all texts first
        if (this.cacheEnabled) {
            const cachedResults = texts.map(t => this.cache.get(t) || null);
            const allCached = cachedResults.every(r => r !== null);
            if (allCached) {
                this.cacheHits += texts.length;
                return cachedResults;
            }
        }
        // Resolve backend
        const effectiveBackend = await this.resolveBackend();
        if (effectiveBackend === 'onnx' && ruvectorModule) {
            const result = await ruvectorModule.embedBatch(texts);
            // Only trust the batch result when it lines up 1:1 with the inputs.
            if (result?.embeddings && result.embeddings.length === texts.length) {
                const embeddings = result.embeddings;
                // Cache individual embeddings
                if (this.cacheEnabled) {
                    for (let i = 0; i < texts.length; i++) {
                        this.cache.set(texts[i], embeddings[i]);
                    }
                }
                // Update stats
                this.totalEmbeddings += texts.length;
                this.totalLatencyMs += performance.now() - startTime;
                this.modelLoaded = true;
                return embeddings;
            }
        }
        // Fall back to sequential for simple backend (or a failed ONNX batch)
        return Promise.all(texts.map(t => this.embed(t)));
    }
    /**
     * Compute similarity between two texts.
     * Prefers ruvector's native similarity; falls back to embed + cosine
     * when the module is absent or returns a malformed result.
     */
    async similarity(text1, text2) {
        const effectiveBackend = await this.resolveBackend();
        if (effectiveBackend === 'onnx' && ruvectorModule) {
            const result = await ruvectorModule.similarity(text1, text2);
            // Guard: a missing/malformed result previously produced undefined.
            if (result && typeof result.similarity === 'number') {
                return result.similarity;
            }
        }
        // Fall back to embedding + cosine
        const [e1, e2] = await Promise.all([this.embed(text1), this.embed(text2)]);
        return this.cosineSimilarity(e1, e2);
    }
    /**
     * Compute NxN similarity matrix for a list of texts.
     * Uses parallel workers for ONNX backend. The matrix is symmetric
     * with 1.0 on the diagonal, so only the upper triangle is computed.
     */
    async similarityMatrix(texts) {
        const embeddings = await this.embedBatch(texts);
        const n = texts.length;
        const matrix = Array(n).fill(null).map(() => Array(n).fill(0));
        for (let i = 0; i < n; i++) {
            matrix[i][i] = 1.0; // Self-similarity
            for (let j = i + 1; j < n; j++) {
                const sim = this.cosineSimilarity(embeddings[i], embeddings[j]);
                matrix[i][j] = sim;
                matrix[j][i] = sim; // Symmetric
            }
        }
        return matrix;
    }
    /**
     * Build a corpus for semantic search. Replaces any previous corpus.
     */
    async buildCorpus(texts) {
        this.corpus.texts = texts;
        this.corpus.embeddings = await this.embedBatch(texts);
    }
    /**
     * Semantic search against the corpus.
     * Returns top-k most similar texts, best first.
     * @throws Error when buildCorpus() has not been called.
     */
    async semanticSearch(query, topK = 5) {
        if (this.corpus.texts.length === 0) {
            throw new Error('Corpus not built. Call buildCorpus() first.');
        }
        const queryEmbedding = await this.embed(query);
        const results = [];
        for (let i = 0; i < this.corpus.texts.length; i++) {
            const sim = this.cosineSimilarity(queryEmbedding, this.corpus.embeddings[i]);
            results.push({
                text: this.corpus.texts[i],
                index: i,
                similarity: sim,
            });
        }
        // Sort by similarity (descending) and return top-k
        results.sort((a, b) => b.similarity - a.similarity);
        return results.slice(0, topK);
    }
    /**
     * Find near-duplicate texts in a list.
     * Greedy single-pass grouping: each unvisited text seeds a group and
     * absorbs every later text whose similarity meets the threshold.
     * A group's `similarity` is the minimum pairwise score against the seed.
     */
    async findDuplicates(texts, threshold = 0.9) {
        const embeddings = await this.embedBatch(texts);
        const n = texts.length;
        const visited = new Set();
        const groups = [];
        for (let i = 0; i < n; i++) {
            if (visited.has(i))
                continue;
            const group = {
                indices: [i],
                texts: [texts[i]],
                similarity: 1.0,
            };
            for (let j = i + 1; j < n; j++) {
                if (visited.has(j))
                    continue;
                const sim = this.cosineSimilarity(embeddings[i], embeddings[j]);
                if (sim >= threshold) {
                    group.indices.push(j);
                    group.texts.push(texts[j]);
                    group.similarity = Math.min(group.similarity, sim);
                    visited.add(j);
                }
            }
            // Only keep groups with at least one duplicate.
            if (group.indices.length > 1) {
                visited.add(i);
                groups.push(group);
            }
        }
        return groups;
    }
    /**
     * K-means clustering of texts (cosine similarity as the affinity).
     * Returns cluster assignments and centroids.
     * k is clamped to the number of texts: previously k > texts.length
     * indexed past the initialized centroids and crashed.
     */
    async clusterTexts(texts, k = 3, maxIterations = 100) {
        const embeddings = await this.embedBatch(texts);
        const n = texts.length;
        const dim = this.dimension;
        // Clamp so we never dereference a missing centroid (fixes k > n crash).
        const effectiveK = Math.min(k, n);
        // Initialize centroids randomly (copy to new ArrayBuffer for consistent typing)
        const centroidIndices = new Set();
        while (centroidIndices.size < effectiveK) {
            centroidIndices.add(Math.floor(Math.random() * n));
        }
        let centroids = Array.from(centroidIndices).map(i => {
            const copy = new Float32Array(dim);
            copy.set(embeddings[i]);
            return copy;
        });
        let clusters = new Array(n).fill(0);
        for (let iter = 0; iter < maxIterations; iter++) {
            // Assign points to nearest centroid
            const newClusters = embeddings.map(emb => {
                let bestCluster = 0;
                let bestSim = -Infinity;
                for (let c = 0; c < effectiveK; c++) {
                    const sim = this.cosineSimilarity(emb, centroids[c]);
                    if (sim > bestSim) {
                        bestSim = sim;
                        bestCluster = c;
                    }
                }
                return bestCluster;
            });
            // Check convergence
            const changed = newClusters.some((c, i) => c !== clusters[i]);
            clusters = newClusters;
            if (!changed)
                break;
            // Update centroids: mean of members, then L2-normalize
            const newCentroids = [];
            for (let c = 0; c < effectiveK; c++) {
                newCentroids.push(new Float32Array(dim));
            }
            const counts = new Array(effectiveK).fill(0);
            for (let i = 0; i < n; i++) {
                const c = clusters[i];
                counts[c]++;
                for (let d = 0; d < dim; d++) {
                    newCentroids[c][d] += embeddings[i][d];
                }
            }
            // Normalize centroids (empty clusters keep their zero vector)
            for (let c = 0; c < effectiveK; c++) {
                if (counts[c] > 0) {
                    let norm = 0;
                    for (let d = 0; d < dim; d++) {
                        newCentroids[c][d] /= counts[c];
                        norm += newCentroids[c][d] * newCentroids[c][d];
                    }
                    norm = Math.sqrt(norm) || 1;
                    for (let d = 0; d < dim; d++) {
                        newCentroids[c][d] /= norm;
                    }
                }
            }
            centroids = newCentroids;
        }
        return { clusters, centroids };
    }
    /**
     * Stream embeddings for large batches (memory efficient).
     * Yields { index, text, embedding } one at a time, in input order.
     */
    async *streamEmbed(texts, batchSize = 32) {
        for (let i = 0; i < texts.length; i += batchSize) {
            const batch = texts.slice(i, i + batchSize);
            const embeddings = await this.embedBatch(batch);
            for (let j = 0; j < batch.length; j++) {
                yield {
                    index: i + j,
                    text: batch[j],
                    embedding: embeddings[j],
                };
            }
        }
    }
    /**
     * Simple hash-based embedding (fast, not semantic).
     * Multi-pass character hashing into `dim` buckets, then L2-normalized.
     */
    simpleEmbed(text, dim = 256) {
        const embedding = new Float32Array(dim);
        // Multi-pass hash for better distribution
        for (let i = 0; i < text.length; i++) {
            const code = text.charCodeAt(i);
            embedding[i % dim] += code / 255;
            embedding[(i * 7) % dim] += (code * 0.3) / 255;
            embedding[(i * 13) % dim] += (code * 0.2) / 255;
        }
        // Normalize
        let norm = 0;
        for (let i = 0; i < dim; i++) {
            norm += embedding[i] * embedding[i];
        }
        norm = Math.sqrt(norm) || 1;
        for (let i = 0; i < dim; i++) {
            embedding[i] /= norm;
        }
        return embedding;
    }
    /**
     * Compute cosine similarity between two embeddings.
     * Uses ruvector's SIMD implementation when available, JS otherwise.
     */
    cosineSimilarity(a, b) {
        if (ruvectorModule?.cosineSimilarity) {
            return ruvectorModule.cosineSimilarity(a, b);
        }
        // JS fallback
        let dot = 0;
        let normA = 0;
        let normB = 0;
        for (let i = 0; i < a.length; i++) {
            dot += a[i] * b[i];
            normA += a[i] * a[i];
            normB += b[i] * b[i];
        }
        return dot / (Math.sqrt(normA) * Math.sqrt(normB) || 1);
    }
    /**
     * Get statistics (merges ruvector runtime stats when available).
     */
    getStats() {
        const effective = this.effectiveBackend || this.backend;
        const ruvectorStats = ruvectorModule?.getStats?.() || {};
        return {
            backend: this.backend,
            effectiveBackend: effective,
            dimension: this.dimension,
            totalEmbeddings: this.totalEmbeddings,
            totalLatencyMs: this.totalLatencyMs,
            avgLatencyMs: this.totalEmbeddings > 0 ? this.totalLatencyMs / this.totalEmbeddings : 0,
            cacheHits: this.cacheHits,
            modelLoaded: this.modelLoaded,
            modelName: effective === 'onnx' ? this.modelName : undefined,
            simdAvailable: ruvectorStats.simdAvailable ?? onnxAvailable,
            parallelWorkers: ruvectorStats.workerCount ?? undefined,
        };
    }
    /**
     * Clear cache
     */
    clearCache() {
        this.cache.clear();
    }
    /**
     * Clear corpus
     */
    clearCorpus() {
        this.corpus = { texts: [], embeddings: [] };
    }
    /**
     * Shutdown (cleanup workers)
     */
    async shutdown() {
        if (ruvectorModule?.shutdown) {
            await ruvectorModule.shutdown();
        }
    }
    /**
     * Reset instance (for testing): shuts down workers and clears
     * the module-level ONNX detection cache.
     */
    static async reset() {
        if (EmbeddingService.instance) {
            await EmbeddingService.instance.shutdown();
        }
        EmbeddingService.instance = null;
        onnxAvailable = null;
        ruvectorModule = null;
    }
}
|
|
497
|
+
// Export singleton getter
export function getEmbeddingService() {
    return EmbeddingService.getInstance();
}
// Export convenience functions — thin wrappers over the singleton so
// callers can use module-level functions without touching the class.
export async function embed(text) {
    const service = getEmbeddingService();
    return service.embed(text);
}
export async function embedBatch(texts) {
    const service = getEmbeddingService();
    return service.embedBatch(texts);
}
export async function textSimilarity(text1, text2) {
    const service = getEmbeddingService();
    return service.similarity(text1, text2);
}
export function simpleEmbed(text, dim = 256) {
    const service = getEmbeddingService();
    return service.simpleEmbed(text, dim);
}
export async function similarityMatrix(texts) {
    const service = getEmbeddingService();
    return service.similarityMatrix(texts);
}
export async function semanticSearch(query, topK = 5) {
    const service = getEmbeddingService();
    return service.semanticSearch(query, topK);
}
export async function findDuplicates(texts, threshold = 0.9) {
    const service = getEmbeddingService();
    return service.findDuplicates(texts, threshold);
}
export async function clusterTexts(texts, k = 3) {
    const service = getEmbeddingService();
    return service.clusterTexts(texts, k);
}
//# sourceMappingURL=EmbeddingService.js.map
|