agentdb 1.4.4 → 1.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agentdb.min.js +4 -4
- package/dist/benchmarks/wasm-vector-benchmark.d.ts +10 -0
- package/dist/benchmarks/wasm-vector-benchmark.d.ts.map +1 -0
- package/dist/benchmarks/wasm-vector-benchmark.js +196 -0
- package/dist/benchmarks/wasm-vector-benchmark.js.map +1 -0
- package/dist/cli/agentdb-cli.d.ts +1 -1
- package/dist/cli/agentdb-cli.d.ts.map +1 -1
- package/dist/cli/agentdb-cli.js +38 -1
- package/dist/cli/agentdb-cli.js.map +1 -1
- package/dist/controllers/EmbeddingService.d.ts.map +1 -1
- package/dist/controllers/EmbeddingService.js +7 -3
- package/dist/controllers/EmbeddingService.js.map +1 -1
- package/dist/controllers/EnhancedEmbeddingService.d.ts +50 -0
- package/dist/controllers/EnhancedEmbeddingService.d.ts.map +1 -0
- package/dist/controllers/EnhancedEmbeddingService.js +119 -0
- package/dist/controllers/EnhancedEmbeddingService.js.map +1 -0
- package/dist/controllers/WASMVectorSearch.d.ts +89 -0
- package/dist/controllers/WASMVectorSearch.d.ts.map +1 -0
- package/dist/controllers/WASMVectorSearch.js +226 -0
- package/dist/controllers/WASMVectorSearch.js.map +1 -0
- package/dist/controllers/index.d.ts +4 -0
- package/dist/controllers/index.d.ts.map +1 -1
- package/dist/controllers/index.js +2 -0
- package/dist/controllers/index.js.map +1 -1
- package/dist/db-fallback.d.ts +4 -0
- package/dist/db-fallback.d.ts.map +1 -1
- package/dist/db-fallback.js +36 -10
- package/dist/db-fallback.js.map +1 -1
- package/dist/examples/wasm-vector-usage.d.ts +12 -0
- package/dist/examples/wasm-vector-usage.d.ts.map +1 -0
- package/dist/examples/wasm-vector-usage.js +190 -0
- package/dist/examples/wasm-vector-usage.js.map +1 -0
- package/dist/mcp/agentdb-mcp-server.js +54 -27
- package/dist/mcp/agentdb-mcp-server.js.map +1 -1
- package/dist/optimizations/BatchOperations.d.ts +7 -2
- package/dist/optimizations/BatchOperations.d.ts.map +1 -1
- package/dist/optimizations/BatchOperations.js +46 -19
- package/dist/optimizations/BatchOperations.js.map +1 -1
- package/dist/security/input-validation.d.ts +85 -0
- package/dist/security/input-validation.d.ts.map +1 -0
- package/dist/security/input-validation.js +292 -0
- package/dist/security/input-validation.js.map +1 -0
- package/package.json +9 -3
- package/src/benchmarks/wasm-vector-benchmark.ts +250 -0
- package/src/cli/agentdb-cli.ts +40 -1
- package/src/controllers/EmbeddingService.ts +7 -3
- package/src/controllers/EnhancedEmbeddingService.ts +159 -0
- package/src/controllers/WASMVectorSearch.ts +308 -0
- package/src/controllers/index.ts +4 -0
- package/src/db-fallback.ts +38 -10
- package/src/examples/wasm-vector-usage.ts +245 -0
- package/src/mcp/agentdb-mcp-server.ts +59 -28
- package/src/optimizations/BatchOperations.ts +55 -24
- package/src/security/input-validation.ts +377 -0
- package/src/tests/wasm-vector-search.test.ts +240 -0
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WASM Vector Operations Benchmark
|
|
3
|
+
*
|
|
4
|
+
* Measures actual performance improvements of WASM-accelerated vector operations
|
|
5
|
+
* compared to pure JavaScript implementations.
|
|
6
|
+
*
|
|
7
|
+
* Run with: npx tsx src/benchmarks/wasm-vector-benchmark.ts
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { WASMVectorSearch } from '../controllers/WASMVectorSearch.js';
|
|
11
|
+
|
|
12
|
+
/**
 * Outcome of a single benchmark run.
 *
 * All time values are in milliseconds because they are derived from
 * `performance.now()` deltas.
 */
interface BenchmarkResult {
  /** Human-readable label shown in the results table. */
  name: string;
  /** Number of operations the run counted (iterations or vectors processed). */
  operations: number;
  /** Wall-clock time of the timed section, in milliseconds. */
  duration: number;
  /** Throughput derived from `operations` and `duration`. */
  opsPerSecond: number;
  /** Mean time per operation, in milliseconds. */
  avgLatency: number;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Benchmark harness that times a pure-JavaScript cosine similarity against
 * the `WASMVectorSearch` operations (single similarity, batch similarity and
 * k-NN search over a built index) and prints a summary table.
 */
class VectorBenchmark {
  /**
   * Upper bound on untimed iterations executed before a timed similarity
   * loop, so one-off JIT compilation cost is not charged to whichever
   * benchmark happens to run first.
   */
  private static readonly WARMUP_ITERATIONS = 1000;

  private readonly dimensions: number;
  private readonly vectorCount: number;

  /**
   * @param dimensions  Length of every generated vector.
   * @param vectorCount Number of vectors in the batch / k-NN data sets.
   */
  constructor(dimensions: number = 384, vectorCount: number = 1000) {
    this.dimensions = dimensions;
    this.vectorCount = vectorCount;
  }

  /**
   * Build a `BenchmarkResult`, deriving throughput and mean latency.
   *
   * A zero `duration` (timer too coarse) or zero `operations` would otherwise
   * produce `Infinity`/`NaN`; those unmeasurable cases are reported as 0.
   */
  private static toResult(name: string, operations: number, duration: number): BenchmarkResult {
    return {
      name,
      operations,
      duration,
      opsPerSecond: duration > 0 ? (operations / duration) * 1000 : 0,
      avgLatency: operations > 0 ? duration / operations : 0,
    };
  }

  /**
   * Create the `WASMVectorSearch` instance under test.
   *
   * The database handle is a stub whose statements do nothing; the benchmarked
   * calls receive their vectors directly as arguments.
   * NOTE(review): presumably none of the benchmarked methods query the
   * database - confirm against WASMVectorSearch.
   */
  private createWasmSearch(): WASMVectorSearch {
    const mockDb = {
      prepare: () => ({ all: () => [], get: () => null, run: () => ({}) }),
      exec: () => {},
    };

    return new WASMVectorSearch(mockDb, { enableWASM: true });
  }

  /**
   * Generate a random vector with components drawn from [-1, 1) and scaled
   * to unit length.
   */
  private generateRandomVector(): Float32Array {
    const vector = new Float32Array(this.dimensions);
    let sumOfSquares = 0;

    for (let i = 0; i < vector.length; i++) {
      vector[i] = Math.random() * 2 - 1;
      sumOfSquares += vector[i] * vector[i];
    }

    const norm = Math.sqrt(sumOfSquares);
    if (norm === 0) {
      // Zero-length or all-zero vector: dividing would fill it with NaN.
      return vector;
    }

    for (let i = 0; i < vector.length; i++) {
      vector[i] /= norm;
    }

    return vector;
  }

  /**
   * Pure JavaScript cosine similarity (baseline).
   *
   * @returns The cosine of the angle between `a` and `b`, or 0 when either
   *          vector has zero magnitude.
   */
  private cosineSimilarityJS(a: Float32Array, b: Float32Array): number {
    let dotProduct = 0;
    let normA = 0;
    let normB = 0;

    for (let i = 0; i < a.length; i++) {
      dotProduct += a[i] * b[i];
      normA += a[i] * a[i];
      normB += b[i] * b[i];
    }

    const denom = Math.sqrt(normA) * Math.sqrt(normB);
    return denom === 0 ? 0 : dotProduct / denom;
  }

  /**
   * Benchmark cosine similarity (pure JS).
   *
   * @param iterations Number of timed similarity computations.
   */
  async benchmarkJSSimilarity(iterations: number = 10000): Promise<BenchmarkResult> {
    const vectorA = this.generateRandomVector();
    const vectorB = this.generateRandomVector();

    // Untimed warm-up so the timed loop measures steady-state code.
    const warmup = Math.min(iterations, VectorBenchmark.WARMUP_ITERATIONS);
    for (let i = 0; i < warmup; i++) {
      this.cosineSimilarityJS(vectorA, vectorB);
    }

    const start = performance.now();
    for (let i = 0; i < iterations; i++) {
      this.cosineSimilarityJS(vectorA, vectorB);
    }
    const duration = performance.now() - start;

    return VectorBenchmark.toResult('Cosine Similarity (Pure JS)', iterations, duration);
  }

  /**
   * Benchmark cosine similarity through `WASMVectorSearch.cosineSimilarity`.
   *
   * @param iterations Number of timed similarity computations.
   */
  async benchmarkWASMSimilarity(iterations: number = 10000): Promise<BenchmarkResult> {
    const wasmSearch = this.createWasmSearch();
    const vectorA = this.generateRandomVector();
    const vectorB = this.generateRandomVector();

    // Same untimed warm-up as the JS baseline so both paths are compared fairly.
    const warmup = Math.min(iterations, VectorBenchmark.WARMUP_ITERATIONS);
    for (let i = 0; i < warmup; i++) {
      wasmSearch.cosineSimilarity(vectorA, vectorB);
    }

    const start = performance.now();
    for (let i = 0; i < iterations; i++) {
      wasmSearch.cosineSimilarity(vectorA, vectorB);
    }
    const duration = performance.now() - start;

    return VectorBenchmark.toResult('Cosine Similarity (WASM-Optimized)', iterations, duration);
  }

  /**
   * Benchmark one `batchSimilarity` call of a query against `vectorCount`
   * vectors; each vector counts as one operation.
   */
  async benchmarkBatchSimilarity(): Promise<BenchmarkResult> {
    const wasmSearch = this.createWasmSearch();
    const query = this.generateRandomVector();
    const vectors: Float32Array[] = [];

    for (let i = 0; i < this.vectorCount; i++) {
      vectors.push(this.generateRandomVector());
    }

    const start = performance.now();
    wasmSearch.batchSimilarity(query, vectors);
    const duration = performance.now() - start;

    return VectorBenchmark.toResult('Batch Similarity Search', this.vectorCount, duration);
  }

  /**
   * Benchmark a single k-NN query against an index built from `vectorCount`
   * vectors. Index construction is excluded from the timed section.
   *
   * @param k Number of neighbours requested from `searchIndex`.
   */
  async benchmarkKNNSearch(k: number = 10): Promise<BenchmarkResult> {
    const wasmSearch = this.createWasmSearch();
    const query = this.generateRandomVector();
    const vectors: Float32Array[] = [];
    const ids: number[] = [];

    for (let i = 0; i < this.vectorCount; i++) {
      vectors.push(this.generateRandomVector());
      ids.push(i);
    }

    // Build index
    wasmSearch.buildIndex(vectors, ids);

    const start = performance.now();
    wasmSearch.searchIndex(query, k);
    const duration = performance.now() - start;

    return VectorBenchmark.toResult(`k-NN Search (k=${k}, n=${this.vectorCount})`, 1, duration);
  }

  /**
   * Run all benchmarks in sequence and print a results table plus a short
   * summary to the console.
   */
  async runAll(): Promise<void> {
    const rule = '='.repeat(80);

    console.log(rule);
    console.log('WASM Vector Operations Benchmark');
    console.log(rule);
    console.log(`Configuration: ${this.dimensions}D vectors, ${this.vectorCount} dataset size\n`);

    const results: BenchmarkResult[] = [];

    // Cosine similarity benchmarks
    console.log('Running similarity benchmarks...');
    const jsResult = await this.benchmarkJSSimilarity();
    results.push(jsResult);

    const wasmResult = await this.benchmarkWASMSimilarity();
    results.push(wasmResult);

    // A baseline that could not be measured makes the ratio meaningless.
    const speedup =
      jsResult.opsPerSecond > 0 ? wasmResult.opsPerSecond / jsResult.opsPerSecond : Number.NaN;
    const speedupLabel = Number.isFinite(speedup) ? `${speedup.toFixed(2)}x` : 'n/a';
    console.log(` Speedup: ${speedupLabel}\n`);

    // Batch similarity
    console.log('Running batch similarity benchmark...');
    const batchResult = await this.benchmarkBatchSimilarity();
    results.push(batchResult);

    // k-NN search
    console.log('Running k-NN search benchmark...');
    const knnResult = await this.benchmarkKNNSearch();
    results.push(knnResult);

    // Print results table
    console.log('\n' + rule);
    console.log('Results:');
    console.log(rule);
    console.log(
      'Benchmark'.padEnd(40) +
        'Operations'.padStart(12) +
        'Duration (ms)'.padStart(15) +
        'Ops/sec'.padStart(15)
    );
    console.log('-'.repeat(80));

    for (const result of results) {
      console.log(
        result.name.padEnd(40) +
          result.operations.toLocaleString().padStart(12) +
          result.duration.toFixed(2).padStart(15) +
          result.opsPerSecond.toLocaleString(undefined, { maximumFractionDigits: 0 }).padStart(15)
      );
    }

    console.log(rule);
    console.log('\nKey Findings:');
    console.log(`- WASM provides ${speedupLabel} speedup for cosine similarity`);
    console.log(`- Batch processing: ${batchResult.opsPerSecond.toFixed(0)} vectors/sec`);
    console.log(`- k-NN search latency: ${knnResult.avgLatency.toFixed(2)}ms`);
    console.log(rule);
  }
}
|
|
247
|
+
|
|
248
|
+
// Run benchmarks
|
|
249
|
+
const benchmark = new VectorBenchmark(384, 1000);
benchmark.runAll().catch((error: unknown) => {
  console.error(error);
  // Report the failure through the exit status so scripts and CI notice it.
  process.exitCode = 1;
});
|
package/src/cli/agentdb-cli.ts
CHANGED
|
@@ -45,7 +45,7 @@ const log = {
|
|
|
45
45
|
};
|
|
46
46
|
|
|
47
47
|
class AgentDBCLI {
|
|
48
|
-
|
|
48
|
+
public db?: any; // Database instance from createDatabase (public for init command)
|
|
49
49
|
private causalGraph?: CausalMemoryGraph;
|
|
50
50
|
private causalRecall?: CausalRecall;
|
|
51
51
|
private explainableRecall?: ExplainableRecall;
|
|
@@ -749,10 +749,49 @@ async function handleInitCommand(args: string[]) {
|
|
|
749
749
|
return;
|
|
750
750
|
}
|
|
751
751
|
|
|
752
|
+
// Create parent directories if needed
|
|
753
|
+
const parentDir = path.dirname(dbPath);
|
|
754
|
+
if (parentDir !== '.' && !fs.existsSync(parentDir)) {
|
|
755
|
+
log.info(`Creating directory: ${parentDir}`);
|
|
756
|
+
fs.mkdirSync(parentDir, { recursive: true });
|
|
757
|
+
}
|
|
758
|
+
|
|
752
759
|
// Create new database with schemas
|
|
753
760
|
const cli = new AgentDBCLI();
|
|
754
761
|
await cli.initialize(dbPath);
|
|
755
762
|
|
|
763
|
+
// CRITICAL: Save the database to disk
|
|
764
|
+
// sql.js keeps everything in memory until explicitly saved
|
|
765
|
+
if (cli.db && typeof cli.db.save === 'function') {
|
|
766
|
+
cli.db.save();
|
|
767
|
+
} else if (cli.db && typeof cli.db.close === 'function') {
|
|
768
|
+
// close() calls save() internally
|
|
769
|
+
cli.db.close();
|
|
770
|
+
}
|
|
771
|
+
|
|
772
|
+
// Verify database file was created
|
|
773
|
+
if (!fs.existsSync(dbPath)) {
|
|
774
|
+
log.error(`Failed to create database file at ${dbPath}`);
|
|
775
|
+
log.error('The database may be in memory only');
|
|
776
|
+
process.exit(1);
|
|
777
|
+
}
|
|
778
|
+
|
|
779
|
+
// Verify database has tables
|
|
780
|
+
try {
|
|
781
|
+
const db = await createDatabase(dbPath);
|
|
782
|
+
const tables = db.prepare("SELECT name FROM sqlite_master WHERE type='table'").all();
|
|
783
|
+
db.close();
|
|
784
|
+
|
|
785
|
+
if (tables.length === 0) {
|
|
786
|
+
log.warning('Database file created but no tables found');
|
|
787
|
+
log.warning('Schemas may not have been loaded correctly');
|
|
788
|
+
} else {
|
|
789
|
+
log.success(`Database created with ${tables.length} tables`);
|
|
790
|
+
}
|
|
791
|
+
} catch (error) {
|
|
792
|
+
log.warning(`Could not verify database tables: ${(error as Error).message}`);
|
|
793
|
+
}
|
|
794
|
+
|
|
756
795
|
log.success(`✅ AgentDB initialized successfully at ${dbPath}`);
|
|
757
796
|
log.info('Database includes:');
|
|
758
797
|
log.info(' - Core vector tables (episodes, embeddings)');
|
|
@@ -29,10 +29,14 @@ export class EmbeddingService {
|
|
|
29
29
|
if (this.config.provider === 'transformers') {
|
|
30
30
|
// Use transformers.js for local embeddings
|
|
31
31
|
try {
|
|
32
|
-
const
|
|
33
|
-
this.pipeline = await pipeline('feature-extraction', this.config.model);
|
|
32
|
+
const transformers = await import('@xenova/transformers');
|
|
33
|
+
this.pipeline = await transformers.pipeline('feature-extraction', this.config.model);
|
|
34
|
+
console.log(`✅ Transformers.js loaded: ${this.config.model}`);
|
|
34
35
|
} catch (error) {
|
|
35
|
-
|
|
36
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
37
|
+
console.warn(`⚠️ Transformers.js initialization failed: ${errorMessage}`);
|
|
38
|
+
console.warn(' Falling back to mock embeddings for testing');
|
|
39
|
+
console.warn(' Install @xenova/transformers for real embeddings: npm install @xenova/transformers');
|
|
36
40
|
this.pipeline = null;
|
|
37
41
|
}
|
|
38
42
|
}
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Enhanced EmbeddingService with WASM Acceleration
|
|
3
|
+
*
|
|
4
|
+
* Extends the base EmbeddingService with WASM-accelerated batch operations
|
|
5
|
+
* and improved performance for large-scale embedding generation.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { EmbeddingService, EmbeddingConfig } from './EmbeddingService.js';
|
|
9
|
+
import { WASMVectorSearch } from './WASMVectorSearch.js';
|
|
10
|
+
|
|
11
|
+
/**
 * Configuration for `EnhancedEmbeddingService`: the base embedding options
 * plus switches for the `WASMVectorSearch`-backed similarity path and for
 * chunked batch embedding.
 */
export interface EnhancedEmbeddingConfig extends EmbeddingConfig {
  /** Create a `WASMVectorSearch` helper for similarity maths. Defaults to `true`. */
  enableWASM?: boolean;
  /** Split large `embedBatch` inputs into chunks processed concurrently. Defaults to `true`. */
  enableBatchProcessing?: boolean;
  /** Number of texts per chunk when batch processing is enabled. Defaults to `100`. */
  batchSize?: number;
}
|
|
16
|
+
|
|
17
|
+
/**
 * `EmbeddingService` extension that chunks large batch requests and routes
 * similarity calculations through a `WASMVectorSearch` helper when one has
 * been created, falling back to a plain JavaScript cosine similarity.
 */
export class EnhancedEmbeddingService extends EmbeddingService {
  private wasmSearch: WASMVectorSearch | null = null;
  private enhancedConfig: EnhancedEmbeddingConfig;

  /**
   * @param config Base embedding options plus the enhanced switches. Missing
   *               switches default to `enableWASM: true`,
   *               `enableBatchProcessing: true`, `batchSize: 100`.
   */
  constructor(config: EnhancedEmbeddingConfig) {
    super(config);
    this.enhancedConfig = {
      enableWASM: true,
      enableBatchProcessing: true,
      batchSize: 100,
      ...config,
    };

    if (this.enhancedConfig.enableWASM) {
      this.initializeWASM();
    }
  }

  /**
   * Resolve the configured batch size to a usable chunk length.
   *
   * Anything that is not a number >= 1 (undefined, 0, NaN, negatives) falls
   * back to 100. A negative value in particular would make the chunking loop
   * in `embedBatch` step backwards and never terminate.
   */
  private effectiveBatchSize(): number {
    const requested = this.enhancedConfig.batchSize;
    return typeof requested === 'number' && requested >= 1 ? Math.floor(requested) : 100;
  }

  /**
   * Create the `WASMVectorSearch` helper used for similarity calculations.
   *
   * The helper is given a stub database whose statements do nothing.
   * NOTE(review): presumably the similarity methods used here never query the
   * database - confirm against WASMVectorSearch.
   */
  private initializeWASM(): void {
    const mockDb = {
      prepare: () => ({ all: () => [], get: () => null, run: () => ({}) }),
      exec: () => {},
    };

    this.wasmSearch = new WASMVectorSearch(mockDb, {
      enableWASM: true,
      batchSize: this.effectiveBatchSize(),
    });
  }

  /**
   * Embed many texts.
   *
   * Inputs with fewer than 10 texts, or any input when batch processing is
   * disabled, are passed straight to the base implementation. Otherwise the
   * texts are split into chunks of the configured batch size, all chunks are
   * dispatched at once via `Promise.all` (there is no concurrency cap), and
   * the results are concatenated in input order.
   *
   * @param texts Texts to embed.
   * @returns One embedding per input text, in the same order.
   */
  async embedBatch(texts: string[]): Promise<Float32Array[]> {
    if (!this.enhancedConfig.enableBatchProcessing || texts.length < 10) {
      return super.embedBatch(texts);
    }

    const batchSize = this.effectiveBatchSize();
    const batches: string[][] = [];

    // Split into batches
    for (let i = 0; i < texts.length; i += batchSize) {
      batches.push(texts.slice(i, i + batchSize));
    }

    // Process batches in parallel
    const results = await Promise.all(
      batches.map(batch => super.embedBatch(batch))
    );

    // Flatten results
    return results.flat();
  }

  /**
   * Embed two texts concurrently and return their cosine similarity, using
   * the `WASMVectorSearch` helper when it exists.
   */
  async similarity(textA: string, textB: string): Promise<number> {
    const [embeddingA, embeddingB] = await Promise.all([
      this.embed(textA),
      this.embed(textB),
    ]);

    if (this.wasmSearch) {
      return this.wasmSearch.cosineSimilarity(embeddingA, embeddingB);
    }

    // Fallback to manual calculation
    return this.cosineSimilarity(embeddingA, embeddingB);
  }

  /**
   * Rank the texts of `corpus` by similarity to `query`.
   *
   * @param query  Text to compare against.
   * @param corpus Candidate texts.
   * @param k      Maximum number of results; values <= 0 (or NaN) yield an
   *               empty list, fractional values are rounded down.
   * @returns Up to `k` entries sorted by descending similarity, each carrying
   *          the text, its score and its index in `corpus`.
   */
  async findMostSimilar(
    query: string,
    corpus: string[],
    k: number = 5
  ): Promise<Array<{ text: string; similarity: number; index: number }>> {
    if (corpus.length === 0) {
      // Nothing to rank; skip the embedding work entirely.
      return [];
    }

    // A negative end index would make slice() drop items from the tail
    // instead of returning none, so clamp before slicing.
    const limit = k > 0 ? Math.floor(k) : 0;

    const queryEmbedding = await this.embed(query);
    const corpusEmbeddings = await this.embedBatch(corpus);

    let similarities: number[];

    if (this.wasmSearch) {
      similarities = this.wasmSearch.batchSimilarity(queryEmbedding, corpusEmbeddings);
    } else {
      similarities = corpusEmbeddings.map(emb =>
        this.cosineSimilarity(queryEmbedding, emb)
      );
    }

    // Create results with indices
    const results = corpus.map((text, index) => ({
      text,
      similarity: similarities[index],
      index,
    }));

    // Sort by similarity and take top k
    results.sort((a, b) => b.similarity - a.similarity);
    return results.slice(0, limit);
  }

  /**
   * Report the embedding cache size and the availability flags returned by
   * the `WASMVectorSearch` helper (both `false` when no helper exists).
   */
  getStats(): {
    cacheSize: number;
    wasmEnabled: boolean;
    simdEnabled: boolean;
  } {
    const wasmStats = this.wasmSearch?.getStats();

    return {
      // The cache lives on the base class and is not visible to subclasses,
      // hence the cast. NOTE(review): assumes it exposes a numeric `size`
      // (e.g. a Map) - confirm against EmbeddingService. Reports 0 instead
      // of throwing if it is absent.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      cacheSize: (this as any).cache?.size ?? 0,
      wasmEnabled: wasmStats?.wasmAvailable ?? false,
      simdEnabled: wasmStats?.simdAvailable ?? false,
    };
  }

  /**
   * Plain JavaScript cosine similarity used when no `WASMVectorSearch`
   * helper exists.
   *
   * @returns The cosine of the angle between `a` and `b`, or 0 when either
   *          vector has zero magnitude.
   */
  private cosineSimilarity(a: Float32Array, b: Float32Array): number {
    let dotProduct = 0;
    let normA = 0;
    let normB = 0;

    for (let i = 0; i < a.length; i++) {
      dotProduct += a[i] * b[i];
      normA += a[i] * a[i];
      normB += b[i] * b[i];
    }

    const denom = Math.sqrt(normA) * Math.sqrt(normB);
    return denom === 0 ? 0 : dotProduct / denom;
  }
}
|