@soulcraft/brainy 3.32.2 → 3.35.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +175 -0
- package/dist/augmentations/typeMatching/brainyTypes.d.ts +5 -1
- package/dist/augmentations/typeMatching/brainyTypes.js +14 -7
- package/dist/brainy.d.ts +31 -0
- package/dist/brainy.js +119 -34
- package/dist/hnsw/hnswIndex.d.ts +24 -0
- package/dist/hnsw/hnswIndex.js +137 -0
- package/dist/hnsw/hnswIndexOptimized.d.ts +2 -13
- package/dist/hnsw/hnswIndexOptimized.js +8 -37
- package/dist/importers/SmartExcelImporter.js +12 -0
- package/dist/interfaces/IIndex.d.ts +186 -0
- package/dist/interfaces/IIndex.js +15 -0
- package/dist/neural/embeddedTypeEmbeddings.d.ts +34 -0
- package/dist/neural/embeddedTypeEmbeddings.js +96 -0
- package/dist/neural/entityExtractor.d.ts +2 -0
- package/dist/neural/entityExtractor.js +21 -42
- package/dist/neural/naturalLanguageProcessor.d.ts +2 -1
- package/dist/neural/naturalLanguageProcessor.js +17 -31
- package/dist/storage/adapters/baseStorageAdapter.d.ts +54 -0
- package/dist/storage/adapters/baseStorageAdapter.js +105 -10
- package/dist/storage/adapters/fileSystemStorage.d.ts +32 -0
- package/dist/storage/adapters/fileSystemStorage.js +66 -0
- package/dist/storage/adapters/gcsStorage.d.ts +45 -0
- package/dist/storage/adapters/gcsStorage.js +122 -4
- package/dist/storage/adapters/memoryStorage.d.ts +32 -0
- package/dist/storage/adapters/memoryStorage.js +43 -0
- package/dist/storage/adapters/opfsStorage.d.ts +36 -0
- package/dist/storage/adapters/opfsStorage.js +101 -0
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +45 -0
- package/dist/storage/adapters/s3CompatibleStorage.js +123 -0
- package/package.json +5 -2
package/dist/hnsw/hnswIndex.js
CHANGED
|
@@ -20,12 +20,14 @@ export class HNSWIndex {
|
|
|
20
20
|
this.MAX_TRACKED_LEVELS = 10; // Only track top levels for memory efficiency
|
|
21
21
|
this.dimension = null;
|
|
22
22
|
this.useParallelization = true; // Whether to use parallelization for performance-critical operations
|
|
23
|
+
this.storage = null; // Storage adapter for HNSW persistence (v3.35.0+)
|
|
23
24
|
this.config = { ...DEFAULT_CONFIG, ...config };
|
|
24
25
|
this.distanceFunction = distanceFunction;
|
|
25
26
|
this.useParallelization =
|
|
26
27
|
options.useParallelization !== undefined
|
|
27
28
|
? options.useParallelization
|
|
28
29
|
: true;
|
|
30
|
+
this.storage = options.storage || null;
|
|
29
31
|
}
|
|
30
32
|
/**
|
|
31
33
|
* Set whether to use parallelization for performance-critical operations
|
|
@@ -182,6 +184,19 @@ export class HNSWIndex {
|
|
|
182
184
|
if (neighbor.connections.get(level).size > this.config.M) {
|
|
183
185
|
this.pruneConnections(neighbor, level);
|
|
184
186
|
}
|
|
187
|
+
// Persist updated neighbor HNSW data (v3.35.0+)
|
|
188
|
+
if (this.storage) {
|
|
189
|
+
const neighborConnectionsObj = {};
|
|
190
|
+
for (const [lvl, nounIds] of neighbor.connections.entries()) {
|
|
191
|
+
neighborConnectionsObj[lvl.toString()] = Array.from(nounIds);
|
|
192
|
+
}
|
|
193
|
+
this.storage.saveHNSWData(neighborId, {
|
|
194
|
+
level: neighbor.level,
|
|
195
|
+
connections: neighborConnectionsObj
|
|
196
|
+
}).catch((error) => {
|
|
197
|
+
console.error(`Failed to persist neighbor HNSW data for ${neighborId}:`, error);
|
|
198
|
+
});
|
|
199
|
+
}
|
|
185
200
|
}
|
|
186
201
|
// Update entry point for the next level
|
|
187
202
|
if (nearestNouns.size > 0) {
|
|
@@ -213,6 +228,27 @@ export class HNSWIndex {
|
|
|
213
228
|
}
|
|
214
229
|
this.highLevelNodes.get(nounLevel).add(id);
|
|
215
230
|
}
|
|
231
|
+
// Persist HNSW graph data to storage (v3.35.0+)
|
|
232
|
+
if (this.storage) {
|
|
233
|
+
// Convert connections Map to serializable format
|
|
234
|
+
const connectionsObj = {};
|
|
235
|
+
for (const [level, nounIds] of noun.connections.entries()) {
|
|
236
|
+
connectionsObj[level.toString()] = Array.from(nounIds);
|
|
237
|
+
}
|
|
238
|
+
await this.storage.saveHNSWData(id, {
|
|
239
|
+
level: nounLevel,
|
|
240
|
+
connections: connectionsObj
|
|
241
|
+
}).catch((error) => {
|
|
242
|
+
console.error(`Failed to persist HNSW data for ${id}:`, error);
|
|
243
|
+
});
|
|
244
|
+
// Persist system data (entry point and max level)
|
|
245
|
+
await this.storage.saveHNSWSystem({
|
|
246
|
+
entryPointId: this.entryPointId,
|
|
247
|
+
maxLevel: this.maxLevel
|
|
248
|
+
}).catch((error) => {
|
|
249
|
+
console.error('Failed to persist HNSW system data:', error);
|
|
250
|
+
});
|
|
251
|
+
}
|
|
216
252
|
return id;
|
|
217
253
|
}
|
|
218
254
|
/**
|
|
@@ -451,6 +487,107 @@ export class HNSWIndex {
|
|
|
451
487
|
}
|
|
452
488
|
return nodesAtLevel;
|
|
453
489
|
}
|
|
490
|
+
/**
|
|
491
|
+
* Rebuild HNSW index from persisted graph data (v3.35.0+)
|
|
492
|
+
*
|
|
493
|
+
* This is a production-grade O(N) rebuild that restores the pre-computed graph structure
|
|
494
|
+
* from storage. Much faster than re-building which is O(N log N).
|
|
495
|
+
*
|
|
496
|
+
* Designed for millions of entities with:
|
|
497
|
+
* - Cursor-based pagination (no memory overflow)
|
|
498
|
+
* - Batch processing (configurable batch size)
|
|
499
|
+
* - Progress reporting (optional callback)
|
|
500
|
+
* - Error recovery (continues on partial failures)
|
|
501
|
+
* - Lazy mode support (memory-efficient for constrained environments)
|
|
502
|
+
*
|
|
503
|
+
* @param options Rebuild options
|
|
504
|
+
* @returns Promise that resolves when rebuild is complete
|
|
505
|
+
*/
|
|
506
|
+
async rebuild(options = {}) {
|
|
507
|
+
if (!this.storage) {
|
|
508
|
+
console.warn('HNSW rebuild skipped: no storage adapter configured');
|
|
509
|
+
return;
|
|
510
|
+
}
|
|
511
|
+
const batchSize = options.batchSize || 1000;
|
|
512
|
+
const lazy = options.lazy || false;
|
|
513
|
+
try {
|
|
514
|
+
// Step 1: Clear existing in-memory index
|
|
515
|
+
this.clear();
|
|
516
|
+
// Step 2: Load system data (entry point, max level)
|
|
517
|
+
const systemData = await this.storage.getHNSWSystem();
|
|
518
|
+
if (systemData) {
|
|
519
|
+
this.entryPointId = systemData.entryPointId;
|
|
520
|
+
this.maxLevel = systemData.maxLevel;
|
|
521
|
+
}
|
|
522
|
+
// Step 3: Paginate through all nouns and restore HNSW graph structure
|
|
523
|
+
let loadedCount = 0;
|
|
524
|
+
let totalCount = undefined;
|
|
525
|
+
let hasMore = true;
|
|
526
|
+
let cursor = undefined;
|
|
527
|
+
while (hasMore) {
|
|
528
|
+
// Fetch batch of nouns from storage (cast needed as method is not in base interface)
|
|
529
|
+
const result = await this.storage.getNounsWithPagination({
|
|
530
|
+
limit: batchSize,
|
|
531
|
+
cursor
|
|
532
|
+
});
|
|
533
|
+
// Set total count on first batch
|
|
534
|
+
if (totalCount === undefined && result.totalCount !== undefined) {
|
|
535
|
+
totalCount = result.totalCount;
|
|
536
|
+
}
|
|
537
|
+
// Process each noun in the batch
|
|
538
|
+
for (const nounData of result.items) {
|
|
539
|
+
try {
|
|
540
|
+
// Load HNSW graph data for this entity
|
|
541
|
+
const hnswData = await this.storage.getHNSWData(nounData.id);
|
|
542
|
+
if (!hnswData) {
|
|
543
|
+
// No HNSW data - skip (might be entity added before persistence)
|
|
544
|
+
continue;
|
|
545
|
+
}
|
|
546
|
+
// Create noun object with restored connections
|
|
547
|
+
const noun = {
|
|
548
|
+
id: nounData.id,
|
|
549
|
+
vector: lazy ? [] : nounData.vector, // Empty vector in lazy mode
|
|
550
|
+
connections: new Map(),
|
|
551
|
+
level: hnswData.level
|
|
552
|
+
};
|
|
553
|
+
// Restore connections from persisted data
|
|
554
|
+
for (const [levelStr, nounIds] of Object.entries(hnswData.connections)) {
|
|
555
|
+
const level = parseInt(levelStr, 10);
|
|
556
|
+
noun.connections.set(level, new Set(nounIds));
|
|
557
|
+
}
|
|
558
|
+
// Add to in-memory index
|
|
559
|
+
this.nouns.set(nounData.id, noun);
|
|
560
|
+
// Track high-level nodes for O(1) entry point selection
|
|
561
|
+
if (noun.level >= 2 && noun.level <= this.MAX_TRACKED_LEVELS) {
|
|
562
|
+
if (!this.highLevelNodes.has(noun.level)) {
|
|
563
|
+
this.highLevelNodes.set(noun.level, new Set());
|
|
564
|
+
}
|
|
565
|
+
this.highLevelNodes.get(noun.level).add(nounData.id);
|
|
566
|
+
}
|
|
567
|
+
loadedCount++;
|
|
568
|
+
}
|
|
569
|
+
catch (error) {
|
|
570
|
+
// Log error but continue (robust error recovery)
|
|
571
|
+
console.error(`Failed to rebuild HNSW data for ${nounData.id}:`, error);
|
|
572
|
+
}
|
|
573
|
+
}
|
|
574
|
+
// Report progress
|
|
575
|
+
if (options.onProgress && totalCount !== undefined) {
|
|
576
|
+
options.onProgress(loadedCount, totalCount);
|
|
577
|
+
}
|
|
578
|
+
// Check for more data
|
|
579
|
+
hasMore = result.hasMore;
|
|
580
|
+
cursor = result.nextCursor;
|
|
581
|
+
}
|
|
582
|
+
console.log(`HNSW index rebuilt successfully: ${loadedCount} entities, ` +
|
|
583
|
+
`${this.maxLevel + 1} levels, entry point: ${this.entryPointId || 'none'}` +
|
|
584
|
+
(lazy ? ' (lazy mode - vectors loaded on-demand)' : ''));
|
|
585
|
+
}
|
|
586
|
+
catch (error) {
|
|
587
|
+
console.error('HNSW rebuild failed:', error);
|
|
588
|
+
throw new Error(`Failed to rebuild HNSW index: ${error}`);
|
|
589
|
+
}
|
|
590
|
+
}
|
|
454
591
|
/**
|
|
455
592
|
* Get level statistics for understanding the hierarchy
|
|
456
593
|
*/
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
*/
|
|
6
6
|
import { DistanceFunction, HNSWConfig, Vector, VectorDocument } from '../coreTypes.js';
|
|
7
7
|
import { HNSWIndex } from './hnswIndex.js';
|
|
8
|
-
import {
|
|
8
|
+
import type { BaseStorage } from '../storage/baseStorage.js';
|
|
9
9
|
export interface HNSWOptimizedConfig extends HNSWConfig {
|
|
10
10
|
memoryThreshold?: number;
|
|
11
11
|
productQuantization?: {
|
|
@@ -88,7 +88,6 @@ declare class ProductQuantizer {
|
|
|
88
88
|
export declare class HNSWIndexOptimized extends HNSWIndex {
|
|
89
89
|
private optimizedConfig;
|
|
90
90
|
private productQuantizer;
|
|
91
|
-
private storage;
|
|
92
91
|
private useDiskBasedIndex;
|
|
93
92
|
private useProductQuantization;
|
|
94
93
|
private quantizedVectors;
|
|
@@ -96,7 +95,7 @@ export declare class HNSWIndexOptimized extends HNSWIndex {
|
|
|
96
95
|
private vectorCount;
|
|
97
96
|
private memoryUpdateLock;
|
|
98
97
|
private unifiedCache;
|
|
99
|
-
constructor(config: Partial<HNSWOptimizedConfig>, distanceFunction: DistanceFunction, storage?:
|
|
98
|
+
constructor(config: Partial<HNSWOptimizedConfig>, distanceFunction: DistanceFunction, storage?: BaseStorage | null);
|
|
100
99
|
/**
|
|
101
100
|
* Thread-safe method to update memory usage
|
|
102
101
|
* @param memoryDelta Change in memory usage (can be negative)
|
|
@@ -145,16 +144,6 @@ export declare class HNSWIndexOptimized extends HNSWIndex {
|
|
|
145
144
|
* @returns Estimated memory usage in bytes
|
|
146
145
|
*/
|
|
147
146
|
getMemoryUsage(): number;
|
|
148
|
-
/**
|
|
149
|
-
* Set the storage adapter
|
|
150
|
-
* @param storage Storage adapter
|
|
151
|
-
*/
|
|
152
|
-
setStorage(storage: StorageAdapter): void;
|
|
153
|
-
/**
|
|
154
|
-
* Get the storage adapter
|
|
155
|
-
* @returns Storage adapter or null if not set
|
|
156
|
-
*/
|
|
157
|
-
getStorage(): StorageAdapter | null;
|
|
158
147
|
/**
|
|
159
148
|
* Set whether to use disk-based index
|
|
160
149
|
* @param useDiskBasedIndex Whether to use disk-based index
|
|
@@ -211,10 +211,9 @@ class ProductQuantizer {
|
|
|
211
211
|
*/
|
|
212
212
|
export class HNSWIndexOptimized extends HNSWIndex {
|
|
213
213
|
constructor(config = {}, distanceFunction, storage = null) {
|
|
214
|
-
// Initialize base HNSW index with standard config
|
|
215
|
-
super(config, distanceFunction);
|
|
214
|
+
// Initialize base HNSW index with standard config and storage
|
|
215
|
+
super(config, distanceFunction, { storage: storage || undefined });
|
|
216
216
|
this.productQuantizer = null;
|
|
217
|
-
this.storage = null;
|
|
218
217
|
this.useDiskBasedIndex = false;
|
|
219
218
|
this.useProductQuantization = false;
|
|
220
219
|
this.quantizedVectors = new Map();
|
|
@@ -224,8 +223,6 @@ export class HNSWIndexOptimized extends HNSWIndex {
|
|
|
224
223
|
this.memoryUpdateLock = Promise.resolve();
|
|
225
224
|
// Set optimized config
|
|
226
225
|
this.optimizedConfig = { ...DEFAULT_OPTIMIZED_CONFIG, ...config };
|
|
227
|
-
// Set storage adapter
|
|
228
|
-
this.storage = storage;
|
|
229
226
|
// Initialize product quantizer if enabled
|
|
230
227
|
if (this.optimizedConfig.productQuantization?.enabled) {
|
|
231
228
|
this.useProductQuantization = true;
|
|
@@ -302,18 +299,9 @@ export class HNSWIndexOptimized extends HNSWIndex {
|
|
|
302
299
|
return await super.addItem({ id, vector: reconstructedVector });
|
|
303
300
|
}
|
|
304
301
|
// If disk-based index is active and storage is available, store the vector
|
|
305
|
-
if (this.useDiskBasedIndex
|
|
306
|
-
//
|
|
307
|
-
|
|
308
|
-
id,
|
|
309
|
-
vector,
|
|
310
|
-
connections: new Map(),
|
|
311
|
-
level: 0
|
|
312
|
-
};
|
|
313
|
-
// Store the noun
|
|
314
|
-
this.storage.saveNoun(noun).catch((error) => {
|
|
315
|
-
console.error(`Failed to save noun ${id} to storage:`, error);
|
|
316
|
-
});
|
|
302
|
+
if (this.useDiskBasedIndex) {
|
|
303
|
+
// Storage is handled by the base class now via HNSW persistence
|
|
304
|
+
// No additional storage needed here
|
|
317
305
|
}
|
|
318
306
|
// Add the vector to the in-memory index
|
|
319
307
|
return await super.addItem(item);
|
|
@@ -349,12 +337,8 @@ export class HNSWIndexOptimized extends HNSWIndex {
|
|
|
349
337
|
if (this.useProductQuantization) {
|
|
350
338
|
this.quantizedVectors.delete(id);
|
|
351
339
|
}
|
|
352
|
-
// If disk-based index is active
|
|
353
|
-
|
|
354
|
-
this.storage.deleteNoun(id).catch((error) => {
|
|
355
|
-
console.error(`Failed to delete noun ${id} from storage:`, error);
|
|
356
|
-
});
|
|
357
|
-
}
|
|
340
|
+
// If disk-based index is active, removal is handled by base class
|
|
341
|
+
// No additional removal needed here
|
|
358
342
|
// Update memory usage estimate (async operation, but don't block removal)
|
|
359
343
|
this.getMemoryUsageAsync().then((currentMemoryUsage) => {
|
|
360
344
|
if (currentMemoryUsage.vectorCount > 0) {
|
|
@@ -428,20 +412,7 @@ export class HNSWIndexOptimized extends HNSWIndex {
|
|
|
428
412
|
getMemoryUsage() {
|
|
429
413
|
return this.memoryUsage;
|
|
430
414
|
}
|
|
431
|
-
|
|
432
|
-
* Set the storage adapter
|
|
433
|
-
* @param storage Storage adapter
|
|
434
|
-
*/
|
|
435
|
-
setStorage(storage) {
|
|
436
|
-
this.storage = storage;
|
|
437
|
-
}
|
|
438
|
-
/**
|
|
439
|
-
* Get the storage adapter
|
|
440
|
-
* @returns Storage adapter or null if not set
|
|
441
|
-
*/
|
|
442
|
-
getStorage() {
|
|
443
|
-
return this.storage;
|
|
444
|
-
}
|
|
415
|
+
// Storage methods removed - now handled by base class
|
|
445
416
|
/**
|
|
446
417
|
* Set whether to use disk-based index
|
|
447
418
|
* @param useDiskBasedIndex Whether to use disk-based index
|
|
@@ -37,6 +37,18 @@ export class SmartExcelImporter {
|
|
|
37
37
|
const opts = {
|
|
38
38
|
enableNeuralExtraction: true,
|
|
39
39
|
enableRelationshipInference: true,
|
|
40
|
+
// CONCEPT EXTRACTION PRODUCTION-READY (v3.33.0+):
|
|
41
|
+
// Type embeddings are now pre-computed at build time - zero runtime cost!
|
|
42
|
+
// All 31 noun types + 40 verb types instantly available
|
|
43
|
+
//
|
|
44
|
+
// Performance profile:
|
|
45
|
+
// - Type embeddings: INSTANT (pre-computed at build time, ~100KB in-memory)
|
|
46
|
+
// - Model loading: ~2-5 seconds (one-time, cached after first use)
|
|
47
|
+
// - Per-row extraction: ~50-200ms depending on definition length
|
|
48
|
+
// - 100 rows: ~5-20 seconds total (production ready)
|
|
49
|
+
// - 1000 rows: ~50-200 seconds (disable if needed via enableConceptExtraction: false)
|
|
50
|
+
//
|
|
51
|
+
// Enabled by default for production use.
|
|
40
52
|
enableConceptExtraction: true,
|
|
41
53
|
confidenceThreshold: 0.6,
|
|
42
54
|
termColumn: 'term|name|title|concept',
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unified Index Interface (v3.35.0+)
|
|
3
|
+
*
|
|
4
|
+
* Standardizes index lifecycle across all index types in Brainy.
|
|
5
|
+
* All indexes (HNSW Vector, Graph Adjacency, Metadata Field) implement this interface
|
|
6
|
+
* for consistent rebuild, clear, and stats operations.
|
|
7
|
+
*
|
|
8
|
+
* This enables:
|
|
9
|
+
* - Parallel index rebuilds during initialization
|
|
10
|
+
* - Consistent index management across the system
|
|
11
|
+
* - Easy addition of new index types
|
|
12
|
+
* - Unified monitoring and health checks
|
|
13
|
+
*/
|
|
14
|
+
/**
|
|
15
|
+
* Index statistics returned by getStats()
|
|
16
|
+
*/
|
|
17
|
+
export interface IndexStats {
|
|
18
|
+
/**
|
|
19
|
+
* Total number of items in the index
|
|
20
|
+
*/
|
|
21
|
+
totalItems: number;
|
|
22
|
+
/**
|
|
23
|
+
* Estimated memory usage in bytes (optional)
|
|
24
|
+
*/
|
|
25
|
+
memoryUsage?: number;
|
|
26
|
+
/**
|
|
27
|
+
* Timestamp of last rebuild (optional)
|
|
28
|
+
*/
|
|
29
|
+
lastRebuilt?: number;
|
|
30
|
+
/**
|
|
31
|
+
* Index-specific statistics (optional)
|
|
32
|
+
* - HNSW: { maxLevel, entryPointId, levels, avgDegree }
|
|
33
|
+
* - Graph: { totalRelationships, verbTypes }
|
|
34
|
+
* - Metadata: { totalFields, totalEntries }
|
|
35
|
+
*/
|
|
36
|
+
specifics?: Record<string, any>;
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Progress callback for rebuild operations
|
|
40
|
+
* Reports current progress and total count
|
|
41
|
+
*/
|
|
42
|
+
export type RebuildProgressCallback = (loaded: number, total: number) => void;
|
|
43
|
+
/**
|
|
44
|
+
* Rebuild options for index rebuilding
|
|
45
|
+
*/
|
|
46
|
+
export interface RebuildOptions {
|
|
47
|
+
/**
|
|
48
|
+
* Lazy mode: Load structure only, data on-demand
|
|
49
|
+
* Saves memory at cost of first-access latency
|
|
50
|
+
* (HNSW: vectors loaded on-demand, Graph: relationships cached, Metadata: lazy field indexing)
|
|
51
|
+
*/
|
|
52
|
+
lazy?: boolean;
|
|
53
|
+
/**
|
|
54
|
+
* Batch size for pagination during rebuild
|
|
55
|
+
* Default: 1000 (tune based on available memory)
|
|
56
|
+
*/
|
|
57
|
+
batchSize?: number;
|
|
58
|
+
/**
|
|
59
|
+
* Progress callback for monitoring rebuild progress
|
|
60
|
+
* Called periodically with (loaded, total) counts
|
|
61
|
+
*/
|
|
62
|
+
onProgress?: RebuildProgressCallback;
|
|
63
|
+
/**
|
|
64
|
+
* Force rebuild even if index appears populated
|
|
65
|
+
* Useful for repairing corrupted indexes
|
|
66
|
+
*/
|
|
67
|
+
force?: boolean;
|
|
68
|
+
}
|
|
69
|
+
/**
|
|
70
|
+
* Unified Index Interface
|
|
71
|
+
*
|
|
72
|
+
* All indexes in Brainy implement this interface for consistent lifecycle management.
|
|
73
|
+
* This enables parallel rebuilds, unified monitoring, and standardized operations.
|
|
74
|
+
*/
|
|
75
|
+
export interface IIndex {
|
|
76
|
+
/**
|
|
77
|
+
* Rebuild index from persisted storage
|
|
78
|
+
*
|
|
79
|
+
* Called during Brainy initialization when:
|
|
80
|
+
* - Container restarts and in-memory indexes are empty
|
|
81
|
+
* - Storage has persisted data but indexes need rebuilding
|
|
82
|
+
* - Force rebuild is requested
|
|
83
|
+
*
|
|
84
|
+
* Implementation must:
|
|
85
|
+
* - Clear existing in-memory state
|
|
86
|
+
* - Load data from storage using pagination
|
|
87
|
+
* - Restore index structure efficiently (O(N) preferred over O(N log N))
|
|
88
|
+
* - Handle millions of entities via batching
|
|
89
|
+
* - Support lazy loading for memory-constrained environments
|
|
90
|
+
* - Provide progress reporting for large datasets
|
|
91
|
+
* - Recover gracefully from partial failures
|
|
92
|
+
*
|
|
93
|
+
* @param options Rebuild options (lazy mode, batch size, progress callback, force)
|
|
94
|
+
* @returns Promise that resolves when rebuild is complete
|
|
95
|
+
* @throws Error if rebuild fails critically (should log warnings for partial failures)
|
|
96
|
+
*/
|
|
97
|
+
rebuild(options?: RebuildOptions): Promise<void>;
|
|
98
|
+
/**
|
|
99
|
+
* Clear all in-memory index data
|
|
100
|
+
*
|
|
101
|
+
* Called when:
|
|
102
|
+
* - User explicitly calls brain.clear()
|
|
103
|
+
* - System needs to reset without rebuilding
|
|
104
|
+
* - Tests need clean state
|
|
105
|
+
*
|
|
106
|
+
* Implementation must:
|
|
107
|
+
* - Clear all in-memory data structures
|
|
108
|
+
* - Reset counters and statistics
|
|
109
|
+
* - NOT delete persisted storage data
|
|
110
|
+
* - Be idempotent (safe to call multiple times)
|
|
111
|
+
*
|
|
112
|
+
* Note: This is a memory-only operation. To delete persisted data,
|
|
113
|
+
* use storage.clear() instead.
|
|
114
|
+
*/
|
|
115
|
+
clear(): void;
|
|
116
|
+
/**
|
|
117
|
+
* Get current index statistics
|
|
118
|
+
*
|
|
119
|
+
* Returns real-time statistics about the index state:
|
|
120
|
+
* - Total items indexed
|
|
121
|
+
* - Memory usage (if available)
|
|
122
|
+
* - Last rebuild timestamp
|
|
123
|
+
* - Index-specific metrics
|
|
124
|
+
*
|
|
125
|
+
* Used for:
|
|
126
|
+
* - Health monitoring
|
|
127
|
+
* - Determining if rebuild is needed
|
|
128
|
+
* - Performance analysis
|
|
129
|
+
* - Debugging
|
|
130
|
+
*
|
|
131
|
+
* @returns Promise that resolves to index statistics
|
|
132
|
+
*/
|
|
133
|
+
getStats(): Promise<IndexStats>;
|
|
134
|
+
/**
|
|
135
|
+
* Get the current size of the index
|
|
136
|
+
*
|
|
137
|
+
* Fast O(1) operation returning the number of items in the index.
|
|
138
|
+
* Used for quick health checks and deciding rebuild strategy.
|
|
139
|
+
*
|
|
140
|
+
* @returns Number of items in the index
|
|
141
|
+
*/
|
|
142
|
+
size(): number;
|
|
143
|
+
}
|
|
144
|
+
/**
|
|
145
|
+
* Extended index interface with cache support (optional)
|
|
146
|
+
*
|
|
147
|
+
* Indexes can optionally implement cache integration for:
|
|
148
|
+
* - Hot/warm/cold tier management
|
|
149
|
+
* - Memory-efficient lazy loading
|
|
150
|
+
* - Adaptive caching based on access patterns
|
|
151
|
+
*/
|
|
152
|
+
export interface ICachedIndex extends IIndex {
|
|
153
|
+
/**
|
|
154
|
+
* Set cache for resource management
|
|
155
|
+
*
|
|
156
|
+
* Enables the index to use UnifiedCache for:
|
|
157
|
+
* - Lazy loading of vectors/data
|
|
158
|
+
* - Hot/warm/cold tier management
|
|
159
|
+
* - Memory pressure handling
|
|
160
|
+
*
|
|
161
|
+
* @param cache UnifiedCache instance
|
|
162
|
+
*/
|
|
163
|
+
setCache?(cache: any): void;
|
|
164
|
+
}
|
|
165
|
+
/**
|
|
166
|
+
* Extended index interface with persistence support (optional)
|
|
167
|
+
*
|
|
168
|
+
* Indexes can optionally implement explicit persistence:
|
|
169
|
+
* - Manual triggering of data saves
|
|
170
|
+
* - Batch write optimization
|
|
171
|
+
* - Checkpoint creation
|
|
172
|
+
*/
|
|
173
|
+
export interface IPersistentIndex extends IIndex {
|
|
174
|
+
/**
|
|
175
|
+
* Manually persist current index state to storage
|
|
176
|
+
*
|
|
177
|
+
* Most indexes auto-persist during operations (e.g., HNSW persists on addItem).
|
|
178
|
+
* This method allows explicit persistence for:
|
|
179
|
+
* - Checkpointing before risky operations
|
|
180
|
+
* - Forced flush before shutdown
|
|
181
|
+
* - Manual backup creation
|
|
182
|
+
*
|
|
183
|
+
* @returns Promise that resolves when persistence is complete
|
|
184
|
+
*/
|
|
185
|
+
persist?(): Promise<void>;
|
|
186
|
+
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unified Index Interface (v3.35.0+)
|
|
3
|
+
*
|
|
4
|
+
* Standardizes index lifecycle across all index types in Brainy.
|
|
5
|
+
* All indexes (HNSW Vector, Graph Adjacency, Metadata Field) implement this interface
|
|
6
|
+
* for consistent rebuild, clear, and stats operations.
|
|
7
|
+
*
|
|
8
|
+
* This enables:
|
|
9
|
+
* - Parallel index rebuilds during initialization
|
|
10
|
+
* - Consistent index management across the system
|
|
11
|
+
* - Easy addition of new index types
|
|
12
|
+
* - Unified monitoring and health checks
|
|
13
|
+
*/
|
|
14
|
+
export {};
|
|
15
|
+
//# sourceMappingURL=IIndex.js.map
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 🧠 BRAINY EMBEDDED TYPE EMBEDDINGS
|
|
3
|
+
*
|
|
4
|
+
* AUTO-GENERATED - DO NOT EDIT
|
|
5
|
+
* Generated: 2025-10-10T01:27:22.642Z
|
|
6
|
+
* Noun Types: 31
|
|
7
|
+
* Verb Types: 40
|
|
8
|
+
*
|
|
9
|
+
* This file contains pre-computed embeddings for all NounTypes and VerbTypes.
|
|
10
|
+
* No runtime computation needed, instant availability!
|
|
11
|
+
*/
|
|
12
|
+
import { NounType, VerbType } from '../types/graphTypes.js';
|
|
13
|
+
import { Vector } from '../coreTypes.js';
|
|
14
|
+
export declare const TYPE_METADATA: {
|
|
15
|
+
nounTypes: number;
|
|
16
|
+
verbTypes: number;
|
|
17
|
+
totalTypes: number;
|
|
18
|
+
embeddingDimensions: number;
|
|
19
|
+
generatedAt: string;
|
|
20
|
+
sizeBytes: {
|
|
21
|
+
embeddings: number;
|
|
22
|
+
base64: number;
|
|
23
|
+
};
|
|
24
|
+
};
|
|
25
|
+
/**
|
|
26
|
+
* Get noun type embeddings as a Map for fast lookup
|
|
27
|
+
* This is called once and cached
|
|
28
|
+
*/
|
|
29
|
+
export declare function getNounTypeEmbeddings(): Map<NounType, Vector>;
|
|
30
|
+
/**
|
|
31
|
+
* Get verb type embeddings as a Map for fast lookup
|
|
32
|
+
* This is called once and cached
|
|
33
|
+
*/
|
|
34
|
+
export declare function getVerbTypeEmbeddings(): Map<VerbType, Vector>;
|