@soulcraft/brainy 3.32.2 → 3.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. package/CHANGELOG.md +175 -0
  2. package/dist/augmentations/typeMatching/brainyTypes.d.ts +5 -1
  3. package/dist/augmentations/typeMatching/brainyTypes.js +14 -7
  4. package/dist/brainy.d.ts +31 -0
  5. package/dist/brainy.js +119 -34
  6. package/dist/hnsw/hnswIndex.d.ts +24 -0
  7. package/dist/hnsw/hnswIndex.js +137 -0
  8. package/dist/hnsw/hnswIndexOptimized.d.ts +2 -13
  9. package/dist/hnsw/hnswIndexOptimized.js +8 -37
  10. package/dist/importers/SmartExcelImporter.js +12 -0
  11. package/dist/interfaces/IIndex.d.ts +186 -0
  12. package/dist/interfaces/IIndex.js +15 -0
  13. package/dist/neural/embeddedTypeEmbeddings.d.ts +34 -0
  14. package/dist/neural/embeddedTypeEmbeddings.js +96 -0
  15. package/dist/neural/entityExtractor.d.ts +2 -0
  16. package/dist/neural/entityExtractor.js +21 -42
  17. package/dist/neural/naturalLanguageProcessor.d.ts +2 -1
  18. package/dist/neural/naturalLanguageProcessor.js +17 -31
  19. package/dist/storage/adapters/baseStorageAdapter.d.ts +54 -0
  20. package/dist/storage/adapters/baseStorageAdapter.js +105 -10
  21. package/dist/storage/adapters/fileSystemStorage.d.ts +32 -0
  22. package/dist/storage/adapters/fileSystemStorage.js +66 -0
  23. package/dist/storage/adapters/gcsStorage.d.ts +45 -0
  24. package/dist/storage/adapters/gcsStorage.js +122 -4
  25. package/dist/storage/adapters/memoryStorage.d.ts +32 -0
  26. package/dist/storage/adapters/memoryStorage.js +43 -0
  27. package/dist/storage/adapters/opfsStorage.d.ts +36 -0
  28. package/dist/storage/adapters/opfsStorage.js +101 -0
  29. package/dist/storage/adapters/s3CompatibleStorage.d.ts +45 -0
  30. package/dist/storage/adapters/s3CompatibleStorage.js +123 -0
  31. package/package.json +5 -2
package/CHANGELOG.md CHANGED
@@ -2,6 +2,181 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
4
4
 
5
+ ### [3.35.0](https://github.com/soulcraftlabs/brainy/compare/v3.34.0...v3.35.0) (2025-10-10)
6
+
7
+ - feat: implement HNSW index rebuild and unified index interface (6a4d1ae)
8
+ - cleaning up (12d78ba)
9
+
10
+
11
+ ### [3.34.0](https://github.com/soulcraftlabs/brainy/compare/v3.33.0...v3.34.0) (2025-10-09)
12
+
13
+ - test: adjust type-matching tests for real embeddings (v3.33.0) (1c5c77e)
14
+ - perf: pre-compute type embeddings at build time (zero runtime cost) (0d649b8)
15
+ - perf: optimize concept extraction for production (15x faster) (87eb60d)
16
+ - perf: implement smart count batching for 10x faster bulk operations (e52bcaf)
17
+
18
+
19
+ ## [3.33.0](https://github.com/soulcraftlabs/brainy/compare/v3.32.5...v3.33.0) (2025-10-09)
20
+
21
+ ### 🚀 Performance - Build-Time Type Embeddings (Zero Runtime Cost)
22
+
23
+ **Production Optimization: All type embeddings are now pre-computed at build time**
24
+
25
+ #### Problem
26
+ Type embeddings for 31 NounTypes + 40 VerbTypes were computed at runtime in 3 different places:
27
+ - `NeuralEntityExtractor` computed noun type embeddings on first use
28
+ - `BrainyTypes` computed all 31+40 type embeddings on init
29
+ - `NaturalLanguageProcessor` computed all 31+40 type embeddings on init
30
+ - **Result**: Every process restart = ~70+ embedding operations = 5-10 second initialization delay
31
+
32
+ #### Solution
33
+ Pre-computed type embeddings at build time (similar to pattern embeddings):
34
+ - Created `scripts/buildTypeEmbeddings.ts` - generates embeddings for all types once during build
35
+ - Created `src/neural/embeddedTypeEmbeddings.ts` - stores pre-computed embeddings as base64 data
36
+ - All consumers now load instant embeddings instead of computing at runtime
37
+
38
+ #### Benefits
39
+ - ✅ **Zero runtime computation** - type embeddings loaded instantly from embedded data
40
+ - ✅ **Survives all restarts** - embeddings bundled in package, no re-computation needed
41
+ - ✅ **All 71 types available** - 31 noun + 40 verb types instantly accessible
42
+ - ✅ **~100KB overhead** - small memory cost for huge performance gain
43
+ - ✅ **Permanent optimization** - build once, fast forever
44
+
45
+ #### Build Process
46
+ ```bash
47
+ # Manual rebuild (if types change)
48
+ npm run build:types:force
49
+
50
+ # Automatic check (integrated into build)
51
+ npm run build # Rebuilds types only if source changed
52
+ ```
53
+
54
+ #### Files Changed
55
+ - `scripts/buildTypeEmbeddings.ts` - Build script to generate type embeddings
56
+ - `scripts/check-type-embeddings.cjs` - Check if rebuild needed
57
+ - `src/neural/embeddedTypeEmbeddings.ts` - Pre-computed embeddings (auto-generated)
58
+ - `src/neural/entityExtractor.ts` - Uses embedded types (no runtime computation)
59
+ - `src/augmentations/typeMatching/brainyTypes.ts` - Uses embedded types (instant init)
60
+ - `src/neural/naturalLanguageProcessor.ts` - Uses embedded types (instant init)
61
+ - `src/importers/SmartExcelImporter.ts` - Updated comments to reflect zero-cost embeddings
62
+ - `package.json` - Added type embedding build scripts
63
+
64
+ #### Impact
65
+ - v3.32.5: Type embeddings computed at runtime (from 2 up to 70+ operations per restart, depending on which components initialize)
66
+ - v3.33.0: Type embeddings loaded instantly (0 operations, pre-computed at build)
67
+ - **Permanent 100% elimination of type embedding runtime cost**
68
+
69
+ ---
70
+
71
+ ### [3.32.5](https://github.com/soulcraftlabs/brainy/compare/v3.32.4...v3.32.5) (2025-10-09)
72
+
73
+ ### 🚀 Performance - Neural Extraction Optimization (15x Faster)
74
+
75
+ **Fixed: Concept extraction now production-ready for large files**
76
+
77
+ #### Problem
78
+ `brain.extractConcepts()` appeared to hang on large Excel/PDF/Markdown files:
79
+ - Previously initialized ALL 31 NounTypes (31 embedding operations)
80
+ - For 100-row Excel file: 3,100+ embedding operations
81
+ - Caused apparent hangs/timeouts in production
82
+
83
+ #### Solution
84
+ Optimized `NeuralEntityExtractor` to only initialize requested types:
85
+ - `extractConcepts()` now only initializes Concept + Topic types (2 embeds vs 31)
86
+ - **15x faster initialization** (31 embeds → 2 embeds)
87
+ - Re-enabled concept extraction by default in Excel importer
88
+
89
+ #### Performance Impact
90
+ - **Small files (<100 rows)**: 5-20 seconds (was: appeared to hang)
91
+ - **Medium files (100-500 rows)**: 20-100 seconds (was: timeout)
92
+ - **Large files (500+ rows)**: Can be disabled if needed via `enableConceptExtraction: false`
93
+
94
+ #### Files Changed
95
+ - `src/neural/entityExtractor.ts`: Lazy type initialization
96
+ - `src/importers/SmartExcelImporter.ts`: Re-enabled with optimization notes
97
+
98
+ ### 🔧 Diagnostics - GCS Initialization Logging
99
+
100
+ **Added: Enhanced logging for GCS bucket scanning**
101
+
102
+ Added detailed diagnostic logs to help debug GCS initialization issues:
103
+ - Shows prefixes being scanned
104
+ - Displays file counts and sample filenames
105
+ - Warns if no entities found
106
+
107
+ #### Files Changed
108
+ - `src/storage/adapters/gcsStorage.ts`: Enhanced `initializeCountsFromScan()` logging
109
+
110
+ ---
111
+
112
+ ### [3.32.3](https://github.com/soulcraftlabs/brainy/compare/v3.32.2...v3.32.3) (2025-10-09)
113
+
114
+ ### ⚡ Performance Optimization - Smart Count Batching for Production Scale
115
+
116
+ **Optimized: 10x faster bulk operations with storage-aware count batching**
117
+
118
+ #### What Changed
119
+ v3.32.2 fixed the critical container restart bug by persisting counts on EVERY operation. This made the system reliable but introduced performance overhead for bulk operations (1000 entities = 1000 GCS writes = ~50 seconds).
120
+
121
+ v3.32.3 introduces **Smart Count Batching** - a storage-type aware optimization that maintains v3.32.2's reliability while dramatically improving bulk operation performance.
122
+
123
+ #### How It Works
124
+ - **Cloud storage** (GCS, S3, R2): Batches count persistence (every 10 operations OR every 5 seconds, whichever comes first)
125
+ - **Local storage** (File System, Memory): Persists immediately (already fast, no benefit from batching)
126
+ - **Graceful shutdown hooks**: SIGTERM/SIGINT handlers flush pending counts before shutdown
127
+
128
+ #### Performance Impact
129
+
130
+ **API Use Case (1-10 entities):**
131
+ - Before: 2 entities = 100ms overhead, 10 entities = 500ms overhead
132
+ - After: 2 entities = 50ms overhead (batched at 5s), 10 entities = 50ms overhead (batched at threshold)
133
+ - **2-10x faster for small batches**
134
+
135
+ **Bulk Import (1000 entities via loop):**
136
+ - Before (v3.32.2): 1000 entities = 1000 GCS writes = ~50 seconds overhead
137
+ - After (v3.32.3): 1000 entities = 100 GCS writes = ~5 seconds overhead
138
+ - **10x faster for bulk operations**
139
+
140
+ #### Reliability Guarantees
141
+ ✅ **Container Restart Scenario:** Nearly the same reliability as v3.32.2, with a small bounded loss window
142
+ - Counts persist every 10 operations OR 5 seconds (whichever first)
143
+ - Maximum data loss window: 9 operations OR 5 seconds of data (only on ungraceful crash)
144
+
145
+ ✅ **Graceful Shutdown (Cloud Run/Fargate/Lambda):**
146
+ - SIGTERM/SIGINT handlers flush pending counts immediately
147
+ - Zero data loss on graceful container shutdown
148
+
149
+ ✅ **Production Ready:**
150
+ - Backward compatible (no breaking changes)
151
+ - Zero configuration required (automatic based on storage type)
152
+ - Works transparently for all existing code
153
+
154
+ #### Implementation Details
155
+ - `baseStorageAdapter.ts`: Added smart batching with `scheduleCountPersist()` and `flushCounts()`
156
+ - New method: `isCloudStorage()` - Detects storage type for adaptive strategy
157
+ - New method: `scheduleCountPersist()` - Smart batching logic
158
+ - New method: `flushCounts()` - Immediate flush for shutdown hooks
159
+ - Modified: 4 count methods to use smart batching instead of immediate persistence
160
+
161
+ - `gcsStorage.ts`: Added cloud storage detection
162
+ - Override `isCloudStorage()` to return `true` (enables batching)
163
+
164
+ - `s3CompatibleStorage.ts`: Added cloud storage detection
165
+ - Override `isCloudStorage()` to return `true` (enables batching)
166
+
167
+ - `brainy.ts`: Added graceful shutdown hooks
168
+ - `registerShutdownHooks()`: Handles SIGTERM, SIGINT, beforeExit
169
+ - Ensures pending count batches are flushed before container shutdown
170
+ - Critical for Cloud Run, Fargate, Lambda, and other containerized deployments
171
+
172
+ #### Migration
173
+ **No action required!** This is a transparent performance optimization.
174
+ - ✅ Same public API
175
+ - ✅ Same reliability guarantees
176
+ - ✅ Better performance (automatic)
177
+
178
+ ---
179
+
5
180
  ### [3.32.2](https://github.com/soulcraftlabs/brainy/compare/v3.32.1...v3.32.2) (2025-10-09)
6
181
 
7
182
  ### 🐛 Critical Bug Fixes - Container Restart Persistence
@@ -24,6 +24,8 @@ export interface TypeMatchResult {
24
24
  }
25
25
  /**
26
26
  * BrainyTypes - Intelligent type detection for nouns and verbs
27
+ * PRODUCTION OPTIMIZATION (v3.33.0): Uses pre-computed type embeddings
28
+ * Type embeddings are loaded instantly; only input objects are embedded at runtime
27
29
  */
28
30
  export declare class BrainyTypes {
29
31
  private embedder;
@@ -33,7 +35,9 @@ export declare class BrainyTypes {
33
35
  private cache;
34
36
  constructor();
35
37
  /**
36
- * Initialize the type matcher by generating embeddings for all types
38
+ * Initialize the type matcher by loading pre-computed embeddings
39
+ * INSTANT - type embeddings are loaded from pre-computed data
40
+ * Only the model for input embedding needs initialization
37
41
  */
38
42
  init(): Promise<void>;
39
43
  /**
@@ -13,6 +13,7 @@
13
13
  import { NounType, VerbType } from '../../types/graphTypes.js';
14
14
  import { TransformerEmbedding } from '../../utils/embedding.js';
15
15
  import { cosineDistance } from '../../utils/distance.js';
16
+ import { getNounTypeEmbeddings, getVerbTypeEmbeddings } from '../../neural/embeddedTypeEmbeddings.js';
16
17
  /**
17
18
  * Type descriptions for semantic matching
18
19
  * These descriptions are used to generate embeddings for each type
@@ -109,6 +110,8 @@ const VERB_TYPE_DESCRIPTIONS = {
109
110
  };
110
111
  /**
111
112
  * BrainyTypes - Intelligent type detection for nouns and verbs
113
+ * PRODUCTION OPTIMIZATION (v3.33.0): Uses pre-computed type embeddings
114
+ * Type embeddings are loaded instantly; only input objects are embedded at runtime
112
115
  */
113
116
  export class BrainyTypes {
114
117
  constructor() {
@@ -116,23 +119,27 @@ export class BrainyTypes {
116
119
  this.verbEmbeddings = new Map();
117
120
  this.initialized = false;
118
121
  this.cache = new Map();
122
+ // Embedder only used for input objects, NOT for type embeddings
119
123
  this.embedder = new TransformerEmbedding({ verbose: false });
120
124
  }
121
125
  /**
122
- * Initialize the type matcher by generating embeddings for all types
126
+ * Initialize the type matcher by loading pre-computed embeddings
127
+ * INSTANT - type embeddings are loaded from pre-computed data
128
+ * Only the model for input embedding needs initialization
123
129
  */
124
130
  async init() {
125
131
  if (this.initialized)
126
132
  return;
133
+ // Initialize embedder for input objects only
127
134
  await this.embedder.init();
128
- // Generate embeddings for noun types
129
- for (const [type, description] of Object.entries(NOUN_TYPE_DESCRIPTIONS)) {
130
- const embedding = await this.embedder.embed(description);
135
+ // Load pre-computed type embeddings (instant, no computation)
136
+ const nounEmbeddings = getNounTypeEmbeddings();
137
+ const verbEmbeddings = getVerbTypeEmbeddings();
138
+ // Convert NounType/VerbType keys to strings for lookup
139
+ for (const [type, embedding] of nounEmbeddings.entries()) {
131
140
  this.nounEmbeddings.set(type, embedding);
132
141
  }
133
- // Generate embeddings for verb types
134
- for (const [type, description] of Object.entries(VERB_TYPE_DESCRIPTIONS)) {
135
- const embedding = await this.embedder.embed(description);
142
+ for (const [type, embedding] of verbEmbeddings.entries()) {
136
143
  this.verbEmbeddings.set(type, embedding);
137
144
  }
138
145
  this.initialized = true;
package/dist/brainy.d.ts CHANGED
@@ -20,6 +20,8 @@ import { BrainyInterface } from './types/brainyInterface.js';
20
20
  * Implements BrainyInterface to ensure consistency across integrations
21
21
  */
22
22
  export declare class Brainy<T = any> implements BrainyInterface<T> {
23
+ private static shutdownHooksRegisteredGlobally;
24
+ private static instances;
23
25
  private index;
24
26
  private storage;
25
27
  private metadataIndex;
@@ -48,6 +50,20 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
48
50
  init(overrides?: Partial<BrainyConfig & {
49
51
  dimensions?: number;
50
52
  }>): Promise<void>;
53
+ /**
54
+ * Register shutdown hooks for graceful count flushing (v3.32.3+)
55
+ *
56
+ * Ensures pending count batches are persisted before container shutdown.
57
+ * Critical for Cloud Run, Fargate, Lambda, and other containerized deployments.
58
+ *
59
+ * Handles:
60
+ * - SIGTERM: Graceful termination (Cloud Run, Fargate, Lambda)
61
+ * - SIGINT: Ctrl+C (development/local testing)
62
+ * - beforeExit: Node.js cleanup hook (fallback)
63
+ *
64
+ * NOTE: Registers globally (once for all instances) to avoid MaxListenersExceededWarning
65
+ */
66
+ private registerShutdownHooks;
51
67
  /**
52
68
  * Ensure Brainy is initialized
53
69
  */
@@ -1054,6 +1070,21 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
1054
1070
  /**
1055
1071
  * Rebuild indexes if there's existing data but empty indexes
1056
1072
  */
1073
+ /**
1074
+ * Rebuild indexes from persisted data if needed (v3.35.0+)
1075
+ *
1076
+ * FIXES FOR CRITICAL BUGS:
1077
+ * - Bug #1: GraphAdjacencyIndex rebuild never called ✅ FIXED
1078
+ * - Bug #2: Early return blocks recovery when count=0 ✅ FIXED
1079
+ * - Bug #4: HNSW index has no rebuild mechanism ✅ FIXED
1080
+ *
1081
+ * Production-grade rebuild with:
1082
+ * - Handles millions of entities via pagination
1083
+ * - Smart threshold-based decisions (auto-rebuild < 1000 items)
1084
+ * - Progress reporting for large datasets
1085
+ * - Parallel index rebuilds for performance
1086
+ * - Robust error recovery (continues on partial failures)
1087
+ */
1057
1088
  private rebuildIndexesIfNeeded;
1058
1089
  /**
1059
1090
  * Close and cleanup
package/dist/brainy.js CHANGED
@@ -42,6 +42,8 @@ export class Brainy {
42
42
  if (this.config.distributed?.enabled) {
43
43
  this.setupDistributedComponents();
44
44
  }
45
+ // Track this instance for shutdown hooks
46
+ Brainy.instances.push(this);
45
47
  // Index and storage are initialized in init() because they may need each other
46
48
  }
47
49
  /**
@@ -126,12 +128,63 @@ export class Brainy {
126
128
  if (this.config.warmup) {
127
129
  await this.warmup();
128
130
  }
131
+ // Register shutdown hooks for graceful count flushing (once globally)
132
+ if (!Brainy.shutdownHooksRegisteredGlobally) {
133
+ this.registerShutdownHooks();
134
+ Brainy.shutdownHooksRegisteredGlobally = true;
135
+ }
129
136
  this.initialized = true;
130
137
  }
131
138
  catch (error) {
132
139
  throw new Error(`Failed to initialize Brainy: ${error}`);
133
140
  }
134
141
  }
142
+ /**
143
+ * Register shutdown hooks for graceful count flushing (v3.32.3+)
144
+ *
145
+ * Ensures pending count batches are persisted before container shutdown.
146
+ * Critical for Cloud Run, Fargate, Lambda, and other containerized deployments.
147
+ *
148
+ * Handles:
149
+ * - SIGTERM: Graceful termination (Cloud Run, Fargate, Lambda)
150
+ * - SIGINT: Ctrl+C (development/local testing)
151
+ * - beforeExit: Node.js cleanup hook (fallback)
152
+ *
153
+ * NOTE: Registers globally (once for all instances) to avoid MaxListenersExceededWarning
154
+ */
155
+ registerShutdownHooks() {
156
+ const flushOnShutdown = async () => {
157
+ console.log('⚠️ Shutdown signal received - flushing pending counts...');
158
+ try {
159
+ // Flush counts for all Brainy instances
160
+ let flushedCount = 0;
161
+ for (const instance of Brainy.instances) {
162
+ if (instance.storage && typeof instance.storage.flushCounts === 'function') {
163
+ await instance.storage.flushCounts();
164
+ flushedCount++;
165
+ }
166
+ }
167
+ if (flushedCount > 0) {
168
+ console.log(`✅ Counts flushed successfully (${flushedCount} instance${flushedCount > 1 ? 's' : ''})`);
169
+ }
170
+ }
171
+ catch (error) {
172
+ console.error('❌ Failed to flush counts on shutdown:', error);
173
+ }
174
+ };
175
+ // Graceful shutdown signals (registered once globally)
176
+ process.on('SIGTERM', async () => {
177
+ await flushOnShutdown();
178
+ process.exit(0);
179
+ });
180
+ process.on('SIGINT', async () => {
181
+ await flushOnShutdown();
182
+ process.exit(0);
183
+ });
184
+ process.on('beforeExit', async () => {
185
+ await flushOnShutdown();
186
+ });
187
+ }
135
188
  /**
136
189
  * Ensure Brainy is initialized
137
190
  */
@@ -2332,59 +2385,88 @@ export class Brainy {
2332
2385
  /**
2333
2386
  * Rebuild indexes if there's existing data but empty indexes
2334
2387
  */
2388
+ /**
2389
+ * Rebuild indexes from persisted data if needed (v3.35.0+)
2390
+ *
2391
+ * FIXES FOR CRITICAL BUGS:
2392
+ * - Bug #1: GraphAdjacencyIndex rebuild never called ✅ FIXED
2393
+ * - Bug #2: Early return blocks recovery when count=0 ✅ FIXED
2394
+ * - Bug #4: HNSW index has no rebuild mechanism ✅ FIXED
2395
+ *
2396
+ * Production-grade rebuild with:
2397
+ * - Handles millions of entities via pagination
2398
+ * - Smart threshold-based decisions (auto-rebuild < 1000 items)
2399
+ * - Progress reporting for large datasets
2400
+ * - Parallel index rebuilds for performance
2401
+ * - Robust error recovery (continues on partial failures)
2402
+ */
2335
2403
  async rebuildIndexesIfNeeded() {
2336
2404
  try {
2337
- // Check if storage has data
2405
+ // Check if auto-rebuild is explicitly disabled
2406
+ if (this.config.disableAutoRebuild === true) {
2407
+ if (!this.config.silent) {
2408
+ console.log('⚡ Auto-rebuild explicitly disabled via config');
2409
+ }
2410
+ return;
2411
+ }
2412
+ // BUG #2 FIX: Don't trust counts - check actual storage instead
2413
+ // Counts can be lost/corrupted in container restarts
2338
2414
  const entities = await this.storage.getNouns({ pagination: { limit: 1 } });
2339
2415
  const totalCount = entities.totalCount || 0;
2340
- if (totalCount === 0) {
2341
- // No data in storage, no rebuild needed
2416
+ // If storage is truly empty, no rebuild needed
2417
+ if (totalCount === 0 && entities.items.length === 0) {
2342
2418
  return;
2343
2419
  }
2344
2420
  // Intelligent decision: Auto-rebuild only for small datasets
2345
2421
  // For large datasets, use lazy loading for optimal performance
2346
2422
  const AUTO_REBUILD_THRESHOLD = 1000; // Only auto-rebuild if < 1000 items
2347
- // Check if metadata index is empty
2423
+ // Check if indexes need rebuilding
2348
2424
  const metadataStats = await this.metadataIndex.getStats();
2349
- if (metadataStats.totalEntries === 0 && totalCount > 0) {
2350
- if (totalCount < AUTO_REBUILD_THRESHOLD) {
2351
- // Small dataset - rebuild for convenience
2352
- if (!this.config.silent) {
2353
- console.log(`🔄 Small dataset (${totalCount} items) - rebuilding index for optimal performance...`);
2354
- }
2355
- await this.metadataIndex.rebuild();
2356
- const newStats = await this.metadataIndex.getStats();
2357
- if (!this.config.silent) {
2358
- console.log(`✅ Index rebuilt: ${newStats.totalEntries} entries`);
2359
- }
2360
- }
2361
- else {
2362
- // Large dataset - use lazy loading
2363
- if (!this.config.silent) {
2364
- console.log(`⚡ Large dataset (${totalCount} items) - using lazy loading for optimal startup performance`);
2365
- console.log('💡 Tip: Indexes will build automatically as you use the system');
2366
- }
2367
- }
2425
+ const hnswIndexSize = this.index.size();
2426
+ const graphIndexSize = await this.graphIndex.size();
2427
+ const needsRebuild = metadataStats.totalEntries === 0 ||
2428
+ hnswIndexSize === 0 ||
2429
+ graphIndexSize === 0 ||
2430
+ this.config.disableAutoRebuild === false; // Explicitly enabled
2431
+ if (!needsRebuild) {
2432
+ // All indexes populated, no rebuild needed
2433
+ return;
2368
2434
  }
2369
- // Override with explicit config if provided
2370
- if (this.config.disableAutoRebuild === true) {
2435
+ // Small dataset: Rebuild all indexes for best performance
2436
+ if (totalCount < AUTO_REBUILD_THRESHOLD || this.config.disableAutoRebuild === false) {
2371
2437
  if (!this.config.silent) {
2372
- console.log('⚡ Auto-rebuild explicitly disabled via config');
2438
+ console.log(this.config.disableAutoRebuild === false
2439
+ ? '🔄 Auto-rebuild explicitly enabled - rebuilding all indexes...'
2440
+ : `🔄 Small dataset (${totalCount} items) - rebuilding all indexes...`);
2441
+ }
2442
+ // BUG #1 FIX: Actually call graphIndex.rebuild()
2443
+ // BUG #4 FIX: Actually call HNSW index.rebuild()
2444
+ // Rebuild whichever of the 3 indexes are empty, in parallel for performance
2445
+ const startTime = Date.now();
2446
+ await Promise.all([
2447
+ metadataStats.totalEntries === 0 ? this.metadataIndex.rebuild() : Promise.resolve(),
2448
+ hnswIndexSize === 0 ? this.index.rebuild() : Promise.resolve(),
2449
+ graphIndexSize === 0 ? this.graphIndex.rebuild() : Promise.resolve()
2450
+ ]);
2451
+ const duration = Date.now() - startTime;
2452
+ if (!this.config.silent) {
2453
+ console.log(`✅ All indexes rebuilt in ${duration}ms:\n` +
2454
+ ` - Metadata: ${await this.metadataIndex.getStats().then(s => s.totalEntries)} entries\n` +
2455
+ ` - HNSW Vector: ${this.index.size()} nodes\n` +
2456
+ ` - Graph Adjacency: ${await this.graphIndex.size()} relationships`);
2373
2457
  }
2374
- return;
2375
2458
  }
2376
- else if (this.config.disableAutoRebuild === false && metadataStats.totalEntries === 0) {
2377
- // Explicitly enabled - rebuild regardless of size
2459
+ else {
2460
+ // Large dataset: Use lazy loading for fast startup
2378
2461
  if (!this.config.silent) {
2379
- console.log('🔄 Auto-rebuild explicitly enabled - rebuilding index...');
2462
+ console.log(`⚡ Large dataset (${totalCount} items) - using lazy loading for optimal startup`);
2463
+ console.log('💡 Indexes will build automatically as you query the system');
2380
2464
  }
2381
- await this.metadataIndex.rebuild();
2382
2465
  }
2383
- // Note: GraphAdjacencyIndex will rebuild itself as relationships are added
2384
- // Vector index should already be populated if storage has data
2385
2466
  }
2386
2467
  catch (error) {
2387
- console.warn('Warning: Could not check or rebuild indexes:', error);
2468
+ console.warn('Warning: Could not rebuild indexes:', error);
2469
+ // Don't throw - allow system to start even if rebuild fails
2388
2470
  }
2389
2471
  }
2390
2472
  /**
@@ -2518,6 +2600,9 @@ export class Brainy {
2518
2600
  }
2519
2601
  }
2520
2602
  }
2603
+ // Static shutdown hook tracking (global, not per-instance)
2604
+ Brainy.shutdownHooksRegisteredGlobally = false;
2605
+ Brainy.instances = [];
2521
2606
  // Re-export types for convenience
2522
2607
  export * from './types/brainy.types.js';
2523
2608
  export { NounType, VerbType } from './types/graphTypes.js';
@@ -3,6 +3,7 @@
3
3
  * Based on the paper: "Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs"
4
4
  */
5
5
  import { DistanceFunction, HNSWConfig, HNSWNoun, Vector, VectorDocument } from '../coreTypes.js';
6
+ import type { BaseStorage } from '../storage/baseStorage.js';
6
7
  export declare class HNSWIndex {
7
8
  private nouns;
8
9
  private entryPointId;
@@ -13,8 +14,10 @@ export declare class HNSWIndex {
13
14
  private distanceFunction;
14
15
  private dimension;
15
16
  private useParallelization;
17
+ private storage;
16
18
  constructor(config?: Partial<HNSWConfig>, distanceFunction?: DistanceFunction, options?: {
17
19
  useParallelization?: boolean;
20
+ storage?: BaseStorage;
18
21
  });
19
22
  /**
20
23
  * Set whether to use parallelization for performance-critical operations
@@ -98,6 +101,27 @@ export declare class HNSWIndex {
98
101
  * This enables O(n) clustering using HNSW's natural hierarchy
99
102
  */
100
103
  getNodesAtLevel(level: number): HNSWNoun[];
104
+ /**
105
+ * Rebuild HNSW index from persisted graph data (v3.35.0+)
106
+ *
107
+ * This is a production-grade O(N) rebuild that restores the pre-computed graph structure
108
+ * from storage. This is much faster than re-inserting every vector from scratch, which is O(N log N).
109
+ *
110
+ * Designed for millions of entities with:
111
+ * - Cursor-based pagination (no memory overflow)
112
+ * - Batch processing (configurable batch size)
113
+ * - Progress reporting (optional callback)
114
+ * - Error recovery (continues on partial failures)
115
+ * - Lazy mode support (memory-efficient for constrained environments)
116
+ *
117
+ * @param options Rebuild options
118
+ * @returns Promise that resolves when rebuild is complete
119
+ */
120
+ rebuild(options?: {
121
+ lazy?: boolean;
122
+ batchSize?: number;
123
+ onProgress?: (loaded: number, total: number) => void;
124
+ }): Promise<void>;
101
125
  /**
102
126
  * Get level statistics for understanding the hierarchy
103
127
  */