npm - @soulcraft/brainy - Versions diffs - 3.32.2 → 3.35.0 - Mend

@soulcraft/brainy 3.32.2 → 3.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31)

package/CHANGELOG.md +175 -0
package/dist/augmentations/typeMatching/brainyTypes.d.ts +5 -1
package/dist/augmentations/typeMatching/brainyTypes.js +14 -7
package/dist/brainy.d.ts +31 -0
package/dist/brainy.js +119 -34
package/dist/hnsw/hnswIndex.d.ts +24 -0
package/dist/hnsw/hnswIndex.js +137 -0
package/dist/hnsw/hnswIndexOptimized.d.ts +2 -13
package/dist/hnsw/hnswIndexOptimized.js +8 -37
package/dist/importers/SmartExcelImporter.js +12 -0
package/dist/interfaces/IIndex.d.ts +186 -0
package/dist/interfaces/IIndex.js +15 -0
package/dist/neural/embeddedTypeEmbeddings.d.ts +34 -0
package/dist/neural/embeddedTypeEmbeddings.js +96 -0
package/dist/neural/entityExtractor.d.ts +2 -0
package/dist/neural/entityExtractor.js +21 -42
package/dist/neural/naturalLanguageProcessor.d.ts +2 -1
package/dist/neural/naturalLanguageProcessor.js +17 -31
package/dist/storage/adapters/baseStorageAdapter.d.ts +54 -0
package/dist/storage/adapters/baseStorageAdapter.js +105 -10
package/dist/storage/adapters/fileSystemStorage.d.ts +32 -0
package/dist/storage/adapters/fileSystemStorage.js +66 -0
package/dist/storage/adapters/gcsStorage.d.ts +45 -0
package/dist/storage/adapters/gcsStorage.js +122 -4
package/dist/storage/adapters/memoryStorage.d.ts +32 -0
package/dist/storage/adapters/memoryStorage.js +43 -0
package/dist/storage/adapters/opfsStorage.d.ts +36 -0
package/dist/storage/adapters/opfsStorage.js +101 -0
package/dist/storage/adapters/s3CompatibleStorage.d.ts +45 -0
package/dist/storage/adapters/s3CompatibleStorage.js +123 -0
package/package.json +5 -2

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,181 @@
 All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
+### [3.35.0](https://github.com/soulcraftlabs/brainy/compare/v3.34.0...v3.35.0) (2025-10-10)
+- feat: implement HNSW index rebuild and unified index interface (6a4d1ae)
+- cleaning up (12d78ba)
+### [3.34.0](https://github.com/soulcraftlabs/brainy/compare/v3.33.0...v3.34.0) (2025-10-09)
+- test: adjust type-matching tests for real embeddings (v3.33.0) (1c5c77e)
+- perf: pre-compute type embeddings at build time (zero runtime cost) (0d649b8)
+- perf: optimize concept extraction for production (15x faster) (87eb60d)
+- perf: implement smart count batching for 10x faster bulk operations (e52bcaf)
+## [3.33.0](https://github.com/soulcraftlabs/brainy/compare/v3.32.5...v3.33.0) (2025-10-09)
+### 🚀 Performance - Build-Time Type Embeddings (Zero Runtime Cost)
+**Production Optimization: All type embeddings are now pre-computed at build time**
+#### Problem
+Type embeddings for 31 NounTypes + 40 VerbTypes were computed at runtime in 3 different places:
+- `NeuralEntityExtractor` computed noun type embeddings on first use
+- `BrainyTypes` computed all 31+40 type embeddings on init
+- `NaturalLanguageProcessor` computed all 31+40 type embeddings on init
+- **Result**: Every process restart = ~70+ embedding operations = 5-10 second initialization delay
+#### Solution
+Pre-computed type embeddings at build time (similar to pattern embeddings):
+- Created `scripts/buildTypeEmbeddings.ts` - generates embeddings for all types once during build
+- Created `src/neural/embeddedTypeEmbeddings.ts` - stores pre-computed embeddings as base64 data
+- All consumers now load instant embeddings instead of computing at runtime
+#### Benefits
+- ✅ **Zero runtime computation** - type embeddings loaded instantly from embedded data
+- ✅ **Survives all restarts** - embeddings bundled in package, no re-computation needed
+- ✅ **All 71 types available** - 31 noun + 40 verb types instantly accessible
+- ✅ **~100KB overhead** - small memory cost for huge performance gain
+- ✅ **Permanent optimization** - build once, fast forever
+#### Build Process
+```bash
+# Manual rebuild (if types change)
+npm run build:types:force
+# Automatic check (integrated into build)
+npm run build  # Rebuilds types only if source changed
+```
+#### Files Changed
+- `scripts/buildTypeEmbeddings.ts` - Build script to generate type embeddings
+- `scripts/check-type-embeddings.cjs` - Check if rebuild needed
+- `src/neural/embeddedTypeEmbeddings.ts` - Pre-computed embeddings (auto-generated)
+- `src/neural/entityExtractor.ts` - Uses embedded types (no runtime computation)
+- `src/augmentations/typeMatching/brainyTypes.ts` - Uses embedded types (instant init)
+- `src/neural/naturalLanguageProcessor.ts` - Uses embedded types (instant init)
+- `src/importers/SmartExcelImporter.ts` - Updated comments to reflect zero-cost embeddings
+- `package.json` - Added type embedding build scripts
+#### Impact
+- v3.32.5: Type embeddings computed at runtime (2-31 operations per restart)
+- v3.33.0: Type embeddings loaded instantly (0 operations, pre-computed at build)
+- **Permanent 100% elimination of type embedding runtime cost**
+---
+### [3.32.5](https://github.com/soulcraftlabs/brainy/compare/v3.32.4...v3.32.5) (2025-10-09)
+### 🚀 Performance - Neural Extraction Optimization (15x Faster)
+**Fixed: Concept extraction now production-ready for large files**
+#### Problem
+`brain.extractConcepts()` appeared to hang on large Excel/PDF/Markdown files:
+- Previously initialized ALL 31 NounTypes (31 embedding operations)
+- For 100-row Excel file: 3,100+ embedding operations
+- Caused apparent hangs/timeouts in production
+#### Solution
+Optimized `NeuralEntityExtractor` to only initialize requested types:
+- `extractConcepts()` now only initializes Concept + Topic types (2 embeds vs 31)
+- **15x faster initialization** (31 embeds → 2 embeds)
+- Re-enabled concept extraction by default in Excel importer
+#### Performance Impact
+- **Small files (<100 rows)**: 5-20 seconds (was: appeared to hang)
+- **Medium files (100-500 rows)**: 20-100 seconds (was: timeout)
+- **Large files (500+ rows)**: Can be disabled if needed via `enableConceptExtraction: false`
+#### Files Changed
+- `src/neural/entityExtractor.ts`: Lazy type initialization
+- `src/importers/SmartExcelImporter.ts`: Re-enabled with optimization notes
+### 🔧 Diagnostics - GCS Initialization Logging
+**Added: Enhanced logging for GCS bucket scanning**
+Added detailed diagnostic logs to help debug GCS initialization issues:
+- Shows prefixes being scanned
+- Displays file counts and sample filenames
+- Warns if no entities found
+#### Files Changed
+- `src/storage/adapters/gcsStorage.ts`: Enhanced `initializeCountsFromScan()` logging
+---
+### [3.32.3](https://github.com/soulcraftlabs/brainy/compare/v3.32.2...v3.32.3) (2025-10-09)
+### ⚡ Performance Optimization - Smart Count Batching for Production Scale
+**Optimized: 10x faster bulk operations with storage-aware count batching**
+#### What Changed
+v3.32.2 fixed the critical container restart bug by persisting counts on EVERY operation. This made the system reliable but introduced performance overhead for bulk operations (1000 entities = 1000 GCS writes = ~50 seconds).
+v3.32.3 introduces **Smart Count Batching** - a storage-type aware optimization that maintains v3.32.2's reliability while dramatically improving bulk operation performance.
+#### How It Works
+- **Cloud storage** (GCS, S3, R2): Batches count persistence (10 operations OR 5 seconds, whichever first)
+- **Local storage** (File System, Memory): Persists immediately (already fast, no benefit from batching)
+- **Graceful shutdown hooks**: SIGTERM/SIGINT handlers flush pending counts before shutdown
+#### Performance Impact
+**API Use Case (1-10 entities):**
+- Before: 2 entities = 100ms overhead, 10 entities = 500ms overhead
+- After: 2 entities = 50ms overhead (batched at 5s), 10 entities = 50ms overhead (batched at threshold)
+- **2-10x faster for small batches**
+**Bulk Import (1000 entities via loop):**
+- Before (v3.32.2): 1000 entities = 1000 GCS writes = ~50 seconds overhead
+- After (v3.32.3): 1000 entities = 100 GCS writes = ~5 seconds overhead
+- **10x faster for bulk operations**
+#### Reliability Guarantees
+✅ **Container Restart Scenario:** Same reliability as v3.32.2
+- Counts persist every 10 operations OR 5 seconds (whichever first)
+- Maximum data loss window: 9 operations OR 5 seconds of data (only on ungraceful crash)
+✅ **Graceful Shutdown (Cloud Run/Fargate/Lambda):**
+- SIGTERM/SIGINT handlers flush pending counts immediately
+- Zero data loss on graceful container shutdown
+✅ **Production Ready:**
+- Backward compatible (no breaking changes)
+- Zero configuration required (automatic based on storage type)
+- Works transparently for all existing code
+#### Implementation Details
+- `baseStorageAdapter.ts`: Added smart batching with `scheduleCountPersist()` and `flushCounts()`
+  - New method: `isCloudStorage()` - Detects storage type for adaptive strategy
+  - New method: `scheduleCountPersist()` - Smart batching logic
+  - New method: `flushCounts()` - Immediate flush for shutdown hooks
+  - Modified: 4 count methods to use smart batching instead of immediate persistence
+- `gcsStorage.ts`: Added cloud storage detection
+  - Override `isCloudStorage()` to return `true` (enables batching)
+- `s3CompatibleStorage.ts`: Added cloud storage detection
+  - Override `isCloudStorage()` to return `true` (enables batching)
+- `brainy.ts`: Added graceful shutdown hooks
+  - `registerShutdownHooks()`: Handles SIGTERM, SIGINT, beforeExit
+  - Ensures pending count batches are flushed before container shutdown
+  - Critical for Cloud Run, Fargate, Lambda, and other containerized deployments
+#### Migration
+**No action required!** This is a transparent performance optimization.
+- ✅ Same public API
+- ✅ Same reliability guarantees
+- ✅ Better performance (automatic)
+---
 ### [3.32.2](https://github.com/soulcraftlabs/brainy/compare/v3.32.1...v3.32.2) (2025-10-09)
 ### 🐛 Critical Bug Fixes - Container Restart Persistence

package/dist/augmentations/typeMatching/brainyTypes.d.ts CHANGED Viewed

@@ -24,6 +24,8 @@ export interface TypeMatchResult {
 }
 /**
  * BrainyTypes - Intelligent type detection for nouns and verbs
+ * PRODUCTION OPTIMIZATION (v3.33.0): Uses pre-computed type embeddings
+ * Type embeddings are loaded instantly; only input objects are embedded at runtime
  */
 export declare class BrainyTypes {
     private embedder;
@@ -33,7 +35,9 @@ export declare class BrainyTypes {
     private cache;
     constructor();
     /**
-     * Initialize the type matcher by generating embeddings for all types
+     * Initialize the type matcher by loading pre-computed embeddings
+     * INSTANT - type embeddings are loaded from pre-computed data
+     * Only the model for input embedding needs initialization
      */
     init(): Promise<void>;
     /**

package/dist/augmentations/typeMatching/brainyTypes.js CHANGED Viewed

@@ -13,6 +13,7 @@
 import { NounType, VerbType } from '../../types/graphTypes.js';
 import { TransformerEmbedding } from '../../utils/embedding.js';
 import { cosineDistance } from '../../utils/distance.js';
+import { getNounTypeEmbeddings, getVerbTypeEmbeddings } from '../../neural/embeddedTypeEmbeddings.js';
 /**
  * Type descriptions for semantic matching
  * These descriptions are used to generate embeddings for each type
@@ -109,6 +110,8 @@ const VERB_TYPE_DESCRIPTIONS = {
 };
 /**
  * BrainyTypes - Intelligent type detection for nouns and verbs
+ * PRODUCTION OPTIMIZATION (v3.33.0): Uses pre-computed type embeddings
+ * Type embeddings are loaded instantly; only input objects are embedded at runtime
  */
 export class BrainyTypes {
     constructor() {
@@ -116,23 +119,27 @@ export class BrainyTypes {
         this.verbEmbeddings = new Map();
         this.initialized = false;
         this.cache = new Map();
+        // Embedder only used for input objects, NOT for type embeddings
         this.embedder = new TransformerEmbedding({ verbose: false });
     }
     /**
-     * Initialize the type matcher by generating embeddings for all types
+     * Initialize the type matcher by loading pre-computed embeddings
+     * INSTANT - type embeddings are loaded from pre-computed data
+     * Only the model for input embedding needs initialization
      */
     async init() {
         if (this.initialized)
             return;
+        // Initialize embedder for input objects only
         await this.embedder.init();
-        // Generate embeddings for noun types
-        for (const [type, description] of Object.entries(NOUN_TYPE_DESCRIPTIONS)) {
-            const embedding = await this.embedder.embed(description);
+        // Load pre-computed type embeddings (instant, no computation)
+        const nounEmbeddings = getNounTypeEmbeddings();
+        const verbEmbeddings = getVerbTypeEmbeddings();
+        // Copy the pre-computed noun and verb type embeddings into this instance's lookup maps
+        for (const [type, embedding] of nounEmbeddings.entries()) {
             this.nounEmbeddings.set(type, embedding);
         }
-        // Generate embeddings for verb types
-        for (const [type, description] of Object.entries(VERB_TYPE_DESCRIPTIONS)) {
-            const embedding = await this.embedder.embed(description);
+        for (const [type, embedding] of verbEmbeddings.entries()) {
             this.verbEmbeddings.set(type, embedding);
         }
         this.initialized = true;

package/dist/brainy.d.ts CHANGED Viewed

@@ -20,6 +20,8 @@ import { BrainyInterface } from './types/brainyInterface.js';
  * Implements BrainyInterface to ensure consistency across integrations
  */
 export declare class Brainy<T = any> implements BrainyInterface<T> {
+    private static shutdownHooksRegisteredGlobally;
+    private static instances;
     private index;
     private storage;
     private metadataIndex;
@@ -48,6 +50,20 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
     init(overrides?: Partial<BrainyConfig & {
         dimensions?: number;
     }>): Promise<void>;
+    /**
+     * Register shutdown hooks for graceful count flushing (v3.32.3+)
+     *
+     * Ensures pending count batches are persisted before container shutdown.
+     * Critical for Cloud Run, Fargate, Lambda, and other containerized deployments.
+     *
+     * Handles:
+     * - SIGTERM: Graceful termination (Cloud Run, Fargate, Lambda)
+     * - SIGINT: Ctrl+C (development/local testing)
+     * - beforeExit: Node.js cleanup hook (fallback)
+     *
+     * NOTE: Registers globally (once for all instances) to avoid MaxListenersExceededWarning
+     */
+    private registerShutdownHooks;
     /**
      * Ensure Brainy is initialized
      */
@@ -1054,6 +1070,21 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
     /**
      * Rebuild indexes if there's existing data but empty indexes
      */
+    /**
+     * Rebuild indexes from persisted data if needed (v3.35.0+)
+     *
+     * FIXES FOR CRITICAL BUGS:
+     * - Bug #1: GraphAdjacencyIndex rebuild never called ✅ FIXED
+     * - Bug #2: Early return blocks recovery when count=0 ✅ FIXED
+     * - Bug #4: HNSW index has no rebuild mechanism ✅ FIXED
+     *
+     * Production-grade rebuild with:
+     * - Handles millions of entities via pagination
+     * - Smart threshold-based decisions (auto-rebuild < 1000 items)
+     * - Progress reporting for large datasets
+     * - Parallel index rebuilds for performance
+     * - Robust error recovery (continues on partial failures)
+     */
     private rebuildIndexesIfNeeded;
     /**
      * Close and cleanup

package/dist/brainy.js CHANGED Viewed

@@ -42,6 +42,8 @@ export class Brainy {
         if (this.config.distributed?.enabled) {
             this.setupDistributedComponents();
         }
+        // Track this instance for shutdown hooks
+        Brainy.instances.push(this);
         // Index and storage are initialized in init() because they may need each other
     }
     /**
@@ -126,12 +128,63 @@ export class Brainy {
             if (this.config.warmup) {
                 await this.warmup();
             }
+            // Register shutdown hooks for graceful count flushing (once globally)
+            if (!Brainy.shutdownHooksRegisteredGlobally) {
+                this.registerShutdownHooks();
+                Brainy.shutdownHooksRegisteredGlobally = true;
+            }
             this.initialized = true;
         }
         catch (error) {
             throw new Error(`Failed to initialize Brainy: ${error}`);
         }
     }
+    /**
+     * Register shutdown hooks for graceful count flushing (v3.32.3+)
+     *
+     * Ensures pending count batches are persisted before container shutdown.
+     * Critical for Cloud Run, Fargate, Lambda, and other containerized deployments.
+     *
+     * Handles:
+     * - SIGTERM: Graceful termination (Cloud Run, Fargate, Lambda)
+     * - SIGINT: Ctrl+C (development/local testing)
+     * - beforeExit: Node.js cleanup hook (fallback)
+     *
+     * NOTE: Registers globally (once for all instances) to avoid MaxListenersExceededWarning
+     */
+    registerShutdownHooks() {
+        const flushOnShutdown = async () => {
+            console.log('⚠️  Shutdown signal received - flushing pending counts...');
+            try {
+                // Flush counts for all Brainy instances
+                let flushedCount = 0;
+                for (const instance of Brainy.instances) {
+                    if (instance.storage && typeof instance.storage.flushCounts === 'function') {
+                        await instance.storage.flushCounts();
+                        flushedCount++;
+                    }
+                }
+                if (flushedCount > 0) {
+                    console.log(`✅ Counts flushed successfully (${flushedCount} instance${flushedCount > 1 ? 's' : ''})`);
+                }
+            }
+            catch (error) {
+                console.error('❌ Failed to flush counts on shutdown:', error);
+            }
+        };
+        // Graceful shutdown signals (registered once globally)
+        process.on('SIGTERM', async () => {
+            await flushOnShutdown();
+            process.exit(0);
+        });
+        process.on('SIGINT', async () => {
+            await flushOnShutdown();
+            process.exit(0);
+        });
+        process.on('beforeExit', async () => {
+            await flushOnShutdown();
+        });
+    }
     /**
      * Ensure Brainy is initialized
      */
@@ -2332,59 +2385,88 @@ export class Brainy {
     /**
      * Rebuild indexes if there's existing data but empty indexes
      */
+    /**
+     * Rebuild indexes from persisted data if needed (v3.35.0+)
+     *
+     * FIXES FOR CRITICAL BUGS:
+     * - Bug #1: GraphAdjacencyIndex rebuild never called ✅ FIXED
+     * - Bug #2: Early return blocks recovery when count=0 ✅ FIXED
+     * - Bug #4: HNSW index has no rebuild mechanism ✅ FIXED
+     *
+     * Production-grade rebuild with:
+     * - Handles millions of entities via pagination
+     * - Smart threshold-based decisions (auto-rebuild < 1000 items)
+     * - Progress reporting for large datasets
+     * - Parallel index rebuilds for performance
+     * - Robust error recovery (continues on partial failures)
+     */
     async rebuildIndexesIfNeeded() {
         try {
-            // Check if storage has data
+            // Check if auto-rebuild is explicitly disabled
+            if (this.config.disableAutoRebuild === true) {
+                if (!this.config.silent) {
+                    console.log('⚡ Auto-rebuild explicitly disabled via config');
+                }
+                return;
+            }
+            // BUG #2 FIX: Don't trust counts - check actual storage instead
+            // Counts can be lost/corrupted in container restarts
             const entities = await this.storage.getNouns({ pagination: { limit: 1 } });
             const totalCount = entities.totalCount || 0;
-            if (totalCount === 0) {
-                // No data in storage, no rebuild needed
+            // If storage is truly empty, no rebuild needed
+            if (totalCount === 0 && entities.items.length === 0) {
                 return;
             }
             // Intelligent decision: Auto-rebuild only for small datasets
             // For large datasets, use lazy loading for optimal performance
             const AUTO_REBUILD_THRESHOLD = 1000; // Only auto-rebuild if < 1000 items
-            // Check if metadata index is empty
+            // Check if indexes need rebuilding
             const metadataStats = await this.metadataIndex.getStats();
-            if (metadataStats.totalEntries === 0 && totalCount > 0) {
-                if (totalCount < AUTO_REBUILD_THRESHOLD) {
-                    // Small dataset - rebuild for convenience
-                    if (!this.config.silent) {
-                        console.log(`🔄 Small dataset (${totalCount} items) - rebuilding index for optimal performance...`);
-                    }
-                    await this.metadataIndex.rebuild();
-                    const newStats = await this.metadataIndex.getStats();
-                    if (!this.config.silent) {
-                        console.log(`✅ Index rebuilt: ${newStats.totalEntries} entries`);
-                    }
-                }
-                else {
-                    // Large dataset - use lazy loading
-                    if (!this.config.silent) {
-                        console.log(`⚡ Large dataset (${totalCount} items) - using lazy loading for optimal startup performance`);
-                        console.log('💡 Tip: Indexes will build automatically as you use the system');
-                    }
-                }
+            const hnswIndexSize = this.index.size();
+            const graphIndexSize = await this.graphIndex.size();
+            const needsRebuild = metadataStats.totalEntries === 0 ||
+                hnswIndexSize === 0 ||
+                graphIndexSize === 0 ||
+                this.config.disableAutoRebuild === false; // Explicitly enabled
+            if (!needsRebuild) {
+                // All indexes populated, no rebuild needed
+                return;
             }
-            // Override with explicit config if provided
-            if (this.config.disableAutoRebuild === true) {
+            // Small dataset: Rebuild all indexes for best performance
+            if (totalCount < AUTO_REBUILD_THRESHOLD || this.config.disableAutoRebuild === false) {
                 if (!this.config.silent) {
-                    console.log('⚡ Auto-rebuild explicitly disabled via config');
+                    console.log(this.config.disableAutoRebuild === false
+                        ? '🔄 Auto-rebuild explicitly enabled - rebuilding all indexes...'
+                        : `🔄 Small dataset (${totalCount} items) - rebuilding all indexes...`);
+                }
+                // BUG #1 FIX: Actually call graphIndex.rebuild()
+                // BUG #4 FIX: Actually call HNSW index.rebuild()
+                // Rebuild all 3 indexes in parallel for performance
+                const startTime = Date.now();
+                await Promise.all([
+                    metadataStats.totalEntries === 0 ? this.metadataIndex.rebuild() : Promise.resolve(),
+                    hnswIndexSize === 0 ? this.index.rebuild() : Promise.resolve(),
+                    graphIndexSize === 0 ? this.graphIndex.rebuild() : Promise.resolve()
+                ]);
+                const duration = Date.now() - startTime;
+                if (!this.config.silent) {
+                    console.log(`✅ All indexes rebuilt in ${duration}ms:\n` +
+                        `   - Metadata: ${await this.metadataIndex.getStats().then(s => s.totalEntries)} entries\n` +
+                        `   - HNSW Vector: ${this.index.size()} nodes\n` +
+                        `   - Graph Adjacency: ${await this.graphIndex.size()} relationships`);
                 }
-                return;
             }
-            else if (this.config.disableAutoRebuild === false && metadataStats.totalEntries === 0) {
-                // Explicitly enabled - rebuild regardless of size
+            else {
+                // Large dataset: Use lazy loading for fast startup
                 if (!this.config.silent) {
-                    console.log('🔄 Auto-rebuild explicitly enabled - rebuilding index...');
+                    console.log(`⚡ Large dataset (${totalCount} items) - using lazy loading for optimal startup`);
+                    console.log('💡 Indexes will build automatically as you query the system');
                 }
-                await this.metadataIndex.rebuild();
             }
-            // Note: GraphAdjacencyIndex will rebuild itself as relationships are added
-            // Vector index should already be populated if storage has data
         }
         catch (error) {
-            console.warn('Warning: Could not check or rebuild indexes:', error);
+            console.warn('Warning: Could not rebuild indexes:', error);
+            // Don't throw - allow system to start even if rebuild fails
         }
     }
     /**
@@ -2518,6 +2600,9 @@ export class Brainy {
         }
     }
 }
+// Static shutdown hook tracking (global, not per-instance)
+Brainy.shutdownHooksRegisteredGlobally = false;
+Brainy.instances = [];
 // Re-export types for convenience
 export * from './types/brainy.types.js';
 export { NounType, VerbType } from './types/graphTypes.js';

package/dist/hnsw/hnswIndex.d.ts CHANGED Viewed

@@ -3,6 +3,7 @@
  * Based on the paper: "Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs"
  */
 import { DistanceFunction, HNSWConfig, HNSWNoun, Vector, VectorDocument } from '../coreTypes.js';
+import type { BaseStorage } from '../storage/baseStorage.js';
 export declare class HNSWIndex {
     private nouns;
     private entryPointId;
@@ -13,8 +14,10 @@ export declare class HNSWIndex {
     private distanceFunction;
     private dimension;
     private useParallelization;
+    private storage;
     constructor(config?: Partial<HNSWConfig>, distanceFunction?: DistanceFunction, options?: {
         useParallelization?: boolean;
+        storage?: BaseStorage;
     });
     /**
      * Set whether to use parallelization for performance-critical operations
@@ -98,6 +101,27 @@ export declare class HNSWIndex {
      * This enables O(n) clustering using HNSW's natural hierarchy
      */
     getNodesAtLevel(level: number): HNSWNoun[];
+    /**
+     * Rebuild HNSW index from persisted graph data (v3.35.0+)
+     *
+     * This is a production-grade O(N) rebuild that restores the pre-computed graph structure
+     * from storage. Much faster than constructing the graph from scratch, which is O(N log N).
+     *
+     * Designed for millions of entities with:
+     * - Cursor-based pagination (no memory overflow)
+     * - Batch processing (configurable batch size)
+     * - Progress reporting (optional callback)
+     * - Error recovery (continues on partial failures)
+     * - Lazy mode support (memory-efficient for constrained environments)
+     *
+     * @param options Rebuild options
+     * @returns Promise that resolves when rebuild is complete
+     */
+    rebuild(options?: {
+        lazy?: boolean;
+        batchSize?: number;
+        onProgress?: (loaded: number, total: number) => void;
+    }): Promise<void>;
     /**
      * Get level statistics for understanding the hierarchy
      */