npm - @soulcraft/brainy - Versions diffs - 3.32.1 → 3.34.0 - Mend

@soulcraft/brainy 3.32.1 → 3.34.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

package/CHANGELOG.md +212 -0
package/dist/augmentations/typeMatching/brainyTypes.d.ts +5 -1
package/dist/augmentations/typeMatching/brainyTypes.js +14 -7
package/dist/brainy.d.ts +16 -0
package/dist/brainy.js +56 -0
package/dist/importers/SmartExcelImporter.js +12 -0
package/dist/neural/embeddedTypeEmbeddings.d.ts +34 -0
package/dist/neural/embeddedTypeEmbeddings.js +96 -0
package/dist/neural/entityExtractor.d.ts +2 -0
package/dist/neural/entityExtractor.js +21 -42
package/dist/neural/naturalLanguageProcessor.d.ts +2 -1
package/dist/neural/naturalLanguageProcessor.js +17 -31
package/dist/storage/adapters/baseStorageAdapter.d.ts +37 -0
package/dist/storage/adapters/baseStorageAdapter.js +105 -15
package/dist/storage/adapters/gcsStorage.d.ts +9 -0
package/dist/storage/adapters/gcsStorage.js +46 -10
package/dist/storage/adapters/s3CompatibleStorage.d.ts +9 -0
package/dist/storage/adapters/s3CompatibleStorage.js +11 -0
package/package.json +5 -2

package/dist/neural/naturalLanguageProcessor.js CHANGED

@@ -10,6 +10,7 @@
  */
 import { PatternLibrary } from './patternLibrary.js';
 import { NounType, VerbType } from '../types/graphTypes.js';
+import { getNounTypeEmbeddings, getVerbTypeEmbeddings } from './embeddedTypeEmbeddings.js';
 export class NaturalLanguageProcessor {
     constructor(brain) {
         this.initialized = false;
@@ -54,41 +55,26 @@ export class NaturalLanguageProcessor {
     }
     /**
      * Initialize embeddings for all NounTypes and VerbTypes
-     * These are fixed types that never change - perfect for caching
+     * PRODUCTION OPTIMIZATION (v3.33.0): Uses pre-computed type embeddings
+     * Zero runtime cost - embeddings are loaded instantly from embedded data
      */
     async initializeTypeEmbeddings() {
         if (this.typeEmbeddingsInitialized)
             return;
-        // Embed all NounTypes (30+ types)
-        for (const [key, value] of Object.entries(NounType)) {
-            if (typeof value === 'string') {
-                // Embed both the key (Person) and value (person)
-                const keyEmbedding = await this.getEmbedding(key);
-                const valueEmbedding = await this.getEmbedding(value);
-                this.nounTypeEmbeddings.set(key, keyEmbedding);
-                this.nounTypeEmbeddings.set(value, valueEmbedding);
-                // Also embed common variations
-                const spaceSeparated = key.replace(/([A-Z])/g, ' $1').trim().toLowerCase();
-                if (spaceSeparated !== value) {
-                    const variantEmbedding = await this.getEmbedding(spaceSeparated);
-                    this.nounTypeEmbeddings.set(spaceSeparated, variantEmbedding);
-                }
-            }
-        }
-        // Embed all VerbTypes (40+ types)
-        for (const [key, value] of Object.entries(VerbType)) {
-            if (typeof value === 'string') {
-                const keyEmbedding = await this.getEmbedding(key);
-                const valueEmbedding = await this.getEmbedding(value);
-                this.verbTypeEmbeddings.set(key, keyEmbedding);
-                this.verbTypeEmbeddings.set(value, valueEmbedding);
-                // Common variations for verbs
-                const spaceSeparated = key.replace(/([A-Z])/g, ' $1').trim().toLowerCase();
-                if (spaceSeparated !== value) {
-                    const variantEmbedding = await this.getEmbedding(spaceSeparated);
-                    this.verbTypeEmbeddings.set(spaceSeparated, variantEmbedding);
-                }
-            }
+        // Load pre-computed embeddings (instant, no computation)
+        const nounEmbeddings = getNounTypeEmbeddings();
+        const verbEmbeddings = getVerbTypeEmbeddings();
+        // Store noun type embeddings with all variations for lookup
+        for (const [type, embedding] of nounEmbeddings.entries()) {
+            this.nounTypeEmbeddings.set(type, embedding);
+            // Also store lowercase version for case-insensitive matching
+            this.nounTypeEmbeddings.set(type.toLowerCase(), embedding);
+        }
+        // Store verb type embeddings with all variations for lookup
+        for (const [type, embedding] of verbEmbeddings.entries()) {
+            this.verbTypeEmbeddings.set(type, embedding);
+            // Also store lowercase version for case-insensitive matching
+            this.verbTypeEmbeddings.set(type.toLowerCase(), embedding);
         }
         this.typeEmbeddingsInitialized = true;
     }

package/dist/storage/adapters/baseStorageAdapter.d.ts CHANGED

@@ -263,6 +263,12 @@ export declare abstract class BaseStorageAdapter implements StorageAdapter {
         timestamp: number;
     }>;
     protected readonly COUNT_CACHE_TTL = 60000;
+    protected pendingCountPersist: boolean;
+    protected lastCountPersistTime: number;
+    protected scheduledCountPersistTimeout: NodeJS.Timeout | null;
+    protected pendingCountOperations: number;
+    protected countPersistBatchSize: number;
+    protected countPersistInterval: number;
     /**
      * Get total noun count - O(1) operation
      * @returns Promise that resolves to the total number of nouns
@@ -303,6 +309,37 @@ export declare abstract class BaseStorageAdapter implements StorageAdapter {
      * @param type The verb type
      */
     protected decrementVerbCount(type: string): Promise<void>;
+    /**
+     * Detect if this storage adapter uses cloud storage (network I/O)
+     * Cloud storage benefits from batching; local storage does not.
+     *
+     * Override this method in subclasses for accurate detection.
+     * Default implementation checks storage type from getStorageStatus().
+     *
+     * @returns true if cloud storage (GCS, S3, R2), false if local (File, Memory)
+     */
+    protected isCloudStorage(): boolean;
+    /**
+     * Schedule a smart batched persist operation.
+     *
+     * Strategy:
+     * - Local Storage: Persist immediately (fast, no network latency)
+     * - Cloud Storage: Batch persist (10 ops OR 5 seconds, whichever first)
+     *
+     * This mirrors the statistics batching pattern for consistency.
+     */
+    protected scheduleCountPersist(): Promise<void>;
+    /**
+     * Flush counts immediately to storage.
+     *
+     * Used for:
+     * - Graceful shutdown (SIGTERM handler)
+     * - Forced persist (batch threshold reached)
+     * - Local storage immediate persist
+     *
+     * This is the public API that shutdown hooks can call.
+     */
+    flushCounts(): Promise<void>;
     /**
      * Initialize counts from storage - must be implemented by each adapter
      * @protected

package/dist/storage/adapters/baseStorageAdapter.js CHANGED

@@ -48,6 +48,17 @@ export class BaseStorageAdapter {
         this.verbCounts = new Map(); // verb type -> count
         this.countCache = new Map();
         this.COUNT_CACHE_TTL = 60000; // 1 minute cache TTL
+        // =============================================
+        // Smart Count Batching (v3.32.3+)
+        // =============================================
+        // Count batching state - mirrors statistics batching pattern
+        this.pendingCountPersist = false; // Counts changed since last persist?
+        this.lastCountPersistTime = 0; // Timestamp of last persist
+        this.scheduledCountPersistTimeout = null; // Scheduled persist timer
+        this.pendingCountOperations = 0; // Operations since last persist
+        // Batching configuration (overridable by subclasses for custom strategies)
+        this.countPersistBatchSize = 10; // Operations before forcing persist (cloud storage)
+        this.countPersistInterval = 5000; // Milliseconds before forcing persist (cloud storage)
     }
     /**
      * Save statistics data
@@ -659,10 +670,10 @@ export class BaseStorageAdapter {
         const mutex = getGlobalMutex();
         await mutex.runExclusive(`count-entity-${type}`, async () => {
             this.incrementEntityCount(type);
-            // Persist counts periodically
-            if (this.totalNounCount % 10 === 0) {
-                await this.persistCounts();
-            }
+            // Smart batching (v3.32.3+): Adapts to storage type
+            // - Cloud storage (GCS, S3): Batches 10 ops OR 5 seconds
+            // - Local storage (File, Memory): Persists immediately
+            await this.scheduleCountPersist();
         });
     }
     /**
@@ -693,9 +704,8 @@ export class BaseStorageAdapter {
         const mutex = getGlobalMutex();
         await mutex.runExclusive(`count-entity-${type}`, async () => {
             this.decrementEntityCount(type);
-            if (this.totalNounCount % 10 === 0) {
-                await this.persistCounts();
-            }
+            // Smart batching (v3.32.3+): Adapts to storage type
+            await this.scheduleCountPersist();
         });
     }
     /**
@@ -712,10 +722,8 @@ export class BaseStorageAdapter {
                 count: this.totalVerbCount,
                 timestamp: Date.now()
             });
-            // Persist counts immediately for consistency
-            if (this.totalVerbCount % 10 === 0) {
-                await this.persistCounts();
-            }
+            // Smart batching (v3.32.3+): Adapts to storage type
+            await this.scheduleCountPersist();
         });
     }
     /**
@@ -740,11 +748,93 @@ export class BaseStorageAdapter {
                 count: this.totalVerbCount,
                 timestamp: Date.now()
             });
-            // Persist counts immediately for consistency
-            if (this.totalVerbCount % 10 === 0) {
-                await this.persistCounts();
-            }
+            // Smart batching (v3.32.3+): Adapts to storage type
+            await this.scheduleCountPersist();
         });
     }
+    // =============================================
+    // Smart Batching Methods (v3.32.3+)
+    // =============================================
+    /**
+     * Detect if this storage adapter uses cloud storage (network I/O)
+     * Cloud storage benefits from batching; local storage does not.
+     *
+     * Override this method in subclasses for accurate detection.
+     * Default implementation checks storage type from getStorageStatus().
+     *
+     * @returns true if cloud storage (GCS, S3, R2), false if local (File, Memory)
+     */
+    isCloudStorage() {
+        // Default: assume local storage (conservative, prefers reliability over performance)
+        // Subclasses should override this for accurate detection
+        return false;
+    }
+    /**
+     * Schedule a smart batched persist operation.
+     *
+     * Strategy:
+     * - Local Storage: Persist immediately (fast, no network latency)
+     * - Cloud Storage: Batch persist (10 ops OR 5 seconds, whichever first)
+     *
+     * This mirrors the statistics batching pattern for consistency.
+     */
+    async scheduleCountPersist() {
+        // Mark counts as pending persist
+        this.pendingCountPersist = true;
+        this.pendingCountOperations++;
+        // Local storage: persist immediately (fast enough, no benefit from batching)
+        if (!this.isCloudStorage()) {
+            await this.flushCounts();
+            return;
+        }
+        // Cloud storage: use smart batching
+        // Persist if we've hit the batch size threshold
+        if (this.pendingCountOperations >= this.countPersistBatchSize) {
+            await this.flushCounts();
+            return;
+        }
+        // Otherwise, schedule a time-based persist if not already scheduled
+        if (!this.scheduledCountPersistTimeout) {
+            this.scheduledCountPersistTimeout = setTimeout(() => {
+                this.flushCounts().catch(error => {
+                    console.error('Failed to flush counts on timer:', error);
+                });
+            }, this.countPersistInterval);
+        }
+    }
+    /**
+     * Flush counts immediately to storage.
+     *
+     * Used for:
+     * - Graceful shutdown (SIGTERM handler)
+     * - Forced persist (batch threshold reached)
+     * - Local storage immediate persist
+     *
+     * This is the public API that shutdown hooks can call.
+     */
+    async flushCounts() {
+        // Clear any scheduled persist
+        if (this.scheduledCountPersistTimeout) {
+            clearTimeout(this.scheduledCountPersistTimeout);
+            this.scheduledCountPersistTimeout = null;
+        }
+        // Nothing to flush?
+        if (!this.pendingCountPersist) {
+            return;
+        }
+        try {
+            // Persist to storage (implemented by subclass)
+            await this.persistCounts();
+            // Update state
+            this.lastCountPersistTime = Date.now();
+            this.pendingCountPersist = false;
+            this.pendingCountOperations = 0;
+        }
+        catch (error) {
+            console.error('❌ CRITICAL: Failed to flush counts to storage:', error);
+            // Keep pending flag set so we retry on next operation
+            throw error;
+        }
+    }
 }
 //# sourceMappingURL=baseStorageAdapter.js.map

package/dist/storage/adapters/gcsStorage.d.ts CHANGED

@@ -102,6 +102,15 @@ export declare class GcsStorage extends BaseStorage {
      * Override base class method to detect GCS-specific throttling errors
      */
     protected isThrottlingError(error: any): boolean;
+    /**
+     * Override base class to enable smart batching for cloud storage (v3.32.3+)
+     *
+     * GCS is cloud storage with network latency (~50ms per write).
+     * Smart batching reduces writes from 1000 ops → 100 batches.
+     *
+     * @returns true (GCS is cloud storage)
+     */
+    protected isCloudStorage(): boolean;
     /**
      * Apply backpressure before starting an operation
      * @returns Request ID for tracking

package/dist/storage/adapters/gcsStorage.js CHANGED

@@ -195,6 +195,17 @@ export class GcsStorage extends BaseStorage {
             message.includes('rate limit') ||
             message.includes('too many requests'));
     }
+    /**
+     * Override base class to enable smart batching for cloud storage (v3.32.3+)
+     *
+     * GCS is cloud storage with network latency (~50ms per write).
+     * Smart batching reduces writes from 1000 ops → 100 batches.
+     *
+     * @returns true (GCS is cloud storage)
+     */
+    isCloudStorage() {
+        return true; // GCS benefits from batching
+    }
     /**
      * Apply backpressure before starting an operation
      * @returns Request ID for tracking
@@ -1095,8 +1106,8 @@ export class GcsStorage extends BaseStorage {
      * Initialize counts from storage
      */
     async initializeCounts() {
+        const key = `${this.systemPrefix}counts.json`;
         try {
-            const key = `${this.systemPrefix}counts.json`;
             const file = this.bucket.file(key);
             const [contents] = await file.download();
             const counts = JSON.parse(contents.toString());
@@ -1104,16 +1115,21 @@ export class GcsStorage extends BaseStorage {
             this.totalVerbCount = counts.totalVerbCount || 0;
             this.entityCounts = new Map(Object.entries(counts.entityCounts || {}));
             this.verbCounts = new Map(Object.entries(counts.verbCounts || {}));
-            prodLog.info(`📊 Loaded counts: ${this.totalNounCount} nouns, ${this.totalVerbCount} verbs`);
+            prodLog.info(`📊 Loaded counts from storage: ${this.totalNounCount} nouns, ${this.totalVerbCount} verbs`);
         }
         catch (error) {
             if (error.code === 404) {
-                // No counts file yet - initialize from scan
-                prodLog.info('📊 No counts file found - initializing from storage scan...');
+                // No counts file yet - initialize from scan (first-time setup or counts not persisted)
+                prodLog.info('📊 No counts file found - this is normal for first init or if <10 entities were added');
                 await this.initializeCountsFromScan();
             }
             else {
-                this.logger.error('Error loading counts:', error);
+                // CRITICAL FIX: Don't silently fail on network/permission errors
+                this.logger.error('❌ CRITICAL: Failed to load counts from GCS:', error);
+                prodLog.error(`❌ Error loading ${key}: ${error.message}`);
+                // Try to recover by scanning the bucket
+                prodLog.warn('⚠️  Attempting recovery by scanning GCS bucket...');
+                await this.initializeCountsFromScan();
             }
         }
     }
@@ -1122,18 +1138,38 @@ export class GcsStorage extends BaseStorage {
      */
     async initializeCountsFromScan() {
         try {
+            prodLog.info('📊 Scanning GCS bucket to initialize counts...');
+            prodLog.info(`🔍 Noun prefix: ${this.nounPrefix}`);
+            prodLog.info(`🔍 Verb prefix: ${this.verbPrefix}`);
             // Count nouns
             const [nounFiles] = await this.bucket.getFiles({ prefix: this.nounPrefix });
-            this.totalNounCount = nounFiles?.filter((f) => f.name?.endsWith('.json')).length || 0;
+            prodLog.info(`🔍 Found ${nounFiles?.length || 0} total files under noun prefix`);
+            const jsonNounFiles = nounFiles?.filter((f) => f.name?.endsWith('.json')) || [];
+            this.totalNounCount = jsonNounFiles.length;
+            if (jsonNounFiles.length > 0 && jsonNounFiles.length <= 5) {
+                prodLog.info(`📄 Sample noun files: ${jsonNounFiles.slice(0, 5).map((f) => f.name).join(', ')}`);
+            }
             // Count verbs
             const [verbFiles] = await this.bucket.getFiles({ prefix: this.verbPrefix });
-            this.totalVerbCount = verbFiles?.filter((f) => f.name?.endsWith('.json')).length || 0;
+            prodLog.info(`🔍 Found ${verbFiles?.length || 0} total files under verb prefix`);
+            const jsonVerbFiles = verbFiles?.filter((f) => f.name?.endsWith('.json')) || [];
+            this.totalVerbCount = jsonVerbFiles.length;
+            if (jsonVerbFiles.length > 0 && jsonVerbFiles.length <= 5) {
+                prodLog.info(`📄 Sample verb files: ${jsonVerbFiles.slice(0, 5).map((f) => f.name).join(', ')}`);
+            }
             // Save initial counts
-            await this.persistCounts();
-            prodLog.info(`✅ Initialized counts: ${this.totalNounCount} nouns, ${this.totalVerbCount} verbs`);
+            if (this.totalNounCount > 0 || this.totalVerbCount > 0) {
+                await this.persistCounts();
+                prodLog.info(`✅ Initialized counts from scan: ${this.totalNounCount} nouns, ${this.totalVerbCount} verbs`);
+            }
+            else {
+                prodLog.warn(`⚠️  No entities found during bucket scan. Check that entities exist and prefixes are correct.`);
+            }
         }
         catch (error) {
-            this.logger.error('Error initializing counts from scan:', error);
+            // CRITICAL FIX: Don't silently fail - this prevents data loss scenarios
+            this.logger.error('❌ CRITICAL: Failed to initialize counts from GCS bucket scan:', error);
+            throw new Error(`Failed to initialize GCS storage counts: ${error}. This prevents container restarts from working correctly.`);
         }
     }
     /**

package/dist/storage/adapters/s3CompatibleStorage.d.ts CHANGED

@@ -563,4 +563,13 @@ export declare class S3CompatibleStorage extends BaseStorage {
      * Persist counts to S3 storage
      */
     protected persistCounts(): Promise<void>;
+    /**
+     * Override base class to enable smart batching for cloud storage (v3.32.3+)
+     *
+     * S3 is cloud storage with network latency (~50ms per write).
+     * Smart batching reduces writes from 1000 ops → 100 batches.
+     *
+     * @returns true (S3 is cloud storage)
+     */
+    protected isCloudStorage(): boolean;
 }

package/dist/storage/adapters/s3CompatibleStorage.js CHANGED

@@ -2754,5 +2754,16 @@ export class S3CompatibleStorage extends BaseStorage {
             console.error('Error persisting counts to S3:', error);
         }
     }
+    /**
+     * Override base class to enable smart batching for cloud storage (v3.32.3+)
+     *
+     * S3 is cloud storage with network latency (~50ms per write).
+     * Smart batching reduces writes from 1000 ops → 100 batches.
+     *
+     * @returns true (S3 is cloud storage)
+     */
+    isCloudStorage() {
+        return true; // S3 benefits from batching
+    }
 }
 //# sourceMappingURL=s3CompatibleStorage.js.map

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@soulcraft/brainy",
-  "version": "3.32.1",
+  "version": "3.34.0",
   "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
   "main": "dist/index.js",
   "module": "dist/index.js",
@@ -55,7 +55,10 @@
     "node": "22.x"
   },
   "scripts": {
-    "build": "npm run build:patterns:if-needed && tsc && tsc -p tsconfig.cli.json",
+    "build": "npm run build:types:if-needed && npm run build:patterns:if-needed && tsc && tsc -p tsconfig.cli.json",
+    "build:types": "tsx scripts/buildTypeEmbeddings.ts",
+    "build:types:if-needed": "node scripts/check-type-embeddings.cjs || npm run build:types",
+    "build:types:force": "npm run build:types",
     "build:patterns": "tsx scripts/buildEmbeddedPatterns.ts",
     "build:patterns:if-needed": "node scripts/check-patterns.cjs || npm run build:patterns",
     "build:patterns:force": "npm run build:patterns",