npm - @soulcraft/brainy - Versions diffs - 5.11.1 → 6.0.0 - Mend

@soulcraft/brainy 5.11.1 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32)

package/CHANGELOG.md +155 -5
package/README.md +2 -6
package/dist/api/DataAPI.d.ts +0 -40
package/dist/api/DataAPI.js +0 -235
package/dist/brainy.d.ts +28 -106
package/dist/brainy.js +53 -370
package/dist/cli/commands/cow.d.ts +1 -9
package/dist/cli/commands/cow.js +1 -61
package/dist/cli/commands/data.d.ts +1 -13
package/dist/cli/commands/data.js +1 -74
package/dist/cli/index.js +1 -16
package/dist/neural/embeddedTypeEmbeddings.d.ts +1 -1
package/dist/neural/embeddedTypeEmbeddings.js +2 -2
package/dist/storage/adapters/azureBlobStorage.d.ts +21 -7
package/dist/storage/adapters/azureBlobStorage.js +69 -14
package/dist/storage/adapters/fileSystemStorage.js +2 -1
package/dist/storage/adapters/gcsStorage.d.ts +29 -15
package/dist/storage/adapters/gcsStorage.js +82 -27
package/dist/storage/adapters/historicalStorageAdapter.js +2 -2
package/dist/storage/adapters/memoryStorage.d.ts +1 -1
package/dist/storage/adapters/memoryStorage.js +9 -11
package/dist/storage/adapters/opfsStorage.js +2 -1
package/dist/storage/adapters/r2Storage.d.ts +21 -10
package/dist/storage/adapters/r2Storage.js +73 -17
package/dist/storage/adapters/s3CompatibleStorage.d.ts +20 -7
package/dist/storage/adapters/s3CompatibleStorage.js +72 -14
package/dist/storage/baseStorage.d.ts +153 -24
package/dist/storage/baseStorage.js +758 -459
package/dist/vfs/PathResolver.js +6 -2
package/dist/vfs/VirtualFileSystem.d.ts +46 -24
package/dist/vfs/VirtualFileSystem.js +176 -156
package/package.json +1 -1

package/dist/storage/adapters/s3CompatibleStorage.js CHANGED Viewed

@@ -132,30 +132,87 @@ export class S3CompatibleStorage extends BaseStorage {
         this.verbCacheManager = new CacheManager(options.cacheConfig);
     }
     /**
-     * Get S3-optimized batch configuration
+     * Get S3-optimized batch configuration with native batch API support
      *
-     * S3 has higher throughput than GCS and handles parallel writes efficiently:
-     * - Larger batch sizes (100 items)
-     * - Parallel processing supported
-     * - Shorter delays between batches (50ms)
+     * S3 has excellent throughput and handles parallel operations efficiently:
+     * - Large batch sizes (up to 1000 paths)
+     * - No fixed delay between batches (batchDelayMs is 0); a rateLimit is still declared
+     * - High concurrency (150 parallel requests optimal for most workloads)
      *
-     * S3 can handle ~3500 operations/second per bucket with good performance
+     * Declared rate limit: 5000 operations/second with a burst capacity of 10,000
      *
      * @returns S3-optimized batch configuration
-     * @since v4.11.0
+     * @since v5.12.0 - Updated for native batch API
      */
     getBatchConfig() {
         return {
-            maxBatchSize: 100,
-            batchDelayMs: 50,
-            maxConcurrent: 100,
-            supportsParallelWrites: true, // S3 handles parallel writes efficiently
+            maxBatchSize: 1000, // S3 can handle very large batches
+            batchDelayMs: 0, // No fixed delay between batches (rateLimit below is still declared)
+            maxConcurrent: 150, // Optimal for S3 (tested up to 250); also the chunk size used by readBatch()
+            supportsParallelWrites: true, // S3 excels at parallel writes
             rateLimit: {
-                operationsPerSecond: 3500, // S3 is more permissive than GCS
-                burstCapacity: 1000
+                operationsPerSecond: 5000, // S3 has high throughput
+                burstCapacity: 10000
             }
         };
     }
+    /**
+     * Batch read operation using S3's parallel download capabilities
+     *
+     * Issues one GetObjectCommand per path, at most getBatchConfig().maxConcurrent at a time,
+     * via Promise.allSettled(); per-path failures are caught so one bad key never fails the batch.
+     *
+     * Performance: ~150 concurrent requests = <500ms for 150 objects
+     *
+     * @param paths - Array of S3 object keys to read
+     * @returns Map of path -> parsed JSON data (only successful reads; missing keys and JSON null are omitted)
+     * @since v5.12.0
+     */
+    async readBatch(paths) {
+        await this.ensureInitialized();
+        const results = new Map();
+        if (paths.length === 0)
+            return results;
+        const batchConfig = this.getBatchConfig();
+        const chunkSize = batchConfig.maxConcurrent || 150;
+        this.logger.debug(`[S3 Batch] Reading ${paths.length} objects in chunks of ${chunkSize}`);
+        // Load GetObjectCommand lazily via dynamic import
+        const { GetObjectCommand } = await import('@aws-sdk/client-s3');
+        // Process in chunks to respect concurrency limits
+        for (let i = 0; i < paths.length; i += chunkSize) {
+            const chunk = paths.slice(i, i + chunkSize);
+            // Parallel download for this chunk
+            const chunkResults = await Promise.allSettled(chunk.map(async (path) => {
+                try {
+                    const response = await this.s3Client.send(new GetObjectCommand({
+                        Bucket: this.bucketName,
+                        Key: path
+                    }));
+                    if (!response || !response.Body) {
+                        return { path, data: null, success: false };
+                    }
+                    const bodyContents = await response.Body.transformToString();
+                    const data = JSON.parse(bodyContents);
+                    return { path, data, success: true };
+                }
+                catch (error) {
+                    // Missing keys (NoSuchKey / HTTP 404) are expected and skipped silently; any other error is logged
+                    if (error.name !== 'NoSuchKey' && error.$metadata?.httpStatusCode !== 404) {
+                        this.logger.warn(`[S3 Batch] Failed to read ${path}: ${error.message}`);
+                    }
+                    return { path, data: null, success: false };
+                }
+            }));
+            // Collect successful results
+            for (const result of chunkResults) {
+                if (result.status === 'fulfilled' && result.value.success && result.value.data !== null) {
+                    results.set(result.value.path, result.value.data);
+                }
+            }
+        }
+        this.logger.debug(`[S3 Batch] Successfully read ${results.size}/${paths.length} objects`);
+        return results;
+    }
     /**
      * Initialize the storage adapter
      */
@@ -288,7 +345,8 @@ export class S3CompatibleStorage extends BaseStorage {
             else {
                 prodLog.info('🧹 Node cache is empty - starting fresh');
             }
-            this.isInitialized = true;
+            // v6.0.0: Initialize GraphAdjacencyIndex and type statistics
+            await super.init();
             this.logger.info(`Initialized ${this.serviceType} storage with bucket ${this.bucketName}`);
         }
         catch (error) {

package/dist/storage/baseStorage.d.ts CHANGED Viewed

@@ -60,8 +60,6 @@ export declare abstract class BaseStorage extends BaseStorageAdapter {
     currentBranch: string;
     protected nounCountsByType: Uint32Array<ArrayBuffer>;
     protected verbCountsByType: Uint32Array<ArrayBuffer>;
-    protected nounTypeCache: Map<string, NounType>;
-    protected verbTypeCache: Map<string, VerbType>;
     private typeCountsRebuilt;
     /**
      * Analyze a storage key to determine its routing and path
@@ -78,6 +76,12 @@ export declare abstract class BaseStorage extends BaseStorageAdapter {
      * IMPORTANT: If your adapter overrides init(), call await super.init() first!
      */
     init(): Promise<void>;
+    /**
+     * Rebuild GraphAdjacencyIndex from existing verbs (v6.0.0)
+     * Call this manually if you have existing verb data that needs to be indexed
+     * @public
+     */
+    rebuildGraphIndex(): Promise<void>;
     /**
      * Ensure the storage adapter is initialized
      */
@@ -406,6 +410,17 @@ export declare abstract class BaseStorage extends BaseStorageAdapter {
     /**
      * Get noun metadata from storage (METADATA-ONLY, NO VECTORS)
      *
+     * **Performance (v6.0.0)**: Direct O(1) ID-first lookup - NO type search needed!
+     * - **All lookups**: 1 read, ~500ms on cloud (consistent performance)
+     * - **No cache needed**: Type is in the metadata, not the path
+     * - **No type search**: ID-first paths eliminate 42-type search entirely
+     *
+     * **Clean architecture (v6.0.0)**:
+     * - Path: `entities/nouns/{SHARD}/{ID}/metadata.json`
+     * - Type is just a field in metadata (`noun: "document"`)
+     * - MetadataIndex handles type queries (no path scanning needed)
+     * - Scales to billions without any overhead
+     *
      * **Performance (v5.11.1)**: Fast path for metadata-only reads
      * - **Speed**: 10ms vs 43ms (76-81% faster than getNoun)
      * - **Bandwidth**: 300 bytes vs 6KB (95% less)
@@ -435,18 +450,99 @@ export declare abstract class BaseStorage extends BaseStorageAdapter {
      * @returns Metadata or null if not found
      *
      * @performance
-     * - Type cache O(1) lookup for cached entities
-     * - Type scan O(N_types) for cache misses (typically <100ms)
-     * - Uses readWithInheritance() for COW branch support
+     * - O(1) direct ID lookup - always 1 read (~500ms on cloud, ~10ms local)
+     * - No caching complexity
+     * - No type search fallbacks
+     * - Works in distributed systems without sync issues
      *
      * @since v4.0.0
-     * @since v5.4.0 - Type-first paths
+     * @since v5.4.0 - Type-first paths (removed in v6.0.0)
      * @since v5.11.1 - Promoted to fast path for brain.get() optimization
+     * @since v6.0.0 - CLEAN FIX: ID-first paths eliminate all type-search complexity
      */
     getNounMetadata(id: string): Promise<NounMetadata | null>;
     /**
-     * Delete noun metadata from storage
-     * v5.4.0: Uses type-first paths (must match saveNounMetadata_internal)
+     * Batch fetch noun metadata from storage (v5.12.0 - Cloud Storage Optimization)
+     *
+     * **Performance**: Reduces N sequential calls → 1-2 batch calls
+     * - Local storage: N × 10ms → 1 × 10ms parallel (N× faster)
+     * - Cloud storage: N × 300ms → 1 × 300ms batch (N× faster)
+     *
+     * **Use cases:**
+     * - VFS tree traversal (fetch all children at once)
+     * - brain.find() result hydration (batch load entities)
+     * - brain.getRelations() target entities (eliminate N+1)
+     * - Import operations (batch existence checks)
+     *
+     * @param ids Array of entity IDs to fetch
+     * @returns Map of id → metadata (only successful fetches included)
+     *
+     * @example
+     * ```typescript
+     * // Before (N+1 pattern)
+     * for (const id of ids) {
+     *   const metadata = await storage.getNounMetadata(id)  // N calls
+     * }
+     *
+     * // After (batched)
+     * const metadataMap = await storage.getNounMetadataBatch(ids)  // 1 call
+     * for (const id of ids) {
+     *   const metadata = metadataMap.get(id)
+     * }
+     * ```
+     *
+     * @since v5.12.0
+     */
+    getNounMetadataBatch(ids: string[]): Promise<Map<string, NounMetadata>>;
+    /**
+     * Batch read multiple storage paths with COW inheritance support (v5.12.0)
+     *
+     * Core batching primitive that all batch operations build upon.
+     * Handles write cache, branch inheritance, and adapter-specific batching.
+     *
+     * **Performance**:
+     * - Uses adapter's native batch API when available (GCS, S3, Azure)
+     * - Falls back to parallel reads for non-batch adapters
+     * - Respects rate limits via StorageBatchConfig
+     *
+     * @param paths Array of storage paths to read
+     * @param branch Optional branch (defaults to current branch)
+     * @returns Map of path → data (only successful reads included)
+     *
+     * @protected - Available to subclasses and batch operations
+     * @since v5.12.0
+     */
+    protected readBatchWithInheritance(paths: string[], branch?: string): Promise<Map<string, any>>;
+    /**
+     * Adapter-level batch read with automatic batching strategy (v5.12.0)
+     *
+     * Uses the adapter's batch read support when available:
+     * - GCS: batch API (100 ops)
+     * - S3/R2: batch operations (1000 ops); S3CompatibleStorage.readBatch() runs these as parallel GetObject requests
+     * - Azure: batch API (100 ops)
+     * - Others: parallel reads via Promise.all()
+     *
+     * Automatically chunks large batches based on adapter's maxBatchSize.
+     *
+     * @param paths Array of resolved storage paths
+     * @returns Map of path → data
+     *
+     * @private
+     * @since v5.12.0
+     */
+    private readBatchFromAdapter;
+    /**
+     * Get batch configuration for this storage adapter (v5.12.0)
+     *
+     * Override in subclasses to provide adapter-specific batch limits.
+     * Defaults to conservative limits for safety.
+     *
+     * @public - Inherited from BaseStorageAdapter
+     * @since v5.12.0
+     */
+    getBatchConfig(): StorageBatchConfig;
+    /**
+     * Delete noun metadata from storage (v6.0.0: ID-first, O(1) delete)
      */
     deleteNounMetadata(id: string): Promise<void>;
     /**
@@ -456,7 +552,7 @@ export declare abstract class BaseStorage extends BaseStorageAdapter {
     saveVerbMetadata(id: string, metadata: VerbMetadata): Promise<void>;
     /**
      * Internal method for saving verb metadata (v4.0.0: now typed)
-     * v5.4.0: Uses type-first paths (must match getVerbMetadata)
+     * v6.0.0: Uses ID-first paths (must match getVerbMetadata)
      *
      * CRITICAL (v4.1.2): Count synchronization happens here
      * This ensures verb counts are updated AFTER metadata exists, fixing the race condition
@@ -469,12 +565,11 @@ export declare abstract class BaseStorage extends BaseStorageAdapter {
     protected saveVerbMetadata_internal(id: string, metadata: VerbMetadata): Promise<void>;
     /**
      * Get verb metadata from storage (v4.0.0: now typed)
-     * v5.4.0: Uses type-first paths (must match saveVerbMetadata_internal)
+     * v6.0.0: Uses ID-first paths (must match saveVerbMetadata_internal)
      */
     getVerbMetadata(id: string): Promise<VerbMetadata | null>;
     /**
-     * Delete verb metadata from storage
-     * v5.4.0: Uses type-first paths (must match saveVerbMetadata_internal)
+     * Delete verb metadata from storage (v6.0.0: ID-first, O(1) delete)
      */
     deleteVerbMetadata(id: string): Promise<void>;
     /**
@@ -494,8 +589,9 @@ export declare abstract class BaseStorage extends BaseStorageAdapter {
      */
     protected rebuildTypeCounts(): Promise<void>;
     /**
-     * Get noun type from cache or metadata
-     * Relies on nounTypeCache populated during metadata saves
+     * Get noun type (v6.0.0: type no longer needed for paths!)
+     * With ID-first paths, this is only used for internal statistics tracking.
+     * The actual type is stored in metadata and indexed by MetadataIndexManager.
      */
     protected getNounType(noun: HNSWNoun): NounType;
     /**
@@ -528,34 +624,67 @@ export declare abstract class BaseStorage extends BaseStorageAdapter {
      */
     protected deserializeVerb(data: any): HNSWVerb;
     /**
-     * Save a noun to storage (type-first path)
+     * Save a noun to storage (ID-first path)
      */
     protected saveNoun_internal(noun: HNSWNoun): Promise<void>;
     /**
-     * Get a noun from storage (type-first path)
+     * Get a noun from storage (ID-first path)
      */
     protected getNoun_internal(id: string): Promise<HNSWNoun | null>;
     /**
-     * Get nouns by noun type (O(1) with type-first paths!)
+     * Get nouns by noun type (v6.0.0: Shard-based iteration!)
      */
     protected getNounsByNounType_internal(nounType: string): Promise<HNSWNoun[]>;
     /**
-     * Delete a noun from storage (type-first path)
+     * Delete a noun from storage (v6.0.0: ID-first, O(1) delete)
      */
     protected deleteNoun_internal(id: string): Promise<void>;
     /**
-     * Save a verb to storage (type-first path)
+     * Save a verb to storage (ID-first path)
      */
     protected saveVerb_internal(verb: HNSWVerb): Promise<void>;
     /**
-     * Get a verb from storage (type-first path)
+     * Get a verb from storage (ID-first path)
      */
     protected getVerb_internal(id: string): Promise<HNSWVerb | null>;
     /**
-     * Get verbs by source (COW-aware implementation)
-     * v5.4.0: Fixed to directly list verb files instead of directories
+     * Get verbs by source (v6.0.0: Uses GraphAdjacencyIndex when available)
+     * Falls back to shard iteration during initialization to avoid circular dependency
      */
     protected getVerbsBySource_internal(sourceId: string): Promise<HNSWVerbWithMetadata[]>;
+    /**
+     * Batch get verbs by source IDs (v5.12.0 - Cloud Storage Optimization)
+     *
+     * **Performance**: Eliminates N+1 query pattern for relationship lookups
+     * - Current: N × getVerbsBySource() = N × (list all verbs + filter)
+     * - Batched: 1 × list all verbs + filter by N sourceIds
+     *
+     * **Use cases:**
+     * - VFS tree traversal (get Contains edges for multiple directories)
+     * - brain.getRelations() for multiple entities
+     * - Graph traversal (fetch neighbors of multiple nodes)
+     *
+     * @param sourceIds Array of source entity IDs
+     * @param verbType Optional verb type filter (e.g., VerbType.Contains for VFS)
+     * @returns Map of sourceId → verbs[]
+     *
+     * @example
+     * ```typescript
+     * // Before (N+1 pattern)
+     * for (const dirId of dirIds) {
+     *   const children = await storage.getVerbsBySource(dirId)  // N calls
+     * }
+     *
+     * // After (batched)
+     * const childrenByDir = await storage.getVerbsBySourceBatch(dirIds, VerbType.Contains)  // 1 scan
+     * for (const dirId of dirIds) {
+     *   const children = childrenByDir.get(dirId) || []
+     * }
+     * ```
+     *
+     * @since v5.12.0
+     */
+    getVerbsBySourceBatch(sourceIds: string[], verbType?: VerbType): Promise<Map<string, HNSWVerbWithMetadata[]>>;
     /**
      * Get verbs by target (COW-aware implementation)
      * v5.7.1: Reverted to v5.6.3 implementation to fix circular dependency deadlock
@@ -563,11 +692,11 @@ export declare abstract class BaseStorage extends BaseStorageAdapter {
      */
     protected getVerbsByTarget_internal(targetId: string): Promise<HNSWVerbWithMetadata[]>;
     /**
-     * Get verbs by type (O(1) with type-first paths!)
+     * Get verbs by type (v6.0.0: Shard iteration with type filtering)
      */
     protected getVerbsByType_internal(verbType: string): Promise<HNSWVerbWithMetadata[]>;
     /**
-     * Delete a verb from storage (type-first path)
+     * Delete a verb from storage (v6.0.0: ID-first, O(1) delete)
      */
     protected deleteVerb_internal(id: string): Promise<void>;
     /**