@soulcraft/brainy 3.36.1 → 3.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,11 @@

  All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.

+ ### [3.37.0](https://github.com/soulcraftlabs/brainy/compare/v3.36.1...v3.37.0) (2025-10-10)
+
+ - fix: implement 2-file storage architecture for GCS scalability (59da5f6)
+
+
  ### [3.36.1](https://github.com/soulcraftlabs/brainy/compare/v3.36.0...v3.36.1) (2025-10-10)

  - fix: resolve critical GCS storage bugs preventing production use (3cd0b9a)
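
The 3.37.0 entry above refers to the 2-file layout that the hunks below implement in every storage adapter: each noun or verb is persisted as a lightweight vector record, while its metadata is written and read through the separate saveNounMetadata()/getNounMetadata() calls (and the verb equivalents). A minimal caller-side sketch of that split, using a hypothetical storage instance and metadata shape rather than code from this package:

    // Sketch of the 2-file split introduced in 3.37.0 (illustrative, not package source).
    // File 1 - vector record written by saveNoun_internal(): { id, vector, connections, level }, no metadata field.
    // File 2 - metadata record written by saveNounMetadata(id, metadata).
    await storage.saveNoun_internal(noun)                        // vector file only
    if (noun.metadata) {
      await storage.saveNounMetadata(noun.id, noun.metadata)     // metadata file, written separately
    }
    // Reads follow the same split:
    const vectorPart = await storage.getNoun(noun.id)            // id, vector, connections, level
    const metadata = await storage.getNounMetadata(noun.id)      // null if no metadata file exists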
@@ -168,9 +168,14 @@ export class FileSystemStorage extends BaseStorage {
  // Check if this is a new node to update counts
  const isNew = !(await this.fileExists(this.getNodePath(node.id)));
  // Convert connections Map to a serializable format
+ // CRITICAL: Only save lightweight vector data (no metadata)
+ // Metadata is saved separately via saveNounMetadata() (2-file system)
  const serializableNode = {
- ...node,
- connections: this.mapToObject(node.connections, (set) => Array.from(set))
+ id: node.id,
+ vector: node.vector,
+ connections: this.mapToObject(node.connections, (set) => Array.from(set)),
+ level: node.level || 0
+ // NO metadata field - saved separately for scalability
  };
  const filePath = this.getNodePath(node.id);
  await this.ensureDirectoryExists(path.dirname(filePath));
@@ -200,12 +205,14 @@ export class FileSystemStorage extends BaseStorage {
  for (const [level, nodeIds] of Object.entries(parsedNode.connections)) {
  connections.set(Number(level), new Set(nodeIds));
  }
+ // CRITICAL: Only return lightweight vector data (no metadata)
+ // Metadata is retrieved separately via getNounMetadata() (2-file system)
  return {
  id: parsedNode.id,
  vector: parsedNode.vector,
  connections,
- level: parsedNode.level || 0,
- metadata: parsedNode.metadata
+ level: parsedNode.level || 0
+ // NO metadata field - retrieved separately for scalability
  };
  }
  catch (error) {
@@ -329,9 +336,13 @@ export class FileSystemStorage extends BaseStorage {
  // Check if this is a new edge to update counts
  const isNew = !(await this.fileExists(this.getVerbPath(edge.id)));
  // Convert connections Map to a serializable format
+ // CRITICAL: Only save lightweight vector data (no metadata)
+ // Metadata is saved separately via saveVerbMetadata() (2-file system)
  const serializableEdge = {
- ...edge,
+ id: edge.id,
+ vector: edge.vector,
  connections: this.mapToObject(edge.connections, (set) => Array.from(set))
+ // NO metadata field - saved separately for scalability
  };
  const filePath = this.getVerbPath(edge.id);
  await this.ensureDirectoryExists(path.dirname(filePath));
@@ -558,7 +569,9 @@ export class FileSystemStorage extends BaseStorage {
  const batch = ids.slice(i, i + batchSize);
  const batchPromises = batch.map(async (id) => {
  try {
- const metadata = await this.getMetadata(id);
+ // CRITICAL: Use getNounMetadata() instead of deprecated getMetadata()
+ // This ensures we fetch from the correct noun metadata store (2-file system)
+ const metadata = await this.getNounMetadata(id);
  return { id, metadata };
  }
  catch (error) {
@@ -306,6 +306,13 @@ export declare class GcsStorage extends BaseStorage {
  hasMore: boolean;
  nextCursor?: string;
  }>;
+ /**
+ * Batch fetch metadata for multiple noun IDs (efficient for large queries)
+ * Uses smaller batches to prevent GCS socket exhaustion
+ * @param ids Array of noun IDs to fetch metadata for
+ * @returns Map of ID to metadata
+ */
+ getMetadataBatch(ids: string[]): Promise<Map<string, any>>;
  /**
  * Clear all data from storage
  */
@@ -316,12 +316,17 @@ export class GcsStorage extends BaseStorage {
  try {
  this.logger.trace(`Saving node ${node.id}`);
  // Convert connections Map to a serializable format
+ // CRITICAL: Only save lightweight vector data (no metadata)
+ // Metadata is saved separately via saveNounMetadata() (2-file system)
  const serializableNode = {
- ...node,
+ id: node.id,
+ vector: node.vector,
  connections: Object.fromEntries(Array.from(node.connections.entries()).map(([level, nounIds]) => [
  level,
  Array.from(nounIds)
- ]))
+ ])),
+ level: node.level || 0
+ // NO metadata field - saved separately for scalability
  };
  // Get the GCS key with UUID-based sharding
  const key = this.getNounKey(node.id);
@@ -385,12 +390,14 @@ export class GcsStorage extends BaseStorage {
  for (const [level, nounIds] of Object.entries(data.connections || {})) {
  connections.set(Number(level), new Set(nounIds));
  }
+ // CRITICAL: Only return lightweight vector data (no metadata)
+ // Metadata is retrieved separately via getNounMetadata() (2-file system)
  const node = {
  id: data.id,
  vector: data.vector,
  connections,
- level: data.level || 0,
- metadata: data.metadata // CRITICAL: Include metadata for entity reconstruction
+ level: data.level || 0
+ // NO metadata field - retrieved separately for scalability
  };
  // Update cache
  this.nounCacheManager.set(id, node);
@@ -571,12 +578,16 @@ export class GcsStorage extends BaseStorage {
  try {
  this.logger.trace(`Saving edge ${edge.id}`);
  // Convert connections Map to serializable format
+ // CRITICAL: Only save lightweight vector data (no metadata)
+ // Metadata is saved separately via saveVerbMetadata() (2-file system)
  const serializableEdge = {
- ...edge,
+ id: edge.id,
+ vector: edge.vector,
  connections: Object.fromEntries(Array.from(edge.connections.entries()).map(([level, verbIds]) => [
  level,
  Array.from(verbIds)
  ]))
+ // NO metadata field - saved separately for scalability
  };
  // Get the GCS key with UUID-based sharding
  const key = this.getVerbKey(edge.id);
@@ -1001,6 +1012,46 @@ export class GcsStorage extends BaseStorage {
  filter: options?.filter
  });
  }
+ /**
+ * Batch fetch metadata for multiple noun IDs (efficient for large queries)
+ * Uses smaller batches to prevent GCS socket exhaustion
+ * @param ids Array of noun IDs to fetch metadata for
+ * @returns Map of ID to metadata
+ */
+ async getMetadataBatch(ids) {
+ await this.ensureInitialized();
+ const results = new Map();
+ const batchSize = 10; // Smaller batches for metadata to prevent socket exhaustion
+ // Process in smaller batches
+ for (let i = 0; i < ids.length; i += batchSize) {
+ const batch = ids.slice(i, i + batchSize);
+ const batchPromises = batch.map(async (id) => {
+ try {
+ // CRITICAL: Use getNounMetadata() instead of deprecated getMetadata()
+ // This ensures we fetch from the correct noun metadata store (2-file system)
+ const metadata = await this.getNounMetadata(id);
+ return { id, metadata };
+ }
+ catch (error) {
+ // Handle GCS-specific errors
+ if (this.isThrottlingError(error)) {
+ await this.handleThrottling(error);
+ }
+ this.logger.debug(`Failed to read metadata for ${id}:`, error);
+ return { id, metadata: null };
+ }
+ });
+ const batchResults = await Promise.all(batchPromises);
+ for (const { id, metadata } of batchResults) {
+ if (metadata !== null) {
+ results.set(id, metadata);
+ }
+ }
+ // Small yield between batches to prevent overwhelming GCS
+ await new Promise(resolve => setImmediate(resolve));
+ }
+ return results;
+ }
  /**
  * Clear all data from storage
  */
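
getMetadataBatch() above resolves to a Map that only contains the IDs whose metadata could be read; failed or missing reads are logged and skipped. A hypothetical caller that hydrates search hits with metadata (variable names are illustrative, not from this package):

    // Hydrate a list of hits with their metadata using the batched reads shown above.
    const ids = searchHits.map(hit => hit.id)
    const metadataById = await storage.getMetadataBatch(ids)     // Map<string, any>; misses are omitted
    const hydrated = searchHits.map(hit => ({
      ...hit,
      metadata: metadataById.get(hit.id) ?? null                 // null when no metadata file was found
    }))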
@@ -41,12 +41,14 @@ export class MemoryStorage extends BaseStorage {
  async saveNoun_internal(noun) {
  const isNew = !this.nouns.has(noun.id);
  // Create a deep copy to avoid reference issues
+ // CRITICAL: Only save lightweight vector data (no metadata)
+ // Metadata is saved separately via saveNounMetadata() (2-file system)
  const nounCopy = {
  id: noun.id,
  vector: [...noun.vector],
  connections: new Map(),
- level: noun.level || 0,
- metadata: noun.metadata
+ level: noun.level || 0
+ // NO metadata field - saved separately for scalability
  };
  // Copy connections
  for (const [level, connections] of noun.connections.entries()) {
@@ -71,12 +73,14 @@ export class MemoryStorage extends BaseStorage {
  return null;
  }
  // Return a deep copy to avoid reference issues
+ // CRITICAL: Only return lightweight vector data (no metadata)
+ // Metadata is retrieved separately via getNounMetadata() (2-file system)
  const nounCopy = {
  id: noun.id,
  vector: [...noun.vector],
  connections: new Map(),
- level: noun.level || 0,
- metadata: noun.metadata
+ level: noun.level || 0
+ // NO metadata field - retrieved separately for scalability
  };
  // Copy connections
  for (const [level, connections] of noun.connections.entries()) {
@@ -475,7 +479,9 @@ export class MemoryStorage extends BaseStorage {
  const results = new Map();
  // Memory storage can handle all IDs at once since it's in-memory
  for (const id of ids) {
- const metadata = await this.getMetadata(id);
+ // CRITICAL: Use getNounMetadata() instead of deprecated getMetadata()
+ // This ensures we fetch from the correct noun metadata store (2-file system)
+ const metadata = await this.getNounMetadata(id);
  if (metadata) {
  results.set(id, metadata);
  }
@@ -143,10 +143,14 @@ export class OPFSStorage extends BaseStorage {
  async saveNoun_internal(noun) {
  await this.ensureInitialized();
  try {
- // Convert connections Map to a serializable format
+ // CRITICAL: Only save lightweight vector data (no metadata)
+ // Metadata is saved separately via saveNounMetadata() (2-file system)
  const serializableNoun = {
- ...noun,
- connections: this.mapToObject(noun.connections, (set) => Array.from(set))
+ id: noun.id,
+ vector: noun.vector,
+ connections: this.mapToObject(noun.connections, (set) => Array.from(set)),
+ level: noun.level || 0
+ // NO metadata field - saved separately for scalability
  };
  // Use UUID-based sharding for nouns
  const shardId = getShardIdFromUuid(noun.id);
@@ -299,10 +303,13 @@ export class OPFSStorage extends BaseStorage {
  async saveEdge(edge) {
  await this.ensureInitialized();
  try {
- // Convert connections Map to a serializable format
+ // CRITICAL: Only save lightweight vector data (no metadata)
+ // Metadata is saved separately via saveVerbMetadata() (2-file system)
  const serializableEdge = {
- ...edge,
+ id: edge.id,
+ vector: edge.vector,
  connections: this.mapToObject(edge.connections, (set) => Array.from(set))
+ // NO metadata field - saved separately for scalability
  };
  // Use UUID-based sharding for verbs
  const shardId = getShardIdFromUuid(edge.id);
@@ -717,9 +717,14 @@ export class S3CompatibleStorage extends BaseStorage {
  try {
  this.logger.trace(`Saving node ${node.id}`);
  // Convert connections Map to a serializable format
+ // CRITICAL: Only save lightweight vector data (no metadata)
+ // Metadata is saved separately via saveNounMetadata() (2-file system)
  const serializableNode = {
- ...node,
- connections: this.mapToObject(node.connections, (set) => Array.from(set))
+ id: node.id,
+ vector: node.vector,
+ connections: this.mapToObject(node.connections, (set) => Array.from(set)),
+ level: node.level || 0
+ // NO metadata field - saved separately for scalability
  };
  // Import the PutObjectCommand only when needed
  const { PutObjectCommand } = await import('@aws-sdk/client-s3');
@@ -763,6 +768,13 @@ export class S3CompatibleStorage extends BaseStorage {
  catch (verifyError) {
  this.logger.warn(`Failed to verify node ${node.id} was saved correctly:`, verifyError);
  }
+ // Increment noun count - always increment total, and increment by type if metadata exists
+ this.totalNounCount++;
+ const metadata = await this.getNounMetadata(node.id);
+ if (metadata && metadata.type) {
+ const currentCount = this.entityCounts.get(metadata.type) || 0;
+ this.entityCounts.set(metadata.type, currentCount + 1);
+ }
  // Release backpressure on success
  this.releaseBackpressure(true, requestId);
  }
@@ -1112,9 +1124,13 @@ export class S3CompatibleStorage extends BaseStorage {
  const requestId = await this.applyBackpressure();
  try {
  // Convert connections Map to a serializable format
+ // CRITICAL: Only save lightweight vector data (no metadata)
+ // Metadata is saved separately via saveVerbMetadata() (2-file system)
  const serializableEdge = {
- ...edge,
+ id: edge.id,
+ vector: edge.vector,
  connections: this.mapToObject(edge.connections, (set) => Array.from(set))
+ // NO metadata field - saved separately for scalability
  };
  // Import the PutObjectCommand only when needed
  const { PutObjectCommand } = await import('@aws-sdk/client-s3');
@@ -1135,6 +1151,13 @@ export class S3CompatibleStorage extends BaseStorage {
  vector: edge.vector
  }
  });
+ // Increment verb count - always increment total, and increment by type if metadata exists
+ this.totalVerbCount++;
+ const metadata = await this.getVerbMetadata(edge.id);
+ if (metadata && metadata.type) {
+ const currentCount = this.verbCounts.get(metadata.type) || 0;
+ this.verbCounts.set(metadata.type, currentCount + 1);
+ }
  // Release backpressure on success
  this.releaseBackpressure(true, requestId);
  }
@@ -1643,8 +1666,10 @@ export class S3CompatibleStorage extends BaseStorage {
  const batchPromises = batch.map(async (id) => {
  try {
  // Add timeout wrapper for individual metadata reads
+ // CRITICAL: Use getNounMetadata() instead of deprecated getMetadata()
+ // This ensures we fetch from the correct noun metadata store (2-file system)
  const metadata = await Promise.race([
- this.getMetadata(id),
+ this.getNounMetadata(id),
  new Promise((_, reject) => setTimeout(() => reject(new Error('Metadata read timeout')), 5000) // 5 second timeout
  )
  ]);
@@ -229,6 +229,11 @@ export declare abstract class BaseStorage extends BaseStorageAdapter {
  * Uses routing logic to handle both UUIDs (sharded) and system keys (unsharded)
  */
  getNounMetadata(id: string): Promise<any | null>;
+ /**
+ * Delete noun metadata from storage
+ * Uses routing logic to handle both UUIDs (sharded) and system keys (unsharded)
+ */
+ deleteNounMetadata(id: string): Promise<void>;
  /**
  * Save verb metadata to storage
  * Routes to correct sharded location based on UUID
@@ -141,11 +141,33 @@ export class BaseStorage extends BaseStorageAdapter {
  async saveNoun(noun) {
  await this.ensureInitialized();
  // Validate noun type before saving - storage boundary protection
- const metadata = await this.getNounMetadata(noun.id);
- if (metadata?.noun) {
- validateNounType(metadata.noun);
+ if (noun.metadata?.noun) {
+ validateNounType(noun.metadata.noun);
+ }
+ // Save both the HNSWNoun vector data and metadata separately (2-file system)
+ try {
+ // Save the lightweight HNSWNoun vector file first
+ await this.saveNoun_internal(noun);
+ // Then save the metadata to separate file (if present)
+ if (noun.metadata) {
+ await this.saveNounMetadata(noun.id, noun.metadata);
+ }
+ }
+ catch (error) {
+ console.error(`[ERROR] Failed to save noun ${noun.id}:`, error);
+ // Attempt cleanup - remove noun file if metadata failed
+ try {
+ const nounExists = await this.getNoun_internal(noun.id);
+ if (nounExists) {
+ console.log(`[CLEANUP] Attempting to remove orphaned noun file ${noun.id}`);
+ await this.deleteNoun_internal(noun.id);
+ }
+ }
+ catch (cleanupError) {
+ console.error(`[ERROR] Failed to cleanup orphaned noun ${noun.id}:`, cleanupError);
+ }
+ throw new Error(`Failed to save noun ${noun.id}: ${error instanceof Error ? error.message : String(error)}`);
  }
- return this.saveNoun_internal(noun);
  }
  /**
  * Get a noun from storage
@@ -168,7 +190,16 @@ export class BaseStorage extends BaseStorageAdapter {
  */
  async deleteNoun(id) {
  await this.ensureInitialized();
- return this.deleteNoun_internal(id);
+ // Delete both the vector file and metadata file (2-file system)
+ await this.deleteNoun_internal(id);
+ // Delete metadata file (if it exists)
+ try {
+ await this.deleteNounMetadata(id);
+ }
+ catch (error) {
+ // Ignore if metadata file doesn't exist
+ console.debug(`No metadata file to delete for noun ${id}`);
+ }
  }
  /**
  * Save a verb to storage
@@ -618,7 +649,16 @@ export class BaseStorage extends BaseStorageAdapter {
  */
  async deleteVerb(id) {
  await this.ensureInitialized();
- return this.deleteVerb_internal(id);
+ // Delete both the vector file and metadata file (2-file system)
+ await this.deleteVerb_internal(id);
+ // Delete metadata file (if it exists)
+ try {
+ await this.deleteVerbMetadata(id);
+ }
+ catch (error) {
+ // Ignore if metadata file doesn't exist
+ console.debug(`No metadata file to delete for verb ${id}`);
+ }
  }
  /**
  * Get graph index (lazy initialization)
@@ -684,6 +724,15 @@ export class BaseStorage extends BaseStorageAdapter {
  const keyInfo = this.analyzeKey(id, 'noun-metadata');
  return this.readObjectFromPath(keyInfo.fullPath);
  }
+ /**
+ * Delete noun metadata from storage
+ * Uses routing logic to handle both UUIDs (sharded) and system keys (unsharded)
+ */
+ async deleteNounMetadata(id) {
+ await this.ensureInitialized();
+ const keyInfo = this.analyzeKey(id, 'noun-metadata');
+ return this.deleteObjectFromPath(keyInfo.fullPath);
+ }
  /**
  * Save verb metadata to storage
  * Routes to correct sharded location based on UUID
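
With the reworked BaseStorage methods above, saveNoun() writes the vector file first and the metadata file second, removes the orphaned vector file if the metadata write fails, and rethrows; deleteNoun()/deleteVerb() remove both files and ignore a missing metadata file. A hypothetical caller-side view of that behavior (illustrative, not package source):

    try {
      await storage.saveNoun(noun)        // vector file, then metadata file (if noun.metadata is set)
    } catch (err) {
      // saveNoun already attempted to delete the orphaned vector file before rethrowing
      console.error('noun was not persisted:', err)
    }
    await storage.deleteNoun(noun.id)     // deletes the vector file and, best effort, the metadata file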
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@soulcraft/brainy",
- "version": "3.36.1",
+ "version": "3.37.0",
  "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
  "main": "dist/index.js",
  "module": "dist/index.js",