@soulcraft/brainy 3.37.7 → 3.38.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -342,8 +342,12 @@ export class GcsStorage extends BaseStorage {
342
342
  contentType: 'application/json',
343
343
  resumable: false // For small objects, non-resumable is faster
344
344
  });
345
- // Update cache
346
- this.nounCacheManager.set(node.id, node);
345
+ // CRITICAL FIX (v3.37.8): Only cache nodes with non-empty vectors
346
+ // This prevents cache pollution from HNSW's lazy-loading nodes (vector: [])
347
+ if (node.vector && Array.isArray(node.vector) && node.vector.length > 0) {
348
+ this.nounCacheManager.set(node.id, node);
349
+ }
350
+ // Note: Empty vectors are intentional during HNSW lazy mode - not logged
347
351
  // Increment noun count
348
352
  const metadata = await this.getNounMetadata(node.id);
349
353
  if (metadata && metadata.type) {
@@ -386,26 +390,28 @@ export class GcsStorage extends BaseStorage {
386
390
  */
387
391
  async getNode(id) {
388
392
  await this.ensureInitialized();
389
- // Check cache first WITH LOGGING
390
- const cached = this.nounCacheManager.get(id);
391
- // DIAGNOSTIC LOGGING: Reveal cache poisoning
392
- prodLog.info(`[getNode] πŸ” Cache check for ${id.substring(0, 8)}...:`, {
393
- hasCached: cached !== undefined,
394
- isNull: cached === null,
395
- isObject: cached !== null && typeof cached === 'object',
396
- type: typeof cached
397
- });
398
- // CRITICAL FIX: Only return cached value if it's valid (not null/undefined)
393
+ // Check cache first
394
+ const cached = await this.nounCacheManager.get(id);
395
+ // Validate cached object before returning (v3.37.8+)
399
396
  if (cached !== undefined && cached !== null) {
400
- prodLog.info(`[getNode] ✅ Cache HIT - returning cached node for ${id.substring(0, 8)}...`);
401
- this.logger.trace(`Cache hit for noun ${id}`);
402
- return cached;
397
+ // Validate cached object has required fields (including non-empty vector!)
398
+ if (!cached.id || !cached.vector || !Array.isArray(cached.vector) || cached.vector.length === 0) {
399
+ // Invalid cache detected - log and auto-recover
400
+ prodLog.warn(`[GCS] Invalid cached object for ${id.substring(0, 8)} (${!cached.id ? 'missing id' :
401
+ !cached.vector ? 'missing vector' :
402
+ !Array.isArray(cached.vector) ? 'vector not array' :
403
+ 'empty vector'}) - removing from cache and reloading`);
404
+ this.nounCacheManager.delete(id);
405
+ // Fall through to load from GCS
406
+ }
407
+ else {
408
+ // Valid cache hit
409
+ this.logger.trace(`Cache hit for noun ${id}`);
410
+ return cached;
411
+ }
403
412
  }
404
413
  else if (cached === null) {
405
- prodLog.warn(`[getNode] ⚠️ Cache contains NULL for ${id.substring(0, 8)}... - ignoring and loading from GCS`);
406
- }
407
- else {
408
- prodLog.info(`[getNode] ❌ Cache MISS - loading from GCS for ${id.substring(0, 8)}...`);
414
+ prodLog.warn(`[GCS] Cache contains null for ${id.substring(0, 8)} - reloading from storage`);
409
415
  }
410
416
  // Apply backpressure
411
417
  const requestId = await this.applyBackpressure();
@@ -413,20 +419,11 @@ export class GcsStorage extends BaseStorage {
413
419
  this.logger.trace(`Getting node ${id}`);
414
420
  // Get the GCS key with UUID-based sharding
415
421
  const key = this.getNounKey(id);
416
- // DIAGNOSTIC LOGGING: Show exact path being accessed
417
- prodLog.info(`[getNode] πŸ” Attempting to load:`);
418
- prodLog.info(`[getNode] UUID: ${id}`);
419
- prodLog.info(`[getNode] Path: ${key}`);
420
- prodLog.info(`[getNode] Bucket: ${this.bucketName}`);
421
422
  // Download from GCS
422
423
  const file = this.bucket.file(key);
423
- prodLog.info(`[getNode] 📥 Downloading file...`);
424
424
  const [contents] = await file.download();
425
- prodLog.info(`[getNode] ✅ Download successful: ${contents.length} bytes`);
426
425
  // Parse JSON
427
- prodLog.info(`[getNode] 🔧 Parsing JSON...`);
428
426
  const data = JSON.parse(contents.toString());
429
- prodLog.info(`[getNode] ✅ JSON parsed successfully, id: ${data.id}`);
430
427
  // Convert serialized connections back to Map<number, Set<string>>
431
428
  const connections = new Map();
432
429
  for (const [level, nounIds] of Object.entries(data.connections || {})) {
@@ -441,13 +438,12 @@ export class GcsStorage extends BaseStorage {
441
438
  level: data.level || 0
442
439
  // NO metadata field - retrieved separately for scalability
443
440
  };
444
- // CRITICAL FIX: Only cache valid nodes (never cache null)
445
- if (node && node.id && node.vector && Array.isArray(node.vector)) {
441
+ // CRITICAL FIX: Only cache valid nodes with non-empty vectors (never cache null or empty)
442
+ if (node && node.id && node.vector && Array.isArray(node.vector) && node.vector.length > 0) {
446
443
  this.nounCacheManager.set(id, node);
447
- prodLog.info(`[getNode] 💾 Cached node ${id.substring(0, 8)}... successfully`);
448
444
  }
449
445
  else {
450
- prodLog.warn(`[getNode] ⚠️ NOT caching invalid node for ${id.substring(0, 8)}...`);
446
+ prodLog.warn(`[GCS] Not caching invalid node ${id.substring(0, 8)} (missing id/vector or empty vector)`);
451
447
  }
452
448
  this.logger.trace(`Successfully retrieved node ${id}`);
453
449
  this.releaseBackpressure(true, requestId);
@@ -835,13 +831,6 @@ export class GcsStorage extends BaseStorage {
835
831
  await this.ensureInitialized(); // CRITICAL: Must initialize before using this.bucket
836
832
  const limit = options.limit || 100;
837
833
  const useCache = options.useCache !== false;
838
- // DIAGNOSTIC LOGGING: Track pagination performance
839
- prodLog.info(`[getNodesWithPagination] Starting pagination: limit=${limit}, cursor=${options.cursor || 'none'}`);
840
- const startTime = Date.now();
841
- let shardsChecked = 0;
842
- let filesFound = 0;
843
- let nodesLoaded = 0;
844
- let nodesFailed = 0;
845
834
  try {
846
835
  const nodes = [];
847
836
  // Parse cursor (format: "shardIndex:gcsPageToken")
@@ -856,7 +845,6 @@ export class GcsStorage extends BaseStorage {
856
845
  for (let shardIndex = startShardIndex; shardIndex < TOTAL_SHARDS; shardIndex++) {
857
846
  const shardId = getShardIdByIndex(shardIndex);
858
847
  const shardPrefix = `${this.nounPrefix}${shardId}/`;
859
- shardsChecked++;
860
848
  // List objects in this shard
861
849
  // Cap maxResults to GCS API limit to prevent "Invalid unsigned integer" errors
862
850
  const requestedPageSize = limit - nodes.length;
@@ -866,12 +854,6 @@ export class GcsStorage extends BaseStorage {
866
854
  maxResults: cappedPageSize,
867
855
  pageToken: shardIndex === startShardIndex ? gcsPageToken : undefined
868
856
  });
869
- // DIAGNOSTIC LOGGING: Show files found per shard (only log non-empty shards)
870
- if (files && files.length > 0) {
871
- filesFound += files.length;
872
- prodLog.info(`[Shard ${shardId}] Found ${files.length} files in "${shardPrefix}"`);
873
- prodLog.info(`[Shard ${shardId}] Sample file names: ${files.slice(0, 3).map((f) => f.name).join(', ')}`);
874
- }
875
857
  // Extract node IDs from file names
876
858
  if (files && files.length > 0) {
877
859
  const nodeIds = files
@@ -888,21 +870,11 @@ export class GcsStorage extends BaseStorage {
888
870
  return name;
889
871
  })
890
872
  .filter((id) => id && id.length > 0);
891
- // DIAGNOSTIC LOGGING: Show extracted UUIDs
892
- prodLog.info(`[Shard ${shardId}] Extracted ${nodeIds.length} UUIDs: ${nodeIds.slice(0, 3).join(', ')}...`);
893
873
  // Load nodes
894
874
  for (const id of nodeIds) {
895
- // DIAGNOSTIC LOGGING: Show each getNode() attempt
896
- prodLog.info(`[Shard ${shardId}] Calling getNode("${id}")...`);
897
875
  const node = await this.getNode(id);
898
876
  if (node) {
899
877
  nodes.push(node);
900
- nodesLoaded++;
901
- prodLog.info(`[Shard ${shardId}] ✅ Successfully loaded node ${id}`);
902
- }
903
- else {
904
- nodesFailed++;
905
- prodLog.warn(`[Shard ${shardId}] ❌ getNode("${id}") returned null!`);
906
878
  }
907
879
  if (nodes.length >= limit) {
908
880
  break;
@@ -935,14 +907,6 @@ export class GcsStorage extends BaseStorage {
935
907
  // Continue to next shard
936
908
  }
937
909
  // No more shards or nodes
938
- // DIAGNOSTIC LOGGING: Final summary
939
- const elapsedTime = Date.now() - startTime;
940
- prodLog.info(`[getNodesWithPagination] COMPLETED in ${elapsedTime}ms:`);
941
- prodLog.info(` - Shards checked: ${shardsChecked}/${TOTAL_SHARDS}`);
942
- prodLog.info(` - Files found: ${filesFound}`);
943
- prodLog.info(` - Nodes loaded: ${nodesLoaded}`);
944
- prodLog.info(` - Nodes failed: ${nodesFailed}`);
945
- prodLog.info(` - Success rate: ${filesFound > 0 ? ((nodesLoaded / filesFound) * 100).toFixed(1) : 'N/A'}%`);
946
910
  return {
947
911
  nodes,
948
912
  totalCount: this.totalNounCount,
@@ -818,55 +818,47 @@ export class S3CompatibleStorage extends BaseStorage {
818
818
  */
819
819
  async getNode(id) {
820
820
  await this.ensureInitialized();
821
- // Check cache first WITH LOGGING
821
+ // Check cache first
822
822
  const cached = this.nodeCache.get(id);
823
- // DIAGNOSTIC LOGGING: Reveal cache poisoning
824
- prodLog.info(`[getNode] πŸ” Cache check for ${id.substring(0, 8)}...:`, {
825
- hasCached: cached !== undefined,
826
- isNull: cached === null,
827
- isObject: cached !== null && typeof cached === 'object',
828
- type: typeof cached
829
- });
830
- // CRITICAL FIX: Only return cached value if it's valid (not null/undefined)
823
+ // Validate cached object before returning (v3.37.8+)
831
824
  if (cached !== undefined && cached !== null) {
832
- prodLog.info(`[getNode] ✅ Cache HIT - returning cached node for ${id.substring(0, 8)}...`);
833
- this.logger.trace(`Cache hit for node ${id}`);
834
- return cached;
825
+ // Validate cached object has required fields (including non-empty vector!)
826
+ if (!cached.id || !cached.vector || !Array.isArray(cached.vector) || cached.vector.length === 0) {
827
+ // Invalid cache detected - log and auto-recover
828
+ prodLog.warn(`[S3] Invalid cached object for ${id.substring(0, 8)} (${!cached.id ? 'missing id' :
829
+ !cached.vector ? 'missing vector' :
830
+ !Array.isArray(cached.vector) ? 'vector not array' :
831
+ 'empty vector'}) - removing from cache and reloading`);
832
+ this.nodeCache.delete(id);
833
+ // Fall through to load from S3
834
+ }
835
+ else {
836
+ // Valid cache hit
837
+ this.logger.trace(`Cache hit for node ${id}`);
838
+ return cached;
839
+ }
835
840
  }
836
841
  else if (cached === null) {
837
- prodLog.warn(`[getNode] ⚠️ Cache contains NULL for ${id.substring(0, 8)}... - ignoring and loading from S3`);
838
- }
839
- else {
840
- prodLog.info(`[getNode] ❌ Cache MISS - loading from S3 for ${id.substring(0, 8)}...`);
842
+ prodLog.warn(`[S3] Cache contains null for ${id.substring(0, 8)} - reloading from storage`);
841
843
  }
842
844
  try {
843
845
  // Import the GetObjectCommand only when needed
844
846
  const { GetObjectCommand } = await import('@aws-sdk/client-s3');
845
847
  // Use getNounKey() to properly handle sharding
846
848
  const key = this.getNounKey(id);
847
- // DIAGNOSTIC LOGGING: Show exact path being accessed
848
- prodLog.info(`[getNode] πŸ” Attempting to load:`);
849
- prodLog.info(`[getNode] UUID: ${id}`);
850
- prodLog.info(`[getNode] Path: ${key}`);
851
- prodLog.info(`[getNode] Bucket: ${this.bucketName}`);
852
849
  // Try to get the node from the nouns directory
853
- prodLog.info(`[getNode] 📥 Downloading file...`);
854
850
  const response = await this.s3Client.send(new GetObjectCommand({
855
851
  Bucket: this.bucketName,
856
852
  Key: key
857
853
  }));
858
854
  // Check if response is null or undefined
859
855
  if (!response || !response.Body) {
860
- prodLog.warn(`[getNode] ❌ Response or Body is null/undefined`);
856
+ prodLog.warn(`[S3] Response or Body is null/undefined for ${id.substring(0, 8)}`);
861
857
  return null;
862
858
  }
863
- // Convert the response body to a string
859
+ // Convert the response body to a string and parse JSON
864
860
  const bodyContents = await response.Body.transformToString();
865
- prodLog.info(`[getNode] ✅ Download successful: ${bodyContents.length} bytes`);
866
- // Parse the JSON string
867
- prodLog.info(`[getNode] 🔧 Parsing JSON...`);
868
861
  const parsedNode = JSON.parse(bodyContents);
869
- prodLog.info(`[getNode] ✅ JSON parsed successfully, id: ${parsedNode.id}`);
870
862
  // Ensure the parsed node has the expected properties
871
863
  if (!parsedNode ||
872
864
  !parsedNode.id ||
@@ -889,44 +881,28 @@ export class S3CompatibleStorage extends BaseStorage {
889
881
  connections,
890
882
  level: parsedNode.level || 0
891
883
  };
892
- // CRITICAL FIX: Only cache valid nodes (never cache null)
893
- if (node && node.id && node.vector && Array.isArray(node.vector)) {
884
+ // CRITICAL FIX: Only cache valid nodes with non-empty vectors (never cache null or empty)
885
+ if (node && node.id && node.vector && Array.isArray(node.vector) && node.vector.length > 0) {
894
886
  this.nodeCache.set(id, node);
895
- prodLog.info(`[getNode] 💾 Cached node ${id.substring(0, 8)}... successfully`);
896
887
  }
897
888
  else {
898
- prodLog.warn(`[getNode] ⚠️ NOT caching invalid node for ${id.substring(0, 8)}...`);
889
+ prodLog.warn(`[S3] Not caching invalid node ${id.substring(0, 8)} (missing id/vector or empty vector)`);
899
890
  }
900
891
  this.logger.trace(`Successfully retrieved node ${id}`);
901
892
  return node;
902
893
  }
903
894
  catch (error) {
904
- // DIAGNOSTIC LOGGING: Log EVERY error before any conditional checks
905
- const key = this.getNounKey(id);
906
- prodLog.error(`[getNode] ❌ EXCEPTION CAUGHT:`);
907
- prodLog.error(`[getNode] UUID: ${id}`);
908
- prodLog.error(`[getNode] Path: ${key}`);
909
- prodLog.error(`[getNode] Bucket: ${this.bucketName}`);
910
- prodLog.error(`[getNode] Error type: ${error?.constructor?.name || typeof error}`);
911
- prodLog.error(`[getNode] Error name: ${error?.name}`);
912
- prodLog.error(`[getNode] Error code: ${JSON.stringify(error?.Code || error?.code)}`);
913
- prodLog.error(`[getNode] Error message: ${error?.message || String(error)}`);
914
- prodLog.error(`[getNode] HTTP status: ${error?.$metadata?.httpStatusCode}`);
915
- prodLog.error(`[getNode] Error object:`, JSON.stringify(error, null, 2));
916
895
  // Check if this is a "not found" error (S3 uses "NoSuchKey")
917
896
  if (error?.name === 'NoSuchKey' || error?.Code === 'NoSuchKey' || error?.$metadata?.httpStatusCode === 404) {
918
- prodLog.warn(`[getNode] Identified as 404/NoSuchKey error - returning null WITHOUT caching`);
919
- // CRITICAL FIX: Do NOT cache null values
897
+ // File not found - not cached, just return null
920
898
  return null;
921
899
  }
922
900
  // Handle throttling
923
901
  if (this.isThrottlingError(error)) {
924
- prodLog.warn(`[getNode] Identified as throttling error - rethrowing`);
925
902
  await this.handleThrottling(error);
926
903
  throw error;
927
904
  }
928
905
  // All other errors should throw, not return null
929
- prodLog.error(`[getNode] Unhandled error - rethrowing`);
930
906
  this.logger.error(`Failed to get node ${id}:`, error);
931
907
  throw BrainyError.fromError(error, `getNoun(${id})`);
932
908
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@soulcraft/brainy",
3
- "version": "3.37.7",
3
+ "version": "3.38.0",
4
4
  "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.js",