@soulcraft/brainy 3.37.7 → 3.38.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -342,8 +342,12 @@ export class GcsStorage extends BaseStorage {
|
|
|
342
342
|
contentType: 'application/json',
|
|
343
343
|
resumable: false // For small objects, non-resumable is faster
|
|
344
344
|
});
|
|
345
|
-
//
|
|
346
|
-
|
|
345
|
+
// CRITICAL FIX (v3.37.8): Only cache nodes with non-empty vectors
|
|
346
|
+
// This prevents cache pollution from HNSW's lazy-loading nodes (vector: [])
|
|
347
|
+
if (node.vector && Array.isArray(node.vector) && node.vector.length > 0) {
|
|
348
|
+
this.nounCacheManager.set(node.id, node);
|
|
349
|
+
}
|
|
350
|
+
// Note: Empty vectors are intentional during HNSW lazy mode - not logged
|
|
347
351
|
// Increment noun count
|
|
348
352
|
const metadata = await this.getNounMetadata(node.id);
|
|
349
353
|
if (metadata && metadata.type) {
|
|
@@ -386,26 +390,28 @@ export class GcsStorage extends BaseStorage {
|
|
|
386
390
|
*/
|
|
387
391
|
async getNode(id) {
|
|
388
392
|
await this.ensureInitialized();
|
|
389
|
-
// Check cache first
|
|
390
|
-
const cached = this.nounCacheManager.get(id);
|
|
391
|
-
//
|
|
392
|
-
prodLog.info(`[getNode] π Cache check for ${id.substring(0, 8)}...:`, {
|
|
393
|
-
hasCached: cached !== undefined,
|
|
394
|
-
isNull: cached === null,
|
|
395
|
-
isObject: cached !== null && typeof cached === 'object',
|
|
396
|
-
type: typeof cached
|
|
397
|
-
});
|
|
398
|
-
// CRITICAL FIX: Only return cached value if it's valid (not null/undefined)
|
|
393
|
+
// Check cache first
|
|
394
|
+
const cached = await this.nounCacheManager.get(id);
|
|
395
|
+
// Validate cached object before returning (v3.37.8+)
|
|
399
396
|
if (cached !== undefined && cached !== null) {
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
397
|
+
// Validate cached object has required fields (including non-empty vector!)
|
|
398
|
+
if (!cached.id || !cached.vector || !Array.isArray(cached.vector) || cached.vector.length === 0) {
|
|
399
|
+
// Invalid cache detected - log and auto-recover
|
|
400
|
+
prodLog.warn(`[GCS] Invalid cached object for ${id.substring(0, 8)} (${!cached.id ? 'missing id' :
|
|
401
|
+
!cached.vector ? 'missing vector' :
|
|
402
|
+
!Array.isArray(cached.vector) ? 'vector not array' :
|
|
403
|
+
'empty vector'}) - removing from cache and reloading`);
|
|
404
|
+
this.nounCacheManager.delete(id);
|
|
405
|
+
// Fall through to load from GCS
|
|
406
|
+
}
|
|
407
|
+
else {
|
|
408
|
+
// Valid cache hit
|
|
409
|
+
this.logger.trace(`Cache hit for noun ${id}`);
|
|
410
|
+
return cached;
|
|
411
|
+
}
|
|
403
412
|
}
|
|
404
413
|
else if (cached === null) {
|
|
405
|
-
prodLog.warn(`[
|
|
406
|
-
}
|
|
407
|
-
else {
|
|
408
|
-
prodLog.info(`[getNode] β Cache MISS - loading from GCS for ${id.substring(0, 8)}...`);
|
|
414
|
+
prodLog.warn(`[GCS] Cache contains null for ${id.substring(0, 8)} - reloading from storage`);
|
|
409
415
|
}
|
|
410
416
|
// Apply backpressure
|
|
411
417
|
const requestId = await this.applyBackpressure();
|
|
@@ -413,20 +419,11 @@ export class GcsStorage extends BaseStorage {
|
|
|
413
419
|
this.logger.trace(`Getting node ${id}`);
|
|
414
420
|
// Get the GCS key with UUID-based sharding
|
|
415
421
|
const key = this.getNounKey(id);
|
|
416
|
-
// DIAGNOSTIC LOGGING: Show exact path being accessed
|
|
417
|
-
prodLog.info(`[getNode] π Attempting to load:`);
|
|
418
|
-
prodLog.info(`[getNode] UUID: ${id}`);
|
|
419
|
-
prodLog.info(`[getNode] Path: ${key}`);
|
|
420
|
-
prodLog.info(`[getNode] Bucket: ${this.bucketName}`);
|
|
421
422
|
// Download from GCS
|
|
422
423
|
const file = this.bucket.file(key);
|
|
423
|
-
prodLog.info(`[getNode] π₯ Downloading file...`);
|
|
424
424
|
const [contents] = await file.download();
|
|
425
|
-
prodLog.info(`[getNode] ✅ Download successful: ${contents.length} bytes`);
|
|
426
425
|
// Parse JSON
|
|
427
|
-
prodLog.info(`[getNode] π§ Parsing JSON...`);
|
|
428
426
|
const data = JSON.parse(contents.toString());
|
|
429
|
-
prodLog.info(`[getNode] ✅ JSON parsed successfully, id: ${data.id}`);
|
|
430
427
|
// Convert serialized connections back to Map<number, Set<string>>
|
|
431
428
|
const connections = new Map();
|
|
432
429
|
for (const [level, nounIds] of Object.entries(data.connections || {})) {
|
|
@@ -441,13 +438,12 @@ export class GcsStorage extends BaseStorage {
|
|
|
441
438
|
level: data.level || 0
|
|
442
439
|
// NO metadata field - retrieved separately for scalability
|
|
443
440
|
};
|
|
444
|
-
// CRITICAL FIX: Only cache valid nodes (never cache null)
|
|
445
|
-
if (node && node.id && node.vector && Array.isArray(node.vector)) {
|
|
441
|
+
// CRITICAL FIX: Only cache valid nodes with non-empty vectors (never cache null or empty)
|
|
442
|
+
if (node && node.id && node.vector && Array.isArray(node.vector) && node.vector.length > 0) {
|
|
446
443
|
this.nounCacheManager.set(id, node);
|
|
447
|
-
prodLog.info(`[getNode] πΎ Cached node ${id.substring(0, 8)}... successfully`);
|
|
448
444
|
}
|
|
449
445
|
else {
|
|
450
|
-
prodLog.warn(`[
|
|
446
|
+
prodLog.warn(`[GCS] Not caching invalid node ${id.substring(0, 8)} (missing id/vector or empty vector)`);
|
|
451
447
|
}
|
|
452
448
|
this.logger.trace(`Successfully retrieved node ${id}`);
|
|
453
449
|
this.releaseBackpressure(true, requestId);
|
|
@@ -835,13 +831,6 @@ export class GcsStorage extends BaseStorage {
|
|
|
835
831
|
await this.ensureInitialized(); // CRITICAL: Must initialize before using this.bucket
|
|
836
832
|
const limit = options.limit || 100;
|
|
837
833
|
const useCache = options.useCache !== false;
|
|
838
|
-
// DIAGNOSTIC LOGGING: Track pagination performance
|
|
839
|
-
prodLog.info(`[getNodesWithPagination] Starting pagination: limit=${limit}, cursor=${options.cursor || 'none'}`);
|
|
840
|
-
const startTime = Date.now();
|
|
841
|
-
let shardsChecked = 0;
|
|
842
|
-
let filesFound = 0;
|
|
843
|
-
let nodesLoaded = 0;
|
|
844
|
-
let nodesFailed = 0;
|
|
845
834
|
try {
|
|
846
835
|
const nodes = [];
|
|
847
836
|
// Parse cursor (format: "shardIndex:gcsPageToken")
|
|
@@ -856,7 +845,6 @@ export class GcsStorage extends BaseStorage {
|
|
|
856
845
|
for (let shardIndex = startShardIndex; shardIndex < TOTAL_SHARDS; shardIndex++) {
|
|
857
846
|
const shardId = getShardIdByIndex(shardIndex);
|
|
858
847
|
const shardPrefix = `${this.nounPrefix}${shardId}/`;
|
|
859
|
-
shardsChecked++;
|
|
860
848
|
// List objects in this shard
|
|
861
849
|
// Cap maxResults to GCS API limit to prevent "Invalid unsigned integer" errors
|
|
862
850
|
const requestedPageSize = limit - nodes.length;
|
|
@@ -866,12 +854,6 @@ export class GcsStorage extends BaseStorage {
|
|
|
866
854
|
maxResults: cappedPageSize,
|
|
867
855
|
pageToken: shardIndex === startShardIndex ? gcsPageToken : undefined
|
|
868
856
|
});
|
|
869
|
-
// DIAGNOSTIC LOGGING: Show files found per shard (only log non-empty shards)
|
|
870
|
-
if (files && files.length > 0) {
|
|
871
|
-
filesFound += files.length;
|
|
872
|
-
prodLog.info(`[Shard ${shardId}] Found ${files.length} files in "${shardPrefix}"`);
|
|
873
|
-
prodLog.info(`[Shard ${shardId}] Sample file names: ${files.slice(0, 3).map((f) => f.name).join(', ')}`);
|
|
874
|
-
}
|
|
875
857
|
// Extract node IDs from file names
|
|
876
858
|
if (files && files.length > 0) {
|
|
877
859
|
const nodeIds = files
|
|
@@ -888,21 +870,11 @@ export class GcsStorage extends BaseStorage {
|
|
|
888
870
|
return name;
|
|
889
871
|
})
|
|
890
872
|
.filter((id) => id && id.length > 0);
|
|
891
|
-
// DIAGNOSTIC LOGGING: Show extracted UUIDs
|
|
892
|
-
prodLog.info(`[Shard ${shardId}] Extracted ${nodeIds.length} UUIDs: ${nodeIds.slice(0, 3).join(', ')}...`);
|
|
893
873
|
// Load nodes
|
|
894
874
|
for (const id of nodeIds) {
|
|
895
|
-
// DIAGNOSTIC LOGGING: Show each getNode() attempt
|
|
896
|
-
prodLog.info(`[Shard ${shardId}] Calling getNode("${id}")...`);
|
|
897
875
|
const node = await this.getNode(id);
|
|
898
876
|
if (node) {
|
|
899
877
|
nodes.push(node);
|
|
900
|
-
nodesLoaded++;
|
|
901
|
-
prodLog.info(`[Shard ${shardId}] ✅ Successfully loaded node ${id}`);
|
|
902
|
-
}
|
|
903
|
-
else {
|
|
904
|
-
nodesFailed++;
|
|
905
|
-
prodLog.warn(`[Shard ${shardId}] β getNode("${id}") returned null!`);
|
|
906
878
|
}
|
|
907
879
|
if (nodes.length >= limit) {
|
|
908
880
|
break;
|
|
@@ -935,14 +907,6 @@ export class GcsStorage extends BaseStorage {
|
|
|
935
907
|
// Continue to next shard
|
|
936
908
|
}
|
|
937
909
|
// No more shards or nodes
|
|
938
|
-
// DIAGNOSTIC LOGGING: Final summary
|
|
939
|
-
const elapsedTime = Date.now() - startTime;
|
|
940
|
-
prodLog.info(`[getNodesWithPagination] COMPLETED in ${elapsedTime}ms:`);
|
|
941
|
-
prodLog.info(` - Shards checked: ${shardsChecked}/${TOTAL_SHARDS}`);
|
|
942
|
-
prodLog.info(` - Files found: ${filesFound}`);
|
|
943
|
-
prodLog.info(` - Nodes loaded: ${nodesLoaded}`);
|
|
944
|
-
prodLog.info(` - Nodes failed: ${nodesFailed}`);
|
|
945
|
-
prodLog.info(` - Success rate: ${filesFound > 0 ? ((nodesLoaded / filesFound) * 100).toFixed(1) : 'N/A'}%`);
|
|
946
910
|
return {
|
|
947
911
|
nodes,
|
|
948
912
|
totalCount: this.totalNounCount,
|
|
@@ -818,55 +818,47 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
818
818
|
*/
|
|
819
819
|
async getNode(id) {
|
|
820
820
|
await this.ensureInitialized();
|
|
821
|
-
// Check cache first
|
|
821
|
+
// Check cache first
|
|
822
822
|
const cached = this.nodeCache.get(id);
|
|
823
|
-
//
|
|
824
|
-
prodLog.info(`[getNode] π Cache check for ${id.substring(0, 8)}...:`, {
|
|
825
|
-
hasCached: cached !== undefined,
|
|
826
|
-
isNull: cached === null,
|
|
827
|
-
isObject: cached !== null && typeof cached === 'object',
|
|
828
|
-
type: typeof cached
|
|
829
|
-
});
|
|
830
|
-
// CRITICAL FIX: Only return cached value if it's valid (not null/undefined)
|
|
823
|
+
// Validate cached object before returning (v3.37.8+)
|
|
831
824
|
if (cached !== undefined && cached !== null) {
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
825
|
+
// Validate cached object has required fields (including non-empty vector!)
|
|
826
|
+
if (!cached.id || !cached.vector || !Array.isArray(cached.vector) || cached.vector.length === 0) {
|
|
827
|
+
// Invalid cache detected - log and auto-recover
|
|
828
|
+
prodLog.warn(`[S3] Invalid cached object for ${id.substring(0, 8)} (${!cached.id ? 'missing id' :
|
|
829
|
+
!cached.vector ? 'missing vector' :
|
|
830
|
+
!Array.isArray(cached.vector) ? 'vector not array' :
|
|
831
|
+
'empty vector'}) - removing from cache and reloading`);
|
|
832
|
+
this.nodeCache.delete(id);
|
|
833
|
+
// Fall through to load from S3
|
|
834
|
+
}
|
|
835
|
+
else {
|
|
836
|
+
// Valid cache hit
|
|
837
|
+
this.logger.trace(`Cache hit for node ${id}`);
|
|
838
|
+
return cached;
|
|
839
|
+
}
|
|
835
840
|
}
|
|
836
841
|
else if (cached === null) {
|
|
837
|
-
prodLog.warn(`[
|
|
838
|
-
}
|
|
839
|
-
else {
|
|
840
|
-
prodLog.info(`[getNode] β Cache MISS - loading from S3 for ${id.substring(0, 8)}...`);
|
|
842
|
+
prodLog.warn(`[S3] Cache contains null for ${id.substring(0, 8)} - reloading from storage`);
|
|
841
843
|
}
|
|
842
844
|
try {
|
|
843
845
|
// Import the GetObjectCommand only when needed
|
|
844
846
|
const { GetObjectCommand } = await import('@aws-sdk/client-s3');
|
|
845
847
|
// Use getNounKey() to properly handle sharding
|
|
846
848
|
const key = this.getNounKey(id);
|
|
847
|
-
// DIAGNOSTIC LOGGING: Show exact path being accessed
|
|
848
|
-
prodLog.info(`[getNode] π Attempting to load:`);
|
|
849
|
-
prodLog.info(`[getNode] UUID: ${id}`);
|
|
850
|
-
prodLog.info(`[getNode] Path: ${key}`);
|
|
851
|
-
prodLog.info(`[getNode] Bucket: ${this.bucketName}`);
|
|
852
849
|
// Try to get the node from the nouns directory
|
|
853
|
-
prodLog.info(`[getNode] π₯ Downloading file...`);
|
|
854
850
|
const response = await this.s3Client.send(new GetObjectCommand({
|
|
855
851
|
Bucket: this.bucketName,
|
|
856
852
|
Key: key
|
|
857
853
|
}));
|
|
858
854
|
// Check if response is null or undefined
|
|
859
855
|
if (!response || !response.Body) {
|
|
860
|
-
prodLog.warn(`[
|
|
856
|
+
prodLog.warn(`[S3] Response or Body is null/undefined for ${id.substring(0, 8)}`);
|
|
861
857
|
return null;
|
|
862
858
|
}
|
|
863
|
-
// Convert the response body to a string
|
|
859
|
+
// Convert the response body to a string and parse JSON
|
|
864
860
|
const bodyContents = await response.Body.transformToString();
|
|
865
|
-
prodLog.info(`[getNode] ✅ Download successful: ${bodyContents.length} bytes`);
|
|
866
|
-
// Parse the JSON string
|
|
867
|
-
prodLog.info(`[getNode] π§ Parsing JSON...`);
|
|
868
861
|
const parsedNode = JSON.parse(bodyContents);
|
|
869
|
-
prodLog.info(`[getNode] ✅ JSON parsed successfully, id: ${parsedNode.id}`);
|
|
870
862
|
// Ensure the parsed node has the expected properties
|
|
871
863
|
if (!parsedNode ||
|
|
872
864
|
!parsedNode.id ||
|
|
@@ -889,44 +881,28 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
889
881
|
connections,
|
|
890
882
|
level: parsedNode.level || 0
|
|
891
883
|
};
|
|
892
|
-
// CRITICAL FIX: Only cache valid nodes (never cache null)
|
|
893
|
-
if (node && node.id && node.vector && Array.isArray(node.vector)) {
|
|
884
|
+
// CRITICAL FIX: Only cache valid nodes with non-empty vectors (never cache null or empty)
|
|
885
|
+
if (node && node.id && node.vector && Array.isArray(node.vector) && node.vector.length > 0) {
|
|
894
886
|
this.nodeCache.set(id, node);
|
|
895
|
-
prodLog.info(`[getNode] πΎ Cached node ${id.substring(0, 8)}... successfully`);
|
|
896
887
|
}
|
|
897
888
|
else {
|
|
898
|
-
prodLog.warn(`[
|
|
889
|
+
prodLog.warn(`[S3] Not caching invalid node ${id.substring(0, 8)} (missing id/vector or empty vector)`);
|
|
899
890
|
}
|
|
900
891
|
this.logger.trace(`Successfully retrieved node ${id}`);
|
|
901
892
|
return node;
|
|
902
893
|
}
|
|
903
894
|
catch (error) {
|
|
904
|
-
// DIAGNOSTIC LOGGING: Log EVERY error before any conditional checks
|
|
905
|
-
const key = this.getNounKey(id);
|
|
906
|
-
prodLog.error(`[getNode] β EXCEPTION CAUGHT:`);
|
|
907
|
-
prodLog.error(`[getNode] UUID: ${id}`);
|
|
908
|
-
prodLog.error(`[getNode] Path: ${key}`);
|
|
909
|
-
prodLog.error(`[getNode] Bucket: ${this.bucketName}`);
|
|
910
|
-
prodLog.error(`[getNode] Error type: ${error?.constructor?.name || typeof error}`);
|
|
911
|
-
prodLog.error(`[getNode] Error name: ${error?.name}`);
|
|
912
|
-
prodLog.error(`[getNode] Error code: ${JSON.stringify(error?.Code || error?.code)}`);
|
|
913
|
-
prodLog.error(`[getNode] Error message: ${error?.message || String(error)}`);
|
|
914
|
-
prodLog.error(`[getNode] HTTP status: ${error?.$metadata?.httpStatusCode}`);
|
|
915
|
-
prodLog.error(`[getNode] Error object:`, JSON.stringify(error, null, 2));
|
|
916
895
|
// Check if this is a "not found" error (S3 uses "NoSuchKey")
|
|
917
896
|
if (error?.name === 'NoSuchKey' || error?.Code === 'NoSuchKey' || error?.$metadata?.httpStatusCode === 404) {
|
|
918
|
-
|
|
919
|
-
// CRITICAL FIX: Do NOT cache null values
|
|
897
|
+
// File not found - not cached, just return null
|
|
920
898
|
return null;
|
|
921
899
|
}
|
|
922
900
|
// Handle throttling
|
|
923
901
|
if (this.isThrottlingError(error)) {
|
|
924
|
-
prodLog.warn(`[getNode] Identified as throttling error - rethrowing`);
|
|
925
902
|
await this.handleThrottling(error);
|
|
926
903
|
throw error;
|
|
927
904
|
}
|
|
928
905
|
// All other errors should throw, not return null
|
|
929
|
-
prodLog.error(`[getNode] Unhandled error - rethrowing`);
|
|
930
906
|
this.logger.error(`Failed to get node ${id}:`, error);
|
|
931
907
|
throw BrainyError.fromError(error, `getNoun(${id})`);
|
|
932
908
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@soulcraft/brainy",
|
|
3
|
-
"version": "3.37.7",
|
|
3
|
+
"version": "3.38.0",
|
|
4
4
|
"description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.js",
|