@soulcraft/brainy 3.37.8 → 3.38.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -347,9 +347,7 @@ export class GcsStorage extends BaseStorage {
|
|
|
347
347
|
if (node.vector && Array.isArray(node.vector) && node.vector.length > 0) {
|
|
348
348
|
this.nounCacheManager.set(node.id, node);
|
|
349
349
|
}
|
|
350
|
-
|
|
351
|
-
prodLog.warn(`[saveNode] Not caching node ${node.id.substring(0, 8)}... with empty vector (HNSW lazy mode)`);
|
|
352
|
-
}
|
|
350
|
+
// Note: Empty vectors are intentional during HNSW lazy mode - not logged
|
|
353
351
|
// Increment noun count
|
|
354
352
|
const metadata = await this.getNounMetadata(node.id);
|
|
355
353
|
if (metadata && metadata.type) {
|
|
@@ -392,53 +390,28 @@ export class GcsStorage extends BaseStorage {
|
|
|
392
390
|
*/
|
|
393
391
|
async getNode(id) {
|
|
394
392
|
await this.ensureInitialized();
|
|
395
|
-
// Check cache first
|
|
393
|
+
// Check cache first
|
|
396
394
|
const cached = await this.nounCacheManager.get(id);
|
|
397
|
-
//
|
|
398
|
-
prodLog.info(`[getNode] π Cache check for ${id.substring(0, 8)}...:`, {
|
|
399
|
-
hasCached: cached !== undefined,
|
|
400
|
-
isNull: cached === null,
|
|
401
|
-
isObject: cached !== null && typeof cached === 'object',
|
|
402
|
-
type: typeof cached
|
|
403
|
-
});
|
|
404
|
-
// CRITICAL FIX (v3.37.8): Validate cached object before returning
|
|
395
|
+
// Validate cached object before returning (v3.37.8+)
|
|
405
396
|
if (cached !== undefined && cached !== null) {
|
|
406
|
-
// Log cached object structure to diagnose incomplete objects
|
|
407
|
-
prodLog.info(`[getNode] Cached object structure:`, {
|
|
408
|
-
hasId: !!cached.id,
|
|
409
|
-
idMatches: cached.id === id,
|
|
410
|
-
hasVector: !!cached.vector,
|
|
411
|
-
vectorLength: cached.vector?.length,
|
|
412
|
-
hasConnections: !!cached.connections,
|
|
413
|
-
connectionsType: typeof cached.connections,
|
|
414
|
-
hasLevel: cached.level !== undefined,
|
|
415
|
-
level: cached.level,
|
|
416
|
-
objectKeys: Object.keys(cached || {})
|
|
417
|
-
});
|
|
418
397
|
// Validate cached object has required fields (including non-empty vector!)
|
|
419
398
|
if (!cached.id || !cached.vector || !Array.isArray(cached.vector) || cached.vector.length === 0) {
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
'unknown'
|
|
426
|
-
});
|
|
427
|
-
prodLog.error(`[getNode] Removing invalid object from cache and loading from GCS`);
|
|
399
|
+
// Invalid cache detected - log and auto-recover
|
|
400
|
+
prodLog.warn(`[GCS] Invalid cached object for ${id.substring(0, 8)} (${!cached.id ? 'missing id' :
|
|
401
|
+
!cached.vector ? 'missing vector' :
|
|
402
|
+
!Array.isArray(cached.vector) ? 'vector not array' :
|
|
403
|
+
'empty vector'}) - removing from cache and reloading`);
|
|
428
404
|
this.nounCacheManager.delete(id);
|
|
429
405
|
// Fall through to load from GCS
|
|
430
406
|
}
|
|
431
407
|
else {
|
|
432
|
-
|
|
408
|
+
// Valid cache hit
|
|
433
409
|
this.logger.trace(`Cache hit for noun ${id}`);
|
|
434
410
|
return cached;
|
|
435
411
|
}
|
|
436
412
|
}
|
|
437
413
|
else if (cached === null) {
|
|
438
|
-
prodLog.warn(`[
|
|
439
|
-
}
|
|
440
|
-
else {
|
|
441
|
-
prodLog.info(`[getNode] β Cache MISS - loading from GCS for ${id.substring(0, 8)}...`);
|
|
414
|
+
prodLog.warn(`[GCS] Cache contains null for ${id.substring(0, 8)} - reloading from storage`);
|
|
442
415
|
}
|
|
443
416
|
// Apply backpressure
|
|
444
417
|
const requestId = await this.applyBackpressure();
|
|
@@ -446,20 +419,11 @@ export class GcsStorage extends BaseStorage {
|
|
|
446
419
|
this.logger.trace(`Getting node ${id}`);
|
|
447
420
|
// Get the GCS key with UUID-based sharding
|
|
448
421
|
const key = this.getNounKey(id);
|
|
449
|
-
// DIAGNOSTIC LOGGING: Show exact path being accessed
|
|
450
|
-
prodLog.info(`[getNode] π Attempting to load:`);
|
|
451
|
-
prodLog.info(`[getNode] UUID: ${id}`);
|
|
452
|
-
prodLog.info(`[getNode] Path: ${key}`);
|
|
453
|
-
prodLog.info(`[getNode] Bucket: ${this.bucketName}`);
|
|
454
422
|
// Download from GCS
|
|
455
423
|
const file = this.bucket.file(key);
|
|
456
|
-
prodLog.info(`[getNode] π₯ Downloading file...`);
|
|
457
424
|
const [contents] = await file.download();
|
|
458
|
-
prodLog.info(`[getNode] β
Download successful: ${contents.length} bytes`);
|
|
459
425
|
// Parse JSON
|
|
460
|
-
prodLog.info(`[getNode] π§ Parsing JSON...`);
|
|
461
426
|
const data = JSON.parse(contents.toString());
|
|
462
|
-
prodLog.info(`[getNode] β
JSON parsed successfully, id: ${data.id}`);
|
|
463
427
|
// Convert serialized connections back to Map<number, Set<string>>
|
|
464
428
|
const connections = new Map();
|
|
465
429
|
for (const [level, nounIds] of Object.entries(data.connections || {})) {
|
|
@@ -477,10 +441,9 @@ export class GcsStorage extends BaseStorage {
|
|
|
477
441
|
// CRITICAL FIX: Only cache valid nodes with non-empty vectors (never cache null or empty)
|
|
478
442
|
if (node && node.id && node.vector && Array.isArray(node.vector) && node.vector.length > 0) {
|
|
479
443
|
this.nounCacheManager.set(id, node);
|
|
480
|
-
prodLog.info(`[getNode] πΎ Cached node ${id.substring(0, 8)}... successfully`);
|
|
481
444
|
}
|
|
482
445
|
else {
|
|
483
|
-
prodLog.warn(`[
|
|
446
|
+
prodLog.warn(`[GCS] Not caching invalid node ${id.substring(0, 8)} (missing id/vector or empty vector)`);
|
|
484
447
|
}
|
|
485
448
|
this.logger.trace(`Successfully retrieved node ${id}`);
|
|
486
449
|
this.releaseBackpressure(true, requestId);
|
|
@@ -868,13 +831,6 @@ export class GcsStorage extends BaseStorage {
|
|
|
868
831
|
await this.ensureInitialized(); // CRITICAL: Must initialize before using this.bucket
|
|
869
832
|
const limit = options.limit || 100;
|
|
870
833
|
const useCache = options.useCache !== false;
|
|
871
|
-
// DIAGNOSTIC LOGGING: Track pagination performance
|
|
872
|
-
prodLog.info(`[getNodesWithPagination] Starting pagination: limit=${limit}, cursor=${options.cursor || 'none'}`);
|
|
873
|
-
const startTime = Date.now();
|
|
874
|
-
let shardsChecked = 0;
|
|
875
|
-
let filesFound = 0;
|
|
876
|
-
let nodesLoaded = 0;
|
|
877
|
-
let nodesFailed = 0;
|
|
878
834
|
try {
|
|
879
835
|
const nodes = [];
|
|
880
836
|
// Parse cursor (format: "shardIndex:gcsPageToken")
|
|
@@ -889,7 +845,6 @@ export class GcsStorage extends BaseStorage {
|
|
|
889
845
|
for (let shardIndex = startShardIndex; shardIndex < TOTAL_SHARDS; shardIndex++) {
|
|
890
846
|
const shardId = getShardIdByIndex(shardIndex);
|
|
891
847
|
const shardPrefix = `${this.nounPrefix}${shardId}/`;
|
|
892
|
-
shardsChecked++;
|
|
893
848
|
// List objects in this shard
|
|
894
849
|
// Cap maxResults to GCS API limit to prevent "Invalid unsigned integer" errors
|
|
895
850
|
const requestedPageSize = limit - nodes.length;
|
|
@@ -899,12 +854,6 @@ export class GcsStorage extends BaseStorage {
|
|
|
899
854
|
maxResults: cappedPageSize,
|
|
900
855
|
pageToken: shardIndex === startShardIndex ? gcsPageToken : undefined
|
|
901
856
|
});
|
|
902
|
-
// DIAGNOSTIC LOGGING: Show files found per shard (only log non-empty shards)
|
|
903
|
-
if (files && files.length > 0) {
|
|
904
|
-
filesFound += files.length;
|
|
905
|
-
prodLog.info(`[Shard ${shardId}] Found ${files.length} files in "${shardPrefix}"`);
|
|
906
|
-
prodLog.info(`[Shard ${shardId}] Sample file names: ${files.slice(0, 3).map((f) => f.name).join(', ')}`);
|
|
907
|
-
}
|
|
908
857
|
// Extract node IDs from file names
|
|
909
858
|
if (files && files.length > 0) {
|
|
910
859
|
const nodeIds = files
|
|
@@ -921,21 +870,11 @@ export class GcsStorage extends BaseStorage {
|
|
|
921
870
|
return name;
|
|
922
871
|
})
|
|
923
872
|
.filter((id) => id && id.length > 0);
|
|
924
|
-
// DIAGNOSTIC LOGGING: Show extracted UUIDs
|
|
925
|
-
prodLog.info(`[Shard ${shardId}] Extracted ${nodeIds.length} UUIDs: ${nodeIds.slice(0, 3).join(', ')}...`);
|
|
926
873
|
// Load nodes
|
|
927
874
|
for (const id of nodeIds) {
|
|
928
|
-
// DIAGNOSTIC LOGGING: Show each getNode() attempt
|
|
929
|
-
prodLog.info(`[Shard ${shardId}] Calling getNode("${id}")...`);
|
|
930
875
|
const node = await this.getNode(id);
|
|
931
876
|
if (node) {
|
|
932
877
|
nodes.push(node);
|
|
933
|
-
nodesLoaded++;
|
|
934
|
-
prodLog.info(`[Shard ${shardId}] β
Successfully loaded node ${id}`);
|
|
935
|
-
}
|
|
936
|
-
else {
|
|
937
|
-
nodesFailed++;
|
|
938
|
-
prodLog.warn(`[Shard ${shardId}] β getNode("${id}") returned null!`);
|
|
939
878
|
}
|
|
940
879
|
if (nodes.length >= limit) {
|
|
941
880
|
break;
|
|
@@ -968,14 +907,6 @@ export class GcsStorage extends BaseStorage {
|
|
|
968
907
|
// Continue to next shard
|
|
969
908
|
}
|
|
970
909
|
// No more shards or nodes
|
|
971
|
-
// DIAGNOSTIC LOGGING: Final summary
|
|
972
|
-
const elapsedTime = Date.now() - startTime;
|
|
973
|
-
prodLog.info(`[getNodesWithPagination] COMPLETED in ${elapsedTime}ms:`);
|
|
974
|
-
prodLog.info(` - Shards checked: ${shardsChecked}/${TOTAL_SHARDS}`);
|
|
975
|
-
prodLog.info(` - Files found: ${filesFound}`);
|
|
976
|
-
prodLog.info(` - Nodes loaded: ${nodesLoaded}`);
|
|
977
|
-
prodLog.info(` - Nodes failed: ${nodesFailed}`);
|
|
978
|
-
prodLog.info(` - Success rate: ${filesFound > 0 ? ((nodesLoaded / filesFound) * 100).toFixed(1) : 'N/A'}%`);
|
|
979
910
|
return {
|
|
980
911
|
nodes,
|
|
981
912
|
totalCount: this.totalNounCount,
|
|
@@ -818,80 +818,47 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
818
818
|
*/
|
|
819
819
|
async getNode(id) {
|
|
820
820
|
await this.ensureInitialized();
|
|
821
|
-
// Check cache first
|
|
821
|
+
// Check cache first
|
|
822
822
|
const cached = this.nodeCache.get(id);
|
|
823
|
-
//
|
|
824
|
-
prodLog.info(`[getNode] π Cache check for ${id.substring(0, 8)}...:`, {
|
|
825
|
-
hasCached: cached !== undefined,
|
|
826
|
-
isNull: cached === null,
|
|
827
|
-
isObject: cached !== null && typeof cached === 'object',
|
|
828
|
-
type: typeof cached
|
|
829
|
-
});
|
|
830
|
-
// CRITICAL FIX (v3.37.8): Validate cached object before returning
|
|
823
|
+
// Validate cached object before returning (v3.37.8+)
|
|
831
824
|
if (cached !== undefined && cached !== null) {
|
|
832
|
-
// Log cached object structure to diagnose incomplete objects
|
|
833
|
-
prodLog.info(`[getNode] Cached object structure:`, {
|
|
834
|
-
hasId: !!cached.id,
|
|
835
|
-
idMatches: cached.id === id,
|
|
836
|
-
hasVector: !!cached.vector,
|
|
837
|
-
vectorLength: cached.vector?.length,
|
|
838
|
-
hasConnections: !!cached.connections,
|
|
839
|
-
connectionsType: typeof cached.connections,
|
|
840
|
-
objectKeys: Object.keys(cached || {})
|
|
841
|
-
});
|
|
842
825
|
// Validate cached object has required fields (including non-empty vector!)
|
|
843
826
|
if (!cached.id || !cached.vector || !Array.isArray(cached.vector) || cached.vector.length === 0) {
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
'unknown'
|
|
850
|
-
});
|
|
851
|
-
prodLog.error(`[getNode] Removing invalid object from cache and loading from S3`);
|
|
827
|
+
// Invalid cache detected - log and auto-recover
|
|
828
|
+
prodLog.warn(`[S3] Invalid cached object for ${id.substring(0, 8)} (${!cached.id ? 'missing id' :
|
|
829
|
+
!cached.vector ? 'missing vector' :
|
|
830
|
+
!Array.isArray(cached.vector) ? 'vector not array' :
|
|
831
|
+
'empty vector'}) - removing from cache and reloading`);
|
|
852
832
|
this.nodeCache.delete(id);
|
|
853
833
|
// Fall through to load from S3
|
|
854
834
|
}
|
|
855
835
|
else {
|
|
856
|
-
|
|
836
|
+
// Valid cache hit
|
|
857
837
|
this.logger.trace(`Cache hit for node ${id}`);
|
|
858
838
|
return cached;
|
|
859
839
|
}
|
|
860
840
|
}
|
|
861
841
|
else if (cached === null) {
|
|
862
|
-
prodLog.warn(`[
|
|
863
|
-
}
|
|
864
|
-
else {
|
|
865
|
-
prodLog.info(`[getNode] β Cache MISS - loading from S3 for ${id.substring(0, 8)}...`);
|
|
842
|
+
prodLog.warn(`[S3] Cache contains null for ${id.substring(0, 8)} - reloading from storage`);
|
|
866
843
|
}
|
|
867
844
|
try {
|
|
868
845
|
// Import the GetObjectCommand only when needed
|
|
869
846
|
const { GetObjectCommand } = await import('@aws-sdk/client-s3');
|
|
870
847
|
// Use getNounKey() to properly handle sharding
|
|
871
848
|
const key = this.getNounKey(id);
|
|
872
|
-
// DIAGNOSTIC LOGGING: Show exact path being accessed
|
|
873
|
-
prodLog.info(`[getNode] π Attempting to load:`);
|
|
874
|
-
prodLog.info(`[getNode] UUID: ${id}`);
|
|
875
|
-
prodLog.info(`[getNode] Path: ${key}`);
|
|
876
|
-
prodLog.info(`[getNode] Bucket: ${this.bucketName}`);
|
|
877
849
|
// Try to get the node from the nouns directory
|
|
878
|
-
prodLog.info(`[getNode] π₯ Downloading file...`);
|
|
879
850
|
const response = await this.s3Client.send(new GetObjectCommand({
|
|
880
851
|
Bucket: this.bucketName,
|
|
881
852
|
Key: key
|
|
882
853
|
}));
|
|
883
854
|
// Check if response is null or undefined
|
|
884
855
|
if (!response || !response.Body) {
|
|
885
|
-
prodLog.warn(`[
|
|
856
|
+
prodLog.warn(`[S3] Response or Body is null/undefined for ${id.substring(0, 8)}`);
|
|
886
857
|
return null;
|
|
887
858
|
}
|
|
888
|
-
// Convert the response body to a string
|
|
859
|
+
// Convert the response body to a string and parse JSON
|
|
889
860
|
const bodyContents = await response.Body.transformToString();
|
|
890
|
-
prodLog.info(`[getNode] β
Download successful: ${bodyContents.length} bytes`);
|
|
891
|
-
// Parse the JSON string
|
|
892
|
-
prodLog.info(`[getNode] π§ Parsing JSON...`);
|
|
893
861
|
const parsedNode = JSON.parse(bodyContents);
|
|
894
|
-
prodLog.info(`[getNode] β
JSON parsed successfully, id: ${parsedNode.id}`);
|
|
895
862
|
// Ensure the parsed node has the expected properties
|
|
896
863
|
if (!parsedNode ||
|
|
897
864
|
!parsedNode.id ||
|
|
@@ -917,41 +884,25 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
917
884
|
// CRITICAL FIX: Only cache valid nodes with non-empty vectors (never cache null or empty)
|
|
918
885
|
if (node && node.id && node.vector && Array.isArray(node.vector) && node.vector.length > 0) {
|
|
919
886
|
this.nodeCache.set(id, node);
|
|
920
|
-
prodLog.info(`[getNode] πΎ Cached node ${id.substring(0, 8)}... successfully`);
|
|
921
887
|
}
|
|
922
888
|
else {
|
|
923
|
-
prodLog.warn(`[
|
|
889
|
+
prodLog.warn(`[S3] Not caching invalid node ${id.substring(0, 8)} (missing id/vector or empty vector)`);
|
|
924
890
|
}
|
|
925
891
|
this.logger.trace(`Successfully retrieved node ${id}`);
|
|
926
892
|
return node;
|
|
927
893
|
}
|
|
928
894
|
catch (error) {
|
|
929
|
-
// DIAGNOSTIC LOGGING: Log EVERY error before any conditional checks
|
|
930
|
-
const key = this.getNounKey(id);
|
|
931
|
-
prodLog.error(`[getNode] β EXCEPTION CAUGHT:`);
|
|
932
|
-
prodLog.error(`[getNode] UUID: ${id}`);
|
|
933
|
-
prodLog.error(`[getNode] Path: ${key}`);
|
|
934
|
-
prodLog.error(`[getNode] Bucket: ${this.bucketName}`);
|
|
935
|
-
prodLog.error(`[getNode] Error type: ${error?.constructor?.name || typeof error}`);
|
|
936
|
-
prodLog.error(`[getNode] Error name: ${error?.name}`);
|
|
937
|
-
prodLog.error(`[getNode] Error code: ${JSON.stringify(error?.Code || error?.code)}`);
|
|
938
|
-
prodLog.error(`[getNode] Error message: ${error?.message || String(error)}`);
|
|
939
|
-
prodLog.error(`[getNode] HTTP status: ${error?.$metadata?.httpStatusCode}`);
|
|
940
|
-
prodLog.error(`[getNode] Error object:`, JSON.stringify(error, null, 2));
|
|
941
895
|
// Check if this is a "not found" error (S3 uses "NoSuchKey")
|
|
942
896
|
if (error?.name === 'NoSuchKey' || error?.Code === 'NoSuchKey' || error?.$metadata?.httpStatusCode === 404) {
|
|
943
|
-
|
|
944
|
-
// CRITICAL FIX: Do NOT cache null values
|
|
897
|
+
// File not found - not cached, just return null
|
|
945
898
|
return null;
|
|
946
899
|
}
|
|
947
900
|
// Handle throttling
|
|
948
901
|
if (this.isThrottlingError(error)) {
|
|
949
|
-
prodLog.warn(`[getNode] Identified as throttling error - rethrowing`);
|
|
950
902
|
await this.handleThrottling(error);
|
|
951
903
|
throw error;
|
|
952
904
|
}
|
|
953
905
|
// All other errors should throw, not return null
|
|
954
|
-
prodLog.error(`[getNode] Unhandled error - rethrowing`);
|
|
955
906
|
this.logger.error(`Failed to get node ${id}:`, error);
|
|
956
907
|
throw BrainyError.fromError(error, `getNoun(${id})`);
|
|
957
908
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@soulcraft/brainy",
|
|
3
|
-
"version": "3.
|
|
3
|
+
"version": "3.38.0",
|
|
4
4
|
"description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.js",
|