@soulcraft/brainy 5.3.5 → 5.4.0
This diff compares the published contents of two package versions as they appear in their public registry. It is provided for informational purposes only.
- package/CHANGELOG.md +72 -0
- package/dist/brainy.d.ts +61 -0
- package/dist/brainy.js +188 -24
- package/dist/storage/adapters/azureBlobStorage.d.ts +13 -64
- package/dist/storage/adapters/azureBlobStorage.js +78 -388
- package/dist/storage/adapters/fileSystemStorage.d.ts +12 -78
- package/dist/storage/adapters/fileSystemStorage.js +49 -395
- package/dist/storage/adapters/gcsStorage.d.ts +13 -134
- package/dist/storage/adapters/gcsStorage.js +79 -557
- package/dist/storage/adapters/historicalStorageAdapter.d.ts +181 -0
- package/dist/storage/adapters/historicalStorageAdapter.js +332 -0
- package/dist/storage/adapters/memoryStorage.d.ts +4 -113
- package/dist/storage/adapters/memoryStorage.js +34 -471
- package/dist/storage/adapters/opfsStorage.d.ts +14 -127
- package/dist/storage/adapters/opfsStorage.js +44 -693
- package/dist/storage/adapters/r2Storage.d.ts +8 -41
- package/dist/storage/adapters/r2Storage.js +49 -237
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +13 -111
- package/dist/storage/adapters/s3CompatibleStorage.js +77 -596
- package/dist/storage/baseStorage.d.ts +78 -38
- package/dist/storage/baseStorage.js +699 -23
- package/dist/storage/cow/BlobStorage.d.ts +2 -2
- package/dist/storage/cow/BlobStorage.js +4 -4
- package/dist/storage/storageFactory.d.ts +2 -3
- package/dist/storage/storageFactory.js +114 -66
- package/dist/vfs/types.d.ts +6 -2
- package/package.json +1 -1
```diff
@@ -8,7 +8,6 @@
  * 3. Service Account Credentials Object
  * 4. HMAC Keys (fallback for backward compatibility)
  */
-import { NounType } from '../../coreTypes.js';
 import { BaseStorage, SYSTEM_DIR, STATISTICS_KEY, getDirectoryPath } from '../baseStorage.js';
 import { BrainyError } from '../../errors/brainyError.js';
 import { CacheManager } from '../cacheManager.js';
```
```diff
@@ -16,7 +15,7 @@ import { createModuleLogger, prodLog } from '../../utils/logger.js';
 import { getGlobalBackpressure } from '../../utils/adaptiveBackpressure.js';
 import { getWriteBuffer } from '../../utils/writeBuffer.js';
 import { getCoalescer } from '../../utils/requestCoalescer.js';
-import { getShardIdFromUuid
+import { getShardIdFromUuid } from '../sharding.js';
 // GCS API limits
 // Maximum value for maxResults parameter in GCS API calls
 // Values above this cause "Invalid unsigned integer" errors
```
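The page-size cap these comments describe is mechanical: the GCS list API rejects `maxResults` values above the limit, so the adapter clamps before calling. A minimal sketch of that clamping, mirroring the `bucket.getFiles` call shape used elsewhere in this file (`listPage` is a hypothetical helper, not part of the package):

```js
// Sketch: clamp a requested page size to the GCS list-API limit before
// calling bucket.getFiles, mirroring the pattern used in this adapter.
const MAX_GCS_PAGE_SIZE = 5000; // larger values trigger "Invalid unsigned integer"

async function listPage(bucket, prefix, requestedPageSize, pageToken) {
  const [files, , response] = await bucket.getFiles({
    prefix,
    maxResults: Math.min(requestedPageSize, MAX_GCS_PAGE_SIZE),
    pageToken
  });
  return { files, nextPageToken: response?.nextPageToken };
}
```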
```diff
@@ -30,6 +29,12 @@ const MAX_GCS_PAGE_SIZE = 5000;
  * 2. Service Account Key File (if keyFilename provided)
  * 3. Service Account Credentials Object (if credentials provided)
  * 4. HMAC Keys (if accessKeyId/secretAccessKey provided)
+ *
+ * v5.4.0: Type-aware storage now built into BaseStorage
+ * - Removed 10 *_internal method overrides (now inherit from BaseStorage's type-first implementation)
+ * - Removed 2 pagination method overrides (getNounsWithPagination, getVerbsWithPagination)
+ * - Updated HNSW methods to use BaseStorage's getNoun/saveNoun (type-first paths)
+ * - All operations now use type-first paths: entities/nouns/{type}/vectors/{shard}/{id}.json
  */
 export class GcsStorage extends BaseStorage {
     /**
```
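The last bullet is the heart of the change: each noun's key now encodes its type before its shard. A sketch of how such a key could be assembled, assuming the shard id is the UUID's first two hex characters (suggested by the `entities/nouns/vectors/ab/ab123456-uuid.json` comment in the removed pagination code below); the helper names are illustrative, only the path pattern is from the diff:

```js
// Hypothetical sketch of the v5.4.0 type-first key layout.
// Assumption: shard id = first two hex characters of the UUID.
function shardIdFromUuid(id) {
  return id.slice(0, 2).toLowerCase();
}

function nounVectorKey(type, id) {
  // entities/nouns/{type}/vectors/{shard}/{id}.json
  return `entities/nouns/${type}/vectors/${shardIdFromUuid(id)}/${id}.json`;
}

console.log(nounVectorKey('Person', 'ab123456-0000-4000-8000-000000000000'));
// => entities/nouns/Person/vectors/ab/ab123456-0000-4000-8000-000000000000.json
```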
```diff
@@ -65,6 +70,8 @@ export class GcsStorage extends BaseStorage {
         this.forceHighVolumeMode = false; // Environment variable override
         // Module logger
         this.logger = createModuleLogger('GcsStorage');
+        // v5.4.0: HNSW mutex locks to prevent read-modify-write races
+        this.hnswLocks = new Map();
         // Configuration options
         this.skipInitialScan = false;
         this.skipCountsFile = false;
```
```diff
@@ -320,12 +327,7 @@ export class GcsStorage extends BaseStorage {
         });
         await Promise.all(writes);
     }
-    /**
-     * Save a noun to storage (internal implementation)
-     */
-    async saveNoun_internal(noun) {
-        return this.saveNode(noun);
-    }
+    // v5.4.0: Removed saveNoun_internal - now inherit from BaseStorage's type-first implementation
     /**
      * Save a node to storage
      */
```
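This removal, and the parallel ones below for getNoun_internal, deleteNoun_internal, saveVerb_internal, getVerb_internal, and deleteVerb_internal, all reflect the same template-method split: BaseStorage's public methods carry the shared type-first logic, and adapters supply only low-level primitives (a later hunk's context calls `writeObjectToPath`-style operations "Primitive operation required by base class"). A rough sketch of that split; the class bodies and the `writeObjectToPath` name are assumptions for illustration, not the package's actual API:

```js
// Illustrative sketch (not the package's actual BaseStorage): the base
// class owns the shared type-first logic; adapters override only a
// low-level primitive, so no *_internal overrides are needed.
class BaseStorageSketch {
  async saveNoun(noun) {
    // Shared type-first path construction lives in the base class
    const shard = noun.id.slice(0, 2);
    const key = `entities/nouns/${noun.type}/vectors/${shard}/${noun.id}.json`;
    await this.writeObjectToPath(key, noun);
  }
  async writeObjectToPath(key, obj) {
    throw new Error('primitive - implement in adapter subclass');
  }
}

class GcsStorageSketch extends BaseStorageSketch {
  constructor(bucket) {
    super();
    this.bucket = bucket; // a @google-cloud/storage Bucket
  }
  // The only adapter-specific piece: how bytes reach GCS
  async writeObjectToPath(key, obj) {
    await this.bucket.file(key).save(JSON.stringify(obj, null, 2), {
      contentType: 'application/json',
      resumable: false
    });
  }
}
```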
```diff
@@ -396,20 +398,7 @@
             throw new Error(`Failed to save node ${node.id}: ${error}`);
         }
     }
-    /**
-     * Get a noun from storage (internal implementation)
-     * v4.0.0: Returns ONLY vector data (no metadata field)
-     * Base class combines with metadata via getNoun() -> HNSWNounWithMetadata
-     */
-    async getNoun_internal(id) {
-        // v4.0.0: Return ONLY vector data (no metadata field)
-        const node = await this.getNode(id);
-        if (!node) {
-            return null;
-        }
-        // Return pure vector structure
-        return node;
-    }
+    // v5.4.0: Removed getNoun_internal - now inherit from BaseStorage's type-first implementation
     /**
      * Get a node from storage
      */
```
```diff
@@ -504,45 +493,7 @@
             throw BrainyError.fromError(error, `getNoun(${id})`);
         }
     }
-    /**
-     * Delete a noun from storage (internal implementation)
-     */
-    async deleteNoun_internal(id) {
-        await this.ensureInitialized();
-        const requestId = await this.applyBackpressure();
-        try {
-            this.logger.trace(`Deleting noun ${id}`);
-            // Get the GCS key
-            const key = this.getNounKey(id);
-            // Delete from GCS
-            const file = this.bucket.file(key);
-            await file.delete();
-            // Remove from cache
-            this.nounCacheManager.delete(id);
-            // Decrement noun count
-            const metadata = await this.getNounMetadata(id);
-            if (metadata && metadata.type) {
-                await this.decrementEntityCountSafe(metadata.type);
-            }
-            this.logger.trace(`Noun ${id} deleted successfully`);
-            this.releaseBackpressure(true, requestId);
-        }
-        catch (error) {
-            this.releaseBackpressure(false, requestId);
-            if (error.code === 404) {
-                // Already deleted
-                this.logger.trace(`Noun ${id} not found (already deleted)`);
-                return;
-            }
-            // Handle throttling
-            if (this.isThrottlingError(error)) {
-                await this.handleThrottling(error);
-                throw error;
-            }
-            this.logger.error(`Failed to delete noun ${id}:`, error);
-            throw new Error(`Failed to delete noun ${id}: ${error}`);
-        }
-    }
+    // v5.4.0: Removed deleteNoun_internal - now inherit from BaseStorage's type-first implementation
     /**
      * Write an object to a specific path in GCS
      * Primitive operation required by base class
```
```diff
@@ -631,12 +582,7 @@
             throw new Error(`Failed to list objects under ${prefix}: ${error}`);
         }
     }
-    /**
-     * Save a verb to storage (internal implementation)
-     */
-    async saveVerb_internal(verb) {
-        return this.saveEdge(verb);
-    }
+    // v5.4.0: Removed saveVerb_internal - now inherit from BaseStorage's type-first implementation
     /**
      * Save an edge to storage
      */
```
```diff
@@ -702,20 +648,7 @@
             throw new Error(`Failed to save edge ${edge.id}: ${error}`);
         }
     }
-    /**
-     * Get a verb from storage (internal implementation)
-     * v4.0.0: Returns ONLY vector + core relational fields (no metadata field)
-     * Base class combines with metadata via getVerb() -> HNSWVerbWithMetadata
-     */
-    async getVerb_internal(id) {
-        // v4.0.0: Return ONLY vector + core relational data (no metadata field)
-        const edge = await this.getEdge(id);
-        if (!edge) {
-            return null;
-        }
-        // Return pure vector + core fields structure
-        return edge;
-    }
+    // v5.4.0: Removed getVerb_internal - now inherit from BaseStorage's type-first implementation
     /**
      * Get an edge from storage
      */
```
```diff
@@ -775,402 +708,12 @@
             throw BrainyError.fromError(error, `getVerb(${id})`);
         }
     }
-    /**
-     * Delete a verb from storage (internal implementation)
-     */
-    async deleteVerb_internal(id) {
-        await this.ensureInitialized();
-        const requestId = await this.applyBackpressure();
-        try {
-            this.logger.trace(`Deleting verb ${id}`);
-            // Get the GCS key
-            const key = this.getVerbKey(id);
-            // Delete from GCS
-            const file = this.bucket.file(key);
-            await file.delete();
-            // Remove from cache
-            this.verbCacheManager.delete(id);
-            // Decrement verb count
-            const metadata = await this.getVerbMetadata(id);
-            if (metadata && metadata.type) {
-                await this.decrementVerbCount(metadata.type);
-            }
-            this.logger.trace(`Verb ${id} deleted successfully`);
-            this.releaseBackpressure(true, requestId);
-        }
-        catch (error) {
-            this.releaseBackpressure(false, requestId);
-            if (error.code === 404) {
-                // Already deleted
-                this.logger.trace(`Verb ${id} not found (already deleted)`);
-                return;
-            }
-            if (this.isThrottlingError(error)) {
-                await this.handleThrottling(error);
-                throw error;
-            }
-            this.logger.error(`Failed to delete verb ${id}:`, error);
-            throw new Error(`Failed to delete verb ${id}: ${error}`);
-        }
-    }
-    /**
-     * Get nouns with pagination
-     * v4.0.0: Returns HNSWNounWithMetadata[] (includes metadata field)
-     * Iterates through all UUID-based shards (00-ff) for consistent pagination
-     */
-    async getNounsWithPagination(options = {}) {
-        await this.ensureInitialized();
-        const limit = options.limit || 100;
-        const cursor = options.cursor;
-        // Get paginated nodes
-        const result = await this.getNodesWithPagination({
-            limit,
-            cursor,
-            useCache: true
-        });
-        // v4.0.0: Combine nodes with metadata to create HNSWNounWithMetadata[]
-        const items = [];
-        for (const node of result.nodes) {
-            // FIX v4.7.4: Don't skip nouns without metadata - metadata is optional in v4.0.0
-            const metadata = await this.getNounMetadata(node.id);
-            // Apply filters if provided
-            if (options.filter) {
-                // Filter by noun type
-                if (options.filter.nounType) {
-                    const nounTypes = Array.isArray(options.filter.nounType)
-                        ? options.filter.nounType
-                        : [options.filter.nounType];
-                    const nounType = metadata.type || metadata.noun;
-                    if (!nounType || !nounTypes.includes(nounType)) {
-                        continue;
-                    }
-                }
-                // Filter by metadata fields if specified
-                if (options.filter.metadata) {
-                    let metadataMatch = true;
-                    for (const [key, value] of Object.entries(options.filter.metadata)) {
-                        const metadataValue = metadata[key];
-                        if (metadataValue !== value) {
-                            metadataMatch = false;
-                            break;
-                        }
-                    }
-                    if (!metadataMatch)
-                        continue;
-                }
-            }
-            // v4.8.0: Extract standard fields from metadata to top-level
-            const metadataObj = (metadata || {});
-            const { noun: nounType, createdAt, updatedAt, confidence, weight, service, data, createdBy, ...customMetadata } = metadataObj;
-            const nounWithMetadata = {
-                id: node.id,
-                vector: [...node.vector],
-                connections: new Map(node.connections),
-                level: node.level || 0,
-                type: nounType || NounType.Thing,
-                createdAt: createdAt || Date.now(),
-                updatedAt: updatedAt || Date.now(),
-                confidence: confidence,
-                weight: weight,
-                service: service,
-                data: data,
-                createdBy,
-                metadata: customMetadata
-            };
-            items.push(nounWithMetadata);
-        }
-        return {
-            items,
-            totalCount: result.totalCount,
-            hasMore: result.hasMore,
-            nextCursor: result.nextCursor
-        };
-    }
-    /**
-     * Get nodes with pagination (internal implementation)
-     * Iterates through UUID-based shards for consistent pagination
-     */
-    async getNodesWithPagination(options) {
-        await this.ensureInitialized(); // CRITICAL: Must initialize before using this.bucket
-        const limit = options.limit || 100;
-        const useCache = options.useCache !== false;
-        try {
-            const nodes = [];
-            // Parse cursor (format: "shardIndex:gcsPageToken")
-            let startShardIndex = 0;
-            let gcsPageToken;
-            if (options.cursor) {
-                const parts = options.cursor.split(':', 2);
-                startShardIndex = parseInt(parts[0]) || 0;
-                gcsPageToken = parts[1] || undefined;
-            }
-            // Iterate through shards starting from cursor position
-            for (let shardIndex = startShardIndex; shardIndex < TOTAL_SHARDS; shardIndex++) {
-                const shardId = getShardIdByIndex(shardIndex);
-                const shardPrefix = `${this.nounPrefix}${shardId}/`;
-                // List objects in this shard
-                // Cap maxResults to GCS API limit to prevent "Invalid unsigned integer" errors
-                const requestedPageSize = limit - nodes.length;
-                const cappedPageSize = Math.min(requestedPageSize, MAX_GCS_PAGE_SIZE);
-                const [files, , response] = await this.bucket.getFiles({
-                    prefix: shardPrefix,
-                    maxResults: cappedPageSize,
-                    pageToken: shardIndex === startShardIndex ? gcsPageToken : undefined
-                });
-                // Extract node IDs from file names
-                if (files && files.length > 0) {
-                    const nodeIds = files
-                        .filter((file) => file && file.name)
-                        .map((file) => {
-                        // Extract UUID from: entities/nouns/vectors/ab/ab123456-uuid.json
-                        let name = file.name;
-                        if (name.startsWith(shardPrefix)) {
-                            name = name.substring(shardPrefix.length);
-                        }
-                        if (name.endsWith('.json')) {
-                            name = name.substring(0, name.length - 5);
-                        }
-                        return name;
-                    })
-                        .filter((id) => id && id.length > 0);
-                    // Load nodes
-                    for (const id of nodeIds) {
-                        const node = await this.getNode(id);
-                        if (node) {
-                            nodes.push(node);
-                        }
-                        if (nodes.length >= limit) {
-                            break;
-                        }
-                    }
-                }
-                // Check if we have enough nodes or if there are more files in current shard
-                if (nodes.length >= limit) {
-                    const nextCursor = response?.nextPageToken
-                        ? `${shardIndex}:${response.nextPageToken}`
-                        : shardIndex + 1 < TOTAL_SHARDS
-                            ? `${shardIndex + 1}:`
-                            : undefined;
-                    return {
-                        nodes,
-                        totalCount: this.totalNounCount,
-                        hasMore: !!nextCursor,
-                        nextCursor
-                    };
-                }
-                // If this shard has more pages, create cursor for next page
-                if (response?.nextPageToken) {
-                    return {
-                        nodes,
-                        totalCount: this.totalNounCount,
-                        hasMore: true,
-                        nextCursor: `${shardIndex}:${response.nextPageToken}`
-                    };
-                }
-                // Continue to next shard
-            }
-            // No more shards or nodes
-            return {
-                nodes,
-                totalCount: this.totalNounCount,
-                hasMore: false,
-                nextCursor: undefined
-            };
-        }
-        catch (error) {
-            this.logger.error('Error in getNodesWithPagination:', error);
-            throw new Error(`Failed to get nodes with pagination: ${error}`);
-        }
-    }
-    /**
-     * Get nouns by noun type (internal implementation)
-     */
-    async getNounsByNounType_internal(nounType) {
-        const result = await this.getNounsWithPagination({
-            limit: 10000, // Large limit for backward compatibility
-            filter: { nounType }
-        });
-        return result.items;
-    }
-    /**
-     * Get verbs by source ID (internal implementation)
-     */
-    async getVerbsBySource_internal(sourceId) {
-        // Use the paginated approach to properly handle HNSWVerb to GraphVerb conversion
-        const result = await this.getVerbsWithPagination({
-            limit: Number.MAX_SAFE_INTEGER,
-            filter: { sourceId: [sourceId] }
-        });
-        return result.items;
-    }
-    /**
-     * Get verbs by target ID (internal implementation)
-     */
-    async getVerbsByTarget_internal(targetId) {
-        // Use the paginated approach to properly handle HNSWVerb to GraphVerb conversion
-        const result = await this.getVerbsWithPagination({
-            limit: Number.MAX_SAFE_INTEGER,
-            filter: { targetId: [targetId] }
-        });
-        return result.items;
-    }
-    /**
-     * Get verbs by type (internal implementation)
-     */
-    async getVerbsByType_internal(type) {
-        // Use the paginated approach to properly handle HNSWVerb to GraphVerb conversion
-        const result = await this.getVerbsWithPagination({
-            limit: Number.MAX_SAFE_INTEGER,
-            filter: { verbType: type }
-        });
-        return result.items;
-    }
-    /**
-     * Get verbs with pagination
-     * v4.0.0: Returns HNSWVerbWithMetadata[] (includes metadata field)
-     */
-    async getVerbsWithPagination(options = {}) {
-        await this.ensureInitialized();
-        const limit = options.limit || 100;
-        try {
-            // List verbs (simplified - not sharded yet in original implementation)
-            // Cap maxResults to GCS API limit to prevent "Invalid unsigned integer" errors
-            const cappedLimit = Math.min(limit, MAX_GCS_PAGE_SIZE);
-            const [files, , response] = await this.bucket.getFiles({
-                prefix: this.verbPrefix,
-                maxResults: cappedLimit,
-                pageToken: options.cursor
-            });
-            // If no files, return empty result
-            if (!files || files.length === 0) {
-                return {
-                    items: [],
-                    totalCount: 0,
-                    hasMore: false,
-                    nextCursor: undefined
-                };
-            }
-            // Extract verb IDs and load verbs as HNSW verbs
-            const hnswVerbs = [];
-            for (const file of files) {
-                if (!file.name)
-                    continue;
-                // Extract UUID from path
-                let name = file.name;
-                if (name.startsWith(this.verbPrefix)) {
-                    name = name.substring(this.verbPrefix.length);
-                }
-                if (name.endsWith('.json')) {
-                    name = name.substring(0, name.length - 5);
-                }
-                const verb = await this.getEdge(name);
-                if (verb) {
-                    hnswVerbs.push(verb);
-                }
-            }
-            // v4.0.0: Combine HNSWVerbs with metadata to create HNSWVerbWithMetadata[]
-            const items = [];
-            for (const hnswVerb of hnswVerbs) {
-                const metadata = await this.getVerbMetadata(hnswVerb.id);
-                // Apply filters
-                if (options.filter) {
-                    // v4.0.0: Core fields (verb, sourceId, targetId) are in HNSWVerb structure
-                    if (options.filter.sourceId) {
-                        const sourceIds = Array.isArray(options.filter.sourceId)
-                            ? options.filter.sourceId
-                            : [options.filter.sourceId];
-                        if (!hnswVerb.sourceId || !sourceIds.includes(hnswVerb.sourceId)) {
-                            continue;
-                        }
-                    }
-                    if (options.filter.targetId) {
-                        const targetIds = Array.isArray(options.filter.targetId)
-                            ? options.filter.targetId
-                            : [options.filter.targetId];
-                        if (!hnswVerb.targetId || !targetIds.includes(hnswVerb.targetId)) {
-                            continue;
-                        }
-                    }
-                    if (options.filter.verbType) {
-                        const verbTypes = Array.isArray(options.filter.verbType)
-                            ? options.filter.verbType
-                            : [options.filter.verbType];
-                        if (!hnswVerb.verb || !verbTypes.includes(hnswVerb.verb)) {
-                            continue;
-                        }
-                    }
-                    // Filter by metadata fields if specified
-                    if (options.filter.metadata && metadata) {
-                        let metadataMatch = true;
-                        for (const [key, value] of Object.entries(options.filter.metadata)) {
-                            const metadataValue = metadata[key];
-                            if (metadataValue !== value) {
-                                metadataMatch = false;
-                                break;
-                            }
-                        }
-                        if (!metadataMatch)
-                            continue;
-                    }
-                }
-                // v4.8.0: Extract standard fields from metadata to top-level
-                const metadataObj = (metadata || {});
-                const { createdAt, updatedAt, confidence, weight, service, data, createdBy, ...customMetadata } = metadataObj;
-                const verbWithMetadata = {
-                    id: hnswVerb.id,
-                    vector: [...hnswVerb.vector],
-                    connections: new Map(hnswVerb.connections),
-                    verb: hnswVerb.verb,
-                    sourceId: hnswVerb.sourceId,
-                    targetId: hnswVerb.targetId,
-                    createdAt: createdAt || Date.now(),
-                    updatedAt: updatedAt || Date.now(),
-                    confidence: confidence,
-                    weight: weight,
-                    service: service,
-                    data: data,
-                    createdBy,
-                    metadata: customMetadata
-                };
-                items.push(verbWithMetadata);
-            }
-            return {
-                items,
-                totalCount: this.totalVerbCount,
-                hasMore: !!response?.nextPageToken,
-                nextCursor: response?.nextPageToken
-            };
-        }
-        catch (error) {
-            this.logger.error('Error in getVerbsWithPagination:', error);
-            throw new Error(`Failed to get verbs with pagination: ${error}`);
-        }
-    }
-    /**
-     * Get nouns with filtering and pagination (public API)
-     */
-    async getNouns(options) {
-        const limit = options?.pagination?.limit || 100;
-        const cursor = options?.pagination?.cursor;
-        return this.getNounsWithPagination({
-            limit,
-            cursor,
-            filter: options?.filter
-        });
-    }
-    /**
-     * Get verbs with filtering and pagination (public API)
-     * v4.0.0: Returns HNSWVerbWithMetadata[] (includes metadata field)
-     */
-    async getVerbs(options) {
-        const limit = options?.pagination?.limit || 100;
-        const cursor = options?.pagination?.cursor;
-        return this.getVerbsWithPagination({
-            limit,
-            cursor,
-            filter: options?.filter
-        });
-    }
+    // v5.4.0: Removed deleteVerb_internal - now inherit from BaseStorage's type-first implementation
+    // v5.4.0: Removed pagination overrides - use BaseStorage's type-first implementation
+    // - getNounsWithPagination, getNodesWithPagination, getVerbsWithPagination
+    // - getNouns, getVerbs (public wrappers)
+    // v5.4.0: Removed 4 query *_internal methods - now inherit from BaseStorage's type-first implementation
+    // (getNounsByNounType_internal, getVerbsBySource_internal, getVerbsByTarget_internal, getVerbsByType_internal)
     /**
      * Batch fetch metadata for multiple noun IDs (efficient for large queries)
      * Uses smaller batches to prevent GCS socket exhaustion
```
```diff
@@ -1501,105 +1044,84 @@
     // HNSW Index Persistence (v3.35.0+)
     /**
      * Get a noun's vector for HNSW rebuild
+     * v5.4.0: Uses BaseStorage's getNoun (type-first paths)
      */
     async getNounVector(id) {
-        await this.
-        const noun = await this.getNode(id);
+        const noun = await this.getNoun(id);
         return noun ? noun.vector : null;
     }
     /**
      * Save HNSW graph data for a noun
-     *
+     *
+     * v5.4.0: Uses BaseStorage's getNoun/saveNoun (type-first paths)
+     * CRITICAL: Uses mutex locking to prevent read-modify-write races
      */
     async saveHNSWData(nounId, hnswData) {
-
-        // CRITICAL FIX (v4.
-        //
-        //
-        //
-        //
-        //
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        }
-        catch (error) {
-            // File doesn't exist yet - will create new
-            if (error.code !== 404) {
-                throw error;
-            }
-        }
-        // Preserve id and vector, update only HNSW graph metadata
-        const updatedNode = {
-            ...existingNode, // Preserve all existing fields (id, vector, etc.)
-            level: hnswData.level,
-            connections: hnswData.connections
-        };
-        // ATOMIC WRITE: Use generation precondition
-        // If currentGeneration exists, only write if generation matches (no concurrent modification)
-        // If no generation, only write if file doesn't exist (ifGenerationMatch: 0)
-        await file.save(JSON.stringify(updatedNode, null, 2), {
-            contentType: 'application/json',
-            resumable: false,
-            preconditionOpts: currentGeneration
-                ? { ifGenerationMatch: currentGeneration }
-                : { ifGenerationMatch: '0' } // Only create if doesn't exist
-        });
-        // Success! Exit retry loop
-        return;
+        const lockKey = `hnsw/${nounId}`;
+        // CRITICAL FIX (v4.10.1): Mutex lock to prevent read-modify-write races
+        // Problem: Without mutex, concurrent operations can:
+        // 1. Thread A reads noun (connections: [1,2,3])
+        // 2. Thread B reads noun (connections: [1,2,3])
+        // 3. Thread A adds connection 4, writes [1,2,3,4]
+        // 4. Thread B adds connection 5, writes [1,2,3,5] ← Connection 4 LOST!
+        // Solution: Mutex serializes operations per entity (like FileSystem/OPFS adapters)
+        // Production scale: Prevents corruption at 1000+ concurrent operations
+        // Wait for any pending operations on this entity
+        while (this.hnswLocks.has(lockKey)) {
+            await this.hnswLocks.get(lockKey);
+        }
+        // Acquire lock
+        let releaseLock;
+        const lockPromise = new Promise(resolve => { releaseLock = resolve; });
+        this.hnswLocks.set(lockKey, lockPromise);
+        try {
+            // v5.4.0: Use BaseStorage's getNoun (type-first paths)
+            // Read existing noun data (if exists)
+            const existingNoun = await this.getNoun(nounId);
+            if (!existingNoun) {
+                // Noun doesn't exist - cannot update HNSW data for non-existent noun
+                throw new Error(`Cannot save HNSW data: noun ${nounId} not found`);
             }
-
-
-
-
-            this.logger.error(`Max retries (${maxRetries}) exceeded for ${nounId} - concurrent modification conflict`);
-            throw new Error(`Failed to save HNSW data for ${nounId}: max retries exceeded due to concurrent modifications`);
-        }
-        // Exponential backoff: 50ms, 100ms, 200ms, 400ms, 800ms
-        const backoffMs = 50 * Math.pow(2, attempt);
-        await new Promise(resolve => setTimeout(resolve, backoffMs));
-        continue;
-    }
-    // Other error - rethrow
-    this.logger.error(`Failed to save HNSW data for ${nounId}:`, error);
-    throw new Error(`Failed to save HNSW data for ${nounId}: ${error}`);
+            // Convert connections from Record to Map format for storage
+            const connectionsMap = new Map();
+            for (const [level, nodeIds] of Object.entries(hnswData.connections)) {
+                connectionsMap.set(Number(level), new Set(nodeIds));
             }
+            // Preserve id and vector, update only HNSW graph metadata
+            const updatedNoun = {
+                ...existingNoun,
+                level: hnswData.level,
+                connections: connectionsMap
+            };
+            // v5.4.0: Use BaseStorage's saveNoun (type-first paths, atomic write via writeObjectToBranch)
+            await this.saveNoun(updatedNoun);
+        }
+        finally {
+            // Release lock (ALWAYS runs, even if error thrown)
+            this.hnswLocks.delete(lockKey);
+            releaseLock();
         }
     }
     /**
      * Get HNSW graph data for a noun
-     *
+     * v5.4.0: Uses BaseStorage's getNoun (type-first paths)
      */
     async getHNSWData(nounId) {
-        await this.
-
-
-        const key = `entities/nouns/hnsw/${shard}/${nounId}.json`;
-        const file = this.bucket.file(key);
-        const [contents] = await file.download();
-        return JSON.parse(contents.toString());
+        const noun = await this.getNoun(nounId);
+        if (!noun) {
+            return null;
         }
-
-
-
+        // Convert connections from Map to Record format
+        const connectionsRecord = {};
+        if (noun.connections) {
+            for (const [level, nodeIds] of noun.connections.entries()) {
+                connectionsRecord[String(level)] = Array.from(nodeIds);
             }
-        this.logger.error(`Failed to get HNSW data for ${nounId}:`, error);
-        throw new Error(`Failed to get HNSW data for ${nounId}: ${error}`);
         }
+        return {
+            level: noun.level || 0,
+            connections: connectionsRecord
+        };
     }
     /**
      * Save HNSW system data (entry point, max level)
```