@soulcraft/brainy 5.3.6 → 5.5.0

This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (50)
  1. package/CHANGELOG.md +110 -0
  2. package/README.md +4 -3
  3. package/dist/augmentations/display/fieldPatterns.js +3 -3
  4. package/dist/augmentations/display/intelligentComputation.js +0 -2
  5. package/dist/augmentations/typeMatching/brainyTypes.js +6 -8
  6. package/dist/brainy.d.ts +61 -0
  7. package/dist/brainy.js +180 -24
  8. package/dist/cortex/neuralImport.js +0 -1
  9. package/dist/importers/SmartExcelImporter.js +1 -1
  10. package/dist/index.d.ts +2 -2
  11. package/dist/neural/embeddedKeywordEmbeddings.d.ts +1 -1
  12. package/dist/neural/embeddedKeywordEmbeddings.js +56 -56
  13. package/dist/neural/embeddedTypeEmbeddings.d.ts +3 -3
  14. package/dist/neural/embeddedTypeEmbeddings.js +14 -14
  15. package/dist/neural/entityExtractor.js +2 -2
  16. package/dist/neural/relationshipConfidence.js +1 -1
  17. package/dist/neural/signals/VerbContextSignal.js +6 -6
  18. package/dist/neural/signals/VerbExactMatchSignal.js +9 -9
  19. package/dist/neural/signals/VerbPatternSignal.js +5 -5
  20. package/dist/query/typeAwareQueryPlanner.js +2 -3
  21. package/dist/storage/adapters/azureBlobStorage.d.ts +13 -64
  22. package/dist/storage/adapters/azureBlobStorage.js +78 -388
  23. package/dist/storage/adapters/fileSystemStorage.d.ts +12 -78
  24. package/dist/storage/adapters/fileSystemStorage.js +49 -395
  25. package/dist/storage/adapters/gcsStorage.d.ts +13 -134
  26. package/dist/storage/adapters/gcsStorage.js +79 -557
  27. package/dist/storage/adapters/historicalStorageAdapter.d.ts +181 -0
  28. package/dist/storage/adapters/historicalStorageAdapter.js +332 -0
  29. package/dist/storage/adapters/memoryStorage.d.ts +4 -113
  30. package/dist/storage/adapters/memoryStorage.js +34 -471
  31. package/dist/storage/adapters/opfsStorage.d.ts +14 -127
  32. package/dist/storage/adapters/opfsStorage.js +44 -693
  33. package/dist/storage/adapters/r2Storage.d.ts +8 -41
  34. package/dist/storage/adapters/r2Storage.js +49 -237
  35. package/dist/storage/adapters/s3CompatibleStorage.d.ts +13 -111
  36. package/dist/storage/adapters/s3CompatibleStorage.js +77 -596
  37. package/dist/storage/baseStorage.d.ts +78 -38
  38. package/dist/storage/baseStorage.js +692 -23
  39. package/dist/storage/cow/BlobStorage.d.ts +2 -2
  40. package/dist/storage/cow/BlobStorage.js +4 -4
  41. package/dist/storage/storageFactory.d.ts +2 -3
  42. package/dist/storage/storageFactory.js +114 -66
  43. package/dist/types/graphTypes.d.ts +588 -230
  44. package/dist/types/graphTypes.js +683 -248
  45. package/dist/types/typeMigration.d.ts +95 -0
  46. package/dist/types/typeMigration.js +141 -0
  47. package/dist/utils/intelligentTypeMapper.js +2 -2
  48. package/dist/utils/metadataIndex.js +6 -6
  49. package/dist/vfs/types.d.ts +6 -2
  50. package/package.json +2 -2
package/dist/storage/adapters/gcsStorage.js

@@ -8,7 +8,6 @@
  * 3. Service Account Credentials Object
  * 4. HMAC Keys (fallback for backward compatibility)
  */
- import { NounType } from '../../coreTypes.js';
  import { BaseStorage, SYSTEM_DIR, STATISTICS_KEY, getDirectoryPath } from '../baseStorage.js';
  import { BrainyError } from '../../errors/brainyError.js';
  import { CacheManager } from '../cacheManager.js';
@@ -16,7 +15,7 @@ import { createModuleLogger, prodLog } from '../../utils/logger.js';
  import { getGlobalBackpressure } from '../../utils/adaptiveBackpressure.js';
  import { getWriteBuffer } from '../../utils/writeBuffer.js';
  import { getCoalescer } from '../../utils/requestCoalescer.js';
- import { getShardIdFromUuid, getShardIdByIndex, TOTAL_SHARDS } from '../sharding.js';
+ import { getShardIdFromUuid } from '../sharding.js';
  // GCS API limits
  // Maximum value for maxResults parameter in GCS API calls
  // Values above this cause "Invalid unsigned integer" errors
@@ -30,6 +29,12 @@ const MAX_GCS_PAGE_SIZE = 5000;
  * 2. Service Account Key File (if keyFilename provided)
  * 3. Service Account Credentials Object (if credentials provided)
  * 4. HMAC Keys (if accessKeyId/secretAccessKey provided)
+ *
+ * v5.4.0: Type-aware storage now built into BaseStorage
+ * - Removed 10 *_internal method overrides (now inherit from BaseStorage's type-first implementation)
+ * - Removed 2 pagination method overrides (getNounsWithPagination, getVerbsWithPagination)
+ * - Updated HNSW methods to use BaseStorage's getNoun/saveNoun (type-first paths)
+ * - All operations now use type-first paths: entities/nouns/{type}/vectors/{shard}/{id}.json
  */
  export class GcsStorage extends BaseStorage {
  /**
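
For illustration, the type-first layout documented in this hunk derives a noun's vector key from its type plus a two-character UUID shard (e.g. 'ab' for 'ab123456-...'). A minimal sketch; the nounVectorKey helper is hypothetical, while getShardIdFromUuid is the real import this adapter uses:

    import { getShardIdFromUuid } from '../sharding.js';

    // Hypothetical helper illustrating the v5.4.0 key shape.
    function nounVectorKey(noun) {
      const shard = getShardIdFromUuid(noun.id); // 'ab123456-...' -> 'ab'
      return `entities/nouns/${noun.type}/vectors/${shard}/${noun.id}.json`;
    }
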
@@ -65,6 +70,8 @@ export class GcsStorage extends BaseStorage {
  this.forceHighVolumeMode = false; // Environment variable override
  // Module logger
  this.logger = createModuleLogger('GcsStorage');
+ // v5.4.0: HNSW mutex locks to prevent read-modify-write races
+ this.hnswLocks = new Map();
  // Configuration options
  this.skipInitialScan = false;
  this.skipCountsFile = false;
@@ -320,12 +327,7 @@ export class GcsStorage extends BaseStorage {
  });
  await Promise.all(writes);
  }
- /**
- * Save a noun to storage (internal implementation)
- */
- async saveNoun_internal(noun) {
- return this.saveNode(noun);
- }
+ // v5.4.0: Removed saveNoun_internal - now inherit from BaseStorage's type-first implementation
  /**
  * Save a node to storage
  */
@@ -396,20 +398,7 @@ export class GcsStorage extends BaseStorage {
  throw new Error(`Failed to save node ${node.id}: ${error}`);
  }
  }
- /**
- * Get a noun from storage (internal implementation)
- * v4.0.0: Returns ONLY vector data (no metadata field)
- * Base class combines with metadata via getNoun() -> HNSWNounWithMetadata
- */
- async getNoun_internal(id) {
- // v4.0.0: Return ONLY vector data (no metadata field)
- const node = await this.getNode(id);
- if (!node) {
- return null;
- }
- // Return pure vector structure
- return node;
- }
+ // v5.4.0: Removed getNoun_internal - now inherit from BaseStorage's type-first implementation
  /**
  * Get a node from storage
  */
@@ -504,45 +493,7 @@ export class GcsStorage extends BaseStorage {
  throw BrainyError.fromError(error, `getNoun(${id})`);
  }
  }
- /**
- * Delete a noun from storage (internal implementation)
- */
- async deleteNoun_internal(id) {
- await this.ensureInitialized();
- const requestId = await this.applyBackpressure();
- try {
- this.logger.trace(`Deleting noun ${id}`);
- // Get the GCS key
- const key = this.getNounKey(id);
- // Delete from GCS
- const file = this.bucket.file(key);
- await file.delete();
- // Remove from cache
- this.nounCacheManager.delete(id);
- // Decrement noun count
- const metadata = await this.getNounMetadata(id);
- if (metadata && metadata.type) {
- await this.decrementEntityCountSafe(metadata.type);
- }
- this.logger.trace(`Noun ${id} deleted successfully`);
- this.releaseBackpressure(true, requestId);
- }
- catch (error) {
- this.releaseBackpressure(false, requestId);
- if (error.code === 404) {
- // Already deleted
- this.logger.trace(`Noun ${id} not found (already deleted)`);
- return;
- }
- // Handle throttling
- if (this.isThrottlingError(error)) {
- await this.handleThrottling(error);
- throw error;
- }
- this.logger.error(`Failed to delete noun ${id}:`, error);
- throw new Error(`Failed to delete noun ${id}: ${error}`);
- }
- }
+ // v5.4.0: Removed deleteNoun_internal - now inherit from BaseStorage's type-first implementation
  /**
  * Write an object to a specific path in GCS
  * Primitive operation required by base class
@@ -631,12 +582,7 @@ export class GcsStorage extends BaseStorage {
  throw new Error(`Failed to list objects under ${prefix}: ${error}`);
  }
  }
- /**
- * Save a verb to storage (internal implementation)
- */
- async saveVerb_internal(verb) {
- return this.saveEdge(verb);
- }
+ // v5.4.0: Removed saveVerb_internal - now inherit from BaseStorage's type-first implementation
  /**
  * Save an edge to storage
  */
@@ -702,20 +648,7 @@ export class GcsStorage extends BaseStorage {
  throw new Error(`Failed to save edge ${edge.id}: ${error}`);
  }
  }
- /**
- * Get a verb from storage (internal implementation)
- * v4.0.0: Returns ONLY vector + core relational fields (no metadata field)
- * Base class combines with metadata via getVerb() -> HNSWVerbWithMetadata
- */
- async getVerb_internal(id) {
- // v4.0.0: Return ONLY vector + core relational data (no metadata field)
- const edge = await this.getEdge(id);
- if (!edge) {
- return null;
- }
- // Return pure vector + core fields structure
- return edge;
- }
+ // v5.4.0: Removed getVerb_internal - now inherit from BaseStorage's type-first implementation
  /**
  * Get an edge from storage
  */
@@ -775,402 +708,12 @@ export class GcsStorage extends BaseStorage {
  throw BrainyError.fromError(error, `getVerb(${id})`);
  }
  }
- /**
- * Delete a verb from storage (internal implementation)
- */
- async deleteVerb_internal(id) {
- await this.ensureInitialized();
- const requestId = await this.applyBackpressure();
- try {
- this.logger.trace(`Deleting verb ${id}`);
- // Get the GCS key
- const key = this.getVerbKey(id);
- // Delete from GCS
- const file = this.bucket.file(key);
- await file.delete();
- // Remove from cache
- this.verbCacheManager.delete(id);
- // Decrement verb count
- const metadata = await this.getVerbMetadata(id);
- if (metadata && metadata.type) {
- await this.decrementVerbCount(metadata.type);
- }
- this.logger.trace(`Verb ${id} deleted successfully`);
- this.releaseBackpressure(true, requestId);
- }
- catch (error) {
- this.releaseBackpressure(false, requestId);
- if (error.code === 404) {
- // Already deleted
- this.logger.trace(`Verb ${id} not found (already deleted)`);
- return;
- }
- if (this.isThrottlingError(error)) {
- await this.handleThrottling(error);
- throw error;
- }
- this.logger.error(`Failed to delete verb ${id}:`, error);
- throw new Error(`Failed to delete verb ${id}: ${error}`);
- }
- }
- /**
- * Get nouns with pagination
- * v4.0.0: Returns HNSWNounWithMetadata[] (includes metadata field)
- * Iterates through all UUID-based shards (00-ff) for consistent pagination
- */
- async getNounsWithPagination(options = {}) {
- await this.ensureInitialized();
- const limit = options.limit || 100;
- const cursor = options.cursor;
- // Get paginated nodes
- const result = await this.getNodesWithPagination({
- limit,
- cursor,
- useCache: true
- });
- // v4.0.0: Combine nodes with metadata to create HNSWNounWithMetadata[]
- const items = [];
- for (const node of result.nodes) {
- // FIX v4.7.4: Don't skip nouns without metadata - metadata is optional in v4.0.0
- const metadata = await this.getNounMetadata(node.id);
- // Apply filters if provided
- if (options.filter) {
- // Filter by noun type
- if (options.filter.nounType) {
- const nounTypes = Array.isArray(options.filter.nounType)
- ? options.filter.nounType
- : [options.filter.nounType];
- const nounType = metadata.type || metadata.noun;
- if (!nounType || !nounTypes.includes(nounType)) {
- continue;
- }
- }
- // Filter by metadata fields if specified
- if (options.filter.metadata) {
- let metadataMatch = true;
- for (const [key, value] of Object.entries(options.filter.metadata)) {
- const metadataValue = metadata[key];
- if (metadataValue !== value) {
- metadataMatch = false;
- break;
- }
- }
- if (!metadataMatch)
- continue;
- }
- }
- // v4.8.0: Extract standard fields from metadata to top-level
- const metadataObj = (metadata || {});
- const { noun: nounType, createdAt, updatedAt, confidence, weight, service, data, createdBy, ...customMetadata } = metadataObj;
- const nounWithMetadata = {
- id: node.id,
- vector: [...node.vector],
- connections: new Map(node.connections),
- level: node.level || 0,
- type: nounType || NounType.Thing,
- createdAt: createdAt || Date.now(),
- updatedAt: updatedAt || Date.now(),
- confidence: confidence,
- weight: weight,
- service: service,
- data: data,
- createdBy,
- metadata: customMetadata
- };
- items.push(nounWithMetadata);
- }
- return {
- items,
- totalCount: result.totalCount,
- hasMore: result.hasMore,
- nextCursor: result.nextCursor
- };
- }
- /**
- * Get nodes with pagination (internal implementation)
- * Iterates through UUID-based shards for consistent pagination
- */
- async getNodesWithPagination(options) {
- await this.ensureInitialized(); // CRITICAL: Must initialize before using this.bucket
- const limit = options.limit || 100;
- const useCache = options.useCache !== false;
- try {
- const nodes = [];
- // Parse cursor (format: "shardIndex:gcsPageToken")
- let startShardIndex = 0;
- let gcsPageToken;
- if (options.cursor) {
- const parts = options.cursor.split(':', 2);
- startShardIndex = parseInt(parts[0]) || 0;
- gcsPageToken = parts[1] || undefined;
- }
- // Iterate through shards starting from cursor position
- for (let shardIndex = startShardIndex; shardIndex < TOTAL_SHARDS; shardIndex++) {
- const shardId = getShardIdByIndex(shardIndex);
- const shardPrefix = `${this.nounPrefix}${shardId}/`;
- // List objects in this shard
- // Cap maxResults to GCS API limit to prevent "Invalid unsigned integer" errors
- const requestedPageSize = limit - nodes.length;
- const cappedPageSize = Math.min(requestedPageSize, MAX_GCS_PAGE_SIZE);
- const [files, , response] = await this.bucket.getFiles({
- prefix: shardPrefix,
- maxResults: cappedPageSize,
- pageToken: shardIndex === startShardIndex ? gcsPageToken : undefined
- });
- // Extract node IDs from file names
- if (files && files.length > 0) {
- const nodeIds = files
- .filter((file) => file && file.name)
- .map((file) => {
- // Extract UUID from: entities/nouns/vectors/ab/ab123456-uuid.json
- let name = file.name;
- if (name.startsWith(shardPrefix)) {
- name = name.substring(shardPrefix.length);
- }
- if (name.endsWith('.json')) {
- name = name.substring(0, name.length - 5);
- }
- return name;
- })
- .filter((id) => id && id.length > 0);
- // Load nodes
- for (const id of nodeIds) {
- const node = await this.getNode(id);
- if (node) {
- nodes.push(node);
- }
- if (nodes.length >= limit) {
- break;
- }
- }
- }
- // Check if we have enough nodes or if there are more files in current shard
- if (nodes.length >= limit) {
- const nextCursor = response?.nextPageToken
- ? `${shardIndex}:${response.nextPageToken}`
- : shardIndex + 1 < TOTAL_SHARDS
- ? `${shardIndex + 1}:`
- : undefined;
- return {
- nodes,
- totalCount: this.totalNounCount,
- hasMore: !!nextCursor,
- nextCursor
- };
- }
- // If this shard has more pages, create cursor for next page
- if (response?.nextPageToken) {
- return {
- nodes,
- totalCount: this.totalNounCount,
- hasMore: true,
- nextCursor: `${shardIndex}:${response.nextPageToken}`
- };
- }
- // Continue to next shard
- }
- // No more shards or nodes
- return {
- nodes,
- totalCount: this.totalNounCount,
- hasMore: false,
- nextCursor: undefined
- };
- }
- catch (error) {
- this.logger.error('Error in getNodesWithPagination:', error);
- throw new Error(`Failed to get nodes with pagination: ${error}`);
- }
- }
- /**
- * Get nouns by noun type (internal implementation)
- */
- async getNounsByNounType_internal(nounType) {
- const result = await this.getNounsWithPagination({
- limit: 10000, // Large limit for backward compatibility
- filter: { nounType }
- });
- return result.items;
- }
- /**
- * Get verbs by source ID (internal implementation)
- */
- async getVerbsBySource_internal(sourceId) {
- // Use the paginated approach to properly handle HNSWVerb to GraphVerb conversion
- const result = await this.getVerbsWithPagination({
- limit: Number.MAX_SAFE_INTEGER,
- filter: { sourceId: [sourceId] }
- });
- return result.items;
- }
- /**
- * Get verbs by target ID (internal implementation)
- */
- async getVerbsByTarget_internal(targetId) {
- // Use the paginated approach to properly handle HNSWVerb to GraphVerb conversion
- const result = await this.getVerbsWithPagination({
- limit: Number.MAX_SAFE_INTEGER,
- filter: { targetId: [targetId] }
- });
- return result.items;
- }
- /**
- * Get verbs by type (internal implementation)
- */
- async getVerbsByType_internal(type) {
- // Use the paginated approach to properly handle HNSWVerb to GraphVerb conversion
- const result = await this.getVerbsWithPagination({
- limit: Number.MAX_SAFE_INTEGER,
- filter: { verbType: type }
- });
- return result.items;
- }
- /**
- * Get verbs with pagination
- * v4.0.0: Returns HNSWVerbWithMetadata[] (includes metadata field)
- */
- async getVerbsWithPagination(options = {}) {
- await this.ensureInitialized();
- const limit = options.limit || 100;
- try {
- // List verbs (simplified - not sharded yet in original implementation)
- // Cap maxResults to GCS API limit to prevent "Invalid unsigned integer" errors
- const cappedLimit = Math.min(limit, MAX_GCS_PAGE_SIZE);
- const [files, , response] = await this.bucket.getFiles({
- prefix: this.verbPrefix,
- maxResults: cappedLimit,
- pageToken: options.cursor
- });
- // If no files, return empty result
- if (!files || files.length === 0) {
- return {
- items: [],
- totalCount: 0,
- hasMore: false,
- nextCursor: undefined
- };
- }
- // Extract verb IDs and load verbs as HNSW verbs
- const hnswVerbs = [];
- for (const file of files) {
- if (!file.name)
- continue;
- // Extract UUID from path
- let name = file.name;
- if (name.startsWith(this.verbPrefix)) {
- name = name.substring(this.verbPrefix.length);
- }
- if (name.endsWith('.json')) {
- name = name.substring(0, name.length - 5);
- }
- const verb = await this.getEdge(name);
- if (verb) {
- hnswVerbs.push(verb);
- }
- }
- // v4.0.0: Combine HNSWVerbs with metadata to create HNSWVerbWithMetadata[]
- const items = [];
- for (const hnswVerb of hnswVerbs) {
- const metadata = await this.getVerbMetadata(hnswVerb.id);
- // Apply filters
- if (options.filter) {
- // v4.0.0: Core fields (verb, sourceId, targetId) are in HNSWVerb structure
- if (options.filter.sourceId) {
- const sourceIds = Array.isArray(options.filter.sourceId)
- ? options.filter.sourceId
- : [options.filter.sourceId];
- if (!hnswVerb.sourceId || !sourceIds.includes(hnswVerb.sourceId)) {
- continue;
- }
- }
- if (options.filter.targetId) {
- const targetIds = Array.isArray(options.filter.targetId)
- ? options.filter.targetId
- : [options.filter.targetId];
- if (!hnswVerb.targetId || !targetIds.includes(hnswVerb.targetId)) {
- continue;
- }
- }
- if (options.filter.verbType) {
- const verbTypes = Array.isArray(options.filter.verbType)
- ? options.filter.verbType
- : [options.filter.verbType];
- if (!hnswVerb.verb || !verbTypes.includes(hnswVerb.verb)) {
- continue;
- }
- }
- // Filter by metadata fields if specified
- if (options.filter.metadata && metadata) {
- let metadataMatch = true;
- for (const [key, value] of Object.entries(options.filter.metadata)) {
- const metadataValue = metadata[key];
- if (metadataValue !== value) {
- metadataMatch = false;
- break;
- }
- }
- if (!metadataMatch)
- continue;
- }
- }
- // v4.8.0: Extract standard fields from metadata to top-level
- const metadataObj = (metadata || {});
- const { createdAt, updatedAt, confidence, weight, service, data, createdBy, ...customMetadata } = metadataObj;
- const verbWithMetadata = {
- id: hnswVerb.id,
- vector: [...hnswVerb.vector],
- connections: new Map(hnswVerb.connections),
- verb: hnswVerb.verb,
- sourceId: hnswVerb.sourceId,
- targetId: hnswVerb.targetId,
- createdAt: createdAt || Date.now(),
- updatedAt: updatedAt || Date.now(),
- confidence: confidence,
- weight: weight,
- service: service,
- data: data,
- createdBy,
- metadata: customMetadata
- };
- items.push(verbWithMetadata);
- }
- return {
- items,
- totalCount: this.totalVerbCount,
- hasMore: !!response?.nextPageToken,
- nextCursor: response?.nextPageToken
- };
- }
- catch (error) {
- this.logger.error('Error in getVerbsWithPagination:', error);
- throw new Error(`Failed to get verbs with pagination: ${error}`);
- }
- }
- /**
- * Get nouns with filtering and pagination (public API)
- */
- async getNouns(options) {
- const limit = options?.pagination?.limit || 100;
- const cursor = options?.pagination?.cursor;
- return this.getNounsWithPagination({
- limit,
- cursor,
- filter: options?.filter
- });
- }
- /**
- * Get verbs with filtering and pagination (public API)
- * v4.0.0: Returns HNSWVerbWithMetadata[] (includes metadata field)
- */
- async getVerbs(options) {
- const limit = options?.pagination?.limit || 100;
- const cursor = options?.pagination?.cursor;
- return this.getVerbsWithPagination({
- limit,
- cursor,
- filter: options?.filter
- });
- }
+ // v5.4.0: Removed deleteVerb_internal - now inherit from BaseStorage's type-first implementation
+ // v5.4.0: Removed pagination overrides - use BaseStorage's type-first implementation
+ // - getNounsWithPagination, getNodesWithPagination, getVerbsWithPagination
+ // - getNouns, getVerbs (public wrappers)
+ // v5.4.0: Removed 4 query *_internal methods - now inherit from BaseStorage's type-first implementation
+ // (getNounsByNounType_internal, getVerbsBySource_internal, getVerbsByTarget_internal, getVerbsByType_internal)
  /**
  * Batch fetch metadata for multiple noun IDs (efficient for large queries)
  * Uses smaller batches to prevent GCS socket exhaustion
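
The removed overrides above preserved the public pagination API, and the inherited BaseStorage implementations are expected to keep the same shape. A hedged usage sketch of that shape, taken from the removed getNouns override (the 'person' filter value and the storage variable are illustrative only):

    // Page through nouns of one type; options/result shape from the removed override.
    let cursor;
    do {
      const page = await storage.getNouns({
        filter: { nounType: 'person' },
        pagination: { limit: 100, cursor }
      });
      for (const noun of page.items) {
        // each item: { id, vector, connections, type, createdAt, updatedAt, metadata, ... }
      }
      cursor = page.hasMore ? page.nextCursor : undefined;
    } while (cursor);
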
@@ -1501,105 +1044,84 @@ export class GcsStorage extends BaseStorage {
  // HNSW Index Persistence (v3.35.0+)
  /**
  * Get a noun's vector for HNSW rebuild
+ * v5.4.0: Uses BaseStorage's getNoun (type-first paths)
  */
  async getNounVector(id) {
- await this.ensureInitialized();
- const noun = await this.getNode(id);
+ const noun = await this.getNoun(id);
  return noun ? noun.vector : null;
  }
  /**
  * Save HNSW graph data for a noun
- * Storage path: entities/nouns/hnsw/{shard}/{id}.json
+ *
+ * v5.4.0: Uses BaseStorage's getNoun/saveNoun (type-first paths)
+ * CRITICAL: Uses mutex locking to prevent read-modify-write races
  */
  async saveHNSWData(nounId, hnswData) {
- await this.ensureInitialized();
- // CRITICAL FIX (v4.7.3): Must preserve existing node data (id, vector) when updating HNSW metadata
- // Previous implementation overwrote the entire file, destroying vector data
- // Now we READ the existing node, UPDATE only connections/level, then WRITE back the complete node
- // CRITICAL FIX (v4.10.1): Optimistic locking with generation numbers to prevent race conditions
- // Uses GCS generation preconditions - retries with exponential backoff on conflicts
- // Prevents data corruption when multiple entities connect to same neighbor simultaneously
- const shard = getShardIdFromUuid(nounId);
- const key = `entities/nouns/hnsw/${shard}/${nounId}.json`;
- const file = this.bucket.file(key);
- const maxRetries = 5;
- for (let attempt = 0; attempt < maxRetries; attempt++) {
- try {
- // Get current generation and data
- let currentGeneration;
- let existingNode = {};
- try {
- // Download file and get metadata in parallel
- const [data, metadata] = await Promise.all([
- file.download(),
- file.getMetadata()
- ]);
- existingNode = JSON.parse(data[0].toString('utf-8'));
- currentGeneration = metadata[0].generation?.toString();
- }
- catch (error) {
- // File doesn't exist yet - will create new
- if (error.code !== 404) {
- throw error;
- }
- }
- // Preserve id and vector, update only HNSW graph metadata
- const updatedNode = {
- ...existingNode, // Preserve all existing fields (id, vector, etc.)
- level: hnswData.level,
- connections: hnswData.connections
- };
- // ATOMIC WRITE: Use generation precondition
- // If currentGeneration exists, only write if generation matches (no concurrent modification)
- // If no generation, only write if file doesn't exist (ifGenerationMatch: 0)
- await file.save(JSON.stringify(updatedNode, null, 2), {
- contentType: 'application/json',
- resumable: false,
- preconditionOpts: currentGeneration
- ? { ifGenerationMatch: currentGeneration }
- : { ifGenerationMatch: '0' } // Only create if doesn't exist
- });
- // Success! Exit retry loop
- return;
+ const lockKey = `hnsw/${nounId}`;
+ // CRITICAL FIX (v4.10.1): Mutex lock to prevent read-modify-write races
+ // Problem: Without mutex, concurrent operations can:
+ // 1. Thread A reads noun (connections: [1,2,3])
+ // 2. Thread B reads noun (connections: [1,2,3])
+ // 3. Thread A adds connection 4, writes [1,2,3,4]
+ // 4. Thread B adds connection 5, writes [1,2,3,5] Connection 4 LOST!
+ // Solution: Mutex serializes operations per entity (like FileSystem/OPFS adapters)
+ // Production scale: Prevents corruption at 1000+ concurrent operations
+ // Wait for any pending operations on this entity
+ while (this.hnswLocks.has(lockKey)) {
+ await this.hnswLocks.get(lockKey);
+ }
+ // Acquire lock
+ let releaseLock;
+ const lockPromise = new Promise(resolve => { releaseLock = resolve; });
+ this.hnswLocks.set(lockKey, lockPromise);
+ try {
+ // v5.4.0: Use BaseStorage's getNoun (type-first paths)
+ // Read existing noun data (if exists)
+ const existingNoun = await this.getNoun(nounId);
+ if (!existingNoun) {
+ // Noun doesn't exist - cannot update HNSW data for non-existent noun
+ throw new Error(`Cannot save HNSW data: noun ${nounId} not found`);
  }
- catch (error) {
- // Precondition failed (412) - concurrent modification detected
- if (error.code === 412) {
- if (attempt === maxRetries - 1) {
- this.logger.error(`Max retries (${maxRetries}) exceeded for ${nounId} - concurrent modification conflict`);
- throw new Error(`Failed to save HNSW data for ${nounId}: max retries exceeded due to concurrent modifications`);
- }
- // Exponential backoff: 50ms, 100ms, 200ms, 400ms, 800ms
- const backoffMs = 50 * Math.pow(2, attempt);
- await new Promise(resolve => setTimeout(resolve, backoffMs));
- continue;
- }
- // Other error - rethrow
- this.logger.error(`Failed to save HNSW data for ${nounId}:`, error);
- throw new Error(`Failed to save HNSW data for ${nounId}: ${error}`);
+ // Convert connections from Record to Map format for storage
+ const connectionsMap = new Map();
+ for (const [level, nodeIds] of Object.entries(hnswData.connections)) {
+ connectionsMap.set(Number(level), new Set(nodeIds));
  }
+ // Preserve id and vector, update only HNSW graph metadata
+ const updatedNoun = {
+ ...existingNoun,
+ level: hnswData.level,
+ connections: connectionsMap
+ };
+ // v5.4.0: Use BaseStorage's saveNoun (type-first paths, atomic write via writeObjectToBranch)
+ await this.saveNoun(updatedNoun);
+ }
+ finally {
+ // Release lock (ALWAYS runs, even if error thrown)
+ this.hnswLocks.delete(lockKey);
+ releaseLock();
  }
  }
  /**
  * Get HNSW graph data for a noun
- * Storage path: entities/nouns/hnsw/{shard}/{id}.json
+ * v5.4.0: Uses BaseStorage's getNoun (type-first paths)
  */
  async getHNSWData(nounId) {
- await this.ensureInitialized();
- try {
- const shard = getShardIdFromUuid(nounId);
- const key = `entities/nouns/hnsw/${shard}/${nounId}.json`;
- const file = this.bucket.file(key);
- const [contents] = await file.download();
- return JSON.parse(contents.toString());
+ const noun = await this.getNoun(nounId);
+ if (!noun) {
+ return null;
  }
- catch (error) {
- if (error.code === 404) {
- return null;
+ // Convert connections from Map to Record format
+ const connectionsRecord = {};
+ if (noun.connections) {
+ for (const [level, nodeIds] of noun.connections.entries()) {
+ connectionsRecord[String(level)] = Array.from(nodeIds);
  }
- this.logger.error(`Failed to get HNSW data for ${nounId}:`, error);
- throw new Error(`Failed to get HNSW data for ${nounId}: ${error}`);
  }
+ return {
+ level: noun.level || 0,
+ connections: connectionsRecord
+ };
  }
  /**
  * Save HNSW system data (entry point, max level)
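
The per-entity lock added to saveHNSWData above is a promise-based mutex: waiters loop on the Map until the current holder deletes its entry and resolves its promise. A standalone sketch of the same pattern (the withLock helper is hypothetical; the diff inlines this logic):

    const locks = new Map();
    // Run fn() exclusively per key, serializing read-modify-write cycles.
    async function withLock(key, fn) {
      while (locks.has(key)) {
        await locks.get(key); // wait for the pending holder to release
      }
      let release;
      locks.set(key, new Promise(resolve => { release = resolve; }));
      try {
        return await fn(); // critical section runs alone for this key
      } finally {
        locks.delete(key); // ALWAYS release, even if fn() throws
        release();
      }
    }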