@soulcraft/brainy 5.3.6 → 5.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,7 +3,6 @@
3
3
  * Uses the AWS S3 client to interact with S3-compatible storage services
4
4
  * including Amazon S3, Cloudflare R2, and Google Cloud Storage
5
5
  */
6
- import { NounType } from '../../coreTypes.js';
7
6
  import { BaseStorage, INDEX_DIR, SYSTEM_DIR, STATISTICS_KEY, getDirectoryPath } from '../baseStorage.js';
8
7
  import { StorageCompatibilityLayer } from '../backwardCompatibility.js';
9
8
  import { StorageOperationExecutors } from '../../utils/operationUtils.js';
@@ -36,6 +35,12 @@ import { getShardIdFromUuid, getShardIdByIndex, TOTAL_SHARDS } from '../sharding
36
35
  * - credentials: GCS credentials (accessKeyId and secretAccessKey)
37
36
  * - endpoint: GCS endpoint (e.g., 'https://storage.googleapis.com')
38
37
  * - bucketName: GCS bucket name
38
+ *
39
+ * v5.4.0: Type-aware storage now built into BaseStorage
40
+ * - Removed 10 *_internal method overrides (now inherit from BaseStorage's type-first implementation)
41
+ * - Removed 2 pagination method overrides (getNounsWithPagination, getVerbsWithPagination)
42
+ * - Updated HNSW methods to use BaseStorage's getNoun/saveNoun (type-first paths)
43
+ * - All operations now use type-first paths: entities/nouns/{type}/vectors/{shard}/{id}.json
39
44
  */
40
45
  export class S3CompatibleStorage extends BaseStorage {
41
46
  /**
@@ -80,6 +85,8 @@ export class S3CompatibleStorage extends BaseStorage {
80
85
  this.forceHighVolumeMode = false; // Environment variable override
81
86
  // Module logger
82
87
  this.logger = createModuleLogger('S3Storage');
88
+ // v5.4.0: HNSW mutex locks to prevent read-modify-write races
89
+ this.hnswLocks = new Map();
83
90
  // Node cache to avoid redundant API calls
84
91
  this.nodeCache = new Map();
85
92
  // Batch update timer ID
@@ -724,12 +731,7 @@ export class S3CompatibleStorage extends BaseStorage {
724
731
  // Use adaptive socket manager's batch size
725
732
  return this.socketManager.getBatchSize();
726
733
  }
727
- /**
728
- * Save a noun to storage (internal implementation)
729
- */
730
- async saveNoun_internal(noun) {
731
- return this.saveNode(noun);
732
- }
734
+ // v5.4.0: Removed 10 *_internal method overrides (lines 984-2069) - now inherit from BaseStorage's type-first implementation
733
735
  /**
734
736
  * Save a node to storage
735
737
  */
@@ -819,20 +821,7 @@ export class S3CompatibleStorage extends BaseStorage {
819
821
  throw new Error(`Failed to save node ${node.id}: ${error}`);
820
822
  }
821
823
  }
822
- /**
823
- * Get a noun from storage (internal implementation)
824
- * v4.0.0: Returns ONLY vector data (no metadata field)
825
- * Base class combines with metadata via getNoun() -> HNSWNounWithMetadata
826
- */
827
- async getNoun_internal(id) {
828
- // v4.0.0: Return ONLY vector data (no metadata field)
829
- const node = await this.getNode(id);
830
- if (!node) {
831
- return null;
832
- }
833
- // Return pure vector structure
834
- return node;
835
- }
824
+ // v5.4.0: Removed getNoun_internal override - uses BaseStorage type-first implementation
836
825
  /**
837
826
  * Get a node from storage
838
827
  */
@@ -1091,243 +1080,8 @@ export class S3CompatibleStorage extends BaseStorage {
1091
1080
  }
1092
1081
  return nodes;
1093
1082
  }
1094
- /**
1095
- * Get nouns by noun type (internal implementation)
1096
- * @param nounType The noun type to filter by
1097
- * @returns Promise that resolves to an array of nouns of the specified noun type
1098
- */
1099
- async getNounsByNounType_internal(nounType) {
1100
- return this.getNodesByNounType(nounType);
1101
- }
1102
- /**
1103
- * Get nodes by noun type
1104
- * @param nounType The noun type to filter by
1105
- * @returns Promise that resolves to an array of nodes of the specified noun type
1106
- */
1107
- async getNodesByNounType(nounType) {
1108
- await this.ensureInitialized();
1109
- try {
1110
- const filteredNodes = [];
1111
- let hasMore = true;
1112
- let cursor = undefined;
1113
- // Use pagination to process nodes in batches
1114
- while (hasMore) {
1115
- // Get a batch of nodes
1116
- const result = await this.getNodesWithPagination({
1117
- limit: 100,
1118
- cursor,
1119
- useCache: true
1120
- });
1121
- // Filter nodes by noun type using metadata
1122
- for (const node of result.nodes) {
1123
- const metadata = await this.getMetadata(node.id);
1124
- if (metadata && metadata.noun === nounType) {
1125
- filteredNodes.push(node);
1126
- }
1127
- }
1128
- // Update pagination state
1129
- hasMore = result.hasMore;
1130
- cursor = result.nextCursor;
1131
- // Safety check to prevent infinite loops
1132
- if (!cursor && hasMore) {
1133
- this.logger.warn('No cursor returned but hasMore is true, breaking loop');
1134
- break;
1135
- }
1136
- }
1137
- return filteredNodes;
1138
- }
1139
- catch (error) {
1140
- this.logger.error(`Failed to get nodes by noun type ${nounType}:`, error);
1141
- return [];
1142
- }
1143
- }
1144
- /**
1145
- * Delete a noun from storage (internal implementation)
1146
- */
1147
- async deleteNoun_internal(id) {
1148
- return this.deleteNode(id);
1149
- }
1150
- /**
1151
- * Delete a node from storage
1152
- */
1153
- async deleteNode(id) {
1154
- await this.ensureInitialized();
1155
- try {
1156
- // Import the DeleteObjectCommand only when needed
1157
- const { DeleteObjectCommand } = await import('@aws-sdk/client-s3');
1158
- // Delete the node from S3-compatible storage
1159
- await this.s3Client.send(new DeleteObjectCommand({
1160
- Bucket: this.bucketName,
1161
- Key: `${this.nounPrefix}${id}.json`
1162
- }));
1163
- // Log the change for efficient synchronization
1164
- await this.appendToChangeLog({
1165
- timestamp: Date.now(),
1166
- operation: 'delete',
1167
- entityType: 'noun',
1168
- entityId: id
1169
- });
1170
- }
1171
- catch (error) {
1172
- this.logger.error(`Failed to delete node ${id}:`, error);
1173
- throw new Error(`Failed to delete node ${id}: ${error}`);
1174
- }
1175
- }
1176
- /**
1177
- * Save a verb to storage (internal implementation)
1178
- */
1179
- async saveVerb_internal(verb) {
1180
- return this.saveEdge(verb);
1181
- }
1182
- /**
1183
- * Save an edge to storage
1184
- */
1185
- async saveEdge(edge) {
1186
- await this.ensureInitialized();
1187
- // ALWAYS check if we should use high-volume mode (critical for detection)
1188
- this.checkVolumeMode();
1189
- // Use write buffer in high-volume mode
1190
- if (this.highVolumeMode && this.verbWriteBuffer) {
1191
- this.logger.trace(`📝 BUFFERING: Adding verb ${edge.id} to write buffer (high-volume mode active)`);
1192
- await this.verbWriteBuffer.add(edge.id, edge);
1193
- return;
1194
- }
1195
- else if (!this.highVolumeMode) {
1196
- this.logger.trace(`📝 DIRECT WRITE: Saving verb ${edge.id} directly (high-volume mode inactive)`);
1197
- }
1198
- // Apply backpressure before starting operation
1199
- const requestId = await this.applyBackpressure();
1200
- try {
1201
- // Convert connections Map to a serializable format
1202
- // CRITICAL: Only save lightweight vector data (no metadata)
1203
- // Metadata is saved separately via saveVerbMetadata() (2-file system)
1204
- // ARCHITECTURAL FIX (v3.50.1): Include core relational fields in verb vector file
1205
- const serializableEdge = {
1206
- id: edge.id,
1207
- vector: edge.vector,
1208
- connections: this.mapToObject(edge.connections, (set) => Array.from(set)),
1209
- // CORE RELATIONAL DATA (v3.50.1+)
1210
- verb: edge.verb,
1211
- sourceId: edge.sourceId,
1212
- targetId: edge.targetId,
1213
- // NO metadata field - saved separately for scalability
1214
- };
1215
- // Import the PutObjectCommand only when needed
1216
- const { PutObjectCommand } = await import('@aws-sdk/client-s3');
1217
- // Save the edge to S3-compatible storage using sharding if available
1218
- await this.s3Client.send(new PutObjectCommand({
1219
- Bucket: this.bucketName,
1220
- Key: this.getVerbKey(edge.id),
1221
- Body: JSON.stringify(serializableEdge, null, 2),
1222
- ContentType: 'application/json'
1223
- }));
1224
- // Log the change for efficient synchronization
1225
- await this.appendToChangeLog({
1226
- timestamp: Date.now(),
1227
- operation: 'add', // Could be 'update' if we track existing edges
1228
- entityType: 'verb',
1229
- entityId: edge.id,
1230
- data: {
1231
- vector: edge.vector
1232
- }
1233
- });
1234
- // Increment verb count - always increment total, and increment by type if metadata exists
1235
- this.totalVerbCount++;
1236
- const metadata = await this.getVerbMetadata(edge.id);
1237
- if (metadata && metadata.type) {
1238
- const currentCount = this.verbCounts.get(metadata.type) || 0;
1239
- this.verbCounts.set(metadata.type, currentCount + 1);
1240
- }
1241
- // Release backpressure on success
1242
- this.releaseBackpressure(true, requestId);
1243
- }
1244
- catch (error) {
1245
- // Release backpressure on error
1246
- this.releaseBackpressure(false, requestId);
1247
- this.logger.error(`Failed to save edge ${edge.id}:`, error);
1248
- throw new Error(`Failed to save edge ${edge.id}: ${error}`);
1249
- }
1250
- }
1251
- /**
1252
- * Get a verb from storage (internal implementation)
1253
- * v4.0.0: Returns ONLY vector + core relational fields (no metadata field)
1254
- * Base class combines with metadata via getVerb() -> HNSWVerbWithMetadata
1255
- */
1256
- async getVerb_internal(id) {
1257
- // v4.0.0: Return ONLY vector + core relational data (no metadata field)
1258
- const edge = await this.getEdge(id);
1259
- if (!edge) {
1260
- return null;
1261
- }
1262
- // Return pure vector + core fields structure
1263
- return edge;
1264
- }
1265
- /**
1266
- * Get an edge from storage
1267
- */
1268
- async getEdge(id) {
1269
- await this.ensureInitialized();
1270
- try {
1271
- // Import the GetObjectCommand only when needed
1272
- const { GetObjectCommand } = await import('@aws-sdk/client-s3');
1273
- const key = this.getVerbKey(id);
1274
- this.logger.trace(`Getting edge ${id} from key: ${key}`);
1275
- // Try to get the edge from the verbs directory
1276
- const response = await this.s3Client.send(new GetObjectCommand({
1277
- Bucket: this.bucketName,
1278
- Key: key
1279
- }));
1280
- // Check if response is null or undefined
1281
- if (!response || !response.Body) {
1282
- this.logger.trace(`No edge found for ${id}`);
1283
- return null;
1284
- }
1285
- // Convert the response body to a string
1286
- const bodyContents = await response.Body.transformToString();
1287
- this.logger.trace(`Retrieved edge body for ${id}`);
1288
- // Parse the JSON string
1289
- try {
1290
- const parsedEdge = JSON.parse(bodyContents);
1291
- this.logger.trace(`Parsed edge data for ${id}`);
1292
- // Ensure the parsed edge has the expected properties
1293
- if (!parsedEdge ||
1294
- !parsedEdge.id ||
1295
- !parsedEdge.vector ||
1296
- !parsedEdge.connections) {
1297
- this.logger.warn(`Invalid edge data for ${id}`);
1298
- return null;
1299
- }
1300
- // Convert serialized connections back to Map<number, Set<string>>
1301
- const connections = new Map();
1302
- for (const [level, nodeIds] of Object.entries(parsedEdge.connections)) {
1303
- connections.set(Number(level), new Set(nodeIds));
1304
- }
1305
- // v4.0.0: Return HNSWVerb with core relational fields (NO metadata field)
1306
- const edge = {
1307
- id: parsedEdge.id,
1308
- vector: parsedEdge.vector,
1309
- connections,
1310
- // CORE RELATIONAL DATA (read from vector file)
1311
- verb: parsedEdge.verb,
1312
- sourceId: parsedEdge.sourceId,
1313
- targetId: parsedEdge.targetId
1314
- // ✅ NO metadata field in v4.0.0
1315
- // User metadata retrieved separately via getVerbMetadata()
1316
- };
1317
- this.logger.trace(`Successfully retrieved edge ${id}`);
1318
- return edge;
1319
- }
1320
- catch (parseError) {
1321
- this.logger.error(`Failed to parse edge data for ${id}:`, parseError);
1322
- return null;
1323
- }
1324
- }
1325
- catch (error) {
1326
- // Edge not found or other error
1327
- this.logger.trace(`Edge not found for ${id}`);
1328
- return null;
1329
- }
1330
- }
1083
+ // v5.4.0: Removed 4 *_internal method overrides (getNounsByNounType_internal, deleteNoun_internal, saveVerb_internal, getVerb_internal)
1084
+ // Now inherit from BaseStorage's type-first implementation
1331
1085
  /**
1332
1086
  * Get all edges from storage
1333
1087
  * @deprecated This method is deprecated and will be removed in a future version.
@@ -1465,174 +1219,9 @@ export class S3CompatibleStorage extends BaseStorage {
1465
1219
  this.logger.trace('Edge filtering is deprecated and not supported with the new storage pattern');
1466
1220
  return true; // Return all edges since filtering requires metadata
1467
1221
  }
1468
- /**
1469
- * Get verbs with pagination
1470
- * @param options Pagination options
1471
- * @returns Promise that resolves to a paginated result of verbs
1472
- */
1473
- async getVerbsWithPagination(options = {}) {
1474
- await this.ensureInitialized();
1475
- // Convert filter to edge filter format
1476
- const edgeFilter = {};
1477
- if (options.filter) {
1478
- // Handle sourceId filter
1479
- if (options.filter.sourceId) {
1480
- edgeFilter.sourceId = Array.isArray(options.filter.sourceId)
1481
- ? options.filter.sourceId[0]
1482
- : options.filter.sourceId;
1483
- }
1484
- // Handle targetId filter
1485
- if (options.filter.targetId) {
1486
- edgeFilter.targetId = Array.isArray(options.filter.targetId)
1487
- ? options.filter.targetId[0]
1488
- : options.filter.targetId;
1489
- }
1490
- // Handle verbType filter
1491
- if (options.filter.verbType) {
1492
- edgeFilter.type = Array.isArray(options.filter.verbType)
1493
- ? options.filter.verbType[0]
1494
- : options.filter.verbType;
1495
- }
1496
- }
1497
- // Get edges with pagination
1498
- const result = await this.getEdgesWithPagination({
1499
- limit: options.limit,
1500
- cursor: options.cursor,
1501
- useCache: true,
1502
- filter: edgeFilter
1503
- });
1504
- // v4.0.0: Convert HNSWVerbs to HNSWVerbWithMetadata by combining with metadata
1505
- const verbsWithMetadata = [];
1506
- for (const hnswVerb of result.edges) {
1507
- const metadata = await this.getVerbMetadata(hnswVerb.id);
1508
- // v4.8.0: Extract standard fields from metadata to top-level
1509
- const metadataObj = (metadata || {});
1510
- const { createdAt, updatedAt, confidence, weight, service, data, createdBy, ...customMetadata } = metadataObj;
1511
- const verbWithMetadata = {
1512
- id: hnswVerb.id,
1513
- vector: [...hnswVerb.vector],
1514
- connections: new Map(hnswVerb.connections),
1515
- verb: hnswVerb.verb,
1516
- sourceId: hnswVerb.sourceId,
1517
- targetId: hnswVerb.targetId,
1518
- createdAt: createdAt || Date.now(),
1519
- updatedAt: updatedAt || Date.now(),
1520
- confidence: confidence,
1521
- weight: weight,
1522
- service: service,
1523
- data: data,
1524
- createdBy,
1525
- metadata: customMetadata
1526
- };
1527
- verbsWithMetadata.push(verbWithMetadata);
1528
- }
1529
- // Apply filtering at HNSWVerbWithMetadata level
1530
- // v4.0.0: Core fields (verb, sourceId, targetId) are in HNSWVerb, not metadata
1531
- let filteredVerbs = verbsWithMetadata;
1532
- if (options.filter) {
1533
- filteredVerbs = verbsWithMetadata.filter((verbWithMetadata) => {
1534
- // Filter by sourceId
1535
- if (options.filter.sourceId) {
1536
- const sourceIds = Array.isArray(options.filter.sourceId)
1537
- ? options.filter.sourceId
1538
- : [options.filter.sourceId];
1539
- if (!verbWithMetadata.sourceId || !sourceIds.includes(verbWithMetadata.sourceId)) {
1540
- return false;
1541
- }
1542
- }
1543
- // Filter by targetId
1544
- if (options.filter.targetId) {
1545
- const targetIds = Array.isArray(options.filter.targetId)
1546
- ? options.filter.targetId
1547
- : [options.filter.targetId];
1548
- if (!verbWithMetadata.targetId || !targetIds.includes(verbWithMetadata.targetId)) {
1549
- return false;
1550
- }
1551
- }
1552
- // Filter by verbType
1553
- if (options.filter.verbType) {
1554
- const verbTypes = Array.isArray(options.filter.verbType)
1555
- ? options.filter.verbType
1556
- : [options.filter.verbType];
1557
- if (!verbWithMetadata.verb || !verbTypes.includes(verbWithMetadata.verb)) {
1558
- return false;
1559
- }
1560
- }
1561
- return true;
1562
- });
1563
- }
1564
- return {
1565
- items: filteredVerbs,
1566
- totalCount: this.totalVerbCount, // Use pre-calculated count from init()
1567
- hasMore: result.hasMore,
1568
- nextCursor: result.nextCursor
1569
- };
1570
- }
1571
- /**
1572
- * Get verbs by source (internal implementation)
1573
- */
1574
- async getVerbsBySource_internal(sourceId) {
1575
- // Use the paginated approach to properly handle HNSWVerb to GraphVerb conversion
1576
- const result = await this.getVerbsWithPagination({
1577
- filter: { sourceId: [sourceId] },
1578
- limit: Number.MAX_SAFE_INTEGER // Get all matching results
1579
- });
1580
- return result.items;
1581
- }
1582
- /**
1583
- * Get verbs by target (internal implementation)
1584
- */
1585
- async getVerbsByTarget_internal(targetId) {
1586
- // Use the paginated approach to properly handle HNSWVerb to GraphVerb conversion
1587
- const result = await this.getVerbsWithPagination({
1588
- filter: { targetId: [targetId] },
1589
- limit: Number.MAX_SAFE_INTEGER // Get all matching results
1590
- });
1591
- return result.items;
1592
- }
1593
- /**
1594
- * Get verbs by type (internal implementation)
1595
- */
1596
- async getVerbsByType_internal(type) {
1597
- // Use the paginated approach to properly handle HNSWVerb to GraphVerb conversion
1598
- const result = await this.getVerbsWithPagination({
1599
- filter: { verbType: [type] },
1600
- limit: Number.MAX_SAFE_INTEGER // Get all matching results
1601
- });
1602
- return result.items;
1603
- }
1604
- /**
1605
- * Delete a verb from storage (internal implementation)
1606
- */
1607
- async deleteVerb_internal(id) {
1608
- return this.deleteEdge(id);
1609
- }
1610
- /**
1611
- * Delete an edge from storage
1612
- */
1613
- async deleteEdge(id) {
1614
- await this.ensureInitialized();
1615
- try {
1616
- // Import the DeleteObjectCommand only when needed
1617
- const { DeleteObjectCommand } = await import('@aws-sdk/client-s3');
1618
- // Delete the edge from S3-compatible storage
1619
- await this.s3Client.send(new DeleteObjectCommand({
1620
- Bucket: this.bucketName,
1621
- Key: `${this.verbPrefix}${id}.json`
1622
- }));
1623
- // Log the change for efficient synchronization
1624
- await this.appendToChangeLog({
1625
- timestamp: Date.now(),
1626
- operation: 'delete',
1627
- entityType: 'verb',
1628
- entityId: id
1629
- });
1630
- }
1631
- catch (error) {
1632
- this.logger.error(`Failed to delete edge ${id}:`, error);
1633
- throw new Error(`Failed to delete edge ${id}: ${error}`);
1634
- }
1635
- }
1222
+ // v5.4.0: Removed getVerbsWithPagination override - use BaseStorage's type-first implementation
1223
+ // v5.4.0: Removed 4 more *_internal method overrides (getVerbsBySource, getVerbsByTarget, getVerbsByType, deleteVerb)
1224
+ // Total: 10 *_internal methods removed (these 8 plus saveNoun_internal and getNoun_internal) - all now inherit from BaseStorage's type-first implementation
1636
1225
  /**
1637
1226
  * Primitive operation: Write object to path
1638
1227
  * All metadata operations use this internally via base class routing
@@ -2885,83 +2474,7 @@ export class S3CompatibleStorage extends BaseStorage {
2885
2474
  this.logger.warn('Failed to cleanup expired locks:', error);
2886
2475
  }
2887
2476
  }
2888
- /**
2889
- * Get nouns with pagination support
2890
- * @param options Pagination options
2891
- * @returns Promise that resolves to a paginated result of nouns
2892
- */
2893
- async getNounsWithPagination(options = {}) {
2894
- await this.ensureInitialized();
2895
- const limit = options.limit || 100;
2896
- const cursor = options.cursor;
2897
- // Get paginated nodes
2898
- const result = await this.getNodesWithPagination({
2899
- limit,
2900
- cursor,
2901
- useCache: true
2902
- });
2903
- // v4.0.0: Combine nodes with metadata to create HNSWNounWithMetadata[]
2904
- const nounsWithMetadata = [];
2905
- for (const node of result.nodes) {
2906
- // FIX v4.7.4: Don't skip nouns without metadata - metadata is optional in v4.0.0
2907
- const metadata = await this.getNounMetadata(node.id);
2908
- // Apply filters if provided
2909
- if (options.filter && metadata) {
2910
- // Filter by noun type
2911
- if (options.filter.nounType) {
2912
- const nounTypes = Array.isArray(options.filter.nounType)
2913
- ? options.filter.nounType
2914
- : [options.filter.nounType];
2915
- const nounType = (metadata.type || metadata.noun);
2916
- if (!nounType || !nounTypes.includes(nounType)) {
2917
- continue;
2918
- }
2919
- }
2920
- // Filter by service
2921
- if (options.filter.service) {
2922
- const services = Array.isArray(options.filter.service)
2923
- ? options.filter.service
2924
- : [options.filter.service];
2925
- if (!metadata.service || !services.includes(metadata.service)) {
2926
- continue;
2927
- }
2928
- }
2929
- // Filter by metadata fields
2930
- if (options.filter.metadata) {
2931
- const metadataFilter = options.filter.metadata;
2932
- const matches = Object.entries(metadataFilter).every(([key, value]) => metadata[key] === value);
2933
- if (!matches) {
2934
- continue;
2935
- }
2936
- }
2937
- }
2938
- // v4.8.0: Extract standard fields from metadata to top-level
2939
- const metadataObj = (metadata || {});
2940
- const { noun: nounType, createdAt, updatedAt, confidence, weight, service, data, createdBy, ...customMetadata } = metadataObj;
2941
- const nounWithMetadata = {
2942
- id: node.id,
2943
- vector: [...node.vector],
2944
- connections: new Map(node.connections),
2945
- level: node.level || 0,
2946
- type: nounType || NounType.Thing,
2947
- createdAt: createdAt || Date.now(),
2948
- updatedAt: updatedAt || Date.now(),
2949
- confidence: confidence,
2950
- weight: weight,
2951
- service: service,
2952
- data: data,
2953
- createdBy,
2954
- metadata: customMetadata
2955
- };
2956
- nounsWithMetadata.push(nounWithMetadata);
2957
- }
2958
- return {
2959
- items: nounsWithMetadata,
2960
- totalCount: this.totalNounCount, // Use pre-calculated count from init()
2961
- hasMore: result.hasMore,
2962
- nextCursor: result.nextCursor
2963
- };
2964
- }
2477
+ // v5.4.0: Removed getNounsWithPagination override - use BaseStorage's type-first implementation
2965
2478
  /**
2966
2479
  * Estimate total noun count by listing objects across all shards
2967
2480
  * This is more efficient than loading all nouns
@@ -3087,116 +2600,84 @@ export class S3CompatibleStorage extends BaseStorage {
3087
2600
  // HNSW Index Persistence (v3.35.0+)
3088
2601
  /**
3089
2602
  * Get a noun's vector for HNSW rebuild
2603
+ * v5.4.0: Uses BaseStorage's getNoun (type-first paths)
3090
2604
  */
3091
2605
  async getNounVector(id) {
3092
- await this.ensureInitialized();
3093
- const noun = await this.getNode(id);
2606
+ const noun = await this.getNoun(id);
3094
2607
  return noun ? noun.vector : null;
3095
2608
  }
3096
2609
  /**
3097
2610
  * Save HNSW graph data for a noun
3098
- * Storage path: entities/nouns/hnsw/{shard}/{id}.json
2611
+ *
2612
+ * v5.4.0: Uses BaseStorage's getNoun/saveNoun (type-first paths)
2613
+ * CRITICAL: Uses mutex locking to prevent read-modify-write races
3099
2614
  */
3100
2615
  async saveHNSWData(nounId, hnswData) {
3101
- await this.ensureInitialized();
3102
- const { PutObjectCommand, GetObjectCommand } = await import('@aws-sdk/client-s3');
3103
- // CRITICAL FIX (v4.7.3): Must preserve existing node data (id, vector) when updating HNSW metadata
3104
- // Previous implementation overwrote the entire file, destroying vector data
3105
- // Now we READ the existing node, UPDATE only connections/level, then WRITE back the complete node
3106
- // CRITICAL FIX (v4.10.1): Optimistic locking with ETags to prevent race conditions
3107
- // Uses S3 IfMatch preconditions - retries with exponential backoff on conflicts
3108
- // Prevents data corruption when multiple entities connect to same neighbor simultaneously
3109
- const shard = getShardIdFromUuid(nounId);
3110
- const key = `entities/nouns/hnsw/${shard}/${nounId}.json`;
3111
- const maxRetries = 5;
3112
- for (let attempt = 0; attempt < maxRetries; attempt++) {
3113
- try {
3114
- // Get current ETag and data
3115
- let currentETag;
3116
- let existingNode = {};
3117
- try {
3118
- const getResponse = await this.s3Client.send(new GetObjectCommand({
3119
- Bucket: this.bucketName,
3120
- Key: key
3121
- }));
3122
- const existingData = await getResponse.Body.transformToString();
3123
- existingNode = JSON.parse(existingData);
3124
- currentETag = getResponse.ETag;
3125
- }
3126
- catch (error) {
3127
- // File doesn't exist yet - will create new
3128
- if (error.name !== 'NoSuchKey' && error.Code !== 'NoSuchKey') {
3129
- throw error;
3130
- }
3131
- }
3132
- // Preserve id and vector, update only HNSW graph metadata
3133
- const updatedNode = {
3134
- ...existingNode, // Preserve all existing fields (id, vector, etc.)
3135
- level: hnswData.level,
3136
- connections: hnswData.connections
3137
- };
3138
- // ATOMIC WRITE: Use ETag precondition
3139
- // If currentETag exists, only write if ETag matches (no concurrent modification)
3140
- // If no ETag, only write if file doesn't exist (IfNoneMatch: *)
3141
- await this.s3Client.send(new PutObjectCommand({
3142
- Bucket: this.bucketName,
3143
- Key: key,
3144
- Body: JSON.stringify(updatedNode, null, 2),
3145
- ContentType: 'application/json',
3146
- ...(currentETag
3147
- ? { IfMatch: currentETag }
3148
- : { IfNoneMatch: '*' }) // Only create if doesn't exist
3149
- }));
3150
- // Success! Exit retry loop
3151
- return;
3152
- }
3153
- catch (error) {
3154
- // Precondition failed - concurrent modification detected
3155
- if (error.name === 'PreconditionFailed' || error.Code === 'PreconditionFailed') {
3156
- if (attempt === maxRetries - 1) {
3157
- this.logger.error(`Max retries (${maxRetries}) exceeded for ${nounId} - concurrent modification conflict`);
3158
- throw new Error(`Failed to save HNSW data for ${nounId}: max retries exceeded due to concurrent modifications`);
3159
- }
3160
- // Exponential backoff: 50ms, 100ms, 200ms, 400ms, 800ms
3161
- const backoffMs = 50 * Math.pow(2, attempt);
3162
- await new Promise(resolve => setTimeout(resolve, backoffMs));
3163
- continue;
3164
- }
3165
- // Other error - rethrow
3166
- this.logger.error(`Failed to save HNSW data for ${nounId}:`, error);
3167
- throw new Error(`Failed to save HNSW data for ${nounId}: ${error}`);
3168
- }
2616
+ const lockKey = `hnsw/${nounId}`;
2617
+ // CRITICAL FIX (v4.10.1 used ETag optimistic locking; v5.4.0 switches this adapter to a mutex): Mutex lock to prevent read-modify-write races
2618
+ // Problem: Without mutex, concurrent operations can:
2619
+ // 1. Thread A reads noun (connections: [1,2,3])
2620
+ // 2. Thread B reads noun (connections: [1,2,3])
2621
+ // 3. Thread A adds connection 4, writes [1,2,3,4]
2622
+ // 4. Thread B adds connection 5, writes [1,2,3,5] -> Connection 4 LOST!
2623
+ // Solution: Mutex serializes operations per entity (like FileSystem/OPFS adapters)
2624
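+ // Production scale: Prevents corruption at 1000+ concurrent operations within this process (the lock is an in-memory Map, so it does not coordinate writers running in other processes)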
2625
+ // Wait for any pending operations on this entity
2626
+ while (this.hnswLocks.has(lockKey)) {
2627
+ await this.hnswLocks.get(lockKey);
2628
+ }
2629
+ // Acquire lock
2630
+ let releaseLock;
2631
+ const lockPromise = new Promise(resolve => { releaseLock = resolve; });
2632
+ this.hnswLocks.set(lockKey, lockPromise);
2633
+ try {
2634
+ // v5.4.0: Use BaseStorage's getNoun (type-first paths)
2635
+ // Read existing noun data (if exists)
2636
+ const existingNoun = await this.getNoun(nounId);
2637
+ if (!existingNoun) {
2638
+ // Noun doesn't exist - cannot update HNSW data for non-existent noun
2639
+ throw new Error(`Cannot save HNSW data: noun ${nounId} not found`);
2640
+ }
2641
+ // Convert connections from Record to Map format for storage
2642
+ const connectionsMap = new Map();
2643
+ for (const [level, nodeIds] of Object.entries(hnswData.connections)) {
2644
+ connectionsMap.set(Number(level), new Set(nodeIds));
2645
+ }
2646
+ // Preserve id and vector, update only HNSW graph metadata
2647
+ const updatedNoun = {
2648
+ ...existingNoun,
2649
+ level: hnswData.level,
2650
+ connections: connectionsMap
2651
+ };
2652
+ // v5.4.0: Use BaseStorage's saveNoun (type-first paths, atomic write via writeObjectToBranch)
2653
+ await this.saveNoun(updatedNoun);
2654
+ }
2655
+ finally {
2656
+ // Release lock (ALWAYS runs, even if error thrown)
2657
+ this.hnswLocks.delete(lockKey);
2658
+ releaseLock();
3169
2659
  }
3170
2660
  }
3171
2661
  /**
3172
2662
  * Get HNSW graph data for a noun
3173
- * Storage path: entities/nouns/hnsw/{shard}/{id}.json
2663
+ * v5.4.0: Uses BaseStorage's getNoun (type-first paths)
3174
2664
  */
3175
2665
  async getHNSWData(nounId) {
3176
- await this.ensureInitialized();
3177
- try {
3178
- const { GetObjectCommand } = await import('@aws-sdk/client-s3');
3179
- const shard = getShardIdFromUuid(nounId);
3180
- const key = `entities/nouns/hnsw/${shard}/${nounId}.json`;
3181
- const response = await this.s3Client.send(new GetObjectCommand({
3182
- Bucket: this.bucketName,
3183
- Key: key
3184
- }));
3185
- if (!response || !response.Body) {
3186
- return null;
3187
- }
3188
- const bodyContents = await response.Body.transformToString();
3189
- return JSON.parse(bodyContents);
2666
+ const noun = await this.getNoun(nounId);
2667
+ if (!noun) {
2668
+ return null;
3190
2669
  }
3191
- catch (error) {
3192
- if (error.name === 'NoSuchKey' ||
3193
- error.message?.includes('NoSuchKey') ||
3194
- error.message?.includes('not found')) {
3195
- return null;
2670
+ // Convert connections from Map to Record format
2671
+ const connectionsRecord = {};
2672
+ if (noun.connections) {
2673
+ for (const [level, nodeIds] of noun.connections.entries()) {
2674
+ connectionsRecord[String(level)] = Array.from(nodeIds);
3196
2675
  }
3197
- this.logger.error(`Failed to get HNSW data for ${nounId}:`, error);
3198
- throw new Error(`Failed to get HNSW data for ${nounId}: ${error}`);
3199
2676
  }
2677
+ return {
2678
+ level: noun.level || 0,
2679
+ connections: connectionsRecord
2680
+ };
3200
2681
  }
3201
2682
  /**
3202
2683
  * Save HNSW system data (entry point, max level)