@soulcraft/brainy 5.3.6 → 5.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +65 -0
- package/dist/brainy.d.ts +61 -0
- package/dist/brainy.js +179 -23
- package/dist/storage/adapters/azureBlobStorage.d.ts +13 -64
- package/dist/storage/adapters/azureBlobStorage.js +78 -388
- package/dist/storage/adapters/fileSystemStorage.d.ts +12 -78
- package/dist/storage/adapters/fileSystemStorage.js +49 -395
- package/dist/storage/adapters/gcsStorage.d.ts +13 -134
- package/dist/storage/adapters/gcsStorage.js +79 -557
- package/dist/storage/adapters/historicalStorageAdapter.d.ts +181 -0
- package/dist/storage/adapters/historicalStorageAdapter.js +332 -0
- package/dist/storage/adapters/memoryStorage.d.ts +4 -113
- package/dist/storage/adapters/memoryStorage.js +34 -471
- package/dist/storage/adapters/opfsStorage.d.ts +14 -127
- package/dist/storage/adapters/opfsStorage.js +44 -693
- package/dist/storage/adapters/r2Storage.d.ts +8 -41
- package/dist/storage/adapters/r2Storage.js +49 -237
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +13 -111
- package/dist/storage/adapters/s3CompatibleStorage.js +77 -596
- package/dist/storage/baseStorage.d.ts +78 -38
- package/dist/storage/baseStorage.js +692 -23
- package/dist/storage/cow/BlobStorage.d.ts +2 -2
- package/dist/storage/cow/BlobStorage.js +4 -4
- package/dist/storage/storageFactory.d.ts +2 -3
- package/dist/storage/storageFactory.js +114 -66
- package/dist/vfs/types.d.ts +6 -2
- package/package.json +1 -1
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
* Uses the AWS S3 client to interact with S3-compatible storage services
|
|
4
4
|
* including Amazon S3, Cloudflare R2, and Google Cloud Storage
|
|
5
5
|
*/
|
|
6
|
-
import { NounType } from '../../coreTypes.js';
|
|
7
6
|
import { BaseStorage, INDEX_DIR, SYSTEM_DIR, STATISTICS_KEY, getDirectoryPath } from '../baseStorage.js';
|
|
8
7
|
import { StorageCompatibilityLayer } from '../backwardCompatibility.js';
|
|
9
8
|
import { StorageOperationExecutors } from '../../utils/operationUtils.js';
|
|
@@ -36,6 +35,12 @@ import { getShardIdFromUuid, getShardIdByIndex, TOTAL_SHARDS } from '../sharding
|
|
|
36
35
|
* - credentials: GCS credentials (accessKeyId and secretAccessKey)
|
|
37
36
|
* - endpoint: GCS endpoint (e.g., 'https://storage.googleapis.com')
|
|
38
37
|
* - bucketName: GCS bucket name
|
|
38
|
+
*
|
|
39
|
+
* v5.4.0: Type-aware storage now built into BaseStorage
|
|
40
|
+
* - Removed 10 *_internal method overrides (now inherit from BaseStorage's type-first implementation)
|
|
41
|
+
* - Removed 2 pagination method overrides (getNounsWithPagination, getVerbsWithPagination)
|
|
42
|
+
* - Updated HNSW methods to use BaseStorage's getNoun/saveNoun (type-first paths)
|
|
43
|
+
* - All operations now use type-first paths: entities/nouns/{type}/vectors/{shard}/{id}.json
|
|
39
44
|
*/
|
|
40
45
|
export class S3CompatibleStorage extends BaseStorage {
|
|
41
46
|
/**
|
|
@@ -80,6 +85,8 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
80
85
|
this.forceHighVolumeMode = false; // Environment variable override
|
|
81
86
|
// Module logger
|
|
82
87
|
this.logger = createModuleLogger('S3Storage');
|
|
88
|
+
// v5.4.0: HNSW mutex locks to prevent read-modify-write races
|
|
89
|
+
this.hnswLocks = new Map();
|
|
83
90
|
// Node cache to avoid redundant API calls
|
|
84
91
|
this.nodeCache = new Map();
|
|
85
92
|
// Batch update timer ID
|
|
@@ -724,12 +731,7 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
724
731
|
// Use adaptive socket manager's batch size
|
|
725
732
|
return this.socketManager.getBatchSize();
|
|
726
733
|
}
|
|
727
|
-
|
|
728
|
-
* Save a noun to storage (internal implementation)
|
|
729
|
-
*/
|
|
730
|
-
async saveNoun_internal(noun) {
|
|
731
|
-
return this.saveNode(noun);
|
|
732
|
-
}
|
|
734
|
+
// v5.4.0: Removed 10 *_internal method overrides (lines 984-2069) - now inherit from BaseStorage's type-first implementation
|
|
733
735
|
/**
|
|
734
736
|
* Save a node to storage
|
|
735
737
|
*/
|
|
@@ -819,20 +821,7 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
819
821
|
throw new Error(`Failed to save node ${node.id}: ${error}`);
|
|
820
822
|
}
|
|
821
823
|
}
|
|
822
|
-
|
|
823
|
-
* Get a noun from storage (internal implementation)
|
|
824
|
-
* v4.0.0: Returns ONLY vector data (no metadata field)
|
|
825
|
-
* Base class combines with metadata via getNoun() -> HNSWNounWithMetadata
|
|
826
|
-
*/
|
|
827
|
-
async getNoun_internal(id) {
|
|
828
|
-
// v4.0.0: Return ONLY vector data (no metadata field)
|
|
829
|
-
const node = await this.getNode(id);
|
|
830
|
-
if (!node) {
|
|
831
|
-
return null;
|
|
832
|
-
}
|
|
833
|
-
// Return pure vector structure
|
|
834
|
-
return node;
|
|
835
|
-
}
|
|
824
|
+
// v5.4.0: Removed getNoun_internal override - uses BaseStorage type-first implementation
|
|
836
825
|
/**
|
|
837
826
|
* Get a node from storage
|
|
838
827
|
*/
|
|
@@ -1091,243 +1080,8 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
1091
1080
|
}
|
|
1092
1081
|
return nodes;
|
|
1093
1082
|
}
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
* @param nounType The noun type to filter by
|
|
1097
|
-
* @returns Promise that resolves to an array of nouns of the specified noun type
|
|
1098
|
-
*/
|
|
1099
|
-
async getNounsByNounType_internal(nounType) {
|
|
1100
|
-
return this.getNodesByNounType(nounType);
|
|
1101
|
-
}
|
|
1102
|
-
/**
|
|
1103
|
-
* Get nodes by noun type
|
|
1104
|
-
* @param nounType The noun type to filter by
|
|
1105
|
-
* @returns Promise that resolves to an array of nodes of the specified noun type
|
|
1106
|
-
*/
|
|
1107
|
-
async getNodesByNounType(nounType) {
|
|
1108
|
-
await this.ensureInitialized();
|
|
1109
|
-
try {
|
|
1110
|
-
const filteredNodes = [];
|
|
1111
|
-
let hasMore = true;
|
|
1112
|
-
let cursor = undefined;
|
|
1113
|
-
// Use pagination to process nodes in batches
|
|
1114
|
-
while (hasMore) {
|
|
1115
|
-
// Get a batch of nodes
|
|
1116
|
-
const result = await this.getNodesWithPagination({
|
|
1117
|
-
limit: 100,
|
|
1118
|
-
cursor,
|
|
1119
|
-
useCache: true
|
|
1120
|
-
});
|
|
1121
|
-
// Filter nodes by noun type using metadata
|
|
1122
|
-
for (const node of result.nodes) {
|
|
1123
|
-
const metadata = await this.getMetadata(node.id);
|
|
1124
|
-
if (metadata && metadata.noun === nounType) {
|
|
1125
|
-
filteredNodes.push(node);
|
|
1126
|
-
}
|
|
1127
|
-
}
|
|
1128
|
-
// Update pagination state
|
|
1129
|
-
hasMore = result.hasMore;
|
|
1130
|
-
cursor = result.nextCursor;
|
|
1131
|
-
// Safety check to prevent infinite loops
|
|
1132
|
-
if (!cursor && hasMore) {
|
|
1133
|
-
this.logger.warn('No cursor returned but hasMore is true, breaking loop');
|
|
1134
|
-
break;
|
|
1135
|
-
}
|
|
1136
|
-
}
|
|
1137
|
-
return filteredNodes;
|
|
1138
|
-
}
|
|
1139
|
-
catch (error) {
|
|
1140
|
-
this.logger.error(`Failed to get nodes by noun type ${nounType}:`, error);
|
|
1141
|
-
return [];
|
|
1142
|
-
}
|
|
1143
|
-
}
|
|
1144
|
-
/**
|
|
1145
|
-
* Delete a noun from storage (internal implementation)
|
|
1146
|
-
*/
|
|
1147
|
-
async deleteNoun_internal(id) {
|
|
1148
|
-
return this.deleteNode(id);
|
|
1149
|
-
}
|
|
1150
|
-
/**
|
|
1151
|
-
* Delete a node from storage
|
|
1152
|
-
*/
|
|
1153
|
-
async deleteNode(id) {
|
|
1154
|
-
await this.ensureInitialized();
|
|
1155
|
-
try {
|
|
1156
|
-
// Import the DeleteObjectCommand only when needed
|
|
1157
|
-
const { DeleteObjectCommand } = await import('@aws-sdk/client-s3');
|
|
1158
|
-
// Delete the node from S3-compatible storage
|
|
1159
|
-
await this.s3Client.send(new DeleteObjectCommand({
|
|
1160
|
-
Bucket: this.bucketName,
|
|
1161
|
-
Key: `${this.nounPrefix}${id}.json`
|
|
1162
|
-
}));
|
|
1163
|
-
// Log the change for efficient synchronization
|
|
1164
|
-
await this.appendToChangeLog({
|
|
1165
|
-
timestamp: Date.now(),
|
|
1166
|
-
operation: 'delete',
|
|
1167
|
-
entityType: 'noun',
|
|
1168
|
-
entityId: id
|
|
1169
|
-
});
|
|
1170
|
-
}
|
|
1171
|
-
catch (error) {
|
|
1172
|
-
this.logger.error(`Failed to delete node ${id}:`, error);
|
|
1173
|
-
throw new Error(`Failed to delete node ${id}: ${error}`);
|
|
1174
|
-
}
|
|
1175
|
-
}
|
|
1176
|
-
/**
|
|
1177
|
-
* Save a verb to storage (internal implementation)
|
|
1178
|
-
*/
|
|
1179
|
-
async saveVerb_internal(verb) {
|
|
1180
|
-
return this.saveEdge(verb);
|
|
1181
|
-
}
|
|
1182
|
-
/**
|
|
1183
|
-
* Save an edge to storage
|
|
1184
|
-
*/
|
|
1185
|
-
async saveEdge(edge) {
|
|
1186
|
-
await this.ensureInitialized();
|
|
1187
|
-
// ALWAYS check if we should use high-volume mode (critical for detection)
|
|
1188
|
-
this.checkVolumeMode();
|
|
1189
|
-
// Use write buffer in high-volume mode
|
|
1190
|
-
if (this.highVolumeMode && this.verbWriteBuffer) {
|
|
1191
|
-
this.logger.trace(`📝 BUFFERING: Adding verb ${edge.id} to write buffer (high-volume mode active)`);
|
|
1192
|
-
await this.verbWriteBuffer.add(edge.id, edge);
|
|
1193
|
-
return;
|
|
1194
|
-
}
|
|
1195
|
-
else if (!this.highVolumeMode) {
|
|
1196
|
-
this.logger.trace(`📝 DIRECT WRITE: Saving verb ${edge.id} directly (high-volume mode inactive)`);
|
|
1197
|
-
}
|
|
1198
|
-
// Apply backpressure before starting operation
|
|
1199
|
-
const requestId = await this.applyBackpressure();
|
|
1200
|
-
try {
|
|
1201
|
-
// Convert connections Map to a serializable format
|
|
1202
|
-
// CRITICAL: Only save lightweight vector data (no metadata)
|
|
1203
|
-
// Metadata is saved separately via saveVerbMetadata() (2-file system)
|
|
1204
|
-
// ARCHITECTURAL FIX (v3.50.1): Include core relational fields in verb vector file
|
|
1205
|
-
const serializableEdge = {
|
|
1206
|
-
id: edge.id,
|
|
1207
|
-
vector: edge.vector,
|
|
1208
|
-
connections: this.mapToObject(edge.connections, (set) => Array.from(set)),
|
|
1209
|
-
// CORE RELATIONAL DATA (v3.50.1+)
|
|
1210
|
-
verb: edge.verb,
|
|
1211
|
-
sourceId: edge.sourceId,
|
|
1212
|
-
targetId: edge.targetId,
|
|
1213
|
-
// NO metadata field - saved separately for scalability
|
|
1214
|
-
};
|
|
1215
|
-
// Import the PutObjectCommand only when needed
|
|
1216
|
-
const { PutObjectCommand } = await import('@aws-sdk/client-s3');
|
|
1217
|
-
// Save the edge to S3-compatible storage using sharding if available
|
|
1218
|
-
await this.s3Client.send(new PutObjectCommand({
|
|
1219
|
-
Bucket: this.bucketName,
|
|
1220
|
-
Key: this.getVerbKey(edge.id),
|
|
1221
|
-
Body: JSON.stringify(serializableEdge, null, 2),
|
|
1222
|
-
ContentType: 'application/json'
|
|
1223
|
-
}));
|
|
1224
|
-
// Log the change for efficient synchronization
|
|
1225
|
-
await this.appendToChangeLog({
|
|
1226
|
-
timestamp: Date.now(),
|
|
1227
|
-
operation: 'add', // Could be 'update' if we track existing edges
|
|
1228
|
-
entityType: 'verb',
|
|
1229
|
-
entityId: edge.id,
|
|
1230
|
-
data: {
|
|
1231
|
-
vector: edge.vector
|
|
1232
|
-
}
|
|
1233
|
-
});
|
|
1234
|
-
// Increment verb count - always increment total, and increment by type if metadata exists
|
|
1235
|
-
this.totalVerbCount++;
|
|
1236
|
-
const metadata = await this.getVerbMetadata(edge.id);
|
|
1237
|
-
if (metadata && metadata.type) {
|
|
1238
|
-
const currentCount = this.verbCounts.get(metadata.type) || 0;
|
|
1239
|
-
this.verbCounts.set(metadata.type, currentCount + 1);
|
|
1240
|
-
}
|
|
1241
|
-
// Release backpressure on success
|
|
1242
|
-
this.releaseBackpressure(true, requestId);
|
|
1243
|
-
}
|
|
1244
|
-
catch (error) {
|
|
1245
|
-
// Release backpressure on error
|
|
1246
|
-
this.releaseBackpressure(false, requestId);
|
|
1247
|
-
this.logger.error(`Failed to save edge ${edge.id}:`, error);
|
|
1248
|
-
throw new Error(`Failed to save edge ${edge.id}: ${error}`);
|
|
1249
|
-
}
|
|
1250
|
-
}
|
|
1251
|
-
/**
|
|
1252
|
-
* Get a verb from storage (internal implementation)
|
|
1253
|
-
* v4.0.0: Returns ONLY vector + core relational fields (no metadata field)
|
|
1254
|
-
* Base class combines with metadata via getVerb() -> HNSWVerbWithMetadata
|
|
1255
|
-
*/
|
|
1256
|
-
async getVerb_internal(id) {
|
|
1257
|
-
// v4.0.0: Return ONLY vector + core relational data (no metadata field)
|
|
1258
|
-
const edge = await this.getEdge(id);
|
|
1259
|
-
if (!edge) {
|
|
1260
|
-
return null;
|
|
1261
|
-
}
|
|
1262
|
-
// Return pure vector + core fields structure
|
|
1263
|
-
return edge;
|
|
1264
|
-
}
|
|
1265
|
-
/**
|
|
1266
|
-
* Get an edge from storage
|
|
1267
|
-
*/
|
|
1268
|
-
async getEdge(id) {
|
|
1269
|
-
await this.ensureInitialized();
|
|
1270
|
-
try {
|
|
1271
|
-
// Import the GetObjectCommand only when needed
|
|
1272
|
-
const { GetObjectCommand } = await import('@aws-sdk/client-s3');
|
|
1273
|
-
const key = this.getVerbKey(id);
|
|
1274
|
-
this.logger.trace(`Getting edge ${id} from key: ${key}`);
|
|
1275
|
-
// Try to get the edge from the verbs directory
|
|
1276
|
-
const response = await this.s3Client.send(new GetObjectCommand({
|
|
1277
|
-
Bucket: this.bucketName,
|
|
1278
|
-
Key: key
|
|
1279
|
-
}));
|
|
1280
|
-
// Check if response is null or undefined
|
|
1281
|
-
if (!response || !response.Body) {
|
|
1282
|
-
this.logger.trace(`No edge found for ${id}`);
|
|
1283
|
-
return null;
|
|
1284
|
-
}
|
|
1285
|
-
// Convert the response body to a string
|
|
1286
|
-
const bodyContents = await response.Body.transformToString();
|
|
1287
|
-
this.logger.trace(`Retrieved edge body for ${id}`);
|
|
1288
|
-
// Parse the JSON string
|
|
1289
|
-
try {
|
|
1290
|
-
const parsedEdge = JSON.parse(bodyContents);
|
|
1291
|
-
this.logger.trace(`Parsed edge data for ${id}`);
|
|
1292
|
-
// Ensure the parsed edge has the expected properties
|
|
1293
|
-
if (!parsedEdge ||
|
|
1294
|
-
!parsedEdge.id ||
|
|
1295
|
-
!parsedEdge.vector ||
|
|
1296
|
-
!parsedEdge.connections) {
|
|
1297
|
-
this.logger.warn(`Invalid edge data for ${id}`);
|
|
1298
|
-
return null;
|
|
1299
|
-
}
|
|
1300
|
-
// Convert serialized connections back to Map<number, Set<string>>
|
|
1301
|
-
const connections = new Map();
|
|
1302
|
-
for (const [level, nodeIds] of Object.entries(parsedEdge.connections)) {
|
|
1303
|
-
connections.set(Number(level), new Set(nodeIds));
|
|
1304
|
-
}
|
|
1305
|
-
// v4.0.0: Return HNSWVerb with core relational fields (NO metadata field)
|
|
1306
|
-
const edge = {
|
|
1307
|
-
id: parsedEdge.id,
|
|
1308
|
-
vector: parsedEdge.vector,
|
|
1309
|
-
connections,
|
|
1310
|
-
// CORE RELATIONAL DATA (read from vector file)
|
|
1311
|
-
verb: parsedEdge.verb,
|
|
1312
|
-
sourceId: parsedEdge.sourceId,
|
|
1313
|
-
targetId: parsedEdge.targetId
|
|
1314
|
-
// ✅ NO metadata field in v4.0.0
|
|
1315
|
-
// User metadata retrieved separately via getVerbMetadata()
|
|
1316
|
-
};
|
|
1317
|
-
this.logger.trace(`Successfully retrieved edge ${id}`);
|
|
1318
|
-
return edge;
|
|
1319
|
-
}
|
|
1320
|
-
catch (parseError) {
|
|
1321
|
-
this.logger.error(`Failed to parse edge data for ${id}:`, parseError);
|
|
1322
|
-
return null;
|
|
1323
|
-
}
|
|
1324
|
-
}
|
|
1325
|
-
catch (error) {
|
|
1326
|
-
// Edge not found or other error
|
|
1327
|
-
this.logger.trace(`Edge not found for ${id}`);
|
|
1328
|
-
return null;
|
|
1329
|
-
}
|
|
1330
|
-
}
|
|
1083
|
+
// v5.4.0: Removed 4 *_internal method overrides (getNounsByNounType_internal, deleteNoun_internal, saveVerb_internal, getVerb_internal)
|
|
1084
|
+
// Now inherit from BaseStorage's type-first implementation
|
|
1331
1085
|
/**
|
|
1332
1086
|
* Get all edges from storage
|
|
1333
1087
|
* @deprecated This method is deprecated and will be removed in a future version.
|
|
@@ -1465,174 +1219,9 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
1465
1219
|
this.logger.trace('Edge filtering is deprecated and not supported with the new storage pattern');
|
|
1466
1220
|
return true; // Return all edges since filtering requires metadata
|
|
1467
1221
|
}
|
|
1468
|
-
|
|
1469
|
-
|
|
1470
|
-
|
|
1471
|
-
* @returns Promise that resolves to a paginated result of verbs
|
|
1472
|
-
*/
|
|
1473
|
-
async getVerbsWithPagination(options = {}) {
|
|
1474
|
-
await this.ensureInitialized();
|
|
1475
|
-
// Convert filter to edge filter format
|
|
1476
|
-
const edgeFilter = {};
|
|
1477
|
-
if (options.filter) {
|
|
1478
|
-
// Handle sourceId filter
|
|
1479
|
-
if (options.filter.sourceId) {
|
|
1480
|
-
edgeFilter.sourceId = Array.isArray(options.filter.sourceId)
|
|
1481
|
-
? options.filter.sourceId[0]
|
|
1482
|
-
: options.filter.sourceId;
|
|
1483
|
-
}
|
|
1484
|
-
// Handle targetId filter
|
|
1485
|
-
if (options.filter.targetId) {
|
|
1486
|
-
edgeFilter.targetId = Array.isArray(options.filter.targetId)
|
|
1487
|
-
? options.filter.targetId[0]
|
|
1488
|
-
: options.filter.targetId;
|
|
1489
|
-
}
|
|
1490
|
-
// Handle verbType filter
|
|
1491
|
-
if (options.filter.verbType) {
|
|
1492
|
-
edgeFilter.type = Array.isArray(options.filter.verbType)
|
|
1493
|
-
? options.filter.verbType[0]
|
|
1494
|
-
: options.filter.verbType;
|
|
1495
|
-
}
|
|
1496
|
-
}
|
|
1497
|
-
// Get edges with pagination
|
|
1498
|
-
const result = await this.getEdgesWithPagination({
|
|
1499
|
-
limit: options.limit,
|
|
1500
|
-
cursor: options.cursor,
|
|
1501
|
-
useCache: true,
|
|
1502
|
-
filter: edgeFilter
|
|
1503
|
-
});
|
|
1504
|
-
// v4.0.0: Convert HNSWVerbs to HNSWVerbWithMetadata by combining with metadata
|
|
1505
|
-
const verbsWithMetadata = [];
|
|
1506
|
-
for (const hnswVerb of result.edges) {
|
|
1507
|
-
const metadata = await this.getVerbMetadata(hnswVerb.id);
|
|
1508
|
-
// v4.8.0: Extract standard fields from metadata to top-level
|
|
1509
|
-
const metadataObj = (metadata || {});
|
|
1510
|
-
const { createdAt, updatedAt, confidence, weight, service, data, createdBy, ...customMetadata } = metadataObj;
|
|
1511
|
-
const verbWithMetadata = {
|
|
1512
|
-
id: hnswVerb.id,
|
|
1513
|
-
vector: [...hnswVerb.vector],
|
|
1514
|
-
connections: new Map(hnswVerb.connections),
|
|
1515
|
-
verb: hnswVerb.verb,
|
|
1516
|
-
sourceId: hnswVerb.sourceId,
|
|
1517
|
-
targetId: hnswVerb.targetId,
|
|
1518
|
-
createdAt: createdAt || Date.now(),
|
|
1519
|
-
updatedAt: updatedAt || Date.now(),
|
|
1520
|
-
confidence: confidence,
|
|
1521
|
-
weight: weight,
|
|
1522
|
-
service: service,
|
|
1523
|
-
data: data,
|
|
1524
|
-
createdBy,
|
|
1525
|
-
metadata: customMetadata
|
|
1526
|
-
};
|
|
1527
|
-
verbsWithMetadata.push(verbWithMetadata);
|
|
1528
|
-
}
|
|
1529
|
-
// Apply filtering at HNSWVerbWithMetadata level
|
|
1530
|
-
// v4.0.0: Core fields (verb, sourceId, targetId) are in HNSWVerb, not metadata
|
|
1531
|
-
let filteredVerbs = verbsWithMetadata;
|
|
1532
|
-
if (options.filter) {
|
|
1533
|
-
filteredVerbs = verbsWithMetadata.filter((verbWithMetadata) => {
|
|
1534
|
-
// Filter by sourceId
|
|
1535
|
-
if (options.filter.sourceId) {
|
|
1536
|
-
const sourceIds = Array.isArray(options.filter.sourceId)
|
|
1537
|
-
? options.filter.sourceId
|
|
1538
|
-
: [options.filter.sourceId];
|
|
1539
|
-
if (!verbWithMetadata.sourceId || !sourceIds.includes(verbWithMetadata.sourceId)) {
|
|
1540
|
-
return false;
|
|
1541
|
-
}
|
|
1542
|
-
}
|
|
1543
|
-
// Filter by targetId
|
|
1544
|
-
if (options.filter.targetId) {
|
|
1545
|
-
const targetIds = Array.isArray(options.filter.targetId)
|
|
1546
|
-
? options.filter.targetId
|
|
1547
|
-
: [options.filter.targetId];
|
|
1548
|
-
if (!verbWithMetadata.targetId || !targetIds.includes(verbWithMetadata.targetId)) {
|
|
1549
|
-
return false;
|
|
1550
|
-
}
|
|
1551
|
-
}
|
|
1552
|
-
// Filter by verbType
|
|
1553
|
-
if (options.filter.verbType) {
|
|
1554
|
-
const verbTypes = Array.isArray(options.filter.verbType)
|
|
1555
|
-
? options.filter.verbType
|
|
1556
|
-
: [options.filter.verbType];
|
|
1557
|
-
if (!verbWithMetadata.verb || !verbTypes.includes(verbWithMetadata.verb)) {
|
|
1558
|
-
return false;
|
|
1559
|
-
}
|
|
1560
|
-
}
|
|
1561
|
-
return true;
|
|
1562
|
-
});
|
|
1563
|
-
}
|
|
1564
|
-
return {
|
|
1565
|
-
items: filteredVerbs,
|
|
1566
|
-
totalCount: this.totalVerbCount, // Use pre-calculated count from init()
|
|
1567
|
-
hasMore: result.hasMore,
|
|
1568
|
-
nextCursor: result.nextCursor
|
|
1569
|
-
};
|
|
1570
|
-
}
|
|
1571
|
-
/**
|
|
1572
|
-
* Get verbs by source (internal implementation)
|
|
1573
|
-
*/
|
|
1574
|
-
async getVerbsBySource_internal(sourceId) {
|
|
1575
|
-
// Use the paginated approach to properly handle HNSWVerb to GraphVerb conversion
|
|
1576
|
-
const result = await this.getVerbsWithPagination({
|
|
1577
|
-
filter: { sourceId: [sourceId] },
|
|
1578
|
-
limit: Number.MAX_SAFE_INTEGER // Get all matching results
|
|
1579
|
-
});
|
|
1580
|
-
return result.items;
|
|
1581
|
-
}
|
|
1582
|
-
/**
|
|
1583
|
-
* Get verbs by target (internal implementation)
|
|
1584
|
-
*/
|
|
1585
|
-
async getVerbsByTarget_internal(targetId) {
|
|
1586
|
-
// Use the paginated approach to properly handle HNSWVerb to GraphVerb conversion
|
|
1587
|
-
const result = await this.getVerbsWithPagination({
|
|
1588
|
-
filter: { targetId: [targetId] },
|
|
1589
|
-
limit: Number.MAX_SAFE_INTEGER // Get all matching results
|
|
1590
|
-
});
|
|
1591
|
-
return result.items;
|
|
1592
|
-
}
|
|
1593
|
-
/**
|
|
1594
|
-
* Get verbs by type (internal implementation)
|
|
1595
|
-
*/
|
|
1596
|
-
async getVerbsByType_internal(type) {
|
|
1597
|
-
// Use the paginated approach to properly handle HNSWVerb to GraphVerb conversion
|
|
1598
|
-
const result = await this.getVerbsWithPagination({
|
|
1599
|
-
filter: { verbType: [type] },
|
|
1600
|
-
limit: Number.MAX_SAFE_INTEGER // Get all matching results
|
|
1601
|
-
});
|
|
1602
|
-
return result.items;
|
|
1603
|
-
}
|
|
1604
|
-
/**
|
|
1605
|
-
* Delete a verb from storage (internal implementation)
|
|
1606
|
-
*/
|
|
1607
|
-
async deleteVerb_internal(id) {
|
|
1608
|
-
return this.deleteEdge(id);
|
|
1609
|
-
}
|
|
1610
|
-
/**
|
|
1611
|
-
* Delete an edge from storage
|
|
1612
|
-
*/
|
|
1613
|
-
async deleteEdge(id) {
|
|
1614
|
-
await this.ensureInitialized();
|
|
1615
|
-
try {
|
|
1616
|
-
// Import the DeleteObjectCommand only when needed
|
|
1617
|
-
const { DeleteObjectCommand } = await import('@aws-sdk/client-s3');
|
|
1618
|
-
// Delete the edge from S3-compatible storage
|
|
1619
|
-
await this.s3Client.send(new DeleteObjectCommand({
|
|
1620
|
-
Bucket: this.bucketName,
|
|
1621
|
-
Key: `${this.verbPrefix}${id}.json`
|
|
1622
|
-
}));
|
|
1623
|
-
// Log the change for efficient synchronization
|
|
1624
|
-
await this.appendToChangeLog({
|
|
1625
|
-
timestamp: Date.now(),
|
|
1626
|
-
operation: 'delete',
|
|
1627
|
-
entityType: 'verb',
|
|
1628
|
-
entityId: id
|
|
1629
|
-
});
|
|
1630
|
-
}
|
|
1631
|
-
catch (error) {
|
|
1632
|
-
this.logger.error(`Failed to delete edge ${id}:`, error);
|
|
1633
|
-
throw new Error(`Failed to delete edge ${id}: ${error}`);
|
|
1634
|
-
}
|
|
1635
|
-
}
|
|
1222
|
+
// v5.4.0: Removed getVerbsWithPagination override - use BaseStorage's type-first implementation
|
|
1223
|
+
// v5.4.0: Removed 4 more *_internal method overrides (getVerbsBySource, getVerbsByTarget, getVerbsByType, deleteVerb)
|
|
1224
|
+
// Total: 10 *_internal methods removed (saveNoun_internal, getNoun_internal, plus the 8 noted above) - all now inherit from BaseStorage's type-first implementation
|
|
1636
1225
|
/**
|
|
1637
1226
|
* Primitive operation: Write object to path
|
|
1638
1227
|
* All metadata operations use this internally via base class routing
|
|
@@ -2885,83 +2474,7 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
2885
2474
|
this.logger.warn('Failed to cleanup expired locks:', error);
|
|
2886
2475
|
}
|
|
2887
2476
|
}
|
|
2888
|
-
|
|
2889
|
-
* Get nouns with pagination support
|
|
2890
|
-
* @param options Pagination options
|
|
2891
|
-
* @returns Promise that resolves to a paginated result of nouns
|
|
2892
|
-
*/
|
|
2893
|
-
async getNounsWithPagination(options = {}) {
|
|
2894
|
-
await this.ensureInitialized();
|
|
2895
|
-
const limit = options.limit || 100;
|
|
2896
|
-
const cursor = options.cursor;
|
|
2897
|
-
// Get paginated nodes
|
|
2898
|
-
const result = await this.getNodesWithPagination({
|
|
2899
|
-
limit,
|
|
2900
|
-
cursor,
|
|
2901
|
-
useCache: true
|
|
2902
|
-
});
|
|
2903
|
-
// v4.0.0: Combine nodes with metadata to create HNSWNounWithMetadata[]
|
|
2904
|
-
const nounsWithMetadata = [];
|
|
2905
|
-
for (const node of result.nodes) {
|
|
2906
|
-
// FIX v4.7.4: Don't skip nouns without metadata - metadata is optional in v4.0.0
|
|
2907
|
-
const metadata = await this.getNounMetadata(node.id);
|
|
2908
|
-
// Apply filters if provided
|
|
2909
|
-
if (options.filter && metadata) {
|
|
2910
|
-
// Filter by noun type
|
|
2911
|
-
if (options.filter.nounType) {
|
|
2912
|
-
const nounTypes = Array.isArray(options.filter.nounType)
|
|
2913
|
-
? options.filter.nounType
|
|
2914
|
-
: [options.filter.nounType];
|
|
2915
|
-
const nounType = (metadata.type || metadata.noun);
|
|
2916
|
-
if (!nounType || !nounTypes.includes(nounType)) {
|
|
2917
|
-
continue;
|
|
2918
|
-
}
|
|
2919
|
-
}
|
|
2920
|
-
// Filter by service
|
|
2921
|
-
if (options.filter.service) {
|
|
2922
|
-
const services = Array.isArray(options.filter.service)
|
|
2923
|
-
? options.filter.service
|
|
2924
|
-
: [options.filter.service];
|
|
2925
|
-
if (!metadata.service || !services.includes(metadata.service)) {
|
|
2926
|
-
continue;
|
|
2927
|
-
}
|
|
2928
|
-
}
|
|
2929
|
-
// Filter by metadata fields
|
|
2930
|
-
if (options.filter.metadata) {
|
|
2931
|
-
const metadataFilter = options.filter.metadata;
|
|
2932
|
-
const matches = Object.entries(metadataFilter).every(([key, value]) => metadata[key] === value);
|
|
2933
|
-
if (!matches) {
|
|
2934
|
-
continue;
|
|
2935
|
-
}
|
|
2936
|
-
}
|
|
2937
|
-
}
|
|
2938
|
-
// v4.8.0: Extract standard fields from metadata to top-level
|
|
2939
|
-
const metadataObj = (metadata || {});
|
|
2940
|
-
const { noun: nounType, createdAt, updatedAt, confidence, weight, service, data, createdBy, ...customMetadata } = metadataObj;
|
|
2941
|
-
const nounWithMetadata = {
|
|
2942
|
-
id: node.id,
|
|
2943
|
-
vector: [...node.vector],
|
|
2944
|
-
connections: new Map(node.connections),
|
|
2945
|
-
level: node.level || 0,
|
|
2946
|
-
type: nounType || NounType.Thing,
|
|
2947
|
-
createdAt: createdAt || Date.now(),
|
|
2948
|
-
updatedAt: updatedAt || Date.now(),
|
|
2949
|
-
confidence: confidence,
|
|
2950
|
-
weight: weight,
|
|
2951
|
-
service: service,
|
|
2952
|
-
data: data,
|
|
2953
|
-
createdBy,
|
|
2954
|
-
metadata: customMetadata
|
|
2955
|
-
};
|
|
2956
|
-
nounsWithMetadata.push(nounWithMetadata);
|
|
2957
|
-
}
|
|
2958
|
-
return {
|
|
2959
|
-
items: nounsWithMetadata,
|
|
2960
|
-
totalCount: this.totalNounCount, // Use pre-calculated count from init()
|
|
2961
|
-
hasMore: result.hasMore,
|
|
2962
|
-
nextCursor: result.nextCursor
|
|
2963
|
-
};
|
|
2964
|
-
}
|
|
2477
|
+
// v5.4.0: Removed getNounsWithPagination override - use BaseStorage's type-first implementation
|
|
2965
2478
|
/**
|
|
2966
2479
|
* Estimate total noun count by listing objects across all shards
|
|
2967
2480
|
* This is more efficient than loading all nouns
|
|
@@ -3087,116 +2600,84 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
3087
2600
|
// HNSW Index Persistence (v3.35.0+)
|
|
3088
2601
|
/**
|
|
3089
2602
|
* Get a noun's vector for HNSW rebuild
|
|
2603
|
+
* v5.4.0: Uses BaseStorage's getNoun (type-first paths)
|
|
3090
2604
|
*/
|
|
3091
2605
|
async getNounVector(id) {
|
|
3092
|
-
await this.
|
|
3093
|
-
const noun = await this.getNode(id);
|
|
2606
|
+
const noun = await this.getNoun(id);
|
|
3094
2607
|
return noun ? noun.vector : null;
|
|
3095
2608
|
}
|
|
3096
2609
|
/**
|
|
3097
2610
|
* Save HNSW graph data for a noun
|
|
3098
|
-
*
|
|
2611
|
+
*
|
|
2612
|
+
* v5.4.0: Uses BaseStorage's getNoun/saveNoun (type-first paths)
|
|
2613
|
+
* CRITICAL: Uses mutex locking to prevent read-modify-write races
|
|
3099
2614
|
*/
|
|
3100
2615
|
async saveHNSWData(nounId, hnswData) {
|
|
3101
|
-
|
|
3102
|
-
|
|
3103
|
-
//
|
|
3104
|
-
//
|
|
3105
|
-
//
|
|
3106
|
-
//
|
|
3107
|
-
//
|
|
3108
|
-
//
|
|
3109
|
-
|
|
3110
|
-
|
|
3111
|
-
|
|
3112
|
-
|
|
3113
|
-
|
|
3114
|
-
|
|
3115
|
-
|
|
3116
|
-
|
|
3117
|
-
|
|
3118
|
-
|
|
3119
|
-
|
|
3120
|
-
|
|
3121
|
-
|
|
3122
|
-
|
|
3123
|
-
|
|
3124
|
-
|
|
3125
|
-
|
|
3126
|
-
|
|
3127
|
-
|
|
3128
|
-
|
|
3129
|
-
|
|
3130
|
-
|
|
3131
|
-
|
|
3132
|
-
|
|
3133
|
-
|
|
3134
|
-
|
|
3135
|
-
|
|
3136
|
-
|
|
3137
|
-
|
|
3138
|
-
|
|
3139
|
-
|
|
3140
|
-
|
|
3141
|
-
|
|
3142
|
-
|
|
3143
|
-
|
|
3144
|
-
Body: JSON.stringify(updatedNode, null, 2),
|
|
3145
|
-
ContentType: 'application/json',
|
|
3146
|
-
...(currentETag
|
|
3147
|
-
? { IfMatch: currentETag }
|
|
3148
|
-
: { IfNoneMatch: '*' }) // Only create if doesn't exist
|
|
3149
|
-
}));
|
|
3150
|
-
// Success! Exit retry loop
|
|
3151
|
-
return;
|
|
3152
|
-
}
|
|
3153
|
-
catch (error) {
|
|
3154
|
-
// Precondition failed - concurrent modification detected
|
|
3155
|
-
if (error.name === 'PreconditionFailed' || error.Code === 'PreconditionFailed') {
|
|
3156
|
-
if (attempt === maxRetries - 1) {
|
|
3157
|
-
this.logger.error(`Max retries (${maxRetries}) exceeded for ${nounId} - concurrent modification conflict`);
|
|
3158
|
-
throw new Error(`Failed to save HNSW data for ${nounId}: max retries exceeded due to concurrent modifications`);
|
|
3159
|
-
}
|
|
3160
|
-
// Exponential backoff: 50ms, 100ms, 200ms, 400ms, 800ms
|
|
3161
|
-
const backoffMs = 50 * Math.pow(2, attempt);
|
|
3162
|
-
await new Promise(resolve => setTimeout(resolve, backoffMs));
|
|
3163
|
-
continue;
|
|
3164
|
-
}
|
|
3165
|
-
// Other error - rethrow
|
|
3166
|
-
this.logger.error(`Failed to save HNSW data for ${nounId}:`, error);
|
|
3167
|
-
throw new Error(`Failed to save HNSW data for ${nounId}: ${error}`);
|
|
3168
|
-
}
|
|
2616
|
+
const lockKey = `hnsw/${nounId}`;
|
|
2617
|
+
// CRITICAL FIX (v4.10.1): Mutex lock to prevent read-modify-write races
|
|
2618
|
+
// Problem: Without mutex, concurrent operations can:
|
|
2619
|
+
// 1. Thread A reads noun (connections: [1,2,3])
|
|
2620
|
+
// 2. Thread B reads noun (connections: [1,2,3])
|
|
2621
|
+
// 3. Thread A adds connection 4, writes [1,2,3,4]
|
|
2622
|
+
// 4. Thread B adds connection 5, writes [1,2,3,5] ← Connection 4 LOST!
|
|
2623
|
+
// Solution: Mutex serializes operations per entity (like FileSystem/OPFS adapters)
|
|
2624
|
+
// Production scale: Prevents corruption at 1000+ concurrent operations
|
|
2625
|
+
// Wait for any pending operations on this entity
|
|
2626
|
+
while (this.hnswLocks.has(lockKey)) {
|
|
2627
|
+
await this.hnswLocks.get(lockKey);
|
|
2628
|
+
}
|
|
2629
|
+
// Acquire lock
|
|
2630
|
+
let releaseLock;
|
|
2631
|
+
const lockPromise = new Promise(resolve => { releaseLock = resolve; });
|
|
2632
|
+
this.hnswLocks.set(lockKey, lockPromise);
|
|
2633
|
+
try {
|
|
2634
|
+
// v5.4.0: Use BaseStorage's getNoun (type-first paths)
|
|
2635
|
+
// Read existing noun data (if exists)
|
|
2636
|
+
const existingNoun = await this.getNoun(nounId);
|
|
2637
|
+
if (!existingNoun) {
|
|
2638
|
+
// Noun doesn't exist - cannot update HNSW data for non-existent noun
|
|
2639
|
+
throw new Error(`Cannot save HNSW data: noun ${nounId} not found`);
|
|
2640
|
+
}
|
|
2641
|
+
// Convert connections from Record to Map format for storage
|
|
2642
|
+
const connectionsMap = new Map();
|
|
2643
|
+
for (const [level, nodeIds] of Object.entries(hnswData.connections)) {
|
|
2644
|
+
connectionsMap.set(Number(level), new Set(nodeIds));
|
|
2645
|
+
}
|
|
2646
|
+
// Preserve id and vector, update only HNSW graph metadata
|
|
2647
|
+
const updatedNoun = {
|
|
2648
|
+
...existingNoun,
|
|
2649
|
+
level: hnswData.level,
|
|
2650
|
+
connections: connectionsMap
|
|
2651
|
+
};
|
|
2652
|
+
// v5.4.0: Use BaseStorage's saveNoun (type-first paths, atomic write via writeObjectToBranch)
|
|
2653
|
+
await this.saveNoun(updatedNoun);
|
|
2654
|
+
}
|
|
2655
|
+
finally {
|
|
2656
|
+
// Release lock (ALWAYS runs, even if error thrown)
|
|
2657
|
+
this.hnswLocks.delete(lockKey);
|
|
2658
|
+
releaseLock();
|
|
3169
2659
|
}
|
|
3170
2660
|
}
|
|
3171
2661
|
/**
|
|
3172
2662
|
* Get HNSW graph data for a noun
|
|
3173
|
-
*
|
|
2663
|
+
* v5.4.0: Uses BaseStorage's getNoun (type-first paths)
|
|
3174
2664
|
*/
|
|
3175
2665
|
async getHNSWData(nounId) {
|
|
3176
|
-
await this.
|
|
3177
|
-
|
|
3178
|
-
|
|
3179
|
-
const shard = getShardIdFromUuid(nounId);
|
|
3180
|
-
const key = `entities/nouns/hnsw/${shard}/${nounId}.json`;
|
|
3181
|
-
const response = await this.s3Client.send(new GetObjectCommand({
|
|
3182
|
-
Bucket: this.bucketName,
|
|
3183
|
-
Key: key
|
|
3184
|
-
}));
|
|
3185
|
-
if (!response || !response.Body) {
|
|
3186
|
-
return null;
|
|
3187
|
-
}
|
|
3188
|
-
const bodyContents = await response.Body.transformToString();
|
|
3189
|
-
return JSON.parse(bodyContents);
|
|
2666
|
+
const noun = await this.getNoun(nounId);
|
|
2667
|
+
if (!noun) {
|
|
2668
|
+
return null;
|
|
3190
2669
|
}
|
|
3191
|
-
|
|
3192
|
-
|
|
3193
|
-
|
|
3194
|
-
|
|
3195
|
-
|
|
2670
|
+
// Convert connections from Map to Record format
|
|
2671
|
+
const connectionsRecord = {};
|
|
2672
|
+
if (noun.connections) {
|
|
2673
|
+
for (const [level, nodeIds] of noun.connections.entries()) {
|
|
2674
|
+
connectionsRecord[String(level)] = Array.from(nodeIds);
|
|
3196
2675
|
}
|
|
3197
|
-
this.logger.error(`Failed to get HNSW data for ${nounId}:`, error);
|
|
3198
|
-
throw new Error(`Failed to get HNSW data for ${nounId}: ${error}`);
|
|
3199
2676
|
}
|
|
2677
|
+
return {
|
|
2678
|
+
level: noun.level || 0,
|
|
2679
|
+
connections: connectionsRecord
|
|
2680
|
+
};
|
|
3200
2681
|
}
|
|
3201
2682
|
/**
|
|
3202
2683
|
* Save HNSW system data (entry point, max level)
|