@soulcraft/brainy 5.3.6 → 5.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +65 -0
- package/dist/brainy.d.ts +61 -0
- package/dist/brainy.js +179 -23
- package/dist/storage/adapters/azureBlobStorage.d.ts +13 -64
- package/dist/storage/adapters/azureBlobStorage.js +78 -388
- package/dist/storage/adapters/fileSystemStorage.d.ts +12 -78
- package/dist/storage/adapters/fileSystemStorage.js +49 -395
- package/dist/storage/adapters/gcsStorage.d.ts +13 -134
- package/dist/storage/adapters/gcsStorage.js +79 -557
- package/dist/storage/adapters/historicalStorageAdapter.d.ts +181 -0
- package/dist/storage/adapters/historicalStorageAdapter.js +332 -0
- package/dist/storage/adapters/memoryStorage.d.ts +4 -113
- package/dist/storage/adapters/memoryStorage.js +34 -471
- package/dist/storage/adapters/opfsStorage.d.ts +14 -127
- package/dist/storage/adapters/opfsStorage.js +44 -693
- package/dist/storage/adapters/r2Storage.d.ts +8 -41
- package/dist/storage/adapters/r2Storage.js +49 -237
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +13 -111
- package/dist/storage/adapters/s3CompatibleStorage.js +77 -596
- package/dist/storage/baseStorage.d.ts +78 -38
- package/dist/storage/baseStorage.js +692 -23
- package/dist/storage/cow/BlobStorage.d.ts +2 -2
- package/dist/storage/cow/BlobStorage.js +4 -4
- package/dist/storage/storageFactory.d.ts +2 -3
- package/dist/storage/storageFactory.js +114 -66
- package/dist/vfs/types.d.ts +6 -2
- package/package.json +1 -1
|
@@ -11,7 +11,6 @@
|
|
|
11
11
|
*
|
|
12
12
|
* v4.0.0: Fully compatible with metadata/vector separation architecture
|
|
13
13
|
*/
|
|
14
|
-
import { NounType } from '../../coreTypes.js';
|
|
15
14
|
import { BaseStorage, SYSTEM_DIR, STATISTICS_KEY, getDirectoryPath } from '../baseStorage.js';
|
|
16
15
|
import { BrainyError } from '../../errors/brainyError.js';
|
|
17
16
|
import { CacheManager } from '../cacheManager.js';
|
|
@@ -31,6 +30,12 @@ const MAX_AZURE_PAGE_SIZE = 5000;
|
|
|
31
30
|
* 2. Connection String - if connectionString provided
|
|
32
31
|
* 3. Storage Account Key - if accountName + accountKey provided
|
|
33
32
|
* 4. SAS Token - if accountName + sasToken provided
|
|
33
|
+
*
|
|
34
|
+
* v5.4.0: Type-aware storage now built into BaseStorage
|
|
35
|
+
* - Removed 10 *_internal method overrides (now inherit from BaseStorage's type-first implementation)
|
|
36
|
+
* - Removed pagination overrides
|
|
37
|
+
* - Updated HNSW methods to use BaseStorage's getNoun/saveNoun (type-first paths)
|
|
38
|
+
* - All operations now use type-first paths: entities/nouns/{type}/vectors/{shard}/{id}.json
|
|
34
39
|
*/
|
|
35
40
|
export class AzureBlobStorage extends BaseStorage {
|
|
36
41
|
/**
|
|
@@ -61,6 +66,8 @@ export class AzureBlobStorage extends BaseStorage {
|
|
|
61
66
|
this.forceHighVolumeMode = false; // Environment variable override
|
|
62
67
|
// Module logger
|
|
63
68
|
this.logger = createModuleLogger('AzureBlobStorage');
|
|
69
|
+
// v5.4.0: HNSW mutex locks to prevent read-modify-write races
|
|
70
|
+
this.hnswLocks = new Map();
|
|
64
71
|
this.containerName = options.containerName;
|
|
65
72
|
this.connectionString = options.connectionString;
|
|
66
73
|
this.accountName = options.accountName;
|
|
@@ -315,12 +322,7 @@ export class AzureBlobStorage extends BaseStorage {
|
|
|
315
322
|
});
|
|
316
323
|
await Promise.all(writes);
|
|
317
324
|
}
|
|
318
|
-
/**
|
|
319
|
-
* Save a noun to storage (internal implementation)
|
|
320
|
-
*/
|
|
321
|
-
async saveNoun_internal(noun) {
|
|
322
|
-
return this.saveNode(noun);
|
|
323
|
-
}
|
|
325
|
+
// v5.4.0: Removed saveNoun_internal - now inherit from BaseStorage's type-first implementation
|
|
324
326
|
/**
|
|
325
327
|
* Save a node to storage
|
|
326
328
|
*/
|
|
@@ -393,20 +395,7 @@ export class AzureBlobStorage extends BaseStorage {
|
|
|
393
395
|
throw new Error(`Failed to save node ${node.id}: ${error}`);
|
|
394
396
|
}
|
|
395
397
|
}
|
|
396
|
-
/**
|
|
397
|
-
* Get a noun from storage (internal implementation)
|
|
398
|
-
* v4.0.0: Returns ONLY vector data (no metadata field)
|
|
399
|
-
* Base class combines with metadata via getNoun() -> HNSWNounWithMetadata
|
|
400
|
-
*/
|
|
401
|
-
async getNoun_internal(id) {
|
|
402
|
-
// v4.0.0: Return ONLY vector data (no metadata field)
|
|
403
|
-
const node = await this.getNode(id);
|
|
404
|
-
if (!node) {
|
|
405
|
-
return null;
|
|
406
|
-
}
|
|
407
|
-
// Return pure vector structure
|
|
408
|
-
return node;
|
|
409
|
-
}
|
|
398
|
+
// v5.4.0: Removed getNoun_internal - now inherit from BaseStorage's type-first implementation
|
|
410
399
|
/**
|
|
411
400
|
* Get a node from storage
|
|
412
401
|
*/
|
|
@@ -490,45 +479,7 @@ export class AzureBlobStorage extends BaseStorage {
|
|
|
490
479
|
throw BrainyError.fromError(error, `getNoun(${id})`);
|
|
491
480
|
}
|
|
492
481
|
}
|
|
493
|
-
/**
|
|
494
|
-
* Delete a noun from storage (internal implementation)
|
|
495
|
-
*/
|
|
496
|
-
async deleteNoun_internal(id) {
|
|
497
|
-
await this.ensureInitialized();
|
|
498
|
-
const requestId = await this.applyBackpressure();
|
|
499
|
-
try {
|
|
500
|
-
this.logger.trace(`Deleting noun ${id}`);
|
|
501
|
-
// Get the Azure blob name
|
|
502
|
-
const blobName = this.getNounKey(id);
|
|
503
|
-
// Delete from Azure
|
|
504
|
-
const blockBlobClient = this.containerClient.getBlockBlobClient(blobName);
|
|
505
|
-
await blockBlobClient.delete();
|
|
506
|
-
// Remove from cache
|
|
507
|
-
this.nounCacheManager.delete(id);
|
|
508
|
-
// Decrement noun count
|
|
509
|
-
const metadata = await this.getNounMetadata(id);
|
|
510
|
-
if (metadata && metadata.type) {
|
|
511
|
-
await this.decrementEntityCountSafe(metadata.type);
|
|
512
|
-
}
|
|
513
|
-
this.logger.trace(`Noun ${id} deleted successfully`);
|
|
514
|
-
this.releaseBackpressure(true, requestId);
|
|
515
|
-
}
|
|
516
|
-
catch (error) {
|
|
517
|
-
this.releaseBackpressure(false, requestId);
|
|
518
|
-
if (error.statusCode === 404 || error.code === 'BlobNotFound') {
|
|
519
|
-
// Already deleted
|
|
520
|
-
this.logger.trace(`Noun ${id} not found (already deleted)`);
|
|
521
|
-
return;
|
|
522
|
-
}
|
|
523
|
-
// Handle throttling
|
|
524
|
-
if (this.isThrottlingError(error)) {
|
|
525
|
-
await this.handleThrottling(error);
|
|
526
|
-
throw error;
|
|
527
|
-
}
|
|
528
|
-
this.logger.error(`Failed to delete noun ${id}:`, error);
|
|
529
|
-
throw new Error(`Failed to delete noun ${id}: ${error}`);
|
|
530
|
-
}
|
|
531
|
-
}
|
|
482
|
+
// v5.4.0: Removed deleteNoun_internal - now inherit from BaseStorage's type-first implementation
|
|
532
483
|
/**
|
|
533
484
|
* Write an object to a specific path in Azure
|
|
534
485
|
* Primitive operation required by base class
|
|
@@ -762,12 +713,7 @@ export class AzureBlobStorage extends BaseStorage {
|
|
|
762
713
|
readableStream.on('error', reject);
|
|
763
714
|
});
|
|
764
715
|
}
|
|
765
|
-
/**
|
|
766
|
-
* Save a verb to storage (internal implementation)
|
|
767
|
-
*/
|
|
768
|
-
async saveVerb_internal(verb) {
|
|
769
|
-
return this.saveEdge(verb);
|
|
770
|
-
}
|
|
716
|
+
// v5.4.0: Removed saveVerb_internal - now inherit from BaseStorage's type-first implementation
|
|
771
717
|
/**
|
|
772
718
|
* Save an edge to storage
|
|
773
719
|
*/
|
|
@@ -832,20 +778,7 @@ export class AzureBlobStorage extends BaseStorage {
|
|
|
832
778
|
throw new Error(`Failed to save edge ${edge.id}: ${error}`);
|
|
833
779
|
}
|
|
834
780
|
}
|
|
835
|
-
/**
|
|
836
|
-
* Get a verb from storage (internal implementation)
|
|
837
|
-
* v4.0.0: Returns ONLY vector + core relational fields (no metadata field)
|
|
838
|
-
* Base class combines with metadata via getVerb() -> HNSWVerbWithMetadata
|
|
839
|
-
*/
|
|
840
|
-
async getVerb_internal(id) {
|
|
841
|
-
// v4.0.0: Return ONLY vector + core relational data (no metadata field)
|
|
842
|
-
const edge = await this.getEdge(id);
|
|
843
|
-
if (!edge) {
|
|
844
|
-
return null;
|
|
845
|
-
}
|
|
846
|
-
// Return pure vector + core fields structure
|
|
847
|
-
return edge;
|
|
848
|
-
}
|
|
781
|
+
// v5.4.0: Removed getVerb_internal - now inherit from BaseStorage's type-first implementation
|
|
849
782
|
/**
|
|
850
783
|
* Get an edge from storage
|
|
851
784
|
*/
|
|
@@ -906,236 +839,10 @@ export class AzureBlobStorage extends BaseStorage {
|
|
|
906
839
|
throw BrainyError.fromError(error, `getVerb(${id})`);
|
|
907
840
|
}
|
|
908
841
|
}
|
|
909
|
-
/**
|
|
910
|
-
* Delete a verb from storage (internal implementation)
|
|
911
|
-
*/
|
|
912
|
-
async deleteVerb_internal(id) {
|
|
913
|
-
await this.ensureInitialized();
|
|
914
|
-
const requestId = await this.applyBackpressure();
|
|
915
|
-
try {
|
|
916
|
-
this.logger.trace(`Deleting verb ${id}`);
|
|
917
|
-
// Get the Azure blob name
|
|
918
|
-
const blobName = this.getVerbKey(id);
|
|
919
|
-
// Delete from Azure
|
|
920
|
-
const blockBlobClient = this.containerClient.getBlockBlobClient(blobName);
|
|
921
|
-
await blockBlobClient.delete();
|
|
922
|
-
// Remove from cache
|
|
923
|
-
this.verbCacheManager.delete(id);
|
|
924
|
-
// Decrement verb count
|
|
925
|
-
const metadata = await this.getVerbMetadata(id);
|
|
926
|
-
if (metadata && metadata.type) {
|
|
927
|
-
await this.decrementVerbCount(metadata.type);
|
|
928
|
-
}
|
|
929
|
-
this.logger.trace(`Verb ${id} deleted successfully`);
|
|
930
|
-
this.releaseBackpressure(true, requestId);
|
|
931
|
-
}
|
|
932
|
-
catch (error) {
|
|
933
|
-
this.releaseBackpressure(false, requestId);
|
|
934
|
-
if (error.statusCode === 404 || error.code === 'BlobNotFound') {
|
|
935
|
-
// Already deleted
|
|
936
|
-
this.logger.trace(`Verb ${id} not found (already deleted)`);
|
|
937
|
-
return;
|
|
938
|
-
}
|
|
939
|
-
if (this.isThrottlingError(error)) {
|
|
940
|
-
await this.handleThrottling(error);
|
|
941
|
-
throw error;
|
|
942
|
-
}
|
|
943
|
-
this.logger.error(`Failed to delete verb ${id}:`, error);
|
|
944
|
-
throw new Error(`Failed to delete verb ${id}: ${error}`);
|
|
945
|
-
}
|
|
946
|
-
}
|
|
947
|
-
/**
|
|
948
|
-
* Get nouns with pagination
|
|
949
|
-
* v4.0.0: Returns HNSWNounWithMetadata[] (includes metadata field)
|
|
950
|
-
* Iterates through all UUID-based shards (00-ff) for consistent pagination
|
|
951
|
-
*/
|
|
952
|
-
async getNounsWithPagination(options = {}) {
|
|
953
|
-
await this.ensureInitialized();
|
|
954
|
-
const limit = options.limit || 100;
|
|
955
|
-
// Simplified implementation for Azure (can be optimized similar to GCS)
|
|
956
|
-
const items = [];
|
|
957
|
-
const iterator = this.containerClient.listBlobsFlat({ prefix: this.nounPrefix });
|
|
958
|
-
let count = 0;
|
|
959
|
-
for await (const blob of iterator) {
|
|
960
|
-
if (count >= limit)
|
|
961
|
-
break;
|
|
962
|
-
if (!blob.name || !blob.name.endsWith('.json'))
|
|
963
|
-
continue;
|
|
964
|
-
// Extract UUID from blob name
|
|
965
|
-
const parts = blob.name.split('/');
|
|
966
|
-
const fileName = parts[parts.length - 1];
|
|
967
|
-
const id = fileName.replace('.json', '');
|
|
968
|
-
const node = await this.getNode(id);
|
|
969
|
-
if (!node)
|
|
970
|
-
continue;
|
|
971
|
-
// FIX v4.7.4: Don't skip nouns without metadata - metadata is optional in v4.0.0
|
|
972
|
-
const metadata = await this.getNounMetadata(id);
|
|
973
|
-
// Apply filters if provided
|
|
974
|
-
if (options.filter) {
|
|
975
|
-
if (options.filter.nounType) {
|
|
976
|
-
const nounTypes = Array.isArray(options.filter.nounType)
|
|
977
|
-
? options.filter.nounType
|
|
978
|
-
: [options.filter.nounType];
|
|
979
|
-
const nounType = metadata.type || metadata.noun;
|
|
980
|
-
if (!nounType || !nounTypes.includes(nounType)) {
|
|
981
|
-
continue;
|
|
982
|
-
}
|
|
983
|
-
}
|
|
984
|
-
}
|
|
985
|
-
// v4.8.0: Extract standard fields from metadata to top-level
|
|
986
|
-
const metadataObj = (metadata || {});
|
|
987
|
-
const { noun: nounType, createdAt, updatedAt, confidence, weight, service, data, createdBy, ...customMetadata } = metadataObj;
|
|
988
|
-
items.push({
|
|
989
|
-
id: node.id,
|
|
990
|
-
vector: node.vector,
|
|
991
|
-
connections: node.connections,
|
|
992
|
-
level: node.level || 0,
|
|
993
|
-
type: nounType || NounType.Thing,
|
|
994
|
-
createdAt: createdAt || Date.now(),
|
|
995
|
-
updatedAt: updatedAt || Date.now(),
|
|
996
|
-
confidence: confidence,
|
|
997
|
-
weight: weight,
|
|
998
|
-
service: service,
|
|
999
|
-
data: data,
|
|
1000
|
-
createdBy,
|
|
1001
|
-
metadata: customMetadata
|
|
1002
|
-
});
|
|
1003
|
-
count++;
|
|
1004
|
-
}
|
|
1005
|
-
return {
|
|
1006
|
-
items,
|
|
1007
|
-
totalCount: this.totalNounCount,
|
|
1008
|
-
hasMore: false,
|
|
1009
|
-
nextCursor: undefined
|
|
1010
|
-
};
|
|
1011
|
-
}
|
|
1012
|
-
/**
|
|
1013
|
-
* Get nouns by noun type (internal implementation)
|
|
1014
|
-
*/
|
|
1015
|
-
async getNounsByNounType_internal(nounType) {
|
|
1016
|
-
const result = await this.getNounsWithPagination({
|
|
1017
|
-
limit: 10000, // Large limit for backward compatibility
|
|
1018
|
-
filter: { nounType }
|
|
1019
|
-
});
|
|
1020
|
-
return result.items;
|
|
1021
|
-
}
|
|
1022
|
-
/**
|
|
1023
|
-
* Get verbs by source ID (internal implementation)
|
|
1024
|
-
*/
|
|
1025
|
-
async getVerbsBySource_internal(sourceId) {
|
|
1026
|
-
// Simplified: scan all verbs and filter
|
|
1027
|
-
const items = [];
|
|
1028
|
-
const iterator = this.containerClient.listBlobsFlat({ prefix: this.verbPrefix });
|
|
1029
|
-
for await (const blob of iterator) {
|
|
1030
|
-
if (!blob.name || !blob.name.endsWith('.json'))
|
|
1031
|
-
continue;
|
|
1032
|
-
const parts = blob.name.split('/');
|
|
1033
|
-
const fileName = parts[parts.length - 1];
|
|
1034
|
-
const id = fileName.replace('.json', '');
|
|
1035
|
-
const verb = await this.getEdge(id);
|
|
1036
|
-
if (!verb || verb.sourceId !== sourceId)
|
|
1037
|
-
continue;
|
|
1038
|
-
const metadata = await this.getVerbMetadata(id);
|
|
1039
|
-
// v4.8.0: Extract standard fields from metadata to top-level
|
|
1040
|
-
const metadataObj = (metadata || {});
|
|
1041
|
-
const { createdAt, updatedAt, confidence, weight, service, data, createdBy, ...customMetadata } = metadataObj;
|
|
1042
|
-
items.push({
|
|
1043
|
-
id: verb.id,
|
|
1044
|
-
vector: verb.vector,
|
|
1045
|
-
connections: verb.connections,
|
|
1046
|
-
verb: verb.verb,
|
|
1047
|
-
sourceId: verb.sourceId,
|
|
1048
|
-
targetId: verb.targetId,
|
|
1049
|
-
createdAt: createdAt || Date.now(),
|
|
1050
|
-
updatedAt: updatedAt || Date.now(),
|
|
1051
|
-
confidence: confidence,
|
|
1052
|
-
weight: weight,
|
|
1053
|
-
service: service,
|
|
1054
|
-
data: data,
|
|
1055
|
-
createdBy,
|
|
1056
|
-
metadata: customMetadata
|
|
1057
|
-
});
|
|
1058
|
-
}
|
|
1059
|
-
return items;
|
|
1060
|
-
}
|
|
1061
|
-
/**
|
|
1062
|
-
* Get verbs by target ID (internal implementation)
|
|
1063
|
-
*/
|
|
1064
|
-
async getVerbsByTarget_internal(targetId) {
|
|
1065
|
-
// Simplified: scan all verbs and filter
|
|
1066
|
-
const items = [];
|
|
1067
|
-
const iterator = this.containerClient.listBlobsFlat({ prefix: this.verbPrefix });
|
|
1068
|
-
for await (const blob of iterator) {
|
|
1069
|
-
if (!blob.name || !blob.name.endsWith('.json'))
|
|
1070
|
-
continue;
|
|
1071
|
-
const parts = blob.name.split('/');
|
|
1072
|
-
const fileName = parts[parts.length - 1];
|
|
1073
|
-
const id = fileName.replace('.json', '');
|
|
1074
|
-
const verb = await this.getEdge(id);
|
|
1075
|
-
if (!verb || verb.targetId !== targetId)
|
|
1076
|
-
continue;
|
|
1077
|
-
const metadata = await this.getVerbMetadata(id);
|
|
1078
|
-
// v4.8.0: Extract standard fields from metadata to top-level
|
|
1079
|
-
const metadataObj = (metadata || {});
|
|
1080
|
-
const { createdAt, updatedAt, confidence, weight, service, data, createdBy, ...customMetadata } = metadataObj;
|
|
1081
|
-
items.push({
|
|
1082
|
-
id: verb.id,
|
|
1083
|
-
vector: verb.vector,
|
|
1084
|
-
connections: verb.connections,
|
|
1085
|
-
verb: verb.verb,
|
|
1086
|
-
sourceId: verb.sourceId,
|
|
1087
|
-
targetId: verb.targetId,
|
|
1088
|
-
createdAt: createdAt || Date.now(),
|
|
1089
|
-
updatedAt: updatedAt || Date.now(),
|
|
1090
|
-
confidence: confidence,
|
|
1091
|
-
weight: weight,
|
|
1092
|
-
service: service,
|
|
1093
|
-
data: data,
|
|
1094
|
-
createdBy,
|
|
1095
|
-
metadata: customMetadata
|
|
1096
|
-
});
|
|
1097
|
-
}
|
|
1098
|
-
return items;
|
|
1099
|
-
}
|
|
1100
|
-
/**
|
|
1101
|
-
* Get verbs by type (internal implementation)
|
|
1102
|
-
*/
|
|
1103
|
-
async getVerbsByType_internal(type) {
|
|
1104
|
-
// Simplified: scan all verbs and filter
|
|
1105
|
-
const items = [];
|
|
1106
|
-
const iterator = this.containerClient.listBlobsFlat({ prefix: this.verbPrefix });
|
|
1107
|
-
for await (const blob of iterator) {
|
|
1108
|
-
if (!blob.name || !blob.name.endsWith('.json'))
|
|
1109
|
-
continue;
|
|
1110
|
-
const parts = blob.name.split('/');
|
|
1111
|
-
const fileName = parts[parts.length - 1];
|
|
1112
|
-
const id = fileName.replace('.json', '');
|
|
1113
|
-
const verb = await this.getEdge(id);
|
|
1114
|
-
if (!verb || verb.verb !== type)
|
|
1115
|
-
continue;
|
|
1116
|
-
const metadata = await this.getVerbMetadata(id);
|
|
1117
|
-
// v4.8.0: Extract standard fields from metadata to top-level
|
|
1118
|
-
const metadataObj = (metadata || {});
|
|
1119
|
-
const { createdAt, updatedAt, confidence, weight, service, data, createdBy, ...customMetadata } = metadataObj;
|
|
1120
|
-
items.push({
|
|
1121
|
-
id: verb.id,
|
|
1122
|
-
vector: verb.vector,
|
|
1123
|
-
connections: verb.connections,
|
|
1124
|
-
verb: verb.verb,
|
|
1125
|
-
sourceId: verb.sourceId,
|
|
1126
|
-
targetId: verb.targetId,
|
|
1127
|
-
createdAt: createdAt || Date.now(),
|
|
1128
|
-
updatedAt: updatedAt || Date.now(),
|
|
1129
|
-
confidence: confidence,
|
|
1130
|
-
weight: weight,
|
|
1131
|
-
service: service,
|
|
1132
|
-
data: data,
|
|
1133
|
-
createdBy,
|
|
1134
|
-
metadata: customMetadata
|
|
1135
|
-
});
|
|
1136
|
-
}
|
|
1137
|
-
return items;
|
|
1138
|
-
}
|
|
842
|
+
// v5.4.0: Removed deleteVerb_internal - now inherit from BaseStorage's type-first implementation
|
|
843
|
+
// v5.4.0: Removed getNounsWithPagination - now inherit from BaseStorage's type-first implementation
|
|
844
|
+
// v5.4.0: Removed getNounsByNounType_internal - now inherit from BaseStorage's type-first implementation
|
|
845
|
+
// v5.4.0: Removed 3 verb query *_internal methods (getVerbsBySource, getVerbsByTarget, getVerbsByType) - now inherit from BaseStorage's type-first implementation
|
|
1139
846
|
/**
|
|
1140
847
|
* Clear all data from storage
|
|
1141
848
|
*/
|
|
@@ -1346,101 +1053,84 @@ export class AzureBlobStorage extends BaseStorage {
|
|
|
1346
1053
|
}
|
|
1347
1054
|
/**
|
|
1348
1055
|
* Get a noun's vector for HNSW rebuild
|
|
1056
|
+
* v5.4.0: Uses BaseStorage's getNoun (type-first paths)
|
|
1349
1057
|
*/
|
|
1350
1058
|
async getNounVector(id) {
|
|
1351
|
-
await this.ensureInitialized();
|
|
1352
|
-
const noun = await this.getNode(id);
|
|
1059
|
+
const noun = await this.getNoun(id);
|
|
1353
1060
|
return noun ? noun.vector : null;
|
|
1354
1061
|
}
|
|
1355
1062
|
/**
|
|
1356
1063
|
* Save HNSW graph data for a noun
|
|
1064
|
+
*
|
|
1065
|
+
* v5.4.0: Uses BaseStorage's getNoun/saveNoun (type-first paths)
|
|
1066
|
+
* CRITICAL: Uses mutex locking to prevent read-modify-write races
|
|
1357
1067
|
*/
|
|
1358
1068
|
async saveHNSWData(nounId, hnswData) {
|
|
1359
|
-
|
|
1360
|
-
// CRITICAL FIX (v4.
|
|
1361
|
-
//
|
|
1362
|
-
//
|
|
1363
|
-
//
|
|
1364
|
-
//
|
|
1365
|
-
//
|
|
1366
|
-
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
|
|
1371
|
-
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
|
|
1378
|
-
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
|
|
1382
|
-
|
|
1383
|
-
|
|
1384
|
-
|
|
1385
|
-
|
|
1386
|
-
|
|
1387
|
-
|
|
1388
|
-
|
|
1389
|
-
|
|
1390
|
-
|
|
1391
|
-
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
|
-
|
|
1397
|
-
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
|
|
1401
|
-
|
|
1402
|
-
});
|
|
1403
|
-
// Success! Exit retry loop
|
|
1404
|
-
return;
|
|
1405
|
-
}
|
|
1406
|
-
catch (error) {
|
|
1407
|
-
// Precondition failed - concurrent modification detected
|
|
1408
|
-
if (error.statusCode === 412 || error.code === 'ConditionNotMet') {
|
|
1409
|
-
if (attempt === maxRetries - 1) {
|
|
1410
|
-
this.logger.error(`Max retries (${maxRetries}) exceeded for ${nounId} - concurrent modification conflict`);
|
|
1411
|
-
throw new Error(`Failed to save HNSW data for ${nounId}: max retries exceeded due to concurrent modifications`);
|
|
1412
|
-
}
|
|
1413
|
-
// Exponential backoff: 50ms, 100ms, 200ms, 400ms, 800ms
|
|
1414
|
-
const backoffMs = 50 * Math.pow(2, attempt);
|
|
1415
|
-
await new Promise(resolve => setTimeout(resolve, backoffMs));
|
|
1416
|
-
continue;
|
|
1417
|
-
}
|
|
1418
|
-
// Other error - rethrow
|
|
1419
|
-
this.logger.error(`Failed to save HNSW data for ${nounId}:`, error);
|
|
1420
|
-
throw new Error(`Failed to save HNSW data for ${nounId}: ${error}`);
|
|
1421
|
-
}
|
|
1069
|
+
const lockKey = `hnsw/${nounId}`;
|
|
1070
|
+
// CRITICAL FIX (v4.10.1): Mutex lock to prevent read-modify-write races
|
|
1071
|
+
// Problem: Without mutex, concurrent operations can:
|
|
1072
|
+
// 1. Thread A reads noun (connections: [1,2,3])
|
|
1073
|
+
// 2. Thread B reads noun (connections: [1,2,3])
|
|
1074
|
+
// 3. Thread A adds connection 4, writes [1,2,3,4]
|
|
1075
|
+
// 4. Thread B adds connection 5, writes [1,2,3,5] ← Connection 4 LOST!
|
|
1076
|
+
// Solution: Mutex serializes operations per entity (like FileSystem/OPFS adapters)
|
|
1077
|
+
// Production scale: Prevents corruption at 1000+ concurrent operations
|
|
1078
|
+
// Wait for any pending operations on this entity
|
|
1079
|
+
while (this.hnswLocks.has(lockKey)) {
|
|
1080
|
+
await this.hnswLocks.get(lockKey);
|
|
1081
|
+
}
|
|
1082
|
+
// Acquire lock
|
|
1083
|
+
let releaseLock;
|
|
1084
|
+
const lockPromise = new Promise(resolve => { releaseLock = resolve; });
|
|
1085
|
+
this.hnswLocks.set(lockKey, lockPromise);
|
|
1086
|
+
try {
|
|
1087
|
+
// v5.4.0: Use BaseStorage's getNoun (type-first paths)
|
|
1088
|
+
// Read existing noun data (if exists)
|
|
1089
|
+
const existingNoun = await this.getNoun(nounId);
|
|
1090
|
+
if (!existingNoun) {
|
|
1091
|
+
// Noun doesn't exist - cannot update HNSW data for non-existent noun
|
|
1092
|
+
throw new Error(`Cannot save HNSW data: noun ${nounId} not found`);
|
|
1093
|
+
}
|
|
1094
|
+
// Convert connections from Record to Map format for storage
|
|
1095
|
+
const connectionsMap = new Map();
|
|
1096
|
+
for (const [level, nodeIds] of Object.entries(hnswData.connections)) {
|
|
1097
|
+
connectionsMap.set(Number(level), new Set(nodeIds));
|
|
1098
|
+
}
|
|
1099
|
+
// Preserve id and vector, update only HNSW graph metadata
|
|
1100
|
+
const updatedNoun = {
|
|
1101
|
+
...existingNoun,
|
|
1102
|
+
level: hnswData.level,
|
|
1103
|
+
connections: connectionsMap
|
|
1104
|
+
};
|
|
1105
|
+
// v5.4.0: Use BaseStorage's saveNoun (type-first paths, atomic write via writeObjectToBranch)
|
|
1106
|
+
await this.saveNoun(updatedNoun);
|
|
1107
|
+
}
|
|
1108
|
+
finally {
|
|
1109
|
+
// Release lock (ALWAYS runs, even if error thrown)
|
|
1110
|
+
this.hnswLocks.delete(lockKey);
|
|
1111
|
+
releaseLock();
|
|
1422
1112
|
}
|
|
1423
1113
|
}
|
|
1424
1114
|
/**
|
|
1425
1115
|
* Get HNSW graph data for a noun
|
|
1116
|
+
* v5.4.0: Uses BaseStorage's getNoun (type-first paths)
|
|
1426
1117
|
*/
|
|
1427
1118
|
async getHNSWData(nounId) {
|
|
1428
|
-
await this.ensureInitialized();
|
|
1429
|
-
|
|
1430
|
-
|
|
1431
|
-
const key = `entities/nouns/hnsw/${shard}/${nounId}.json`;
|
|
1432
|
-
const blockBlobClient = this.containerClient.getBlockBlobClient(key);
|
|
1433
|
-
const downloadResponse = await blockBlobClient.download(0);
|
|
1434
|
-
const downloaded = await this.streamToBuffer(downloadResponse.readableStreamBody);
|
|
1435
|
-
return JSON.parse(downloaded.toString());
|
|
1119
|
+
const noun = await this.getNoun(nounId);
|
|
1120
|
+
if (!noun) {
|
|
1121
|
+
return null;
|
|
1436
1122
|
}
|
|
1437
|
-
|
|
1438
|
-
|
|
1439
|
-
|
|
1123
|
+
// Convert connections from Map to Record format
|
|
1124
|
+
const connectionsRecord = {};
|
|
1125
|
+
if (noun.connections) {
|
|
1126
|
+
for (const [level, nodeIds] of noun.connections.entries()) {
|
|
1127
|
+
connectionsRecord[String(level)] = Array.from(nodeIds);
|
|
1440
1128
|
}
|
|
1441
|
-
this.logger.error(`Failed to get HNSW data for ${nounId}:`, error);
|
|
1442
|
-
throw new Error(`Failed to get HNSW data for ${nounId}: ${error}`);
|
|
1443
1129
|
}
|
|
1130
|
+
return {
|
|
1131
|
+
level: noun.level || 0,
|
|
1132
|
+
connections: connectionsRecord
|
|
1133
|
+
};
|
|
1444
1134
|
}
|
|
1445
1135
|
/**
|
|
1446
1136
|
* Save HNSW system data (entry point, max level)
|