@soulcraft/brainy 5.11.1 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +155 -5
- package/README.md +2 -6
- package/dist/api/DataAPI.d.ts +0 -40
- package/dist/api/DataAPI.js +0 -235
- package/dist/brainy.d.ts +28 -106
- package/dist/brainy.js +53 -370
- package/dist/cli/commands/cow.d.ts +1 -9
- package/dist/cli/commands/cow.js +1 -61
- package/dist/cli/commands/data.d.ts +1 -13
- package/dist/cli/commands/data.js +1 -74
- package/dist/cli/index.js +1 -16
- package/dist/neural/embeddedTypeEmbeddings.d.ts +1 -1
- package/dist/neural/embeddedTypeEmbeddings.js +2 -2
- package/dist/storage/adapters/azureBlobStorage.d.ts +21 -7
- package/dist/storage/adapters/azureBlobStorage.js +69 -14
- package/dist/storage/adapters/fileSystemStorage.js +2 -1
- package/dist/storage/adapters/gcsStorage.d.ts +29 -15
- package/dist/storage/adapters/gcsStorage.js +82 -27
- package/dist/storage/adapters/historicalStorageAdapter.js +2 -2
- package/dist/storage/adapters/memoryStorage.d.ts +1 -1
- package/dist/storage/adapters/memoryStorage.js +9 -11
- package/dist/storage/adapters/opfsStorage.js +2 -1
- package/dist/storage/adapters/r2Storage.d.ts +21 -10
- package/dist/storage/adapters/r2Storage.js +73 -17
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +20 -7
- package/dist/storage/adapters/s3CompatibleStorage.js +72 -14
- package/dist/storage/baseStorage.d.ts +153 -24
- package/dist/storage/baseStorage.js +758 -459
- package/dist/vfs/PathResolver.js +6 -2
- package/dist/vfs/VirtualFileSystem.d.ts +46 -24
- package/dist/vfs/VirtualFileSystem.js +176 -156
- package/package.json +1 -1
|
@@ -39,32 +39,36 @@ export function getDirectoryPath(entityType, dataType) {
|
|
|
39
39
|
* Built-in type-aware organization for all storage adapters
|
|
40
40
|
*/
|
|
41
41
|
/**
|
|
42
|
-
* Get
|
|
42
|
+
* Get ID-first path for noun vectors (v6.0.0)
|
|
43
|
+
* No type parameter needed - direct O(1) lookup by ID
|
|
43
44
|
*/
|
|
44
|
-
function getNounVectorPath(
|
|
45
|
+
function getNounVectorPath(id) {
|
|
45
46
|
const shard = getShardIdFromUuid(id);
|
|
46
|
-
return `entities/nouns/${
|
|
47
|
+
return `entities/nouns/${shard}/${id}/vectors.json`;
|
|
47
48
|
}
|
|
48
49
|
/**
|
|
49
|
-
* Get
|
|
50
|
+
* Get ID-first path for noun metadata (v6.0.0)
|
|
51
|
+
* No type parameter needed - direct O(1) lookup by ID
|
|
50
52
|
*/
|
|
51
|
-
function getNounMetadataPath(
|
|
53
|
+
function getNounMetadataPath(id) {
|
|
52
54
|
const shard = getShardIdFromUuid(id);
|
|
53
|
-
return `entities/nouns/${
|
|
55
|
+
return `entities/nouns/${shard}/${id}/metadata.json`;
|
|
54
56
|
}
|
|
55
57
|
/**
|
|
56
|
-
* Get
|
|
58
|
+
* Get ID-first path for verb vectors (v6.0.0)
|
|
59
|
+
* No type parameter needed - direct O(1) lookup by ID
|
|
57
60
|
*/
|
|
58
|
-
function getVerbVectorPath(
|
|
61
|
+
function getVerbVectorPath(id) {
|
|
59
62
|
const shard = getShardIdFromUuid(id);
|
|
60
|
-
return `entities/verbs/${
|
|
63
|
+
return `entities/verbs/${shard}/${id}/vectors.json`;
|
|
61
64
|
}
|
|
62
65
|
/**
|
|
63
|
-
* Get
|
|
66
|
+
* Get ID-first path for verb metadata (v6.0.0)
|
|
67
|
+
* No type parameter needed - direct O(1) lookup by ID
|
|
64
68
|
*/
|
|
65
|
-
function getVerbMetadataPath(
|
|
69
|
+
function getVerbMetadataPath(id) {
|
|
66
70
|
const shard = getShardIdFromUuid(id);
|
|
67
|
-
return `entities/verbs/${
|
|
71
|
+
return `entities/verbs/${shard}/${id}/metadata.json`;
|
|
68
72
|
}
|
|
69
73
|
/**
|
|
70
74
|
* Base storage adapter that implements common functionality
|
|
@@ -89,9 +93,9 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
89
93
|
this.nounCountsByType = new Uint32Array(NOUN_TYPE_COUNT); // 168 bytes (Stage 3: 42 types)
|
|
90
94
|
this.verbCountsByType = new Uint32Array(VERB_TYPE_COUNT); // 508 bytes (Stage 3: 127 types)
|
|
91
95
|
// Total: 676 bytes (99.2% reduction vs Map-based tracking)
|
|
92
|
-
// Type
|
|
93
|
-
|
|
94
|
-
|
|
96
|
+
// v6.0.0: Type caches REMOVED - ID-first paths eliminate need for type lookups!
|
|
97
|
+
// With ID-first architecture, we construct paths directly from IDs: {SHARD}/{ID}/metadata.json
|
|
98
|
+
// Type is just a field in the metadata, indexed by MetadataIndexManager for queries
|
|
95
99
|
// v5.5.0: Track if type counts have been rebuilt (prevent repeated rebuilds)
|
|
96
100
|
this.typeCountsRebuilt = false;
|
|
97
101
|
}
|
|
@@ -176,8 +180,33 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
176
180
|
async init() {
|
|
177
181
|
// Load type statistics from storage (if they exist)
|
|
178
182
|
await this.loadTypeStatistics();
|
|
183
|
+
// v6.0.0: Create GraphAdjacencyIndex (lazy-loaded, no rebuild)
|
|
184
|
+
// LSM-trees are initialized on first use via ensureInitialized()
|
|
185
|
+
// Index is populated incrementally as verbs are added via addVerb()
|
|
186
|
+
try {
|
|
187
|
+
prodLog.debug('[BaseStorage] Creating GraphAdjacencyIndex...');
|
|
188
|
+
this.graphIndex = new GraphAdjacencyIndex(this);
|
|
189
|
+
prodLog.debug(`[BaseStorage] GraphAdjacencyIndex instantiated (lazy-loaded), graphIndex=${!!this.graphIndex}`);
|
|
190
|
+
}
|
|
191
|
+
catch (error) {
|
|
192
|
+
prodLog.error('[BaseStorage] Failed to create GraphAdjacencyIndex:', error);
|
|
193
|
+
throw error;
|
|
194
|
+
}
|
|
179
195
|
this.isInitialized = true;
|
|
180
196
|
}
|
|
197
|
+
/**
|
|
198
|
+
* Rebuild GraphAdjacencyIndex from existing verbs (v6.0.0)
|
|
199
|
+
* Call this manually if you have existing verb data that needs to be indexed
|
|
200
|
+
* @public
|
|
201
|
+
*/
|
|
202
|
+
async rebuildGraphIndex() {
|
|
203
|
+
if (!this.graphIndex) {
|
|
204
|
+
throw new Error('GraphAdjacencyIndex not initialized');
|
|
205
|
+
}
|
|
206
|
+
prodLog.info('[BaseStorage] Rebuilding graph index from existing data...');
|
|
207
|
+
await this.graphIndex.rebuild();
|
|
208
|
+
prodLog.info('[BaseStorage] Graph index rebuild complete');
|
|
209
|
+
}
|
|
181
210
|
/**
|
|
182
211
|
* Ensure the storage adapter is initialized
|
|
183
212
|
*/
|
|
@@ -851,60 +880,44 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
851
880
|
*/
|
|
852
881
|
async getNounsWithPagination(options) {
|
|
853
882
|
await this.ensureInitialized();
|
|
854
|
-
const { limit, offset = 0, filter } = options;
|
|
883
|
+
const { limit, offset = 0, filter } = options;
|
|
855
884
|
const collectedNouns = [];
|
|
856
|
-
const targetCount = offset + limit;
|
|
857
|
-
//
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
for (let i = 0; i < NOUN_TYPE_COUNT && collectedNouns.length < targetCount; i++) {
|
|
862
|
-
// OPTIMIZATION 1: Skip empty types (only if counts are reliable)
|
|
863
|
-
if (useOptimization && this.nounCountsByType[i] === 0) {
|
|
864
|
-
continue;
|
|
865
|
-
}
|
|
866
|
-
const type = TypeUtils.getNounFromIndex(i);
|
|
867
|
-
// If filtering by type, skip other types
|
|
868
|
-
if (filter?.nounType) {
|
|
869
|
-
const filterTypes = Array.isArray(filter.nounType) ? filter.nounType : [filter.nounType];
|
|
870
|
-
if (!filterTypes.includes(type)) {
|
|
871
|
-
continue;
|
|
872
|
-
}
|
|
873
|
-
}
|
|
874
|
-
const typeDir = `entities/nouns/${type}/vectors`;
|
|
885
|
+
const targetCount = offset + limit;
|
|
886
|
+
// v6.0.0: Iterate by shards (0x00-0xFF) instead of types
|
|
887
|
+
for (let shard = 0; shard < 256 && collectedNouns.length < targetCount; shard++) {
|
|
888
|
+
const shardHex = shard.toString(16).padStart(2, '0');
|
|
889
|
+
const shardDir = `entities/nouns/${shardHex}`;
|
|
875
890
|
try {
|
|
876
|
-
|
|
877
|
-
const nounFiles = await this.listObjectsInBranch(typeDir);
|
|
891
|
+
const nounFiles = await this.listObjectsInBranch(shardDir);
|
|
878
892
|
for (const nounPath of nounFiles) {
|
|
879
|
-
|
|
880
|
-
if (collectedNouns.length >= targetCount) {
|
|
893
|
+
if (collectedNouns.length >= targetCount)
|
|
881
894
|
break;
|
|
882
|
-
|
|
883
|
-
// Skip if not a .json file
|
|
884
|
-
if (!nounPath.endsWith('.json'))
|
|
895
|
+
if (!nounPath.includes('/vectors.json'))
|
|
885
896
|
continue;
|
|
886
897
|
try {
|
|
887
|
-
const
|
|
888
|
-
if (
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
const noun = this.deserializeNoun(rawNoun);
|
|
892
|
-
// Load metadata
|
|
893
|
-
const metadataPath = getNounMetadataPath(type, noun.id);
|
|
894
|
-
const metadata = await this.readWithInheritance(metadataPath);
|
|
898
|
+
const noun = await this.readWithInheritance(nounPath);
|
|
899
|
+
if (noun) {
|
|
900
|
+
const deserialized = this.deserializeNoun(noun);
|
|
901
|
+
const metadata = await this.getNounMetadata(deserialized.id);
|
|
895
902
|
if (metadata) {
|
|
896
|
-
// Apply
|
|
903
|
+
// Apply type filter
|
|
904
|
+
if (filter?.nounType && metadata.noun) {
|
|
905
|
+
const types = Array.isArray(filter.nounType) ? filter.nounType : [filter.nounType];
|
|
906
|
+
if (!types.includes(metadata.noun)) {
|
|
907
|
+
continue;
|
|
908
|
+
}
|
|
909
|
+
}
|
|
910
|
+
// Apply service filter
|
|
897
911
|
if (filter?.service) {
|
|
898
912
|
const services = Array.isArray(filter.service) ? filter.service : [filter.service];
|
|
899
913
|
if (metadata.service && !services.includes(metadata.service)) {
|
|
900
914
|
continue;
|
|
901
915
|
}
|
|
902
916
|
}
|
|
903
|
-
// Combine noun + metadata
|
|
917
|
+
// Combine noun + metadata
|
|
904
918
|
collectedNouns.push({
|
|
905
|
-
...
|
|
906
|
-
|
|
907
|
-
type: metadata.noun || type, // Required: Extract type from metadata
|
|
919
|
+
...deserialized,
|
|
920
|
+
type: (metadata.noun || 'thing'),
|
|
908
921
|
confidence: metadata.confidence,
|
|
909
922
|
weight: metadata.weight,
|
|
910
923
|
createdAt: metadata.createdAt
|
|
@@ -927,15 +940,15 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
927
940
|
}
|
|
928
941
|
}
|
|
929
942
|
catch (error) {
|
|
930
|
-
// Skip
|
|
943
|
+
// Skip shards that have no data
|
|
931
944
|
}
|
|
932
945
|
}
|
|
933
|
-
// Apply pagination
|
|
946
|
+
// Apply pagination
|
|
934
947
|
const paginatedNouns = collectedNouns.slice(offset, offset + limit);
|
|
935
|
-
const hasMore = collectedNouns.length > targetCount;
|
|
948
|
+
const hasMore = collectedNouns.length > targetCount;
|
|
936
949
|
return {
|
|
937
950
|
items: paginatedNouns,
|
|
938
|
-
totalCount: collectedNouns.length,
|
|
951
|
+
totalCount: collectedNouns.length,
|
|
939
952
|
hasMore,
|
|
940
953
|
nextCursor: hasMore && paginatedNouns.length > 0
|
|
941
954
|
? paginatedNouns[paginatedNouns.length - 1].id
|
|
@@ -962,58 +975,70 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
962
975
|
const { limit, offset = 0, filter } = options; // cursor intentionally not extracted (not yet implemented)
|
|
963
976
|
const collectedVerbs = [];
|
|
964
977
|
const targetCount = offset + limit; // Early termination target
|
|
965
|
-
//
|
|
966
|
-
const
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
continue;
|
|
980
|
-
}
|
|
981
|
-
}
|
|
978
|
+
// Prepare filter sets for efficient lookup
|
|
979
|
+
const filterVerbTypes = filter?.verbType
|
|
980
|
+
? new Set(Array.isArray(filter.verbType) ? filter.verbType : [filter.verbType])
|
|
981
|
+
: null;
|
|
982
|
+
const filterSourceIds = filter?.sourceId
|
|
983
|
+
? new Set(Array.isArray(filter.sourceId) ? filter.sourceId : [filter.sourceId])
|
|
984
|
+
: null;
|
|
985
|
+
const filterTargetIds = filter?.targetId
|
|
986
|
+
? new Set(Array.isArray(filter.targetId) ? filter.targetId : [filter.targetId])
|
|
987
|
+
: null;
|
|
988
|
+
// v6.0.0: Iterate by shards (0x00-0xFF) instead of types - single pass!
|
|
989
|
+
for (let shard = 0; shard < 256 && collectedVerbs.length < targetCount; shard++) {
|
|
990
|
+
const shardHex = shard.toString(16).padStart(2, '0');
|
|
991
|
+
const shardDir = `entities/verbs/${shardHex}`;
|
|
982
992
|
try {
|
|
983
|
-
const
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
// OPTIMIZATION 2: Early termination (stop when we have enough)
|
|
987
|
-
if (collectedVerbs.length >= targetCount) {
|
|
993
|
+
const verbFiles = await this.listObjectsInBranch(shardDir);
|
|
994
|
+
for (const verbPath of verbFiles) {
|
|
995
|
+
if (collectedVerbs.length >= targetCount)
|
|
988
996
|
break;
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
if (
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
997
|
+
if (!verbPath.includes('/vectors.json'))
|
|
998
|
+
continue;
|
|
999
|
+
try {
|
|
1000
|
+
const rawVerb = await this.readWithInheritance(verbPath);
|
|
1001
|
+
if (!rawVerb)
|
|
1002
|
+
continue;
|
|
1003
|
+
// v6.0.0: Deserialize connections Map from JSON storage format
|
|
1004
|
+
const verb = this.deserializeVerb(rawVerb);
|
|
1005
|
+
// Apply type filter
|
|
1006
|
+
if (filterVerbTypes && !filterVerbTypes.has(verb.verb)) {
|
|
1007
|
+
continue;
|
|
1000
1008
|
}
|
|
1001
|
-
//
|
|
1002
|
-
if (
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
}
|
|
1009
|
+
// Apply sourceId filter
|
|
1010
|
+
if (filterSourceIds && !filterSourceIds.has(verb.sourceId)) {
|
|
1011
|
+
continue;
|
|
1012
|
+
}
|
|
1013
|
+
// Apply targetId filter
|
|
1014
|
+
if (filterTargetIds && !filterTargetIds.has(verb.targetId)) {
|
|
1015
|
+
continue;
|
|
1009
1016
|
}
|
|
1017
|
+
// Load metadata
|
|
1018
|
+
const metadata = await this.getVerbMetadata(verb.id);
|
|
1019
|
+
// Combine verb + metadata
|
|
1020
|
+
collectedVerbs.push({
|
|
1021
|
+
...verb,
|
|
1022
|
+
weight: metadata?.weight,
|
|
1023
|
+
confidence: metadata?.confidence,
|
|
1024
|
+
createdAt: metadata?.createdAt
|
|
1025
|
+
? (typeof metadata.createdAt === 'number' ? metadata.createdAt : metadata.createdAt.seconds * 1000)
|
|
1026
|
+
: Date.now(),
|
|
1027
|
+
updatedAt: metadata?.updatedAt
|
|
1028
|
+
? (typeof metadata.updatedAt === 'number' ? metadata.updatedAt : metadata.updatedAt.seconds * 1000)
|
|
1029
|
+
: Date.now(),
|
|
1030
|
+
service: metadata?.service,
|
|
1031
|
+
createdBy: metadata?.createdBy,
|
|
1032
|
+
metadata: metadata || {}
|
|
1033
|
+
});
|
|
1034
|
+
}
|
|
1035
|
+
catch (error) {
|
|
1036
|
+
// Skip verbs that fail to load
|
|
1010
1037
|
}
|
|
1011
|
-
// Verb passed all filters - add to collection
|
|
1012
|
-
collectedVerbs.push(verb);
|
|
1013
1038
|
}
|
|
1014
1039
|
}
|
|
1015
1040
|
catch (error) {
|
|
1016
|
-
// Skip
|
|
1041
|
+
// Skip shards that have no data
|
|
1017
1042
|
}
|
|
1018
1043
|
}
|
|
1019
1044
|
// Apply pagination (v5.5.0: Efficient slicing after early termination)
|
|
@@ -1386,11 +1411,8 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
1386
1411
|
*/
|
|
1387
1412
|
async saveNounMetadata_internal(id, metadata) {
|
|
1388
1413
|
await this.ensureInitialized();
|
|
1389
|
-
//
|
|
1390
|
-
const
|
|
1391
|
-
this.nounTypeCache.set(id, type);
|
|
1392
|
-
// v5.4.0: Use type-first path
|
|
1393
|
-
const path = getNounMetadataPath(type, id);
|
|
1414
|
+
// v6.0.0: ID-first path - no type needed!
|
|
1415
|
+
const path = getNounMetadataPath(id);
|
|
1394
1416
|
// Determine if this is a new entity by checking if metadata already exists
|
|
1395
1417
|
const existingMetadata = await this.readWithInheritance(path);
|
|
1396
1418
|
const isNew = !existingMetadata;
|
|
@@ -1411,6 +1433,17 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
1411
1433
|
/**
|
|
1412
1434
|
* Get noun metadata from storage (METADATA-ONLY, NO VECTORS)
|
|
1413
1435
|
*
|
|
1436
|
+
* **Performance (v6.0.0)**: Direct O(1) ID-first lookup - NO type search needed!
|
|
1437
|
+
* - **All lookups**: 1 read, ~500ms on cloud (consistent performance)
|
|
1438
|
+
* - **No cache needed**: Type is in the metadata, not the path
|
|
1439
|
+
* - **No type search**: ID-first paths eliminate 42-type search entirely
|
|
1440
|
+
*
|
|
1441
|
+
* **Clean architecture (v6.0.0)**:
|
|
1442
|
+
* - Path: `entities/nouns/{SHARD}/{ID}/metadata.json`
|
|
1443
|
+
* - Type is just a field in metadata (`noun: "document"`)
|
|
1444
|
+
* - MetadataIndex handles type queries (no path scanning needed)
|
|
1445
|
+
* - Scales to billions without any overhead
|
|
1446
|
+
*
|
|
1414
1447
|
* **Performance (v5.11.1)**: Fast path for metadata-only reads
|
|
1415
1448
|
* - **Speed**: 10ms vs 43ms (76-81% faster than getNoun)
|
|
1416
1449
|
* - **Bandwidth**: 300 bytes vs 6KB (95% less)
|
|
@@ -1440,71 +1473,236 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
1440
1473
|
* @returns Metadata or null if not found
|
|
1441
1474
|
*
|
|
1442
1475
|
* @performance
|
|
1443
|
-
* -
|
|
1444
|
-
* -
|
|
1445
|
-
* -
|
|
1476
|
+
* - O(1) direct ID lookup - always 1 read (~500ms on cloud, ~10ms local)
|
|
1477
|
+
* - No caching complexity
|
|
1478
|
+
* - No type search fallbacks
|
|
1479
|
+
* - Works in distributed systems without sync issues
|
|
1446
1480
|
*
|
|
1447
1481
|
* @since v4.0.0
|
|
1448
|
-
* @since v5.4.0 - Type-first paths
|
|
1482
|
+
* @since v5.4.0 - Type-first paths (removed in v6.0.0)
|
|
1449
1483
|
* @since v5.11.1 - Promoted to fast path for brain.get() optimization
|
|
1484
|
+
* @since v6.0.0 - CLEAN FIX: ID-first paths eliminate all type-search complexity
|
|
1450
1485
|
*/
|
|
1451
1486
|
async getNounMetadata(id) {
|
|
1452
1487
|
await this.ensureInitialized();
|
|
1453
|
-
//
|
|
1454
|
-
const
|
|
1455
|
-
|
|
1456
|
-
|
|
1457
|
-
|
|
1458
|
-
|
|
1459
|
-
|
|
1460
|
-
|
|
1461
|
-
|
|
1462
|
-
|
|
1463
|
-
|
|
1464
|
-
|
|
1465
|
-
|
|
1466
|
-
|
|
1467
|
-
|
|
1468
|
-
|
|
1469
|
-
|
|
1470
|
-
|
|
1471
|
-
|
|
1472
|
-
|
|
1488
|
+
// v6.0.0: Clean, simple, O(1) lookup - no type needed!
|
|
1489
|
+
const path = getNounMetadataPath(id);
|
|
1490
|
+
return this.readWithInheritance(path);
|
|
1491
|
+
}
|
|
1492
|
+
/**
|
|
1493
|
+
* Batch fetch noun metadata from storage (v5.12.0 - Cloud Storage Optimization)
|
|
1494
|
+
*
|
|
1495
|
+
* **Performance**: Reduces N sequential calls → 1-2 batch calls
|
|
1496
|
+
* - Local storage: N × 10ms → 1 × 10ms parallel (N× faster)
|
|
1497
|
+
* - Cloud storage: N × 300ms → 1 × 300ms batch (N× faster)
|
|
1498
|
+
*
|
|
1499
|
+
* **Use cases:**
|
|
1500
|
+
* - VFS tree traversal (fetch all children at once)
|
|
1501
|
+
* - brain.find() result hydration (batch load entities)
|
|
1502
|
+
* - brain.getRelations() target entities (eliminate N+1)
|
|
1503
|
+
* - Import operations (batch existence checks)
|
|
1504
|
+
*
|
|
1505
|
+
* @param ids Array of entity IDs to fetch
|
|
1506
|
+
* @returns Map of id → metadata (only successful fetches included)
|
|
1507
|
+
*
|
|
1508
|
+
* @example
|
|
1509
|
+
* ```typescript
|
|
1510
|
+
* // Before (N+1 pattern)
|
|
1511
|
+
* for (const id of ids) {
|
|
1512
|
+
* const metadata = await storage.getNounMetadata(id) // N calls
|
|
1513
|
+
* }
|
|
1514
|
+
*
|
|
1515
|
+
* // After (batched)
|
|
1516
|
+
* const metadataMap = await storage.getNounMetadataBatch(ids) // 1 call
|
|
1517
|
+
* for (const id of ids) {
|
|
1518
|
+
* const metadata = metadataMap.get(id)
|
|
1519
|
+
* }
|
|
1520
|
+
* ```
|
|
1521
|
+
*
|
|
1522
|
+
* @since v5.12.0
|
|
1523
|
+
*/
|
|
1524
|
+
async getNounMetadataBatch(ids) {
|
|
1525
|
+
await this.ensureInitialized();
|
|
1526
|
+
const results = new Map();
|
|
1527
|
+
if (ids.length === 0)
|
|
1528
|
+
return results;
|
|
1529
|
+
// v6.0.0: ID-first paths - no type grouping or search needed!
|
|
1530
|
+
// Build direct paths for all IDs
|
|
1531
|
+
const pathsToFetch = ids.map(id => ({
|
|
1532
|
+
path: getNounMetadataPath(id),
|
|
1533
|
+
id
|
|
1534
|
+
}));
|
|
1535
|
+
// Batch read all paths (uses adapter's native batch API or parallel fallback)
|
|
1536
|
+
const batchResults = await this.readBatchWithInheritance(pathsToFetch.map(p => p.path));
|
|
1537
|
+
// Map results back to IDs
|
|
1538
|
+
for (const { path, id } of pathsToFetch) {
|
|
1539
|
+
const metadata = batchResults.get(path);
|
|
1540
|
+
if (metadata) {
|
|
1541
|
+
results.set(id, metadata);
|
|
1473
1542
|
}
|
|
1474
1543
|
}
|
|
1475
|
-
return
|
|
1544
|
+
return results;
|
|
1476
1545
|
}
|
|
1477
1546
|
/**
|
|
1478
|
-
*
|
|
1479
|
-
*
|
|
1547
|
+
* Batch read multiple storage paths with COW inheritance support (v5.12.0)
|
|
1548
|
+
*
|
|
1549
|
+
* Core batching primitive that all batch operations build upon.
|
|
1550
|
+
* Handles write cache, branch inheritance, and adapter-specific batching.
|
|
1551
|
+
*
|
|
1552
|
+
* **Performance**:
|
|
1553
|
+
* - Uses adapter's native batch API when available (GCS, S3, Azure)
|
|
1554
|
+
* - Falls back to parallel reads for non-batch adapters
|
|
1555
|
+
* - Respects rate limits via StorageBatchConfig
|
|
1556
|
+
*
|
|
1557
|
+
* @param paths Array of storage paths to read
|
|
1558
|
+
* @param branch Optional branch (defaults to current branch)
|
|
1559
|
+
* @returns Map of path → data (only successful reads included)
|
|
1560
|
+
*
|
|
1561
|
+
* @protected - Available to subclasses and batch operations
|
|
1562
|
+
* @since v5.12.0
|
|
1480
1563
|
*/
|
|
1481
|
-
async
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
const
|
|
1485
|
-
|
|
1486
|
-
|
|
1487
|
-
|
|
1488
|
-
|
|
1489
|
-
this.
|
|
1490
|
-
|
|
1564
|
+
async readBatchWithInheritance(paths, branch) {
|
|
1565
|
+
if (paths.length === 0)
|
|
1566
|
+
return new Map();
|
|
1567
|
+
const targetBranch = branch || this.currentBranch || 'main';
|
|
1568
|
+
const results = new Map();
|
|
1569
|
+
// Resolve all paths to branch-specific paths
|
|
1570
|
+
const branchPaths = paths.map(path => ({
|
|
1571
|
+
original: path,
|
|
1572
|
+
resolved: this.resolveBranchPath(path, targetBranch)
|
|
1573
|
+
}));
|
|
1574
|
+
// Step 1: Check write cache first (synchronous, instant)
|
|
1575
|
+
const pathsToFetch = [];
|
|
1576
|
+
const pathMapping = new Map(); // resolved → original
|
|
1577
|
+
for (const { original, resolved } of branchPaths) {
|
|
1578
|
+
const cachedData = this.writeCache.get(resolved);
|
|
1579
|
+
if (cachedData !== undefined) {
|
|
1580
|
+
results.set(original, cachedData);
|
|
1581
|
+
}
|
|
1582
|
+
else {
|
|
1583
|
+
pathsToFetch.push(resolved);
|
|
1584
|
+
pathMapping.set(resolved, original);
|
|
1585
|
+
}
|
|
1491
1586
|
}
|
|
1492
|
-
|
|
1493
|
-
|
|
1494
|
-
|
|
1495
|
-
|
|
1496
|
-
|
|
1497
|
-
|
|
1498
|
-
|
|
1499
|
-
|
|
1500
|
-
|
|
1501
|
-
|
|
1587
|
+
if (pathsToFetch.length === 0) {
|
|
1588
|
+
return results; // All in write cache
|
|
1589
|
+
}
|
|
1590
|
+
// Step 2: Batch read from adapter
|
|
1591
|
+
// Check if adapter supports native batch operations
|
|
1592
|
+
const batchData = await this.readBatchFromAdapter(pathsToFetch);
|
|
1593
|
+
// Step 3: Process results and handle inheritance for missing items
|
|
1594
|
+
const missingPaths = [];
|
|
1595
|
+
for (const [resolvedPath, data] of batchData.entries()) {
|
|
1596
|
+
const originalPath = pathMapping.get(resolvedPath);
|
|
1597
|
+
if (originalPath && data !== null) {
|
|
1598
|
+
results.set(originalPath, data);
|
|
1599
|
+
}
|
|
1600
|
+
}
|
|
1601
|
+
// Identify paths that weren't found
|
|
1602
|
+
for (const resolvedPath of pathsToFetch) {
|
|
1603
|
+
if (!batchData.has(resolvedPath) || batchData.get(resolvedPath) === null) {
|
|
1604
|
+
missingPaths.push(pathMapping.get(resolvedPath));
|
|
1605
|
+
}
|
|
1606
|
+
}
|
|
1607
|
+
// Step 4: Handle COW inheritance for missing items (if not on main branch)
|
|
1608
|
+
if (targetBranch !== 'main' && missingPaths.length > 0) {
|
|
1609
|
+
// For now, fall back to individual inheritance lookups
|
|
1610
|
+
// TODO v5.13.0: Optimize inheritance with batch commit walks
|
|
1611
|
+
for (const originalPath of missingPaths) {
|
|
1612
|
+
try {
|
|
1613
|
+
const data = await this.readWithInheritance(originalPath, targetBranch);
|
|
1614
|
+
if (data !== null) {
|
|
1615
|
+
results.set(originalPath, data);
|
|
1616
|
+
}
|
|
1617
|
+
}
|
|
1618
|
+
catch (error) {
|
|
1619
|
+
// Skip failed reads (they won't be in results map)
|
|
1502
1620
|
}
|
|
1503
1621
|
}
|
|
1622
|
+
}
|
|
1623
|
+
return results;
|
|
1624
|
+
}
|
|
1625
|
+
/**
|
|
1626
|
+
* Adapter-level batch read with automatic batching strategy (v5.12.0)
|
|
1627
|
+
*
|
|
1628
|
+
* Uses adapter's native batch API when available:
|
|
1629
|
+
* - GCS: batch API (100 ops)
|
|
1630
|
+
* - S3/R2: batch operations (1000 ops)
|
|
1631
|
+
* - Azure: batch API (100 ops)
|
|
1632
|
+
* - Others: parallel reads via Promise.all()
|
|
1633
|
+
*
|
|
1634
|
+
* Automatically chunks large batches based on adapter's maxBatchSize.
|
|
1635
|
+
*
|
|
1636
|
+
* @param paths Array of resolved storage paths
|
|
1637
|
+
* @returns Map of path → data
|
|
1638
|
+
*
|
|
1639
|
+
* @private
|
|
1640
|
+
* @since v5.12.0
|
|
1641
|
+
*/
|
|
1642
|
+
async readBatchFromAdapter(paths) {
|
|
1643
|
+
if (paths.length === 0)
|
|
1644
|
+
return new Map();
|
|
1645
|
+
// Check if this class implements batch operations (will be added to cloud adapters)
|
|
1646
|
+
const selfWithBatch = this;
|
|
1647
|
+
if (typeof selfWithBatch.readBatch === 'function') {
|
|
1648
|
+
// Adapter has native batch support - use it
|
|
1649
|
+
try {
|
|
1650
|
+
return await selfWithBatch.readBatch(paths);
|
|
1651
|
+
}
|
|
1504
1652
|
catch (error) {
|
|
1505
|
-
//
|
|
1653
|
+
// Fall back to parallel reads on batch failure
|
|
1654
|
+
prodLog.warn(`Batch read failed, falling back to parallel: ${error}`);
|
|
1506
1655
|
}
|
|
1507
1656
|
}
|
|
1657
|
+
// Fallback: Parallel individual reads
|
|
1658
|
+
// Respect adapter's maxConcurrent limit
|
|
1659
|
+
const batchConfig = this.getBatchConfig();
|
|
1660
|
+
const chunkSize = batchConfig.maxConcurrent || 50;
|
|
1661
|
+
const results = new Map();
|
|
1662
|
+
for (let i = 0; i < paths.length; i += chunkSize) {
|
|
1663
|
+
const chunk = paths.slice(i, i + chunkSize);
|
|
1664
|
+
const chunkResults = await Promise.allSettled(chunk.map(async (path) => ({
|
|
1665
|
+
path,
|
|
1666
|
+
data: await this.readObjectFromPath(path)
|
|
1667
|
+
})));
|
|
1668
|
+
for (const result of chunkResults) {
|
|
1669
|
+
if (result.status === 'fulfilled' && result.value.data !== null) {
|
|
1670
|
+
results.set(result.value.path, result.value.data);
|
|
1671
|
+
}
|
|
1672
|
+
}
|
|
1673
|
+
}
|
|
1674
|
+
return results;
|
|
1675
|
+
}
|
|
1676
|
+
/**
|
|
1677
|
+
* Get batch configuration for this storage adapter (v5.12.0)
|
|
1678
|
+
*
|
|
1679
|
+
* Override in subclasses to provide adapter-specific batch limits.
|
|
1680
|
+
* Defaults to conservative limits for safety.
|
|
1681
|
+
*
|
|
1682
|
+
* @public - Inherited from BaseStorageAdapter
|
|
1683
|
+
* @since v5.12.0
|
|
1684
|
+
*/
|
|
1685
|
+
getBatchConfig() {
|
|
1686
|
+
// Conservative defaults - adapters should override with their actual limits
|
|
1687
|
+
return {
|
|
1688
|
+
maxBatchSize: 100,
|
|
1689
|
+
batchDelayMs: 0,
|
|
1690
|
+
maxConcurrent: 50,
|
|
1691
|
+
supportsParallelWrites: true,
|
|
1692
|
+
rateLimit: {
|
|
1693
|
+
operationsPerSecond: 1000,
|
|
1694
|
+
burstCapacity: 5000
|
|
1695
|
+
}
|
|
1696
|
+
};
|
|
1697
|
+
}
|
|
1698
|
+
/**
|
|
1699
|
+
* Delete noun metadata from storage (v6.0.0: ID-first, O(1) delete)
|
|
1700
|
+
*/
|
|
1701
|
+
async deleteNounMetadata(id) {
|
|
1702
|
+
await this.ensureInitialized();
|
|
1703
|
+
// v6.0.0: Direct O(1) delete with ID-first path
|
|
1704
|
+
const path = getNounMetadataPath(id);
|
|
1705
|
+
await this.deleteObjectFromBranch(path);
|
|
1508
1706
|
}
|
|
1509
1707
|
/**
|
|
1510
1708
|
* Save verb metadata to storage (v4.0.0: now typed)
|
|
@@ -1516,7 +1714,7 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
1516
1714
|
}
|
|
1517
1715
|
/**
|
|
1518
1716
|
* Internal method for saving verb metadata (v4.0.0: now typed)
|
|
1519
|
-
* v5.4.0: Uses
|
|
1717
|
+
* v5.4.0: Uses ID-first paths (must match getVerbMetadata)
|
|
1520
1718
|
*
|
|
1521
1719
|
* CRITICAL (v4.1.2): Count synchronization happens here
|
|
1522
1720
|
* This ensures verb counts are updated AFTER metadata exists, fixing the race condition
|
|
@@ -1528,7 +1726,7 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
1528
1726
|
*/
|
|
1529
1727
|
async saveVerbMetadata_internal(id, metadata) {
|
|
1530
1728
|
await this.ensureInitialized();
|
|
1531
|
-
// v5.4.0: Extract verb type from metadata for
|
|
1729
|
+
// v5.4.0: Extract verb type from metadata for ID-first path
|
|
1532
1730
|
const verbType = metadata.verb;
|
|
1533
1731
|
if (!verbType) {
|
|
1534
1732
|
// Backward compatibility: fallback to old path if no verb type
|
|
@@ -1536,15 +1734,14 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
1536
1734
|
await this.writeObjectToBranch(keyInfo.fullPath, metadata);
|
|
1537
1735
|
return;
|
|
1538
1736
|
}
|
|
1539
|
-
// v5.4.0: Use
|
|
1540
|
-
const path = getVerbMetadataPath(
|
|
1737
|
+
// v5.4.0: Use ID-first path
|
|
1738
|
+
const path = getVerbMetadataPath(id);
|
|
1541
1739
|
// Determine if this is a new verb by checking if metadata already exists
|
|
1542
1740
|
const existingMetadata = await this.readWithInheritance(path);
|
|
1543
1741
|
const isNew = !existingMetadata;
|
|
1544
1742
|
// Save the metadata (COW-aware - writes to branch-specific path)
|
|
1545
1743
|
await this.writeObjectToBranch(path, metadata);
|
|
1546
1744
|
// v5.4.0: Cache verb type for faster lookups
|
|
1547
|
-
this.verbTypeCache.set(id, verbType);
|
|
1548
1745
|
// CRITICAL FIX (v4.1.2): Increment verb count for new relationships
|
|
1549
1746
|
// This runs AFTER metadata is saved
|
|
1550
1747
|
// Uses synchronous increment since storage operations are already serialized
|
|
@@ -1559,69 +1756,34 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
1559
1756
|
}
|
|
1560
1757
|
/**
|
|
1561
1758
|
* Get verb metadata from storage (v4.0.0: now typed)
|
|
1562
|
-
* v5.4.0: Uses
|
|
1759
|
+
* v5.4.0: Uses ID-first paths (must match saveVerbMetadata_internal)
|
|
1563
1760
|
*/
|
|
1564
1761
|
async getVerbMetadata(id) {
|
|
1565
1762
|
await this.ensureInitialized();
|
|
1566
|
-
//
|
|
1567
|
-
const
|
|
1568
|
-
|
|
1569
|
-
const
|
|
1570
|
-
return
|
|
1571
|
-
}
|
|
1572
|
-
|
|
1573
|
-
|
|
1574
|
-
|
|
1575
|
-
const path = getVerbMetadataPath(type, id);
|
|
1576
|
-
try {
|
|
1577
|
-
const metadata = await this.readWithInheritance(path);
|
|
1578
|
-
if (metadata) {
|
|
1579
|
-
// Cache the type for next time
|
|
1580
|
-
this.verbTypeCache.set(id, type);
|
|
1581
|
-
return metadata;
|
|
1582
|
-
}
|
|
1583
|
-
}
|
|
1584
|
-
catch (error) {
|
|
1585
|
-
// Not in this type, continue searching
|
|
1586
|
-
}
|
|
1763
|
+
// v6.0.0: Direct O(1) lookup with ID-first paths - no type search needed!
|
|
1764
|
+
const path = getVerbMetadataPath(id);
|
|
1765
|
+
try {
|
|
1766
|
+
const metadata = await this.readWithInheritance(path);
|
|
1767
|
+
return metadata || null;
|
|
1768
|
+
}
|
|
1769
|
+
catch (error) {
|
|
1770
|
+
// Entity not found
|
|
1771
|
+
return null;
|
|
1587
1772
|
}
|
|
1588
|
-
return null;
|
|
1589
1773
|
}
|
|
1590
1774
|
/**
|
|
1591
|
-
* Delete verb metadata from storage
|
|
1592
|
-
* v5.4.0: Uses type-first paths (must match saveVerbMetadata_internal)
|
|
1775
|
+
* Delete verb metadata from storage (v6.0.0: ID-first, O(1) delete)
|
|
1593
1776
|
*/
|
|
1594
1777
|
async deleteVerbMetadata(id) {
|
|
1595
1778
|
await this.ensureInitialized();
|
|
1596
|
-
//
|
|
1597
|
-
const
|
|
1598
|
-
|
|
1599
|
-
const path = getVerbMetadataPath(cachedType, id);
|
|
1600
|
-
await this.deleteObjectFromBranch(path);
|
|
1601
|
-
// Remove from cache after deletion
|
|
1602
|
-
this.verbTypeCache.delete(id);
|
|
1603
|
-
return;
|
|
1604
|
-
}
|
|
1605
|
-
// If not in cache, search all types to find and delete
|
|
1606
|
-
for (let i = 0; i < VERB_TYPE_COUNT; i++) {
|
|
1607
|
-
const type = TypeUtils.getVerbFromIndex(i);
|
|
1608
|
-
const path = getVerbMetadataPath(type, id);
|
|
1609
|
-
try {
|
|
1610
|
-
// Check if exists before deleting
|
|
1611
|
-
const exists = await this.readWithInheritance(path);
|
|
1612
|
-
if (exists) {
|
|
1613
|
-
await this.deleteObjectFromBranch(path);
|
|
1614
|
-
return;
|
|
1615
|
-
}
|
|
1616
|
-
}
|
|
1617
|
-
catch (error) {
|
|
1618
|
-
// Not in this type, continue searching
|
|
1619
|
-
}
|
|
1620
|
-
}
|
|
1779
|
+
// v6.0.0: Direct O(1) delete with ID-first path
|
|
1780
|
+
const path = getVerbMetadataPath(id);
|
|
1781
|
+
await this.deleteObjectFromBranch(path);
|
|
1621
1782
|
}
|
|
1622
1783
|
// ============================================================================
|
|
1623
|
-
//
|
|
1624
|
-
//
|
|
1784
|
+
// ID-FIRST HELPER METHODS (v6.0.0)
|
|
1785
|
+
// Direct O(1) ID lookups - no type needed!
|
|
1786
|
+
// Clean, simple architecture for billion-scale performance
|
|
1625
1787
|
// ============================================================================
|
|
1626
1788
|
/**
|
|
1627
1789
|
* Load type statistics from storage
|
|
@@ -1663,30 +1825,61 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
1663
1825
|
*/
|
|
1664
1826
|
async rebuildTypeCounts() {
|
|
1665
1827
|
prodLog.info('[BaseStorage] Rebuilding type counts from storage...');
|
|
1666
|
-
// Rebuild
|
|
1667
|
-
|
|
1668
|
-
|
|
1669
|
-
|
|
1828
|
+
// v6.0.0: Rebuild by scanning shards (0x00-0xFF) and reading metadata
|
|
1829
|
+
this.nounCountsByType = new Uint32Array(NOUN_TYPE_COUNT);
|
|
1830
|
+
this.verbCountsByType = new Uint32Array(VERB_TYPE_COUNT);
|
|
1831
|
+
// Scan noun shards
|
|
1832
|
+
for (let shard = 0; shard < 256; shard++) {
|
|
1833
|
+
const shardHex = shard.toString(16).padStart(2, '0');
|
|
1834
|
+
const shardDir = `entities/nouns/${shardHex}`;
|
|
1670
1835
|
try {
|
|
1671
|
-
const paths = await this.listObjectsInBranch(
|
|
1672
|
-
|
|
1836
|
+
const paths = await this.listObjectsInBranch(shardDir);
|
|
1837
|
+
for (const path of paths) {
|
|
1838
|
+
if (!path.includes('/metadata.json'))
|
|
1839
|
+
continue;
|
|
1840
|
+
try {
|
|
1841
|
+
const metadata = await this.readWithInheritance(path);
|
|
1842
|
+
if (metadata && metadata.noun) {
|
|
1843
|
+
const typeIndex = TypeUtils.getNounIndex(metadata.noun);
|
|
1844
|
+
if (typeIndex >= 0 && typeIndex < NOUN_TYPE_COUNT) {
|
|
1845
|
+
this.nounCountsByType[typeIndex]++;
|
|
1846
|
+
}
|
|
1847
|
+
}
|
|
1848
|
+
}
|
|
1849
|
+
catch (error) {
|
|
1850
|
+
// Skip entities that fail to load
|
|
1851
|
+
}
|
|
1852
|
+
}
|
|
1673
1853
|
}
|
|
1674
1854
|
catch (error) {
|
|
1675
|
-
//
|
|
1676
|
-
this.verbCountsByType[i] = 0;
|
|
1855
|
+
// Skip shards that don't exist
|
|
1677
1856
|
}
|
|
1678
1857
|
}
|
|
1679
|
-
//
|
|
1680
|
-
for (let
|
|
1681
|
-
const
|
|
1682
|
-
const
|
|
1858
|
+
// Scan verb shards
|
|
1859
|
+
for (let shard = 0; shard < 256; shard++) {
|
|
1860
|
+
const shardHex = shard.toString(16).padStart(2, '0');
|
|
1861
|
+
const shardDir = `entities/verbs/${shardHex}`;
|
|
1683
1862
|
try {
|
|
1684
|
-
const paths = await this.listObjectsInBranch(
|
|
1685
|
-
|
|
1863
|
+
const paths = await this.listObjectsInBranch(shardDir);
|
|
1864
|
+
for (const path of paths) {
|
|
1865
|
+
if (!path.includes('/metadata.json'))
|
|
1866
|
+
continue;
|
|
1867
|
+
try {
|
|
1868
|
+
const metadata = await this.readWithInheritance(path);
|
|
1869
|
+
if (metadata && metadata.verb) {
|
|
1870
|
+
const typeIndex = TypeUtils.getVerbIndex(metadata.verb);
|
|
1871
|
+
if (typeIndex >= 0 && typeIndex < VERB_TYPE_COUNT) {
|
|
1872
|
+
this.verbCountsByType[typeIndex]++;
|
|
1873
|
+
}
|
|
1874
|
+
}
|
|
1875
|
+
}
|
|
1876
|
+
catch (error) {
|
|
1877
|
+
// Skip entities that fail to load
|
|
1878
|
+
}
|
|
1879
|
+
}
|
|
1686
1880
|
}
|
|
1687
1881
|
catch (error) {
|
|
1688
|
-
//
|
|
1689
|
-
this.nounCountsByType[i] = 0;
|
|
1882
|
+
// Skip shards that don't exist
|
|
1690
1883
|
}
|
|
1691
1884
|
}
|
|
1692
1885
|
// Save rebuilt counts to storage
|
|
@@ -1696,18 +1889,13 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
1696
1889
|
prodLog.info(`[BaseStorage] Rebuilt counts: ${totalNouns} nouns, ${totalVerbs} verbs`);
|
|
1697
1890
|
}
|
|
1698
1891
|
/**
|
|
1699
|
-
* Get noun type
|
|
1700
|
-
*
|
|
1892
|
+
* Get noun type (v6.0.0: type no longer needed for paths!)
|
|
1893
|
+
* With ID-first paths, this is only used for internal statistics tracking.
|
|
1894
|
+
* The actual type is stored in metadata and indexed by MetadataIndexManager.
|
|
1701
1895
|
*/
|
|
1702
1896
|
getNounType(noun) {
|
|
1703
|
-
//
|
|
1704
|
-
|
|
1705
|
-
if (cached) {
|
|
1706
|
-
return cached;
|
|
1707
|
-
}
|
|
1708
|
-
// Default to 'thing' if unknown
|
|
1709
|
-
// This should only happen if saveNoun_internal is called before saveNounMetadata
|
|
1710
|
-
prodLog.warn(`[BaseStorage] Unknown noun type for ${noun.id}, defaulting to 'thing'`);
|
|
1897
|
+
// v6.0.0: Type cache removed - default to 'thing' for statistics
|
|
1898
|
+
// The real type is in metadata, accessible via getNounMetadata(id)
|
|
1711
1899
|
return 'thing';
|
|
1712
1900
|
}
|
|
1713
1901
|
/**
|
|
@@ -1790,15 +1978,14 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
1790
1978
|
// Converted from abstract to concrete - all adapters now have built-in type-aware
|
|
1791
1979
|
// ============================================================================
|
|
1792
1980
|
/**
|
|
1793
|
-
* Save a noun to storage (
|
|
1981
|
+
* Save a noun to storage (ID-first path)
|
|
1794
1982
|
*/
|
|
1795
1983
|
async saveNoun_internal(noun) {
|
|
1796
1984
|
const type = this.getNounType(noun);
|
|
1797
|
-
const path = getNounVectorPath(
|
|
1985
|
+
const path = getNounVectorPath(noun.id);
|
|
1798
1986
|
// Update type tracking
|
|
1799
1987
|
const typeIndex = TypeUtils.getNounIndex(type);
|
|
1800
1988
|
this.nounCountsByType[typeIndex]++;
|
|
1801
|
-
this.nounTypeCache.set(noun.id, type);
|
|
1802
1989
|
// COW-aware write (v5.0.1): Use COW helper for branch isolation
|
|
1803
1990
|
await this.writeObjectToBranch(path, noun);
|
|
1804
1991
|
// Periodically save statistics (every 100 saves)
|
|
@@ -1807,120 +1994,89 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
1807
1994
|
}
|
|
1808
1995
|
}
|
|
1809
1996
|
/**
|
|
1810
|
-
* Get a noun from storage (
|
|
1997
|
+
* Get a noun from storage (ID-first path)
|
|
1811
1998
|
*/
|
|
1812
1999
|
async getNoun_internal(id) {
|
|
1813
|
-
//
|
|
1814
|
-
const
|
|
1815
|
-
|
|
1816
|
-
const path = getNounVectorPath(cachedType, id);
|
|
2000
|
+
// v6.0.0: Direct O(1) lookup with ID-first paths - no type search needed!
|
|
2001
|
+
const path = getNounVectorPath(id);
|
|
2002
|
+
try {
|
|
1817
2003
|
// COW-aware read (v5.0.1): Use COW helper for branch isolation
|
|
1818
|
-
const
|
|
1819
|
-
|
|
1820
|
-
|
|
1821
|
-
|
|
1822
|
-
// Need to search across all types (expensive, but cached after first access)
|
|
1823
|
-
for (let i = 0; i < NOUN_TYPE_COUNT; i++) {
|
|
1824
|
-
const type = TypeUtils.getNounFromIndex(i);
|
|
1825
|
-
const path = getNounVectorPath(type, id);
|
|
1826
|
-
try {
|
|
1827
|
-
// COW-aware read (v5.0.1): Use COW helper for branch isolation
|
|
1828
|
-
const noun = await this.readWithInheritance(path);
|
|
1829
|
-
if (noun) {
|
|
1830
|
-
// Cache the type for next time
|
|
1831
|
-
this.nounTypeCache.set(id, type);
|
|
1832
|
-
// v5.7.10: Deserialize connections Map from JSON storage format
|
|
1833
|
-
return this.deserializeNoun(noun);
|
|
1834
|
-
}
|
|
1835
|
-
}
|
|
1836
|
-
catch (error) {
|
|
1837
|
-
// Not in this type, continue searching
|
|
2004
|
+
const noun = await this.readWithInheritance(path);
|
|
2005
|
+
if (noun) {
|
|
2006
|
+
// v5.7.10: Deserialize connections Map from JSON storage format
|
|
2007
|
+
return this.deserializeNoun(noun);
|
|
1838
2008
|
}
|
|
1839
2009
|
}
|
|
2010
|
+
catch (error) {
|
|
2011
|
+
// Entity not found
|
|
2012
|
+
return null;
|
|
2013
|
+
}
|
|
1840
2014
|
return null;
|
|
1841
2015
|
}
|
|
1842
2016
|
/**
|
|
1843
|
-
* Get nouns by noun type (
|
|
2017
|
+
* Get nouns by noun type (v6.0.0: Shard-based iteration!)
|
|
1844
2018
|
*/
|
|
1845
2019
|
async getNounsByNounType_internal(nounType) {
|
|
1846
|
-
|
|
1847
|
-
|
|
1848
|
-
// COW-aware list (v5.0.1): Use COW helper for branch isolation
|
|
1849
|
-
const paths = await this.listObjectsInBranch(prefix);
|
|
1850
|
-
// Load all nouns of this type
|
|
2020
|
+
// v6.0.0: Iterate by shards (0x00-0xFF) instead of types
|
|
2021
|
+
// Type is stored in metadata.noun field, we filter as we load
|
|
1851
2022
|
const nouns = [];
|
|
1852
|
-
for (
|
|
2023
|
+
for (let shard = 0; shard < 256; shard++) {
|
|
2024
|
+
const shardHex = shard.toString(16).padStart(2, '0');
|
|
2025
|
+
const shardDir = `entities/nouns/${shardHex}`;
|
|
1853
2026
|
try {
|
|
1854
|
-
|
|
1855
|
-
const
|
|
1856
|
-
|
|
1857
|
-
|
|
1858
|
-
|
|
1859
|
-
|
|
1860
|
-
|
|
2027
|
+
const nounFiles = await this.listObjectsInBranch(shardDir);
|
|
2028
|
+
for (const nounPath of nounFiles) {
|
|
2029
|
+
if (!nounPath.includes('/vectors.json'))
|
|
2030
|
+
continue;
|
|
2031
|
+
try {
|
|
2032
|
+
const noun = await this.readWithInheritance(nounPath);
|
|
2033
|
+
if (noun) {
|
|
2034
|
+
const deserialized = this.deserializeNoun(noun);
|
|
2035
|
+
// Check type from metadata
|
|
2036
|
+
const metadata = await this.getNounMetadata(deserialized.id);
|
|
2037
|
+
if (metadata && metadata.noun === nounType) {
|
|
2038
|
+
nouns.push(deserialized);
|
|
2039
|
+
}
|
|
2040
|
+
}
|
|
2041
|
+
}
|
|
2042
|
+
catch (error) {
|
|
2043
|
+
// Skip nouns that fail to load
|
|
2044
|
+
}
|
|
1861
2045
|
}
|
|
1862
2046
|
}
|
|
1863
2047
|
catch (error) {
|
|
1864
|
-
|
|
2048
|
+
// Skip shards that have no data
|
|
1865
2049
|
}
|
|
1866
2050
|
}
|
|
1867
2051
|
return nouns;
|
|
1868
2052
|
}
|
|
1869
2053
|
/**
|
|
1870
|
-
* Delete a noun from storage (
|
|
2054
|
+
* Delete a noun from storage (v6.0.0: ID-first, O(1) delete)
|
|
1871
2055
|
*/
|
|
1872
2056
|
async deleteNoun_internal(id) {
|
|
1873
|
-
//
|
|
1874
|
-
const
|
|
1875
|
-
|
|
1876
|
-
|
|
1877
|
-
|
|
1878
|
-
await this.deleteObjectFromBranch(path);
|
|
1879
|
-
// Update counts
|
|
1880
|
-
const typeIndex = TypeUtils.getNounIndex(cachedType);
|
|
1881
|
-
if (this.nounCountsByType[typeIndex] > 0) {
|
|
1882
|
-
this.nounCountsByType[typeIndex]--;
|
|
1883
|
-
}
|
|
1884
|
-
this.nounTypeCache.delete(id);
|
|
1885
|
-
return;
|
|
1886
|
-
}
|
|
1887
|
-
// Search across all types
|
|
1888
|
-
for (let i = 0; i < NOUN_TYPE_COUNT; i++) {
|
|
1889
|
-
const type = TypeUtils.getNounFromIndex(i);
|
|
1890
|
-
const path = getNounVectorPath(type, id);
|
|
1891
|
-
try {
|
|
1892
|
-
// COW-aware delete (v5.0.1): Use COW helper for branch isolation
|
|
1893
|
-
await this.deleteObjectFromBranch(path);
|
|
1894
|
-
// Update counts
|
|
1895
|
-
if (this.nounCountsByType[i] > 0) {
|
|
1896
|
-
this.nounCountsByType[i]--;
|
|
1897
|
-
}
|
|
1898
|
-
this.nounTypeCache.delete(id);
|
|
1899
|
-
return;
|
|
1900
|
-
}
|
|
1901
|
-
catch (error) {
|
|
1902
|
-
// Not in this type, continue
|
|
1903
|
-
}
|
|
1904
|
-
}
|
|
2057
|
+
// v6.0.0: Direct O(1) delete with ID-first path
|
|
2058
|
+
const path = getNounVectorPath(id);
|
|
2059
|
+
await this.deleteObjectFromBranch(path);
|
|
2060
|
+
// Note: Type-specific counts will be decremented via metadata tracking
|
|
2061
|
+
// The real type is in metadata, accessible if needed via getNounMetadata(id)
|
|
1905
2062
|
}
|
|
1906
2063
|
/**
|
|
1907
|
-
* Save a verb to storage (
|
|
2064
|
+
* Save a verb to storage (ID-first path)
|
|
1908
2065
|
*/
|
|
1909
2066
|
async saveVerb_internal(verb) {
|
|
1910
2067
|
// Type is now a first-class field in HNSWVerb - no caching needed!
|
|
1911
2068
|
const type = verb.verb;
|
|
1912
|
-
const path = getVerbVectorPath(
|
|
2069
|
+
const path = getVerbVectorPath(verb.id);
|
|
2070
|
+
prodLog.debug(`[BaseStorage] saveVerb_internal: id=${verb.id}, sourceId=${verb.sourceId}, targetId=${verb.targetId}, type=${type}`);
|
|
1913
2071
|
// Update type tracking
|
|
1914
2072
|
const typeIndex = TypeUtils.getVerbIndex(type);
|
|
1915
2073
|
this.verbCountsByType[typeIndex]++;
|
|
1916
|
-
this.verbTypeCache.set(verb.id, type);
|
|
1917
2074
|
// COW-aware write (v5.0.1): Use COW helper for branch isolation
|
|
1918
2075
|
await this.writeObjectToBranch(path, verb);
|
|
1919
|
-
//
|
|
1920
|
-
//
|
|
1921
|
-
|
|
1922
|
-
|
|
1923
|
-
// Fast incremental update - no rebuild needed
|
|
2076
|
+
// v6.0.0: Update GraphAdjacencyIndex incrementally (always available after init())
|
|
2077
|
+
// GraphAdjacencyIndex.addVerb() calls ensureInitialized() automatically
|
|
2078
|
+
if (this.graphIndex) {
|
|
2079
|
+
prodLog.debug(`[BaseStorage] Updating GraphAdjacencyIndex with verb ${verb.id}`);
|
|
1924
2080
|
await this.graphIndex.addVerb({
|
|
1925
2081
|
id: verb.id,
|
|
1926
2082
|
sourceId: verb.sourceId,
|
|
@@ -1932,8 +2088,12 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
1932
2088
|
type: verb.verb,
|
|
1933
2089
|
createdAt: { seconds: Math.floor(Date.now() / 1000), nanoseconds: 0 },
|
|
1934
2090
|
updatedAt: { seconds: Math.floor(Date.now() / 1000), nanoseconds: 0 },
|
|
1935
|
-
createdBy: { augmentation: 'storage', version: '
|
|
2091
|
+
createdBy: { augmentation: 'storage', version: '6.0.0' }
|
|
1936
2092
|
});
|
|
2093
|
+
prodLog.debug(`[BaseStorage] GraphAdjacencyIndex updated successfully`);
|
|
2094
|
+
}
|
|
2095
|
+
else {
|
|
2096
|
+
prodLog.warn(`[BaseStorage] graphIndex is null, cannot update index for verb ${verb.id}`);
|
|
1937
2097
|
}
|
|
1938
2098
|
// Periodically save statistics
|
|
1939
2099
|
if (this.verbCountsByType[typeIndex] % 100 === 0) {
|
|
@@ -1941,69 +2101,89 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
1941
2101
|
}
|
|
1942
2102
|
}
|
|
1943
2103
|
/**
|
|
1944
|
-
* Get a verb from storage (
|
|
2104
|
+
* Get a verb from storage (ID-first path)
|
|
1945
2105
|
*/
|
|
1946
2106
|
async getVerb_internal(id) {
|
|
1947
|
-
//
|
|
1948
|
-
const
|
|
1949
|
-
|
|
1950
|
-
const path = getVerbVectorPath(cachedType, id);
|
|
2107
|
+
// v6.0.0: Direct O(1) lookup with ID-first paths - no type search needed!
|
|
2108
|
+
const path = getVerbVectorPath(id);
|
|
2109
|
+
try {
|
|
1951
2110
|
// COW-aware read (v5.0.1): Use COW helper for branch isolation
|
|
1952
2111
|
const verb = await this.readWithInheritance(path);
|
|
1953
|
-
|
|
1954
|
-
|
|
1955
|
-
|
|
1956
|
-
// Search across all types (only on first access)
|
|
1957
|
-
for (let i = 0; i < VERB_TYPE_COUNT; i++) {
|
|
1958
|
-
const type = TypeUtils.getVerbFromIndex(i);
|
|
1959
|
-
const path = getVerbVectorPath(type, id);
|
|
1960
|
-
try {
|
|
1961
|
-
// COW-aware read (v5.0.1): Use COW helper for branch isolation
|
|
1962
|
-
const verb = await this.readWithInheritance(path);
|
|
1963
|
-
if (verb) {
|
|
1964
|
-
// Cache the type for next time (read from verb.verb field)
|
|
1965
|
-
this.verbTypeCache.set(id, verb.verb);
|
|
1966
|
-
// v5.7.10: Deserialize connections Map from JSON storage format
|
|
1967
|
-
return this.deserializeVerb(verb);
|
|
1968
|
-
}
|
|
1969
|
-
}
|
|
1970
|
-
catch (error) {
|
|
1971
|
-
// Not in this type, continue
|
|
2112
|
+
if (verb) {
|
|
2113
|
+
// v5.7.10: Deserialize connections Map from JSON storage format
|
|
2114
|
+
return this.deserializeVerb(verb);
|
|
1972
2115
|
}
|
|
1973
2116
|
}
|
|
2117
|
+
catch (error) {
|
|
2118
|
+
// Entity not found
|
|
2119
|
+
return null;
|
|
2120
|
+
}
|
|
1974
2121
|
return null;
|
|
1975
2122
|
}
|
|
1976
2123
|
/**
|
|
1977
|
-
* Get verbs by source (
|
|
1978
|
-
*
|
|
2124
|
+
* Get verbs by source (v6.0.0: Uses GraphAdjacencyIndex when available)
|
|
2125
|
+
* Falls back to shard iteration during initialization to avoid circular dependency
|
|
1979
2126
|
*/
|
|
1980
2127
|
async getVerbsBySource_internal(sourceId) {
|
|
1981
|
-
// v5.7.1: Reverted to v5.6.3 implementation to fix circular dependency deadlock
|
|
1982
|
-
// v5.7.0 called getGraphIndex() here, creating deadlock during initialization:
|
|
1983
|
-
// GraphAdjacencyIndex.rebuild() → storage.getVerbs() → getVerbsBySource_internal() → getGraphIndex() → [deadlock]
|
|
1984
|
-
// v5.4.0: Type-first implementation - scan across all verb types
|
|
1985
|
-
// COW-aware: uses readWithInheritance for each verb
|
|
1986
2128
|
await this.ensureInitialized();
|
|
2129
|
+
prodLog.debug(`[BaseStorage] getVerbsBySource_internal: sourceId=${sourceId}, graphIndex=${!!this.graphIndex}, isInitialized=${this.graphIndex?.isInitialized}`);
|
|
2130
|
+
// v6.0.0: Fast path - use GraphAdjacencyIndex if available (lazy-loaded)
|
|
2131
|
+
if (this.graphIndex && this.graphIndex.isInitialized) {
|
|
2132
|
+
try {
|
|
2133
|
+
const verbIds = await this.graphIndex.getVerbIdsBySource(sourceId);
|
|
2134
|
+
prodLog.debug(`[BaseStorage] GraphAdjacencyIndex found ${verbIds.length} verb IDs for sourceId=${sourceId}`);
|
|
2135
|
+
const results = [];
|
|
2136
|
+
for (const verbId of verbIds) {
|
|
2137
|
+
const verb = await this.getVerb_internal(verbId);
|
|
2138
|
+
const metadata = await this.getVerbMetadata(verbId);
|
|
2139
|
+
if (verb && metadata) {
|
|
2140
|
+
results.push({
|
|
2141
|
+
...verb,
|
|
2142
|
+
weight: metadata.weight,
|
|
2143
|
+
confidence: metadata.confidence,
|
|
2144
|
+
createdAt: metadata.createdAt
|
|
2145
|
+
? (typeof metadata.createdAt === 'number' ? metadata.createdAt : metadata.createdAt.seconds * 1000)
|
|
2146
|
+
: Date.now(),
|
|
2147
|
+
updatedAt: metadata.updatedAt
|
|
2148
|
+
? (typeof metadata.updatedAt === 'number' ? metadata.updatedAt : metadata.updatedAt.seconds * 1000)
|
|
2149
|
+
: Date.now(),
|
|
2150
|
+
service: metadata.service,
|
|
2151
|
+
createdBy: metadata.createdBy,
|
|
2152
|
+
metadata: metadata || {}
|
|
2153
|
+
});
|
|
2154
|
+
}
|
|
2155
|
+
}
|
|
2156
|
+
prodLog.debug(`[BaseStorage] GraphAdjacencyIndex path returned ${results.length} verbs`);
|
|
2157
|
+
return results;
|
|
2158
|
+
}
|
|
2159
|
+
catch (error) {
|
|
2160
|
+
prodLog.warn('[BaseStorage] GraphAdjacencyIndex lookup failed, falling back to shard iteration:', error);
|
|
2161
|
+
}
|
|
2162
|
+
}
|
|
2163
|
+
// v6.0.0: Fallback - iterate by shards (WITH deserialization fix!)
|
|
2164
|
+
prodLog.debug(`[BaseStorage] Using shard iteration fallback for sourceId=${sourceId}`);
|
|
1987
2165
|
const results = [];
|
|
1988
|
-
|
|
1989
|
-
|
|
1990
|
-
|
|
1991
|
-
const
|
|
2166
|
+
let shardsScanned = 0;
|
|
2167
|
+
let verbsFound = 0;
|
|
2168
|
+
for (let shard = 0; shard < 256; shard++) {
|
|
2169
|
+
const shardHex = shard.toString(16).padStart(2, '0');
|
|
2170
|
+
const shardDir = `entities/verbs/${shardHex}`;
|
|
1992
2171
|
try {
|
|
1993
|
-
|
|
1994
|
-
|
|
1995
|
-
const verbFiles = await this.listObjectsInBranch(typeDir);
|
|
2172
|
+
const verbFiles = await this.listObjectsInBranch(shardDir);
|
|
2173
|
+
shardsScanned++;
|
|
1996
2174
|
for (const verbPath of verbFiles) {
|
|
1997
|
-
|
|
1998
|
-
if (!verbPath.endsWith('.json'))
|
|
2175
|
+
if (!verbPath.includes('/vectors.json'))
|
|
1999
2176
|
continue;
|
|
2000
2177
|
try {
|
|
2001
|
-
const
|
|
2002
|
-
if (
|
|
2003
|
-
|
|
2004
|
-
|
|
2178
|
+
const rawVerb = await this.readWithInheritance(verbPath);
|
|
2179
|
+
if (!rawVerb)
|
|
2180
|
+
continue;
|
|
2181
|
+
verbsFound++;
|
|
2182
|
+
// v6.0.0: CRITICAL - Deserialize connections Map from JSON storage format
|
|
2183
|
+
const verb = this.deserializeVerb(rawVerb);
|
|
2184
|
+
if (verb.sourceId === sourceId) {
|
|
2185
|
+
const metadataPath = getVerbMetadataPath(verb.id);
|
|
2005
2186
|
const metadata = await this.readWithInheritance(metadataPath);
|
|
2006
|
-
// v5.4.0: Extract standard fields from metadata to top-level (like nouns)
|
|
2007
2187
|
results.push({
|
|
2008
2188
|
...verb,
|
|
2009
2189
|
weight: metadata?.weight,
|
|
@@ -2022,11 +2202,122 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
2022
2202
|
}
|
|
2023
2203
|
catch (error) {
|
|
2024
2204
|
// Skip verbs that fail to load
|
|
2205
|
+
prodLog.debug(`[BaseStorage] Failed to load verb from ${verbPath}:`, error);
|
|
2025
2206
|
}
|
|
2026
2207
|
}
|
|
2027
2208
|
}
|
|
2028
2209
|
catch (error) {
|
|
2029
|
-
// Skip
|
|
2210
|
+
// Skip shards that have no data
|
|
2211
|
+
}
|
|
2212
|
+
}
|
|
2213
|
+
prodLog.debug(`[BaseStorage] Shard iteration: scanned ${shardsScanned} shards, found ${verbsFound} total verbs, matched ${results.length} for sourceId=${sourceId}`);
|
|
2214
|
+
return results;
|
|
2215
|
+
}
|
|
2216
|
+
/**
|
|
2217
|
+
* Batch get verbs by source IDs (v5.12.0 - Cloud Storage Optimization)
|
|
2218
|
+
*
|
|
2219
|
+
* **Performance**: Eliminates N+1 query pattern for relationship lookups
|
|
2220
|
+
* - Current: N × getVerbsBySource() = N × (list all verbs + filter)
|
|
2221
|
+
* - Batched: 1 × list all verbs + filter by N sourceIds
|
|
2222
|
+
*
|
|
2223
|
+
* **Use cases:**
|
|
2224
|
+
* - VFS tree traversal (get Contains edges for multiple directories)
|
|
2225
|
+
* - brain.getRelations() for multiple entities
|
|
2226
|
+
* - Graph traversal (fetch neighbors of multiple nodes)
|
|
2227
|
+
*
|
|
2228
|
+
* @param sourceIds Array of source entity IDs
|
|
2229
|
+
* @param verbType Optional verb type filter (e.g., VerbType.Contains for VFS)
|
|
2230
|
+
* @returns Map of sourceId → verbs[]
|
|
2231
|
+
*
|
|
2232
|
+
* @example
|
|
2233
|
+
* ```typescript
|
|
2234
|
+
* // Before (N+1 pattern)
|
|
2235
|
+
* for (const dirId of dirIds) {
|
|
2236
|
+
* const children = await storage.getVerbsBySource(dirId) // N calls
|
|
2237
|
+
* }
|
|
2238
|
+
*
|
|
2239
|
+
* // After (batched)
|
|
2240
|
+
* const childrenByDir = await storage.getVerbsBySourceBatch(dirIds, VerbType.Contains) // 1 scan
|
|
2241
|
+
* for (const dirId of dirIds) {
|
|
2242
|
+
* const children = childrenByDir.get(dirId) || []
|
|
2243
|
+
* }
|
|
2244
|
+
* ```
|
|
2245
|
+
*
|
|
2246
|
+
* @since v5.12.0
|
|
2247
|
+
*/
|
|
2248
|
+
async getVerbsBySourceBatch(sourceIds, verbType) {
|
|
2249
|
+
await this.ensureInitialized();
|
|
2250
|
+
const results = new Map();
|
|
2251
|
+
if (sourceIds.length === 0)
|
|
2252
|
+
return results;
|
|
2253
|
+
// Initialize empty arrays for all requested sourceIds
|
|
2254
|
+
for (const sourceId of sourceIds) {
|
|
2255
|
+
results.set(sourceId, []);
|
|
2256
|
+
}
|
|
2257
|
+
// Convert sourceIds to Set for O(1) lookup
|
|
2258
|
+
const sourceIdSet = new Set(sourceIds);
|
|
2259
|
+
// v6.0.0: Iterate by shards (0x00-0xFF) instead of types
|
|
2260
|
+
for (let shard = 0; shard < 256; shard++) {
|
|
2261
|
+
const shardHex = shard.toString(16).padStart(2, '0');
|
|
2262
|
+
const shardDir = `entities/verbs/${shardHex}`;
|
|
2263
|
+
try {
|
|
2264
|
+
// List all verb files in this shard
|
|
2265
|
+
const verbFiles = await this.listObjectsInBranch(shardDir);
|
|
2266
|
+
// Build paths for batch read
|
|
2267
|
+
const verbPaths = [];
|
|
2268
|
+
const metadataPaths = [];
|
|
2269
|
+
const pathToId = new Map();
|
|
2270
|
+
for (const verbPath of verbFiles) {
|
|
2271
|
+
if (!verbPath.includes('/vectors.json'))
|
|
2272
|
+
continue;
|
|
2273
|
+
verbPaths.push(verbPath);
|
|
2274
|
+
// Extract ID from path: "entities/verbs/{shard}/{id}/vectors.json"
|
|
2275
|
+
const parts = verbPath.split('/');
|
|
2276
|
+
const verbId = parts[parts.length - 2]; // ID is second-to-last segment
|
|
2277
|
+
pathToId.set(verbPath, verbId);
|
|
2278
|
+
// Prepare metadata path
|
|
2279
|
+
metadataPaths.push(getVerbMetadataPath(verbId));
|
|
2280
|
+
}
|
|
2281
|
+
// Batch read all verb files for this shard
|
|
2282
|
+
const verbDataMap = await this.readBatchWithInheritance(verbPaths);
|
|
2283
|
+
const metadataMap = await this.readBatchWithInheritance(metadataPaths);
|
|
2284
|
+
// Process results
|
|
2285
|
+
for (const [verbPath, rawVerbData] of verbDataMap.entries()) {
|
|
2286
|
+
if (!rawVerbData || !rawVerbData.sourceId)
|
|
2287
|
+
continue;
|
|
2288
|
+
// v6.0.0: Deserialize connections Map from JSON storage format
|
|
2289
|
+
const verbData = this.deserializeVerb(rawVerbData);
|
|
2290
|
+
// Check if this verb's source is in our requested set
|
|
2291
|
+
if (!sourceIdSet.has(verbData.sourceId))
|
|
2292
|
+
continue;
|
|
2293
|
+
// If verbType specified, filter by type
|
|
2294
|
+
if (verbType && verbData.verb !== verbType)
|
|
2295
|
+
continue;
|
|
2296
|
+
// Found matching verb - hydrate with metadata
|
|
2297
|
+
const verbId = pathToId.get(verbPath);
|
|
2298
|
+
const metadataPath = getVerbMetadataPath(verbId);
|
|
2299
|
+
const metadata = metadataMap.get(metadataPath) || {};
|
|
2300
|
+
const hydratedVerb = {
|
|
2301
|
+
...verbData,
|
|
2302
|
+
weight: metadata?.weight,
|
|
2303
|
+
confidence: metadata?.confidence,
|
|
2304
|
+
createdAt: metadata?.createdAt
|
|
2305
|
+
? (typeof metadata.createdAt === 'number' ? metadata.createdAt : metadata.createdAt.seconds * 1000)
|
|
2306
|
+
: Date.now(),
|
|
2307
|
+
updatedAt: metadata?.updatedAt
|
|
2308
|
+
? (typeof metadata.updatedAt === 'number' ? metadata.updatedAt : metadata.updatedAt.seconds * 1000)
|
|
2309
|
+
: Date.now(),
|
|
2310
|
+
service: metadata?.service,
|
|
2311
|
+
createdBy: metadata?.createdBy,
|
|
2312
|
+
metadata: metadata
|
|
2313
|
+
};
|
|
2314
|
+
// Add to results for this sourceId
|
|
2315
|
+
const sourceVerbs = results.get(verbData.sourceId);
|
|
2316
|
+
sourceVerbs.push(hydratedVerb);
|
|
2317
|
+
}
|
|
2318
|
+
}
|
|
2319
|
+
catch (error) {
|
|
2320
|
+
// Skip shards that have no data
|
|
2030
2321
|
}
|
|
2031
2322
|
}
|
|
2032
2323
|
return results;
|
|
@@ -2037,31 +2328,57 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
2037
2328
|
* v5.4.0: Fixed to directly list verb files instead of directories
|
|
2038
2329
|
*/
|
|
2039
2330
|
async getVerbsByTarget_internal(targetId) {
|
|
2040
|
-
// v5.7.1: Reverted to v5.6.3 implementation to fix circular dependency deadlock
|
|
2041
|
-
// v5.7.0 called getGraphIndex() here, creating deadlock during initialization
|
|
2042
|
-
// v5.4.0: Type-first implementation - scan across all verb types
|
|
2043
|
-
// COW-aware: uses readWithInheritance for each verb
|
|
2044
2331
|
await this.ensureInitialized();
|
|
2332
|
+
// v6.0.0: Fast path - use GraphAdjacencyIndex if available (lazy-loaded)
|
|
2333
|
+
if (this.graphIndex && this.graphIndex.isInitialized) {
|
|
2334
|
+
try {
|
|
2335
|
+
const verbIds = await this.graphIndex.getVerbIdsByTarget(targetId);
|
|
2336
|
+
const results = [];
|
|
2337
|
+
for (const verbId of verbIds) {
|
|
2338
|
+
const verb = await this.getVerb_internal(verbId);
|
|
2339
|
+
const metadata = await this.getVerbMetadata(verbId);
|
|
2340
|
+
if (verb && metadata) {
|
|
2341
|
+
results.push({
|
|
2342
|
+
...verb,
|
|
2343
|
+
weight: metadata.weight,
|
|
2344
|
+
confidence: metadata.confidence,
|
|
2345
|
+
createdAt: metadata.createdAt
|
|
2346
|
+
? (typeof metadata.createdAt === 'number' ? metadata.createdAt : metadata.createdAt.seconds * 1000)
|
|
2347
|
+
: Date.now(),
|
|
2348
|
+
updatedAt: metadata.updatedAt
|
|
2349
|
+
? (typeof metadata.updatedAt === 'number' ? metadata.updatedAt : metadata.updatedAt.seconds * 1000)
|
|
2350
|
+
: Date.now(),
|
|
2351
|
+
service: metadata.service,
|
|
2352
|
+
createdBy: metadata.createdBy,
|
|
2353
|
+
metadata: metadata || {}
|
|
2354
|
+
});
|
|
2355
|
+
}
|
|
2356
|
+
}
|
|
2357
|
+
return results;
|
|
2358
|
+
}
|
|
2359
|
+
catch (error) {
|
|
2360
|
+
prodLog.warn('[BaseStorage] GraphAdjacencyIndex lookup failed, falling back to shard iteration:', error);
|
|
2361
|
+
}
|
|
2362
|
+
}
|
|
2363
|
+
// v6.0.0: Fallback - iterate by shards (WITH deserialization fix!)
|
|
2045
2364
|
const results = [];
|
|
2046
|
-
|
|
2047
|
-
|
|
2048
|
-
const
|
|
2049
|
-
const typeDir = `entities/verbs/${type}/vectors`;
|
|
2365
|
+
for (let shard = 0; shard < 256; shard++) {
|
|
2366
|
+
const shardHex = shard.toString(16).padStart(2, '0');
|
|
2367
|
+
const shardDir = `entities/verbs/${shardHex}`;
|
|
2050
2368
|
try {
|
|
2051
|
-
|
|
2052
|
-
// listObjectsInBranch returns full paths to .json files, not directories
|
|
2053
|
-
const verbFiles = await this.listObjectsInBranch(typeDir);
|
|
2369
|
+
const verbFiles = await this.listObjectsInBranch(shardDir);
|
|
2054
2370
|
for (const verbPath of verbFiles) {
|
|
2055
|
-
|
|
2056
|
-
if (!verbPath.endsWith('.json'))
|
|
2371
|
+
if (!verbPath.includes('/vectors.json'))
|
|
2057
2372
|
continue;
|
|
2058
2373
|
try {
|
|
2059
|
-
const
|
|
2060
|
-
if (
|
|
2061
|
-
|
|
2062
|
-
|
|
2374
|
+
const rawVerb = await this.readWithInheritance(verbPath);
|
|
2375
|
+
if (!rawVerb)
|
|
2376
|
+
continue;
|
|
2377
|
+
// v6.0.0: CRITICAL - Deserialize connections Map from JSON storage format
|
|
2378
|
+
const verb = this.deserializeVerb(rawVerb);
|
|
2379
|
+
if (verb.targetId === targetId) {
|
|
2380
|
+
const metadataPath = getVerbMetadataPath(verb.id);
|
|
2063
2381
|
const metadata = await this.readWithInheritance(metadataPath);
|
|
2064
|
-
// v5.4.0: Extract standard fields from metadata to top-level (like nouns)
|
|
2065
2382
|
results.push({
|
|
2066
2383
|
...verb,
|
|
2067
2384
|
weight: metadata?.weight,
|
|
@@ -2084,95 +2401,77 @@ export class BaseStorage extends BaseStorageAdapter {
|
|
|
2084
2401
|
}
|
|
2085
2402
|
}
|
|
2086
2403
|
catch (error) {
|
|
2087
|
-
// Skip
|
|
2404
|
+
// Skip shards that have no data
|
|
2088
2405
|
}
|
|
2089
2406
|
}
|
|
2090
2407
|
return results;
|
|
2091
2408
|
}
|
|
2092
2409
|
/**
|
|
2093
|
-
* Get verbs by type (
|
|
2410
|
+
* Get verbs by type (v6.0.0: Shard iteration with type filtering)
|
|
2094
2411
|
*/
|
|
2095
2412
|
async getVerbsByType_internal(verbType) {
|
|
2096
|
-
|
|
2097
|
-
const prefix = `entities/verbs/${type}/vectors/`;
|
|
2098
|
-
// COW-aware list (v5.0.1): Use COW helper for branch isolation
|
|
2099
|
-
const paths = await this.listObjectsInBranch(prefix);
|
|
2413
|
+
// v6.0.0: Iterate by shards (0x00-0xFF) instead of type-first paths
|
|
2100
2414
|
const verbs = [];
|
|
2101
|
-
for (
|
|
2415
|
+
for (let shard = 0; shard < 256; shard++) {
|
|
2416
|
+
const shardHex = shard.toString(16).padStart(2, '0');
|
|
2417
|
+
const shardDir = `entities/verbs/${shardHex}`;
|
|
2102
2418
|
try {
|
|
2103
|
-
|
|
2104
|
-
const
|
|
2105
|
-
|
|
2106
|
-
|
|
2107
|
-
|
|
2108
|
-
|
|
2109
|
-
|
|
2110
|
-
|
|
2111
|
-
|
|
2112
|
-
|
|
2113
|
-
|
|
2114
|
-
|
|
2115
|
-
|
|
2116
|
-
|
|
2117
|
-
|
|
2118
|
-
|
|
2119
|
-
|
|
2120
|
-
|
|
2121
|
-
|
|
2122
|
-
|
|
2123
|
-
|
|
2124
|
-
|
|
2125
|
-
|
|
2126
|
-
|
|
2127
|
-
|
|
2128
|
-
|
|
2129
|
-
|
|
2130
|
-
|
|
2131
|
-
|
|
2132
|
-
|
|
2133
|
-
|
|
2134
|
-
|
|
2419
|
+
const verbFiles = await this.listObjectsInBranch(shardDir);
|
|
2420
|
+
for (const verbPath of verbFiles) {
|
|
2421
|
+
if (!verbPath.includes('/vectors.json'))
|
|
2422
|
+
continue;
|
|
2423
|
+
try {
|
|
2424
|
+
const rawVerb = await this.readWithInheritance(verbPath);
|
|
2425
|
+
if (!rawVerb)
|
|
2426
|
+
continue;
|
|
2427
|
+
// v5.7.10: Deserialize connections Map from JSON storage format
|
|
2428
|
+
const hnswVerb = this.deserializeVerb(rawVerb);
|
|
2429
|
+
// Filter by verb type
|
|
2430
|
+
if (hnswVerb.verb !== verbType)
|
|
2431
|
+
continue;
|
|
2432
|
+
// Load metadata separately (optional in v4.0.0!)
|
|
2433
|
+
const metadata = await this.getVerbMetadata(hnswVerb.id);
|
|
2434
|
+
// v4.8.0: Extract standard fields from metadata to top-level
|
|
2435
|
+
const metadataObj = (metadata || {});
|
|
2436
|
+
const { createdAt, updatedAt, confidence, weight, service, data, createdBy, ...customMetadata } = metadataObj;
|
|
2437
|
+
const verbWithMetadata = {
|
|
2438
|
+
id: hnswVerb.id,
|
|
2439
|
+
vector: [...hnswVerb.vector],
|
|
2440
|
+
connections: hnswVerb.connections, // v5.7.10: Already deserialized
|
|
2441
|
+
verb: hnswVerb.verb,
|
|
2442
|
+
sourceId: hnswVerb.sourceId,
|
|
2443
|
+
targetId: hnswVerb.targetId,
|
|
2444
|
+
createdAt: createdAt || Date.now(),
|
|
2445
|
+
updatedAt: updatedAt || Date.now(),
|
|
2446
|
+
confidence: confidence,
|
|
2447
|
+
weight: weight,
|
|
2448
|
+
service: service,
|
|
2449
|
+
data: data,
|
|
2450
|
+
createdBy,
|
|
2451
|
+
metadata: customMetadata
|
|
2452
|
+
};
|
|
2453
|
+
verbs.push(verbWithMetadata);
|
|
2454
|
+
}
|
|
2455
|
+
catch (error) {
|
|
2456
|
+
// Skip verbs that fail to load
|
|
2457
|
+
}
|
|
2458
|
+
}
|
|
2135
2459
|
}
|
|
2136
2460
|
catch (error) {
|
|
2137
|
-
|
|
2461
|
+
// Skip shards that have no data
|
|
2138
2462
|
}
|
|
2139
2463
|
}
|
|
2140
2464
|
return verbs;
|
|
2141
2465
|
}
|
|
2142
2466
|
/**
|
|
2143
|
-
* Delete a verb from storage (
|
|
2467
|
+
* Delete a verb from storage (v6.0.0: ID-first, O(1) delete)
|
|
2144
2468
|
*/
|
|
2145
2469
|
async deleteVerb_internal(id) {
|
|
2146
|
-
//
|
|
2147
|
-
const
|
|
2148
|
-
|
|
2149
|
-
|
|
2150
|
-
|
|
2151
|
-
await this.deleteObjectFromBranch(path);
|
|
2152
|
-
const typeIndex = TypeUtils.getVerbIndex(cachedType);
|
|
2153
|
-
if (this.verbCountsByType[typeIndex] > 0) {
|
|
2154
|
-
this.verbCountsByType[typeIndex]--;
|
|
2155
|
-
}
|
|
2156
|
-
this.verbTypeCache.delete(id);
|
|
2157
|
-
return;
|
|
2158
|
-
}
|
|
2159
|
-
// Search across all types
|
|
2160
|
-
for (let i = 0; i < VERB_TYPE_COUNT; i++) {
|
|
2161
|
-
const type = TypeUtils.getVerbFromIndex(i);
|
|
2162
|
-
const path = getVerbVectorPath(type, id);
|
|
2163
|
-
try {
|
|
2164
|
-
// COW-aware delete (v5.0.1): Use COW helper for branch isolation
|
|
2165
|
-
await this.deleteObjectFromBranch(path);
|
|
2166
|
-
if (this.verbCountsByType[i] > 0) {
|
|
2167
|
-
this.verbCountsByType[i]--;
|
|
2168
|
-
}
|
|
2169
|
-
this.verbTypeCache.delete(id);
|
|
2170
|
-
return;
|
|
2171
|
-
}
|
|
2172
|
-
catch (error) {
|
|
2173
|
-
// Continue
|
|
2174
|
-
}
|
|
2175
|
-
}
|
|
2470
|
+
// v6.0.0: Direct O(1) delete with ID-first path
|
|
2471
|
+
const path = getVerbVectorPath(id);
|
|
2472
|
+
await this.deleteObjectFromBranch(path);
|
|
2473
|
+
// Note: Type-specific counts will be decremented via metadata tracking
|
|
2474
|
+
// The real type is in metadata, accessible if needed via getVerbMetadata(id)
|
|
2176
2475
|
}
|
|
2177
2476
|
/**
|
|
2178
2477
|
* Helper method to convert a Map to a plain object for serialization
|