@soulcraft/brainy 3.8.3 → 3.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/brainy.d.ts +27 -0
- package/dist/brainy.js +231 -10
- package/dist/coreTypes.d.ts +10 -0
- package/dist/hnsw/hnswIndex.d.ts +2 -0
- package/dist/hnsw/hnswIndex.js +10 -0
- package/dist/neural/improvedNeuralAPI.d.ts +14 -1
- package/dist/neural/improvedNeuralAPI.js +59 -20
- package/dist/neural/naturalLanguageProcessorStatic.d.ts +1 -0
- package/dist/neural/naturalLanguageProcessorStatic.js +3 -2
- package/dist/storage/adapters/baseStorageAdapter.d.ts +59 -0
- package/dist/storage/adapters/baseStorageAdapter.js +137 -0
- package/dist/storage/adapters/fileSystemStorage.d.ts +41 -0
- package/dist/storage/adapters/fileSystemStorage.js +227 -19
- package/dist/storage/adapters/memoryStorage.d.ts +8 -0
- package/dist/storage/adapters/memoryStorage.js +48 -1
- package/dist/storage/adapters/opfsStorage.d.ts +12 -0
- package/dist/storage/adapters/opfsStorage.js +68 -0
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +34 -0
- package/dist/storage/adapters/s3CompatibleStorage.js +129 -3
- package/dist/storage/baseStorage.js +4 -3
- package/dist/storage/readOnlyOptimizations.d.ts +0 -9
- package/dist/storage/readOnlyOptimizations.js +6 -28
- package/dist/types/brainy.types.d.ts +15 -0
- package/dist/utils/metadataIndex.d.ts +5 -0
- package/dist/utils/metadataIndex.js +24 -0
- package/dist/utils/mutex.d.ts +53 -0
- package/dist/utils/mutex.js +221 -0
- package/dist/utils/paramValidation.js +20 -4
- package/package.json +1 -1
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
* Provides common functionality for all storage adapters, including statistics tracking
|
|
4
4
|
*/
|
|
5
5
|
import { extractFieldNamesFromJson, mapToStandardField } from '../../utils/fieldNameTracking.js';
|
|
6
|
+
import { getGlobalMutex } from '../../utils/mutex.js';
|
|
6
7
|
/**
|
|
7
8
|
* Base class for storage adapters that implements statistics tracking
|
|
8
9
|
*/
|
|
@@ -37,6 +38,16 @@ export class BaseStorageAdapter {
|
|
|
37
38
|
this.totalDelayMs = 0;
|
|
38
39
|
// Service-level throttling
|
|
39
40
|
this.serviceThrottling = new Map();
|
|
41
|
+
// =============================================
|
|
42
|
+
// Universal O(1) Count Management
|
|
43
|
+
// =============================================
|
|
44
|
+
// Universal count tracking - O(1) operations
|
|
45
|
+
this.totalNounCount = 0;
|
|
46
|
+
this.totalVerbCount = 0;
|
|
47
|
+
this.entityCounts = new Map(); // type -> count
|
|
48
|
+
this.verbCounts = new Map(); // verb type -> count
|
|
49
|
+
this.countCache = new Map();
|
|
50
|
+
this.COUNT_CACHE_TTL = 60000; // 1 minute cache TTL
|
|
40
51
|
}
|
|
41
52
|
/**
|
|
42
53
|
* Save statistics data
|
|
@@ -609,5 +620,131 @@ export class BaseStorageAdapter {
|
|
|
609
620
|
}
|
|
610
621
|
return stats;
|
|
611
622
|
}
|
|
623
|
+
/**
|
|
624
|
+
* Get total noun count - O(1) operation
|
|
625
|
+
* @returns Promise that resolves to the total number of nouns
|
|
626
|
+
*/
|
|
627
|
+
async getNounCount() {
|
|
628
|
+
return this.totalNounCount;
|
|
629
|
+
}
|
|
630
|
+
/**
|
|
631
|
+
* Get total verb count - O(1) operation
|
|
632
|
+
* @returns Promise that resolves to the total number of verbs
|
|
633
|
+
*/
|
|
634
|
+
async getVerbCount() {
|
|
635
|
+
return this.totalVerbCount;
|
|
636
|
+
}
|
|
637
|
+
/**
|
|
638
|
+
* Increment count for entity type - O(1) operation
|
|
639
|
+
* Protected by storage-specific mechanisms (mutex, distributed consensus, etc.)
|
|
640
|
+
* @param type The entity type
|
|
641
|
+
*/
|
|
642
|
+
incrementEntityCount(type) {
|
|
643
|
+
// For distributed scenarios, this is aggregated across shards
|
|
644
|
+
// For single-node, this is protected by storage-specific locking
|
|
645
|
+
this.entityCounts.set(type, (this.entityCounts.get(type) || 0) + 1);
|
|
646
|
+
this.totalNounCount++;
|
|
647
|
+
// Update cache
|
|
648
|
+
this.countCache.set('nouns_count', {
|
|
649
|
+
count: this.totalNounCount,
|
|
650
|
+
timestamp: Date.now()
|
|
651
|
+
});
|
|
652
|
+
}
|
|
653
|
+
/**
|
|
654
|
+
* Thread-safe increment for concurrent scenarios
|
|
655
|
+
* Uses mutex for single-node, distributed consensus for multi-node
|
|
656
|
+
*/
|
|
657
|
+
async incrementEntityCountSafe(type) {
|
|
658
|
+
// Single-node mutex protection (distributed mode handled by coordinator)
|
|
659
|
+
const mutex = getGlobalMutex();
|
|
660
|
+
await mutex.runExclusive(`count-entity-${type}`, async () => {
|
|
661
|
+
this.incrementEntityCount(type);
|
|
662
|
+
// Persist counts periodically
|
|
663
|
+
if (this.totalNounCount % 10 === 0) {
|
|
664
|
+
await this.persistCounts();
|
|
665
|
+
}
|
|
666
|
+
});
|
|
667
|
+
}
|
|
668
|
+
/**
|
|
669
|
+
* Decrement count for entity type - O(1) operation
|
|
670
|
+
* @param type The entity type
|
|
671
|
+
*/
|
|
672
|
+
decrementEntityCount(type) {
|
|
673
|
+
const current = this.entityCounts.get(type) || 0;
|
|
674
|
+
if (current > 1) {
|
|
675
|
+
this.entityCounts.set(type, current - 1);
|
|
676
|
+
}
|
|
677
|
+
else {
|
|
678
|
+
this.entityCounts.delete(type);
|
|
679
|
+
}
|
|
680
|
+
if (this.totalNounCount > 0) {
|
|
681
|
+
this.totalNounCount--;
|
|
682
|
+
}
|
|
683
|
+
// Update cache
|
|
684
|
+
this.countCache.set('nouns_count', {
|
|
685
|
+
count: this.totalNounCount,
|
|
686
|
+
timestamp: Date.now()
|
|
687
|
+
});
|
|
688
|
+
}
|
|
689
|
+
/**
|
|
690
|
+
* Thread-safe decrement for concurrent scenarios
|
|
691
|
+
*/
|
|
692
|
+
async decrementEntityCountSafe(type) {
|
|
693
|
+
const mutex = getGlobalMutex();
|
|
694
|
+
await mutex.runExclusive(`count-entity-${type}`, async () => {
|
|
695
|
+
this.decrementEntityCount(type);
|
|
696
|
+
if (this.totalNounCount % 10 === 0) {
|
|
697
|
+
await this.persistCounts();
|
|
698
|
+
}
|
|
699
|
+
});
|
|
700
|
+
}
|
|
701
|
+
/**
|
|
702
|
+
* Increment verb count - O(1) operation with mutex protection
|
|
703
|
+
* @param type The verb type
|
|
704
|
+
*/
|
|
705
|
+
async incrementVerbCount(type) {
|
|
706
|
+
const mutex = getGlobalMutex();
|
|
707
|
+
await mutex.runExclusive(`count-verb-${type}`, async () => {
|
|
708
|
+
this.verbCounts.set(type, (this.verbCounts.get(type) || 0) + 1);
|
|
709
|
+
this.totalVerbCount++;
|
|
710
|
+
// Update cache
|
|
711
|
+
this.countCache.set('verbs_count', {
|
|
712
|
+
count: this.totalVerbCount,
|
|
713
|
+
timestamp: Date.now()
|
|
714
|
+
});
|
|
715
|
+
// Persist counts immediately for consistency
|
|
716
|
+
if (this.totalVerbCount % 10 === 0) {
|
|
717
|
+
await this.persistCounts();
|
|
718
|
+
}
|
|
719
|
+
});
|
|
720
|
+
}
|
|
721
|
+
/**
|
|
722
|
+
* Decrement verb count - O(1) operation with mutex protection
|
|
723
|
+
* @param type The verb type
|
|
724
|
+
*/
|
|
725
|
+
async decrementVerbCount(type) {
|
|
726
|
+
const mutex = getGlobalMutex();
|
|
727
|
+
await mutex.runExclusive(`count-verb-${type}`, async () => {
|
|
728
|
+
const current = this.verbCounts.get(type) || 0;
|
|
729
|
+
if (current > 1) {
|
|
730
|
+
this.verbCounts.set(type, current - 1);
|
|
731
|
+
}
|
|
732
|
+
else {
|
|
733
|
+
this.verbCounts.delete(type);
|
|
734
|
+
}
|
|
735
|
+
if (this.totalVerbCount > 0) {
|
|
736
|
+
this.totalVerbCount--;
|
|
737
|
+
}
|
|
738
|
+
// Update cache
|
|
739
|
+
this.countCache.set('verbs_count', {
|
|
740
|
+
count: this.totalVerbCount,
|
|
741
|
+
timestamp: Date.now()
|
|
742
|
+
});
|
|
743
|
+
// Persist counts immediately for consistency
|
|
744
|
+
if (this.totalVerbCount % 10 === 0) {
|
|
745
|
+
await this.persistCounts();
|
|
746
|
+
}
|
|
747
|
+
});
|
|
748
|
+
}
|
|
612
749
|
}
|
|
613
750
|
//# sourceMappingURL=baseStorageAdapter.js.map
|
|
@@ -11,6 +11,10 @@ type Edge = HNSWVerb;
|
|
|
11
11
|
* Uses the file system to store data in the specified directory structure
|
|
12
12
|
*/
|
|
13
13
|
export declare class FileSystemStorage extends BaseStorage {
|
|
14
|
+
private countsFilePath?;
|
|
15
|
+
private readonly shardingDepth;
|
|
16
|
+
private readonly SHARDING_THRESHOLD;
|
|
17
|
+
private cachedShardingDepth?;
|
|
14
18
|
private rootDir;
|
|
15
19
|
private nounsDir;
|
|
16
20
|
private verbsDir;
|
|
@@ -22,6 +26,8 @@ export declare class FileSystemStorage extends BaseStorage {
|
|
|
22
26
|
private lockDir;
|
|
23
27
|
private useDualWrite;
|
|
24
28
|
private activeLocks;
|
|
29
|
+
private lockTimers;
|
|
30
|
+
private allTimers;
|
|
25
31
|
/**
|
|
26
32
|
* Initialize the storage adapter
|
|
27
33
|
* @param rootDirectory The root directory for storage
|
|
@@ -251,5 +257,40 @@ export declare class FileSystemStorage extends BaseStorage {
|
|
|
251
257
|
* Merge statistics from multiple sources
|
|
252
258
|
*/
|
|
253
259
|
private mergeStatistics;
|
|
260
|
+
/**
|
|
261
|
+
* Initialize counts from filesystem storage
|
|
262
|
+
*/
|
|
263
|
+
protected initializeCounts(): Promise<void>;
|
|
264
|
+
/**
|
|
265
|
+
* Initialize counts by scanning disk (only done once)
|
|
266
|
+
*/
|
|
267
|
+
private initializeCountsFromDisk;
|
|
268
|
+
/**
|
|
269
|
+
* Persist counts to filesystem storage
|
|
270
|
+
*/
|
|
271
|
+
protected persistCounts(): Promise<void>;
|
|
272
|
+
/**
|
|
273
|
+
* Determine optimal sharding depth based on dataset size
|
|
274
|
+
* This is called once during initialization for consistent behavior
|
|
275
|
+
*/
|
|
276
|
+
private getOptimalShardingDepth;
|
|
277
|
+
/**
|
|
278
|
+
* Get the path for a node with consistent sharding strategy
|
|
279
|
+
* Clean, predictable path generation
|
|
280
|
+
*/
|
|
281
|
+
private getNodePath;
|
|
282
|
+
/**
|
|
283
|
+
* Get the path for a verb with consistent sharding strategy
|
|
284
|
+
*/
|
|
285
|
+
private getVerbPath;
|
|
286
|
+
/**
|
|
287
|
+
* Universal sharded path generator
|
|
288
|
+
* Consistent across all entity types
|
|
289
|
+
*/
|
|
290
|
+
private getShardedPath;
|
|
291
|
+
/**
|
|
292
|
+
* Check if a file exists (handles both sharded and non-sharded)
|
|
293
|
+
*/
|
|
294
|
+
private fileExists;
|
|
254
295
|
}
|
|
255
296
|
export {};
|
|
@@ -37,8 +37,13 @@ export class FileSystemStorage extends BaseStorage {
|
|
|
37
37
|
*/
|
|
38
38
|
constructor(rootDirectory) {
|
|
39
39
|
super();
|
|
40
|
+
// Intelligent sharding configuration
|
|
41
|
+
this.shardingDepth = 2; // 0=flat, 1=ab/, 2=ab/cd/
|
|
42
|
+
this.SHARDING_THRESHOLD = 1000; // Enable deep sharding at 1k files
|
|
40
43
|
this.useDualWrite = true; // Write to both locations during migration
|
|
41
44
|
this.activeLocks = new Set();
|
|
45
|
+
this.lockTimers = new Map(); // Track timers for cleanup
|
|
46
|
+
this.allTimers = new Set(); // Track all timers for cleanup
|
|
42
47
|
this.rootDir = rootDirectory;
|
|
43
48
|
// Defer path operations until init() when path module is guaranteed to be loaded
|
|
44
49
|
}
|
|
@@ -92,6 +97,14 @@ export class FileSystemStorage extends BaseStorage {
|
|
|
92
97
|
}
|
|
93
98
|
// Create the locks directory if it doesn't exist
|
|
94
99
|
await this.ensureDirectoryExists(this.lockDir);
|
|
100
|
+
// Initialize count management
|
|
101
|
+
this.countsFilePath = path.join(this.systemDir, 'counts.json');
|
|
102
|
+
await this.initializeCounts();
|
|
103
|
+
// Cache sharding depth for consistency during this session
|
|
104
|
+
this.cachedShardingDepth = this.getOptimalShardingDepth();
|
|
105
|
+
// Log sharding strategy for transparency
|
|
106
|
+
const strategy = this.cachedShardingDepth === 0 ? 'flat' : this.cachedShardingDepth === 1 ? 'single-level' : 'deep';
|
|
107
|
+
console.log(`📁 Using ${strategy} sharding for optimal performance (${this.totalNounCount} items)`);
|
|
95
108
|
this.isInitialized = true;
|
|
96
109
|
}
|
|
97
110
|
catch (error) {
|
|
@@ -130,20 +143,33 @@ export class FileSystemStorage extends BaseStorage {
|
|
|
130
143
|
*/
|
|
131
144
|
async saveNode(node) {
|
|
132
145
|
await this.ensureInitialized();
|
|
146
|
+
// Check if this is a new node to update counts
|
|
147
|
+
const isNew = !(await this.fileExists(this.getNodePath(node.id)));
|
|
133
148
|
// Convert connections Map to a serializable format
|
|
134
149
|
const serializableNode = {
|
|
135
150
|
...node,
|
|
136
151
|
connections: this.mapToObject(node.connections, (set) => Array.from(set))
|
|
137
152
|
};
|
|
138
|
-
const filePath =
|
|
153
|
+
const filePath = this.getNodePath(node.id);
|
|
154
|
+
await this.ensureDirectoryExists(path.dirname(filePath));
|
|
139
155
|
await fs.promises.writeFile(filePath, JSON.stringify(serializableNode, null, 2));
|
|
156
|
+
// Update counts for new nodes (intelligent type detection)
|
|
157
|
+
if (isNew) {
|
|
158
|
+
const type = node.metadata?.type || node.metadata?.nounType || 'default';
|
|
159
|
+
this.incrementEntityCount(type);
|
|
160
|
+
// Persist counts periodically (every 10 operations for efficiency)
|
|
161
|
+
if (this.totalNounCount % 10 === 0) {
|
|
162
|
+
await this.persistCounts();
|
|
163
|
+
}
|
|
164
|
+
}
|
|
140
165
|
}
|
|
141
166
|
/**
|
|
142
167
|
* Get a node from storage
|
|
143
168
|
*/
|
|
144
169
|
async getNode(id) {
|
|
145
170
|
await this.ensureInitialized();
|
|
146
|
-
|
|
171
|
+
// Clean, predictable path - no backward compatibility needed
|
|
172
|
+
const filePath = this.getNodePath(id);
|
|
147
173
|
try {
|
|
148
174
|
const data = await fs.promises.readFile(filePath, 'utf-8');
|
|
149
175
|
const parsedNode = JSON.parse(data);
|
|
@@ -246,9 +272,24 @@ export class FileSystemStorage extends BaseStorage {
|
|
|
246
272
|
*/
|
|
247
273
|
async deleteNode(id) {
|
|
248
274
|
await this.ensureInitialized();
|
|
249
|
-
const filePath =
|
|
275
|
+
const filePath = this.getNodePath(id);
|
|
276
|
+
// Load node to get type for count update
|
|
277
|
+
try {
|
|
278
|
+
const node = await this.getNode(id);
|
|
279
|
+
if (node) {
|
|
280
|
+
const type = node.metadata?.type || node.metadata?.nounType || 'default';
|
|
281
|
+
this.decrementEntityCount(type);
|
|
282
|
+
}
|
|
283
|
+
}
|
|
284
|
+
catch {
|
|
285
|
+
// Node might not exist, that's ok
|
|
286
|
+
}
|
|
250
287
|
try {
|
|
251
288
|
await fs.promises.unlink(filePath);
|
|
289
|
+
// Persist counts periodically
|
|
290
|
+
if (this.totalNounCount % 10 === 0) {
|
|
291
|
+
await this.persistCounts();
|
|
292
|
+
}
|
|
252
293
|
}
|
|
253
294
|
catch (error) {
|
|
254
295
|
if (error.code !== 'ENOENT') {
|
|
@@ -267,7 +308,8 @@ export class FileSystemStorage extends BaseStorage {
|
|
|
267
308
|
...edge,
|
|
268
309
|
connections: this.mapToObject(edge.connections, (set) => Array.from(set))
|
|
269
310
|
};
|
|
270
|
-
const filePath =
|
|
311
|
+
const filePath = this.getVerbPath(edge.id);
|
|
312
|
+
await this.ensureDirectoryExists(path.dirname(filePath));
|
|
271
313
|
await fs.promises.writeFile(filePath, JSON.stringify(serializableEdge, null, 2));
|
|
272
314
|
}
|
|
273
315
|
/**
|
|
@@ -275,7 +317,7 @@ export class FileSystemStorage extends BaseStorage {
|
|
|
275
317
|
*/
|
|
276
318
|
async getEdge(id) {
|
|
277
319
|
await this.ensureInitialized();
|
|
278
|
-
const filePath =
|
|
320
|
+
const filePath = this.getVerbPath(id);
|
|
279
321
|
try {
|
|
280
322
|
const data = await fs.promises.readFile(filePath, 'utf-8');
|
|
281
323
|
const parsedEdge = JSON.parse(data);
|
|
@@ -494,9 +536,15 @@ export class FileSystemStorage extends BaseStorage {
|
|
|
494
536
|
const nounFiles = files.filter((f) => f.endsWith('.json'));
|
|
495
537
|
// Sort for consistent pagination
|
|
496
538
|
nounFiles.sort();
|
|
497
|
-
// Find starting position
|
|
539
|
+
// Find starting position - prioritize offset for O(1) operation
|
|
498
540
|
let startIndex = 0;
|
|
499
|
-
|
|
541
|
+
const offset = options.offset; // Cast to any since offset might not be in type
|
|
542
|
+
if (offset !== undefined) {
|
|
543
|
+
// Direct offset - O(1) operation
|
|
544
|
+
startIndex = offset;
|
|
545
|
+
}
|
|
546
|
+
else if (cursor) {
|
|
547
|
+
// Cursor-based pagination
|
|
500
548
|
startIndex = nounFiles.findIndex((f) => f.replace('.json', '') > cursor);
|
|
501
549
|
if (startIndex === -1)
|
|
502
550
|
startIndex = nounFiles.length;
|
|
@@ -507,18 +555,10 @@ export class FileSystemStorage extends BaseStorage {
|
|
|
507
555
|
const items = [];
|
|
508
556
|
let successfullyLoaded = 0;
|
|
509
557
|
let totalValidFiles = 0;
|
|
510
|
-
//
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
// Just check if file exists and is readable
|
|
515
|
-
await fs.promises.access(path.join(this.nounsDir, file), fs.constants.R_OK);
|
|
516
|
-
totalValidFiles++;
|
|
517
|
-
}
|
|
518
|
-
catch {
|
|
519
|
-
// File not readable, skip
|
|
520
|
-
}
|
|
521
|
-
}
|
|
558
|
+
// Use persisted counts - O(1) operation!
|
|
559
|
+
totalValidFiles = this.totalNounCount;
|
|
560
|
+
// No need to count files anymore - we maintain accurate counts
|
|
561
|
+
// This eliminates the O(n) operation completely
|
|
522
562
|
// Second pass: load the current page
|
|
523
563
|
for (const file of pageFiles) {
|
|
524
564
|
try {
|
|
@@ -1253,5 +1293,173 @@ export class FileSystemStorage extends BaseStorage {
|
|
|
1253
1293
|
lastUpdated: new Date().toISOString()
|
|
1254
1294
|
};
|
|
1255
1295
|
}
|
|
1296
|
+
// =============================================
|
|
1297
|
+
// Count Management for O(1) Scalability
|
|
1298
|
+
// =============================================
|
|
1299
|
+
/**
|
|
1300
|
+
* Initialize counts from filesystem storage
|
|
1301
|
+
*/
|
|
1302
|
+
async initializeCounts() {
|
|
1303
|
+
if (!this.countsFilePath)
|
|
1304
|
+
return;
|
|
1305
|
+
try {
|
|
1306
|
+
if (await this.fileExists(this.countsFilePath)) {
|
|
1307
|
+
const data = await fs.promises.readFile(this.countsFilePath, 'utf-8');
|
|
1308
|
+
const counts = JSON.parse(data);
|
|
1309
|
+
// Restore entity counts
|
|
1310
|
+
this.entityCounts = new Map(Object.entries(counts.entityCounts || {}));
|
|
1311
|
+
this.verbCounts = new Map(Object.entries(counts.verbCounts || {}));
|
|
1312
|
+
this.totalNounCount = counts.totalNounCount || 0;
|
|
1313
|
+
this.totalVerbCount = counts.totalVerbCount || 0;
|
|
1314
|
+
// Also populate the cache for backward compatibility
|
|
1315
|
+
this.countCache.set('nouns_count', {
|
|
1316
|
+
count: this.totalNounCount,
|
|
1317
|
+
timestamp: Date.now()
|
|
1318
|
+
});
|
|
1319
|
+
this.countCache.set('verbs_count', {
|
|
1320
|
+
count: this.totalVerbCount,
|
|
1321
|
+
timestamp: Date.now()
|
|
1322
|
+
});
|
|
1323
|
+
}
|
|
1324
|
+
else {
|
|
1325
|
+
// If no counts file exists, do one initial count
|
|
1326
|
+
await this.initializeCountsFromDisk();
|
|
1327
|
+
}
|
|
1328
|
+
}
|
|
1329
|
+
catch (error) {
|
|
1330
|
+
console.warn('Could not load persisted counts, will initialize from disk:', error);
|
|
1331
|
+
await this.initializeCountsFromDisk();
|
|
1332
|
+
}
|
|
1333
|
+
}
|
|
1334
|
+
/**
|
|
1335
|
+
* Initialize counts by scanning disk (only done once)
|
|
1336
|
+
*/
|
|
1337
|
+
async initializeCountsFromDisk() {
|
|
1338
|
+
try {
|
|
1339
|
+
// Count nouns
|
|
1340
|
+
const nounFiles = await fs.promises.readdir(this.nounsDir);
|
|
1341
|
+
const validNounFiles = nounFiles.filter((f) => f.endsWith('.json'));
|
|
1342
|
+
this.totalNounCount = validNounFiles.length;
|
|
1343
|
+
// Count verbs
|
|
1344
|
+
const verbFiles = await fs.promises.readdir(this.verbsDir);
|
|
1345
|
+
const validVerbFiles = verbFiles.filter((f) => f.endsWith('.json'));
|
|
1346
|
+
this.totalVerbCount = validVerbFiles.length;
|
|
1347
|
+
// Sample some files to get type distribution (don't read all)
|
|
1348
|
+
const sampleSize = Math.min(100, validNounFiles.length);
|
|
1349
|
+
for (let i = 0; i < sampleSize; i++) {
|
|
1350
|
+
try {
|
|
1351
|
+
const file = validNounFiles[i];
|
|
1352
|
+
const data = await fs.promises.readFile(path.join(this.nounsDir, file), 'utf-8');
|
|
1353
|
+
const noun = JSON.parse(data);
|
|
1354
|
+
const type = noun.metadata?.type || noun.metadata?.nounType || 'default';
|
|
1355
|
+
this.entityCounts.set(type, (this.entityCounts.get(type) || 0) + 1);
|
|
1356
|
+
}
|
|
1357
|
+
catch {
|
|
1358
|
+
// Skip invalid files
|
|
1359
|
+
}
|
|
1360
|
+
}
|
|
1361
|
+
// Extrapolate counts if we sampled
|
|
1362
|
+
if (sampleSize < this.totalNounCount && sampleSize > 0) {
|
|
1363
|
+
const multiplier = this.totalNounCount / sampleSize;
|
|
1364
|
+
for (const [type, count] of this.entityCounts.entries()) {
|
|
1365
|
+
this.entityCounts.set(type, Math.round(count * multiplier));
|
|
1366
|
+
}
|
|
1367
|
+
}
|
|
1368
|
+
await this.persistCounts();
|
|
1369
|
+
}
|
|
1370
|
+
catch (error) {
|
|
1371
|
+
console.error('Error initializing counts from disk:', error);
|
|
1372
|
+
}
|
|
1373
|
+
}
|
|
1374
|
+
/**
|
|
1375
|
+
* Persist counts to filesystem storage
|
|
1376
|
+
*/
|
|
1377
|
+
async persistCounts() {
|
|
1378
|
+
if (!this.countsFilePath)
|
|
1379
|
+
return;
|
|
1380
|
+
try {
|
|
1381
|
+
const counts = {
|
|
1382
|
+
entityCounts: Object.fromEntries(this.entityCounts),
|
|
1383
|
+
verbCounts: Object.fromEntries(this.verbCounts),
|
|
1384
|
+
totalNounCount: this.totalNounCount,
|
|
1385
|
+
totalVerbCount: this.totalVerbCount,
|
|
1386
|
+
lastUpdated: new Date().toISOString()
|
|
1387
|
+
};
|
|
1388
|
+
await fs.promises.writeFile(this.countsFilePath, JSON.stringify(counts, null, 2));
|
|
1389
|
+
}
|
|
1390
|
+
catch (error) {
|
|
1391
|
+
console.error('Error persisting counts:', error);
|
|
1392
|
+
}
|
|
1393
|
+
}
|
|
1394
|
+
// =============================================
|
|
1395
|
+
// Intelligent Directory Sharding
|
|
1396
|
+
// =============================================
|
|
1397
|
+
/**
|
|
1398
|
+
* Determine optimal sharding depth based on dataset size
|
|
1399
|
+
* This is called once during initialization for consistent behavior
|
|
1400
|
+
*/
|
|
1401
|
+
getOptimalShardingDepth() {
|
|
1402
|
+
// For new installations, use intelligent defaults
|
|
1403
|
+
if (this.totalNounCount === 0 && this.totalVerbCount === 0) {
|
|
1404
|
+
return 1; // Default to single-level sharding for new installs
|
|
1405
|
+
}
|
|
1406
|
+
const maxCount = Math.max(this.totalNounCount, this.totalVerbCount);
|
|
1407
|
+
if (maxCount >= this.SHARDING_THRESHOLD) {
|
|
1408
|
+
return 2; // Deep sharding for large datasets
|
|
1409
|
+
}
|
|
1410
|
+
else if (maxCount >= 100) {
|
|
1411
|
+
return 1; // Single-level sharding for medium datasets
|
|
1412
|
+
}
|
|
1413
|
+
else {
|
|
1414
|
+
return 1; // Always use at least single-level sharding for consistency
|
|
1415
|
+
}
|
|
1416
|
+
}
|
|
1417
|
+
/**
|
|
1418
|
+
* Get the path for a node with consistent sharding strategy
|
|
1419
|
+
* Clean, predictable path generation
|
|
1420
|
+
*/
|
|
1421
|
+
getNodePath(id) {
|
|
1422
|
+
return this.getShardedPath(this.nounsDir, id);
|
|
1423
|
+
}
|
|
1424
|
+
/**
|
|
1425
|
+
* Get the path for a verb with consistent sharding strategy
|
|
1426
|
+
*/
|
|
1427
|
+
getVerbPath(id) {
|
|
1428
|
+
return this.getShardedPath(this.verbsDir, id);
|
|
1429
|
+
}
|
|
1430
|
+
/**
|
|
1431
|
+
* Universal sharded path generator
|
|
1432
|
+
* Consistent across all entity types
|
|
1433
|
+
*/
|
|
1434
|
+
getShardedPath(baseDir, id) {
|
|
1435
|
+
const depth = this.cachedShardingDepth ?? this.getOptimalShardingDepth();
|
|
1436
|
+
switch (depth) {
|
|
1437
|
+
case 0:
|
|
1438
|
+
// Flat structure: /nouns/uuid.json
|
|
1439
|
+
return path.join(baseDir, `${id}.json`);
|
|
1440
|
+
case 1:
|
|
1441
|
+
// Single-level sharding: /nouns/ab/uuid.json
|
|
1442
|
+
const shard1 = id.substring(0, 2);
|
|
1443
|
+
return path.join(baseDir, shard1, `${id}.json`);
|
|
1444
|
+
case 2:
|
|
1445
|
+
default:
|
|
1446
|
+
// Deep sharding: /nouns/ab/cd/uuid.json
|
|
1447
|
+
const shard1Deep = id.substring(0, 2);
|
|
1448
|
+
const shard2Deep = id.substring(2, 4);
|
|
1449
|
+
return path.join(baseDir, shard1Deep, shard2Deep, `${id}.json`);
|
|
1450
|
+
}
|
|
1451
|
+
}
|
|
1452
|
+
/**
|
|
1453
|
+
* Check if a file exists (handles both sharded and non-sharded)
|
|
1454
|
+
*/
|
|
1455
|
+
async fileExists(filePath) {
|
|
1456
|
+
try {
|
|
1457
|
+
await fs.promises.access(filePath, fs.constants.F_OK);
|
|
1458
|
+
return true;
|
|
1459
|
+
}
|
|
1460
|
+
catch {
|
|
1461
|
+
return false;
|
|
1462
|
+
}
|
|
1463
|
+
}
|
|
1256
1464
|
}
|
|
1257
1465
|
//# sourceMappingURL=fileSystemStorage.js.map
|
|
@@ -169,4 +169,12 @@ export declare class MemoryStorage extends BaseStorage {
|
|
|
169
169
|
* @returns Promise that resolves to the statistics data or null if not found
|
|
170
170
|
*/
|
|
171
171
|
protected getStatisticsData(): Promise<StatisticsData | null>;
|
|
172
|
+
/**
|
|
173
|
+
* Initialize counts from in-memory storage - O(1) operation
|
|
174
|
+
*/
|
|
175
|
+
protected initializeCounts(): Promise<void>;
|
|
176
|
+
/**
|
|
177
|
+
* Persist counts to storage - no-op for memory storage
|
|
178
|
+
*/
|
|
179
|
+
protected persistCounts(): Promise<void>;
|
|
172
180
|
}
|
|
@@ -30,6 +30,7 @@ export class MemoryStorage extends BaseStorage {
|
|
|
30
30
|
* Save a noun to storage
|
|
31
31
|
*/
|
|
32
32
|
async saveNoun_internal(noun) {
|
|
33
|
+
const isNew = !this.nouns.has(noun.id);
|
|
33
34
|
// Create a deep copy to avoid reference issues
|
|
34
35
|
const nounCopy = {
|
|
35
36
|
id: noun.id,
|
|
@@ -44,6 +45,11 @@ export class MemoryStorage extends BaseStorage {
|
|
|
44
45
|
}
|
|
45
46
|
// Save the noun directly in the nouns map
|
|
46
47
|
this.nouns.set(noun.id, nounCopy);
|
|
48
|
+
// Update counts for new entities
|
|
49
|
+
if (isNew) {
|
|
50
|
+
const type = noun.metadata?.type || noun.metadata?.nounType || 'default';
|
|
51
|
+
this.incrementEntityCount(type);
|
|
52
|
+
}
|
|
47
53
|
}
|
|
48
54
|
/**
|
|
49
55
|
* Get a noun from storage
|
|
@@ -190,12 +196,18 @@ export class MemoryStorage extends BaseStorage {
|
|
|
190
196
|
* Delete a noun from storage
|
|
191
197
|
*/
|
|
192
198
|
async deleteNoun_internal(id) {
|
|
199
|
+
const noun = this.nouns.get(id);
|
|
200
|
+
if (noun) {
|
|
201
|
+
const type = noun.metadata?.type || noun.metadata?.nounType || 'default';
|
|
202
|
+
this.decrementEntityCount(type);
|
|
203
|
+
}
|
|
193
204
|
this.nouns.delete(id);
|
|
194
205
|
}
|
|
195
206
|
/**
|
|
196
207
|
* Save a verb to storage
|
|
197
208
|
*/
|
|
198
209
|
async saveVerb_internal(verb) {
|
|
210
|
+
const isNew = !this.verbs.has(verb.id);
|
|
199
211
|
// Create a deep copy to avoid reference issues
|
|
200
212
|
const verbCopy = {
|
|
201
213
|
id: verb.id,
|
|
@@ -208,6 +220,8 @@ export class MemoryStorage extends BaseStorage {
|
|
|
208
220
|
}
|
|
209
221
|
// Save the verb directly in the verbs map
|
|
210
222
|
this.verbs.set(verb.id, verbCopy);
|
|
223
|
+
// Count tracking will be handled in saveVerbMetadata_internal
|
|
224
|
+
// since HNSWVerb doesn't contain type information
|
|
211
225
|
}
|
|
212
226
|
/**
|
|
213
227
|
* Get a verb from storage
|
|
@@ -393,7 +407,8 @@ export class MemoryStorage extends BaseStorage {
|
|
|
393
407
|
* Delete a verb from storage
|
|
394
408
|
*/
|
|
395
409
|
async deleteVerb_internal(id) {
|
|
396
|
-
//
|
|
410
|
+
// Count tracking will be handled when verb metadata is deleted
|
|
411
|
+
// since HNSWVerb doesn't contain type information
|
|
397
412
|
this.verbs.delete(id);
|
|
398
413
|
}
|
|
399
414
|
/**
|
|
@@ -448,7 +463,13 @@ export class MemoryStorage extends BaseStorage {
|
|
|
448
463
|
* Save verb metadata to storage (internal implementation)
|
|
449
464
|
*/
|
|
450
465
|
async saveVerbMetadata_internal(id, metadata) {
|
|
466
|
+
const isNew = !this.verbMetadata.has(id);
|
|
451
467
|
this.verbMetadata.set(id, JSON.parse(JSON.stringify(metadata)));
|
|
468
|
+
// Update counts for new verbs
|
|
469
|
+
if (isNew) {
|
|
470
|
+
const type = metadata?.verb || metadata?.type || 'default';
|
|
471
|
+
this.incrementVerbCount(type);
|
|
472
|
+
}
|
|
452
473
|
}
|
|
453
474
|
/**
|
|
454
475
|
* Get verb metadata from storage
|
|
@@ -549,5 +570,31 @@ export class MemoryStorage extends BaseStorage {
|
|
|
549
570
|
// Since this is in-memory, there's no need for fallback mechanisms
|
|
550
571
|
// to check multiple storage locations
|
|
551
572
|
}
|
|
573
|
+
/**
|
|
574
|
+
* Initialize counts from in-memory storage - O(1) operation
|
|
575
|
+
*/
|
|
576
|
+
async initializeCounts() {
|
|
577
|
+
// For memory storage, initialize counts from current in-memory state
|
|
578
|
+
this.totalNounCount = this.nouns.size;
|
|
579
|
+
this.totalVerbCount = this.verbMetadata.size;
|
|
580
|
+
// Initialize type-based counts by scanning current data
|
|
581
|
+
this.entityCounts.clear();
|
|
582
|
+
this.verbCounts.clear();
|
|
583
|
+
for (const noun of this.nouns.values()) {
|
|
584
|
+
const type = noun.metadata?.type || noun.metadata?.nounType || 'default';
|
|
585
|
+
this.entityCounts.set(type, (this.entityCounts.get(type) || 0) + 1);
|
|
586
|
+
}
|
|
587
|
+
for (const verbMetadata of this.verbMetadata.values()) {
|
|
588
|
+
const type = verbMetadata?.verb || verbMetadata?.type || 'default';
|
|
589
|
+
this.verbCounts.set(type, (this.verbCounts.get(type) || 0) + 1);
|
|
590
|
+
}
|
|
591
|
+
}
|
|
592
|
+
/**
|
|
593
|
+
* Persist counts to storage - no-op for memory storage
|
|
594
|
+
*/
|
|
595
|
+
async persistCounts() {
|
|
596
|
+
// No persistence needed for in-memory storage
|
|
597
|
+
// Counts are always accurate from the live data structures
|
|
598
|
+
}
|
|
552
599
|
}
|
|
553
600
|
//# sourceMappingURL=memoryStorage.js.map
|
|
@@ -254,5 +254,17 @@ export declare class OPFSStorage extends BaseStorage {
|
|
|
254
254
|
hasMore: boolean;
|
|
255
255
|
nextCursor?: string;
|
|
256
256
|
}>;
|
|
257
|
+
/**
|
|
258
|
+
* Initialize counts from OPFS storage
|
|
259
|
+
*/
|
|
260
|
+
protected initializeCounts(): Promise<void>;
|
|
261
|
+
/**
|
|
262
|
+
* Initialize counts by scanning OPFS (fallback for missing counts file)
|
|
263
|
+
*/
|
|
264
|
+
private initializeCountsFromScan;
|
|
265
|
+
/**
|
|
266
|
+
* Persist counts to OPFS storage
|
|
267
|
+
*/
|
|
268
|
+
protected persistCounts(): Promise<void>;
|
|
257
269
|
}
|
|
258
270
|
export {};
|