@soulcraft/brainy 3.8.3 → 3.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -28
- package/dist/brainy.d.ts +27 -0
- package/dist/brainy.js +231 -10
- package/dist/coreTypes.d.ts +10 -0
- package/dist/hnsw/hnswIndex.d.ts +2 -0
- package/dist/hnsw/hnswIndex.js +10 -0
- package/dist/neural/improvedNeuralAPI.d.ts +14 -1
- package/dist/neural/improvedNeuralAPI.js +59 -20
- package/dist/neural/naturalLanguageProcessorStatic.d.ts +1 -0
- package/dist/neural/naturalLanguageProcessorStatic.js +3 -2
- package/dist/storage/adapters/baseStorageAdapter.d.ts +59 -0
- package/dist/storage/adapters/baseStorageAdapter.js +137 -0
- package/dist/storage/adapters/fileSystemStorage.d.ts +41 -0
- package/dist/storage/adapters/fileSystemStorage.js +227 -19
- package/dist/storage/adapters/memoryStorage.d.ts +8 -0
- package/dist/storage/adapters/memoryStorage.js +48 -1
- package/dist/storage/adapters/opfsStorage.d.ts +12 -0
- package/dist/storage/adapters/opfsStorage.js +68 -0
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +34 -0
- package/dist/storage/adapters/s3CompatibleStorage.js +129 -3
- package/dist/storage/baseStorage.js +4 -3
- package/dist/storage/readOnlyOptimizations.d.ts +0 -9
- package/dist/storage/readOnlyOptimizations.js +6 -28
- package/dist/types/brainy.types.d.ts +15 -0
- package/dist/utils/metadataIndex.d.ts +5 -0
- package/dist/utils/metadataIndex.js +24 -0
- package/dist/utils/mutex.d.ts +53 -0
- package/dist/utils/mutex.js +221 -0
- package/dist/utils/paramValidation.js +20 -4
- package/package.json +1 -1
|
@@ -80,8 +80,8 @@ export class ImprovedNeuralAPI {
|
|
|
80
80
|
catch (error) {
|
|
81
81
|
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
82
82
|
throw new SimilarityError(`Failed to calculate similarity: ${errorMessage}`, {
|
|
83
|
-
inputA: typeof a === '
|
|
84
|
-
inputB: typeof b === '
|
|
83
|
+
inputA: Array.isArray(a) ? 'vector' : typeof a === 'string' ? a.substring(0, 50) : 'unknown',
|
|
84
|
+
inputB: Array.isArray(b) ? 'vector' : typeof b === 'string' ? b.substring(0, 50) : 'unknown',
|
|
85
85
|
options
|
|
86
86
|
});
|
|
87
87
|
}
|
|
@@ -1172,8 +1172,8 @@ export class ImprovedNeuralAPI {
|
|
|
1172
1172
|
// Utility methods for internal operations
|
|
1173
1173
|
_isId(value) {
|
|
1174
1174
|
return typeof value === 'string' &&
|
|
1175
|
-
(value.length === 36 && value.includes('-')) || // UUID-like
|
|
1176
|
-
|
|
1175
|
+
((value.length === 36 && value.includes('-')) || // UUID-like
|
|
1176
|
+
(value.length > 10 && !value.includes(' '))); // ID-like string
|
|
1177
1177
|
}
|
|
1178
1178
|
_isVector(value) {
|
|
1179
1179
|
return Array.isArray(value) &&
|
|
@@ -1441,28 +1441,67 @@ export class ImprovedNeuralAPI {
|
|
|
1441
1441
|
}
|
|
1442
1442
|
return groups;
|
|
1443
1443
|
}
|
|
1444
|
-
//
|
|
1445
|
-
|
|
1446
|
-
|
|
1447
|
-
|
|
1448
|
-
|
|
1449
|
-
|
|
1450
|
-
|
|
1451
|
-
|
|
1444
|
+
// Iterator-based implementations for scalability
|
|
1445
|
+
/**
|
|
1446
|
+
* Iterate through all items without loading them all at once
|
|
1447
|
+
* This scales to millions of items without memory issues
|
|
1448
|
+
*/
|
|
1449
|
+
async *_iterateAllItems(options) {
|
|
1450
|
+
const batchSize = options?.batchSize || 1000;
|
|
1451
|
+
let cursor;
|
|
1452
|
+
let hasMore = true;
|
|
1453
|
+
while (hasMore) {
|
|
1454
|
+
const result = await this.brain.find({
|
|
1455
|
+
query: '',
|
|
1456
|
+
limit: batchSize,
|
|
1457
|
+
cursor
|
|
1458
|
+
});
|
|
1459
|
+
for (const item of result.items || result) {
|
|
1460
|
+
yield item;
|
|
1461
|
+
}
|
|
1462
|
+
hasMore = result.hasMore || false;
|
|
1463
|
+
cursor = result.nextCursor;
|
|
1464
|
+
// Safety check to prevent infinite loops
|
|
1465
|
+
if (!result.items || result.items.length === 0) {
|
|
1466
|
+
break;
|
|
1467
|
+
}
|
|
1452
1468
|
}
|
|
1453
|
-
|
|
1454
|
-
|
|
1469
|
+
}
|
|
1470
|
+
/**
|
|
1471
|
+
* Get a sample of item IDs for operations that don't need all items
|
|
1472
|
+
* This is O(1) for small samples
|
|
1473
|
+
*/
|
|
1474
|
+
async _getSampleItemIds(sampleSize = 1000) {
|
|
1455
1475
|
const result = await this.brain.find({
|
|
1456
1476
|
query: '',
|
|
1457
|
-
limit
|
|
1477
|
+
limit: Math.min(sampleSize, 10000) // Cap at 10k for safety
|
|
1458
1478
|
});
|
|
1459
|
-
|
|
1479
|
+
const items = result.items || result;
|
|
1480
|
+
return items.map((item) => item.entity?.id || item.id).filter((id) => id);
|
|
1460
1481
|
}
|
|
1482
|
+
/**
|
|
1483
|
+
* Get total count using the brain's O(1) counting API
|
|
1484
|
+
*/
|
|
1461
1485
|
async _getTotalItemCount() {
|
|
1462
|
-
//
|
|
1463
|
-
|
|
1464
|
-
|
|
1465
|
-
|
|
1486
|
+
// Use the brain's O(1) counting API if available
|
|
1487
|
+
if (this.brain.counts && typeof this.brain.counts.entities === 'function') {
|
|
1488
|
+
return await this.brain.counts.entities();
|
|
1489
|
+
}
|
|
1490
|
+
// Fallback: Get from storage statistics
|
|
1491
|
+
const storage = this.brain.storage;
|
|
1492
|
+
if (storage && typeof storage.getStatistics === 'function') {
|
|
1493
|
+
const stats = await storage.getStatistics();
|
|
1494
|
+
return stats?.totalNodes || 0;
|
|
1495
|
+
}
|
|
1496
|
+
// Last resort: Sample and estimate
|
|
1497
|
+
const sample = await this.brain.find({ query: '', limit: 1 });
|
|
1498
|
+
return sample.totalCount || 0;
|
|
1499
|
+
}
|
|
1500
|
+
// Deprecated: Remove methods that load everything
|
|
1501
|
+
// These are kept for backward compatibility but should not be used
|
|
1502
|
+
async _getAllItemIds() {
|
|
1503
|
+
console.warn('⚠️ _getAllItemIds() is deprecated and will fail with large datasets. Use _iterateAllItems() or _getSampleItemIds() instead.');
|
|
1504
|
+
return this._getSampleItemIds(10000); // Return sample only
|
|
1466
1505
|
}
|
|
1467
1506
|
// ===== GRAPH ALGORITHM SUPPORTING METHODS =====
|
|
1468
1507
|
_calculateTotalWeight(edges) {
|
|
@@ -47,6 +47,7 @@ export declare class NaturalLanguageProcessor {
|
|
|
47
47
|
private buildFieldConstraints;
|
|
48
48
|
/**
|
|
49
49
|
* Find similar queries from history (without using Brainy)
|
|
50
|
+
* NOTE: Currently unused - reserved for future query caching optimization
|
|
50
51
|
*/
|
|
51
52
|
private findSimilarQueries;
|
|
52
53
|
/**
|
|
@@ -119,10 +119,11 @@ export class NaturalLanguageProcessor {
|
|
|
119
119
|
}
|
|
120
120
|
/**
|
|
121
121
|
* Find similar queries from history (without using Brainy)
|
|
122
|
+
* NOTE: Currently unused - reserved for future query caching optimization
|
|
122
123
|
*/
|
|
123
124
|
findSimilarQueries(embedding) {
|
|
124
|
-
//
|
|
125
|
-
//
|
|
125
|
+
// Not implemented - not required for core functionality
|
|
126
|
+
// Would implement cosine similarity against queryHistory if needed
|
|
126
127
|
return [];
|
|
127
128
|
}
|
|
128
129
|
/**
|
|
@@ -253,4 +253,63 @@ export declare abstract class BaseStorageAdapter implements StorageAdapter {
|
|
|
253
253
|
* Include throttling metrics in statistics
|
|
254
254
|
*/
|
|
255
255
|
getStatisticsWithThrottling(): Promise<StatisticsData | null>;
|
|
256
|
+
protected totalNounCount: number;
|
|
257
|
+
protected totalVerbCount: number;
|
|
258
|
+
protected entityCounts: Map<string, number>;
|
|
259
|
+
protected verbCounts: Map<string, number>;
|
|
260
|
+
protected countCache: Map<string, {
|
|
261
|
+
count: number;
|
|
262
|
+
timestamp: number;
|
|
263
|
+
}>;
|
|
264
|
+
protected readonly COUNT_CACHE_TTL = 60000;
|
|
265
|
+
/**
|
|
266
|
+
* Get total noun count - O(1) operation
|
|
267
|
+
* @returns Promise that resolves to the total number of nouns
|
|
268
|
+
*/
|
|
269
|
+
getNounCount(): Promise<number>;
|
|
270
|
+
/**
|
|
271
|
+
* Get total verb count - O(1) operation
|
|
272
|
+
* @returns Promise that resolves to the total number of verbs
|
|
273
|
+
*/
|
|
274
|
+
getVerbCount(): Promise<number>;
|
|
275
|
+
/**
|
|
276
|
+
* Increment count for entity type - O(1) operation
|
|
277
|
+
* Protected by storage-specific mechanisms (mutex, distributed consensus, etc.)
|
|
278
|
+
* @param type The entity type
|
|
279
|
+
*/
|
|
280
|
+
protected incrementEntityCount(type: string): void;
|
|
281
|
+
/**
|
|
282
|
+
* Thread-safe increment for concurrent scenarios
|
|
283
|
+
* Uses mutex for single-node, distributed consensus for multi-node
|
|
284
|
+
*/
|
|
285
|
+
protected incrementEntityCountSafe(type: string): Promise<void>;
|
|
286
|
+
/**
|
|
287
|
+
* Decrement count for entity type - O(1) operation
|
|
288
|
+
* @param type The entity type
|
|
289
|
+
*/
|
|
290
|
+
protected decrementEntityCount(type: string): void;
|
|
291
|
+
/**
|
|
292
|
+
* Thread-safe decrement for concurrent scenarios
|
|
293
|
+
*/
|
|
294
|
+
protected decrementEntityCountSafe(type: string): Promise<void>;
|
|
295
|
+
/**
|
|
296
|
+
* Increment verb count - O(1) operation with mutex protection
|
|
297
|
+
* @param type The verb type
|
|
298
|
+
*/
|
|
299
|
+
protected incrementVerbCount(type: string): Promise<void>;
|
|
300
|
+
/**
|
|
301
|
+
* Decrement verb count - O(1) operation with mutex protection
|
|
302
|
+
* @param type The verb type
|
|
303
|
+
*/
|
|
304
|
+
protected decrementVerbCount(type: string): Promise<void>;
|
|
305
|
+
/**
|
|
306
|
+
* Initialize counts from storage - must be implemented by each adapter
|
|
307
|
+
* @protected
|
|
308
|
+
*/
|
|
309
|
+
protected abstract initializeCounts(): Promise<void>;
|
|
310
|
+
/**
|
|
311
|
+
* Persist counts to storage - must be implemented by each adapter
|
|
312
|
+
* @protected
|
|
313
|
+
*/
|
|
314
|
+
protected abstract persistCounts(): Promise<void>;
|
|
256
315
|
}
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
* Provides common functionality for all storage adapters, including statistics tracking
|
|
4
4
|
*/
|
|
5
5
|
import { extractFieldNamesFromJson, mapToStandardField } from '../../utils/fieldNameTracking.js';
|
|
6
|
+
import { getGlobalMutex } from '../../utils/mutex.js';
|
|
6
7
|
/**
|
|
7
8
|
* Base class for storage adapters that implements statistics tracking
|
|
8
9
|
*/
|
|
@@ -37,6 +38,16 @@ export class BaseStorageAdapter {
|
|
|
37
38
|
this.totalDelayMs = 0;
|
|
38
39
|
// Service-level throttling
|
|
39
40
|
this.serviceThrottling = new Map();
|
|
41
|
+
// =============================================
|
|
42
|
+
// Universal O(1) Count Management
|
|
43
|
+
// =============================================
|
|
44
|
+
// Universal count tracking - O(1) operations
|
|
45
|
+
this.totalNounCount = 0;
|
|
46
|
+
this.totalVerbCount = 0;
|
|
47
|
+
this.entityCounts = new Map(); // type -> count
|
|
48
|
+
this.verbCounts = new Map(); // verb type -> count
|
|
49
|
+
this.countCache = new Map();
|
|
50
|
+
this.COUNT_CACHE_TTL = 60000; // 1 minute cache TTL
|
|
40
51
|
}
|
|
41
52
|
/**
|
|
42
53
|
* Save statistics data
|
|
@@ -609,5 +620,131 @@ export class BaseStorageAdapter {
|
|
|
609
620
|
}
|
|
610
621
|
return stats;
|
|
611
622
|
}
|
|
623
|
+
/**
|
|
624
|
+
* Get total noun count - O(1) operation
|
|
625
|
+
* @returns Promise that resolves to the total number of nouns
|
|
626
|
+
*/
|
|
627
|
+
async getNounCount() {
|
|
628
|
+
return this.totalNounCount;
|
|
629
|
+
}
|
|
630
|
+
/**
|
|
631
|
+
* Get total verb count - O(1) operation
|
|
632
|
+
* @returns Promise that resolves to the total number of verbs
|
|
633
|
+
*/
|
|
634
|
+
async getVerbCount() {
|
|
635
|
+
return this.totalVerbCount;
|
|
636
|
+
}
|
|
637
|
+
/**
|
|
638
|
+
* Increment count for entity type - O(1) operation
|
|
639
|
+
* Protected by storage-specific mechanisms (mutex, distributed consensus, etc.)
|
|
640
|
+
* @param type The entity type
|
|
641
|
+
*/
|
|
642
|
+
incrementEntityCount(type) {
|
|
643
|
+
// For distributed scenarios, this is aggregated across shards
|
|
644
|
+
// For single-node, this is protected by storage-specific locking
|
|
645
|
+
this.entityCounts.set(type, (this.entityCounts.get(type) || 0) + 1);
|
|
646
|
+
this.totalNounCount++;
|
|
647
|
+
// Update cache
|
|
648
|
+
this.countCache.set('nouns_count', {
|
|
649
|
+
count: this.totalNounCount,
|
|
650
|
+
timestamp: Date.now()
|
|
651
|
+
});
|
|
652
|
+
}
|
|
653
|
+
/**
|
|
654
|
+
* Thread-safe increment for concurrent scenarios
|
|
655
|
+
* Uses mutex for single-node, distributed consensus for multi-node
|
|
656
|
+
*/
|
|
657
|
+
async incrementEntityCountSafe(type) {
|
|
658
|
+
// Single-node mutex protection (distributed mode handled by coordinator)
|
|
659
|
+
const mutex = getGlobalMutex();
|
|
660
|
+
await mutex.runExclusive(`count-entity-${type}`, async () => {
|
|
661
|
+
this.incrementEntityCount(type);
|
|
662
|
+
// Persist counts periodically
|
|
663
|
+
if (this.totalNounCount % 10 === 0) {
|
|
664
|
+
await this.persistCounts();
|
|
665
|
+
}
|
|
666
|
+
});
|
|
667
|
+
}
|
|
668
|
+
/**
|
|
669
|
+
* Decrement count for entity type - O(1) operation
|
|
670
|
+
* @param type The entity type
|
|
671
|
+
*/
|
|
672
|
+
decrementEntityCount(type) {
|
|
673
|
+
const current = this.entityCounts.get(type) || 0;
|
|
674
|
+
if (current > 1) {
|
|
675
|
+
this.entityCounts.set(type, current - 1);
|
|
676
|
+
}
|
|
677
|
+
else {
|
|
678
|
+
this.entityCounts.delete(type);
|
|
679
|
+
}
|
|
680
|
+
if (this.totalNounCount > 0) {
|
|
681
|
+
this.totalNounCount--;
|
|
682
|
+
}
|
|
683
|
+
// Update cache
|
|
684
|
+
this.countCache.set('nouns_count', {
|
|
685
|
+
count: this.totalNounCount,
|
|
686
|
+
timestamp: Date.now()
|
|
687
|
+
});
|
|
688
|
+
}
|
|
689
|
+
/**
|
|
690
|
+
* Thread-safe decrement for concurrent scenarios
|
|
691
|
+
*/
|
|
692
|
+
async decrementEntityCountSafe(type) {
|
|
693
|
+
const mutex = getGlobalMutex();
|
|
694
|
+
await mutex.runExclusive(`count-entity-${type}`, async () => {
|
|
695
|
+
this.decrementEntityCount(type);
|
|
696
|
+
if (this.totalNounCount % 10 === 0) {
|
|
697
|
+
await this.persistCounts();
|
|
698
|
+
}
|
|
699
|
+
});
|
|
700
|
+
}
|
|
701
|
+
/**
|
|
702
|
+
* Increment verb count - O(1) operation with mutex protection
|
|
703
|
+
* @param type The verb type
|
|
704
|
+
*/
|
|
705
|
+
async incrementVerbCount(type) {
|
|
706
|
+
const mutex = getGlobalMutex();
|
|
707
|
+
await mutex.runExclusive(`count-verb-${type}`, async () => {
|
|
708
|
+
this.verbCounts.set(type, (this.verbCounts.get(type) || 0) + 1);
|
|
709
|
+
this.totalVerbCount++;
|
|
710
|
+
// Update cache
|
|
711
|
+
this.countCache.set('verbs_count', {
|
|
712
|
+
count: this.totalVerbCount,
|
|
713
|
+
timestamp: Date.now()
|
|
714
|
+
});
|
|
715
|
+
// Persist counts immediately for consistency
|
|
716
|
+
if (this.totalVerbCount % 10 === 0) {
|
|
717
|
+
await this.persistCounts();
|
|
718
|
+
}
|
|
719
|
+
});
|
|
720
|
+
}
|
|
721
|
+
/**
|
|
722
|
+
* Decrement verb count - O(1) operation with mutex protection
|
|
723
|
+
* @param type The verb type
|
|
724
|
+
*/
|
|
725
|
+
async decrementVerbCount(type) {
|
|
726
|
+
const mutex = getGlobalMutex();
|
|
727
|
+
await mutex.runExclusive(`count-verb-${type}`, async () => {
|
|
728
|
+
const current = this.verbCounts.get(type) || 0;
|
|
729
|
+
if (current > 1) {
|
|
730
|
+
this.verbCounts.set(type, current - 1);
|
|
731
|
+
}
|
|
732
|
+
else {
|
|
733
|
+
this.verbCounts.delete(type);
|
|
734
|
+
}
|
|
735
|
+
if (this.totalVerbCount > 0) {
|
|
736
|
+
this.totalVerbCount--;
|
|
737
|
+
}
|
|
738
|
+
// Update cache
|
|
739
|
+
this.countCache.set('verbs_count', {
|
|
740
|
+
count: this.totalVerbCount,
|
|
741
|
+
timestamp: Date.now()
|
|
742
|
+
});
|
|
743
|
+
// Persist counts immediately for consistency
|
|
744
|
+
if (this.totalVerbCount % 10 === 0) {
|
|
745
|
+
await this.persistCounts();
|
|
746
|
+
}
|
|
747
|
+
});
|
|
748
|
+
}
|
|
612
749
|
}
|
|
613
750
|
//# sourceMappingURL=baseStorageAdapter.js.map
|
|
@@ -11,6 +11,10 @@ type Edge = HNSWVerb;
|
|
|
11
11
|
* Uses the file system to store data in the specified directory structure
|
|
12
12
|
*/
|
|
13
13
|
export declare class FileSystemStorage extends BaseStorage {
|
|
14
|
+
private countsFilePath?;
|
|
15
|
+
private readonly shardingDepth;
|
|
16
|
+
private readonly SHARDING_THRESHOLD;
|
|
17
|
+
private cachedShardingDepth?;
|
|
14
18
|
private rootDir;
|
|
15
19
|
private nounsDir;
|
|
16
20
|
private verbsDir;
|
|
@@ -22,6 +26,8 @@ export declare class FileSystemStorage extends BaseStorage {
|
|
|
22
26
|
private lockDir;
|
|
23
27
|
private useDualWrite;
|
|
24
28
|
private activeLocks;
|
|
29
|
+
private lockTimers;
|
|
30
|
+
private allTimers;
|
|
25
31
|
/**
|
|
26
32
|
* Initialize the storage adapter
|
|
27
33
|
* @param rootDirectory The root directory for storage
|
|
@@ -251,5 +257,40 @@ export declare class FileSystemStorage extends BaseStorage {
|
|
|
251
257
|
* Merge statistics from multiple sources
|
|
252
258
|
*/
|
|
253
259
|
private mergeStatistics;
|
|
260
|
+
/**
|
|
261
|
+
* Initialize counts from filesystem storage
|
|
262
|
+
*/
|
|
263
|
+
protected initializeCounts(): Promise<void>;
|
|
264
|
+
/**
|
|
265
|
+
* Initialize counts by scanning disk (only done once)
|
|
266
|
+
*/
|
|
267
|
+
private initializeCountsFromDisk;
|
|
268
|
+
/**
|
|
269
|
+
* Persist counts to filesystem storage
|
|
270
|
+
*/
|
|
271
|
+
protected persistCounts(): Promise<void>;
|
|
272
|
+
/**
|
|
273
|
+
* Determine optimal sharding depth based on dataset size
|
|
274
|
+
* This is called once during initialization for consistent behavior
|
|
275
|
+
*/
|
|
276
|
+
private getOptimalShardingDepth;
|
|
277
|
+
/**
|
|
278
|
+
* Get the path for a node with consistent sharding strategy
|
|
279
|
+
* Clean, predictable path generation
|
|
280
|
+
*/
|
|
281
|
+
private getNodePath;
|
|
282
|
+
/**
|
|
283
|
+
* Get the path for a verb with consistent sharding strategy
|
|
284
|
+
*/
|
|
285
|
+
private getVerbPath;
|
|
286
|
+
/**
|
|
287
|
+
* Universal sharded path generator
|
|
288
|
+
* Consistent across all entity types
|
|
289
|
+
*/
|
|
290
|
+
private getShardedPath;
|
|
291
|
+
/**
|
|
292
|
+
* Check if a file exists (handles both sharded and non-sharded)
|
|
293
|
+
*/
|
|
294
|
+
private fileExists;
|
|
254
295
|
}
|
|
255
296
|
export {};
|