@soulcraft/brainy 3.8.3 → 3.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/brainy.d.ts +27 -0
- package/dist/brainy.js +231 -10
- package/dist/coreTypes.d.ts +10 -0
- package/dist/hnsw/hnswIndex.d.ts +2 -0
- package/dist/hnsw/hnswIndex.js +10 -0
- package/dist/neural/improvedNeuralAPI.d.ts +14 -1
- package/dist/neural/improvedNeuralAPI.js +59 -20
- package/dist/neural/naturalLanguageProcessorStatic.d.ts +1 -0
- package/dist/neural/naturalLanguageProcessorStatic.js +3 -2
- package/dist/storage/adapters/baseStorageAdapter.d.ts +59 -0
- package/dist/storage/adapters/baseStorageAdapter.js +137 -0
- package/dist/storage/adapters/fileSystemStorage.d.ts +41 -0
- package/dist/storage/adapters/fileSystemStorage.js +227 -19
- package/dist/storage/adapters/memoryStorage.d.ts +8 -0
- package/dist/storage/adapters/memoryStorage.js +48 -1
- package/dist/storage/adapters/opfsStorage.d.ts +12 -0
- package/dist/storage/adapters/opfsStorage.js +68 -0
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +34 -0
- package/dist/storage/adapters/s3CompatibleStorage.js +129 -3
- package/dist/storage/baseStorage.js +4 -3
- package/dist/storage/readOnlyOptimizations.d.ts +0 -9
- package/dist/storage/readOnlyOptimizations.js +6 -28
- package/dist/types/brainy.types.d.ts +15 -0
- package/dist/utils/metadataIndex.d.ts +5 -0
- package/dist/utils/metadataIndex.js +24 -0
- package/dist/utils/mutex.d.ts +53 -0
- package/dist/utils/mutex.js +221 -0
- package/dist/utils/paramValidation.js +20 -4
- package/package.json +1 -1
package/dist/brainy.d.ts
CHANGED
|
@@ -26,6 +26,10 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
|
|
|
26
26
|
private distance;
|
|
27
27
|
private augmentationRegistry;
|
|
28
28
|
private config;
|
|
29
|
+
private coordinator?;
|
|
30
|
+
private shardManager?;
|
|
31
|
+
private cacheSync?;
|
|
32
|
+
private readWriteSeparation?;
|
|
29
33
|
private originalConsole?;
|
|
30
34
|
private _neural?;
|
|
31
35
|
private _nlp?;
|
|
@@ -111,6 +115,16 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
|
|
|
111
115
|
* Clear all data from the database
|
|
112
116
|
*/
|
|
113
117
|
clear(): Promise<void>;
|
|
118
|
+
/**
|
|
119
|
+
* Get total count of nouns - O(1) operation
|
|
120
|
+
* @returns Promise that resolves to the total number of nouns
|
|
121
|
+
*/
|
|
122
|
+
getNounCount(): Promise<number>;
|
|
123
|
+
/**
|
|
124
|
+
* Get total count of verbs - O(1) operation
|
|
125
|
+
* @returns Promise that resolves to the total number of verbs
|
|
126
|
+
*/
|
|
127
|
+
getVerbCount(): Promise<number>;
|
|
114
128
|
/**
|
|
115
129
|
* Neural API - Advanced AI operations
|
|
116
130
|
*/
|
|
@@ -353,6 +367,19 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
|
|
|
353
367
|
* Close and cleanup
|
|
354
368
|
*/
|
|
355
369
|
close(): Promise<void>;
|
|
370
|
+
/**
|
|
371
|
+
* Intelligently auto-detect distributed configuration
|
|
372
|
+
* Zero-config: Automatically determines best distributed settings
|
|
373
|
+
*/
|
|
374
|
+
private autoDetectDistributed;
|
|
375
|
+
/**
|
|
376
|
+
* Setup distributed components with zero-config intelligence
|
|
377
|
+
*/
|
|
378
|
+
private setupDistributedComponents;
|
|
379
|
+
/**
|
|
380
|
+
* Pass distributed components to storage adapter
|
|
381
|
+
*/
|
|
382
|
+
private connectDistributedStorage;
|
|
356
383
|
}
|
|
357
384
|
export * from './types/brainy.types.js';
|
|
358
385
|
export { NounType, VerbType } from './types/graphTypes.js';
|
package/dist/brainy.js
CHANGED
|
@@ -18,6 +18,7 @@ import { MetadataIndexManager } from './utils/metadataIndex.js';
|
|
|
18
18
|
import { GraphAdjacencyIndex } from './graph/graphAdjacencyIndex.js';
|
|
19
19
|
import { createPipeline } from './streaming/pipeline.js';
|
|
20
20
|
import { configureLogger, LogLevel } from './utils/logger.js';
|
|
21
|
+
import { DistributedCoordinator, ShardManager, CacheSync, ReadWriteSeparation } from './distributed/index.js';
|
|
21
22
|
import { NounType } from './types/graphTypes.js';
|
|
22
23
|
/**
|
|
23
24
|
* The main Brainy class - Clean, Beautiful, Powerful
|
|
@@ -35,6 +36,10 @@ export class Brainy {
|
|
|
35
36
|
this.distance = cosineDistance;
|
|
36
37
|
this.embedder = this.setupEmbedder();
|
|
37
38
|
this.augmentationRegistry = this.setupAugmentations();
|
|
39
|
+
// Setup distributed components if enabled
|
|
40
|
+
if (this.config.distributed?.enabled) {
|
|
41
|
+
this.setupDistributedComponents();
|
|
42
|
+
}
|
|
38
43
|
// Index and storage are initialized in init() because they may need each other
|
|
39
44
|
}
|
|
40
45
|
/**
|
|
@@ -113,6 +118,8 @@ export class Brainy {
|
|
|
113
118
|
}
|
|
114
119
|
}
|
|
115
120
|
});
|
|
121
|
+
// Connect distributed components to storage
|
|
122
|
+
await this.connectDistributedStorage();
|
|
116
123
|
// Warm up if configured
|
|
117
124
|
if (this.config.warmup) {
|
|
118
125
|
await this.warmup();
|
|
@@ -269,6 +276,10 @@ export class Brainy {
|
|
|
269
276
|
* Delete an entity
|
|
270
277
|
*/
|
|
271
278
|
async delete(id) {
|
|
279
|
+
// Handle invalid IDs gracefully
|
|
280
|
+
if (!id || typeof id !== 'string') {
|
|
281
|
+
return; // Silently return for invalid IDs
|
|
282
|
+
}
|
|
272
283
|
await this.ensureInitialized();
|
|
273
284
|
return this.augmentationRegistry.execute('delete', { id }, async () => {
|
|
274
285
|
// Remove from vector index
|
|
@@ -289,6 +300,9 @@ export class Brainy {
|
|
|
289
300
|
const targetVerbs = await this.storage.getVerbsByTarget(id);
|
|
290
301
|
const allVerbs = [...verbs, ...targetVerbs];
|
|
291
302
|
for (const verb of allVerbs) {
|
|
303
|
+
// Remove from graph index first
|
|
304
|
+
await this.graphIndex.removeVerb(verb.id);
|
|
305
|
+
// Then delete from storage
|
|
292
306
|
await this.storage.deleteVerb(verb.id);
|
|
293
307
|
}
|
|
294
308
|
});
|
|
@@ -407,10 +421,53 @@ export class Brainy {
|
|
|
407
421
|
const startTime = Date.now();
|
|
408
422
|
const result = await this.augmentationRegistry.execute('find', params, async () => {
|
|
409
423
|
let results = [];
|
|
410
|
-
//
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
424
|
+
// Distinguish between search criteria (need vector search) and filter criteria (metadata only)
|
|
425
|
+
// Treat empty string query as no query
|
|
426
|
+
const hasVectorSearchCriteria = (params.query && params.query.trim() !== '') || params.vector || params.near;
|
|
427
|
+
const hasFilterCriteria = params.where || params.type || params.service;
|
|
428
|
+
const hasGraphCriteria = params.connected;
|
|
429
|
+
// Handle metadata-only queries (no vector search needed)
|
|
430
|
+
if (!hasVectorSearchCriteria && !hasGraphCriteria && hasFilterCriteria) {
|
|
431
|
+
// Build filter for metadata index
|
|
432
|
+
let filter = {};
|
|
433
|
+
if (params.where)
|
|
434
|
+
Object.assign(filter, params.where);
|
|
435
|
+
if (params.service)
|
|
436
|
+
filter.service = params.service;
|
|
437
|
+
if (params.type) {
|
|
438
|
+
const types = Array.isArray(params.type) ? params.type : [params.type];
|
|
439
|
+
if (types.length === 1) {
|
|
440
|
+
filter.noun = types[0];
|
|
441
|
+
}
|
|
442
|
+
else {
|
|
443
|
+
filter = {
|
|
444
|
+
anyOf: types.map(type => ({
|
|
445
|
+
noun: type,
|
|
446
|
+
...filter
|
|
447
|
+
}))
|
|
448
|
+
};
|
|
449
|
+
}
|
|
450
|
+
}
|
|
451
|
+
// Get filtered IDs and paginate BEFORE loading entities
|
|
452
|
+
const filteredIds = await this.metadataIndex.getIdsForFilter(filter);
|
|
453
|
+
const limit = params.limit || 10;
|
|
454
|
+
const offset = params.offset || 0;
|
|
455
|
+
const pageIds = filteredIds.slice(offset, offset + limit);
|
|
456
|
+
// Load entities for the paginated results
|
|
457
|
+
for (const id of pageIds) {
|
|
458
|
+
const entity = await this.get(id);
|
|
459
|
+
if (entity) {
|
|
460
|
+
results.push({
|
|
461
|
+
id,
|
|
462
|
+
score: 1.0, // All metadata-filtered results equally relevant
|
|
463
|
+
entity
|
|
464
|
+
});
|
|
465
|
+
}
|
|
466
|
+
}
|
|
467
|
+
return results;
|
|
468
|
+
}
|
|
469
|
+
// Handle completely empty query - return all results paginated
|
|
470
|
+
if (!hasVectorSearchCriteria && !hasFilterCriteria && !hasGraphCriteria) {
|
|
414
471
|
const limit = params.limit || 20;
|
|
415
472
|
const offset = params.offset || 0;
|
|
416
473
|
const storageResults = await this.storage.getNouns({
|
|
@@ -803,6 +860,22 @@ export class Brainy {
|
|
|
803
860
|
this._tripleIntelligence = undefined;
|
|
804
861
|
});
|
|
805
862
|
}
|
|
863
|
+
/**
|
|
864
|
+
* Get total count of nouns - O(1) operation
|
|
865
|
+
* @returns Promise that resolves to the total number of nouns
|
|
866
|
+
*/
|
|
867
|
+
async getNounCount() {
|
|
868
|
+
await this.ensureInitialized();
|
|
869
|
+
return this.storage.getNounCount();
|
|
870
|
+
}
|
|
871
|
+
/**
|
|
872
|
+
* Get total count of verbs - O(1) operation
|
|
873
|
+
* @returns Promise that resolves to the total number of verbs
|
|
874
|
+
*/
|
|
875
|
+
async getVerbCount() {
|
|
876
|
+
await this.ensureInitialized();
|
|
877
|
+
return this.storage.getVerbCount();
|
|
878
|
+
}
|
|
806
879
|
// ============= SUB-APIS =============
|
|
807
880
|
/**
|
|
808
881
|
* Neural API - Advanced AI operations
|
|
@@ -1462,18 +1535,27 @@ export class Brainy {
|
|
|
1462
1535
|
if (config?.index?.efSearch && (config.index.efSearch < 1 || config.index.efSearch > 1000)) {
|
|
1463
1536
|
throw new Error(`Invalid index efSearch: ${config.index.efSearch}. Must be between 1 and 1000`);
|
|
1464
1537
|
}
|
|
1538
|
+
// Auto-detect distributed mode based on environment and configuration
|
|
1539
|
+
const distributedConfig = this.autoDetectDistributed(config?.distributed);
|
|
1465
1540
|
return {
|
|
1466
1541
|
storage: config?.storage || { type: 'auto' },
|
|
1467
1542
|
model: config?.model || { type: 'fast' },
|
|
1468
1543
|
index: config?.index || {},
|
|
1469
1544
|
cache: config?.cache ?? true,
|
|
1470
1545
|
augmentations: config?.augmentations || {},
|
|
1546
|
+
distributed: distributedConfig, // Type will be fixed when used
|
|
1471
1547
|
warmup: config?.warmup ?? false,
|
|
1472
1548
|
realtime: config?.realtime ?? false,
|
|
1473
1549
|
multiTenancy: config?.multiTenancy ?? false,
|
|
1474
1550
|
telemetry: config?.telemetry ?? false,
|
|
1475
1551
|
verbose: config?.verbose ?? false,
|
|
1476
|
-
silent: config?.silent ?? false
|
|
1552
|
+
silent: config?.silent ?? false,
|
|
1553
|
+
// New performance options with smart defaults
|
|
1554
|
+
disableAutoRebuild: config?.disableAutoRebuild ?? false, // false = auto-decide based on size
|
|
1555
|
+
disableMetrics: config?.disableMetrics ?? false,
|
|
1556
|
+
disableAutoOptimize: config?.disableAutoOptimize ?? false,
|
|
1557
|
+
batchWrites: config?.batchWrites ?? true,
|
|
1558
|
+
maxConcurrentOperations: config?.maxConcurrentOperations ?? 10
|
|
1477
1559
|
};
|
|
1478
1560
|
}
|
|
1479
1561
|
/**
|
|
@@ -1483,17 +1565,49 @@ export class Brainy {
|
|
|
1483
1565
|
try {
|
|
1484
1566
|
// Check if storage has data
|
|
1485
1567
|
const entities = await this.storage.getNouns({ pagination: { limit: 1 } });
|
|
1486
|
-
|
|
1568
|
+
const totalCount = entities.totalCount || 0;
|
|
1569
|
+
if (totalCount === 0) {
|
|
1487
1570
|
// No data in storage, no rebuild needed
|
|
1488
1571
|
return;
|
|
1489
1572
|
}
|
|
1573
|
+
// Intelligent decision: Auto-rebuild only for small datasets
|
|
1574
|
+
// For large datasets, use lazy loading for optimal performance
|
|
1575
|
+
const AUTO_REBUILD_THRESHOLD = 1000; // Only auto-rebuild if < 1000 items
|
|
1490
1576
|
// Check if metadata index is empty
|
|
1491
1577
|
const metadataStats = await this.metadataIndex.getStats();
|
|
1492
|
-
if (metadataStats.totalEntries === 0) {
|
|
1493
|
-
|
|
1578
|
+
if (metadataStats.totalEntries === 0 && totalCount > 0) {
|
|
1579
|
+
if (totalCount < AUTO_REBUILD_THRESHOLD) {
|
|
1580
|
+
// Small dataset - rebuild for convenience
|
|
1581
|
+
if (!this.config.silent) {
|
|
1582
|
+
console.log(`🔄 Small dataset (${totalCount} items) - rebuilding index for optimal performance...`);
|
|
1583
|
+
}
|
|
1584
|
+
await this.metadataIndex.rebuild();
|
|
1585
|
+
const newStats = await this.metadataIndex.getStats();
|
|
1586
|
+
if (!this.config.silent) {
|
|
1587
|
+
console.log(`✅ Index rebuilt: ${newStats.totalEntries} entries`);
|
|
1588
|
+
}
|
|
1589
|
+
}
|
|
1590
|
+
else {
|
|
1591
|
+
// Large dataset - use lazy loading
|
|
1592
|
+
if (!this.config.silent) {
|
|
1593
|
+
console.log(`⚡ Large dataset (${totalCount} items) - using lazy loading for optimal startup performance`);
|
|
1594
|
+
console.log('💡 Tip: Indexes will build automatically as you use the system');
|
|
1595
|
+
}
|
|
1596
|
+
}
|
|
1597
|
+
}
|
|
1598
|
+
// Override with explicit config if provided
|
|
1599
|
+
if (this.config.disableAutoRebuild === true) {
|
|
1600
|
+
if (!this.config.silent) {
|
|
1601
|
+
console.log('⚡ Auto-rebuild explicitly disabled via config');
|
|
1602
|
+
}
|
|
1603
|
+
return;
|
|
1604
|
+
}
|
|
1605
|
+
else if (this.config.disableAutoRebuild === false && metadataStats.totalEntries === 0) {
|
|
1606
|
+
// Explicitly enabled - rebuild regardless of size
|
|
1607
|
+
if (!this.config.silent) {
|
|
1608
|
+
console.log('🔄 Auto-rebuild explicitly enabled - rebuilding index...');
|
|
1609
|
+
}
|
|
1494
1610
|
await this.metadataIndex.rebuild();
|
|
1495
|
-
const newStats = await this.metadataIndex.getStats();
|
|
1496
|
-
console.log(`✅ Metadata index rebuilt: ${newStats.totalEntries} entries`);
|
|
1497
1611
|
}
|
|
1498
1612
|
// Note: GraphAdjacencyIndex will rebuild itself as relationships are added
|
|
1499
1613
|
// Vector index should already be populated if storage has data
|
|
@@ -1525,6 +1639,113 @@ export class Brainy {
|
|
|
1525
1639
|
// We'll just mark as not initialized
|
|
1526
1640
|
this.initialized = false;
|
|
1527
1641
|
}
|
|
1642
|
+
/**
|
|
1643
|
+
* Intelligently auto-detect distributed configuration
|
|
1644
|
+
* Zero-config: Automatically determines best distributed settings
|
|
1645
|
+
*/
|
|
1646
|
+
autoDetectDistributed(config) {
|
|
1647
|
+
// If explicitly disabled, respect that
|
|
1648
|
+
if (config?.enabled === false) {
|
|
1649
|
+
return config;
|
|
1650
|
+
}
|
|
1651
|
+
// Auto-detect based on environment variables (common in production)
|
|
1652
|
+
const envEnabled = process.env.BRAINY_DISTRIBUTED === 'true' ||
|
|
1653
|
+
process.env.NODE_ENV === 'production' ||
|
|
1654
|
+
process.env.CLUSTER_SIZE ||
|
|
1655
|
+
process.env.KUBERNETES_SERVICE_HOST; // Running in K8s
|
|
1656
|
+
// Auto-detect based on storage type (S3/R2/GCS implies distributed)
|
|
1657
|
+
const storageImpliesDistributed = this.config?.storage?.type === 's3' ||
|
|
1658
|
+
this.config?.storage?.type === 'r2' ||
|
|
1659
|
+
this.config?.storage?.type === 'gcs';
|
|
1660
|
+
// If not explicitly configured but environment suggests distributed
|
|
1661
|
+
if (!config && (envEnabled || storageImpliesDistributed)) {
|
|
1662
|
+
return {
|
|
1663
|
+
enabled: true,
|
|
1664
|
+
nodeId: process.env.HOSTNAME || process.env.NODE_ID || `node-${Date.now()}`,
|
|
1665
|
+
nodes: process.env.BRAINY_NODES?.split(',') || [],
|
|
1666
|
+
coordinatorUrl: process.env.BRAINY_COORDINATOR || undefined,
|
|
1667
|
+
shardCount: parseInt(process.env.BRAINY_SHARDS || '64'),
|
|
1668
|
+
replicationFactor: parseInt(process.env.BRAINY_REPLICAS || '3'),
|
|
1669
|
+
consensus: process.env.BRAINY_CONSENSUS || 'raft',
|
|
1670
|
+
transport: process.env.BRAINY_TRANSPORT || 'http'
|
|
1671
|
+
};
|
|
1672
|
+
}
|
|
1673
|
+
// Merge with provided config, applying intelligent defaults
|
|
1674
|
+
return config ? {
|
|
1675
|
+
...config,
|
|
1676
|
+
nodeId: config.nodeId || process.env.HOSTNAME || `node-${Date.now()}`,
|
|
1677
|
+
shardCount: config.shardCount || 64,
|
|
1678
|
+
replicationFactor: config.replicationFactor || 3,
|
|
1679
|
+
consensus: config.consensus || 'raft',
|
|
1680
|
+
transport: config.transport || 'http'
|
|
1681
|
+
} : undefined;
|
|
1682
|
+
}
|
|
1683
|
+
/**
|
|
1684
|
+
* Setup distributed components with zero-config intelligence
|
|
1685
|
+
*/
|
|
1686
|
+
setupDistributedComponents() {
|
|
1687
|
+
const distConfig = this.config.distributed;
|
|
1688
|
+
if (!distConfig?.enabled)
|
|
1689
|
+
return;
|
|
1690
|
+
console.log('🌍 Initializing distributed mode:', {
|
|
1691
|
+
nodeId: distConfig.nodeId,
|
|
1692
|
+
shards: distConfig.shardCount,
|
|
1693
|
+
replicas: distConfig.replicationFactor
|
|
1694
|
+
});
|
|
1695
|
+
// Initialize coordinator for consensus
|
|
1696
|
+
this.coordinator = new DistributedCoordinator({
|
|
1697
|
+
nodeId: distConfig.nodeId,
|
|
1698
|
+
address: distConfig.coordinatorUrl?.split(':')[0] || 'localhost',
|
|
1699
|
+
port: parseInt(distConfig.coordinatorUrl?.split(':')[1] || '8080'),
|
|
1700
|
+
nodes: distConfig.nodes
|
|
1701
|
+
});
|
|
1702
|
+
// Start the coordinator to establish leadership
|
|
1703
|
+
this.coordinator.start().catch(err => {
|
|
1704
|
+
console.warn('Coordinator start failed (will retry on init):', err.message);
|
|
1705
|
+
});
|
|
1706
|
+
// Initialize shard manager for data distribution
|
|
1707
|
+
this.shardManager = new ShardManager({
|
|
1708
|
+
shardCount: distConfig.shardCount,
|
|
1709
|
+
replicationFactor: distConfig.replicationFactor,
|
|
1710
|
+
virtualNodes: 150, // Optimal for consistent distribution
|
|
1711
|
+
autoRebalance: true
|
|
1712
|
+
});
|
|
1713
|
+
// Initialize cache synchronization
|
|
1714
|
+
this.cacheSync = new CacheSync({
|
|
1715
|
+
nodeId: distConfig.nodeId,
|
|
1716
|
+
syncInterval: 1000
|
|
1717
|
+
});
|
|
1718
|
+
// Initialize read/write separation if we have replicas
|
|
1719
|
+
// Note: Will be properly initialized after coordinator starts
|
|
1720
|
+
if (distConfig.replicationFactor && distConfig.replicationFactor > 1) {
|
|
1721
|
+
// Defer creation until coordinator is ready
|
|
1722
|
+
setTimeout(() => {
|
|
1723
|
+
this.readWriteSeparation = new ReadWriteSeparation({
|
|
1724
|
+
nodeId: distConfig.nodeId,
|
|
1725
|
+
consistencyLevel: 'eventual',
|
|
1726
|
+
role: 'replica', // Start as replica, will promote if leader
|
|
1727
|
+
syncInterval: 5000
|
|
1728
|
+
}, this.coordinator, this.shardManager, this.cacheSync);
|
|
1729
|
+
}, 100);
|
|
1730
|
+
}
|
|
1731
|
+
}
|
|
1732
|
+
/**
|
|
1733
|
+
* Pass distributed components to storage adapter
|
|
1734
|
+
*/
|
|
1735
|
+
async connectDistributedStorage() {
|
|
1736
|
+
if (!this.config.distributed?.enabled)
|
|
1737
|
+
return;
|
|
1738
|
+
// Check if storage supports distributed operations
|
|
1739
|
+
if ('setDistributedComponents' in this.storage) {
|
|
1740
|
+
this.storage.setDistributedComponents({
|
|
1741
|
+
coordinator: this.coordinator,
|
|
1742
|
+
shardManager: this.shardManager,
|
|
1743
|
+
cacheSync: this.cacheSync,
|
|
1744
|
+
readWriteSeparation: this.readWriteSeparation
|
|
1745
|
+
});
|
|
1746
|
+
console.log('✅ Distributed storage connected');
|
|
1747
|
+
}
|
|
1748
|
+
}
|
|
1528
1749
|
}
|
|
1529
1750
|
// Re-export types for convenience
|
|
1530
1751
|
export * from './types/brainy.types.js';
|
package/dist/coreTypes.d.ts
CHANGED
|
@@ -512,4 +512,14 @@ export interface StorageAdapter {
|
|
|
512
512
|
* @returns Promise that resolves to an array of changes
|
|
513
513
|
*/
|
|
514
514
|
getChangesSince?(timestamp: number, limit?: number): Promise<any[]>;
|
|
515
|
+
/**
|
|
516
|
+
* Get total count of nouns in storage - O(1) operation
|
|
517
|
+
* @returns Promise that resolves to the total number of nouns
|
|
518
|
+
*/
|
|
519
|
+
getNounCount(): Promise<number>;
|
|
520
|
+
/**
|
|
521
|
+
* Get total count of verbs in storage - O(1) operation
|
|
522
|
+
* @returns Promise that resolves to the total number of verbs
|
|
523
|
+
*/
|
|
524
|
+
getVerbCount(): Promise<number>;
|
|
515
525
|
}
|
package/dist/hnsw/hnswIndex.d.ts
CHANGED
package/dist/hnsw/hnswIndex.js
CHANGED
|
@@ -15,6 +15,9 @@ export class HNSWIndex {
|
|
|
15
15
|
this.nouns = new Map();
|
|
16
16
|
this.entryPointId = null;
|
|
17
17
|
this.maxLevel = 0;
|
|
18
|
+
// Track high-level nodes for O(1) entry point selection
|
|
19
|
+
this.highLevelNodes = new Map(); // level -> node IDs
|
|
20
|
+
this.MAX_TRACKED_LEVELS = 10; // Only track top levels for memory efficiency
|
|
18
21
|
this.dimension = null;
|
|
19
22
|
this.useParallelization = true; // Whether to use parallelization for performance-critical operations
|
|
20
23
|
this.config = { ...DEFAULT_CONFIG, ...config };
|
|
@@ -203,6 +206,13 @@ export class HNSWIndex {
|
|
|
203
206
|
}
|
|
204
207
|
// Add noun to the index
|
|
205
208
|
this.nouns.set(id, noun);
|
|
209
|
+
// Track high-level nodes for O(1) entry point selection
|
|
210
|
+
if (nounLevel >= 2 && nounLevel <= this.MAX_TRACKED_LEVELS) {
|
|
211
|
+
if (!this.highLevelNodes.has(nounLevel)) {
|
|
212
|
+
this.highLevelNodes.set(nounLevel, new Set());
|
|
213
|
+
}
|
|
214
|
+
this.highLevelNodes.get(nounLevel).add(id);
|
|
215
|
+
}
|
|
206
216
|
return id;
|
|
207
217
|
}
|
|
208
218
|
/**
|
|
@@ -159,8 +159,21 @@ export declare class ImprovedNeuralAPI {
|
|
|
159
159
|
* Group items by their semantic noun types
|
|
160
160
|
*/
|
|
161
161
|
private _groupBySemanticType;
|
|
162
|
-
|
|
162
|
+
/**
|
|
163
|
+
* Iterate through all items without loading them all at once
|
|
164
|
+
* This scales to millions of items without memory issues
|
|
165
|
+
*/
|
|
166
|
+
private _iterateAllItems;
|
|
167
|
+
/**
|
|
168
|
+
* Get a sample of item IDs for operations that don't need all items
|
|
169
|
+
* This is O(1) for small samples
|
|
170
|
+
*/
|
|
171
|
+
private _getSampleItemIds;
|
|
172
|
+
/**
|
|
173
|
+
* Get total count using the brain's O(1) counting API
|
|
174
|
+
*/
|
|
163
175
|
private _getTotalItemCount;
|
|
176
|
+
private _getAllItemIds;
|
|
164
177
|
private _calculateTotalWeight;
|
|
165
178
|
private _getNeighborCommunities;
|
|
166
179
|
private _calculateModularityGain;
|
|
@@ -80,8 +80,8 @@ export class ImprovedNeuralAPI {
|
|
|
80
80
|
catch (error) {
|
|
81
81
|
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
82
82
|
throw new SimilarityError(`Failed to calculate similarity: ${errorMessage}`, {
|
|
83
|
-
inputA: typeof a === '
|
|
84
|
-
inputB: typeof b === '
|
|
83
|
+
inputA: Array.isArray(a) ? 'vector' : typeof a === 'string' ? a.substring(0, 50) : 'unknown',
|
|
84
|
+
inputB: Array.isArray(b) ? 'vector' : typeof b === 'string' ? b.substring(0, 50) : 'unknown',
|
|
85
85
|
options
|
|
86
86
|
});
|
|
87
87
|
}
|
|
@@ -1172,8 +1172,8 @@ export class ImprovedNeuralAPI {
|
|
|
1172
1172
|
// Utility methods for internal operations
|
|
1173
1173
|
_isId(value) {
|
|
1174
1174
|
return typeof value === 'string' &&
|
|
1175
|
-
(value.length === 36 && value.includes('-')) || // UUID-like
|
|
1176
|
-
|
|
1175
|
+
((value.length === 36 && value.includes('-')) || // UUID-like
|
|
1176
|
+
(value.length > 10 && !value.includes(' '))); // ID-like string
|
|
1177
1177
|
}
|
|
1178
1178
|
_isVector(value) {
|
|
1179
1179
|
return Array.isArray(value) &&
|
|
@@ -1441,28 +1441,67 @@ export class ImprovedNeuralAPI {
|
|
|
1441
1441
|
}
|
|
1442
1442
|
return groups;
|
|
1443
1443
|
}
|
|
1444
|
-
//
|
|
1445
|
-
|
|
1446
|
-
|
|
1447
|
-
|
|
1448
|
-
|
|
1449
|
-
|
|
1450
|
-
|
|
1451
|
-
|
|
1444
|
+
// Iterator-based implementations for scalability
|
|
1445
|
+
/**
|
|
1446
|
+
* Iterate through all items without loading them all at once
|
|
1447
|
+
* This scales to millions of items without memory issues
|
|
1448
|
+
*/
|
|
1449
|
+
async *_iterateAllItems(options) {
|
|
1450
|
+
const batchSize = options?.batchSize || 1000;
|
|
1451
|
+
let cursor;
|
|
1452
|
+
let hasMore = true;
|
|
1453
|
+
while (hasMore) {
|
|
1454
|
+
const result = await this.brain.find({
|
|
1455
|
+
query: '',
|
|
1456
|
+
limit: batchSize,
|
|
1457
|
+
cursor
|
|
1458
|
+
});
|
|
1459
|
+
for (const item of result.items || result) {
|
|
1460
|
+
yield item;
|
|
1461
|
+
}
|
|
1462
|
+
hasMore = result.hasMore || false;
|
|
1463
|
+
cursor = result.nextCursor;
|
|
1464
|
+
// Safety check to prevent infinite loops
|
|
1465
|
+
if (!result.items || result.items.length === 0) {
|
|
1466
|
+
break;
|
|
1467
|
+
}
|
|
1452
1468
|
}
|
|
1453
|
-
|
|
1454
|
-
|
|
1469
|
+
}
|
|
1470
|
+
/**
|
|
1471
|
+
* Get a sample of item IDs for operations that don't need all items
|
|
1472
|
+
* This is O(1) for small samples
|
|
1473
|
+
*/
|
|
1474
|
+
async _getSampleItemIds(sampleSize = 1000) {
|
|
1455
1475
|
const result = await this.brain.find({
|
|
1456
1476
|
query: '',
|
|
1457
|
-
limit
|
|
1477
|
+
limit: Math.min(sampleSize, 10000) // Cap at 10k for safety
|
|
1458
1478
|
});
|
|
1459
|
-
|
|
1479
|
+
const items = result.items || result;
|
|
1480
|
+
return items.map((item) => item.entity?.id || item.id).filter((id) => id);
|
|
1460
1481
|
}
|
|
1482
|
+
/**
|
|
1483
|
+
* Get total count using the brain's O(1) counting API
|
|
1484
|
+
*/
|
|
1461
1485
|
async _getTotalItemCount() {
|
|
1462
|
-
//
|
|
1463
|
-
|
|
1464
|
-
|
|
1465
|
-
|
|
1486
|
+
// Use the brain's O(1) counting API if available
|
|
1487
|
+
if (this.brain.counts && typeof this.brain.counts.entities === 'function') {
|
|
1488
|
+
return await this.brain.counts.entities();
|
|
1489
|
+
}
|
|
1490
|
+
// Fallback: Get from storage statistics
|
|
1491
|
+
const storage = this.brain.storage;
|
|
1492
|
+
if (storage && typeof storage.getStatistics === 'function') {
|
|
1493
|
+
const stats = await storage.getStatistics();
|
|
1494
|
+
return stats?.totalNodes || 0;
|
|
1495
|
+
}
|
|
1496
|
+
// Last resort: Sample and estimate
|
|
1497
|
+
const sample = await this.brain.find({ query: '', limit: 1 });
|
|
1498
|
+
return sample.totalCount || 0;
|
|
1499
|
+
}
|
|
1500
|
+
// Deprecated: Remove methods that load everything
|
|
1501
|
+
// These are kept for backward compatibility but should not be used
|
|
1502
|
+
async _getAllItemIds() {
|
|
1503
|
+
console.warn('⚠️ _getAllItemIds() is deprecated and will fail with large datasets. Use _iterateAllItems() or _getSampleItemIds() instead.');
|
|
1504
|
+
return this._getSampleItemIds(10000); // Return sample only
|
|
1466
1505
|
}
|
|
1467
1506
|
// ===== GRAPH ALGORITHM SUPPORTING METHODS =====
|
|
1468
1507
|
_calculateTotalWeight(edges) {
|
|
@@ -47,6 +47,7 @@ export declare class NaturalLanguageProcessor {
|
|
|
47
47
|
private buildFieldConstraints;
|
|
48
48
|
/**
|
|
49
49
|
* Find similar queries from history (without using Brainy)
|
|
50
|
+
* NOTE: Currently unused - reserved for future query caching optimization
|
|
50
51
|
*/
|
|
51
52
|
private findSimilarQueries;
|
|
52
53
|
/**
|
|
@@ -119,10 +119,11 @@ export class NaturalLanguageProcessor {
|
|
|
119
119
|
}
|
|
120
120
|
/**
|
|
121
121
|
* Find similar queries from history (without using Brainy)
|
|
122
|
+
* NOTE: Currently unused - reserved for future query caching optimization
|
|
122
123
|
*/
|
|
123
124
|
findSimilarQueries(embedding) {
|
|
124
|
-
//
|
|
125
|
-
//
|
|
125
|
+
// Not implemented - not required for core functionality
|
|
126
|
+
// Would implement cosine similarity against queryHistory if needed
|
|
126
127
|
return [];
|
|
127
128
|
}
|
|
128
129
|
/**
|
|
@@ -253,4 +253,63 @@ export declare abstract class BaseStorageAdapter implements StorageAdapter {
|
|
|
253
253
|
* Include throttling metrics in statistics
|
|
254
254
|
*/
|
|
255
255
|
getStatisticsWithThrottling(): Promise<StatisticsData | null>;
|
|
256
|
+
protected totalNounCount: number;
|
|
257
|
+
protected totalVerbCount: number;
|
|
258
|
+
protected entityCounts: Map<string, number>;
|
|
259
|
+
protected verbCounts: Map<string, number>;
|
|
260
|
+
protected countCache: Map<string, {
|
|
261
|
+
count: number;
|
|
262
|
+
timestamp: number;
|
|
263
|
+
}>;
|
|
264
|
+
protected readonly COUNT_CACHE_TTL = 60000;
|
|
265
|
+
/**
|
|
266
|
+
* Get total noun count - O(1) operation
|
|
267
|
+
* @returns Promise that resolves to the total number of nouns
|
|
268
|
+
*/
|
|
269
|
+
getNounCount(): Promise<number>;
|
|
270
|
+
/**
|
|
271
|
+
* Get total verb count - O(1) operation
|
|
272
|
+
* @returns Promise that resolves to the total number of verbs
|
|
273
|
+
*/
|
|
274
|
+
getVerbCount(): Promise<number>;
|
|
275
|
+
/**
|
|
276
|
+
* Increment count for entity type - O(1) operation
|
|
277
|
+
* Protected by storage-specific mechanisms (mutex, distributed consensus, etc.)
|
|
278
|
+
* @param type The entity type
|
|
279
|
+
*/
|
|
280
|
+
protected incrementEntityCount(type: string): void;
|
|
281
|
+
/**
|
|
282
|
+
* Thread-safe increment for concurrent scenarios
|
|
283
|
+
* Uses mutex for single-node, distributed consensus for multi-node
|
|
284
|
+
*/
|
|
285
|
+
protected incrementEntityCountSafe(type: string): Promise<void>;
|
|
286
|
+
/**
|
|
287
|
+
* Decrement count for entity type - O(1) operation
|
|
288
|
+
* @param type The entity type
|
|
289
|
+
*/
|
|
290
|
+
protected decrementEntityCount(type: string): void;
|
|
291
|
+
/**
|
|
292
|
+
* Thread-safe decrement for concurrent scenarios
|
|
293
|
+
*/
|
|
294
|
+
protected decrementEntityCountSafe(type: string): Promise<void>;
|
|
295
|
+
/**
|
|
296
|
+
* Increment verb count - O(1) operation with mutex protection
|
|
297
|
+
* @param type The verb type
|
|
298
|
+
*/
|
|
299
|
+
protected incrementVerbCount(type: string): Promise<void>;
|
|
300
|
+
/**
|
|
301
|
+
* Decrement verb count - O(1) operation with mutex protection
|
|
302
|
+
* @param type The verb type
|
|
303
|
+
*/
|
|
304
|
+
protected decrementVerbCount(type: string): Promise<void>;
|
|
305
|
+
/**
|
|
306
|
+
* Initialize counts from storage - must be implemented by each adapter
|
|
307
|
+
* @protected
|
|
308
|
+
*/
|
|
309
|
+
protected abstract initializeCounts(): Promise<void>;
|
|
310
|
+
/**
|
|
311
|
+
* Persist counts to storage - must be implemented by each adapter
|
|
312
|
+
* @protected
|
|
313
|
+
*/
|
|
314
|
+
protected abstract persistCounts(): Promise<void>;
|
|
256
315
|
}
|