@soulcraft/brainy 3.8.3 → 3.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -28
- package/dist/brainy.d.ts +27 -0
- package/dist/brainy.js +231 -10
- package/dist/coreTypes.d.ts +10 -0
- package/dist/hnsw/hnswIndex.d.ts +2 -0
- package/dist/hnsw/hnswIndex.js +10 -0
- package/dist/neural/improvedNeuralAPI.d.ts +14 -1
- package/dist/neural/improvedNeuralAPI.js +59 -20
- package/dist/neural/naturalLanguageProcessorStatic.d.ts +1 -0
- package/dist/neural/naturalLanguageProcessorStatic.js +3 -2
- package/dist/storage/adapters/baseStorageAdapter.d.ts +59 -0
- package/dist/storage/adapters/baseStorageAdapter.js +137 -0
- package/dist/storage/adapters/fileSystemStorage.d.ts +41 -0
- package/dist/storage/adapters/fileSystemStorage.js +227 -19
- package/dist/storage/adapters/memoryStorage.d.ts +8 -0
- package/dist/storage/adapters/memoryStorage.js +48 -1
- package/dist/storage/adapters/opfsStorage.d.ts +12 -0
- package/dist/storage/adapters/opfsStorage.js +68 -0
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +34 -0
- package/dist/storage/adapters/s3CompatibleStorage.js +129 -3
- package/dist/storage/baseStorage.js +4 -3
- package/dist/storage/readOnlyOptimizations.d.ts +0 -9
- package/dist/storage/readOnlyOptimizations.js +6 -28
- package/dist/types/brainy.types.d.ts +15 -0
- package/dist/utils/metadataIndex.d.ts +5 -0
- package/dist/utils/metadataIndex.js +24 -0
- package/dist/utils/mutex.d.ts +53 -0
- package/dist/utils/mutex.js +221 -0
- package/dist/utils/paramValidation.js +20 -4
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
[](LICENSE)
|
|
10
10
|
[](https://www.typescriptlang.org/)
|
|
11
11
|
|
|
12
|
-
**🧠 Brainy
|
|
12
|
+
**🧠 Brainy - Universal Knowledge Protocol™**
|
|
13
13
|
|
|
14
14
|
**World's first Triple Intelligence™ database** unifying vector similarity, graph relationships, and document filtering in one magical API. **Framework-friendly design** works seamlessly with Next.js, React, Vue, Angular, and any modern JavaScript framework.
|
|
15
15
|
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
|
|
18
18
|
**Framework-first design.** Built for modern web development with zero configuration and automatic framework compatibility. O(log n) performance, <10ms search latency, production-ready.
|
|
19
19
|
|
|
20
|
-
## 🎉
|
|
20
|
+
## 🎉 Key Features
|
|
21
21
|
|
|
22
22
|
### 🧠 **Triple Intelligence™ Engine**
|
|
23
23
|
|
|
@@ -49,7 +49,7 @@ npm install @soulcraft/brainy
|
|
|
49
49
|
### 🎯 **True Zero Configuration**
|
|
50
50
|
|
|
51
51
|
```javascript
|
|
52
|
-
import {Brainy} from '@soulcraft/brainy'
|
|
52
|
+
import { Brainy, NounType } from '@soulcraft/brainy'
|
|
53
53
|
|
|
54
54
|
// Just this - auto-detects everything!
|
|
55
55
|
const brain = new Brainy()
|
|
@@ -58,7 +58,7 @@ await brain.init()
|
|
|
58
58
|
// Add entities with automatic embedding
|
|
59
59
|
const jsId = await brain.add({
|
|
60
60
|
data: "JavaScript is a programming language",
|
|
61
|
-
|
|
61
|
+
nounType: NounType.Concept,
|
|
62
62
|
metadata: {
|
|
63
63
|
type: "language",
|
|
64
64
|
year: 1995,
|
|
@@ -68,7 +68,7 @@ const jsId = await brain.add({
|
|
|
68
68
|
|
|
69
69
|
const nodeId = await brain.add({
|
|
70
70
|
data: "Node.js runtime environment",
|
|
71
|
-
|
|
71
|
+
nounType: NounType.Concept,
|
|
72
72
|
metadata: {
|
|
73
73
|
type: "runtime",
|
|
74
74
|
year: 2009,
|
|
@@ -100,7 +100,7 @@ const filtered = await brain.find({
|
|
|
100
100
|
|
|
101
101
|
## 🌐 Framework Integration
|
|
102
102
|
|
|
103
|
-
**Brainy
|
|
103
|
+
**Brainy is framework-first!** Works seamlessly with any modern JavaScript framework:
|
|
104
104
|
|
|
105
105
|
### ⚛️ **React & Next.js**
|
|
106
106
|
```javascript
|
|
@@ -194,7 +194,7 @@ If using nvm: `nvm use` (we provide a `.nvmrc` file)
|
|
|
194
194
|
|
|
195
195
|
**Enabled by Triple Intelligence, standardized for everyone:**
|
|
196
196
|
|
|
197
|
-
- **
|
|
197
|
+
- **31 Noun Types × 40 Verb Types**: 1,240 base combinations
|
|
198
198
|
- **∞ Expressiveness**: Unlimited metadata = model ANY data
|
|
199
199
|
- **One Language**: All tools, augmentations, AI models speak the same types
|
|
200
200
|
- **Perfect Interoperability**: Move data between any Brainy instance
|
|
@@ -211,10 +211,10 @@ await brain.find("Documentation about authentication from last month")
|
|
|
211
211
|
|
|
212
212
|
### 🎯 Zero Configuration Philosophy
|
|
213
213
|
|
|
214
|
-
Brainy
|
|
214
|
+
Brainy automatically configures **everything**:
|
|
215
215
|
|
|
216
216
|
```javascript
|
|
217
|
-
import {Brainy} from '@soulcraft/brainy'
|
|
217
|
+
import { Brainy } from '@soulcraft/brainy'
|
|
218
218
|
|
|
219
219
|
// 1. Pure zero-config - detects everything
|
|
220
220
|
const brain = new Brainy()
|
|
@@ -368,6 +368,8 @@ const brain = new Brainy({
|
|
|
368
368
|
### Real-World Example: Social Media Firehose
|
|
369
369
|
|
|
370
370
|
```javascript
|
|
371
|
+
import { Brainy, NounType } from '@soulcraft/brainy'
|
|
372
|
+
|
|
371
373
|
// Ingestion nodes (optimized for writes)
|
|
372
374
|
const ingestionNode = new Brainy({
|
|
373
375
|
storage: {type: 's3', options: {bucket: 'social-data'}},
|
|
@@ -378,7 +380,7 @@ const ingestionNode = new Brainy({
|
|
|
378
380
|
// Process Bluesky firehose
|
|
379
381
|
blueskyStream.on('post', async (post) => {
|
|
380
382
|
await ingestionNode.add(post, {
|
|
381
|
-
nounType:
|
|
383
|
+
nounType: NounType.Message,
|
|
382
384
|
platform: 'bluesky',
|
|
383
385
|
author: post.author,
|
|
384
386
|
timestamp: post.createdAt
|
|
@@ -417,21 +419,19 @@ const trending = await searchNode.find('trending AI topics', {
|
|
|
417
419
|
```javascript
|
|
418
420
|
// Store documentation with rich relationships
|
|
419
421
|
const apiGuide = await brain.add("REST API Guide", {
|
|
420
|
-
nounType:
|
|
422
|
+
nounType: NounType.Document,
|
|
421
423
|
title: "API Guide",
|
|
422
424
|
category: "documentation",
|
|
423
425
|
version: "2.0"
|
|
424
426
|
})
|
|
425
427
|
|
|
426
428
|
const author = await brain.add("Jane Developer", {
|
|
427
|
-
nounType:
|
|
428
|
-
type: "person",
|
|
429
|
+
nounType: NounType.Person,
|
|
429
430
|
role: "tech-lead"
|
|
430
431
|
})
|
|
431
432
|
|
|
432
433
|
const project = await brain.add("E-commerce Platform", {
|
|
433
|
-
nounType:
|
|
434
|
-
type: "project",
|
|
434
|
+
nounType: NounType.Project,
|
|
435
435
|
status: "active"
|
|
436
436
|
})
|
|
437
437
|
|
|
@@ -462,21 +462,18 @@ const similar = await brain.search(existingContent, {
|
|
|
462
462
|
```javascript
|
|
463
463
|
// Store conversation with relationships
|
|
464
464
|
const userId = await brain.add("User 123", {
|
|
465
|
-
nounType:
|
|
466
|
-
type: "user",
|
|
465
|
+
nounType: NounType.User,
|
|
467
466
|
tier: "premium"
|
|
468
467
|
})
|
|
469
468
|
|
|
470
469
|
const messageId = await brain.add(userMessage, {
|
|
471
|
-
nounType:
|
|
472
|
-
type: "message",
|
|
470
|
+
nounType: NounType.Message,
|
|
473
471
|
timestamp: Date.now(),
|
|
474
472
|
session: "abc"
|
|
475
473
|
})
|
|
476
474
|
|
|
477
475
|
const topicId = await brain.add("Product Support", {
|
|
478
|
-
nounType:
|
|
479
|
-
type: "topic",
|
|
476
|
+
nounType: NounType.Topic,
|
|
480
477
|
category: "support"
|
|
481
478
|
})
|
|
482
479
|
|
|
@@ -602,7 +599,7 @@ for (const cluster of feedbackClusters) {
|
|
|
602
599
|
}
|
|
603
600
|
|
|
604
601
|
// Find related documents
|
|
605
|
-
const docId = await brain.add("Machine learning guide", { nounType:
|
|
602
|
+
const docId = await brain.add("Machine learning guide", { nounType: NounType.Document })
|
|
606
603
|
const similar = await neural.neighbors(docId, 5)
|
|
607
604
|
// Returns 5 most similar documents
|
|
608
605
|
|
|
@@ -637,7 +634,7 @@ Brainy includes enterprise-grade capabilities at no extra cost. **No premium tie
|
|
|
637
634
|
- **Built-in monitoring** with metrics and health checks
|
|
638
635
|
- **Production ready** with circuit breakers and backpressure
|
|
639
636
|
|
|
640
|
-
📖 **
|
|
637
|
+
📖 **More enterprise features coming soon** - Stay tuned!
|
|
641
638
|
|
|
642
639
|
## 📊 Benchmarks
|
|
643
640
|
|
|
@@ -651,13 +648,14 @@ Brainy includes enterprise-grade capabilities at no extra cost. **No premium tie
|
|
|
651
648
|
| Bulk Import (1000 items) | 2.3s | +8MB |
|
|
652
649
|
| **Production Scale (10M items)** | **5.8ms** | **12GB** |
|
|
653
650
|
|
|
654
|
-
## 🔄 Migration from
|
|
651
|
+
## 🔄 Migration from Previous Versions
|
|
655
652
|
|
|
656
|
-
Key changes
|
|
653
|
+
Key changes in the latest version:
|
|
657
654
|
|
|
658
655
|
- Search methods consolidated into `search()` and `find()`
|
|
659
656
|
- Result format now includes full objects with metadata
|
|
660
|
-
-
|
|
657
|
+
- Enhanced natural language capabilities
|
|
658
|
+
- Distributed architecture support
|
|
661
659
|
|
|
662
660
|
## 🤝 Contributing
|
|
663
661
|
|
|
@@ -678,10 +676,10 @@ We welcome contributions! See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
|
|
|
678
676
|
### The Math of Infinite Expressiveness
|
|
679
677
|
|
|
680
678
|
```
|
|
681
|
-
|
|
679
|
+
31 Nouns × 40 Verbs × ∞ Metadata × Triple Intelligence = Universal Protocol
|
|
682
680
|
```
|
|
683
681
|
|
|
684
|
-
- **
|
|
682
|
+
- **1,240 base combinations** from standardized types
|
|
685
683
|
- **∞ domain specificity** via unlimited metadata
|
|
686
684
|
- **∞ relationship depth** via graph traversal
|
|
687
685
|
- **= Model ANYTHING**: From quantum physics to social networks
|
package/dist/brainy.d.ts
CHANGED
|
@@ -26,6 +26,10 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
|
|
|
26
26
|
private distance;
|
|
27
27
|
private augmentationRegistry;
|
|
28
28
|
private config;
|
|
29
|
+
private coordinator?;
|
|
30
|
+
private shardManager?;
|
|
31
|
+
private cacheSync?;
|
|
32
|
+
private readWriteSeparation?;
|
|
29
33
|
private originalConsole?;
|
|
30
34
|
private _neural?;
|
|
31
35
|
private _nlp?;
|
|
@@ -111,6 +115,16 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
|
|
|
111
115
|
* Clear all data from the database
|
|
112
116
|
*/
|
|
113
117
|
clear(): Promise<void>;
|
|
118
|
+
/**
|
|
119
|
+
* Get total count of nouns - O(1) operation
|
|
120
|
+
* @returns Promise that resolves to the total number of nouns
|
|
121
|
+
*/
|
|
122
|
+
getNounCount(): Promise<number>;
|
|
123
|
+
/**
|
|
124
|
+
* Get total count of verbs - O(1) operation
|
|
125
|
+
* @returns Promise that resolves to the total number of verbs
|
|
126
|
+
*/
|
|
127
|
+
getVerbCount(): Promise<number>;
|
|
114
128
|
/**
|
|
115
129
|
* Neural API - Advanced AI operations
|
|
116
130
|
*/
|
|
@@ -353,6 +367,19 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
|
|
|
353
367
|
* Close and cleanup
|
|
354
368
|
*/
|
|
355
369
|
close(): Promise<void>;
|
|
370
|
+
/**
|
|
371
|
+
* Intelligently auto-detect distributed configuration
|
|
372
|
+
* Zero-config: Automatically determines best distributed settings
|
|
373
|
+
*/
|
|
374
|
+
private autoDetectDistributed;
|
|
375
|
+
/**
|
|
376
|
+
* Setup distributed components with zero-config intelligence
|
|
377
|
+
*/
|
|
378
|
+
private setupDistributedComponents;
|
|
379
|
+
/**
|
|
380
|
+
* Pass distributed components to storage adapter
|
|
381
|
+
*/
|
|
382
|
+
private connectDistributedStorage;
|
|
356
383
|
}
|
|
357
384
|
export * from './types/brainy.types.js';
|
|
358
385
|
export { NounType, VerbType } from './types/graphTypes.js';
|
package/dist/brainy.js
CHANGED
|
@@ -18,6 +18,7 @@ import { MetadataIndexManager } from './utils/metadataIndex.js';
|
|
|
18
18
|
import { GraphAdjacencyIndex } from './graph/graphAdjacencyIndex.js';
|
|
19
19
|
import { createPipeline } from './streaming/pipeline.js';
|
|
20
20
|
import { configureLogger, LogLevel } from './utils/logger.js';
|
|
21
|
+
import { DistributedCoordinator, ShardManager, CacheSync, ReadWriteSeparation } from './distributed/index.js';
|
|
21
22
|
import { NounType } from './types/graphTypes.js';
|
|
22
23
|
/**
|
|
23
24
|
* The main Brainy class - Clean, Beautiful, Powerful
|
|
@@ -35,6 +36,10 @@ export class Brainy {
|
|
|
35
36
|
this.distance = cosineDistance;
|
|
36
37
|
this.embedder = this.setupEmbedder();
|
|
37
38
|
this.augmentationRegistry = this.setupAugmentations();
|
|
39
|
+
// Setup distributed components if enabled
|
|
40
|
+
if (this.config.distributed?.enabled) {
|
|
41
|
+
this.setupDistributedComponents();
|
|
42
|
+
}
|
|
38
43
|
// Index and storage are initialized in init() because they may need each other
|
|
39
44
|
}
|
|
40
45
|
/**
|
|
@@ -113,6 +118,8 @@ export class Brainy {
|
|
|
113
118
|
}
|
|
114
119
|
}
|
|
115
120
|
});
|
|
121
|
+
// Connect distributed components to storage
|
|
122
|
+
await this.connectDistributedStorage();
|
|
116
123
|
// Warm up if configured
|
|
117
124
|
if (this.config.warmup) {
|
|
118
125
|
await this.warmup();
|
|
@@ -269,6 +276,10 @@ export class Brainy {
|
|
|
269
276
|
* Delete an entity
|
|
270
277
|
*/
|
|
271
278
|
async delete(id) {
|
|
279
|
+
// Handle invalid IDs gracefully
|
|
280
|
+
if (!id || typeof id !== 'string') {
|
|
281
|
+
return; // Silently return for invalid IDs
|
|
282
|
+
}
|
|
272
283
|
await this.ensureInitialized();
|
|
273
284
|
return this.augmentationRegistry.execute('delete', { id }, async () => {
|
|
274
285
|
// Remove from vector index
|
|
@@ -289,6 +300,9 @@ export class Brainy {
|
|
|
289
300
|
const targetVerbs = await this.storage.getVerbsByTarget(id);
|
|
290
301
|
const allVerbs = [...verbs, ...targetVerbs];
|
|
291
302
|
for (const verb of allVerbs) {
|
|
303
|
+
// Remove from graph index first
|
|
304
|
+
await this.graphIndex.removeVerb(verb.id);
|
|
305
|
+
// Then delete from storage
|
|
292
306
|
await this.storage.deleteVerb(verb.id);
|
|
293
307
|
}
|
|
294
308
|
});
|
|
@@ -407,10 +421,53 @@ export class Brainy {
|
|
|
407
421
|
const startTime = Date.now();
|
|
408
422
|
const result = await this.augmentationRegistry.execute('find', params, async () => {
|
|
409
423
|
let results = [];
|
|
410
|
-
//
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
424
|
+
// Distinguish between search criteria (need vector search) and filter criteria (metadata only)
|
|
425
|
+
// Treat empty string query as no query
|
|
426
|
+
const hasVectorSearchCriteria = (params.query && params.query.trim() !== '') || params.vector || params.near;
|
|
427
|
+
const hasFilterCriteria = params.where || params.type || params.service;
|
|
428
|
+
const hasGraphCriteria = params.connected;
|
|
429
|
+
// Handle metadata-only queries (no vector search needed)
|
|
430
|
+
if (!hasVectorSearchCriteria && !hasGraphCriteria && hasFilterCriteria) {
|
|
431
|
+
// Build filter for metadata index
|
|
432
|
+
let filter = {};
|
|
433
|
+
if (params.where)
|
|
434
|
+
Object.assign(filter, params.where);
|
|
435
|
+
if (params.service)
|
|
436
|
+
filter.service = params.service;
|
|
437
|
+
if (params.type) {
|
|
438
|
+
const types = Array.isArray(params.type) ? params.type : [params.type];
|
|
439
|
+
if (types.length === 1) {
|
|
440
|
+
filter.noun = types[0];
|
|
441
|
+
}
|
|
442
|
+
else {
|
|
443
|
+
filter = {
|
|
444
|
+
anyOf: types.map(type => ({
|
|
445
|
+
noun: type,
|
|
446
|
+
...filter
|
|
447
|
+
}))
|
|
448
|
+
};
|
|
449
|
+
}
|
|
450
|
+
}
|
|
451
|
+
// Get filtered IDs and paginate BEFORE loading entities
|
|
452
|
+
const filteredIds = await this.metadataIndex.getIdsForFilter(filter);
|
|
453
|
+
const limit = params.limit || 10;
|
|
454
|
+
const offset = params.offset || 0;
|
|
455
|
+
const pageIds = filteredIds.slice(offset, offset + limit);
|
|
456
|
+
// Load entities for the paginated results
|
|
457
|
+
for (const id of pageIds) {
|
|
458
|
+
const entity = await this.get(id);
|
|
459
|
+
if (entity) {
|
|
460
|
+
results.push({
|
|
461
|
+
id,
|
|
462
|
+
score: 1.0, // All metadata-filtered results equally relevant
|
|
463
|
+
entity
|
|
464
|
+
});
|
|
465
|
+
}
|
|
466
|
+
}
|
|
467
|
+
return results;
|
|
468
|
+
}
|
|
469
|
+
// Handle completely empty query - return all results paginated
|
|
470
|
+
if (!hasVectorSearchCriteria && !hasFilterCriteria && !hasGraphCriteria) {
|
|
414
471
|
const limit = params.limit || 20;
|
|
415
472
|
const offset = params.offset || 0;
|
|
416
473
|
const storageResults = await this.storage.getNouns({
|
|
@@ -803,6 +860,22 @@ export class Brainy {
|
|
|
803
860
|
this._tripleIntelligence = undefined;
|
|
804
861
|
});
|
|
805
862
|
}
|
|
863
|
+
/**
|
|
864
|
+
* Get total count of nouns - O(1) operation
|
|
865
|
+
* @returns Promise that resolves to the total number of nouns
|
|
866
|
+
*/
|
|
867
|
+
async getNounCount() {
|
|
868
|
+
await this.ensureInitialized();
|
|
869
|
+
return this.storage.getNounCount();
|
|
870
|
+
}
|
|
871
|
+
/**
|
|
872
|
+
* Get total count of verbs - O(1) operation
|
|
873
|
+
* @returns Promise that resolves to the total number of verbs
|
|
874
|
+
*/
|
|
875
|
+
async getVerbCount() {
|
|
876
|
+
await this.ensureInitialized();
|
|
877
|
+
return this.storage.getVerbCount();
|
|
878
|
+
}
|
|
806
879
|
// ============= SUB-APIS =============
|
|
807
880
|
/**
|
|
808
881
|
* Neural API - Advanced AI operations
|
|
@@ -1462,18 +1535,27 @@ export class Brainy {
|
|
|
1462
1535
|
if (config?.index?.efSearch && (config.index.efSearch < 1 || config.index.efSearch > 1000)) {
|
|
1463
1536
|
throw new Error(`Invalid index efSearch: ${config.index.efSearch}. Must be between 1 and 1000`);
|
|
1464
1537
|
}
|
|
1538
|
+
// Auto-detect distributed mode based on environment and configuration
|
|
1539
|
+
const distributedConfig = this.autoDetectDistributed(config?.distributed);
|
|
1465
1540
|
return {
|
|
1466
1541
|
storage: config?.storage || { type: 'auto' },
|
|
1467
1542
|
model: config?.model || { type: 'fast' },
|
|
1468
1543
|
index: config?.index || {},
|
|
1469
1544
|
cache: config?.cache ?? true,
|
|
1470
1545
|
augmentations: config?.augmentations || {},
|
|
1546
|
+
distributed: distributedConfig, // Type will be fixed when used
|
|
1471
1547
|
warmup: config?.warmup ?? false,
|
|
1472
1548
|
realtime: config?.realtime ?? false,
|
|
1473
1549
|
multiTenancy: config?.multiTenancy ?? false,
|
|
1474
1550
|
telemetry: config?.telemetry ?? false,
|
|
1475
1551
|
verbose: config?.verbose ?? false,
|
|
1476
|
-
silent: config?.silent ?? false
|
|
1552
|
+
silent: config?.silent ?? false,
|
|
1553
|
+
// New performance options with smart defaults
|
|
1554
|
+
disableAutoRebuild: config?.disableAutoRebuild ?? false, // false = auto-decide based on size
|
|
1555
|
+
disableMetrics: config?.disableMetrics ?? false,
|
|
1556
|
+
disableAutoOptimize: config?.disableAutoOptimize ?? false,
|
|
1557
|
+
batchWrites: config?.batchWrites ?? true,
|
|
1558
|
+
maxConcurrentOperations: config?.maxConcurrentOperations ?? 10
|
|
1477
1559
|
};
|
|
1478
1560
|
}
|
|
1479
1561
|
/**
|
|
@@ -1483,17 +1565,49 @@ export class Brainy {
|
|
|
1483
1565
|
try {
|
|
1484
1566
|
// Check if storage has data
|
|
1485
1567
|
const entities = await this.storage.getNouns({ pagination: { limit: 1 } });
|
|
1486
|
-
|
|
1568
|
+
const totalCount = entities.totalCount || 0;
|
|
1569
|
+
if (totalCount === 0) {
|
|
1487
1570
|
// No data in storage, no rebuild needed
|
|
1488
1571
|
return;
|
|
1489
1572
|
}
|
|
1573
|
+
// Intelligent decision: Auto-rebuild only for small datasets
|
|
1574
|
+
// For large datasets, use lazy loading for optimal performance
|
|
1575
|
+
const AUTO_REBUILD_THRESHOLD = 1000; // Only auto-rebuild if < 1000 items
|
|
1490
1576
|
// Check if metadata index is empty
|
|
1491
1577
|
const metadataStats = await this.metadataIndex.getStats();
|
|
1492
|
-
if (metadataStats.totalEntries === 0) {
|
|
1493
|
-
|
|
1578
|
+
if (metadataStats.totalEntries === 0 && totalCount > 0) {
|
|
1579
|
+
if (totalCount < AUTO_REBUILD_THRESHOLD) {
|
|
1580
|
+
// Small dataset - rebuild for convenience
|
|
1581
|
+
if (!this.config.silent) {
|
|
1582
|
+
console.log(`🔄 Small dataset (${totalCount} items) - rebuilding index for optimal performance...`);
|
|
1583
|
+
}
|
|
1584
|
+
await this.metadataIndex.rebuild();
|
|
1585
|
+
const newStats = await this.metadataIndex.getStats();
|
|
1586
|
+
if (!this.config.silent) {
|
|
1587
|
+
console.log(`✅ Index rebuilt: ${newStats.totalEntries} entries`);
|
|
1588
|
+
}
|
|
1589
|
+
}
|
|
1590
|
+
else {
|
|
1591
|
+
// Large dataset - use lazy loading
|
|
1592
|
+
if (!this.config.silent) {
|
|
1593
|
+
console.log(`⚡ Large dataset (${totalCount} items) - using lazy loading for optimal startup performance`);
|
|
1594
|
+
console.log('💡 Tip: Indexes will build automatically as you use the system');
|
|
1595
|
+
}
|
|
1596
|
+
}
|
|
1597
|
+
}
|
|
1598
|
+
// Override with explicit config if provided
|
|
1599
|
+
if (this.config.disableAutoRebuild === true) {
|
|
1600
|
+
if (!this.config.silent) {
|
|
1601
|
+
console.log('⚡ Auto-rebuild explicitly disabled via config');
|
|
1602
|
+
}
|
|
1603
|
+
return;
|
|
1604
|
+
}
|
|
1605
|
+
else if (this.config.disableAutoRebuild === false && metadataStats.totalEntries === 0) {
|
|
1606
|
+
// Explicitly enabled - rebuild regardless of size
|
|
1607
|
+
if (!this.config.silent) {
|
|
1608
|
+
console.log('🔄 Auto-rebuild explicitly enabled - rebuilding index...');
|
|
1609
|
+
}
|
|
1494
1610
|
await this.metadataIndex.rebuild();
|
|
1495
|
-
const newStats = await this.metadataIndex.getStats();
|
|
1496
|
-
console.log(`✅ Metadata index rebuilt: ${newStats.totalEntries} entries`);
|
|
1497
1611
|
}
|
|
1498
1612
|
// Note: GraphAdjacencyIndex will rebuild itself as relationships are added
|
|
1499
1613
|
// Vector index should already be populated if storage has data
|
|
@@ -1525,6 +1639,113 @@ export class Brainy {
|
|
|
1525
1639
|
// We'll just mark as not initialized
|
|
1526
1640
|
this.initialized = false;
|
|
1527
1641
|
}
|
|
1642
|
+
/**
|
|
1643
|
+
* Intelligently auto-detect distributed configuration
|
|
1644
|
+
* Zero-config: Automatically determines best distributed settings
|
|
1645
|
+
*/
|
|
1646
|
+
autoDetectDistributed(config) {
|
|
1647
|
+
// If explicitly disabled, respect that
|
|
1648
|
+
if (config?.enabled === false) {
|
|
1649
|
+
return config;
|
|
1650
|
+
}
|
|
1651
|
+
// Auto-detect based on environment variables (common in production)
|
|
1652
|
+
const envEnabled = process.env.BRAINY_DISTRIBUTED === 'true' ||
|
|
1653
|
+
process.env.NODE_ENV === 'production' ||
|
|
1654
|
+
process.env.CLUSTER_SIZE ||
|
|
1655
|
+
process.env.KUBERNETES_SERVICE_HOST; // Running in K8s
|
|
1656
|
+
// Auto-detect based on storage type (S3/R2/GCS implies distributed)
|
|
1657
|
+
const storageImpliesDistributed = this.config?.storage?.type === 's3' ||
|
|
1658
|
+
this.config?.storage?.type === 'r2' ||
|
|
1659
|
+
this.config?.storage?.type === 'gcs';
|
|
1660
|
+
// If not explicitly configured but environment suggests distributed
|
|
1661
|
+
if (!config && (envEnabled || storageImpliesDistributed)) {
|
|
1662
|
+
return {
|
|
1663
|
+
enabled: true,
|
|
1664
|
+
nodeId: process.env.HOSTNAME || process.env.NODE_ID || `node-${Date.now()}`,
|
|
1665
|
+
nodes: process.env.BRAINY_NODES?.split(',') || [],
|
|
1666
|
+
coordinatorUrl: process.env.BRAINY_COORDINATOR || undefined,
|
|
1667
|
+
shardCount: parseInt(process.env.BRAINY_SHARDS || '64'),
|
|
1668
|
+
replicationFactor: parseInt(process.env.BRAINY_REPLICAS || '3'),
|
|
1669
|
+
consensus: process.env.BRAINY_CONSENSUS || 'raft',
|
|
1670
|
+
transport: process.env.BRAINY_TRANSPORT || 'http'
|
|
1671
|
+
};
|
|
1672
|
+
}
|
|
1673
|
+
// Merge with provided config, applying intelligent defaults
|
|
1674
|
+
return config ? {
|
|
1675
|
+
...config,
|
|
1676
|
+
nodeId: config.nodeId || process.env.HOSTNAME || `node-${Date.now()}`,
|
|
1677
|
+
shardCount: config.shardCount || 64,
|
|
1678
|
+
replicationFactor: config.replicationFactor || 3,
|
|
1679
|
+
consensus: config.consensus || 'raft',
|
|
1680
|
+
transport: config.transport || 'http'
|
|
1681
|
+
} : undefined;
|
|
1682
|
+
}
|
|
1683
|
+
/**
|
|
1684
|
+
* Setup distributed components with zero-config intelligence
|
|
1685
|
+
*/
|
|
1686
|
+
setupDistributedComponents() {
|
|
1687
|
+
const distConfig = this.config.distributed;
|
|
1688
|
+
if (!distConfig?.enabled)
|
|
1689
|
+
return;
|
|
1690
|
+
console.log('🌍 Initializing distributed mode:', {
|
|
1691
|
+
nodeId: distConfig.nodeId,
|
|
1692
|
+
shards: distConfig.shardCount,
|
|
1693
|
+
replicas: distConfig.replicationFactor
|
|
1694
|
+
});
|
|
1695
|
+
// Initialize coordinator for consensus
|
|
1696
|
+
this.coordinator = new DistributedCoordinator({
|
|
1697
|
+
nodeId: distConfig.nodeId,
|
|
1698
|
+
address: distConfig.coordinatorUrl?.split(':')[0] || 'localhost',
|
|
1699
|
+
port: parseInt(distConfig.coordinatorUrl?.split(':')[1] || '8080'),
|
|
1700
|
+
nodes: distConfig.nodes
|
|
1701
|
+
});
|
|
1702
|
+
// Start the coordinator to establish leadership
|
|
1703
|
+
this.coordinator.start().catch(err => {
|
|
1704
|
+
console.warn('Coordinator start failed (will retry on init):', err.message);
|
|
1705
|
+
});
|
|
1706
|
+
// Initialize shard manager for data distribution
|
|
1707
|
+
this.shardManager = new ShardManager({
|
|
1708
|
+
shardCount: distConfig.shardCount,
|
|
1709
|
+
replicationFactor: distConfig.replicationFactor,
|
|
1710
|
+
virtualNodes: 150, // Optimal for consistent distribution
|
|
1711
|
+
autoRebalance: true
|
|
1712
|
+
});
|
|
1713
|
+
// Initialize cache synchronization
|
|
1714
|
+
this.cacheSync = new CacheSync({
|
|
1715
|
+
nodeId: distConfig.nodeId,
|
|
1716
|
+
syncInterval: 1000
|
|
1717
|
+
});
|
|
1718
|
+
// Initialize read/write separation if we have replicas
|
|
1719
|
+
// Note: Will be properly initialized after coordinator starts
|
|
1720
|
+
if (distConfig.replicationFactor && distConfig.replicationFactor > 1) {
|
|
1721
|
+
// Defer creation until coordinator is ready
|
|
1722
|
+
setTimeout(() => {
|
|
1723
|
+
this.readWriteSeparation = new ReadWriteSeparation({
|
|
1724
|
+
nodeId: distConfig.nodeId,
|
|
1725
|
+
consistencyLevel: 'eventual',
|
|
1726
|
+
role: 'replica', // Start as replica, will promote if leader
|
|
1727
|
+
syncInterval: 5000
|
|
1728
|
+
}, this.coordinator, this.shardManager, this.cacheSync);
|
|
1729
|
+
}, 100);
|
|
1730
|
+
}
|
|
1731
|
+
}
|
|
1732
|
+
/**
|
|
1733
|
+
* Pass distributed components to storage adapter
|
|
1734
|
+
*/
|
|
1735
|
+
async connectDistributedStorage() {
|
|
1736
|
+
if (!this.config.distributed?.enabled)
|
|
1737
|
+
return;
|
|
1738
|
+
// Check if storage supports distributed operations
|
|
1739
|
+
if ('setDistributedComponents' in this.storage) {
|
|
1740
|
+
this.storage.setDistributedComponents({
|
|
1741
|
+
coordinator: this.coordinator,
|
|
1742
|
+
shardManager: this.shardManager,
|
|
1743
|
+
cacheSync: this.cacheSync,
|
|
1744
|
+
readWriteSeparation: this.readWriteSeparation
|
|
1745
|
+
});
|
|
1746
|
+
console.log('✅ Distributed storage connected');
|
|
1747
|
+
}
|
|
1748
|
+
}
|
|
1528
1749
|
}
|
|
1529
1750
|
// Re-export types for convenience
|
|
1530
1751
|
export * from './types/brainy.types.js';
|
package/dist/coreTypes.d.ts
CHANGED
|
@@ -512,4 +512,14 @@ export interface StorageAdapter {
|
|
|
512
512
|
* @returns Promise that resolves to an array of changes
|
|
513
513
|
*/
|
|
514
514
|
getChangesSince?(timestamp: number, limit?: number): Promise<any[]>;
|
|
515
|
+
/**
|
|
516
|
+
* Get total count of nouns in storage - O(1) operation
|
|
517
|
+
* @returns Promise that resolves to the total number of nouns
|
|
518
|
+
*/
|
|
519
|
+
getNounCount(): Promise<number>;
|
|
520
|
+
/**
|
|
521
|
+
* Get total count of verbs in storage - O(1) operation
|
|
522
|
+
* @returns Promise that resolves to the total number of verbs
|
|
523
|
+
*/
|
|
524
|
+
getVerbCount(): Promise<number>;
|
|
515
525
|
}
|
package/dist/hnsw/hnswIndex.d.ts
CHANGED
package/dist/hnsw/hnswIndex.js
CHANGED
|
@@ -15,6 +15,9 @@ export class HNSWIndex {
|
|
|
15
15
|
this.nouns = new Map();
|
|
16
16
|
this.entryPointId = null;
|
|
17
17
|
this.maxLevel = 0;
|
|
18
|
+
// Track high-level nodes for O(1) entry point selection
|
|
19
|
+
this.highLevelNodes = new Map(); // level -> node IDs
|
|
20
|
+
this.MAX_TRACKED_LEVELS = 10; // Only track top levels for memory efficiency
|
|
18
21
|
this.dimension = null;
|
|
19
22
|
this.useParallelization = true; // Whether to use parallelization for performance-critical operations
|
|
20
23
|
this.config = { ...DEFAULT_CONFIG, ...config };
|
|
@@ -203,6 +206,13 @@ export class HNSWIndex {
|
|
|
203
206
|
}
|
|
204
207
|
// Add noun to the index
|
|
205
208
|
this.nouns.set(id, noun);
|
|
209
|
+
// Track high-level nodes for O(1) entry point selection
|
|
210
|
+
if (nounLevel >= 2 && nounLevel <= this.MAX_TRACKED_LEVELS) {
|
|
211
|
+
if (!this.highLevelNodes.has(nounLevel)) {
|
|
212
|
+
this.highLevelNodes.set(nounLevel, new Set());
|
|
213
|
+
}
|
|
214
|
+
this.highLevelNodes.get(nounLevel).add(id);
|
|
215
|
+
}
|
|
206
216
|
return id;
|
|
207
217
|
}
|
|
208
218
|
/**
|
package/dist/neural/improvedNeuralAPI.d.ts
CHANGED
|
@@ -159,8 +159,21 @@ export declare class ImprovedNeuralAPI {
|
|
|
159
159
|
* Group items by their semantic noun types
|
|
160
160
|
*/
|
|
161
161
|
private _groupBySemanticType;
|
|
162
|
-
|
|
162
|
+
/**
|
|
163
|
+
* Iterate through all items without loading them all at once
|
|
164
|
+
* This scales to millions of items without memory issues
|
|
165
|
+
*/
|
|
166
|
+
private _iterateAllItems;
|
|
167
|
+
/**
|
|
168
|
+
* Get a sample of item IDs for operations that don't need all items
|
|
169
|
+
* This is O(1) for small samples
|
|
170
|
+
*/
|
|
171
|
+
private _getSampleItemIds;
|
|
172
|
+
/**
|
|
173
|
+
* Get total count using the brain's O(1) counting API
|
|
174
|
+
*/
|
|
163
175
|
private _getTotalItemCount;
|
|
176
|
+
private _getAllItemIds;
|
|
164
177
|
private _calculateTotalWeight;
|
|
165
178
|
private _getNeighborCommunities;
|
|
166
179
|
private _calculateModularityGain;
|