@soulcraft/brainy 0.32.0 → 0.34.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +516 -402
- package/dist/brainyData.d.ts +136 -0
- package/dist/coreTypes.d.ts +26 -0
- package/dist/storage/adapters/baseStorageAdapter.d.ts +16 -0
- package/dist/storage/adapters/baseStorageAdapter.d.ts.map +1 -1
- package/dist/storage/adapters/fileSystemStorage.d.ts.map +1 -1
- package/dist/storage/cacheManager.d.ts +94 -13
- package/dist/storage/cacheManager.d.ts.map +1 -1
- package/dist/storage/storageFactory.d.ts +47 -3
- package/dist/storage/storageFactory.d.ts.map +1 -1
- package/dist/unified.js +61974 -29728
- package/dist/unified.min.js +4624 -982
- package/dist/utils/embedding.d.ts +11 -17
- package/dist/utils/embedding.d.ts.map +1 -1
- package/dist/utils/fieldNameTracking.d.ts +21 -0
- package/dist/utils/fieldNameTracking.d.ts.map +1 -0
- package/dist/utils/index.d.ts +2 -0
- package/dist/utils/index.d.ts.map +1 -1
- package/dist/utils/jsonProcessing.d.ts +43 -0
- package/dist/utils/jsonProcessing.d.ts.map +1 -0
- package/dist/utils/robustModelLoader.d.ts +94 -0
- package/dist/utils/robustModelLoader.d.ts.map +1 -0
- package/package.json +21 -21
package/README.md
CHANGED
@@ -27,6 +27,8 @@ it gets - learning from your data to provide increasingly relevant results and c
 
 - **Run Everywhere** - Works in browsers, Node.js, serverless functions, and containers
 - **Vector Search** - Find semantically similar content using embeddings
+- **Advanced JSON Document Search** - Search within specific fields of JSON documents with field prioritization and
+  service-based field standardization
 - **Graph Relationships** - Connect data with meaningful relationships
 - **Streaming Pipeline** - Process data in real-time as it flows through the system
 - **Extensible Augmentations** - Customize and extend functionality with pluggable components
@@ -89,7 +91,7 @@ REST API web service wrapper that provides HTTP endpoints for search operations
 Brainy uses a unified build that automatically adapts to your environment (Node.js, browser, or serverless):
 
 ```typescript
-import {BrainyData, NounType, VerbType} from '@soulcraft/brainy'
+import { BrainyData, NounType, VerbType } from '@soulcraft/brainy'
 
 // Create and initialize the database
 const db = new BrainyData()
@@ -97,13 +99,13 @@ await db.init()
 
 // Add data (automatically converted to vectors)
 const catId = await db.add("Cats are independent pets", {
-
-
+  noun: NounType.Thing,
+  category: 'animal'
 })
 
 const dogId = await db.add("Dogs are loyal companions", {
-
-
+  noun: NounType.Thing,
+  category: 'animal'
 })
 
 // Search for similar items
@@ -112,8 +114,8 @@ console.log(results)
 
 // Add a relationship between items
 await db.addVerb(catId, dogId, {
-
-
+  verb: VerbType.RelatedTo,
+  description: 'Both are common household pets'
 })
 ```
 
@@ -121,10 +123,10 @@ await db.addVerb(catId, dogId, {
 
 ```typescript
 // Standard import - automatically adapts to any environment
-import {BrainyData} from '@soulcraft/brainy'
+import { BrainyData } from '@soulcraft/brainy'
 
 // Minified version for production
-import {BrainyData} from '@soulcraft/brainy/min'
+import { BrainyData } from '@soulcraft/brainy/min'
 ```
 
 > **Note**: The CLI functionality is available as a separate package `@soulcraft/brainy-cli` to reduce the bundle size
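
The CLI mentioned in the note above is published separately. A minimal install sketch, using nothing beyond the package name stated in the note (the actual command name and usage are documented in the brainy-cli package, not in this diff):

```bash
# Install the separately published CLI
npm install -g @soulcraft/brainy-cli
```
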
@@ -136,15 +138,15 @@ import {BrainyData} from '@soulcraft/brainy/min'
 ```html
 
 <script type="module">
-
-
+  // Use local files instead of CDN
+  import { BrainyData } from './dist/unified.js'
 
-
-
+  // Or minified version
+  // import { BrainyData } from './dist/unified.min.js'
 
-
-
-
+  const db = new BrainyData()
+  await db.init()
+  // ...
 </script>
 ```
 
@@ -299,13 +301,13 @@ The pipeline runs automatically when you:
 
 ```typescript
 // Add data (runs embedding → indexing → storage)
-const id = await db.add("Your text data here", {metadata})
+const id = await db.add("Your text data here", { metadata })
 
 // Search (runs embedding → similarity search)
 const results = await db.searchText("Your query here", 5)
 
 // Connect entities (runs graph construction → storage)
-await db.addVerb(sourceId, targetId, {verb: VerbType.RelatedTo})
+await db.addVerb(sourceId, targetId, { verb: VerbType.RelatedTo })
 ```
 
 Using the CLI:
@@ -404,13 +406,13 @@ Connections between nouns (edges in the graph):
 Brainy provides utility functions to access lists of noun and verb types:
 
 ```typescript
-import {
-    NounType,
-    VerbType,
-    getNounTypes,
-    getVerbTypes,
-    getNounTypeMap,
-    getVerbTypeMap
+import {
+  NounType,
+  VerbType,
+  getNounTypes,
+  getVerbTypes,
+  getNounTypeMap,
+  getVerbTypeMap
 } from '@soulcraft/brainy'
 
 // At development time:
@@ -433,6 +435,7 @@ const verbTypeMap = getVerbTypeMap() // { RelatedTo: 'relatedTo', Contains: 'con
 ```
 
 These utility functions make it easy to:
+
 - Get a complete list of available noun and verb types
 - Validate user input against valid types
 - Create dynamic UI components that display or select from available types
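
One of the bullets above — validating user input against valid types — can be done with the same utilities. A minimal sketch, assuming `getNounTypes()` returns an array of noun-type strings (consistent with the map examples in this hunk, but not spelled out by the diff):

```typescript
import { getNounTypes } from '@soulcraft/brainy'

// Reject unknown noun types before calling db.add (illustrative helper)
function isValidNounType(candidate: string): boolean {
  return getNounTypes().includes(candidate)
}

const userInput = 'thing' // e.g. from a form field
if (!isValidNounType(userInput)) {
  throw new Error(`Unknown noun type: ${userInput}`)
}
```
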
@@ -528,15 +531,17 @@ const status = await db.status()
 const backupData = await db.backup()
 
 // Restore data into the database
-const restoreResult = await db.restore(backupData, {clearExisting: true})
+const restoreResult = await db.restore(backupData, { clearExisting: true })
 ```
 
 ### Database Statistics
 
-Brainy provides a way to get statistics about the current state of the database. For detailed information about the
+Brainy provides a way to get statistics about the current state of the database. For detailed information about the
+statistics system, including implementation details, scalability improvements, and usage examples, see
+our [Statistics Guide](STATISTICS.md).
 
 ```typescript
-import {BrainyData, getStatistics} from '@soulcraft/brainy'
+import { BrainyData, getStatistics } from '@soulcraft/brainy'
 
 // Create and initialize the database
 const db = new BrainyData()
@@ -553,25 +558,25 @@ console.log(stats)
 ```typescript
 // Add a noun (automatically vectorized)
 const id = await db.add(textOrVector, {
-
-
+  noun: NounType.Thing,
+  // other metadata...
 })
 
 // Add multiple nouns in parallel (with multithreading and batch embedding)
 const ids = await db.addBatch([
-
-
-
-
-
-
-
-
-
+  {
+    vectorOrData: "First item to add",
+    metadata: { noun: NounType.Thing, category: 'example' }
+  },
+  {
+    vectorOrData: "Second item to add",
+    metadata: { noun: NounType.Thing, category: 'example' }
+  },
+  // More items...
 ], {
-
-
-
+  forceEmbed: false,
+  concurrency: 4, // Control the level of parallelism (default: 4)
+  batchSize: 50 // Control the number of items to process in a single batch (default: 50)
 })
 
 // Retrieve a noun
@@ -579,8 +584,8 @@ const noun = await db.get(id)
 
 // Update noun metadata
 await db.updateMetadata(id, {
-
-
+  noun: NounType.Thing,
+  // updated metadata...
 })
 
 // Delete a noun
@@ -592,6 +597,39 @@ const textResults = await db.searchText("query text", numResults)
 
 // Search by noun type
 const thingNouns = await db.searchByNounTypes([NounType.Thing], numResults)
+
+// Search within specific fields of JSON documents
+const fieldResults = await db.search("Acme Corporation", 10, {
+  searchField: "company"
+})
+
+// Search using standard field names across different services
+const titleResults = await db.searchByStandardField("title", "climate change", 10)
+const authorResults = await db.searchByStandardField("author", "johndoe", 10, {
+  services: ["github", "reddit"]
+})
+```
+
+### Field Standardization and Service Tracking
+
+Brainy automatically tracks field names from JSON documents and associates them with the service that inserted the data.
+This enables powerful cross-service search capabilities:
+
+```typescript
+// Get all available field names organized by service
+const fieldNames = await db.getAvailableFieldNames()
+// Example output: { "github": ["repository.name", "issue.title"], "reddit": ["title", "selftext"] }
+
+// Get standard field mappings
+const standardMappings = await db.getStandardFieldMappings()
+// Example output: { "title": { "github": ["repository.name"], "reddit": ["title"] } }
+```
+
+When adding data, specify the service name to ensure proper field tracking:
+
+```typescript
+// Add data with service name
+await db.add(jsonData, metadata, { service: "github" })
 ```
 
 ### Working with Verbs (Relationships)
@@ -599,21 +637,21 @@ const thingNouns = await db.searchByNounTypes([NounType.Thing], numResults)
 ```typescript
 // Add a relationship between nouns
 await db.addVerb(sourceId, targetId, {
-
-
+  verb: VerbType.RelatedTo,
+  // other metadata...
 })
 
 // Add a relationship with auto-creation of missing nouns
 // This is useful when the target noun might not exist yet
 await db.addVerb(sourceId, targetId, {
-
-
-
-
-
-
-
-
+  verb: VerbType.RelatedTo,
+  // Enable auto-creation of missing nouns
+  autoCreateMissingNouns: true,
+  // Optional metadata for auto-created nouns
+  missingNounMetadata: {
+    noun: NounType.Concept,
+    description: 'Auto-created noun'
+  }
 })
 
 // Get all relationships
@@ -665,32 +703,53 @@ db.setReadOnly(false)
 db.setWriteOnly(false)
 ```
 
-- **Read-Only Mode**: When enabled, prevents all write operations (add, update, delete). Useful for deployment scenarios
-
+- **Read-Only Mode**: When enabled, prevents all write operations (add, update, delete). Useful for deployment scenarios
+  where you want to prevent modifications to the database.
+- **Write-Only Mode**: When enabled, prevents all search operations. Useful for initial data loading or when you want to
+  optimize for write performance.
 
 ### Embedding
 
 ```typescript
 import {
-
-
-
+  BrainyData,
+  createTensorFlowEmbeddingFunction,
+  createThreadedEmbeddingFunction
 } from '@soulcraft/brainy'
 
 // Use the standard TensorFlow Universal Sentence Encoder embedding function
 const db = new BrainyData({
-
+  embeddingFunction: createTensorFlowEmbeddingFunction()
 })
 await db.init()
 
 // Or use the threaded embedding function for better performance
 const threadedDb = new BrainyData({
-
+  embeddingFunction: createThreadedEmbeddingFunction()
 })
 await threadedDb.init()
 
 // Directly embed text to vectors
 const vector = await db.embed("Some text to convert to a vector")
+
+// Calculate similarity between two texts or vectors
+const similarity = await db.calculateSimilarity(
+  "Cats are furry pets",
+  "Felines make good companions"
+)
+console.log(`Similarity score: ${similarity}`) // Higher value means more similar
+
+// Calculate similarity with custom options
+const vectorA = await db.embed("First text")
+const vectorB = await db.embed("Second text")
+const customSimilarity = await db.calculateSimilarity(
+  vectorA, // Can use pre-computed vectors
+  vectorB,
+  {
+    forceEmbed: false, // Skip embedding if inputs are already vectors
+    distanceFunction: cosineDistance // Optional custom distance function
+  }
+)
 ```
 
 The threaded embedding function runs in a separate thread (Web Worker in browsers, Worker Thread in Node.js) to improve
@@ -726,42 +785,42 @@ Brainy includes comprehensive multithreading support to improve performance acro
 7. **Automatic Environment Detection**: Adapts to browser (Web Workers) and Node.js (Worker Threads) environments
 
 ```typescript
-import {BrainyData, euclideanDistance} from '@soulcraft/brainy'
+import { BrainyData, euclideanDistance } from '@soulcraft/brainy'
 
 // Configure with custom options
 const db = new BrainyData({
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+  // Use Euclidean distance instead of default cosine distance
+  distanceFunction: euclideanDistance,
+
+  // HNSW index configuration for search performance
+  hnsw: {
+    M: 16, // Max connections per noun
+    efConstruction: 200, // Construction candidate list size
+    efSearch: 50, // Search candidate list size
+  },
+
+  // Performance optimization options
+  performance: {
+    useParallelization: true, // Enable multithreaded search operations
+  },
+
+  // Noun and Verb type validation
+  typeValidation: {
+    enforceNounTypes: true, // Validate noun types against NounType enum
+    enforceVerbTypes: true, // Validate verb types against VerbType enum
+  },
+
+  // Storage configuration
+  storage: {
+    requestPersistentStorage: true,
+    // Example configuration for cloud storage (replace with your own values):
+    // s3Storage: {
+    //   bucketName: 'your-s3-bucket-name',
+    //   region: 'your-aws-region'
+    //   // Credentials should be provided via environment variables
+    //   // AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
+    // }
+  }
 })
 ```
 
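
The config above swaps the default cosine distance for `euclideanDistance`. For intuition, the standard formulas those two names conventionally refer to are sketched below — this is generic vector math, not Brainy's internal implementation:

```typescript
// Generic distance math for vectors a and b (illustrative only)
const dot = (a: number[], b: number[]) => a.reduce((s, x, i) => s + x * b[i], 0)
const norm = (a: number[]) => Math.sqrt(dot(a, a))

// Cosine distance compares direction only; Euclidean distance also weighs magnitude
const cosineDist = (a: number[], b: number[]) => 1 - dot(a, b) / (norm(a) * norm(b))
const euclideanDist = (a: number[], b: number[]) =>
  Math.sqrt(a.reduce((s, x, i) => s + (x - b[i]) ** 2, 0))

console.log(cosineDist([1, 0], [2, 0]))    // 0 — same direction
console.log(euclideanDist([1, 0], [2, 0])) // 1 — differs in magnitude
```
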
@@ -775,34 +834,34 @@ hybrid approach:
 3. **Memory-Efficient Indexing** - Optimizes memory usage for large-scale vector collections
 
 ```typescript
-import {BrainyData} from '@soulcraft/brainy'
+import { BrainyData } from '@soulcraft/brainy'
 
 // Configure with optimized HNSW index for large datasets
 const db = new BrainyData({
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    },
-
-    // Whether to use disk-based storage for the index
-    useDiskBasedIndex: true // Enable disk-based storage
+  hnswOptimized: {
+    // Standard HNSW parameters
+    M: 16, // Max connections per noun
+    efConstruction: 200, // Construction candidate list size
+    efSearch: 50, // Search candidate list size
+
+    // Memory threshold in bytes - when exceeded, will use disk-based approach
+    memoryThreshold: 1024 * 1024 * 1024, // 1GB default threshold
+
+    // Product quantization settings for dimensionality reduction
+    productQuantization: {
+      enabled: true, // Enable product quantization
+      numSubvectors: 16, // Number of subvectors to split the vector into
+      numCentroids: 256 // Number of centroids per subvector
    },
 
-    //
-
-
-
+    // Whether to use disk-based storage for the index
+    useDiskBasedIndex: true // Enable disk-based storage
+  },
+
+  // Storage configuration (required for disk-based index)
+  storage: {
+    requestPersistentStorage: true
+  }
 })
 ```
 
 // The optimized index automatically adapts based on dataset size:
|
|
|
867
926
|
```typescript
|
|
868
927
|
// Restore data with all options
|
|
869
928
|
const restoreResult = await db.restore(backupData, {
|
|
870
|
-
|
|
929
|
+
clearExisting: true // Whether to clear existing data before restore
|
|
871
930
|
})
|
|
872
931
|
|
|
873
932
|
// Import sparse data (without vectors)
|
|
874
933
|
// Vectors will be automatically created using the embedding function
|
|
875
934
|
const sparseData = {
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
935
|
+
nouns: [
|
|
936
|
+
{
|
|
937
|
+
id: '123',
|
|
938
|
+
// No vector field - will be created during import
|
|
939
|
+
metadata: {
|
|
940
|
+
noun: 'Thing',
|
|
941
|
+
text: 'This text will be used to generate a vector'
|
|
942
|
+
}
|
|
943
|
+
}
|
|
944
|
+
],
|
|
945
|
+
verbs: [],
|
|
946
|
+
version: '1.0.0'
|
|
888
947
|
}
|
|
889
948
|
|
|
890
949
|
const sparseImportResult = await db.importSparseData(sparseData)
|
|
@@ -931,82 +990,82 @@ boilerplate:
|
|
|
931
990
|
|
|
932
991
|
```typescript
|
|
933
992
|
import {
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
993
|
+
createMemoryAugmentation,
|
|
994
|
+
createConduitAugmentation,
|
|
995
|
+
createSenseAugmentation,
|
|
996
|
+
addWebSocketSupport,
|
|
997
|
+
executeStreamlined,
|
|
998
|
+
processStaticData,
|
|
999
|
+
processStreamingData,
|
|
1000
|
+
createPipeline
|
|
942
1001
|
} from '@soulcraft/brainy'
|
|
943
1002
|
|
|
944
1003
|
// Create a memory augmentation with minimal code
|
|
945
1004
|
const memoryAug = createMemoryAugmentation({
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
1005
|
+
name: 'simple-memory',
|
|
1006
|
+
description: 'A simple in-memory storage augmentation',
|
|
1007
|
+
autoRegister: true,
|
|
1008
|
+
autoInitialize: true,
|
|
1009
|
+
|
|
1010
|
+
// Implement only the methods you need
|
|
1011
|
+
storeData: async (key, data) => {
|
|
1012
|
+
// Your implementation here
|
|
1013
|
+
return {
|
|
1014
|
+
success: true,
|
|
1015
|
+
data: true
|
|
1016
|
+
}
|
|
1017
|
+
},
|
|
959
1018
|
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
}
|
|
1019
|
+
retrieveData: async (key) => {
|
|
1020
|
+
// Your implementation here
|
|
1021
|
+
return {
|
|
1022
|
+
success: true,
|
|
1023
|
+
data: { example: 'data', key }
|
|
966
1024
|
}
|
|
1025
|
+
}
|
|
967
1026
|
})
|
|
968
1027
|
|
|
969
1028
|
// Add WebSocket support to any augmentation
|
|
970
1029
|
const wsAugmentation = addWebSocketSupport(memoryAug, {
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
}
|
|
1030
|
+
connectWebSocket: async (url) => {
|
|
1031
|
+
// Your implementation here
|
|
1032
|
+
return {
|
|
1033
|
+
connectionId: 'ws-1',
|
|
1034
|
+
url,
|
|
1035
|
+
status: 'connected'
|
|
978
1036
|
}
|
|
1037
|
+
}
|
|
979
1038
|
})
|
|
980
1039
|
|
|
981
1040
|
// Process static data through a pipeline
|
|
982
1041
|
const result = await processStaticData(
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
{
|
|
986
|
-
augmentation: senseAug,
|
|
987
|
-
method: 'processRawData',
|
|
988
|
-
transformArgs: (data) => [data, 'text']
|
|
989
|
-
},
|
|
990
|
-
{
|
|
991
|
-
augmentation: memoryAug,
|
|
992
|
-
method: 'storeData',
|
|
993
|
-
transformArgs: (data) => ['processed-data', data]
|
|
994
|
-
}
|
|
995
|
-
]
|
|
996
|
-
)
|
|
997
|
-
|
|
998
|
-
// Create a reusable pipeline
|
|
999
|
-
const pipeline = createPipeline([
|
|
1042
|
+
'Input data',
|
|
1043
|
+
[
|
|
1000
1044
|
{
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1045
|
+
augmentation: senseAug,
|
|
1046
|
+
method: 'processRawData',
|
|
1047
|
+
transformArgs: (data) => [data, 'text']
|
|
1004
1048
|
},
|
|
1005
1049
|
{
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1050
|
+
augmentation: memoryAug,
|
|
1051
|
+
method: 'storeData',
|
|
1052
|
+
transformArgs: (data) => ['processed-data', data]
|
|
1009
1053
|
}
|
|
1054
|
+
]
|
|
1055
|
+
)
|
|
1056
|
+
|
|
1057
|
+
// Create a reusable pipeline
|
|
1058
|
+
const pipeline = createPipeline([
|
|
1059
|
+
{
|
|
1060
|
+
augmentation: senseAug,
|
|
1061
|
+
method: 'processRawData',
|
|
1062
|
+
transformArgs: (data) => [data, 'text']
|
|
1063
|
+
},
|
|
1064
|
+
{
|
|
1065
|
+
augmentation: memoryAug,
|
|
1066
|
+
method: 'storeData',
|
|
1067
|
+
transformArgs: (data) => ['processed-data', data]
|
|
1068
|
+
}
|
|
1010
1069
|
])
|
|
1011
1070
|
|
|
1012
1071
|
// Use the pipeline
|
|
@@ -1014,11 +1073,11 @@ const result = await pipeline('New input data')
|
|
|
1014
1073
|
|
|
1015
1074
|
// Dynamically load augmentations at runtime
|
|
1016
1075
|
const loadedAugmentations = await loadAugmentationModule(
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1076
|
+
import('./my-augmentations.js'),
|
|
1077
|
+
{
|
|
1078
|
+
autoRegister: true,
|
|
1079
|
+
autoInitialize: true
|
|
1080
|
+
}
|
|
1022
1081
|
)
|
|
1023
1082
|
```
|
|
1024
1083
|
|
|
@@ -1037,56 +1096,56 @@ capabilities to their augmentations:
|
|
|
1037
1096
|
|
|
1038
1097
|
```typescript
|
|
1039
1098
|
import {
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1099
|
+
// Base WebSocket support interface
|
|
1100
|
+
IWebSocketSupport,
|
|
1101
|
+
|
|
1102
|
+
// Combined WebSocket augmentation types
|
|
1103
|
+
IWebSocketSenseAugmentation,
|
|
1104
|
+
IWebSocketConduitAugmentation,
|
|
1105
|
+
IWebSocketCognitionAugmentation,
|
|
1106
|
+
IWebSocketMemoryAugmentation,
|
|
1107
|
+
IWebSocketPerceptionAugmentation,
|
|
1108
|
+
IWebSocketDialogAugmentation,
|
|
1109
|
+
IWebSocketActivationAugmentation,
|
|
1110
|
+
|
|
1111
|
+
// Function to add WebSocket support to any augmentation
|
|
1112
|
+
addWebSocketSupport
|
|
1054
1113
|
} from '@soulcraft/brainy'
|
|
1055
1114
|
|
|
1056
1115
|
// Example: Creating a typed WebSocket-enabled sense augmentation
|
|
1057
1116
|
const mySenseAug = createSenseAugmentation({
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
}
|
|
1117
|
+
name: 'my-sense',
|
|
1118
|
+
processRawData: async (data, dataType) => {
|
|
1119
|
+
// Implementation
|
|
1120
|
+
return {
|
|
1121
|
+
success: true,
|
|
1122
|
+
data: { nouns: [], verbs: [] }
|
|
1065
1123
|
}
|
|
1124
|
+
}
|
|
1066
1125
|
}) as IWebSocketSenseAugmentation
|
|
1067
1126
|
|
|
1068
1127
|
// Add WebSocket support
|
|
1069
1128
|
addWebSocketSupport(mySenseAug, {
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
}
|
|
1077
|
-
},
|
|
1078
|
-
sendWebSocketMessage: async (connectionId, data) => {
|
|
1079
|
-
// Send message implementation
|
|
1080
|
-
},
|
|
1081
|
-
onWebSocketMessage: async (connectionId, callback) => {
|
|
1082
|
-
// Register callback implementation
|
|
1083
|
-
},
|
|
1084
|
-
offWebSocketMessage: async (connectionId, callback) => {
|
|
1085
|
-
// Remove callback implementation
|
|
1086
|
-
},
|
|
1087
|
-
closeWebSocket: async (connectionId, code, reason) => {
|
|
1088
|
-
// Close connection implementation
|
|
1129
|
+
connectWebSocket: async (url) => {
|
|
1130
|
+
// WebSocket implementation
|
|
1131
|
+
return {
|
|
1132
|
+
connectionId: 'ws-1',
|
|
1133
|
+
url,
|
|
1134
|
+
status: 'connected'
|
|
1089
1135
|
}
|
|
1136
|
+
},
|
|
1137
|
+
sendWebSocketMessage: async (connectionId, data) => {
|
|
1138
|
+
// Send message implementation
|
|
1139
|
+
},
|
|
1140
|
+
onWebSocketMessage: async (connectionId, callback) => {
|
|
1141
|
+
// Register callback implementation
|
|
1142
|
+
},
|
|
1143
|
+
offWebSocketMessage: async (connectionId, callback) => {
|
|
1144
|
+
// Remove callback implementation
|
|
1145
|
+
},
|
|
1146
|
+
closeWebSocket: async (connectionId, code, reason) => {
|
|
1147
|
+
// Close connection implementation
|
|
1148
|
+
}
|
|
1090
1149
|
})
|
|
1091
1150
|
|
|
1092
1151
|
// Now mySenseAug has both sense augmentation methods and WebSocket methods
|
|
@@ -1124,13 +1183,13 @@ everywhere.
|
|
|
1124
1183
|
Brainy automatically detects the environment it's running in:
|
|
1125
1184
|
|
|
1126
1185
|
```typescript
|
|
1127
|
-
import {environment} from '@soulcraft/brainy'
|
|
1186
|
+
import { environment } from '@soulcraft/brainy'
|
|
1128
1187
|
|
|
1129
1188
|
// Check which environment we're running in
|
|
1130
1189
|
console.log(`Running in ${
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1190
|
+
environment.isBrowser ? 'browser' :
|
|
1191
|
+
environment.isNode ? 'Node.js' :
|
|
1192
|
+
'serverless/unknown'
|
|
1134
1193
|
} environment`)
|
|
1135
1194
|
```
|
|
1136
1195
|
|
|
@@ -1203,9 +1262,9 @@ You can use the conduit augmentations to sync Brainy instances:
|
|
|
1203
1262
|
|
|
1204
1263
|
```typescript
|
|
1205
1264
|
import {
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1265
|
+
BrainyData,
|
|
1266
|
+
pipeline,
|
|
1267
|
+
createConduitAugmentation
|
|
1209
1268
|
} from '@soulcraft/brainy'
|
|
1210
1269
|
|
|
1211
1270
|
// Create and initialize the database
|
|
@@ -1221,36 +1280,36 @@ pipeline.register(wsConduit)
|
|
|
1221
1280
|
// Connect to another Brainy instance (server or browser)
|
|
1222
1281
|
// Replace the example URL below with your actual WebSocket server URL
|
|
1223
1282
|
const connectionResult = await pipeline.executeConduitPipeline(
|
|
1224
|
-
|
|
1225
|
-
|
|
1283
|
+
'establishConnection',
|
|
1284
|
+
['wss://example-websocket-server.com/brainy-sync', { protocols: 'brainy-sync' }]
|
|
1226
1285
|
)
|
|
1227
1286
|
|
|
1228
1287
|
if (connectionResult[0] && (await connectionResult[0]).success) {
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
}
|
|
1288
|
+
const connection = (await connectionResult[0]).data
|
|
1289
|
+
|
|
1290
|
+
// Read data from the remote instance
|
|
1291
|
+
const readResult = await pipeline.executeConduitPipeline(
|
|
1292
|
+
'readData',
|
|
1293
|
+
[{ connectionId: connection.connectionId, query: { type: 'getAllNouns' } }]
|
|
1294
|
+
)
|
|
1295
|
+
|
|
1296
|
+
// Process and add the received data to the local instance
|
|
1297
|
+
if (readResult[0] && (await readResult[0]).success) {
|
|
1298
|
+
const remoteNouns = (await readResult[0]).data
|
|
1299
|
+
for (const noun of remoteNouns) {
|
|
1300
|
+
await db.add(noun.vector, noun.metadata)
|
|
1243
1301
|
}
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
}
|
|
1302
|
+
}
|
|
1303
|
+
|
|
1304
|
+
// Set up real-time sync by monitoring the stream
|
|
1305
|
+
await wsConduit.monitorStream(connection.connectionId, async (data) => {
|
|
1306
|
+
// Handle incoming data (e.g., new nouns, verbs, updates)
|
|
1307
|
+
if (data.type === 'newNoun') {
|
|
1308
|
+
await db.add(data.vector, data.metadata)
|
|
1309
|
+
} else if (data.type === 'newVerb') {
|
|
1310
|
+
await db.addVerb(data.sourceId, data.targetId, data.vector, data.options)
|
|
1311
|
+
}
|
|
1312
|
+
})
|
|
1254
1313
|
}
|
|
1255
1314
|
```
|
|
1256
1315
|
|
|
@@ -1258,9 +1317,9 @@ if (connectionResult[0] && (await connectionResult[0]).success) {
|
|
|
1258
1317
|
|
|
1259
1318
|
```typescript
|
|
1260
1319
|
import {
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1320
|
+
BrainyData,
|
|
1321
|
+
pipeline,
|
|
1322
|
+
createConduitAugmentation
|
|
1264
1323
|
} from '@soulcraft/brainy'
|
|
1265
1324
|
|
|
1266
1325
|
// Create and initialize the database
|
|
@@ -1276,48 +1335,48 @@ pipeline.register(webrtcConduit)
|
|
|
1276
1335
|
// Connect to a peer using a signaling server
|
|
1277
1336
|
// Replace the example values below with your actual configuration
|
|
1278
1337
|
const connectionResult = await pipeline.executeConduitPipeline(
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1338
|
+
'establishConnection',
|
|
1339
|
+
[
|
|
1340
|
+
'peer-id-to-connect-to', // Replace with actual peer ID
|
|
1341
|
+
{
|
|
1342
|
+
signalServerUrl: 'wss://example-signal-server.com', // Replace with your signal server
|
|
1343
|
+
localPeerId: 'my-local-peer-id', // Replace with your local peer ID
|
|
1344
|
+
iceServers: [{ urls: 'stun:stun.l.google.com:19302' }] // Public STUN server
|
|
1345
|
+
}
|
|
1346
|
+
]
|
|
1288
1347
|
)
|
|
1289
1348
|
|
|
1290
1349
|
if (connectionResult[0] && (await connectionResult[0]).success) {
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1350
|
+
const connection = (await connectionResult[0]).data
|
|
1351
|
+
|
|
1352
|
+
// Set up real-time sync by monitoring the stream
|
|
1353
|
+
await webrtcConduit.monitorStream(connection.connectionId, async (data) => {
|
|
1354
|
+
// Handle incoming data (e.g., new nouns, verbs, updates)
|
|
1355
|
+
if (data.type === 'newNoun') {
|
|
1356
|
+
await db.add(data.vector, data.metadata)
|
|
1357
|
+
} else if (data.type === 'newVerb') {
|
|
1358
|
+
await db.addVerb(data.sourceId, data.targetId, data.vector, data.options)
|
|
1359
|
+
}
|
|
1360
|
+
})
|
|
1361
|
+
|
|
1362
|
+
// When adding new data locally, also send to the peer
|
|
1363
|
+
const nounId = await db.add("New data to sync", { noun: "Thing" })
|
|
1364
|
+
|
|
1365
|
+
// Send the new noun to the peer
|
|
1366
|
+
await pipeline.executeConduitPipeline(
|
|
1367
|
+
'writeData',
|
|
1368
|
+
[
|
|
1369
|
+
{
|
|
1370
|
+
connectionId: connection.connectionId,
|
|
1371
|
+
data: {
|
|
1372
|
+
type: 'newNoun',
|
|
1373
|
+
id: nounId,
|
|
1374
|
+
vector: (await db.get(nounId)).vector,
|
|
1375
|
+
metadata: (await db.get(nounId)).metadata
|
|
1300
1376
|
}
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
const nounId = await db.add("New data to sync", {noun: "Thing"})
|
|
1305
|
-
|
|
1306
|
-
// Send the new noun to the peer
|
|
1307
|
-
await pipeline.executeConduitPipeline(
|
|
1308
|
-
'writeData',
|
|
1309
|
-
[
|
|
1310
|
-
{
|
|
1311
|
-
connectionId: connection.connectionId,
|
|
1312
|
-
data: {
|
|
1313
|
-
type: 'newNoun',
|
|
1314
|
-
id: nounId,
|
|
1315
|
-
vector: (await db.get(nounId)).vector,
|
|
1316
|
-
metadata: (await db.get(nounId)).metadata
|
|
1317
|
-
}
|
|
1318
|
-
}
|
|
1319
|
-
]
|
|
1320
|
-
)
|
|
1377
|
+
}
|
|
1378
|
+
]
|
|
1379
|
+
)
|
|
1321
1380
|
}
|
|
1322
1381
|
```
|
|
1323
1382
|
|
|
@@ -1327,39 +1386,39 @@ Brainy supports searching a server-hosted instance from a browser, storing resul
|
|
|
1327
1386
|
searches against the local instance:
|
|
1328
1387
|
|
|
1329
1388
|
```typescript
|
|
1330
|
-
import {BrainyData} from '@soulcraft/brainy'
|
|
1389
|
+
import { BrainyData } from '@soulcraft/brainy'
|
|
1331
1390
|
|
|
1332
1391
|
// Create and initialize the database with remote server configuration
|
|
1333
1392
|
// Replace the example URL below with your actual Brainy server URL
|
|
1334
1393
|
const db = new BrainyData({
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1394
|
+
remoteServer: {
|
|
1395
|
+
url: 'wss://example-brainy-server.com/ws', // Replace with your server URL
|
|
1396
|
+
protocols: 'brainy-sync',
|
|
1397
|
+
autoConnect: true // Connect automatically during initialization
|
|
1398
|
+
}
|
|
1340
1399
|
})
|
|
1341
1400
|
await db.init()
|
|
1342
1401
|
|
|
1343
1402
|
// Or connect manually after initialization
|
|
1344
1403
|
if (!db.isConnectedToRemoteServer()) {
|
|
1345
|
-
|
|
1346
|
-
|
|
1404
|
+
// Replace the example URL below with your actual Brainy server URL
|
|
1405
|
+
await db.connectToRemoteServer('wss://example-brainy-server.com/ws', 'brainy-sync')
|
|
1347
1406
|
}
|
|
1348
1407
|
|
|
1349
1408
|
// Search the remote server (results are stored locally)
|
|
1350
|
-
const remoteResults = await db.searchText('machine learning', 5, {searchMode: 'remote'})
|
|
1409
|
+
const remoteResults = await db.searchText('machine learning', 5, { searchMode: 'remote' })
|
|
1351
1410
|
|
|
1352
1411
|
// Search the local database (includes previously stored results)
|
|
1353
|
-
const localResults = await db.searchText('machine learning', 5, {searchMode: 'local'})
|
|
1412
|
+
const localResults = await db.searchText('machine learning', 5, { searchMode: 'local' })
|
|
1354
1413
|
|
|
1355
1414
|
// Perform a combined search (local first, then remote if needed)
|
|
1356
|
-
const combinedResults = await db.searchText('neural networks', 5, {searchMode: 'combined'})
|
|
1415
|
+
const combinedResults = await db.searchText('neural networks', 5, { searchMode: 'combined' })
|
|
1357
1416
|
|
|
1358
1417
|
// Add data to both local and remote instances
|
|
1359
1418
|
const id = await db.addToBoth('Deep learning is a subset of machine learning', {
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
1419
|
+
noun: 'Concept',
|
|
1420
|
+
category: 'AI',
|
|
1421
|
+
tags: ['deep learning', 'neural networks']
|
|
1363
1422
|
})
|
|
1364
1423
|
|
|
1365
1424
|
// Clean up when done (this also cleans up worker pools)
|
|
@@ -1377,7 +1436,9 @@ terabyte-scale data that can't fit entirely in memory, we provide several approa
|
|
|
1377
1436
|
- **Distributed HNSW**: Sharding and partitioning across multiple machines
|
|
1378
1437
|
- **Hybrid Solutions**: Combining quantization techniques with multi-tier architectures
|
|
1379
1438
|
|
|
1380
|
-
For detailed information on how to scale Brainy for large datasets, vector dimension standardization, threading
|
|
1439
|
+
For detailed information on how to scale Brainy for large datasets, vector dimension standardization, threading
|
|
1440
|
+
implementation, storage testing, and other technical topics, see our
|
|
1441
|
+
comprehensive [Technical Guides](TECHNICAL_GUIDES.md).
|
|
1381
1442
|
|
|
1382
1443
|
## Recent Changes and Performance Improvements
|
|
1383
1444
|
|
|
@@ -1385,24 +1446,29 @@ For detailed information on how to scale Brainy for large datasets, vector dimen
|
|
|
1385
1446
|
|
|
1386
1447
|
Brainy has been significantly improved to handle larger datasets more efficiently:
|
|
1387
1448
|
|
|
1388
|
-
- **Pagination Support**: All data retrieval methods now support pagination to avoid loading entire datasets into memory
|
|
1449
|
+
- **Pagination Support**: All data retrieval methods now support pagination to avoid loading entire datasets into memory
|
|
1450
|
+
at once. The deprecated `getAllNouns()` and `getAllVerbs()` methods have been replaced with `getNouns()` and
|
|
1451
|
+
`getVerbs()` methods that support pagination, filtering, and cursor-based navigation.
|
|
1389
1452
|
|
|
1390
1453
|
- **Multi-level Caching**: A sophisticated three-level caching strategy has been implemented:
|
|
1391
|
-
|
|
1392
|
-
|
|
1393
|
-
|
|
1454
|
+
- **Level 1**: Hot cache (most accessed nodes) - RAM (automatically detecting and adjusting in each environment)
|
|
1455
|
+
- **Level 2**: Warm cache (recent nodes) - OPFS, Filesystem or S3 depending on environment
|
|
1456
|
+
- **Level 3**: Cold storage (all nodes) - OPFS, Filesystem or S3 depending on environment
|
|
1394
1457
|
|
|
1395
1458
|
- **Adaptive Memory Usage**: The system automatically detects available memory and adjusts cache sizes accordingly:
|
|
1396
|
-
|
|
1397
|
-
|
|
1459
|
+
- In Node.js: Uses 10% of free memory (minimum 1000 entries)
|
|
1460
|
+
- In browsers: Scales based on device memory (500 entries per GB, minimum 1000)
|
|
1398
1461
|
|
|
1399
|
-
- **Intelligent Cache Eviction**: Implements a Least Recently Used (LRU) policy that evicts the oldest 20% of items when
|
|
1462
|
+
- **Intelligent Cache Eviction**: Implements a Least Recently Used (LRU) policy that evicts the oldest 20% of items when
|
|
1463
|
+
the cache reaches the configured threshold.
|
|
1400
1464
|
|
|
1401
|
-
- **Prefetching Strategy**: Implements batch prefetching to improve performance while avoiding overwhelming system
|
|
1465
|
+
- **Prefetching Strategy**: Implements batch prefetching to improve performance while avoiding overwhelming system
|
|
1466
|
+
resources.
|
|
1402
1467
|
|
|
1403
1468
|
### S3-Compatible Storage Improvements
|
|
1404
1469
|
|
|
1405
|
-
- **Enhanced Cloud Storage**: Improved support for S3-compatible storage services including AWS S3, Cloudflare R2, and
|
|
1470
|
+
- **Enhanced Cloud Storage**: Improved support for S3-compatible storage services including AWS S3, Cloudflare R2, and
|
|
1471
|
+
others.
|
|
1406
1472
|
|
|
1407
1473
|
- **Optimized Data Access**: Batch operations and error handling for efficient cloud storage access.
|
|
1408
1474
|
|
|
@@ -1412,9 +1478,11 @@ Brainy has been significantly improved to handle larger datasets more efficientl
|
|
|
1412
1478
|
|
|
1413
1479
|
Yes, you can use existing data indexed from an old version. Brainy includes robust data migration capabilities:
|
|
1414
1480
|
|
|
1415
|
-
- **Vector Regeneration**: If vectors are missing in imported data, they will be automatically created using the
|
|
1481
|
+
- **Vector Regeneration**: If vectors are missing in imported data, they will be automatically created using the
|
|
1482
|
+
embedding function.
|
|
1416
1483
|
|
|
1417
|
-
- **HNSW Index Reconstruction**: The system can reconstruct the HNSW index from backup data, ensuring compatibility with
|
|
1484
|
+
- **HNSW Index Reconstruction**: The system can reconstruct the HNSW index from backup data, ensuring compatibility with
|
|
1485
|
+
previous versions.
|
|
1418
1486
|
|
|
1419
1487
|
- **Sparse Data Import**: Support for importing sparse data (without vectors) through the `importSparseData()` method.
|
|
1420
1488
|
|
|
@@ -1422,66 +1490,138 @@ Yes, you can use existing data indexed from an old version. Brainy includes robu
|
|
|
1422
1490
|
|
|
1423
1491
|
#### Default Mode
|
|
1424
1492
|
|
|
1425
|
-
- **Memory**:
|
|
1426
|
-
|
|
1427
|
-
|
|
1428
|
-
|
|
1429
|
-
- **CPU**:
|
|
1430
|
-
|
|
1431
|
-
|
|
1493
|
+
- **Memory**:
|
|
1494
|
+
- Minimum: 512MB RAM
|
|
1495
|
+
- Recommended: 2GB+ RAM for medium datasets, 8GB+ for large datasets
|
|
1496
|
+
|
|
1497
|
+
- **CPU**:
|
|
1498
|
+
- Minimum: 2 cores
|
|
1499
|
+
- Recommended: 4+ cores for better performance with parallel operations
|
|
1432
1500
|
|
|
1433
1501
|
- **Storage**:
|
|
1434
|
-
|
|
1435
|
-
|
|
1502
|
+
- Minimum: 1GB available storage
|
|
1503
|
+
- Recommended: Storage space at least 3x the size of your dataset
|
|
1436
1504
|
|
|
1437
1505
|
#### Read-Only Mode
|
|
1438
1506
|
|
|
1439
1507
|
Read-only mode prevents all write operations (add, update, delete) and is optimized for search operations.
|
|
1440
1508
|
|
|
1441
|
-
- **Memory**:
|
|
1442
|
-
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
- **CPU**:
|
|
1446
|
-
|
|
1447
|
-
|
|
1509
|
+
- **Memory**:
|
|
1510
|
+
- Minimum: 256MB RAM
|
|
1511
|
+
- Recommended: 1GB+ RAM
|
|
1512
|
+
|
|
1513
|
+
- **CPU**:
|
|
1514
|
+
- Minimum: 1 core
|
|
1515
|
+
- Recommended: 2+ cores
|
|
1448
1516
|
|
|
1449
1517
|
- **Storage**:
|
|
1450
|
-
|
|
1451
|
-
|
|
1518
|
+
- Minimum: Storage space equal to the size of your dataset
|
|
1519
|
+
- Recommended: 2x the size of your dataset for caching
|
|
1452
1520
|
|
|
1453
1521
|
- **New Feature**: Lazy loading support in read-only mode for improved performance with large datasets.
|
|
1454
1522
|
|
|
1455
1523
|
#### Write-Only Mode
|
|
1456
1524
|
|
|
1457
|
-
Write-only mode prevents all search operations and is optimized for initial data loading or when you want to optimize
|
|
1525
|
+
Write-only mode prevents all search operations and is optimized for initial data loading or when you want to optimize
|
|
1526
|
+
for write performance.
|
|
1458
1527
|
|
|
1459
|
-
- **Memory**:
|
|
1460
|
-
|
|
1461
|
-
|
|
1462
|
-
|
|
1463
|
-
- **CPU**:
|
|
1464
|
-
|
|
1465
|
-
|
|
1528
|
+
- **Memory**:
|
|
1529
|
+
- Minimum: 512MB RAM
|
|
1530
|
+
- Recommended: 2GB+ RAM
|
|
1531
|
+
|
|
1532
|
+
- **CPU**:
|
|
1533
|
+
- Minimum: 2 cores
|
|
1534
|
+
- Recommended: 4+ cores for faster data ingestion
|
|
1466
1535
|
|
|
1467
1536
|
- **Storage**:
|
|
1468
|
-
|
|
1469
|
-
|
|
1537
|
+
- Minimum: Storage space at least 2x the size of your dataset
|
|
1538
|
+
- Recommended: 4x the size of your dataset for optimal performance
|
|
1470
1539
|
|
|
1471
1540
|
### Performance Tuning Parameters
|
|
1472
1541
|
|
|
1473
|
-
Brainy offers
|
|
1542
|
+
Brainy offers comprehensive configuration options for performance tuning, with enhanced support for large datasets in S3
|
|
1543
|
+
or other remote storage. **All configuration is optional** - the system automatically detects the optimal settings based
|
|
1544
|
+
on your environment, dataset size, and usage patterns.
|
|
1545
|
+
|
|
1546
|
+
#### Intelligent Defaults
|
|
1547
|
+
|
|
1548
|
+
Brainy uses intelligent defaults that automatically adapt to your environment:
|
|
1549
|
+
|
|
1550
|
+
- **Environment Detection**: Automatically detects whether you're running in Node.js, browser, or worker environment
|
|
1551
|
+
- **Memory-Aware Caching**: Adjusts cache sizes based on available system memory
|
|
1552
|
+
- **Dataset Size Adaptation**: Tunes parameters based on the size of your dataset
|
|
1553
|
+
- **Usage Pattern Optimization**: Adjusts to read-heavy vs. write-heavy workloads
|
|
1554
|
+
- **Storage Type Awareness**: Optimizes for local vs. remote storage (S3, R2, etc.)
|
|
1555
|
+
- **Operating Mode Specialization**: Special optimizations for read-only and write-only modes
|
|
1556
|
+
|
|
1557
|
+
#### Cache Configuration (Optional)
|
|
1558
|
+
|
|
1559
|
+
You can override any of these automatically tuned parameters if needed:
|
|
1474
1560
|
|
|
1475
1561
|
- **Hot Cache Size**: Control the maximum number of items to keep in memory.
|
|
1562
|
+
- For large datasets (>100K items), consider values between 5,000-50,000 depending on available memory.
|
|
1563
|
+
- In read-only mode, larger values (10,000-100,000) can be used for better performance.
|
|
1564
|
+
|
|
1476
1565
|
- **Eviction Threshold**: Set the threshold at which cache eviction begins (default: 0.8 or 80% of max size).
|
|
1477
|
-
-
|
|
1478
|
-
-
|
|
1566
|
+
- For write-heavy workloads, lower values (0.6-0.7) may improve performance.
|
|
1567
|
+
- For read-heavy workloads, higher values (0.8-0.9) are recommended.
|
|
1568
|
+
|
|
1569
|
+
- **Warm Cache TTL**: Set the time-to-live for items in the warm cache (default: 3600000 ms or 1 hour).
|
|
1570
|
+
- For frequently changing data, shorter TTLs are recommended.
|
|
1571
|
+
- For relatively static data, longer TTLs improve performance.
|
|
1572
|
+
|
|
1573
|
+
- **Batch Size**: Control the number of items to process in a single batch for operations like prefetching.
|
|
1574
|
+
- For S3 or remote storage with large datasets, larger values (50-200) significantly improve throughput.
|
|
1575
|
+
- In read-only mode with remote storage, even larger values (100-300) can be used.
|
|
1576
|
+
|
|
1577
|
+
#### Auto-Tuning (Enabled by Default)
|
|
1578
|
+
|
|
1579
|
+
- **Auto-Tune**: Enable or disable automatic tuning of cache parameters based on usage patterns (default: true).
|
|
1580
|
+
- **Auto-Tune Interval**: Set how frequently the system adjusts cache parameters (default: 60000 ms or 1 minute).
|
|
1581
|
+
|
|
1582
|
+
#### Read-Only Mode Optimizations (Automatic)
|
|
1583
|
+
|
|
1584
|
+
Read-only mode includes special optimizations for search performance that are automatically applied:
|
|
1479
1585
|
|
|
1480
|
-
|
|
1586
|
+
- **Larger Cache Sizes**: Automatically uses more memory for caching (up to 40% of free memory for large datasets).
|
|
1587
|
+
- **Aggressive Prefetching**: Loads more data in each batch to reduce the number of storage requests.
|
|
1588
|
+
- **Prefetch Strategy**: Defaults to 'aggressive' prefetching strategy in read-only mode.
|
|
1589
|
+
|
|
1590
|
+
#### Example Configuration for Large S3 Datasets
|
|
1591
|
+
|
|
1592
|
+
```javascript
|
|
1593
|
+
const brainy = new BrainyData({
|
|
1594
|
+
readOnly: true,
|
|
1595
|
+
lazyLoadInReadOnlyMode: true,
|
|
1596
|
+
storage: {
|
|
1597
|
+
type: 's3',
|
|
1598
|
+
s3Storage: {
|
|
1599
|
+
bucketName: 'your-bucket',
|
|
1600
|
+
accessKeyId: 'your-access-key',
|
|
1601
|
+
secretAccessKey: 'your-secret-key',
|
|
1602
|
+
region: 'your-region'
|
|
1603
|
+
}
|
|
1604
|
+
},
|
|
1605
|
+
cache: {
|
|
1606
|
+
hotCacheMaxSize: 20000,
|
|
1607
|
+
hotCacheEvictionThreshold: 0.85,
|
|
1608
|
+
batchSize: 100,
|
|
1609
|
+
readOnlyMode: {
|
|
1610
|
+
hotCacheMaxSize: 50000,
|
|
1611
|
+
batchSize: 200,
|
|
1612
|
+
prefetchStrategy: 'aggressive'
|
|
1613
|
+
}
|
|
1614
|
+
}
|
|
1615
|
+
});
|
|
1616
|
+
```
|
|
1617
|
+
|
|
1618
|
+
These configuration options make Brainy more efficient, scalable, and adaptable to different environments and usage
|
|
1619
|
+
patterns, especially for large datasets in cloud storage.
|
|
1481
1620
|
|
|
1482
1621
|
## Testing
|
|
1483
1622
|
|
|
1484
|
-
Brainy uses Vitest for testing. For detailed information about testing in Brainy, including test configuration, scripts,
|
|
1623
|
+
Brainy uses Vitest for testing. For detailed information about testing in Brainy, including test configuration, scripts,
|
|
1624
|
+
reporting tools, and best practices, see our [Testing Guide](docs/technical/TESTING.md).
|
|
1485
1625
|
|
|
1486
1626
|
Here are some common test commands:
|
|
1487
1627
|
|
|
@@ -1505,45 +1645,18 @@ see [DEVELOPERS.md](DEVELOPERS.md).
 
 We have a [Code of Conduct](CODE_OF_CONDUCT.md) that all contributors are expected to follow.
 
-
-
-Brainy uses a streamlined release workflow that automates version updates, changelog generation, GitHub releases, and NPM deployment.
-
-### Automated Release Process
-
-The release workflow combines several steps into a single command:
-
-1. **Build the project** - Ensures the code compiles correctly
-2. **Run tests** - Verifies that all tests pass
-3. **Update version** - Bumps the version number (patch, minor, or major)
-4. **Generate changelog** - Automatically updates CHANGELOG.md with commit messages since the last release
-5. **Create GitHub release** - Creates a GitHub release with auto-generated notes
-6. **Publish to NPM** - Deploys the package to NPM
-
-### Release Commands
-
-Use one of the following commands to release a new version:
-
-```bash
-# Release with patch version update (0.0.x)
-npm run workflow:patch
-
-# Release with minor version update (0.x.0)
-npm run workflow:minor
-
-# Release with major version update (x.0.0)
-npm run workflow:major
+### Commit Message Format
 
-
-
+For best results with automatic changelog generation, follow
+the [Conventional Commits](https://www.conventionalcommits.org/) specification for your commit messages:
 
-# Dry run (build, test, and simulate version update without making changes)
-npm run workflow:dry-run
 ```
+AI Template for automated commit messages:
 
-
-
-
+Use Conventional Commit format
+Specify the changes in a structured format
+Add information about the purpose of the commit
+```
 
 ```
 <type>(<scope>): <description>
@@ -1554,6 +1667,7 @@ For best results with automatic changelog generation, follow the [Conventional C
 ```
 
 Where `<type>` is one of:
+
 - `feat`: A new feature (maps to **Added** section)
 - `fix`: A bug fix (maps to **Fixed** section)
 - `chore`: Regular maintenance tasks (maps to **Changed** section)
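
Read together with the type list above, a conforming message is a single-line header in `<type>(<scope>): <description>` form, optionally followed by a body. A hypothetical example for this release's search feature:

```
feat(search): add field-based JSON document search

Adds the searchField option and searchByStandardField for cross-service queries.
```
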
@@ -1567,10 +1681,10 @@ If you need more control over the release process, you can use the individual co
 
 ```bash
 # Update version and generate changelog
-npm run
+npm run _release:patch # or _release:minor, _release:major
 
 # Create GitHub release
-npm run
+npm run _github-release
 
 # Publish to NPM
 npm publish