@soulcraft/brainy 0.32.0 → 0.33.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +495 -400
- package/dist/brainyData.d.ts +115 -0
- package/dist/coreTypes.d.ts +26 -0
- package/dist/storage/adapters/baseStorageAdapter.d.ts +16 -0
- package/dist/storage/adapters/baseStorageAdapter.d.ts.map +1 -1
- package/dist/storage/cacheManager.d.ts +27 -13
- package/dist/storage/cacheManager.d.ts.map +1 -1
- package/dist/storage/storageFactory.d.ts +44 -0
- package/dist/storage/storageFactory.d.ts.map +1 -1
- package/dist/unified.js +823 -56
- package/dist/unified.min.js +747 -747
- package/dist/utils/fieldNameTracking.d.ts +21 -0
- package/dist/utils/fieldNameTracking.d.ts.map +1 -0
- package/dist/utils/index.d.ts +2 -0
- package/dist/utils/index.d.ts.map +1 -1
- package/dist/utils/jsonProcessing.d.ts +43 -0
- package/dist/utils/jsonProcessing.d.ts.map +1 -0
- package/package.json +1 -1
package/README.md
CHANGED
@@ -27,6 +27,8 @@ it gets - learning from your data to provide increasingly relevant results and c

 - **Run Everywhere** - Works in browsers, Node.js, serverless functions, and containers
 - **Vector Search** - Find semantically similar content using embeddings
+- **Advanced JSON Document Search** - Search within specific fields of JSON documents with field prioritization and
+  service-based field standardization
 - **Graph Relationships** - Connect data with meaningful relationships
 - **Streaming Pipeline** - Process data in real-time as it flows through the system
 - **Extensible Augmentations** - Customize and extend functionality with pluggable components
@@ -89,7 +91,7 @@ REST API web service wrapper that provides HTTP endpoints for search operations
 Brainy uses a unified build that automatically adapts to your environment (Node.js, browser, or serverless):

 ```typescript
-import {BrainyData, NounType, VerbType} from '@soulcraft/brainy'
+import { BrainyData, NounType, VerbType } from '@soulcraft/brainy'

 // Create and initialize the database
 const db = new BrainyData()
@@ -97,13 +99,13 @@ await db.init()

 // Add data (automatically converted to vectors)
 const catId = await db.add("Cats are independent pets", {
-
-
+  noun: NounType.Thing,
+  category: 'animal'
 })

 const dogId = await db.add("Dogs are loyal companions", {
-
-
+  noun: NounType.Thing,
+  category: 'animal'
 })

 // Search for similar items
@@ -112,8 +114,8 @@ console.log(results)

 // Add a relationship between items
 await db.addVerb(catId, dogId, {
-
-
+  verb: VerbType.RelatedTo,
+  description: 'Both are common household pets'
 })
 ```

@@ -121,10 +123,10 @@ await db.addVerb(catId, dogId, {

 ```typescript
 // Standard import - automatically adapts to any environment
-import {BrainyData} from '@soulcraft/brainy'
+import { BrainyData } from '@soulcraft/brainy'

 // Minified version for production
-import {BrainyData} from '@soulcraft/brainy/min'
+import { BrainyData } from '@soulcraft/brainy/min'
 ```

 > **Note**: The CLI functionality is available as a separate package `@soulcraft/brainy-cli` to reduce the bundle size
@@ -136,15 +138,15 @@ import {BrainyData} from '@soulcraft/brainy/min'
 ```html

 <script type="module">
-
-
+  // Use local files instead of CDN
+  import { BrainyData } from './dist/unified.js'

-
-
+  // Or minified version
+  // import { BrainyData } from './dist/unified.min.js'

-
-
-
+  const db = new BrainyData()
+  await db.init()
+  // ...
 </script>
 ```

@@ -299,13 +301,13 @@ The pipeline runs automatically when you:

 ```typescript
 // Add data (runs embedding → indexing → storage)
-const id = await db.add("Your text data here", {metadata})
+const id = await db.add("Your text data here", { metadata })

 // Search (runs embedding → similarity search)
 const results = await db.searchText("Your query here", 5)

 // Connect entities (runs graph construction → storage)
-await db.addVerb(sourceId, targetId, {verb: VerbType.RelatedTo})
+await db.addVerb(sourceId, targetId, { verb: VerbType.RelatedTo })
 ```

 Using the CLI:
@@ -404,13 +406,13 @@ Connections between nouns (edges in the graph):
 Brainy provides utility functions to access lists of noun and verb types:

 ```typescript
-import {
-  NounType,
-  VerbType,
-  getNounTypes,
-  getVerbTypes,
-  getNounTypeMap,
-  getVerbTypeMap
+import {
+  NounType,
+  VerbType,
+  getNounTypes,
+  getVerbTypes,
+  getNounTypeMap,
+  getVerbTypeMap
 } from '@soulcraft/brainy'

 // At development time:
@@ -433,6 +435,7 @@ const verbTypeMap = getVerbTypeMap() // { RelatedTo: 'relatedTo', Contains: 'con
 ```

 These utility functions make it easy to:
+
 - Get a complete list of available noun and verb types
 - Validate user input against valid types
 - Create dynamic UI components that display or select from available types
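The "validate user input" bullet above has no call site in this hunk. A minimal sketch of what it enables, assuming `getNounTypes()` and `getVerbTypes()` return arrays of the valid type strings (the README implies this but does not spell out the exact element type - check the package typings):

```typescript
import { getNounTypes, getVerbTypes } from '@soulcraft/brainy'

// Assumption: the helpers return plain lists of valid type identifiers.
const validNouns = getNounTypes() as string[]
const validVerbs = getVerbTypes() as string[]

export function isValidNounType(input: string): boolean {
  return validNouns.includes(input)
}

export function isValidVerbType(input: string): boolean {
  return validVerbs.includes(input)
}
```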
@@ -528,15 +531,17 @@ const status = await db.status()
 const backupData = await db.backup()

 // Restore data into the database
-const restoreResult = await db.restore(backupData, {clearExisting: true})
+const restoreResult = await db.restore(backupData, { clearExisting: true })
 ```

 ### Database Statistics

-Brainy provides a way to get statistics about the current state of the database. For detailed information about the
+Brainy provides a way to get statistics about the current state of the database. For detailed information about the
+statistics system, including implementation details, scalability improvements, and usage examples, see
+our [Statistics Guide](STATISTICS.md).

 ```typescript
-import {BrainyData, getStatistics} from '@soulcraft/brainy'
+import { BrainyData, getStatistics } from '@soulcraft/brainy'

 // Create and initialize the database
 const db = new BrainyData()
@@ -553,25 +558,25 @@ console.log(stats)
 ```typescript
 // Add a noun (automatically vectorized)
 const id = await db.add(textOrVector, {
-
-
+  noun: NounType.Thing,
+  // other metadata...
 })

 // Add multiple nouns in parallel (with multithreading and batch embedding)
 const ids = await db.addBatch([
-
-
-
-
-
-
-
-
-
+  {
+    vectorOrData: "First item to add",
+    metadata: { noun: NounType.Thing, category: 'example' }
+  },
+  {
+    vectorOrData: "Second item to add",
+    metadata: { noun: NounType.Thing, category: 'example' }
+  },
+  // More items...
 ], {
-
-
-
+  forceEmbed: false,
+  concurrency: 4, // Control the level of parallelism (default: 4)
+  batchSize: 50 // Control the number of items to process in a single batch (default: 50)
 })

 // Retrieve a noun
@@ -579,8 +584,8 @@ const noun = await db.get(id)

 // Update noun metadata
 await db.updateMetadata(id, {
-
-
+  noun: NounType.Thing,
+  // updated metadata...
 })

 // Delete a noun
@@ -592,6 +597,39 @@ const textResults = await db.searchText("query text", numResults)

 // Search by noun type
 const thingNouns = await db.searchByNounTypes([NounType.Thing], numResults)
+
+// Search within specific fields of JSON documents
+const fieldResults = await db.search("Acme Corporation", 10, {
+  searchField: "company"
+})
+
+// Search using standard field names across different services
+const titleResults = await db.searchByStandardField("title", "climate change", 10)
+const authorResults = await db.searchByStandardField("author", "johndoe", 10, {
+  services: ["github", "reddit"]
+})
+```
+
+### Field Standardization and Service Tracking
+
+Brainy automatically tracks field names from JSON documents and associates them with the service that inserted the data.
+This enables powerful cross-service search capabilities:
+
+```typescript
+// Get all available field names organized by service
+const fieldNames = await db.getAvailableFieldNames()
+// Example output: { "github": ["repository.name", "issue.title"], "reddit": ["title", "selftext"] }
+
+// Get standard field mappings
+const standardMappings = await db.getStandardFieldMappings()
+// Example output: { "title": { "github": ["repository.name"], "reddit": ["title"] } }
+```
+
+When adding data, specify the service name to ensure proper field tracking:
+
+```typescript
+// Add data with service name
+await db.add(jsonData, metadata, { service: "github" })
 ```

 ### Working with Verbs (Relationships)
@@ -599,21 +637,21 @@ const thingNouns = await db.searchByNounTypes([NounType.Thing], numResults)
 ```typescript
 // Add a relationship between nouns
 await db.addVerb(sourceId, targetId, {
-
-
+  verb: VerbType.RelatedTo,
+  // other metadata...
 })

 // Add a relationship with auto-creation of missing nouns
 // This is useful when the target noun might not exist yet
 await db.addVerb(sourceId, targetId, {
-
-
-
-
-
-
-
-
+  verb: VerbType.RelatedTo,
+  // Enable auto-creation of missing nouns
+  autoCreateMissingNouns: true,
+  // Optional metadata for auto-created nouns
+  missingNounMetadata: {
+    noun: NounType.Concept,
+    description: 'Auto-created noun'
+  }
 })

 // Get all relationships
@@ -665,27 +703,29 @@ db.setReadOnly(false)
 db.setWriteOnly(false)
 ```

-- **Read-Only Mode**: When enabled, prevents all write operations (add, update, delete). Useful for deployment scenarios
-
+- **Read-Only Mode**: When enabled, prevents all write operations (add, update, delete). Useful for deployment scenarios
+  where you want to prevent modifications to the database.
+- **Write-Only Mode**: When enabled, prevents all search operations. Useful for initial data loading or when you want to
+  optimize for write performance.

 ### Embedding

 ```typescript
 import {
-
-
-
+  BrainyData,
+  createTensorFlowEmbeddingFunction,
+  createThreadedEmbeddingFunction
 } from '@soulcraft/brainy'

 // Use the standard TensorFlow Universal Sentence Encoder embedding function
 const db = new BrainyData({
-
+  embeddingFunction: createTensorFlowEmbeddingFunction()
 })
 await db.init()

 // Or use the threaded embedding function for better performance
 const threadedDb = new BrainyData({
-
+  embeddingFunction: createThreadedEmbeddingFunction()
 })
 await threadedDb.init()

@@ -726,42 +766,42 @@ Brainy includes comprehensive multithreading support to improve performance acro
 7. **Automatic Environment Detection**: Adapts to browser (Web Workers) and Node.js (Worker Threads) environments

 ```typescript
-import {BrainyData, euclideanDistance} from '@soulcraft/brainy'
+import { BrainyData, euclideanDistance } from '@soulcraft/brainy'

 // Configure with custom options
 const db = new BrainyData({
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+  // Use Euclidean distance instead of default cosine distance
+  distanceFunction: euclideanDistance,
+
+  // HNSW index configuration for search performance
+  hnsw: {
+    M: 16, // Max connections per noun
+    efConstruction: 200, // Construction candidate list size
+    efSearch: 50, // Search candidate list size
+  },
+
+  // Performance optimization options
+  performance: {
+    useParallelization: true, // Enable multithreaded search operations
+  },
+
+  // Noun and Verb type validation
+  typeValidation: {
+    enforceNounTypes: true, // Validate noun types against NounType enum
+    enforceVerbTypes: true, // Validate verb types against VerbType enum
+  },
+
+  // Storage configuration
+  storage: {
+    requestPersistentStorage: true,
+    // Example configuration for cloud storage (replace with your own values):
+    // s3Storage: {
+    //   bucketName: 'your-s3-bucket-name',
+    //   region: 'your-aws-region'
+    //   // Credentials should be provided via environment variables
+    //   // AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
+    // }
+  }
 })
 ```

@@ -775,34 +815,34 @@ hybrid approach:
 3. **Memory-Efficient Indexing** - Optimizes memory usage for large-scale vector collections

 ```typescript
-import {BrainyData} from '@soulcraft/brainy'
+import { BrainyData } from '@soulcraft/brainy'

 // Configure with optimized HNSW index for large datasets
 const db = new BrainyData({
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-  },
-
-  // Whether to use disk-based storage for the index
-  useDiskBasedIndex: true // Enable disk-based storage
+  hnswOptimized: {
+    // Standard HNSW parameters
+    M: 16, // Max connections per noun
+    efConstruction: 200, // Construction candidate list size
+    efSearch: 50, // Search candidate list size
+
+    // Memory threshold in bytes - when exceeded, will use disk-based approach
+    memoryThreshold: 1024 * 1024 * 1024, // 1GB default threshold
+
+    // Product quantization settings for dimensionality reduction
+    productQuantization: {
+      enabled: true, // Enable product quantization
+      numSubvectors: 16, // Number of subvectors to split the vector into
+      numCentroids: 256 // Number of centroids per subvector
    },

-//
-
-
-
+    // Whether to use disk-based storage for the index
+    useDiskBasedIndex: true // Enable disk-based storage
+  },
+
+  // Storage configuration (required for disk-based index)
+  storage: {
+    requestPersistentStorage: true
+  }
 })

 // The optimized index automatically adapts based on dataset size:
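The product quantization settings added in this hunk imply a concrete memory saving that is worth spelling out. A back-of-envelope check - the 512-dimension figure is an assumption based on the Universal Sentence Encoder embeddings this README mentions elsewhere; the ratio scales with dimensionality:

```typescript
// With numCentroids: 256, each subvector code fits in one byte (2^8 = 256),
// so a vector is stored as numSubvectors bytes instead of dims * 4 bytes.
const dims = 512                 // assumed embedding dimensionality
const rawBytes = dims * 4        // float32 vector: 2048 bytes
const pqBytes = 16               // numSubvectors codes at 1 byte each
console.log(rawBytes / pqBytes)  // => 128x compression per stored vector
```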
@@ -867,24 +907,24 @@ Brainy's restore functionality can handle:
 ```typescript
 // Restore data with all options
 const restoreResult = await db.restore(backupData, {
-
+  clearExisting: true // Whether to clear existing data before restore
 })

 // Import sparse data (without vectors)
 // Vectors will be automatically created using the embedding function
 const sparseData = {
-
-
-
-
-
-
-
-
-
-
-
-
+  nouns: [
+    {
+      id: '123',
+      // No vector field - will be created during import
+      metadata: {
+        noun: 'Thing',
+        text: 'This text will be used to generate a vector'
+      }
+    }
+  ],
+  verbs: [],
+  version: '1.0.0'
 }

 const sparseImportResult = await db.importSparseData(sparseData)
@@ -931,82 +971,82 @@ boilerplate:

 ```typescript
 import {
-
-
-
-
-
-
-
-
+  createMemoryAugmentation,
+  createConduitAugmentation,
+  createSenseAugmentation,
+  addWebSocketSupport,
+  executeStreamlined,
+  processStaticData,
+  processStreamingData,
+  createPipeline
 } from '@soulcraft/brainy'

 // Create a memory augmentation with minimal code
 const memoryAug = createMemoryAugmentation({
-
-
-
-
-
-
-
-
-
-
-
-
-
+  name: 'simple-memory',
+  description: 'A simple in-memory storage augmentation',
+  autoRegister: true,
+  autoInitialize: true,
+
+  // Implement only the methods you need
+  storeData: async (key, data) => {
+    // Your implementation here
+    return {
+      success: true,
+      data: true
+    }
+  },

-
-
-
-
-
-  }
+  retrieveData: async (key) => {
+    // Your implementation here
+    return {
+      success: true,
+      data: { example: 'data', key }
    }
+  }
 })

 // Add WebSocket support to any augmentation
 const wsAugmentation = addWebSocketSupport(memoryAug, {
-
-
-
-
-
-
-  }
+  connectWebSocket: async (url) => {
+    // Your implementation here
+    return {
+      connectionId: 'ws-1',
+      url,
+      status: 'connected'
    }
+  }
 })

 // Process static data through a pipeline
 const result = await processStaticData(
-
-
-  {
-    augmentation: senseAug,
-    method: 'processRawData',
-    transformArgs: (data) => [data, 'text']
-  },
-  {
-    augmentation: memoryAug,
-    method: 'storeData',
-    transformArgs: (data) => ['processed-data', data]
-  }
-  ]
-)
-
-// Create a reusable pipeline
-const pipeline = createPipeline([
+  'Input data',
+  [
   {
-
-
-
+    augmentation: senseAug,
+    method: 'processRawData',
+    transformArgs: (data) => [data, 'text']
   },
   {
-
-
-
+    augmentation: memoryAug,
+    method: 'storeData',
+    transformArgs: (data) => ['processed-data', data]
   }
+  ]
+)
+
+// Create a reusable pipeline
+const pipeline = createPipeline([
+  {
+    augmentation: senseAug,
+    method: 'processRawData',
+    transformArgs: (data) => [data, 'text']
+  },
+  {
+    augmentation: memoryAug,
+    method: 'storeData',
+    transformArgs: (data) => ['processed-data', data]
+  }
 ])

 // Use the pipeline
@@ -1014,11 +1054,11 @@ const result = await pipeline('New input data')

 // Dynamically load augmentations at runtime
 const loadedAugmentations = await loadAugmentationModule(
-
-
-
-
-
+  import('./my-augmentations.js'),
+  {
+    autoRegister: true,
+    autoInitialize: true
+  }
 )
 ```

@@ -1037,56 +1077,56 @@ capabilities to their augmentations:

 ```typescript
 import {
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+  // Base WebSocket support interface
+  IWebSocketSupport,
+
+  // Combined WebSocket augmentation types
+  IWebSocketSenseAugmentation,
+  IWebSocketConduitAugmentation,
+  IWebSocketCognitionAugmentation,
+  IWebSocketMemoryAugmentation,
+  IWebSocketPerceptionAugmentation,
+  IWebSocketDialogAugmentation,
+  IWebSocketActivationAugmentation,
+
+  // Function to add WebSocket support to any augmentation
+  addWebSocketSupport
 } from '@soulcraft/brainy'

 // Example: Creating a typed WebSocket-enabled sense augmentation
 const mySenseAug = createSenseAugmentation({
-
-
-
-
-
-
-  }
+  name: 'my-sense',
+  processRawData: async (data, dataType) => {
+    // Implementation
+    return {
+      success: true,
+      data: { nouns: [], verbs: [] }
    }
+  }
 }) as IWebSocketSenseAugmentation

 // Add WebSocket support
 addWebSocketSupport(mySenseAug, {
-
-
-
-
-
-
-  }
-  },
-  sendWebSocketMessage: async (connectionId, data) => {
-    // Send message implementation
-  },
-  onWebSocketMessage: async (connectionId, callback) => {
-    // Register callback implementation
-  },
-  offWebSocketMessage: async (connectionId, callback) => {
-    // Remove callback implementation
-  },
-  closeWebSocket: async (connectionId, code, reason) => {
-    // Close connection implementation
+  connectWebSocket: async (url) => {
+    // WebSocket implementation
+    return {
+      connectionId: 'ws-1',
+      url,
+      status: 'connected'
    }
+  },
+  sendWebSocketMessage: async (connectionId, data) => {
+    // Send message implementation
+  },
+  onWebSocketMessage: async (connectionId, callback) => {
+    // Register callback implementation
+  },
+  offWebSocketMessage: async (connectionId, callback) => {
+    // Remove callback implementation
+  },
+  closeWebSocket: async (connectionId, code, reason) => {
+    // Close connection implementation
+  }
 })

 // Now mySenseAug has both sense augmentation methods and WebSocket methods
@@ -1124,13 +1164,13 @@ everywhere.
 Brainy automatically detects the environment it's running in:

 ```typescript
-import {environment} from '@soulcraft/brainy'
+import { environment } from '@soulcraft/brainy'

 // Check which environment we're running in
 console.log(`Running in ${
-
-
-
+  environment.isBrowser ? 'browser' :
+  environment.isNode ? 'Node.js' :
+  'serverless/unknown'
 } environment`)
 ```

@@ -1203,9 +1243,9 @@ You can use the conduit augmentations to sync Brainy instances:

 ```typescript
 import {
-
-
-
+  BrainyData,
+  pipeline,
+  createConduitAugmentation
 } from '@soulcraft/brainy'

 // Create and initialize the database
@@ -1221,36 +1261,36 @@ pipeline.register(wsConduit)
 // Connect to another Brainy instance (server or browser)
 // Replace the example URL below with your actual WebSocket server URL
 const connectionResult = await pipeline.executeConduitPipeline(
-
-
+  'establishConnection',
+  ['wss://example-websocket-server.com/brainy-sync', { protocols: 'brainy-sync' }]
 )

 if (connectionResult[0] && (await connectionResult[0]).success) {
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+  const connection = (await connectionResult[0]).data
+
+  // Read data from the remote instance
+  const readResult = await pipeline.executeConduitPipeline(
+    'readData',
+    [{ connectionId: connection.connectionId, query: { type: 'getAllNouns' } }]
+  )
+
+  // Process and add the received data to the local instance
+  if (readResult[0] && (await readResult[0]).success) {
+    const remoteNouns = (await readResult[0]).data
+    for (const noun of remoteNouns) {
+      await db.add(noun.vector, noun.metadata)
    }
-
-
-
-
-
-
-
-
-
-  }
+  }
+
+  // Set up real-time sync by monitoring the stream
+  await wsConduit.monitorStream(connection.connectionId, async (data) => {
+    // Handle incoming data (e.g., new nouns, verbs, updates)
+    if (data.type === 'newNoun') {
+      await db.add(data.vector, data.metadata)
+    } else if (data.type === 'newVerb') {
+      await db.addVerb(data.sourceId, data.targetId, data.vector, data.options)
+    }
+  })
 }
 ```

@@ -1258,9 +1298,9 @@ if (connectionResult[0] && (await connectionResult[0]).success) {

 ```typescript
 import {
-
-
-
+  BrainyData,
+  pipeline,
+  createConduitAugmentation
 } from '@soulcraft/brainy'

 // Create and initialize the database
@@ -1276,48 +1316,48 @@ pipeline.register(webrtcConduit)
 // Connect to a peer using a signaling server
 // Replace the example values below with your actual configuration
 const connectionResult = await pipeline.executeConduitPipeline(
-
-
-
-
-
-
-
-
-
+  'establishConnection',
+  [
+    'peer-id-to-connect-to', // Replace with actual peer ID
+    {
+      signalServerUrl: 'wss://example-signal-server.com', // Replace with your signal server
+      localPeerId: 'my-local-peer-id', // Replace with your local peer ID
+      iceServers: [{ urls: 'stun:stun.l.google.com:19302' }] // Public STUN server
+    }
+  ]
 )

 if (connectionResult[0] && (await connectionResult[0]).success) {
-
-
-
-
-
-
-
-
-
+  const connection = (await connectionResult[0]).data
+
+  // Set up real-time sync by monitoring the stream
+  await webrtcConduit.monitorStream(connection.connectionId, async (data) => {
+    // Handle incoming data (e.g., new nouns, verbs, updates)
+    if (data.type === 'newNoun') {
+      await db.add(data.vector, data.metadata)
+    } else if (data.type === 'newVerb') {
+      await db.addVerb(data.sourceId, data.targetId, data.vector, data.options)
+    }
+  })
+
+  // When adding new data locally, also send to the peer
+  const nounId = await db.add("New data to sync", { noun: "Thing" })
+
+  // Send the new noun to the peer
+  await pipeline.executeConduitPipeline(
+    'writeData',
+    [
+      {
+        connectionId: connection.connectionId,
+        data: {
+          type: 'newNoun',
+          id: nounId,
+          vector: (await db.get(nounId)).vector,
+          metadata: (await db.get(nounId)).metadata
        }
-
-
-
-  const nounId = await db.add("New data to sync", {noun: "Thing"})
-
-  // Send the new noun to the peer
-  await pipeline.executeConduitPipeline(
-    'writeData',
-    [
-      {
-        connectionId: connection.connectionId,
-        data: {
-          type: 'newNoun',
-          id: nounId,
-          vector: (await db.get(nounId)).vector,
-          metadata: (await db.get(nounId)).metadata
-        }
-      }
-    ]
-  )
+      }
+    ]
+  )
 }
 ```

@@ -1327,39 +1367,39 @@ Brainy supports searching a server-hosted instance from a browser, storing resul
 searches against the local instance:

 ```typescript
-import {BrainyData} from '@soulcraft/brainy'
+import { BrainyData } from '@soulcraft/brainy'

 // Create and initialize the database with remote server configuration
 // Replace the example URL below with your actual Brainy server URL
 const db = new BrainyData({
-
-
-
-
-
+  remoteServer: {
+    url: 'wss://example-brainy-server.com/ws', // Replace with your server URL
+    protocols: 'brainy-sync',
+    autoConnect: true // Connect automatically during initialization
+  }
 })
 await db.init()

 // Or connect manually after initialization
 if (!db.isConnectedToRemoteServer()) {
-
-
+  // Replace the example URL below with your actual Brainy server URL
+  await db.connectToRemoteServer('wss://example-brainy-server.com/ws', 'brainy-sync')
 }

 // Search the remote server (results are stored locally)
-const remoteResults = await db.searchText('machine learning', 5, {searchMode: 'remote'})
+const remoteResults = await db.searchText('machine learning', 5, { searchMode: 'remote' })

 // Search the local database (includes previously stored results)
-const localResults = await db.searchText('machine learning', 5, {searchMode: 'local'})
+const localResults = await db.searchText('machine learning', 5, { searchMode: 'local' })

 // Perform a combined search (local first, then remote if needed)
-const combinedResults = await db.searchText('neural networks', 5, {searchMode: 'combined'})
+const combinedResults = await db.searchText('neural networks', 5, { searchMode: 'combined' })

 // Add data to both local and remote instances
 const id = await db.addToBoth('Deep learning is a subset of machine learning', {
-
-
-
+  noun: 'Concept',
+  category: 'AI',
+  tags: ['deep learning', 'neural networks']
 })

 // Clean up when done (this also cleans up worker pools)
@@ -1377,7 +1417,9 @@ terabyte-scale data that can't fit entirely in memory, we provide several approa
 - **Distributed HNSW**: Sharding and partitioning across multiple machines
 - **Hybrid Solutions**: Combining quantization techniques with multi-tier architectures

-For detailed information on how to scale Brainy for large datasets, vector dimension standardization, threading
+For detailed information on how to scale Brainy for large datasets, vector dimension standardization, threading
+implementation, storage testing, and other technical topics, see our
+comprehensive [Technical Guides](TECHNICAL_GUIDES.md).

 ## Recent Changes and Performance Improvements

@@ -1385,24 +1427,29 @@

 Brainy has been significantly improved to handle larger datasets more efficiently:

-- **Pagination Support**: All data retrieval methods now support pagination to avoid loading entire datasets into memory
+- **Pagination Support**: All data retrieval methods now support pagination to avoid loading entire datasets into memory
+  at once. The deprecated `getAllNouns()` and `getAllVerbs()` methods have been replaced with `getNouns()` and
+  `getVerbs()` methods that support pagination, filtering, and cursor-based navigation.

 - **Multi-level Caching**: A sophisticated three-level caching strategy has been implemented:
-
-
-
+  - **Level 1**: Hot cache (most accessed nodes) - RAM (automatically detecting and adjusting in each environment)
+  - **Level 2**: Warm cache (recent nodes) - OPFS, Filesystem or S3 depending on environment
+  - **Level 3**: Cold storage (all nodes) - OPFS, Filesystem or S3 depending on environment

 - **Adaptive Memory Usage**: The system automatically detects available memory and adjusts cache sizes accordingly:
-
-
+  - In Node.js: Uses 10% of free memory (minimum 1000 entries)
+  - In browsers: Scales based on device memory (500 entries per GB, minimum 1000)

-- **Intelligent Cache Eviction**: Implements a Least Recently Used (LRU) policy that evicts the oldest 20% of items when
+- **Intelligent Cache Eviction**: Implements a Least Recently Used (LRU) policy that evicts the oldest 20% of items when
+  the cache reaches the configured threshold.

-- **Prefetching Strategy**: Implements batch prefetching to improve performance while avoiding overwhelming system
+- **Prefetching Strategy**: Implements batch prefetching to improve performance while avoiding overwhelming system
+  resources.

 ### S3-Compatible Storage Improvements

-- **Enhanced Cloud Storage**: Improved support for S3-compatible storage services including AWS S3, Cloudflare R2, and
+- **Enhanced Cloud Storage**: Improved support for S3-compatible storage services including AWS S3, Cloudflare R2, and
+  others.

 - **Optimized Data Access**: Batch operations and error handling for efficient cloud storage access.

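The pagination bullet above names `getNouns()`/`getVerbs()` but no call site appears in the diff. A cursor-style loop would look roughly like this - the option and result field names (`limit`, `cursor`, `items`, `nextCursor`) are assumptions for illustration, not confirmed API; the real signatures live in the updated `dist/brainyData.d.ts`:

```typescript
// Hypothetical sketch of cursor-based pagination over getNouns().
let cursor: string | undefined
do {
  const page = await db.getNouns({ limit: 100, cursor })
  for (const noun of page.items) {
    // process one page at a time instead of loading the whole dataset
  }
  cursor = page.nextCursor
} while (cursor !== undefined)
```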
@@ -1412,9 +1459,11 @@ Brainy has been significantly improved to handle larger datasets more efficientl

 Yes, you can use existing data indexed from an old version. Brainy includes robust data migration capabilities:

-- **Vector Regeneration**: If vectors are missing in imported data, they will be automatically created using the
+- **Vector Regeneration**: If vectors are missing in imported data, they will be automatically created using the
+  embedding function.

-- **HNSW Index Reconstruction**: The system can reconstruct the HNSW index from backup data, ensuring compatibility with
+- **HNSW Index Reconstruction**: The system can reconstruct the HNSW index from backup data, ensuring compatibility with
+  previous versions.

 - **Sparse Data Import**: Support for importing sparse data (without vectors) through the `importSparseData()` method.

@@ -1422,66 +1471,138 @@ Yes, you can use existing data indexed from an old version. Brainy includes robu

 #### Default Mode

-- **Memory**:
-
-
-
-- **CPU**:
-
-
+- **Memory**:
+  - Minimum: 512MB RAM
+  - Recommended: 2GB+ RAM for medium datasets, 8GB+ for large datasets
+
+- **CPU**:
+  - Minimum: 2 cores
+  - Recommended: 4+ cores for better performance with parallel operations

 - **Storage**:
-
-
+  - Minimum: 1GB available storage
+  - Recommended: Storage space at least 3x the size of your dataset

 #### Read-Only Mode

 Read-only mode prevents all write operations (add, update, delete) and is optimized for search operations.

-- **Memory**:
-
-
-
-- **CPU**:
-
-
+- **Memory**:
+  - Minimum: 256MB RAM
+  - Recommended: 1GB+ RAM
+
+- **CPU**:
+  - Minimum: 1 core
+  - Recommended: 2+ cores

 - **Storage**:
-
-
+  - Minimum: Storage space equal to the size of your dataset
+  - Recommended: 2x the size of your dataset for caching

 - **New Feature**: Lazy loading support in read-only mode for improved performance with large datasets.

 #### Write-Only Mode

-Write-only mode prevents all search operations and is optimized for initial data loading or when you want to optimize
+Write-only mode prevents all search operations and is optimized for initial data loading or when you want to optimize
+for write performance.
+
+- **Memory**:
+  - Minimum: 512MB RAM
+  - Recommended: 2GB+ RAM

-- **
-
-
-
-- **CPU**:
-- Minimum: 2 cores
-- Recommended: 4+ cores for faster data ingestion
+- **CPU**:
+  - Minimum: 2 cores
+  - Recommended: 4+ cores for faster data ingestion

 - **Storage**:
-
-
+  - Minimum: Storage space at least 2x the size of your dataset
+  - Recommended: 4x the size of your dataset for optimal performance

 ### Performance Tuning Parameters

-Brainy offers
+Brainy offers comprehensive configuration options for performance tuning, with enhanced support for large datasets in S3
+or other remote storage. **All configuration is optional** - the system automatically detects the optimal settings based
+on your environment, dataset size, and usage patterns.
+
+#### Intelligent Defaults
+
+Brainy uses intelligent defaults that automatically adapt to your environment:
+
+- **Environment Detection**: Automatically detects whether you're running in Node.js, browser, or worker environment
+- **Memory-Aware Caching**: Adjusts cache sizes based on available system memory
+- **Dataset Size Adaptation**: Tunes parameters based on the size of your dataset
+- **Usage Pattern Optimization**: Adjusts to read-heavy vs. write-heavy workloads
+- **Storage Type Awareness**: Optimizes for local vs. remote storage (S3, R2, etc.)
+- **Operating Mode Specialization**: Special optimizations for read-only and write-only modes
+
+#### Cache Configuration (Optional)
+
+You can override any of these automatically tuned parameters if needed:

 - **Hot Cache Size**: Control the maximum number of items to keep in memory.
+  - For large datasets (>100K items), consider values between 5,000-50,000 depending on available memory.
+  - In read-only mode, larger values (10,000-100,000) can be used for better performance.
+
 - **Eviction Threshold**: Set the threshold at which cache eviction begins (default: 0.8 or 80% of max size).
-
-
+  - For write-heavy workloads, lower values (0.6-0.7) may improve performance.
+  - For read-heavy workloads, higher values (0.8-0.9) are recommended.
+
+- **Warm Cache TTL**: Set the time-to-live for items in the warm cache (default: 3600000 ms or 1 hour).
+  - For frequently changing data, shorter TTLs are recommended.
+  - For relatively static data, longer TTLs improve performance.
+
+- **Batch Size**: Control the number of items to process in a single batch for operations like prefetching.
+  - For S3 or remote storage with large datasets, larger values (50-200) significantly improve throughput.
+  - In read-only mode with remote storage, even larger values (100-300) can be used.
+
+#### Auto-Tuning (Enabled by Default)
+
+- **Auto-Tune**: Enable or disable automatic tuning of cache parameters based on usage patterns (default: true).
+- **Auto-Tune Interval**: Set how frequently the system adjusts cache parameters (default: 60000 ms or 1 minute).
+
+#### Read-Only Mode Optimizations (Automatic)
+
+Read-only mode includes special optimizations for search performance that are automatically applied:

-
+- **Larger Cache Sizes**: Automatically uses more memory for caching (up to 40% of free memory for large datasets).
+- **Aggressive Prefetching**: Loads more data in each batch to reduce the number of storage requests.
+- **Prefetch Strategy**: Defaults to 'aggressive' prefetching strategy in read-only mode.
+
+#### Example Configuration for Large S3 Datasets
+
+```javascript
+const brainy = new BrainyData({
+  readOnly: true,
+  lazyLoadInReadOnlyMode: true,
+  storage: {
+    type: 's3',
+    s3Storage: {
+      bucketName: 'your-bucket',
+      accessKeyId: 'your-access-key',
+      secretAccessKey: 'your-secret-key',
+      region: 'your-region'
+    }
+  },
+  cache: {
+    hotCacheMaxSize: 20000,
+    hotCacheEvictionThreshold: 0.85,
+    batchSize: 100,
+    readOnlyMode: {
+      hotCacheMaxSize: 50000,
+      batchSize: 200,
+      prefetchStrategy: 'aggressive'
+    }
+  }
+});
+```
+
+These configuration options make Brainy more efficient, scalable, and adaptable to different environments and usage
+patterns, especially for large datasets in cloud storage.

 ## Testing

-Brainy uses Vitest for testing. For detailed information about testing in Brainy, including test configuration, scripts,
+Brainy uses Vitest for testing. For detailed information about testing in Brainy, including test configuration, scripts,
+reporting tools, and best practices, see our [Testing Guide](TESTING.md).

 Here are some common test commands:

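The eviction behavior described in this hunk (LRU, evict the oldest 20% once the threshold is crossed) is easy to picture with a toy model. This is an illustrative sketch only, not Brainy's actual cache manager implementation (which ships in `dist/storage/cacheManager.d.ts`):

```typescript
// Toy LRU cache: a Map keeps insertion order, so re-inserting on access
// moves an entry to the most-recently-used end.
class ToyLruCache<K, V> {
  private entries = new Map<K, V>()

  constructor(private maxSize: number, private evictionThreshold = 0.8) {}

  get(key: K): V | undefined {
    const value = this.entries.get(key)
    if (value !== undefined) {
      this.entries.delete(key)
      this.entries.set(key, value) // mark as most recently used
    }
    return value
  }

  set(key: K, value: V): void {
    this.entries.set(key, value)
    if (this.entries.size >= this.maxSize * this.evictionThreshold) {
      // Evict the oldest 20% of entries, mirroring the policy described above
      const evictCount = Math.ceil(this.entries.size * 0.2)
      for (const oldest of [...this.entries.keys()].slice(0, evictCount)) {
        this.entries.delete(oldest)
      }
    }
  }
}
```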
@@ -1505,45 +1626,18 @@ see [DEVELOPERS.md](DEVELOPERS.md).

 We have a [Code of Conduct](CODE_OF_CONDUCT.md) that all contributors are expected to follow.

-
-
-Brainy uses a streamlined release workflow that automates version updates, changelog generation, GitHub releases, and NPM deployment.
-
-### Automated Release Process
-
-The release workflow combines several steps into a single command:
-
-1. **Build the project** - Ensures the code compiles correctly
-2. **Run tests** - Verifies that all tests pass
-3. **Update version** - Bumps the version number (patch, minor, or major)
-4. **Generate changelog** - Automatically updates CHANGELOG.md with commit messages since the last release
-5. **Create GitHub release** - Creates a GitHub release with auto-generated notes
-6. **Publish to NPM** - Deploys the package to NPM
-
-### Release Commands
-
-Use one of the following commands to release a new version:
-
-```bash
-# Release with patch version update (0.0.x)
-npm run workflow:patch
-
-# Release with minor version update (0.x.0)
-npm run workflow:minor
-
-# Release with major version update (x.0.0)
-npm run workflow:major
+### Commit Message Format

-
-
+For best results with automatic changelog generation, follow
+the [Conventional Commits](https://www.conventionalcommits.org/) specification for your commit messages:

-# Dry run (build, test, and simulate version update without making changes)
-npm run workflow:dry-run
 ```
+AI Template for automated commit messages:

-
-
-
+Use Conventional Commit format
+Specify the changes in a structured format
+Add information about the purpose of the commit
+```

 ```
 <type>(<scope>): <description>
@@ -1554,6 +1648,7 @@ For best results with automatic changelog generation, follow the [Conventional C
 ```

 Where `<type>` is one of:
+
 - `feat`: A new feature (maps to **Added** section)
 - `fix`: A bug fix (maps to **Fixed** section)
 - `chore`: Regular maintenance tasks (maps to **Changed** section)