@soulcraft/brainy 0.46.0 → 0.48.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/OFFLINE_MODELS.md +56 -0
- package/README.md +46 -1
- package/dist/brainyData.js +7 -9
- package/dist/brainyData.js.map +1 -1
- package/dist/demo.js +2 -2
- package/dist/demo.js.map +1 -1
- package/dist/hnsw/hnswIndex.d.ts +1 -1
- package/dist/hnsw/hnswIndex.js +4 -4
- package/dist/hnsw/hnswIndex.js.map +1 -1
- package/dist/index.d.ts +2 -3
- package/dist/index.js +3 -9
- package/dist/index.js.map +1 -1
- package/dist/setup.d.ts +3 -3
- package/dist/setup.js +6 -6
- package/dist/setup.js.map +1 -1
- package/dist/utils/distance.d.ts +4 -4
- package/dist/utils/distance.js +67 -140
- package/dist/utils/distance.js.map +1 -1
- package/dist/utils/embedding.d.ts +58 -84
- package/dist/utils/embedding.js +250 -594
- package/dist/utils/embedding.js.map +1 -1
- package/dist/utils/robustModelLoader.d.ts +4 -0
- package/dist/utils/robustModelLoader.js +58 -7
- package/dist/utils/robustModelLoader.js.map +1 -1
- package/dist/utils/textEncoding.d.ts +2 -3
- package/dist/utils/textEncoding.js +31 -274
- package/dist/utils/textEncoding.js.map +1 -1
- package/package.json +10 -19
- package/scripts/download-models.cjs +190 -0
package/OFFLINE_MODELS.md
ADDED
@@ -0,0 +1,56 @@
+# Offline Models
+
+Brainy uses Transformers.js with ONNX Runtime for **true offline operation** - no more TensorFlow.js dependency hell!
+
+## How it works
+
+Brainy automatically figures out the best approach:
+
+1. **First use**: Downloads models once (~87 MB) to local cache
+2. **Subsequent use**: Loads from cache (completely offline, zero network calls)
+3. **Smart detection**: Automatically finds models in cache, bundled, or downloads as needed
+
+## Standard usage
+
+```bash
+npm install @soulcraft/brainy
+# Use immediately - models download automatically on first use
+```
+
+## Docker with production egress restrictions
+
+For environments where production has no internet but build does:
+
+```dockerfile
+FROM node:24-slim
+WORKDIR /app
+COPY package*.json ./
+RUN npm install @soulcraft/brainy
+RUN npm run download-models # Download during build (when internet available)
+COPY . .
+# Production container now works completely offline
+```
+
+## Development with immediate offline
+
+If you want models available immediately for development:
+
+```bash
+npm install @soulcraft/brainy
+npm run download-models # Optional: download now instead of on first use
+```
+
+## Key benefits vs TensorFlow.js
+
+- ✅ **95% smaller package** - 643 kB vs 12.5 MB
+- ✅ **84% smaller models** - 87 MB vs 525 MB
+- ✅ **True offline** - Zero network calls after initial download
+- ✅ **No dependency issues** - 5 deps vs 47+, no more --legacy-peer-deps
+- ✅ **Better performance** - ONNX Runtime beats TensorFlow.js
+- ✅ **Same API** - Drop-in replacement
+
+## Philosophy
+
+**Install and use. Brainy handles the rest.**
+
+No configuration files, no environment variables, no complex setup. Brainy detects your environment and does the right thing automatically.
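The "How it works" flow above (download once on first use, then serve everything from the local cache) is easiest to see from application code. A minimal sketch, assuming the `BrainyData` constructor shown in the README hunk below; the `add` and `search` calls and their signatures are illustrative assumptions, not confirmed by this diff:

```javascript
import { BrainyData } from '@soulcraft/brainy'

// Uses the all-MiniLM-L6-v2 model via Transformers.js / ONNX Runtime.
const db = new BrainyData({
  embedding: { type: 'transformer' }
})

// The first call that needs an embedding triggers the one-time ~87 MB model download
// (skipped entirely if `npm run download-models` already populated the cache).
await db.add('The quick brown fox jumps over the lazy dog', { source: 'demo' }) // hypothetical signature

// Later calls load the model from the local cache - zero network calls.
const results = await db.search('fast animals', 5) // hypothetical signature
```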
package/README.md
CHANGED
@@ -11,6 +11,51 @@

 </div>

+## 🔥 MAJOR UPDATE: TensorFlow.js → Transformers.js Migration (v0.46+)
+
+**We've completely replaced TensorFlow.js with Transformers.js for better performance and true offline operation!**
+
+### Why We Made This Change
+
+**The Honest Truth About TensorFlow.js:**
+
+- 📦 **Massive Package Size**: 12.5MB+ packages with complex dependency trees
+- 🌐 **Hidden Network Calls**: Even "local" models triggered fetch() calls internally
+- 🐛 **Dependency Hell**: Constant `--legacy-peer-deps` issues with Node.js updates
+- 🔧 **Maintenance Burden**: 47+ dependencies to keep compatible across environments
+- 💾 **Huge Models**: 525MB Universal Sentence Encoder models
+
+### What You Get Now
+
+- ✅ **95% Smaller Package**: 643 kB vs 12.5 MB (and it actually works better!)
+- ✅ **84% Smaller Models**: 87 MB vs 525 MB all-MiniLM-L6-v2 vs USE
+- ✅ **True Offline Operation**: Zero network calls after initial model download
+- ✅ **5x Fewer Dependencies**: Clean dependency tree, no more peer dep issues
+- ✅ **Same API**: Drop-in replacement - your existing code just works
+- ✅ **Better Performance**: ONNX Runtime is faster than TensorFlow.js in most cases
+
+### Migration (It's Automatic!)
+
+```javascript
+// Your existing code works unchanged!
+import { BrainyData } from '@soulcraft/brainy'
+
+const db = new BrainyData({
+  embedding: { type: 'transformer' } // Now uses Transformers.js automatically
+})
+
+// Dimensions changed from 512 → 384 (handled automatically)
+```
+
+**For Docker/Production or No Egress:**
+
+```dockerfile
+RUN npm install @soulcraft/brainy
+RUN npm run download-models # Download during build for offline production
+```
+
+---
+
 ## ✨ What is Brainy?

 Imagine a database that thinks like you do - connecting ideas, finding patterns, and getting smarter over time. Brainy
@@ -33,7 +78,7 @@ easy-to-use package.
   environment and optimizes itself
 - **🌍 True Write-Once, Run-Anywhere** - Same code runs in Angular, React, Vue, Node.js, Deno, Bun, serverless, edge
   workers, and web workers with automatic environment detection
-- **⚡ Scary Fast** - Handles millions of vectors with sub-millisecond search.
+- **⚡ Scary Fast** - Handles millions of vectors with sub-millisecond search. GPU acceleration for embeddings, optimized CPU for distance calculations
 - **🎯 Self-Learning** - Like having a database that goes to the gym. Gets faster and smarter the more you use it
 - **🔮 AI-First Design** - Built for the age of embeddings, RAG, and semantic search. Your LLMs will thank you
 - **🎮 Actually Fun to Use** - Clean API, great DX, and it does the heavy lifting so you can build cool stuff
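The migration note above calls out the embedding dimension change from 512 (Universal Sentence Encoder) to 384 (all-MiniLM-L6-v2). A quick way to check that after upgrading, assuming `defaultEmbeddingFunction` is reachable from the package root - the diff only shows it exported from the internal `./utils/index.js` module, so this import path is an assumption:

```javascript
// Sanity check for the 512 → 384 dimension change described in the README hunk above.
// Assumption: defaultEmbeddingFunction is re-exported from the package root.
import { defaultEmbeddingFunction } from '@soulcraft/brainy'

const vector = await defaultEmbeddingFunction('hello world')
console.log(vector.length) // expected: 384 with all-MiniLM-L6-v2 (was 512 with USE)
```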
package/dist/brainyData.js
CHANGED
@@ -5,7 +5,7 @@
 import { v4 as uuidv4 } from 'uuid';
 import { HNSWIndexOptimized } from './hnsw/hnswIndexOptimized.js';
 import { createStorage } from './storage/storageFactory.js';
-import { cosineDistance,
+import { cosineDistance, defaultEmbeddingFunction, cleanupWorkerPools, batchEmbed } from './utils/index.js';
 import { getAugmentationVersion } from './utils/version.js';
 import { NounType, VerbType } from './types/graphTypes.js';
 import { createServerSearchAugmentations } from './augmentations/serverSearchAugmentations.js';
@@ -73,8 +73,8 @@ export class BrainyData {
         this.healthMonitor = null;
         // Statistics collector
         this.statisticsCollector = new StatisticsCollector();
-        // Set dimensions to fixed value of
-        this._dimensions =
+        // Set dimensions to fixed value of 384 (all-MiniLM-L6-v2 dimension)
+        this._dimensions = 384;
         // Set distance function
         this.distanceFunction = config.distanceFunction || cosineDistance;
         // Always use the optimized HNSW index implementation
@@ -99,9 +99,7 @@ export class BrainyData {
             this.embeddingFunction = config.embeddingFunction;
         }
         else {
-            this.embeddingFunction =
-                verbose: this.loggingConfig?.verbose
-            });
+            this.embeddingFunction = defaultEmbeddingFunction;
         }
         // Set persistent storage request flag
         this.requestPersistentStorage =
@@ -554,8 +552,8 @@ export class BrainyData {
                 await new Promise((resolve) => setTimeout(resolve, 1000));
                 // Try again with a different approach - use the non-threaded version
                 // This is a fallback in case the threaded version fails
-                const {
-                const fallbackEmbeddingFunction =
+                const { createEmbeddingFunction } = await import('./utils/embedding.js');
+                const fallbackEmbeddingFunction = createEmbeddingFunction();
                 // Test the fallback embedding function
                 await fallbackEmbeddingFunction('');
                 // If successful, replace the embedding function
@@ -1181,7 +1179,7 @@ export class BrainyData {
             // Extract just the text for batch embedding
             const texts = textItems.map((item) => item.text);
             // Perform batch embedding
-            const embeddings = await
+            const embeddings = await batchEmbed(texts);
             // Add each item with its embedding
             textPromises = textItems.map((item, i) => this.add(embeddings[i], item.metadata, {
                 ...options,
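The last hunk above routes text batches through `batchEmbed(texts)` before handing each vector to `add`. A condensed sketch of that pattern from the caller's side, assuming `batchEmbed` is re-exported from the package root (the diff only shows it imported from the internal `./utils/index.js`) and that `add` accepts a precomputed vector plus metadata, as the hunk suggests:

```javascript
import { BrainyData, batchEmbed } from '@soulcraft/brainy' // root re-export of batchEmbed is an assumption

const db = new BrainyData({ embedding: { type: 'transformer' } })

const items = [
  { text: 'first document', metadata: { id: 1 } },
  { text: 'second document', metadata: { id: 2 } }
]

// Embed all texts in one call, mirroring the batch path in the hunk around line 1179.
const embeddings = await batchEmbed(items.map((item) => item.text))

// Add each precomputed vector with its metadata, as BrainyData does internally.
await Promise.all(items.map((item, i) => db.add(embeddings[i], item.metadata)))
```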