@soulcraft/brainy 0.46.0 → 0.48.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,56 @@
+ # Offline Models
+
+ Brainy uses Transformers.js with ONNX Runtime for **true offline operation** - no more TensorFlow.js dependency hell!
+
+ ## How it works
+
+ Brainy automatically figures out the best approach:
+
+ 1. **First use**: Downloads models once (~87 MB) to a local cache
+ 2. **Subsequent use**: Loads from the cache (completely offline, zero network calls)
+ 3. **Smart detection**: Automatically finds models in the local cache or the bundled copy, or downloads them as needed
+
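+ To illustrate the cache-then-load behavior, here is a minimal sketch of the kind of Transformers.js call that sits underneath Brainy's embedding step. The `@xenova/transformers` package name and the `Xenova/all-MiniLM-L6-v2` model id are assumptions about the underlying stack, not Brainy API:
+
+ ```javascript
+ import { pipeline } from '@xenova/transformers'
+
+ // First call downloads the ONNX model files (~87 MB total per this doc)
+ // into the local cache; later calls load from disk with zero network calls.
+ const embed = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2')
+ const output = await embed('hello world', { pooling: 'mean', normalize: true })
+ // output.data is a Float32Array of length 384
+ ```
+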
+ ## Standard usage
+
+ ```bash
+ npm install @soulcraft/brainy
+ # Use immediately - models download automatically on first use
+ ```
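+
+ In code, nothing extra is needed either; a first-run sketch reusing the constructor from this package's README:
+
+ ```javascript
+ import { BrainyData } from '@soulcraft/brainy'
+
+ const db = new BrainyData({
+   embedding: { type: 'transformer' } // Transformers.js under the hood
+ })
+ // The first operation that needs embeddings triggers the one-time
+ // ~87 MB download; everything after that runs from the local cache.
+ ```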
+
+ ## Docker with production egress restrictions
+
+ For environments where the production container has no internet access but the build step does:
+
+ ```dockerfile
+ FROM node:24-slim
+ WORKDIR /app
+ COPY package*.json ./
+ RUN npm install @soulcraft/brainy
+ RUN npm run download-models # Download during build (while internet is available)
+ COPY . .
+ # Production container now works completely offline
+ ```
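+
+ One way to confirm the image needs no egress at runtime is to run it with networking disabled entirely (the `myapp` tag is just an example):
+
+ ```bash
+ docker build -t myapp .
+ docker run --network none myapp  # models load from the cache baked into the image
+ ```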
+
+ ## Development with immediate offline
+
+ If you want models available immediately for development:
+
+ ```bash
+ npm install @soulcraft/brainy
+ npm run download-models # Optional: download now instead of on first use
+ ```
+
+ ## Key benefits vs TensorFlow.js
+
+ - ✅ **95% smaller package** - 643 kB vs 12.5 MB
+ - ✅ **83% smaller models** - 87 MB vs 525 MB
+ - ✅ **True offline** - Zero network calls after initial download
+ - ✅ **No dependency issues** - 5 deps vs 47+, no more `--legacy-peer-deps`
+ - ✅ **Better performance** - ONNX Runtime outperforms TensorFlow.js in most cases
+ - ✅ **Same API** - Drop-in replacement
+
+ ## Philosophy
+
+ **Install and use. Brainy handles the rest.**
+
+ No configuration files, no environment variables, no complex setup. Brainy detects your environment and does the right thing automatically.
package/README.md CHANGED
@@ -11,6 +11,51 @@
 
  </div>
 
+ ## 🔥 MAJOR UPDATE: TensorFlow.js → Transformers.js Migration (v0.46+)
+
+ **We've completely replaced TensorFlow.js with Transformers.js for better performance and true offline operation!**
+
+ ### Why We Made This Change
+
+ **The Honest Truth About TensorFlow.js:**
+
+ - 📦 **Massive Package Size**: 12.5 MB+ packages with complex dependency trees
+ - 🌐 **Hidden Network Calls**: Even "local" models triggered fetch() calls internally
+ - 🐛 **Dependency Hell**: Constant `--legacy-peer-deps` issues with Node.js updates
+ - 🔧 **Maintenance Burden**: 47+ dependencies to keep compatible across environments
+ - 💾 **Huge Models**: 525 MB Universal Sentence Encoder models
+
+ ### What You Get Now
+
+ - ✅ **95% Smaller Package**: 643 kB vs 12.5 MB (and it actually works better!)
+ - ✅ **83% Smaller Models**: 87 MB (all-MiniLM-L6-v2) vs 525 MB (Universal Sentence Encoder)
+ - ✅ **True Offline Operation**: Zero network calls after initial model download
+ - ✅ **Far Fewer Dependencies**: 5 vs 47+, a clean dependency tree with no more peer-dep issues
+ - ✅ **Same API**: Drop-in replacement - your existing code just works
+ - ✅ **Better Performance**: ONNX Runtime is faster than TensorFlow.js in most cases
+
+ ### Migration (It's Automatic!)
+
+ ```javascript
+ // Your existing code works unchanged!
+ import { BrainyData } from '@soulcraft/brainy'
+
+ const db = new BrainyData({
+   embedding: { type: 'transformer' } // Now uses Transformers.js automatically
+ })
+
+ // Dimensions changed from 512 → 384 (handled automatically)
+ ```
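+
+ One caveat if you store precomputed vectors rather than text: they must now be 384-dimensional. A sketch reusing `db` from above (the `add(vector, metadata)` call shape mirrors this release's internal usage; treat it as illustrative):
+
+ ```javascript
+ // 384 floats (all-MiniLM-L6-v2), not the old 512 (Universal Sentence Encoder)
+ const vector = new Array(384).fill(0) // placeholder embedding
+ await db.add(vector, { label: 'example' })
+ ```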
+
+ **For Docker/Production or No Egress:**
+
+ ```dockerfile
+ RUN npm install @soulcraft/brainy
+ RUN npm run download-models # Download during build for offline production
+ ```
+
+ ---
+
  ## ✨ What is Brainy?
 
  Imagine a database that thinks like you do - connecting ideas, finding patterns, and getting smarter over time. Brainy
@@ -33,7 +78,7 @@ easy-to-use package.
  environment and optimizes itself
  - **🌍 True Write-Once, Run-Anywhere** - Same code runs in Angular, React, Vue, Node.js, Deno, Bun, serverless, edge
  workers, and web workers with automatic environment detection
- - **⚡ Scary Fast** - Handles millions of vectors with sub-millisecond search. Built-in GPU acceleration when available
+ - **⚡ Scary Fast** - Handles millions of vectors with sub-millisecond search. GPU acceleration for embeddings, optimized CPU for distance calculations
  - **🎯 Self-Learning** - Like having a database that goes to the gym. Gets faster and smarter the more you use it
  - **🔮 AI-First Design** - Built for the age of embeddings, RAG, and semantic search. Your LLMs will thank you
  - **🎮 Actually Fun to Use** - Clean API, great DX, and it does the heavy lifting so you can build cool stuff
@@ -5,7 +5,7 @@
  import { v4 as uuidv4 } from 'uuid';
  import { HNSWIndexOptimized } from './hnsw/hnswIndexOptimized.js';
  import { createStorage } from './storage/storageFactory.js';
- import { cosineDistance, defaultBatchEmbeddingFunction, getDefaultEmbeddingFunction, cleanupWorkerPools } from './utils/index.js';
+ import { cosineDistance, defaultEmbeddingFunction, cleanupWorkerPools, batchEmbed } from './utils/index.js';
  import { getAugmentationVersion } from './utils/version.js';
  import { NounType, VerbType } from './types/graphTypes.js';
  import { createServerSearchAugmentations } from './augmentations/serverSearchAugmentations.js';
@@ -73,8 +73,8 @@ export class BrainyData {
  this.healthMonitor = null;
  // Statistics collector
  this.statisticsCollector = new StatisticsCollector();
- // Set dimensions to fixed value of 512 (Universal Sentence Encoder dimension)
- this._dimensions = 512;
+ // Set dimensions to fixed value of 384 (all-MiniLM-L6-v2 dimension)
+ this._dimensions = 384;
  // Set distance function
  this.distanceFunction = config.distanceFunction || cosineDistance;
  // Always use the optimized HNSW index implementation
@@ -99,9 +99,7 @@ export class BrainyData {
  this.embeddingFunction = config.embeddingFunction;
  }
  else {
- this.embeddingFunction = getDefaultEmbeddingFunction({
- verbose: this.loggingConfig?.verbose
- });
+ this.embeddingFunction = defaultEmbeddingFunction;
  }
  // Set persistent storage request flag
  this.requestPersistentStorage =
@@ -554,8 +552,8 @@ export class BrainyData {
  await new Promise((resolve) => setTimeout(resolve, 1000));
  // Try again with a different approach - use the non-threaded version
  // This is a fallback in case the threaded version fails
- const { createTensorFlowEmbeddingFunction } = await import('./utils/embedding.js');
- const fallbackEmbeddingFunction = createTensorFlowEmbeddingFunction();
+ const { createEmbeddingFunction } = await import('./utils/embedding.js');
+ const fallbackEmbeddingFunction = createEmbeddingFunction();
  // Test the fallback embedding function
  await fallbackEmbeddingFunction('');
  // If successful, replace the embedding function
@@ -1181,7 +1179,7 @@ export class BrainyData {
  // Extract just the text for batch embedding
  const texts = textItems.map((item) => item.text);
  // Perform batch embedding
- const embeddings = await defaultBatchEmbeddingFunction(texts);
+ const embeddings = await batchEmbed(texts);
  // Add each item with its embedding
  textPromises = textItems.map((item, i) => this.add(embeddings[i], item.metadata, {
  ...options,