@soulcraft/brainy 0.9.5 → 0.9.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -15
- package/dist/brainy.js +428 -224
- package/dist/brainy.min.js +987 -987
- package/dist/unified.js +40 -31
- package/dist/unified.min.js +1 -1
- package/dist/utils/version.d.ts +1 -1
- package/dist/utils/workerUtils.d.ts +6 -6
- package/package.json +1 -4
package/dist/brainy.js
CHANGED
|
@@ -2524,6 +2524,177 @@ const dotProductDistance = (a, b) => {
|
|
|
2524
2524
|
return -dotProduct;
|
|
2525
2525
|
};
|
|
2526
2526
|
|
|
2527
|
+
/**
|
|
2528
|
+
* Utility functions for environment detection
|
|
2529
|
+
*/
|
|
2530
|
+
/**
|
|
2531
|
+
* Check if code is running in a browser environment
|
|
2532
|
+
*/
|
|
2533
|
+
function isBrowser$1() {
|
|
2534
|
+
return typeof window !== 'undefined' && typeof document !== 'undefined';
|
|
2535
|
+
}
|
|
2536
|
+
/**
|
|
2537
|
+
* Check if code is running in a Node.js environment
|
|
2538
|
+
*/
|
|
2539
|
+
function isNode() {
|
|
2540
|
+
return typeof process !== 'undefined' &&
|
|
2541
|
+
process.versions != null &&
|
|
2542
|
+
process.versions.node != null;
|
|
2543
|
+
}
|
|
2544
|
+
/**
|
|
2545
|
+
* Check if Web Workers are available in the current environment
|
|
2546
|
+
*/
|
|
2547
|
+
function areWebWorkersAvailable() {
|
|
2548
|
+
return isBrowser$1() && typeof Worker !== 'undefined';
|
|
2549
|
+
}
|
|
2550
|
+
/**
|
|
2551
|
+
* Check if Worker Threads are available in the current environment (Node.js)
|
|
2552
|
+
*/
|
|
2553
|
+
function areWorkerThreadsAvailable() {
|
|
2554
|
+
if (!isNode())
|
|
2555
|
+
return false;
|
|
2556
|
+
try {
|
|
2557
|
+
// Dynamic import to avoid errors in browser environments
|
|
2558
|
+
require('worker_threads');
|
|
2559
|
+
return true;
|
|
2560
|
+
}
|
|
2561
|
+
catch (e) {
|
|
2562
|
+
return false;
|
|
2563
|
+
}
|
|
2564
|
+
}
|
|
2565
|
+
/**
|
|
2566
|
+
* Determine if threading is available in the current environment
|
|
2567
|
+
*/
|
|
2568
|
+
function isThreadingAvailable() {
|
|
2569
|
+
return areWebWorkersAvailable() || areWorkerThreadsAvailable();
|
|
2570
|
+
}
|
|
2571
|
+
|
|
2572
|
+
/**
|
|
2573
|
+
* Utility functions for working with Web Workers and Worker Threads
|
|
2574
|
+
*/
|
|
2575
|
+
/**
|
|
2576
|
+
* Execute a function in a Web Worker (browser environment)
|
|
2577
|
+
*
|
|
2578
|
+
* @param fnString The function to execute as a string
|
|
2579
|
+
* @param args The arguments to pass to the function
|
|
2580
|
+
* @returns A promise that resolves with the result of the function
|
|
2581
|
+
*/
|
|
2582
|
+
function executeInWebWorker(fnString, args) {
|
|
2583
|
+
return new Promise((resolve, reject) => {
|
|
2584
|
+
// Create a blob URL for the worker script
|
|
2585
|
+
const workerScript = `
|
|
2586
|
+
self.onmessage = async function(e) {
|
|
2587
|
+
try {
|
|
2588
|
+
const fn = ${fnString};
|
|
2589
|
+
const result = await fn(e.data);
|
|
2590
|
+
self.postMessage({ success: true, data: result });
|
|
2591
|
+
} catch (error) {
|
|
2592
|
+
self.postMessage({
|
|
2593
|
+
success: false,
|
|
2594
|
+
error: error instanceof Error ? error.message : String(error)
|
|
2595
|
+
});
|
|
2596
|
+
}
|
|
2597
|
+
};
|
|
2598
|
+
`;
|
|
2599
|
+
const blob = new Blob([workerScript], { type: 'application/javascript' });
|
|
2600
|
+
const blobURL = URL.createObjectURL(blob);
|
|
2601
|
+
// Create a worker
|
|
2602
|
+
const worker = new Worker(blobURL);
|
|
2603
|
+
// Set up message handling
|
|
2604
|
+
worker.onmessage = function (e) {
|
|
2605
|
+
URL.revokeObjectURL(blobURL); // Clean up
|
|
2606
|
+
worker.terminate(); // Terminate the worker
|
|
2607
|
+
if (e.data.success) {
|
|
2608
|
+
resolve(e.data.data);
|
|
2609
|
+
}
|
|
2610
|
+
else {
|
|
2611
|
+
reject(new Error(e.data.error));
|
|
2612
|
+
}
|
|
2613
|
+
};
|
|
2614
|
+
worker.onerror = function (error) {
|
|
2615
|
+
URL.revokeObjectURL(blobURL); // Clean up
|
|
2616
|
+
worker.terminate(); // Terminate the worker
|
|
2617
|
+
reject(error);
|
|
2618
|
+
};
|
|
2619
|
+
// Start the worker
|
|
2620
|
+
worker.postMessage(args);
|
|
2621
|
+
});
|
|
2622
|
+
}
|
|
2623
|
+
/**
|
|
2624
|
+
* Execute a function in a Worker Thread (Node.js environment)
|
|
2625
|
+
*
|
|
2626
|
+
* @param fnString The function to execute as a string
|
|
2627
|
+
* @param args The arguments to pass to the function
|
|
2628
|
+
* @returns A promise that resolves with the result of the function
|
|
2629
|
+
*/
|
|
2630
|
+
function executeInWorkerThread(fnString, args) {
|
|
2631
|
+
return new Promise((resolve, reject) => {
|
|
2632
|
+
try {
|
|
2633
|
+
// Dynamic import to avoid errors in browser environments
|
|
2634
|
+
const { Worker } = require('worker_threads');
|
|
2635
|
+
// Create a worker script
|
|
2636
|
+
const workerScript = `
|
|
2637
|
+
const { parentPort } = require('worker_threads');
|
|
2638
|
+
|
|
2639
|
+
parentPort.once('message', async (data) => {
|
|
2640
|
+
try {
|
|
2641
|
+
const fn = ${fnString};
|
|
2642
|
+
const result = await fn(data);
|
|
2643
|
+
parentPort.postMessage({ success: true, data: result });
|
|
2644
|
+
} catch (error) {
|
|
2645
|
+
parentPort.postMessage({
|
|
2646
|
+
success: false,
|
|
2647
|
+
error: error instanceof Error ? error.message : String(error)
|
|
2648
|
+
});
|
|
2649
|
+
}
|
|
2650
|
+
});
|
|
2651
|
+
`;
|
|
2652
|
+
// Create a worker
|
|
2653
|
+
const worker = new Worker(workerScript, { eval: true });
|
|
2654
|
+
// Set up message handling
|
|
2655
|
+
worker.on('message', (data) => {
|
|
2656
|
+
worker.terminate(); // Terminate the worker
|
|
2657
|
+
if (data.success) {
|
|
2658
|
+
resolve(data.data);
|
|
2659
|
+
}
|
|
2660
|
+
else {
|
|
2661
|
+
reject(new Error(data.error));
|
|
2662
|
+
}
|
|
2663
|
+
});
|
|
2664
|
+
worker.on('error', (error) => {
|
|
2665
|
+
worker.terminate(); // Terminate the worker
|
|
2666
|
+
reject(error);
|
|
2667
|
+
});
|
|
2668
|
+
// Start the worker
|
|
2669
|
+
worker.postMessage(args);
|
|
2670
|
+
}
|
|
2671
|
+
catch (error) {
|
|
2672
|
+
reject(error);
|
|
2673
|
+
}
|
|
2674
|
+
});
|
|
2675
|
+
}
|
|
2676
|
+
/**
|
|
2677
|
+
* Execute a function in a separate thread based on the environment
|
|
2678
|
+
*
|
|
2679
|
+
* @param fnString The function to execute as a string
|
|
2680
|
+
* @param args The arguments to pass to the function
|
|
2681
|
+
* @returns A promise that resolves with the result of the function
|
|
2682
|
+
*/
|
|
2683
|
+
function executeInThread(fnString, args) {
|
|
2684
|
+
if (isBrowser$1()) {
|
|
2685
|
+
return executeInWebWorker(fnString, args);
|
|
2686
|
+
}
|
|
2687
|
+
else if (isNode()) {
|
|
2688
|
+
return executeInWorkerThread(fnString, args);
|
|
2689
|
+
}
|
|
2690
|
+
else {
|
|
2691
|
+
// Fall back to executing in the main thread
|
|
2692
|
+
// Parse the function from string and execute it
|
|
2693
|
+
const fn = new Function('return ' + fnString)();
|
|
2694
|
+
return Promise.resolve(fn(args));
|
|
2695
|
+
}
|
|
2696
|
+
}
|
|
2697
|
+
|
|
2527
2698
|
/**
|
|
2528
2699
|
* Embedding functions for converting data to vectors
|
|
2529
2700
|
*/
|
|
@@ -2687,12 +2858,89 @@ function createTensorFlowEmbeddingFunction() {
|
|
|
2687
2858
|
}
|
|
2688
2859
|
};
|
|
2689
2860
|
}
|
|
2861
|
+
/**
|
|
2862
|
+
* Creates a TensorFlow-based Universal Sentence Encoder embedding function that runs in a separate thread
|
|
2863
|
+
* This provides better performance for CPU-intensive embedding operations
|
|
2864
|
+
* @param options Configuration options
|
|
2865
|
+
* @returns An embedding function that runs in a separate thread
|
|
2866
|
+
*/
|
|
2867
|
+
function createThreadedEmbeddingFunction(options = {}) {
|
|
2868
|
+
// Create a standard embedding function to use as fallback
|
|
2869
|
+
const standardEmbedding = createTensorFlowEmbeddingFunction();
|
|
2870
|
+
// Flag to track if we've fallen back to main thread
|
|
2871
|
+
let useFallback = false;
|
|
2872
|
+
return async (data) => {
|
|
2873
|
+
// If we've already determined that threading doesn't work, use the fallback
|
|
2874
|
+
if (useFallback) {
|
|
2875
|
+
return standardEmbedding(data);
|
|
2876
|
+
}
|
|
2877
|
+
try {
|
|
2878
|
+
// Function to be executed in a worker thread
|
|
2879
|
+
// This must be a regular function (not async) to avoid Promise cloning issues
|
|
2880
|
+
const embedInWorker = (inputData) => {
|
|
2881
|
+
// Return a plain object with the input data
|
|
2882
|
+
// All async operations will be performed inside the worker
|
|
2883
|
+
return { data: inputData };
|
|
2884
|
+
};
|
|
2885
|
+
// Worker implementation function that will be stringified and run in the worker
|
|
2886
|
+
const workerImplementation = async ({ data }) => {
|
|
2887
|
+
// We need to dynamically import TensorFlow.js and USE in the worker
|
|
2888
|
+
const tf = await Promise.resolve().then(function () { return index; });
|
|
2889
|
+
const use = await Promise.resolve().then(function () { return universalSentenceEncoder_esm; });
|
|
2890
|
+
// Load the model
|
|
2891
|
+
const model = await use.load();
|
|
2892
|
+
// Handle different input types
|
|
2893
|
+
let textToEmbed;
|
|
2894
|
+
if (typeof data === 'string') {
|
|
2895
|
+
if (data.trim() === '') {
|
|
2896
|
+
return new Array(512).fill(0);
|
|
2897
|
+
}
|
|
2898
|
+
textToEmbed = [data];
|
|
2899
|
+
}
|
|
2900
|
+
else if (Array.isArray(data) &&
|
|
2901
|
+
data.every((item) => typeof item === 'string')) {
|
|
2902
|
+
if (data.length === 0 || data.every((item) => item.trim() === '')) {
|
|
2903
|
+
return new Array(512).fill(0);
|
|
2904
|
+
}
|
|
2905
|
+
textToEmbed = data.filter((item) => item.trim() !== '');
|
|
2906
|
+
if (textToEmbed.length === 0) {
|
|
2907
|
+
return new Array(512).fill(0);
|
|
2908
|
+
}
|
|
2909
|
+
}
|
|
2910
|
+
else {
|
|
2911
|
+
throw new Error('UniversalSentenceEncoder only supports string or string[] data');
|
|
2912
|
+
}
|
|
2913
|
+
// Get embeddings
|
|
2914
|
+
const embeddings = await model.embed(textToEmbed);
|
|
2915
|
+
// Convert to array and return the first embedding
|
|
2916
|
+
const embeddingArray = await embeddings.array();
|
|
2917
|
+
// Dispose of the tensor to free memory
|
|
2918
|
+
embeddings.dispose();
|
|
2919
|
+
return embeddingArray[0];
|
|
2920
|
+
};
|
|
2921
|
+
// Execute the embedding function in a separate thread
|
|
2922
|
+
// Pass the worker implementation as a string to avoid Promise cloning issues
|
|
2923
|
+
return await executeInThread(workerImplementation.toString(), embedInWorker(data));
|
|
2924
|
+
}
|
|
2925
|
+
catch (error) {
|
|
2926
|
+
// If threading fails and fallback is enabled, use the standard embedding function
|
|
2927
|
+
if (options.fallbackToMain) {
|
|
2928
|
+
console.warn('Threaded embedding failed, falling back to main thread:', error);
|
|
2929
|
+
useFallback = true;
|
|
2930
|
+
return standardEmbedding(data);
|
|
2931
|
+
}
|
|
2932
|
+
// Otherwise, propagate the error
|
|
2933
|
+
throw new Error(`Threaded embedding failed: ${error}`);
|
|
2934
|
+
}
|
|
2935
|
+
};
|
|
2936
|
+
}
|
|
2690
2937
|
/**
|
|
2691
2938
|
* Default embedding function
|
|
2692
2939
|
* Uses UniversalSentenceEncoder for all text embeddings
|
|
2693
2940
|
* TensorFlow.js is required for this to work
|
|
2941
|
+
* Uses threading when available for better performance
|
|
2694
2942
|
*/
|
|
2695
|
-
const defaultEmbeddingFunction =
|
|
2943
|
+
const defaultEmbeddingFunction = createThreadedEmbeddingFunction({ fallbackToMain: true });
|
|
2696
2944
|
|
|
2697
2945
|
/**
|
|
2698
2946
|
* HNSW (Hierarchical Navigable Small World) Index implementation
|
|
@@ -2706,18 +2954,73 @@ const DEFAULT_CONFIG = {
|
|
|
2706
2954
|
ml: 16 // Max level
|
|
2707
2955
|
};
|
|
2708
2956
|
class HNSWIndex {
|
|
2709
|
-
constructor(config = {}, distanceFunction = euclideanDistance) {
|
|
2957
|
+
constructor(config = {}, distanceFunction = euclideanDistance, options = {}) {
|
|
2710
2958
|
this.nouns = new Map();
|
|
2711
2959
|
this.entryPointId = null;
|
|
2712
2960
|
this.maxLevel = 0;
|
|
2713
2961
|
this.dimension = null;
|
|
2962
|
+
this.useParallelization = true; // Whether to use parallelization for performance-critical operations
|
|
2714
2963
|
this.config = { ...DEFAULT_CONFIG, ...config };
|
|
2715
2964
|
this.distanceFunction = distanceFunction;
|
|
2965
|
+
this.useParallelization = options.useParallelization !== undefined ? options.useParallelization : true;
|
|
2966
|
+
}
|
|
2967
|
+
/**
|
|
2968
|
+
* Set whether to use parallelization for performance-critical operations
|
|
2969
|
+
*/
|
|
2970
|
+
setUseParallelization(useParallelization) {
|
|
2971
|
+
this.useParallelization = useParallelization;
|
|
2972
|
+
}
|
|
2973
|
+
/**
|
|
2974
|
+
* Get whether parallelization is enabled
|
|
2975
|
+
*/
|
|
2976
|
+
getUseParallelization() {
|
|
2977
|
+
return this.useParallelization;
|
|
2978
|
+
}
|
|
2979
|
+
/**
|
|
2980
|
+
* Calculate distances between a query vector and multiple vectors in parallel
|
|
2981
|
+
* This is used to optimize performance for search operations
|
|
2982
|
+
* @param queryVector The query vector
|
|
2983
|
+
* @param vectors Array of vectors to compare against
|
|
2984
|
+
* @returns Array of distances
|
|
2985
|
+
*/
|
|
2986
|
+
async calculateDistancesInParallel(queryVector, vectors) {
|
|
2987
|
+
// If parallelization is disabled or there are very few vectors, use sequential processing
|
|
2988
|
+
if (!this.useParallelization || vectors.length < 10) {
|
|
2989
|
+
return vectors.map(item => ({
|
|
2990
|
+
id: item.id,
|
|
2991
|
+
distance: this.distanceFunction(queryVector, item.vector)
|
|
2992
|
+
}));
|
|
2993
|
+
}
|
|
2994
|
+
// Function to be executed in a worker thread
|
|
2995
|
+
const distanceCalculator = (args) => {
|
|
2996
|
+
const { queryVector, vectors, distanceFnString } = args;
|
|
2997
|
+
// Recreate the distance function from its string representation
|
|
2998
|
+
const distanceFunction = new Function('return ' + distanceFnString)();
|
|
2999
|
+
// Calculate distances for all items
|
|
3000
|
+
return vectors.map(item => ({
|
|
3001
|
+
id: item.id,
|
|
3002
|
+
distance: distanceFunction(queryVector, item.vector)
|
|
3003
|
+
}));
|
|
3004
|
+
};
|
|
3005
|
+
try {
|
|
3006
|
+
// Convert the distance function to a string for serialization
|
|
3007
|
+
const distanceFnString = this.distanceFunction.toString();
|
|
3008
|
+
// Execute the distance calculation in a separate thread
|
|
3009
|
+
return await executeInThread(distanceCalculator.toString(), { queryVector, vectors, distanceFnString });
|
|
3010
|
+
}
|
|
3011
|
+
catch (error) {
|
|
3012
|
+
console.error('Error in parallel distance calculation, falling back to sequential:', error);
|
|
3013
|
+
// Fall back to sequential processing if parallel execution fails
|
|
3014
|
+
return vectors.map(item => ({
|
|
3015
|
+
id: item.id,
|
|
3016
|
+
distance: this.distanceFunction(queryVector, item.vector)
|
|
3017
|
+
}));
|
|
3018
|
+
}
|
|
2716
3019
|
}
|
|
2717
3020
|
/**
|
|
2718
3021
|
* Add a vector to the index
|
|
2719
3022
|
*/
|
|
2720
|
-
addItem(item) {
|
|
3023
|
+
async addItem(item) {
|
|
2721
3024
|
// Check if item is defined
|
|
2722
3025
|
if (!item) {
|
|
2723
3026
|
throw new Error('Item is undefined or null');
|
|
@@ -2799,7 +3102,7 @@ class HNSWIndex {
|
|
|
2799
3102
|
// For each level from nounLevel down to 0
|
|
2800
3103
|
for (let level = Math.min(nounLevel, this.maxLevel); level >= 0; level--) {
|
|
2801
3104
|
// Find ef nearest elements using greedy search
|
|
2802
|
-
const nearestNouns = this.searchLayer(vector, currObj, this.config.efConstruction, level);
|
|
3105
|
+
const nearestNouns = await this.searchLayer(vector, currObj, this.config.efConstruction, level);
|
|
2803
3106
|
// Select M nearest neighbors
|
|
2804
3107
|
const neighbors = this.selectNeighbors(vector, nearestNouns, this.config.M);
|
|
2805
3108
|
// Add bidirectional connections
|
|
@@ -2848,7 +3151,7 @@ class HNSWIndex {
|
|
|
2848
3151
|
/**
|
|
2849
3152
|
* Search for nearest neighbors
|
|
2850
3153
|
*/
|
|
2851
|
-
search(queryVector, k = 10) {
|
|
3154
|
+
async search(queryVector, k = 10) {
|
|
2852
3155
|
if (this.nouns.size === 0) {
|
|
2853
3156
|
return [];
|
|
2854
3157
|
}
|
|
@@ -2878,23 +3181,50 @@ class HNSWIndex {
|
|
|
2878
3181
|
changed = false;
|
|
2879
3182
|
// Check all neighbors at current level
|
|
2880
3183
|
const connections = currObj.connections.get(level) || new Set();
|
|
2881
|
-
|
|
2882
|
-
|
|
2883
|
-
|
|
2884
|
-
|
|
2885
|
-
|
|
3184
|
+
// If we have enough connections, use parallel distance calculation
|
|
3185
|
+
if (this.useParallelization && connections.size >= 10) {
|
|
3186
|
+
// Prepare vectors for parallel calculation
|
|
3187
|
+
const vectors = [];
|
|
3188
|
+
for (const neighborId of connections) {
|
|
3189
|
+
const neighbor = this.nouns.get(neighborId);
|
|
3190
|
+
if (!neighbor)
|
|
3191
|
+
continue;
|
|
3192
|
+
vectors.push({ id: neighborId, vector: neighbor.vector });
|
|
2886
3193
|
}
|
|
2887
|
-
|
|
2888
|
-
|
|
2889
|
-
|
|
2890
|
-
|
|
2891
|
-
|
|
3194
|
+
// Calculate distances in parallel
|
|
3195
|
+
const distances = await this.calculateDistancesInParallel(queryVector, vectors);
|
|
3196
|
+
// Find the closest neighbor
|
|
3197
|
+
for (const { id, distance } of distances) {
|
|
3198
|
+
if (distance < currDist) {
|
|
3199
|
+
currDist = distance;
|
|
3200
|
+
const neighbor = this.nouns.get(id);
|
|
3201
|
+
if (neighbor) {
|
|
3202
|
+
currObj = neighbor;
|
|
3203
|
+
changed = true;
|
|
3204
|
+
}
|
|
3205
|
+
}
|
|
3206
|
+
}
|
|
3207
|
+
}
|
|
3208
|
+
else {
|
|
3209
|
+
// Use sequential processing for small number of connections
|
|
3210
|
+
for (const neighborId of connections) {
|
|
3211
|
+
const neighbor = this.nouns.get(neighborId);
|
|
3212
|
+
if (!neighbor) {
|
|
3213
|
+
// Skip neighbors that don't exist (expected during rapid additions/deletions)
|
|
3214
|
+
continue;
|
|
3215
|
+
}
|
|
3216
|
+
const distToNeighbor = this.distanceFunction(queryVector, neighbor.vector);
|
|
3217
|
+
if (distToNeighbor < currDist) {
|
|
3218
|
+
currDist = distToNeighbor;
|
|
3219
|
+
currObj = neighbor;
|
|
3220
|
+
changed = true;
|
|
3221
|
+
}
|
|
2892
3222
|
}
|
|
2893
3223
|
}
|
|
2894
3224
|
}
|
|
2895
3225
|
}
|
|
2896
3226
|
// Search at level 0 with ef = k
|
|
2897
|
-
const nearestNouns = this.searchLayer(queryVector, currObj, Math.max(this.config.efSearch, k), 0);
|
|
3227
|
+
const nearestNouns = await this.searchLayer(queryVector, currObj, Math.max(this.config.efSearch, k), 0);
|
|
2898
3228
|
// Convert to array and sort by distance
|
|
2899
3229
|
return [...nearestNouns].slice(0, k);
|
|
2900
3230
|
}
|
|
@@ -3014,7 +3344,7 @@ class HNSWIndex {
|
|
|
3014
3344
|
* Search within a specific layer
|
|
3015
3345
|
* Returns a map of noun IDs to distances, sorted by distance
|
|
3016
3346
|
*/
|
|
3017
|
-
searchLayer(queryVector, entryPoint, ef, level) {
|
|
3347
|
+
async searchLayer(queryVector, entryPoint, ef, level) {
|
|
3018
3348
|
// Set of visited nouns
|
|
3019
3349
|
const visited = new Set([entryPoint.id]);
|
|
3020
3350
|
// Priority queue of candidates (closest first)
|
|
@@ -3040,25 +3370,62 @@ class HNSWIndex {
|
|
|
3040
3370
|
continue;
|
|
3041
3371
|
}
|
|
3042
3372
|
const connections = noun.connections.get(level) || new Set();
|
|
3043
|
-
|
|
3044
|
-
|
|
3045
|
-
|
|
3046
|
-
|
|
3047
|
-
|
|
3048
|
-
|
|
3049
|
-
|
|
3373
|
+
// If we have enough connections and parallelization is enabled, use parallel distance calculation
|
|
3374
|
+
if (this.useParallelization && connections.size >= 10) {
|
|
3375
|
+
// Collect unvisited neighbors
|
|
3376
|
+
const unvisitedNeighbors = [];
|
|
3377
|
+
for (const neighborId of connections) {
|
|
3378
|
+
if (!visited.has(neighborId)) {
|
|
3379
|
+
visited.add(neighborId);
|
|
3380
|
+
const neighbor = this.nouns.get(neighborId);
|
|
3381
|
+
if (!neighbor)
|
|
3382
|
+
continue;
|
|
3383
|
+
unvisitedNeighbors.push({ id: neighborId, vector: neighbor.vector });
|
|
3050
3384
|
}
|
|
3051
|
-
|
|
3052
|
-
|
|
3053
|
-
|
|
3054
|
-
|
|
3055
|
-
|
|
3056
|
-
|
|
3057
|
-
|
|
3058
|
-
|
|
3059
|
-
|
|
3060
|
-
|
|
3061
|
-
|
|
3385
|
+
}
|
|
3386
|
+
if (unvisitedNeighbors.length > 0) {
|
|
3387
|
+
// Calculate distances in parallel
|
|
3388
|
+
const distances = await this.calculateDistancesInParallel(queryVector, unvisitedNeighbors);
|
|
3389
|
+
// Process the results
|
|
3390
|
+
for (const { id, distance } of distances) {
|
|
3391
|
+
// If we haven't found ef nearest neighbors yet, or this neighbor is closer than the farthest one we've found
|
|
3392
|
+
if (nearest.size < ef || distance < farthestInNearest[1]) {
|
|
3393
|
+
candidates.set(id, distance);
|
|
3394
|
+
nearest.set(id, distance);
|
|
3395
|
+
// If we have more than ef neighbors, remove the farthest one
|
|
3396
|
+
if (nearest.size > ef) {
|
|
3397
|
+
const sortedNearest = [...nearest].sort((a, b) => a[1] - b[1]);
|
|
3398
|
+
nearest.clear();
|
|
3399
|
+
for (let i = 0; i < ef; i++) {
|
|
3400
|
+
nearest.set(sortedNearest[i][0], sortedNearest[i][1]);
|
|
3401
|
+
}
|
|
3402
|
+
}
|
|
3403
|
+
}
|
|
3404
|
+
}
|
|
3405
|
+
}
|
|
3406
|
+
}
|
|
3407
|
+
else {
|
|
3408
|
+
// Use sequential processing for small number of connections
|
|
3409
|
+
for (const neighborId of connections) {
|
|
3410
|
+
if (!visited.has(neighborId)) {
|
|
3411
|
+
visited.add(neighborId);
|
|
3412
|
+
const neighbor = this.nouns.get(neighborId);
|
|
3413
|
+
if (!neighbor) {
|
|
3414
|
+
// Skip neighbors that don't exist (expected during rapid additions/deletions)
|
|
3415
|
+
continue;
|
|
3416
|
+
}
|
|
3417
|
+
const distToNeighbor = this.distanceFunction(queryVector, neighbor.vector);
|
|
3418
|
+
// If we haven't found ef nearest neighbors yet, or this neighbor is closer than the farthest one we've found
|
|
3419
|
+
if (nearest.size < ef || distToNeighbor < farthestInNearest[1]) {
|
|
3420
|
+
candidates.set(neighborId, distToNeighbor);
|
|
3421
|
+
nearest.set(neighborId, distToNeighbor);
|
|
3422
|
+
// If we have more than ef neighbors, remove the farthest one
|
|
3423
|
+
if (nearest.size > ef) {
|
|
3424
|
+
const sortedNearest = [...nearest].sort((a, b) => a[1] - b[1]);
|
|
3425
|
+
nearest.clear();
|
|
3426
|
+
for (let i = 0; i < ef; i++) {
|
|
3427
|
+
nearest.set(sortedNearest[i][0], sortedNearest[i][1]);
|
|
3428
|
+
}
|
|
3062
3429
|
}
|
|
3063
3430
|
}
|
|
3064
3431
|
}
|
|
@@ -3361,7 +3728,7 @@ class HNSWIndexOptimized extends HNSWIndex {
|
|
|
3361
3728
|
* Add a vector to the index
|
|
3362
3729
|
* Uses product quantization if enabled and memory threshold is exceeded
|
|
3363
3730
|
*/
|
|
3364
|
-
addItem(item) {
|
|
3731
|
+
async addItem(item) {
|
|
3365
3732
|
// Check if item is defined
|
|
3366
3733
|
if (!item) {
|
|
3367
3734
|
throw new Error('Item is undefined or null');
|
|
@@ -3397,7 +3764,7 @@ class HNSWIndexOptimized extends HNSWIndex {
|
|
|
3397
3764
|
// Reconstruct the vector for indexing
|
|
3398
3765
|
const reconstructedVector = this.productQuantizer.reconstruct(codes);
|
|
3399
3766
|
// Add the reconstructed vector to the index
|
|
3400
|
-
return super.addItem({ id, vector: reconstructedVector });
|
|
3767
|
+
return await super.addItem({ id, vector: reconstructedVector });
|
|
3401
3768
|
}
|
|
3402
3769
|
// If disk-based index is active and storage is available, store the vector
|
|
3403
3770
|
if (this.useDiskBasedIndex && this.storage) {
|
|
@@ -3413,13 +3780,13 @@ class HNSWIndexOptimized extends HNSWIndex {
|
|
|
3413
3780
|
});
|
|
3414
3781
|
}
|
|
3415
3782
|
// Add the vector to the in-memory index
|
|
3416
|
-
return super.addItem(item);
|
|
3783
|
+
return await super.addItem(item);
|
|
3417
3784
|
}
|
|
3418
3785
|
/**
|
|
3419
3786
|
* Search for nearest neighbors
|
|
3420
3787
|
* Uses product quantization if enabled
|
|
3421
3788
|
*/
|
|
3422
|
-
search(queryVector, k = 10) {
|
|
3789
|
+
async search(queryVector, k = 10) {
|
|
3423
3790
|
// Check if query vector is defined
|
|
3424
3791
|
if (!queryVector) {
|
|
3425
3792
|
throw new Error('Query vector is undefined or null');
|
|
@@ -3433,10 +3800,10 @@ class HNSWIndexOptimized extends HNSWIndex {
|
|
|
3433
3800
|
// Reconstruct the query vector
|
|
3434
3801
|
const reconstructedVector = this.productQuantizer.reconstruct(codes);
|
|
3435
3802
|
// Search with the reconstructed vector
|
|
3436
|
-
return super.search(reconstructedVector, k);
|
|
3803
|
+
return await super.search(reconstructedVector, k);
|
|
3437
3804
|
}
|
|
3438
3805
|
// Otherwise, use the standard search
|
|
3439
|
-
return super.search(queryVector, k);
|
|
3806
|
+
return await super.search(queryVector, k);
|
|
3440
3807
|
}
|
|
3441
3808
|
/**
|
|
3442
3809
|
* Remove an item from the index
|
|
@@ -7001,7 +7368,7 @@ class BrainyData {
|
|
|
7001
7368
|
this.index.clear();
|
|
7002
7369
|
for (const noun of nouns) {
|
|
7003
7370
|
// Add to index
|
|
7004
|
-
this.index.addItem({
|
|
7371
|
+
await this.index.addItem({
|
|
7005
7372
|
id: noun.id,
|
|
7006
7373
|
vector: noun.vector
|
|
7007
7374
|
});
|
|
@@ -7087,7 +7454,7 @@ class BrainyData {
|
|
|
7087
7454
|
? metadata.id
|
|
7088
7455
|
: v4());
|
|
7089
7456
|
// Add to index
|
|
7090
|
-
this.index.addItem({ id, vector });
|
|
7457
|
+
await this.index.addItem({ id, vector });
|
|
7091
7458
|
// Get the noun from the index
|
|
7092
7459
|
const noun = this.index.getNouns().get(id);
|
|
7093
7460
|
if (!noun) {
|
|
@@ -7178,11 +7545,19 @@ class BrainyData {
|
|
|
7178
7545
|
await this.ensureInitialized();
|
|
7179
7546
|
// Check if database is in read-only mode
|
|
7180
7547
|
this.checkReadOnly();
|
|
7181
|
-
|
|
7548
|
+
// Default concurrency to 4 if not specified
|
|
7549
|
+
const concurrency = options.concurrency || 4;
|
|
7182
7550
|
try {
|
|
7183
|
-
|
|
7184
|
-
|
|
7185
|
-
|
|
7551
|
+
// Process items in batches to control concurrency
|
|
7552
|
+
const ids = [];
|
|
7553
|
+
const itemsToProcess = [...items]; // Create a copy to avoid modifying the original array
|
|
7554
|
+
while (itemsToProcess.length > 0) {
|
|
7555
|
+
// Take up to 'concurrency' items to process in parallel
|
|
7556
|
+
const batch = itemsToProcess.splice(0, concurrency);
|
|
7557
|
+
// Process this batch in parallel
|
|
7558
|
+
const batchResults = await Promise.all(batch.map(item => this.add(item.vectorOrData, item.metadata, options)));
|
|
7559
|
+
// Add the results to our ids array
|
|
7560
|
+
ids.push(...batchResults);
|
|
7186
7561
|
}
|
|
7187
7562
|
return ids;
|
|
7188
7563
|
}
|
|
@@ -7240,7 +7615,7 @@ class BrainyData {
|
|
|
7240
7615
|
// If no noun types specified, search all nouns
|
|
7241
7616
|
if (!nounTypes || nounTypes.length === 0) {
|
|
7242
7617
|
// Search in the index
|
|
7243
|
-
const results = this.index.search(queryVector, k);
|
|
7618
|
+
const results = await this.index.search(queryVector, k);
|
|
7244
7619
|
// Get metadata for each result
|
|
7245
7620
|
const searchResults = [];
|
|
7246
7621
|
for (const [id, score] of results) {
|
|
@@ -7633,7 +8008,7 @@ class BrainyData {
|
|
|
7633
8008
|
metadata: options.metadata
|
|
7634
8009
|
};
|
|
7635
8010
|
// Add to index
|
|
7636
|
-
this.index.addItem({ id, vector: verbVector });
|
|
8011
|
+
await this.index.addItem({ id, vector: verbVector });
|
|
7637
8012
|
// Get the noun from the index
|
|
7638
8013
|
const indexNoun = this.index.getNouns().get(id);
|
|
7639
8014
|
if (!indexNoun) {
|
|
@@ -8373,7 +8748,7 @@ class BrainyData {
|
|
|
8373
8748
|
// Re-add all nouns to the index
|
|
8374
8749
|
for (const noun of data.nouns) {
|
|
8375
8750
|
if (noun.vector && noun.vector.length > 0) {
|
|
8376
|
-
this.index.addItem({ id: noun.id, vector: noun.vector });
|
|
8751
|
+
await this.index.addItem({ id: noun.id, vector: noun.vector });
|
|
8377
8752
|
}
|
|
8378
8753
|
}
|
|
8379
8754
|
console.log('HNSW index reconstruction complete');
|
|
@@ -10230,177 +10605,6 @@ var s3CompatibleStorage = /*#__PURE__*/Object.freeze({
|
|
|
10230
10605
|
S3CompatibleStorage: S3CompatibleStorage
|
|
10231
10606
|
});
|
|
10232
10607
|
|
|
10233
|
-
/**
|
|
10234
|
-
* Utility functions for environment detection
|
|
10235
|
-
*/
|
|
10236
|
-
/**
|
|
10237
|
-
* Check if code is running in a browser environment
|
|
10238
|
-
*/
|
|
10239
|
-
function isBrowser$1() {
|
|
10240
|
-
return typeof window !== 'undefined' && typeof document !== 'undefined';
|
|
10241
|
-
}
|
|
10242
|
-
/**
|
|
10243
|
-
* Check if code is running in a Node.js environment
|
|
10244
|
-
*/
|
|
10245
|
-
function isNode() {
|
|
10246
|
-
return typeof process !== 'undefined' &&
|
|
10247
|
-
process.versions != null &&
|
|
10248
|
-
process.versions.node != null;
|
|
10249
|
-
}
|
|
10250
|
-
/**
|
|
10251
|
-
* Check if Web Workers are available in the current environment
|
|
10252
|
-
*/
|
|
10253
|
-
function areWebWorkersAvailable() {
|
|
10254
|
-
return isBrowser$1() && typeof Worker !== 'undefined';
|
|
10255
|
-
}
|
|
10256
|
-
/**
|
|
10257
|
-
* Check if Worker Threads are available in the current environment (Node.js)
|
|
10258
|
-
*/
|
|
10259
|
-
function areWorkerThreadsAvailable() {
|
|
10260
|
-
if (!isNode())
|
|
10261
|
-
return false;
|
|
10262
|
-
try {
|
|
10263
|
-
// Dynamic import to avoid errors in browser environments
|
|
10264
|
-
require('worker_threads');
|
|
10265
|
-
return true;
|
|
10266
|
-
}
|
|
10267
|
-
catch (e) {
|
|
10268
|
-
return false;
|
|
10269
|
-
}
|
|
10270
|
-
}
|
|
10271
|
-
/**
|
|
10272
|
-
* Determine if threading is available in the current environment
|
|
10273
|
-
*/
|
|
10274
|
-
function isThreadingAvailable() {
|
|
10275
|
-
return areWebWorkersAvailable() || areWorkerThreadsAvailable();
|
|
10276
|
-
}
|
|
10277
|
-
|
|
10278
|
-
/**
|
|
10279
|
-
* Utility functions for working with Web Workers and Worker Threads
|
|
10280
|
-
*/
|
|
10281
|
-
/**
|
|
10282
|
-
* Execute a function in a Web Worker (browser environment)
|
|
10283
|
-
*
|
|
10284
|
-
* @param fn The function to execute
|
|
10285
|
-
* @param args The arguments to pass to the function
|
|
10286
|
-
* @returns A promise that resolves with the result of the function
|
|
10287
|
-
*/
|
|
10288
|
-
function executeInWebWorker(fn, ...args) {
|
|
10289
|
-
return new Promise((resolve, reject) => {
|
|
10290
|
-
// Create a blob URL for the worker script
|
|
10291
|
-
const fnString = fn.toString();
|
|
10292
|
-
const workerScript = `
|
|
10293
|
-
self.onmessage = function(e) {
|
|
10294
|
-
try {
|
|
10295
|
-
const fn = ${fnString};
|
|
10296
|
-
const result = fn(...e.data);
|
|
10297
|
-
self.postMessage({ success: true, data: result });
|
|
10298
|
-
} catch (error) {
|
|
10299
|
-
self.postMessage({
|
|
10300
|
-
success: false,
|
|
10301
|
-
error: error instanceof Error ? error.message : String(error)
|
|
10302
|
-
});
|
|
10303
|
-
}
|
|
10304
|
-
};
|
|
10305
|
-
`;
|
|
10306
|
-
const blob = new Blob([workerScript], { type: 'application/javascript' });
|
|
10307
|
-
const blobURL = URL.createObjectURL(blob);
|
|
10308
|
-
// Create a worker
|
|
10309
|
-
const worker = new Worker(blobURL);
|
|
10310
|
-
// Set up message handling
|
|
10311
|
-
worker.onmessage = function (e) {
|
|
10312
|
-
URL.revokeObjectURL(blobURL); // Clean up
|
|
10313
|
-
worker.terminate(); // Terminate the worker
|
|
10314
|
-
if (e.data.success) {
|
|
10315
|
-
resolve(e.data.data);
|
|
10316
|
-
}
|
|
10317
|
-
else {
|
|
10318
|
-
reject(new Error(e.data.error));
|
|
10319
|
-
}
|
|
10320
|
-
};
|
|
10321
|
-
worker.onerror = function (error) {
|
|
10322
|
-
URL.revokeObjectURL(blobURL); // Clean up
|
|
10323
|
-
worker.terminate(); // Terminate the worker
|
|
10324
|
-
reject(error);
|
|
10325
|
-
};
|
|
10326
|
-
// Start the worker
|
|
10327
|
-
worker.postMessage(args);
|
|
10328
|
-
});
|
|
10329
|
-
}
|
|
10330
|
-
/**
|
|
10331
|
-
* Execute a function in a Worker Thread (Node.js environment)
|
|
10332
|
-
*
|
|
10333
|
-
* @param fn The function to execute
|
|
10334
|
-
* @param args The arguments to pass to the function
|
|
10335
|
-
* @returns A promise that resolves with the result of the function
|
|
10336
|
-
*/
|
|
10337
|
-
function executeInWorkerThread(fn, ...args) {
|
|
10338
|
-
return new Promise((resolve, reject) => {
|
|
10339
|
-
try {
|
|
10340
|
-
// Dynamic import to avoid errors in browser environments
|
|
10341
|
-
const { Worker } = require('worker_threads');
|
|
10342
|
-
// Create a worker script
|
|
10343
|
-
const fnString = fn.toString();
|
|
10344
|
-
const workerScript = `
|
|
10345
|
-
const { parentPort } = require('worker_threads');
|
|
10346
|
-
|
|
10347
|
-
parentPort.once('message', (data) => {
|
|
10348
|
-
try {
|
|
10349
|
-
const fn = ${fnString};
|
|
10350
|
-
const result = fn(...data);
|
|
10351
|
-
parentPort.postMessage({ success: true, data: result });
|
|
10352
|
-
} catch (error) {
|
|
10353
|
-
parentPort.postMessage({
|
|
10354
|
-
success: false,
|
|
10355
|
-
error: error instanceof Error ? error.message : String(error)
|
|
10356
|
-
});
|
|
10357
|
-
}
|
|
10358
|
-
});
|
|
10359
|
-
`;
|
|
10360
|
-
// Create a worker
|
|
10361
|
-
const worker = new Worker(workerScript, { eval: true });
|
|
10362
|
-
// Set up message handling
|
|
10363
|
-
worker.on('message', (data) => {
|
|
10364
|
-
worker.terminate(); // Terminate the worker
|
|
10365
|
-
if (data.success) {
|
|
10366
|
-
resolve(data.data);
|
|
10367
|
-
}
|
|
10368
|
-
else {
|
|
10369
|
-
reject(new Error(data.error));
|
|
10370
|
-
}
|
|
10371
|
-
});
|
|
10372
|
-
worker.on('error', (error) => {
|
|
10373
|
-
worker.terminate(); // Terminate the worker
|
|
10374
|
-
reject(error);
|
|
10375
|
-
});
|
|
10376
|
-
// Start the worker
|
|
10377
|
-
worker.postMessage(args);
|
|
10378
|
-
}
|
|
10379
|
-
catch (error) {
|
|
10380
|
-
reject(error);
|
|
10381
|
-
}
|
|
10382
|
-
});
|
|
10383
|
-
}
|
|
10384
|
-
/**
|
|
10385
|
-
* Execute a function in a separate thread based on the environment
|
|
10386
|
-
*
|
|
10387
|
-
* @param fn The function to execute
|
|
10388
|
-
* @param args The arguments to pass to the function
|
|
10389
|
-
* @returns A promise that resolves with the result of the function
|
|
10390
|
-
*/
|
|
10391
|
-
function executeInThread(fn, ...args) {
|
|
10392
|
-
if (isBrowser$1()) {
|
|
10393
|
-
return executeInWebWorker(fn, ...args);
|
|
10394
|
-
}
|
|
10395
|
-
else if (isNode()) {
|
|
10396
|
-
return executeInWorkerThread(fn, ...args);
|
|
10397
|
-
}
|
|
10398
|
-
else {
|
|
10399
|
-
// Fall back to executing in the main thread
|
|
10400
|
-
return Promise.resolve(fn(...args));
|
|
10401
|
-
}
|
|
10402
|
-
}
|
|
10403
|
-
|
|
10404
10608
|
/**
|
|
10405
10609
|
* Augmentation Registry
|
|
10406
10610
|
*
|
|
@@ -10996,7 +11200,7 @@ class Pipeline {
|
|
|
10996
11200
|
const augFn = augmentation[method];
|
|
10997
11201
|
return augFn.apply(augmentation, workerArgs);
|
|
10998
11202
|
};
|
|
10999
|
-
methodPromise = executeInThread(workerFn,
|
|
11203
|
+
methodPromise = executeInThread(workerFn.toString(), args);
|
|
11000
11204
|
}
|
|
11001
11205
|
catch (threadError) {
|
|
11002
11206
|
console.warn(`Failed to execute in thread, falling back to main thread: ${threadError}`);
|
|
@@ -11658,7 +11862,7 @@ class AugmentationPipeline {
|
|
|
11658
11862
|
const augFn = augmentation[method];
|
|
11659
11863
|
return augFn.apply(augmentation, workerArgs);
|
|
11660
11864
|
};
|
|
11661
|
-
methodPromise = executeInThread(workerFn,
|
|
11865
|
+
methodPromise = executeInThread(workerFn.toString(), args);
|
|
11662
11866
|
}
|
|
11663
11867
|
catch (threadError) {
|
|
11664
11868
|
console.warn(`Failed to execute in thread, falling back to main thread: ${threadError}`);
|
|
@@ -116925,5 +117129,5 @@ var _child_processShim = /*#__PURE__*/Object.freeze({
|
|
|
116925
117129
|
__proto__: null
|
|
116926
117130
|
});
|
|
116927
117131
|
|
|
116928
|
-
export { AugmentationType, BrainyData, BrainyMCPAdapter, BrainyMCPService, ExecutionMode$1 as ExecutionMode, FileSystemStorage, FileSystemStorageAugmentation, HNSWIndex, HNSWIndexOptimized, MCPAugmentationToolset, MCPRequestType, MCP_VERSION, MemoryStorage, MemoryStorageAugmentation, NounType, OPFSStorage, OPFSStorageAugmentation, Pipeline, S3CompatibleStorage as R2Storage, S3CompatibleStorage, SequentialPipeline, ServerSearchActivationAugmentation, ServerSearchConduitAugmentation, StreamlinedExecutionMode, UniversalSentenceEncoder$1 as UniversalSentenceEncoder, VerbType, WebRTCConduitAugmentation, WebSocketConduitAugmentation, addWebSocketSupport, augmentationPipeline$1 as augmentationPipeline, availableAugmentations, cosineDistance$1 as cosineDistance, createAugmentationRegistryPlugin, createAugmentationRegistryRollupPlugin, createConduitAugmentation, createEmbeddingFunction, createMemoryAugmentation, createPipeline, createSenseAugmentation, createServerSearchAugmentations, createStorage, createStreamingPipeline, createTensorFlowEmbeddingFunction, defaultEmbeddingFunction, dotProductDistance, environment, euclideanDistance, executeAugmentation, executeByType, executeSingle, executeStreamlined, getAugmentationsByType, initializeAugmentationPipeline, loadAugmentationModule, loadAugmentationsFromModules, manhattanDistance, pipeline, processStaticData, processStreamingData, registerAugmentation, sequentialPipeline, setAugmentationEnabled };
|
|
117132
|
+
export { AugmentationType, BrainyData, BrainyMCPAdapter, BrainyMCPService, ExecutionMode$1 as ExecutionMode, FileSystemStorage, FileSystemStorageAugmentation, HNSWIndex, HNSWIndexOptimized, MCPAugmentationToolset, MCPRequestType, MCP_VERSION, MemoryStorage, MemoryStorageAugmentation, NounType, OPFSStorage, OPFSStorageAugmentation, Pipeline, S3CompatibleStorage as R2Storage, S3CompatibleStorage, SequentialPipeline, ServerSearchActivationAugmentation, ServerSearchConduitAugmentation, StreamlinedExecutionMode, UniversalSentenceEncoder$1 as UniversalSentenceEncoder, VerbType, WebRTCConduitAugmentation, WebSocketConduitAugmentation, addWebSocketSupport, augmentationPipeline$1 as augmentationPipeline, availableAugmentations, cosineDistance$1 as cosineDistance, createAugmentationRegistryPlugin, createAugmentationRegistryRollupPlugin, createConduitAugmentation, createEmbeddingFunction, createMemoryAugmentation, createPipeline, createSenseAugmentation, createServerSearchAugmentations, createStorage, createStreamingPipeline, createTensorFlowEmbeddingFunction, createThreadedEmbeddingFunction, defaultEmbeddingFunction, dotProductDistance, environment, euclideanDistance, executeAugmentation, executeByType, executeSingle, executeStreamlined, getAugmentationsByType, initializeAugmentationPipeline, loadAugmentationModule, loadAugmentationsFromModules, manhattanDistance, pipeline, processStaticData, processStreamingData, registerAugmentation, sequentialPipeline, setAugmentationEnabled };
|
|
116929
117133
|
//# sourceMappingURL=brainy.js.map
|