@soulcraft/brainy 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/README.md +2 -43
  2. package/dist/brainy.js +776 -252
  3. package/dist/brainy.min.js +765 -749
  4. package/dist/brainyData.d.ts +29 -0
  5. package/dist/patched-platform-node.d.ts +17 -0
  6. package/dist/setup.d.ts +16 -0
  7. package/dist/storage/fileSystemStorage.d.ts.map +1 -1
  8. package/dist/unified.d.ts +5 -3
  9. package/dist/unified.js +775 -253
  10. package/dist/unified.min.js +765 -749
  11. package/dist/utils/distance.d.ts.map +1 -1
  12. package/dist/utils/embedding.d.ts +25 -4
  13. package/dist/utils/embedding.d.ts.map +1 -1
  14. package/dist/utils/environment.d.ts.map +1 -1
  15. package/dist/utils/textEncoding.d.ts +13 -20
  16. package/dist/utils/textEncoding.d.ts.map +1 -1
  17. package/dist/utils/version.d.ts +1 -1
  18. package/package.json +30 -11
  19. package/dist/augmentations/huggingfaceActivation.d.ts +0 -57
  20. package/dist/augmentations/huggingfaceActivation.d.ts.map +0 -1
  21. package/dist/augmentations/huggingfaceActivationRegistration.d.ts +0 -6
  22. package/dist/augmentations/huggingfaceActivationRegistration.d.ts.map +0 -1
  23. package/dist/cli.d.ts +0 -7
  24. package/dist/utils/distance-js.d.ts +0 -38
  25. package/dist/utils/distance-js.d.ts.map +0 -1
  26. package/dist/utils/distance-wasm.d.ts +0 -36
  27. package/dist/utils/distance-wasm.d.ts.map +0 -1
  28. package/dist/utils/huggingfaceEmbedding.d.ts +0 -53
  29. package/dist/utils/huggingfaceEmbedding.d.ts.map +0 -1
  30. package/dist/utils/tensorflowBridge.d.ts +0 -14
  31. package/dist/utils/tensorflowBridge.d.ts.map +0 -1
  32. package/dist/utils/textEncoderPolyfill.d.ts +0 -6
  33. package/dist/utils/textEncoderPolyfill.d.ts.map +0 -1
  34. package/dist/utils/universalDebug.d.ts +0 -21
  35. package/dist/utils/universalDebug.d.ts.map +0 -1
  36. package/dist/utils/universalUuid.d.ts +0 -26
  37. package/dist/utils/universalUuid.d.ts.map +0 -1
package/dist/brainy.js CHANGED
@@ -2397,86 +2397,400 @@ ieee754.write = function (buffer, value, offset, isLE, mLen, nBytes) {
2397
2397
  } (buffer$1));
2398
2398
 
2399
2399
  /**
2400
- * Unified Text Encoding Utilities
2401
- *
2402
- * This module provides a consistent way to handle text encoding/decoding across all environments
2403
- * using the native TextEncoder/TextDecoder APIs.
2400
+ * Utility functions for environment detection
2401
+ */
2402
+ /**
2403
+ * Check if code is running in a browser environment
2404
+ */
2405
+ function isBrowser$1() {
2406
+ return typeof window !== 'undefined' && typeof document !== 'undefined';
2407
+ }
2408
+ /**
2409
+ * Check if code is running in a Node.js environment
2410
+ */
2411
+ function isNode() {
2412
+ // If browser environment is detected, prioritize it over Node.js
2413
+ // This handles cases like jsdom where both window and process exist
2414
+ if (isBrowser$1()) {
2415
+ return false;
2416
+ }
2417
+ return (typeof process !== 'undefined' &&
2418
+ process.versions != null &&
2419
+ process.versions.node != null);
2420
+ }
2421
+ /**
2422
+ * Check if code is running in a Web Worker environment
2423
+ */
2424
+ function isWebWorker() {
2425
+ return (typeof self === 'object' &&
2426
+ self.constructor &&
2427
+ self.constructor.name === 'DedicatedWorkerGlobalScope');
2428
+ }
2429
+ /**
2430
+ * Check if Web Workers are available in the current environment
2431
+ */
2432
+ function areWebWorkersAvailable() {
2433
+ return isBrowser$1() && typeof Worker !== 'undefined';
2434
+ }
2435
+ /**
2436
+ * Check if Worker Threads are available in the current environment (Node.js)
2437
+ */
2438
+ async function areWorkerThreadsAvailable() {
2439
+ if (!isNode())
2440
+ return false;
2441
+ try {
2442
+ // Use dynamic import to avoid errors in browser environments
2443
+ await import('worker_threads');
2444
+ return true;
2445
+ }
2446
+ catch (e) {
2447
+ return false;
2448
+ }
2449
+ }
2450
+ /**
2451
+ * Synchronous version that doesn't actually try to load the module
2452
+ * This is safer in ES module environments
2453
+ */
2454
+ function areWorkerThreadsAvailableSync() {
2455
+ if (!isNode())
2456
+ return false;
2457
+ // In Node.js 24.4.0+, worker_threads is always available
2458
+ return parseInt(process.versions.node.split('.')[0]) >= 24;
2459
+ }
2460
+ /**
2461
+ * Determine if threading is available in the current environment
2462
+ * Returns true if either Web Workers (browser) or Worker Threads (Node.js) are available
2463
+ */
2464
+ function isThreadingAvailable() {
2465
+ return areWebWorkersAvailable() || areWorkerThreadsAvailableSync();
2466
+ }
2467
+ /**
2468
+ * Async version of isThreadingAvailable
2404
2469
  */
2470
+ async function isThreadingAvailableAsync() {
2471
+ return areWebWorkersAvailable() || (await areWorkerThreadsAvailable());
2472
+ }
2473
+
2405
2474
  /**
2406
- * Get a text encoder that works in the current environment
2407
- * @returns A TextEncoder instance
2475
+ * Flag to track if the patch has been applied
2408
2476
  */
2477
+ let patchApplied = false;
2409
2478
  /**
2410
- * Apply the TensorFlow.js platform patch if needed
2411
- * This function patches the global object to provide a PlatformNode class
2412
- * that uses native TextEncoder/TextDecoder
2479
+ * Monkeypatch TensorFlow.js's PlatformNode class to fix TextEncoder/TextDecoder issues
2480
+ * CRITICAL: This runs immediately at the top level when this module is imported
2413
2481
  */
2414
- function applyTensorFlowPatch() {
2482
+ if (typeof globalThis !== 'undefined' && isNode()) {
2415
2483
  try {
2416
- // Define a custom Platform class that works in both Node.js and browser environments
2417
- class Platform {
2418
- constructor() {
2419
- // Create a util object with necessary methods and constructors
2420
- this.util = {
2421
- // Use native TextEncoder and TextDecoder
2484
+ // Ensure TextEncoder/TextDecoder are globally available
2485
+ if (typeof globalThis.TextEncoder === 'undefined') {
2486
+ globalThis.TextEncoder = TextEncoder;
2487
+ }
2488
+ if (typeof globalThis.TextDecoder === 'undefined') {
2489
+ globalThis.TextDecoder = TextDecoder;
2490
+ }
2491
+ // Patch global objects to handle the TensorFlow.js constructor issue
2492
+ // This is needed because TF accesses TextEncoder/TextDecoder as constructors via this.util
2493
+ if (typeof global !== 'undefined') {
2494
+ if (!global.TextEncoder) {
2495
+ global.TextEncoder = TextEncoder;
2496
+ }
2497
+ if (!global.TextDecoder) {
2498
+ global.TextDecoder = TextDecoder;
2499
+ }
2500
+ // Also set the special global constructors that TensorFlow can use safely
2501
+ global.__TextEncoder__ = TextEncoder;
2502
+ global.__TextDecoder__ = TextDecoder;
2503
+ }
2504
+ // CRITICAL FIX: Create a custom util object that TensorFlow.js can use
2505
+ // We'll make this available globally so TensorFlow.js can find it
2506
+ const customUtil = {
2507
+ TextEncoder: TextEncoder,
2508
+ TextDecoder: TextDecoder,
2509
+ types: {
2510
+ isFloat32Array: (arr) => arr instanceof Float32Array,
2511
+ isInt32Array: (arr) => arr instanceof Int32Array,
2512
+ isUint8Array: (arr) => arr instanceof Uint8Array,
2513
+ isUint8ClampedArray: (arr) => arr instanceof Uint8ClampedArray
2514
+ }
2515
+ };
2516
+ // Make the custom util available globally
2517
+ if (typeof global !== 'undefined') {
2518
+ global.__brainy_util__ = customUtil;
2519
+ }
2520
+ // Try to patch the global require cache if possible
2521
+ if (typeof global !== 'undefined' &&
2522
+ global.require &&
2523
+ global.require.cache) {
2524
+ // Find the util module in the cache and patch it
2525
+ for (const key in global.require.cache) {
2526
+ if (key.endsWith('/util.js') || key === 'util') {
2527
+ const utilModule = global.require.cache[key];
2528
+ if (utilModule && utilModule.exports) {
2529
+ Object.assign(utilModule.exports, customUtil);
2530
+ }
2531
+ }
2532
+ }
2533
+ }
2534
+ // CRITICAL: Patch the Node.js util module directly
2535
+ try {
2536
+ const util = require('util');
2537
+ // Ensure TextEncoder and TextDecoder are available as constructors
2538
+ util.TextEncoder = TextEncoder;
2539
+ util.TextDecoder = TextDecoder;
2540
+ }
2541
+ catch (error) {
2542
+ // Ignore if util module is not available
2543
+ }
2544
+ // CRITICAL: Patch Float32Array to handle buffer alignment issues
2545
+ // This fixes the "byte length of Float32Array should be a multiple of 4" error
2546
+ if (typeof global !== 'undefined') {
2547
+ const originalFloat32Array = global.Float32Array;
2548
+ global.Float32Array = class extends originalFloat32Array {
2549
+ constructor(arg, byteOffset, length) {
2550
+ if (arg instanceof ArrayBuffer) {
2551
+ // Ensure buffer is properly aligned for Float32Array (multiple of 4 bytes)
2552
+ const alignedByteOffset = byteOffset || 0;
2553
+ const alignedLength = length !== undefined
2554
+ ? length
2555
+ : (arg.byteLength - alignedByteOffset) / 4;
2556
+ // Check if the buffer slice is properly aligned
2557
+ if ((arg.byteLength - alignedByteOffset) % 4 !== 0 &&
2558
+ length === undefined) {
2559
+ // Create a new aligned buffer if the original isn't properly aligned
2560
+ const alignedByteLength = Math.floor((arg.byteLength - alignedByteOffset) / 4) * 4;
2561
+ const alignedBuffer = new ArrayBuffer(alignedByteLength);
2562
+ const sourceView = new Uint8Array(arg, alignedByteOffset, alignedByteLength);
2563
+ const targetView = new Uint8Array(alignedBuffer);
2564
+ targetView.set(sourceView);
2565
+ super(alignedBuffer);
2566
+ }
2567
+ else {
2568
+ super(arg, alignedByteOffset, alignedLength);
2569
+ }
2570
+ }
2571
+ else {
2572
+ super(arg, byteOffset, length);
2573
+ }
2574
+ }
2575
+ };
2576
+ // Preserve static methods and properties
2577
+ Object.setPrototypeOf(global.Float32Array, originalFloat32Array);
2578
+ Object.defineProperty(global.Float32Array, 'name', {
2579
+ value: 'Float32Array'
2580
+ });
2581
+ Object.defineProperty(global.Float32Array, 'BYTES_PER_ELEMENT', {
2582
+ value: 4
2583
+ });
2584
+ }
2585
+ // CRITICAL: Patch any empty util shims that bundlers might create
2586
+ // This handles cases where bundlers provide empty shims for Node.js modules
2587
+ if (typeof global !== 'undefined') {
2588
+ // Look for common patterns of util shims in bundled code
2589
+ const checkAndPatchUtilShim = (obj) => {
2590
+ if (obj && typeof obj === 'object' && !obj.TextEncoder) {
2591
+ obj.TextEncoder = TextEncoder;
2592
+ obj.TextDecoder = TextDecoder;
2593
+ obj.types = obj.types || {
2594
+ isFloat32Array: (arr) => arr instanceof Float32Array,
2595
+ isInt32Array: (arr) => arr instanceof Int32Array,
2596
+ isUint8Array: (arr) => arr instanceof Uint8Array,
2597
+ isUint8ClampedArray: (arr) => arr instanceof Uint8ClampedArray
2598
+ };
2599
+ }
2600
+ };
2601
+ // Patch any existing util-like objects in global scope
2602
+ if (global._utilShim) {
2603
+ checkAndPatchUtilShim(global._utilShim);
2604
+ }
2605
+ // CRITICAL: Patch the bundled util shim directly
2606
+ // In bundled code, there's often a _utilShim object that needs patching
2607
+ if (typeof globalThis !== 'undefined' &&
2608
+ globalThis._utilShim) {
2609
+ checkAndPatchUtilShim(globalThis._utilShim);
2610
+ }
2611
+ // CRITICAL: Create and patch a global _utilShim if it doesn't exist
2612
+ // This ensures the bundled code will find the patched version
2613
+ if (!global._utilShim) {
2614
+ global._utilShim = {
2422
2615
  TextEncoder: TextEncoder,
2423
- TextDecoder: TextDecoder
2616
+ TextDecoder: TextDecoder,
2617
+ types: {
2618
+ isFloat32Array: (arr) => arr instanceof Float32Array,
2619
+ isInt32Array: (arr) => arr instanceof Int32Array,
2620
+ isUint8Array: (arr) => arr instanceof Uint8Array,
2621
+ isUint8ClampedArray: (arr) => arr instanceof Uint8ClampedArray
2622
+ }
2424
2623
  };
2425
- // Initialize using native constructors
2426
- this.textEncoder = new TextEncoder();
2427
- this.textDecoder = new TextDecoder();
2428
- }
2429
- // Define isFloat32Array directly on the instance
2430
- isFloat32Array(arr) {
2431
- return !!(arr instanceof Float32Array ||
2432
- (arr &&
2433
- Object.prototype.toString.call(arr) === '[object Float32Array]'));
2434
- }
2435
- // Define isTypedArray directly on the instance
2436
- isTypedArray(arr) {
2437
- return !!(ArrayBuffer.isView(arr) && !(arr instanceof DataView));
2438
- }
2439
- }
2440
- // Get the global object in a way that works in both Node.js and browser
2441
- const globalObj = typeof global !== 'undefined'
2442
- ? global
2443
- : typeof window !== 'undefined'
2444
- ? window
2445
- : typeof self !== 'undefined'
2446
- ? self
2447
- : {};
2448
- // Only apply in Node.js environment
2449
- if (typeof process !== 'undefined' &&
2450
- process.versions &&
2451
- process.versions.node) {
2452
- // Assign the Platform class to the global object as PlatformNode for Node.js
2453
- ;
2454
- globalObj.PlatformNode = Platform;
2455
- globalObj.platformNode = new Platform();
2456
- }
2457
- else if (typeof window !== 'undefined' || typeof self !== 'undefined') {
2458
- // In browser environments, we might need to provide similar functionality
2459
- // but we'll use a different name to avoid conflicts
2460
- ;
2461
- globalObj.PlatformBrowser = Platform;
2462
- globalObj.platformBrowser = new Platform();
2624
+ }
2625
+ else {
2626
+ checkAndPatchUtilShim(global._utilShim);
2627
+ }
2628
+ // Also ensure it's available on globalThis
2629
+ if (typeof globalThis !== 'undefined' &&
2630
+ !globalThis._utilShim) {
2631
+ ;
2632
+ globalThis._utilShim = global._utilShim;
2633
+ }
2634
+ // Set up a property descriptor to catch util shim assignments
2635
+ try {
2636
+ Object.defineProperty(global, '_utilShim', {
2637
+ get() {
2638
+ return this.__utilShim || {};
2639
+ },
2640
+ set(value) {
2641
+ checkAndPatchUtilShim(value);
2642
+ this.__utilShim = value;
2643
+ },
2644
+ configurable: true
2645
+ });
2646
+ }
2647
+ catch (e) {
2648
+ // Ignore if property can't be defined
2649
+ }
2650
+ // Also set up property descriptor on globalThis
2651
+ try {
2652
+ Object.defineProperty(globalThis, '_utilShim', {
2653
+ get() {
2654
+ return this.__utilShim || {};
2655
+ },
2656
+ set(value) {
2657
+ checkAndPatchUtilShim(value);
2658
+ this.__utilShim = value;
2659
+ },
2660
+ configurable: true
2661
+ });
2662
+ }
2663
+ catch (e) {
2664
+ // Ignore if property can't be defined
2665
+ }
2666
+ }
2667
+ console.log('Brainy: Successfully patched TensorFlow.js PlatformNode at module load time');
2668
+ patchApplied = true;
2669
+ }
2670
+ catch (error) {
2671
+ console.warn('Brainy: Failed to apply early TensorFlow.js platform patch:', error);
2672
+ }
2673
+ }
2674
+ /**
2675
+ * Apply the TensorFlow.js platform patch if it hasn't been applied already
2676
+ * This is a safety measure in case the module-level patch didn't run
2677
+ * Now works across all environments: browser, Node.js, and serverless/server
2678
+ */
2679
+ async function applyTensorFlowPatch() {
2680
+ // Apply patches for all non-browser environments that might need TensorFlow.js compatibility
2681
+ // This includes Node.js, serverless environments, and other server environments
2682
+ const isBrowserEnv = typeof window !== 'undefined' && typeof document !== 'undefined';
2683
+ if (isBrowserEnv) {
2684
+ return; // Browser environments don't need these patches
2685
+ }
2686
+ // Get the appropriate global object for the current environment
2687
+ const globalObj = (() => {
2688
+ if (typeof globalThis !== 'undefined')
2689
+ return globalThis;
2690
+ if (typeof global !== 'undefined')
2691
+ return global;
2692
+ if (typeof self !== 'undefined')
2693
+ return self;
2694
+ return {}; // Fallback for unknown environments
2695
+ })();
2696
+ // Check if the critical globals exist, not just the flag
2697
+ // This allows re-patching if globals have been deleted
2698
+ const needsPatch = !patchApplied ||
2699
+ typeof globalObj.__TextEncoder__ === 'undefined' ||
2700
+ typeof globalObj.__TextDecoder__ === 'undefined';
2701
+ if (!needsPatch) {
2702
+ return;
2703
+ }
2704
+ try {
2705
+ console.log('Brainy: Applying TensorFlow.js platform patch via function call');
2706
+ // CRITICAL FIX: Patch the global environment to ensure TextEncoder/TextDecoder are available
2707
+ // This approach works by ensuring the global constructors are available before TensorFlow.js loads
2708
+ // Now works across all environments: Node.js, serverless, and other server environments
2709
+ // Make sure TextEncoder and TextDecoder are available globally
2710
+ if (!globalObj.TextEncoder) {
2711
+ globalObj.TextEncoder = TextEncoder;
2712
+ }
2713
+ if (!globalObj.TextDecoder) {
2714
+ globalObj.TextDecoder = TextDecoder;
2715
+ }
2716
+ // Also set the special global constructors that TensorFlow can use safely
2717
+ ;
2718
+ globalObj.__TextEncoder__ = TextEncoder;
2719
+ globalObj.__TextDecoder__ = TextDecoder;
2720
+ // Also patch process.versions to ensure TensorFlow.js detects Node.js correctly
2721
+ if (typeof process !== 'undefined' && process.versions) {
2722
+ // Ensure TensorFlow.js sees this as a Node.js environment
2723
+ if (!process.versions.node) {
2724
+ process.versions.node = process.version;
2725
+ }
2726
+ }
2727
+ // CRITICAL: Patch the Node.js util module directly
2728
+ try {
2729
+ const util = await Promise.resolve().then(function () { return _utilShim$1; });
2730
+ // Ensure TextEncoder and TextDecoder are available as constructors
2731
+ util.TextEncoder = TextEncoder;
2732
+ util.TextDecoder = TextDecoder;
2733
+ }
2734
+ catch (error) {
2735
+ // Ignore if util module is not available
2463
2736
  }
2737
+ patchApplied = true;
2464
2738
  }
2465
2739
  catch (error) {
2466
- console.warn('Failed to apply TensorFlow.js platform patch:', error);
2740
+ console.warn('Brainy: Failed to apply TensorFlow.js platform patch:', error);
2467
2741
  }
2468
2742
  }
2743
+ // Apply patch immediately
2744
+ applyTensorFlowPatch().catch((error) => {
2745
+ console.warn('Failed to apply TensorFlow patch at module load:', error);
2746
+ });
2747
+
2748
+ var textEncoding = /*#__PURE__*/Object.freeze({
2749
+ __proto__: null,
2750
+ applyTensorFlowPatch: applyTensorFlowPatch
2751
+ });
2469
2752
 
2470
2753
  /**
2471
- * This file is imported for its side effects to patch the environment
2754
+ * CRITICAL: This file is imported for its side effects to patch the environment
2472
2755
  * for TensorFlow.js before any other library code runs.
2473
2756
  *
2474
2757
  * It ensures that by the time TensorFlow.js is imported by any other
2475
2758
  * module, the necessary compatibility fixes for the current Node.js
2476
2759
  * environment are already in place.
2477
- */
2478
- // Apply the TensorFlow.js platform patch if needed
2760
+ *
2761
+ * This file MUST be imported as the first import in unified.ts to prevent
2762
+ * race conditions with TensorFlow.js initialization. Failure to do so will
2763
+ * result in errors like "TextEncoder is not a constructor" when the package
2764
+ * is used in Node.js environments.
2765
+ *
2766
+ * The package.json file marks this file as having side effects to prevent
2767
+ * tree-shaking by bundlers, ensuring the patch is always applied.
2768
+ */
2769
+ // Get the appropriate global object for the current environment
2770
+ const globalObj = (() => {
2771
+ if (typeof globalThis !== 'undefined')
2772
+ return globalThis;
2773
+ if (typeof global !== 'undefined')
2774
+ return global;
2775
+ if (typeof self !== 'undefined')
2776
+ return self;
2777
+ return null; // No global object available
2778
+ })();
2779
+ // Define TextEncoder and TextDecoder globally to make sure they're available
2780
+ // Now works across all environments: Node.js, serverless, and other server environments
2781
+ if (globalObj) {
2782
+ if (!globalObj.TextEncoder) {
2783
+ globalObj.TextEncoder = TextEncoder;
2784
+ }
2785
+ if (!globalObj.TextDecoder) {
2786
+ globalObj.TextDecoder = TextDecoder;
2787
+ }
2788
+ globalObj.__TextEncoder__ = TextEncoder;
2789
+ globalObj.__TextDecoder__ = TextDecoder;
2790
+ }
2791
+ // Apply the TensorFlow.js platform patch
2479
2792
  applyTensorFlowPatch();
2793
+ console.log('Applied TensorFlow.js patch via ES modules in setup.ts');
2480
2794
 
2481
2795
  // Unique ID creation requires a high quality random # generator. In the browser we therefore
2482
2796
  // require the crypto API and do not support built-in fallback to lower quality random number
@@ -2636,12 +2950,31 @@ async function calculateDistancesBatch(queryVector, vectors, distanceFunction =
2636
2950
  tf = await self.importTensorFlow();
2637
2951
  }
2638
2952
  else {
2639
- // Dynamically import TensorFlow.js core module and backends
2640
- tf = await Promise.resolve().then(function () { return index$2; });
2641
- // Import CPU backend
2642
- await Promise.resolve().then(function () { return index$1; });
2643
- // Set CPU as the backend
2644
- await tf.setBackend('cpu');
2953
+ // CRITICAL: Ensure TextEncoder/TextDecoder are available before TensorFlow.js loads
2954
+ try {
2955
+ // Use dynamic imports for all environments to ensure TensorFlow loads after patch
2956
+ if (typeof process !== 'undefined' && process.versions && process.versions.node) {
2957
+ // Ensure TextEncoder/TextDecoder are globally available in Node.js
2958
+ const util = await Promise.resolve().then(function () { return _utilShim$1; });
2959
+ if (typeof global.TextEncoder === 'undefined') {
2960
+ global.TextEncoder = util.TextEncoder;
2961
+ }
2962
+ if (typeof global.TextDecoder === 'undefined') {
2963
+ global.TextDecoder = util.TextDecoder;
2964
+ }
2965
+ }
2966
+ // Apply the TensorFlow.js patch
2967
+ const { applyTensorFlowPatch } = await Promise.resolve().then(function () { return textEncoding; });
2968
+ await applyTensorFlowPatch();
2969
+ // Now load TensorFlow.js core module using dynamic imports
2970
+ tf = await Promise.resolve().then(function () { return index$2; });
2971
+ await Promise.resolve().then(function () { return index$1; });
2972
+ await tf.setBackend('cpu');
2973
+ }
2974
+ catch (error) {
2975
+ console.error('Failed to initialize TensorFlow.js:', error);
2976
+ throw error;
2977
+ }
2645
2978
  }
2646
2979
  // Convert vectors to tensors
2647
2980
  const queryTensor = tf.tensor2d([queryVector]);
@@ -2753,76 +3086,6 @@ async function calculateDistancesBatch(queryVector, vectors, distanceFunction =
2753
3086
  }
2754
3087
  }
2755
3088
 
2756
- /**
2757
- * Utility functions for environment detection
2758
- */
2759
- /**
2760
- * Check if code is running in a browser environment
2761
- */
2762
- function isBrowser$1() {
2763
- return typeof window !== 'undefined' && typeof document !== 'undefined';
2764
- }
2765
- /**
2766
- * Check if code is running in a Node.js environment
2767
- */
2768
- function isNode() {
2769
- return (typeof process !== 'undefined' &&
2770
- process.versions != null &&
2771
- process.versions.node != null);
2772
- }
2773
- /**
2774
- * Check if code is running in a Web Worker environment
2775
- */
2776
- function isWebWorker() {
2777
- return (typeof self === 'object' &&
2778
- self.constructor &&
2779
- self.constructor.name === 'DedicatedWorkerGlobalScope');
2780
- }
2781
- /**
2782
- * Check if Web Workers are available in the current environment
2783
- */
2784
- function areWebWorkersAvailable() {
2785
- return isBrowser$1() && typeof Worker !== 'undefined';
2786
- }
2787
- /**
2788
- * Check if Worker Threads are available in the current environment (Node.js)
2789
- */
2790
- async function areWorkerThreadsAvailable() {
2791
- if (!isNode())
2792
- return false;
2793
- try {
2794
- // Use dynamic import to avoid errors in browser environments
2795
- await import('worker_threads');
2796
- return true;
2797
- }
2798
- catch (e) {
2799
- return false;
2800
- }
2801
- }
2802
- /**
2803
- * Synchronous version that doesn't actually try to load the module
2804
- * This is safer in ES module environments
2805
- */
2806
- function areWorkerThreadsAvailableSync() {
2807
- if (!isNode())
2808
- return false;
2809
- // In Node.js 24.4.0+, worker_threads is always available
2810
- return parseInt(process.versions.node.split('.')[0]) >= 24;
2811
- }
2812
- /**
2813
- * Determine if threading is available in the current environment
2814
- * Returns true if either Web Workers (browser) or Worker Threads (Node.js) are available
2815
- */
2816
- function isThreadingAvailable() {
2817
- return areWebWorkersAvailable() || areWorkerThreadsAvailableSync();
2818
- }
2819
- /**
2820
- * Async version of isThreadingAvailable
2821
- */
2822
- async function isThreadingAvailableAsync() {
2823
- return areWebWorkersAvailable() || (await areWorkerThreadsAvailable());
2824
- }
2825
-
2826
3089
  /**
2827
3090
  * Utility functions for executing functions in Worker Threads (Node.js) or Web Workers (Browser)
2828
3091
  * This implementation leverages Node.js 24's improved Worker Threads API for better performance
@@ -3301,80 +3564,127 @@ let UniversalSentenceEncoder$1 = class UniversalSentenceEncoder {
3301
3564
  }
3302
3565
  /**
3303
3566
  * Add polyfills and patches for TensorFlow.js compatibility
3304
- * This addresses issues with TensorFlow.js in Node.js environments
3567
+ * This addresses issues with TensorFlow.js across all server environments
3568
+ * (Node.js, serverless, and other server environments)
3569
+ *
3570
+ * Note: The main TensorFlow.js patching is now centralized in textEncoding.ts
3571
+ * and applied through setup.ts. This method only adds additional utility functions
3572
+ * that might be needed by TensorFlow.js.
3305
3573
  */
3306
- addNodeCompatibilityPolyfills() {
3307
- // Only apply in Node.js environment
3308
- if (typeof process === 'undefined' ||
3309
- !process.versions ||
3310
- !process.versions.node) {
3311
- return;
3312
- }
3313
- // Add polyfill for isFloat32Array in Node.js 24.4.0
3314
- // This fixes the "Cannot read properties of undefined (reading 'isFloat32Array')" error
3315
- if (typeof global !== 'undefined') {
3316
- try {
3317
- // Define a custom PlatformNode class
3318
- class PlatformNode {
3319
- constructor() {
3320
- // Create a util object with necessary methods
3321
- this.util = {
3322
- // Add isFloat32Array and isTypedArray directly to util
3323
- isFloat32Array: (arr) => {
3324
- return !!(arr instanceof Float32Array ||
3325
- (arr &&
3326
- Object.prototype.toString.call(arr) ===
3327
- '[object Float32Array]'));
3328
- },
3329
- isTypedArray: (arr) => {
3330
- return !!(ArrayBuffer.isView(arr) && !(arr instanceof DataView));
3331
- },
3332
- // Use native TextEncoder and TextDecoder
3333
- TextEncoder: TextEncoder,
3334
- TextDecoder: TextDecoder
3335
- };
3336
- // Initialize encoders using native constructors
3337
- this.textEncoder = new TextEncoder();
3338
- this.textDecoder = new TextDecoder();
3339
- }
3340
- // Define isFloat32Array directly on the instance
3341
- isFloat32Array(arr) {
3342
- return !!(arr instanceof Float32Array ||
3343
- (arr &&
3344
- Object.prototype.toString.call(arr) === '[object Float32Array]'));
3345
- }
3346
- // Define isTypedArray directly on the instance
3347
- isTypedArray(arr) {
3348
- return !!(ArrayBuffer.isView(arr) && !(arr instanceof DataView));
3349
- }
3350
- }
3351
- // Assign the PlatformNode class to the global object
3352
- ;
3353
- global.PlatformNode = PlatformNode;
3354
- global.platformNode = new PlatformNode();
3355
- }
3356
- catch (error) {
3357
- console.warn('Failed to define global PlatformNode class:', error);
3358
- }
3574
+ addServerCompatibilityPolyfills() {
3575
+ // Apply in all non-browser environments (Node.js, serverless, server environments)
3576
+ const isBrowserEnv = typeof window !== 'undefined' && typeof document !== 'undefined';
3577
+ if (isBrowserEnv) {
3578
+ return; // Browser environments don't need these polyfills
3579
+ }
3580
+ // Get the appropriate global object for the current environment
3581
+ const globalObj = (() => {
3582
+ if (typeof globalThis !== 'undefined')
3583
+ return globalThis;
3584
+ if (typeof global !== 'undefined')
3585
+ return global;
3586
+ if (typeof self !== 'undefined')
3587
+ return self;
3588
+ return {}; // Fallback for unknown environments
3589
+ })();
3590
+ // Add polyfill for utility functions across all server environments
3591
+ // This fixes issues like "Cannot read properties of undefined (reading 'isFloat32Array')"
3592
+ try {
3359
3593
  // Ensure the util object exists
3360
- if (!global.util) {
3361
- global.util = {};
3594
+ if (!globalObj.util) {
3595
+ globalObj.util = {};
3362
3596
  }
3363
3597
  // Add isFloat32Array method if it doesn't exist
3364
- if (!global.util.isFloat32Array) {
3365
- global.util.isFloat32Array = (obj) => {
3598
+ if (!globalObj.util.isFloat32Array) {
3599
+ globalObj.util.isFloat32Array = (obj) => {
3366
3600
  return !!(obj instanceof Float32Array ||
3367
3601
  (obj &&
3368
3602
  Object.prototype.toString.call(obj) === '[object Float32Array]'));
3369
3603
  };
3370
3604
  }
3371
3605
  // Add isTypedArray method if it doesn't exist
3372
- if (!global.util.isTypedArray) {
3373
- global.util.isTypedArray = (obj) => {
3606
+ if (!globalObj.util.isTypedArray) {
3607
+ globalObj.util.isTypedArray = (obj) => {
3374
3608
  return !!(ArrayBuffer.isView(obj) && !(obj instanceof DataView));
3375
3609
  };
3376
3610
  }
3377
3611
  }
3612
+ catch (error) {
3613
+ console.warn('Failed to add utility polyfills:', error);
3614
+ }
3615
+ }
3616
+ /**
3617
+ * Check if we're running in a test environment
3618
+ */
3619
+ isTestEnvironment() {
3620
+ // Safely check for Node.js environment first
3621
+ if (typeof process === 'undefined') {
3622
+ return false;
3623
+ }
3624
+ return (process.env.NODE_ENV === 'test' ||
3625
+ process.env.VITEST === 'true' ||
3626
+ (typeof global !== 'undefined' && global.__vitest__) ||
3627
+ process.argv.some((arg) => arg.includes('vitest')));
3628
+ }
3629
+ /**
3630
+ * Log message only if not in test environment
3631
+ */
3632
+ logIfNotTest(level, message, ...args) {
3633
+ if (!this.isTestEnvironment()) {
3634
+ console[level](message, ...args);
3635
+ }
3636
+ }
3637
+ /**
3638
+ * Load the Universal Sentence Encoder model with retry logic
3639
+ * This helps handle network failures and JSON parsing errors from TensorFlow Hub
3640
+ * @param loadFunction The function to load the model
3641
+ * @param maxRetries Maximum number of retry attempts
3642
+ * @param baseDelay Base delay in milliseconds for exponential backoff
3643
+ */
3644
+ async loadModelWithRetry(loadFunction, maxRetries = 3, baseDelay = 1000) {
3645
+ let lastError = null;
3646
+ for (let attempt = 0; attempt <= maxRetries; attempt++) {
3647
+ try {
3648
+ this.logIfNotTest('log', attempt === 0
3649
+ ? 'Loading Universal Sentence Encoder model...'
3650
+ : `Retrying Universal Sentence Encoder model loading (attempt ${attempt + 1}/${maxRetries + 1})...`);
3651
+ const model = await loadFunction();
3652
+ if (attempt > 0) {
3653
+ this.logIfNotTest('log', 'Universal Sentence Encoder model loaded successfully after retry');
3654
+ }
3655
+ return model;
3656
+ }
3657
+ catch (error) {
3658
+ lastError = error;
3659
+ const errorMessage = lastError.message || String(lastError);
3660
+ // Check if this is a network-related error that might benefit from retry
3661
+ const isRetryableError = errorMessage.includes('Failed to parse model JSON') ||
3662
+ errorMessage.includes('Failed to fetch') ||
3663
+ errorMessage.includes('Network error') ||
3664
+ errorMessage.includes('ENOTFOUND') ||
3665
+ errorMessage.includes('ECONNRESET') ||
3666
+ errorMessage.includes('ETIMEDOUT') ||
3667
+ errorMessage.includes('JSON') ||
3668
+ errorMessage.includes('model.json');
3669
+ if (attempt < maxRetries && isRetryableError) {
3670
+ const delay = baseDelay * Math.pow(2, attempt); // Exponential backoff
3671
+ this.logIfNotTest('warn', `Universal Sentence Encoder model loading failed (attempt ${attempt + 1}): ${errorMessage}. Retrying in ${delay}ms...`);
3672
+ await new Promise((resolve) => setTimeout(resolve, delay));
3673
+ }
3674
+ else {
3675
+ // Either we've exhausted retries or this is not a retryable error
3676
+ if (attempt >= maxRetries) {
3677
+ this.logIfNotTest('error', `Universal Sentence Encoder model loading failed after ${maxRetries + 1} attempts. Last error: ${errorMessage}`);
3678
+ }
3679
+ else {
3680
+ this.logIfNotTest('error', `Universal Sentence Encoder model loading failed with non-retryable error: ${errorMessage}`);
3681
+ }
3682
+ throw lastError;
3683
+ }
3684
+ }
3685
+ }
3686
+ // This should never be reached, but just in case
3687
+ throw lastError || new Error('Unknown error during model loading');
3378
3688
  }
3379
3689
  /**
3380
3690
  * Initialize the embedding model
@@ -3393,43 +3703,87 @@ let UniversalSentenceEncoder$1 = class UniversalSentenceEncoder {
3393
3703
  originalWarn(message, ...optionalParams);
3394
3704
  };
3395
3705
  // Add polyfills for TensorFlow.js compatibility
3396
- this.addNodeCompatibilityPolyfills();
3706
+ this.addServerCompatibilityPolyfills();
3397
3707
  // TensorFlow.js will use its default EPSILON value
3398
- // Dynamically import TensorFlow.js core module and backends
3399
- // Use type assertions to tell TypeScript these modules exist
3400
- this.tf = await Promise.resolve().then(function () { return index$2; });
3401
- // Import CPU backend (always needed as fallback)
3402
- await Promise.resolve().then(function () { return index$1; });
3403
- // Try to import WebGL backend for GPU acceleration in browser environments
3708
+ // CRITICAL: Ensure TextEncoder/TextDecoder are available before TensorFlow.js loads
3404
3709
  try {
3405
- if (typeof window !== 'undefined') {
3406
- await Promise.resolve().then(function () { return index; });
3407
- // Check if WebGL is available using setBackend instead of findBackend
3710
+ // Get the appropriate global object for the current environment
3711
+ const globalObj = (() => {
3712
+ if (typeof globalThis !== 'undefined')
3713
+ return globalThis;
3714
+ if (typeof global !== 'undefined')
3715
+ return global;
3716
+ if (typeof self !== 'undefined')
3717
+ return self;
3718
+ return null;
3719
+ })();
3720
+ // Ensure TextEncoder/TextDecoder are globally available in server environments
3721
+ if (globalObj) {
3722
+ // Try to use Node.js util module if available (Node.js environments)
3408
3723
  try {
3409
- if (this.tf.setBackend) {
3410
- await this.tf.setBackend('webgl');
3411
- this.backend = 'webgl';
3412
- console.log('Using WebGL backend for TensorFlow.js');
3724
+ if (typeof process !== 'undefined' && process.versions && process.versions.node) {
3725
+ const util = await Promise.resolve().then(function () { return _utilShim$1; });
3726
+ if (!globalObj.TextEncoder) {
3727
+ globalObj.TextEncoder = util.TextEncoder;
3728
+ }
3729
+ if (!globalObj.TextDecoder) {
3730
+ globalObj.TextDecoder = util.TextDecoder;
3731
+ }
3413
3732
  }
3414
- else {
3415
- console.warn('tf.setBackend is not available, falling back to CPU');
3733
+ }
3734
+ catch (utilError) {
3735
+ // Fallback to standard TextEncoder/TextDecoder for non-Node.js server environments
3736
+ if (!globalObj.TextEncoder) {
3737
+ globalObj.TextEncoder = TextEncoder;
3738
+ }
3739
+ if (!globalObj.TextDecoder) {
3740
+ globalObj.TextDecoder = TextDecoder;
3416
3741
  }
3417
3742
  }
3418
- catch (e) {
3419
- console.warn('WebGL backend not available, falling back to CPU:', e);
3420
- this.backend = 'cpu';
3743
+ }
3744
+ // Apply the TensorFlow.js patch
3745
+ const { applyTensorFlowPatch } = await Promise.resolve().then(function () { return textEncoding; });
3746
+ await applyTensorFlowPatch();
3747
+ // Now load TensorFlow.js core module using dynamic imports
3748
+ this.tf = await Promise.resolve().then(function () { return index$2; });
3749
+ // Import CPU backend (always needed as fallback)
3750
+ await Promise.resolve().then(function () { return index$1; });
3751
+ // Try to import WebGL backend for GPU acceleration in browser environments
3752
+ try {
3753
+ if (typeof window !== 'undefined') {
3754
+ await Promise.resolve().then(function () { return index; });
3755
+ // Check if WebGL is available
3756
+ try {
3757
+ if (this.tf.setBackend) {
3758
+ await this.tf.setBackend('webgl');
3759
+ this.backend = 'webgl';
3760
+ console.log('Using WebGL backend for TensorFlow.js');
3761
+ }
3762
+ else {
3763
+ console.warn('tf.setBackend is not available, falling back to CPU');
3764
+ }
3765
+ }
3766
+ catch (e) {
3767
+ console.warn('WebGL backend not available, falling back to CPU:', e);
3768
+ this.backend = 'cpu';
3769
+ }
3421
3770
  }
3422
3771
  }
3772
+ catch (error) {
3773
+ console.warn('WebGL backend not available, falling back to CPU:', error);
3774
+ this.backend = 'cpu';
3775
+ }
3776
+ // Load Universal Sentence Encoder using dynamic import
3777
+ this.use = await Promise.resolve().then(function () { return universalSentenceEncoder_esm; });
3423
3778
  }
3424
3779
  catch (error) {
3425
- console.warn('WebGL backend not available, falling back to CPU:', error);
3426
- this.backend = 'cpu';
3780
+ this.logIfNotTest('error', 'Failed to initialize TensorFlow.js:', error);
3781
+ throw error;
3427
3782
  }
3428
3783
  // Set the backend
3429
3784
  if (this.tf.setBackend) {
3430
3785
  await this.tf.setBackend(this.backend);
3431
3786
  }
3432
- this.use = await Promise.resolve().then(function () { return universalSentenceEncoder_esm; });
3433
3787
  // Log the module structure to help with debugging
3434
3788
  console.log('Universal Sentence Encoder module structure in main thread:', Object.keys(this.use), this.use.default ? Object.keys(this.use.default) : 'No default export');
3435
3789
  // Try to find the load function in different possible module structures
@@ -3437,14 +3791,14 @@ let UniversalSentenceEncoder$1 = class UniversalSentenceEncoder {
3437
3791
  if (!loadFunction) {
3438
3792
  throw new Error('Could not find Universal Sentence Encoder load function');
3439
3793
  }
3440
- // Load the model
3441
- this.model = await loadFunction();
3794
+ // Load the model with retry logic for network failures
3795
+ this.model = await this.loadModelWithRetry(loadFunction);
3442
3796
  this.initialized = true;
3443
3797
  // Restore original console.warn
3444
3798
  console.warn = originalWarn;
3445
3799
  }
3446
3800
  catch (error) {
3447
- console.error('Failed to initialize Universal Sentence Encoder:', error);
3801
+ this.logIfNotTest('error', 'Failed to initialize Universal Sentence Encoder:', error);
3448
3802
  throw new Error(`Failed to initialize Universal Sentence Encoder: ${error}`);
3449
3803
  }
3450
3804
  }
@@ -3491,7 +3845,7 @@ let UniversalSentenceEncoder$1 = class UniversalSentenceEncoder {
3491
3845
  return embeddingArray[0];
3492
3846
  }
3493
3847
  catch (error) {
3494
- console.error('Failed to embed text with Universal Sentence Encoder:', error);
3848
+ this.logIfNotTest('error', 'Failed to embed text with Universal Sentence Encoder:', error);
3495
3849
  throw new Error(`Failed to embed text with Universal Sentence Encoder: ${error}`);
3496
3850
  }
3497
3851
  }
@@ -3540,7 +3894,7 @@ let UniversalSentenceEncoder$1 = class UniversalSentenceEncoder {
3540
3894
  return results;
3541
3895
  }
3542
3896
  catch (error) {
3543
- console.error('Failed to batch embed text with Universal Sentence Encoder:', error);
3897
+ this.logIfNotTest('error', 'Failed to batch embed text with Universal Sentence Encoder:', error);
3544
3898
  throw new Error(`Failed to batch embed text with Universal Sentence Encoder: ${error}`);
3545
3899
  }
3546
3900
  }
@@ -3556,7 +3910,7 @@ let UniversalSentenceEncoder$1 = class UniversalSentenceEncoder {
3556
3910
  this.initialized = false;
3557
3911
  }
3558
3912
  catch (error) {
3559
- console.error('Failed to dispose Universal Sentence Encoder:', error);
3913
+ this.logIfNotTest('error', 'Failed to dispose Universal Sentence Encoder:', error);
3560
3914
  }
3561
3915
  }
3562
3916
  return Promise.resolve();
@@ -3644,11 +3998,36 @@ function findUSELoadFunction(sentenceEncoderModule) {
3644
3998
  }
3645
3999
  return loadFunction;
3646
4000
  }
4001
+ /**
4002
+ * Check if we're running in a test environment (standalone version)
4003
+ */
4004
+ function isTestEnvironment() {
4005
+ // Safely check for Node.js environment first
4006
+ if (typeof process === 'undefined') {
4007
+ return false;
4008
+ }
4009
+ return (process.env.NODE_ENV === 'test' ||
4010
+ process.env.VITEST === 'true' ||
4011
+ (typeof global !== 'undefined' && global.__vitest__) ||
4012
+ process.argv.some((arg) => arg.includes('vitest')));
4013
+ }
4014
+ /**
4015
+ * Log message only if not in test environment (standalone version)
4016
+ */
4017
+ function logIfNotTest(level, message, ...args) {
4018
+ if (!isTestEnvironment()) {
4019
+ console[level](message, ...args);
4020
+ }
4021
+ }
3647
4022
  /**
3648
4023
  * Create an embedding function from an embedding model
3649
- * @param model Embedding model to use
4024
+ * @param model Embedding model to use (optional, defaults to UniversalSentenceEncoder)
3650
4025
  */
3651
4026
  function createEmbeddingFunction(model) {
4027
+ // If no model is provided, use the default TensorFlow embedding function
4028
+ if (!model) {
4029
+ return createTensorFlowEmbeddingFunction();
4030
+ }
3652
4031
  return async (data) => {
3653
4032
  return await model.embed(data);
3654
4033
  };
@@ -3666,13 +4045,20 @@ function createTensorFlowEmbeddingFunction() {
3666
4045
  try {
3667
4046
  // Initialize the model if it hasn't been initialized yet
3668
4047
  if (!sharedModelInitialized) {
3669
- await sharedModel.init();
3670
- sharedModelInitialized = true;
4048
+ try {
4049
+ await sharedModel.init();
4050
+ sharedModelInitialized = true;
4051
+ }
4052
+ catch (initError) {
4053
+ // Reset the flag so we can retry initialization on the next call
4054
+ sharedModelInitialized = false;
4055
+ throw initError;
4056
+ }
3671
4057
  }
3672
4058
  return await sharedModel.embed(data);
3673
4059
  }
3674
4060
  catch (error) {
3675
- console.error('Failed to use TensorFlow embedding:', error);
4061
+ logIfNotTest('error', 'Failed to use TensorFlow embedding:', error);
3676
4062
  throw new Error(`Universal Sentence Encoder is required but failed: ${error}`);
3677
4063
  }
3678
4064
  };
@@ -3704,7 +4090,7 @@ const defaultBatchEmbeddingFunction = async (dataArray) => {
3704
4090
  return await sharedBatchModel.embedBatch(dataArray);
3705
4091
  }
3706
4092
  catch (error) {
3707
- console.error('Failed to use TensorFlow batch embedding:', error);
4093
+ logIfNotTest('error', 'Failed to use TensorFlow batch embedding:', error);
3708
4094
  throw new Error(`Universal Sentence Encoder batch embedding failed: ${error}`);
3709
4095
  }
3710
4096
  };
@@ -8081,6 +8467,26 @@ async function createServerSearchAugmentations(serverUrl, options = {}) {
8081
8467
  * Main class that provides the vector database functionality
8082
8468
  */
8083
8469
  class BrainyData {
8470
+ /**
8471
+ * Get the vector dimensions
8472
+ */
8473
+ get dimensions() {
8474
+ return this._dimensions;
8475
+ }
8476
+ /**
8477
+ * Get the maximum connections parameter from HNSW configuration
8478
+ */
8479
+ get maxConnections() {
8480
+ const config = this.index.getConfig();
8481
+ return config.M || 16;
8482
+ }
8483
+ /**
8484
+ * Get the efConstruction parameter from HNSW configuration
8485
+ */
8486
+ get efConstruction() {
8487
+ const config = this.index.getConfig();
8488
+ return config.efConstruction || 200;
8489
+ }
8084
8490
  /**
8085
8491
  * Create a new vector database
8086
8492
  */
@@ -8094,6 +8500,12 @@ class BrainyData {
8094
8500
  this.remoteServerConfig = null;
8095
8501
  this.serverSearchConduit = null;
8096
8502
  this.serverConnection = null;
8503
+ // Validate dimensions
8504
+ if (config.dimensions !== undefined && config.dimensions <= 0) {
8505
+ throw new Error('Dimensions must be a positive number');
8506
+ }
8507
+ // Set dimensions (default to 512 for embedding functions, or require explicit config)
8508
+ this._dimensions = config.dimensions || 512;
8097
8509
  // Set distance function
8098
8510
  this.distanceFunction = config.distanceFunction || cosineDistance$1;
8099
8511
  // Check if optimized HNSW index configuration is provided
@@ -8197,6 +8609,13 @@ class BrainyData {
8197
8609
  // Clear the index and add all nouns
8198
8610
  this.index.clear();
8199
8611
  for (const noun of nouns) {
8612
+ // Check if the vector dimensions match the expected dimensions
8613
+ if (noun.vector.length !== this._dimensions) {
8614
+ console.warn(`Skipping noun ${noun.id} due to dimension mismatch: expected ${this._dimensions}, got ${noun.vector.length}`);
8615
+ // Optionally, you could delete the mismatched noun from storage
8616
+ // await this.storage!.deleteNoun(noun.id)
8617
+ continue;
8618
+ }
8200
8619
  // Add to index
8201
8620
  await this.index.addItem({
8202
8621
  id: noun.id,
@@ -8280,6 +8699,10 @@ class BrainyData {
8280
8699
  if (!vector) {
8281
8700
  throw new Error('Vector is undefined or null');
8282
8701
  }
8702
+ // Validate vector dimensions
8703
+ if (vector.length !== this._dimensions) {
8704
+ throw new Error(`Vector dimension mismatch: expected ${this._dimensions}, got ${vector.length}`);
8705
+ }
8283
8706
  // Use ID from options if it exists, otherwise from metadata, otherwise generate a new UUID
8284
8707
  const id = options.id ||
8285
8708
  (metadata && typeof metadata === 'object' && 'id' in metadata
@@ -8306,7 +8729,12 @@ class BrainyData {
8306
8729
  metadata.noun = NounType.Concept;
8307
8730
  }
8308
8731
  }
8309
- await this.storage.saveMetadata(id, metadata);
8732
+ // Ensure metadata has the correct id field
8733
+ let metadataToSave = metadata;
8734
+ if (metadata && typeof metadata === 'object') {
8735
+ metadataToSave = { ...metadata, id };
8736
+ }
8737
+ await this.storage.saveMetadata(id, metadataToSave);
8310
8738
  }
8311
8739
  // If addToRemote is true and we're connected to a remote server, add to remote as well
8312
8740
  if (options.addToRemote && this.isConnectedToRemoteServer()) {
@@ -8324,6 +8752,18 @@ class BrainyData {
8324
8752
  throw new Error(`Failed to add vector: ${error}`);
8325
8753
  }
8326
8754
  }
8755
+ /**
8756
+ * Add a text item to the database with automatic embedding
8757
+ * This is a convenience method for adding text data with metadata
8758
+ * @param text Text data to add
8759
+ * @param metadata Metadata to associate with the text
8760
+ * @param options Additional options
8761
+ * @returns The ID of the added item
8762
+ */
8763
+ async addItem(text, metadata, options = {}) {
8764
+ // Use the existing add method with forceEmbed to ensure text is embedded
8765
+ return this.add(text, metadata, { ...options, forceEmbed: true });
8766
+ }
8327
8767
  /**
8328
8768
  * Add data to both local and remote Brainy instances
8329
8769
  * @param vectorOrData Vector or data to add
@@ -8475,7 +8915,9 @@ class BrainyData {
8475
8915
  * @returns Array of search results
8476
8916
  */
8477
8917
  async searchByNounTypes(queryVectorOrData, k = 10, nounTypes = null, options = {}) {
8478
- await this.ensureInitialized();
8918
+ if (!this.isInitialized) {
8919
+ throw new Error('BrainyData must be initialized before searching. Call init() first.');
8920
+ }
8479
8921
  try {
8480
8922
  let queryVector;
8481
8923
  // Check if input is already a vector
@@ -8569,6 +9011,9 @@ class BrainyData {
8569
9011
  * @returns Array of search results
8570
9012
  */
8571
9013
  async search(queryVectorOrData, k = 10, options = {}) {
9014
+ if (!this.isInitialized) {
9015
+ throw new Error('BrainyData must be initialized before searching. Call init() first.');
9016
+ }
8572
9017
  // If searching for verbs directly
8573
9018
  if (options.searchVerbs) {
8574
9019
  const verbResults = await this.searchVerbs(queryVectorOrData, k, {
@@ -8617,6 +9062,9 @@ class BrainyData {
8617
9062
  * @returns Array of search results
8618
9063
  */
8619
9064
  async searchLocal(queryVectorOrData, k = 10, options = {}) {
9065
+ if (!this.isInitialized) {
9066
+ throw new Error('BrainyData must be initialized before searching. Call init() first.');
9067
+ }
8620
9068
  // If input is a string and not a vector, automatically vectorize it
8621
9069
  let queryToUse = queryVectorOrData;
8622
9070
  if (typeof queryVectorOrData === 'string' && !options.forceEmbed) {
@@ -10173,6 +10621,7 @@ class FileSystemStorage {
10173
10621
  if (!this.isInitialized)
10174
10622
  await this.init();
10175
10623
  const filePath = path.join(this.metadataDir, `${id}.json`);
10624
+ await this.ensureDirectoryExists(path.dirname(filePath));
10176
10625
  await fs.promises.writeFile(filePath, JSON.stringify(metadata, null, 2));
10177
10626
  }
10178
10627
  /**
@@ -10198,7 +10647,60 @@ class FileSystemStorage {
10198
10647
  async clear() {
10199
10648
  if (!this.isInitialized)
10200
10649
  await this.init();
10201
- await fs.promises.rm(this.rootDir, { recursive: true, force: true });
10650
+ // Helper function to recursively remove directory contents
10651
+ const removeDirectoryContents = async (dirPath) => {
10652
+ try {
10653
+ const files = await fs.promises.readdir(dirPath, { withFileTypes: true });
10654
+ for (const file of files) {
10655
+ const fullPath = path.join(dirPath, file.name);
10656
+ if (file.isDirectory()) {
10657
+ await removeDirectoryContents(fullPath);
10658
+ // Use fs.promises.rm with recursive option instead of rmdir
10659
+ try {
10660
+ await fs.promises.rm(fullPath, { recursive: true, force: true });
10661
+ }
10662
+ catch (rmError) {
10663
+ // Fallback to rmdir if rm fails
10664
+ await fs.promises.rmdir(fullPath);
10665
+ }
10666
+ }
10667
+ else {
10668
+ await fs.promises.unlink(fullPath);
10669
+ }
10670
+ }
10671
+ }
10672
+ catch (error) {
10673
+ if (error.code !== 'ENOENT') {
10674
+ console.error(`Error removing directory contents ${dirPath}:`, error);
10675
+ throw error;
10676
+ }
10677
+ }
10678
+ };
10679
+ try {
10680
+ // First try the modern approach
10681
+ await fs.promises.rm(this.rootDir, { recursive: true, force: true });
10682
+ }
10683
+ catch (error) {
10684
+ console.warn('Modern rm failed, falling back to manual cleanup:', error);
10685
+ // Fallback: manually remove contents then directory
10686
+ try {
10687
+ await removeDirectoryContents(this.rootDir);
10688
+ // Use fs.promises.rm with recursive option instead of rmdir
10689
+ try {
10690
+ await fs.promises.rm(this.rootDir, { recursive: true, force: true });
10691
+ }
10692
+ catch (rmError) {
10693
+ // Final fallback to rmdir if rm fails
10694
+ await fs.promises.rmdir(this.rootDir);
10695
+ }
10696
+ }
10697
+ catch (fallbackError) {
10698
+ if (fallbackError.code !== 'ENOENT') {
10699
+ console.error('Manual cleanup also failed:', fallbackError);
10700
+ throw fallbackError;
10701
+ }
10702
+ }
10703
+ }
10202
10704
  this.isInitialized = false; // Reset state
10203
10705
  await this.init(); // Re-create directories
10204
10706
  }
@@ -13957,19 +14459,17 @@ class BrainyMCPService {
13957
14459
  if (typeof window !== 'undefined' && typeof globalThis.Buffer === 'undefined') {
13958
14460
  globalThis.Buffer = buffer$1.Buffer;
13959
14461
  }
13960
- /**
13961
- * Unified entry point for Brainy
13962
- * This file exports everything from index.ts
13963
- * Environment detection is handled here and made available to all components
13964
- */
13965
- // Export environment information
14462
+ // Export environment information with lazy evaluation
13966
14463
  const environment = {
13967
- isBrowser: typeof window !== 'undefined',
13968
- isNode: typeof process !== 'undefined' && process.versions && process.versions.node,
13969
- isServerless: typeof window === 'undefined' &&
13970
- (typeof process === 'undefined' ||
13971
- !process.versions ||
13972
- !process.versions.node)
14464
+ get isBrowser() {
14465
+ return isBrowser$1();
14466
+ },
14467
+ get isNode() {
14468
+ return isNode();
14469
+ },
14470
+ get isServerless() {
14471
+ return !isBrowser$1() && !isNode();
14472
+ }
13973
14473
  };
13974
14474
  // Make environment information available globally
13975
14475
  if (typeof globalThis !== 'undefined') {
@@ -13982,6 +14482,38 @@ console.log(`Brainy running in ${environment.isBrowser
13982
14482
  ? 'Node.js'
13983
14483
  : 'serverless/unknown'} environment`);
13984
14484
 
14485
+ // Util shim with TextEncoder/TextDecoder support
14486
+ const TextEncoder$1 = globalThis.TextEncoder || (typeof global !== 'undefined' && global.TextEncoder) || class TextEncoder {
14487
+ encode(input) {
14488
+ return new Uint8Array(Buffer.from(input, 'utf8'));
14489
+ }
14490
+ };
14491
+
14492
+ const TextDecoder$1 = globalThis.TextDecoder || (typeof global !== 'undefined' && global.TextDecoder) || class TextDecoder {
14493
+ decode(input) {
14494
+ return Buffer.from(input).toString('utf8');
14495
+ }
14496
+ };
14497
+
14498
+ const types = {
14499
+ isFloat32Array: (arr) => arr instanceof Float32Array,
14500
+ isInt32Array: (arr) => arr instanceof Int32Array,
14501
+ isUint8Array: (arr) => arr instanceof Uint8Array,
14502
+ isUint8ClampedArray: (arr) => arr instanceof Uint8ClampedArray
14503
+ };
14504
+
14505
+ var _utilShim = { TextEncoder: TextEncoder$1, TextDecoder: TextDecoder$1, types };
14506
+ const promises = {};
14507
+
14508
+ var _utilShim$1 = /*#__PURE__*/Object.freeze({
14509
+ __proto__: null,
14510
+ TextDecoder: TextDecoder$1,
14511
+ TextEncoder: TextEncoder$1,
14512
+ default: _utilShim,
14513
+ promises: promises,
14514
+ types: types
14515
+ });
14516
+
13985
14517
  /**
13986
14518
  * @license
13987
14519
  * Copyright 2020 Google LLC. All Rights Reserved.
@@ -22029,14 +22561,6 @@ var _nodeResolve_empty$1 = /*#__PURE__*/Object.freeze({
22029
22561
 
22030
22562
  var require$$0 = /*@__PURE__*/getAugmentedNamespace(_nodeResolve_empty$1);
22031
22563
 
22032
- var _utilShim = {}; const promises = {};
22033
-
22034
- var _utilShim$1 = /*#__PURE__*/Object.freeze({
22035
- __proto__: null,
22036
- default: _utilShim,
22037
- promises: promises
22038
- });
22039
-
22040
22564
  var require$$1 = /*@__PURE__*/getAugmentedNamespace(_utilShim$1);
22041
22565
 
22042
22566
  /**
@@ -87286,5 +87810,5 @@ var universalSentenceEncoder_esm = /*#__PURE__*/Object.freeze({
87286
87810
  version: version
87287
87811
  });
87288
87812
 
87289
- export { AugmentationType, BrainyData, BrainyMCPAdapter, BrainyMCPService, ExecutionMode$1 as ExecutionMode, FileSystemStorage, FileSystemStorageAugmentation, HNSWIndex, HNSWIndexOptimized, MCPAugmentationToolset, MCPRequestType, MCP_VERSION, MemoryStorage, MemoryStorageAugmentation, NounType, OPFSStorage, OPFSStorageAugmentation, Pipeline, S3CompatibleStorage as R2Storage, S3CompatibleStorage, SequentialPipeline, ServerSearchActivationAugmentation, ServerSearchConduitAugmentation, StreamlinedExecutionMode, UniversalSentenceEncoder$1 as UniversalSentenceEncoder, VerbType, WebRTCConduitAugmentation, WebSocketConduitAugmentation, addWebSocketSupport, areWebWorkersAvailable, areWorkerThreadsAvailable, areWorkerThreadsAvailableSync, augmentationPipeline$1 as augmentationPipeline, availableAugmentations, cleanupWorkerPools, cosineDistance$1 as cosineDistance, createAugmentationRegistryPlugin, createAugmentationRegistryRollupPlugin, createConduitAugmentation, createEmbeddingFunction, createMemoryAugmentation, createPipeline, createSenseAugmentation, createServerSearchAugmentations, createStorage, createStreamingPipeline, createTensorFlowEmbeddingFunction, createThreadedEmbeddingFunction, defaultEmbeddingFunction, dotProductDistance, environment, euclideanDistance, executeAugmentation, executeByType, executeInThread, executeSingle, executeStreamlined, getAugmentationsByType, initializeAugmentationPipeline, isBrowser$1 as isBrowser, isNode, isThreadingAvailable, isThreadingAvailableAsync, isWebWorker, loadAugmentationModule, loadAugmentationsFromModules, manhattanDistance, pipeline, processStaticData, processStreamingData, registerAugmentation, sequentialPipeline, setAugmentationEnabled };
87813
+ export { AugmentationType, BrainyData, BrainyMCPAdapter, BrainyMCPService, ExecutionMode$1 as ExecutionMode, FileSystemStorage, FileSystemStorageAugmentation, HNSWIndex, HNSWIndexOptimized, MCPAugmentationToolset, MCPRequestType, MCP_VERSION, MemoryStorage, MemoryStorageAugmentation, NounType, OPFSStorage, OPFSStorageAugmentation, Pipeline, S3CompatibleStorage as R2Storage, S3CompatibleStorage, SequentialPipeline, ServerSearchActivationAugmentation, ServerSearchConduitAugmentation, StreamlinedExecutionMode, UniversalSentenceEncoder$1 as UniversalSentenceEncoder, VerbType, WebRTCConduitAugmentation, WebSocketConduitAugmentation, addWebSocketSupport, applyTensorFlowPatch, areWebWorkersAvailable, areWorkerThreadsAvailable, areWorkerThreadsAvailableSync, augmentationPipeline$1 as augmentationPipeline, availableAugmentations, cleanupWorkerPools, cosineDistance$1 as cosineDistance, createAugmentationRegistryPlugin, createAugmentationRegistryRollupPlugin, createConduitAugmentation, createEmbeddingFunction, createMemoryAugmentation, createPipeline, createSenseAugmentation, createServerSearchAugmentations, createStorage, createStreamingPipeline, createTensorFlowEmbeddingFunction, createThreadedEmbeddingFunction, defaultEmbeddingFunction, dotProductDistance, environment, euclideanDistance, executeAugmentation, executeByType, executeInThread, executeSingle, executeStreamlined, getAugmentationsByType, initializeAugmentationPipeline, isBrowser$1 as isBrowser, isNode, isThreadingAvailable, isThreadingAvailableAsync, isWebWorker, loadAugmentationModule, loadAugmentationsFromModules, manhattanDistance, pipeline, processStaticData, processStreamingData, registerAugmentation, sequentialPipeline, setAugmentationEnabled };
87290
87814
  //# sourceMappingURL=brainy.js.map