@soulcraft/brainy 0.46.0 → 0.47.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/OFFLINE_MODELS.md +56 -0
- package/README.md +45 -0
- package/dist/brainyData.js +7 -9
- package/dist/brainyData.js.map +1 -1
- package/dist/demo.js +2 -2
- package/dist/demo.js.map +1 -1
- package/dist/index.d.ts +2 -3
- package/dist/index.js +3 -9
- package/dist/index.js.map +1 -1
- package/dist/setup.d.ts +3 -3
- package/dist/setup.js +6 -6
- package/dist/setup.js.map +1 -1
- package/dist/utils/distance.js +63 -136
- package/dist/utils/distance.js.map +1 -1
- package/dist/utils/embedding.d.ts +48 -84
- package/dist/utils/embedding.js +182 -598
- package/dist/utils/embedding.js.map +1 -1
- package/dist/utils/robustModelLoader.d.ts +4 -0
- package/dist/utils/robustModelLoader.js +58 -7
- package/dist/utils/robustModelLoader.js.map +1 -1
- package/dist/utils/textEncoding.d.ts +2 -3
- package/dist/utils/textEncoding.js +31 -274
- package/dist/utils/textEncoding.js.map +1 -1
- package/package.json +7 -16
- package/scripts/download-models.cjs +190 -0
package/package.json
CHANGED

```diff
@@ -1,6 +1,6 @@
 {
   "name": "@soulcraft/brainy",
-  "version": "0.46.0",
+  "version": "0.47.0",
   "description": "A vector graph database using HNSW indexing with Origin Private File System storage",
   "main": "dist/index.js",
   "module": "dist/index.js",
@@ -92,7 +92,8 @@
     "_workflow:minor": "node scripts/release-workflow.js minor",
     "_workflow:major": "node scripts/release-workflow.js major",
     "_workflow:dry-run": "npm run build && npm test && npm run _release:dry-run",
-    "_dry-run": "npm pack --dry-run"
+    "_dry-run": "npm pack --dry-run",
+    "download-models": "node scripts/download-models.cjs"
   },
   "keywords": [
     "vector-database",
@@ -128,7 +129,9 @@
     "!dist/framework.min.js.map",
     "LICENSE",
     "README.md",
-    "brainy.png"
+    "brainy.png",
+    "scripts/download-models.cjs",
+    "OFFLINE_MODELS.md"
   ],
   "devDependencies": {
     "@rollup/plugin-commonjs": "^25.0.7",
@@ -159,23 +162,11 @@
   },
   "dependencies": {
     "@aws-sdk/client-s3": "^3.540.0",
-    "@
-    "@tensorflow/tfjs-backend-cpu": "^4.22.0",
-    "@tensorflow/tfjs-backend-webgl": "^4.22.0",
-    "@tensorflow/tfjs-converter": "^4.22.0",
-    "@tensorflow/tfjs-core": "^4.22.0",
+    "@huggingface/transformers": "^3.1.0",
     "buffer": "^6.0.3",
     "dotenv": "^16.4.5",
     "uuid": "^9.0.1"
   },
-  "peerDependencies": {
-    "@soulcraft/brainy-models": ">=0.7.0"
-  },
-  "peerDependenciesMeta": {
-    "@soulcraft/brainy-models": {
-      "optional": true
-    }
-  },
   "prettier": {
     "arrowParens": "always",
     "bracketSameLine": true,
```
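The substantive change above is the embedding backend: the TensorFlow.js packages (plus the optional @soulcraft/brainy-models peer dependency) are replaced by a single @huggingface/transformers dependency. For orientation, here is a minimal sketch of what that dependency is used for; the model name and the pooling/normalize options are taken from scripts/download-models.cjs below, while the surrounding wiring is an assumption for illustration, not Brainy's internal code:

```js
// Hedged sketch: feature extraction with @huggingface/transformers.
// Model name and options mirror scripts/download-models.cjs; everything
// else is illustrative rather than this package's actual implementation.
import { pipeline } from '@huggingface/transformers'

const extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2')
const output = await extractor(['Hello world!'], { pooling: 'mean', normalize: true })

// all-MiniLM-L6-v2 produces 384-dimensional sentence embeddings
console.log(output.data.length) // 384
```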
package/scripts/download-models.cjs
ADDED

```diff
@@ -0,0 +1,190 @@
+#!/usr/bin/env node
+/**
+ * Download and bundle models for offline usage
+ */
+
+const fs = require('fs').promises
+const path = require('path')
+
+const MODEL_NAME = 'Xenova/all-MiniLM-L6-v2'
+const OUTPUT_DIR = './models'
+
+async function downloadModels() {
+  // Use dynamic import for ES modules in CommonJS
+  const { pipeline, env } = await import('@huggingface/transformers')
+
+  // Configure transformers.js to use local cache
+  env.cacheDir = './models-cache'
+  env.allowRemoteModels = true
+  try {
+    console.log('🚀 Downloading all-MiniLM-L6-v2 model for offline bundling...')
+    console.log(`   Model: ${MODEL_NAME}`)
+    console.log(`   Cache: ${env.cacheDir}`)
+
+    // Create output directory
+    await fs.mkdir(OUTPUT_DIR, { recursive: true })
+
+    // Load the model to force download
+    console.log('📥 Loading model pipeline...')
+    const extractor = await pipeline('feature-extraction', MODEL_NAME)
+
+    // Test the model to make sure it works
+    console.log('🧪 Testing model...')
+    const testResult = await extractor(['Hello world!'], {
+      pooling: 'mean',
+      normalize: true
+    })
+
+    console.log(`✅ Model test successful! Embedding dimensions: ${testResult.data.length}`)
+
+    // Copy ALL model files from cache to our models directory
+    console.log('📁 Copying ALL model files to bundle directory...')
+
+    const cacheDir = path.resolve(env.cacheDir)
+    const outputDir = path.resolve(OUTPUT_DIR)
+
+    console.log(`   From: ${cacheDir}`)
+    console.log(`   To: ${outputDir}`)
+
+    // Copy the entire cache directory structure to ensure we get ALL files
+    // including tokenizer.json, config.json, and all ONNX model files
+    const modelCacheDir = path.join(cacheDir, 'Xenova', 'all-MiniLM-L6-v2')
+
+    if (await dirExists(modelCacheDir)) {
+      const targetModelDir = path.join(outputDir, 'Xenova', 'all-MiniLM-L6-v2')
+      console.log(`   Copying complete model: Xenova/all-MiniLM-L6-v2`)
+      await copyDirectory(modelCacheDir, targetModelDir)
+    } else {
+      throw new Error(`Model cache directory not found: ${modelCacheDir}`)
+    }
+
+    console.log('✅ Model bundling complete!')
+    console.log(`   Total size: ${await calculateDirectorySize(outputDir)} MB`)
+    console.log(`   Location: ${outputDir}`)
+
+    // Create a marker file
+    await fs.writeFile(
+      path.join(outputDir, '.brainy-models-bundled'),
+      JSON.stringify({
+        model: MODEL_NAME,
+        bundledAt: new Date().toISOString(),
+        version: '1.0.0'
+      }, null, 2)
+    )
+
+  } catch (error) {
+    console.error('❌ Error downloading models:', error)
+    process.exit(1)
+  }
+}
+
+async function findModelDirectories(baseDir, modelName) {
+  const dirs = []
+
+  try {
+    // Convert model name to expected directory structure
+    const modelPath = modelName.replace('/', '--')
+
+    async function searchDirectory(currentDir) {
+      try {
+        const entries = await fs.readdir(currentDir, { withFileTypes: true })
+
+        for (const entry of entries) {
+          if (entry.isDirectory()) {
+            const fullPath = path.join(currentDir, entry.name)
+
+            // Check if this directory contains model files
+            if (entry.name.includes(modelPath) || entry.name === 'onnx') {
+              const hasModelFiles = await containsModelFiles(fullPath)
+              if (hasModelFiles) {
+                dirs.push(fullPath)
+              }
+            }
+
+            // Recursively search subdirectories
+            await searchDirectory(fullPath)
+          }
+        }
+      } catch (error) {
+        // Ignore access errors
+      }
+    }
+
+    await searchDirectory(baseDir)
+  } catch (error) {
+    console.warn('Warning: Error searching for model directories:', error)
+  }
+
+  return dirs
+}
+
+async function containsModelFiles(dir) {
+  try {
+    const files = await fs.readdir(dir)
+    return files.some(file =>
+      file.endsWith('.onnx') ||
+      file.endsWith('.json') ||
+      file === 'config.json' ||
+      file === 'tokenizer.json'
+    )
+  } catch (error) {
+    return false
+  }
+}
+
+async function dirExists(dir) {
+  try {
+    const stats = await fs.stat(dir)
+    return stats.isDirectory()
+  } catch (error) {
+    return false
+  }
+}
+
+async function copyDirectory(src, dest) {
+  await fs.mkdir(dest, { recursive: true })
+  const entries = await fs.readdir(src, { withFileTypes: true })
+
+  for (const entry of entries) {
+    const srcPath = path.join(src, entry.name)
+    const destPath = path.join(dest, entry.name)
+
+    if (entry.isDirectory()) {
+      await copyDirectory(srcPath, destPath)
+    } else {
+      await fs.copyFile(srcPath, destPath)
+    }
+  }
+}
+
+async function calculateDirectorySize(dir) {
+  let size = 0
+
+  async function calculateSize(currentDir) {
+    try {
+      const entries = await fs.readdir(currentDir, { withFileTypes: true })
+
+      for (const entry of entries) {
+        const fullPath = path.join(currentDir, entry.name)
+
+        if (entry.isDirectory()) {
+          await calculateSize(fullPath)
+        } else {
+          const stats = await fs.stat(fullPath)
+          size += stats.size
+        }
+      }
+    } catch (error) {
+      // Ignore access errors
+    }
+  }
+
+  await calculateSize(dir)
+  return Math.round(size / (1024 * 1024))
+}
+
+// Run the download
+downloadModels().catch(error => {
+  console.error('Fatal error:', error)
+  process.exit(1)
+})
```
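Via the new "download-models" entry in package.json, this script runs as `npm run download-models`: it caches the model under ./models-cache, verifies it with a test embedding, copies the complete model tree into ./models, and writes a .brainy-models-bundled marker. A hedged sketch of consuming that bundle offline follows; env.localModelPath, env.allowRemoteModels, and env.allowLocalModels are standard transformers.js settings, but the wiring Brainy itself expects is described in the new OFFLINE_MODELS.md, whose contents are not part of this diff:

```js
// Hedged sketch: pointing transformers.js at the bundled models so no
// network access is needed. './models' matches OUTPUT_DIR in the script
// above; this usage is an assumption, not code shipped in this package.
import { pipeline, env } from '@huggingface/transformers'

env.localModelPath = './models'   // directory populated by download-models.cjs
env.allowRemoteModels = false     // never fall back to the Hugging Face Hub
env.allowLocalModels = true

const extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2')
```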