@soulcraft/brainy 0.46.0 → 0.47.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/OFFLINE_MODELS.md +56 -0
- package/README.md +45 -0
- package/dist/brainyData.js +7 -9
- package/dist/brainyData.js.map +1 -1
- package/dist/demo.js +2 -2
- package/dist/demo.js.map +1 -1
- package/dist/index.d.ts +2 -3
- package/dist/index.js +3 -9
- package/dist/index.js.map +1 -1
- package/dist/setup.d.ts +3 -3
- package/dist/setup.js +6 -6
- package/dist/setup.js.map +1 -1
- package/dist/utils/distance.js +63 -136
- package/dist/utils/distance.js.map +1 -1
- package/dist/utils/embedding.d.ts +48 -84
- package/dist/utils/embedding.js +182 -598
- package/dist/utils/embedding.js.map +1 -1
- package/dist/utils/robustModelLoader.d.ts +4 -0
- package/dist/utils/robustModelLoader.js +58 -7
- package/dist/utils/robustModelLoader.js.map +1 -1
- package/dist/utils/textEncoding.d.ts +2 -3
- package/dist/utils/textEncoding.js +31 -274
- package/dist/utils/textEncoding.js.map +1 -1
- package/package.json +7 -16
- package/scripts/download-models.cjs +190 -0
package/package.json
CHANGED

```diff
@@ -1,6 +1,6 @@
 {
   "name": "@soulcraft/brainy",
-  "version": "0.46.0",
+  "version": "0.47.0",
   "description": "A vector graph database using HNSW indexing with Origin Private File System storage",
   "main": "dist/index.js",
   "module": "dist/index.js",
@@ -92,7 +92,8 @@
     "_workflow:minor": "node scripts/release-workflow.js minor",
     "_workflow:major": "node scripts/release-workflow.js major",
     "_workflow:dry-run": "npm run build && npm test && npm run _release:dry-run",
-    "_dry-run": "npm pack --dry-run"
+    "_dry-run": "npm pack --dry-run",
+    "download-models": "node scripts/download-models.cjs"
   },
   "keywords": [
     "vector-database",
@@ -128,7 +129,9 @@
     "!dist/framework.min.js.map",
     "LICENSE",
     "README.md",
-    "brainy.png"
+    "brainy.png",
+    "scripts/download-models.cjs",
+    "OFFLINE_MODELS.md"
   ],
   "devDependencies": {
     "@rollup/plugin-commonjs": "^25.0.7",
@@ -159,23 +162,11 @@
   },
   "dependencies": {
     "@aws-sdk/client-s3": "^3.540.0",
-    "@
-    "@tensorflow/tfjs-backend-cpu": "^4.22.0",
-    "@tensorflow/tfjs-backend-webgl": "^4.22.0",
-    "@tensorflow/tfjs-converter": "^4.22.0",
-    "@tensorflow/tfjs-core": "^4.22.0",
+    "@huggingface/transformers": "^3.1.0",
     "buffer": "^6.0.3",
     "dotenv": "^16.4.5",
     "uuid": "^9.0.1"
   },
-  "peerDependencies": {
-    "@soulcraft/brainy-models": ">=0.7.0"
-  },
-  "peerDependenciesMeta": {
-    "@soulcraft/brainy-models": {
-      "optional": true
-    }
-  },
   "prettier": {
     "arrowParens": "always",
     "bracketSameLine": true,
```
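The substantive change above is the embedding backend: the TensorFlow.js packages (plus the optional @soulcraft/brainy-models peer dependency) are replaced by a single @huggingface/transformers dependency. For orientation, here is a minimal sketch of what that dependency is used for; the model name and the pooling/normalize options are taken from scripts/download-models.cjs below, while the surrounding wiring is an assumption for illustration, not Brainy's internal code:

```js
// Hedged sketch: feature extraction with @huggingface/transformers.
// Model name and options mirror scripts/download-models.cjs; everything
// else is illustrative rather than this package's actual implementation.
import { pipeline } from '@huggingface/transformers'

const extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2')
const output = await extractor(['Hello world!'], { pooling: 'mean', normalize: true })

// all-MiniLM-L6-v2 produces 384-dimensional sentence embeddings
console.log(output.data.length) // 384
```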
package/scripts/download-models.cjs
ADDED

```diff
@@ -0,0 +1,190 @@
+#!/usr/bin/env node
+/**
+ * Download and bundle models for offline usage
+ */
+
+const fs = require('fs').promises
+const path = require('path')
+
+const MODEL_NAME = 'Xenova/all-MiniLM-L6-v2'
+const OUTPUT_DIR = './models'
+
+async function downloadModels() {
+  // Use dynamic import for ES modules in CommonJS
+  const { pipeline, env } = await import('@huggingface/transformers')
+
+  // Configure transformers.js to use local cache
+  env.cacheDir = './models-cache'
+  env.allowRemoteModels = true
+  try {
+    console.log('🚀 Downloading all-MiniLM-L6-v2 model for offline bundling...')
+    console.log(`   Model: ${MODEL_NAME}`)
+    console.log(`   Cache: ${env.cacheDir}`)
+
+    // Create output directory
+    await fs.mkdir(OUTPUT_DIR, { recursive: true })
+
+    // Load the model to force download
+    console.log('📥 Loading model pipeline...')
+    const extractor = await pipeline('feature-extraction', MODEL_NAME)
+
+    // Test the model to make sure it works
+    console.log('🧪 Testing model...')
+    const testResult = await extractor(['Hello world!'], {
+      pooling: 'mean',
+      normalize: true
+    })
+
+    console.log(`✅ Model test successful! Embedding dimensions: ${testResult.data.length}`)
+
+    // Copy ALL model files from cache to our models directory
+    console.log('📁 Copying ALL model files to bundle directory...')
+
+    const cacheDir = path.resolve(env.cacheDir)
+    const outputDir = path.resolve(OUTPUT_DIR)
+
+    console.log(`   From: ${cacheDir}`)
+    console.log(`   To: ${outputDir}`)
+
+    // Copy the entire cache directory structure to ensure we get ALL files
+    // including tokenizer.json, config.json, and all ONNX model files
+    const modelCacheDir = path.join(cacheDir, 'Xenova', 'all-MiniLM-L6-v2')
+
+    if (await dirExists(modelCacheDir)) {
+      const targetModelDir = path.join(outputDir, 'Xenova', 'all-MiniLM-L6-v2')
+      console.log(`   Copying complete model: Xenova/all-MiniLM-L6-v2`)
+      await copyDirectory(modelCacheDir, targetModelDir)
+    } else {
+      throw new Error(`Model cache directory not found: ${modelCacheDir}`)
+    }
+
+    console.log('✅ Model bundling complete!')
+    console.log(`   Total size: ${await calculateDirectorySize(outputDir)} MB`)
+    console.log(`   Location: ${outputDir}`)
+
+    // Create a marker file
+    await fs.writeFile(
+      path.join(outputDir, '.brainy-models-bundled'),
+      JSON.stringify({
+        model: MODEL_NAME,
+        bundledAt: new Date().toISOString(),
+        version: '1.0.0'
+      }, null, 2)
+    )
+
+  } catch (error) {
+    console.error('❌ Error downloading models:', error)
+    process.exit(1)
+  }
+}
+
+async function findModelDirectories(baseDir, modelName) {
+  const dirs = []
+
+  try {
+    // Convert model name to expected directory structure
+    const modelPath = modelName.replace('/', '--')
+
+    async function searchDirectory(currentDir) {
+      try {
+        const entries = await fs.readdir(currentDir, { withFileTypes: true })
+
+        for (const entry of entries) {
+          if (entry.isDirectory()) {
+            const fullPath = path.join(currentDir, entry.name)
+
+            // Check if this directory contains model files
+            if (entry.name.includes(modelPath) || entry.name === 'onnx') {
+              const hasModelFiles = await containsModelFiles(fullPath)
+              if (hasModelFiles) {
+                dirs.push(fullPath)
+              }
+            }
+
+            // Recursively search subdirectories
+            await searchDirectory(fullPath)
+          }
+        }
+      } catch (error) {
+        // Ignore access errors
+      }
+    }
+
+    await searchDirectory(baseDir)
+  } catch (error) {
+    console.warn('Warning: Error searching for model directories:', error)
+  }
+
+  return dirs
+}
+
+async function containsModelFiles(dir) {
+  try {
+    const files = await fs.readdir(dir)
+    return files.some(file =>
+      file.endsWith('.onnx') ||
+      file.endsWith('.json') ||
+      file === 'config.json' ||
+      file === 'tokenizer.json'
+    )
+  } catch (error) {
+    return false
+  }
+}
+
+async function dirExists(dir) {
+  try {
+    const stats = await fs.stat(dir)
+    return stats.isDirectory()
+  } catch (error) {
+    return false
+  }
+}
+
+async function copyDirectory(src, dest) {
+  await fs.mkdir(dest, { recursive: true })
+  const entries = await fs.readdir(src, { withFileTypes: true })
+
+  for (const entry of entries) {
+    const srcPath = path.join(src, entry.name)
+    const destPath = path.join(dest, entry.name)
+
+    if (entry.isDirectory()) {
+      await copyDirectory(srcPath, destPath)
+    } else {
+      await fs.copyFile(srcPath, destPath)
+    }
+  }
+}
+
+async function calculateDirectorySize(dir) {
+  let size = 0
+
+  async function calculateSize(currentDir) {
+    try {
+      const entries = await fs.readdir(currentDir, { withFileTypes: true })
+
+      for (const entry of entries) {
+        const fullPath = path.join(currentDir, entry.name)
+
+        if (entry.isDirectory()) {
+          await calculateSize(fullPath)
+        } else {
+          const stats = await fs.stat(fullPath)
+          size += stats.size
+        }
+      }
+    } catch (error) {
+      // Ignore access errors
+    }
+  }
+
+  await calculateSize(dir)
+  return Math.round(size / (1024 * 1024))
+}
+
+// Run the download
+downloadModels().catch(error => {
+  console.error('Fatal error:', error)
+  process.exit(1)
+})
```
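Via the new "download-models" entry in package.json, this script runs as `npm run download-models`: it caches the model under ./models-cache, verifies it with a test embedding, copies the complete model tree into ./models, and writes a .brainy-models-bundled marker. A hedged sketch of consuming that bundle offline follows; env.localModelPath, env.allowRemoteModels, and env.allowLocalModels are standard transformers.js settings, but the wiring Brainy itself expects is described in the new OFFLINE_MODELS.md, whose contents are not part of this diff:

```js
// Hedged sketch: pointing transformers.js at the bundled models so no
// network access is needed. './models' matches OUTPUT_DIR in the script
// above; this usage is an assumption, not code shipped in this package.
import { pipeline, env } from '@huggingface/transformers'

env.localModelPath = './models'   // directory populated by download-models.cjs
env.allowRemoteModels = false     // never fall back to the Hugging Face Hub
env.allowLocalModels = true

const extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2')
```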