@soulcraft/brainy 6.4.0 → 6.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/models/all-MiniLM-L6-v2-q8/config.json +25 -0
- package/assets/models/all-MiniLM-L6-v2-q8/model.onnx +0 -0
- package/assets/models/all-MiniLM-L6-v2-q8/tokenizer.json +30686 -0
- package/assets/models/all-MiniLM-L6-v2-q8/vocab.json +1 -0
- package/dist/critical/model-guardian.d.ts +5 -22
- package/dist/critical/model-guardian.js +38 -210
- package/dist/embeddings/EmbeddingManager.d.ts +7 -17
- package/dist/embeddings/EmbeddingManager.js +28 -136
- package/dist/embeddings/wasm/AssetLoader.d.ts +67 -0
- package/dist/embeddings/wasm/AssetLoader.js +238 -0
- package/dist/embeddings/wasm/EmbeddingPostProcessor.d.ts +60 -0
- package/dist/embeddings/wasm/EmbeddingPostProcessor.js +123 -0
- package/dist/embeddings/wasm/ONNXInferenceEngine.d.ts +55 -0
- package/dist/embeddings/wasm/ONNXInferenceEngine.js +154 -0
- package/dist/embeddings/wasm/WASMEmbeddingEngine.d.ts +82 -0
- package/dist/embeddings/wasm/WASMEmbeddingEngine.js +231 -0
- package/dist/embeddings/wasm/WordPieceTokenizer.d.ts +71 -0
- package/dist/embeddings/wasm/WordPieceTokenizer.js +264 -0
- package/dist/embeddings/wasm/index.d.ts +13 -0
- package/dist/embeddings/wasm/index.js +15 -0
- package/dist/embeddings/wasm/types.d.ts +114 -0
- package/dist/embeddings/wasm/types.js +25 -0
- package/dist/setup.d.ts +11 -11
- package/dist/setup.js +17 -31
- package/dist/utils/embedding.d.ts +45 -62
- package/dist/utils/embedding.js +61 -440
- package/dist/vfs/VirtualFileSystem.d.ts +14 -0
- package/dist/vfs/VirtualFileSystem.js +56 -6
- package/package.json +10 -3
- package/scripts/download-model.cjs +175 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@soulcraft/brainy",
|
|
3
|
-
"version": "6.4.0",
|
|
3
|
+
"version": "6.6.0",
|
|
4
4
|
"description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. Stage 3 CANONICAL: 42 nouns × 127 verbs covering 96-97% of all human knowledge.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.js",
|
|
@@ -64,7 +64,8 @@
|
|
|
64
64
|
"./dist/cortex/backupRestore.js": false
|
|
65
65
|
},
|
|
66
66
|
"engines": {
|
|
67
|
-
"node": "22.x"
|
|
67
|
+
"node": "22.x",
|
|
68
|
+
"bun": ">=1.0.0"
|
|
68
69
|
},
|
|
69
70
|
"scripts": {
|
|
70
71
|
"build": "npm run build:types:if-needed && npm run build:patterns:if-needed && npm run build:keywords:if-needed && tsc && tsc -p tsconfig.cli.json",
|
|
@@ -90,6 +91,10 @@
|
|
|
90
91
|
"test:ci-unit": "CI=true vitest run --config tests/configs/vitest.unit.config.ts",
|
|
91
92
|
"test:ci-integration": "NODE_OPTIONS='--max-old-space-size=16384' CI=true vitest run --config tests/configs/vitest.integration.config.ts",
|
|
92
93
|
"test:ci": "npm run test:ci-unit",
|
|
94
|
+
"test:bun": "bun tests/integration/bun-compile-test.ts",
|
|
95
|
+
"test:bun:compile": "bun build tests/integration/bun-compile-test.ts --compile --outfile /tmp/brainy-bun-test && /tmp/brainy-bun-test",
|
|
96
|
+
"test:wasm": "npx vitest run tests/integration/wasm-embeddings.test.ts",
|
|
97
|
+
"download-model": "node scripts/download-model.cjs",
|
|
93
98
|
"download-models": "node scripts/download-models.cjs",
|
|
94
99
|
"download-models:q8": "node scripts/download-models.cjs",
|
|
95
100
|
"models:verify": "node scripts/ensure-models.js",
|
|
@@ -142,7 +147,9 @@
|
|
|
142
147
|
"dist/**/*.js",
|
|
143
148
|
"dist/**/*.d.ts",
|
|
144
149
|
"bin/",
|
|
150
|
+
"assets/models/**/*",
|
|
145
151
|
"scripts/download-models.cjs",
|
|
152
|
+
"scripts/download-model.cjs",
|
|
146
153
|
"scripts/ensure-models.js",
|
|
147
154
|
"scripts/prepare-models.js",
|
|
148
155
|
"brainy.png",
|
|
@@ -180,7 +187,7 @@
|
|
|
180
187
|
"@azure/identity": "^4.0.0",
|
|
181
188
|
"@azure/storage-blob": "^12.17.0",
|
|
182
189
|
"@google-cloud/storage": "^7.14.0",
|
|
183
|
-
"
|
|
190
|
+
"onnxruntime-web": "^1.22.0",
|
|
184
191
|
"@msgpack/msgpack": "^3.1.2",
|
|
185
192
|
"@types/js-yaml": "^4.0.9",
|
|
186
193
|
"boxen": "^8.0.1",
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Download Model Assets
|
|
4
|
+
*
|
|
5
|
+
* Downloads the all-MiniLM-L6-v2 Q8 model from Hugging Face.
|
|
6
|
+
* Run: node scripts/download-model.cjs
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
const fs = require('node:fs')
|
|
10
|
+
const path = require('node:path')
|
|
11
|
+
const https = require('node:https')
|
|
12
|
+
|
|
13
|
+
const MODEL_DIR = path.join(__dirname, '..', 'assets', 'models', 'all-MiniLM-L6-v2-q8')
|
|
14
|
+
const BASE_URL = 'https://huggingface.co/Xenova/all-MiniLM-L6-v2/resolve/main/onnx'
|
|
15
|
+
|
|
16
|
+
// Remote assets to fetch. For each entry:
//   name - label printed in progress/skip/error messages
//   url  - remote location of the asset
//   dest - file name the asset is stored under inside MODEL_DIR
const FILES = [
  // Quantized ONNX weights, saved locally under the shorter name model.onnx.
  { name: 'model_quantized.onnx', url: BASE_URL + '/model_quantized.onnx', dest: 'model.onnx' },
  { name: 'tokenizer.json', url: 'https://huggingface.co/Xenova/all-MiniLM-L6-v2/resolve/main/tokenizer.json', dest: 'tokenizer.json' },
  { name: 'config.json', url: 'https://huggingface.co/Xenova/all-MiniLM-L6-v2/resolve/main/config.json', dest: 'config.json' },
  // Plain-text vocabulary; main() converts it to vocab.json after downloading.
  { name: 'vocab.txt', url: 'https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/vocab.txt', dest: 'vocab.txt' },
]
|
|
38
|
+
|
|
39
|
+
/**
 * Download `url` to `destPath` over HTTPS, following redirects.
 *
 * The body is streamed into `<destPath>.part` and only renamed onto `destPath`
 * once the transfer has completed, so an interrupted download never leaves a
 * truncated file at `destPath` that a later run would mistake for a finished one.
 *
 * @param {string} url - Absolute https:// URL to fetch.
 * @param {string} destPath - Final location of the downloaded file.
 * @param {number} [maxRedirects=5] - Remaining redirect budget; the promise
 *   rejects with "Too many redirects" once it is exhausted.
 * @returns {Promise<void>} Resolves when the file is fully written to `destPath`.
 *   Rejects on network errors, non-200 responses (`HTTP <status>`), filesystem
 *   errors, or an incomplete transfer.
 */
function downloadFile(url, destPath, maxRedirects = 5) {
  return new Promise((resolve, reject) => {
    // `<=` rather than `===` so a negative budget cannot recurse without bound.
    if (maxRedirects <= 0) {
      reject(new Error('Too many redirects'))
      return
    }

    const requestOptions = {
      headers: {
        'User-Agent': 'Brainy-Model-Downloader/1.0',
      },
    }

    // Passing the URL itself (instead of hostname + path) keeps the port and
    // makes Node reject non-HTTPS targets up front.
    const request = https.get(url, requestOptions, (response) => {
      const { statusCode, headers } = response

      // Handle redirects
      if (statusCode >= 300 && statusCode < 400 && headers.location) {
        response.resume() // Drain the body so the socket is released
        // new URL(location, base) resolves relative and absolute targets alike.
        const redirectUrl = new URL(headers.location, url).toString()
        console.log(` ↳ Redirecting to: ${redirectUrl.slice(0, 80)}...`)
        downloadFile(redirectUrl, destPath, maxRedirects - 1).then(resolve, reject)
        return
      }

      if (statusCode !== 200) {
        response.resume() // Drain the body so the socket is released
        reject(new Error(`HTTP ${statusCode}`))
        return
      }

      const partPath = `${destPath}.part`
      const fileStream = fs.createWriteStream(partPath)
      const totalBytes = Number.parseInt(headers['content-length'] || '0', 10)
      let downloadedBytes = 0
      let failed = false

      // Single failure path: stop both streams, remove the partial file, reject once.
      const fail = (err) => {
        if (failed) return
        failed = true
        response.destroy()
        fileStream.destroy()
        fs.unlink(partPath, () => reject(err))
      }

      response.on('data', (chunk) => {
        downloadedBytes += chunk.length
        if (totalBytes > 0) {
          const percent = Math.round((downloadedBytes / totalBytes) * 100)
          process.stdout.write(`\r Progress: ${percent}% (${Math.round(downloadedBytes / 1024 / 1024)}MB)`)
        }
      })

      // A connection dropped mid-transfer surfaces as an error on the response.
      response.on('error', fail)
      fileStream.on('error', fail)

      // 'close' fires after the file descriptor is released, so the rename is safe.
      fileStream.on('close', () => {
        if (failed) return
        if (!response.complete || (totalBytes > 0 && downloadedBytes !== totalBytes)) {
          fail(new Error(`Incomplete download: received ${downloadedBytes} of ${totalBytes} bytes`))
          return
        }
        fs.rename(partPath, destPath, (renameError) => {
          if (renameError) {
            fail(renameError)
            return
          }
          console.log(`\n ✅ Downloaded: ${path.basename(destPath)} (${Math.round(downloadedBytes / 1024 / 1024)}MB)`)
          resolve()
        })
      })

      response.pipe(fileStream)
    })

    request.on('error', reject)
  })
}
|
|
108
|
+
|
|
109
|
+
/**
 * Convert a one-token-per-line vocab.txt into vocab.json, a JSON object that
 * maps each token to its id, then delete the vocab.txt file.
 *
 * A token's id is its zero-based line number in vocab.txt. Blank lines are
 * skipped but still consume their line number, so they cannot shift the ids of
 * the tokens that follow. Both LF and CRLF line endings are accepted.
 *
 * @param {string} vocabTxtPath - Path of the vocab.txt file to read (removed on success).
 * @param {string} vocabJsonPath - Path of the vocab.json file to write.
 * @returns {void}
 */
function convertVocabToJson(vocabTxtPath, vocabJsonPath) {
  console.log('📝 Converting vocab.txt to vocab.json...')
  const lines = fs.readFileSync(vocabTxtPath, 'utf-8').split(/\r?\n/)

  // Null-prototype object: a token such as "__proto__" is stored as an ordinary
  // key instead of being swallowed by the Object.prototype accessor.
  const vocab = Object.create(null)
  lines.forEach((token, lineIndex) => {
    if (token.trim() !== '') {
      vocab[token] = lineIndex
    }
  })

  fs.writeFileSync(vocabJsonPath, JSON.stringify(vocab))
  console.log(` ✅ Created vocab.json with ${Object.keys(vocab).length} tokens`)

  // Remove vocab.txt since we have vocab.json
  fs.unlinkSync(vocabTxtPath)
}
|
|
128
|
+
|
|
129
|
+
/**
 * Script entry point: make sure every model asset exists in MODEL_DIR.
 *
 * Creates MODEL_DIR, downloads each entry of FILES that is not already present
 * as a non-empty file, converts vocab.txt to vocab.json, and prints a listing
 * of the resulting files. Exits the process with status 1 as soon as any
 * download fails.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('🔽 Downloading all-MiniLM-L6-v2 Q8 model assets...\n')

  // Create model directory
  fs.mkdirSync(MODEL_DIR, { recursive: true })
  console.log(`📁 Model directory: ${MODEL_DIR}\n`)

  const vocabTxtPath = path.join(MODEL_DIR, 'vocab.txt')
  const vocabJsonPath = path.join(MODEL_DIR, 'vocab.json')

  // An empty file counts as missing so that it gets downloaded again.
  const isPresent = (filePath) => fs.existsSync(filePath) && fs.statSync(filePath).size > 0

  // Download each file
  for (const file of FILES) {
    const destPath = path.join(MODEL_DIR, file.dest)

    // vocab.txt is deleted once it has been converted, so an existing
    // vocab.json means the vocabulary is already in place. Without this check
    // every re-run would download vocab.txt again and leave it lying around.
    const alreadyConverted = destPath === vocabTxtPath && isPresent(vocabJsonPath)

    if (alreadyConverted || isPresent(destPath)) {
      console.log(`⏭️ Skipping ${file.name} (already exists)`)
      continue
    }

    console.log(`📥 Downloading ${file.name}...`)
    try {
      await downloadFile(file.url, destPath)
    } catch (error) {
      console.error(` ❌ Failed to download ${file.name}: ${error.message}`)
      process.exit(1)
    }
  }

  // Convert vocab.txt to vocab.json
  if (fs.existsSync(vocabTxtPath) && !fs.existsSync(vocabJsonPath)) {
    convertVocabToJson(vocabTxtPath, vocabJsonPath)
  }

  console.log('\n✅ All model assets downloaded successfully!')
  console.log('\nModel files:')
  for (const entry of fs.readdirSync(MODEL_DIR)) {
    const stats = fs.statSync(path.join(MODEL_DIR, entry))
    const sizeMB = (stats.size / 1024 / 1024).toFixed(2)
    console.log(` - ${entry}: ${sizeMB}MB`)
  }
}
|
|
174
|
+
|
|
175
|
+
// Log unexpected failures and exit non-zero so npm scripts and CI notice them.
main().catch((error) => {
  console.error(error)
  process.exitCode = 1
})
|