@soulcraft/brainy 6.5.0 → 6.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/models/all-MiniLM-L6-v2-q8/config.json +25 -0
- package/assets/models/all-MiniLM-L6-v2-q8/model.onnx +0 -0
- package/assets/models/all-MiniLM-L6-v2-q8/tokenizer.json +30686 -0
- package/assets/models/all-MiniLM-L6-v2-q8/vocab.json +1 -0
- package/dist/brainy.js +0 -6
- package/dist/config/index.d.ts +1 -3
- package/dist/config/index.js +2 -4
- package/dist/config/modelAutoConfig.d.ts +10 -17
- package/dist/config/modelAutoConfig.js +15 -88
- package/dist/config/sharedConfigManager.d.ts +1 -2
- package/dist/config/zeroConfig.d.ts +2 -13
- package/dist/config/zeroConfig.js +7 -15
- package/dist/critical/model-guardian.d.ts +5 -22
- package/dist/critical/model-guardian.js +38 -210
- package/dist/embeddings/EmbeddingManager.d.ts +7 -17
- package/dist/embeddings/EmbeddingManager.js +28 -136
- package/dist/embeddings/wasm/AssetLoader.d.ts +67 -0
- package/dist/embeddings/wasm/AssetLoader.js +238 -0
- package/dist/embeddings/wasm/EmbeddingPostProcessor.d.ts +60 -0
- package/dist/embeddings/wasm/EmbeddingPostProcessor.js +123 -0
- package/dist/embeddings/wasm/ONNXInferenceEngine.d.ts +55 -0
- package/dist/embeddings/wasm/ONNXInferenceEngine.js +154 -0
- package/dist/embeddings/wasm/WASMEmbeddingEngine.d.ts +82 -0
- package/dist/embeddings/wasm/WASMEmbeddingEngine.js +231 -0
- package/dist/embeddings/wasm/WordPieceTokenizer.d.ts +71 -0
- package/dist/embeddings/wasm/WordPieceTokenizer.js +264 -0
- package/dist/embeddings/wasm/index.d.ts +13 -0
- package/dist/embeddings/wasm/index.js +15 -0
- package/dist/embeddings/wasm/types.d.ts +114 -0
- package/dist/embeddings/wasm/types.js +25 -0
- package/dist/setup.d.ts +11 -11
- package/dist/setup.js +17 -31
- package/dist/types/brainy.types.d.ts +0 -5
- package/dist/utils/embedding.d.ts +45 -62
- package/dist/utils/embedding.js +61 -440
- package/package.json +10 -3
- package/scripts/download-model.cjs +175 -0
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Download Model Assets
|
|
4
|
+
*
|
|
5
|
+
* Downloads the all-MiniLM-L6-v2 Q8 model from Hugging Face.
|
|
6
|
+
* Run: node scripts/download-model.cjs
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
const fs = require('node:fs')
|
|
10
|
+
const path = require('node:path')
|
|
11
|
+
const https = require('node:https')
|
|
12
|
+
|
|
13
|
+
// Directory that receives the model assets, resolved relative to this script.
const MODEL_DIR = path.join(__dirname, '..', 'assets', 'models', 'all-MiniLM-L6-v2-q8')

// Base URL of the `onnx` folder in the Xenova/all-MiniLM-L6-v2 repository.
const BASE_URL = 'https://huggingface.co/Xenova/all-MiniLM-L6-v2/resolve/main/onnx'

// Assets to fetch. Per entry:
//   name - label printed in log output
//   url  - remote location of the file
//   dest - file name used inside MODEL_DIR
const FILES = [
  { name: 'model_quantized.onnx', url: `${BASE_URL}/model_quantized.onnx`, dest: 'model.onnx' },
  {
    name: 'tokenizer.json',
    url: 'https://huggingface.co/Xenova/all-MiniLM-L6-v2/resolve/main/tokenizer.json',
    dest: 'tokenizer.json',
  },
  {
    name: 'config.json',
    url: 'https://huggingface.co/Xenova/all-MiniLM-L6-v2/resolve/main/config.json',
    dest: 'config.json',
  },
  {
    name: 'vocab.txt',
    url: 'https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/vocab.txt',
    dest: 'vocab.txt',
  },
]
|
|
38
|
+
|
|
39
|
+
/**
 * Download a file over HTTPS, following redirects.
 *
 * The response body is streamed to `${destPath}.part` and renamed to
 * `destPath` only after the write stream has finished and closed. A failed or
 * interrupted download therefore never leaves a partial file at `destPath`.
 *
 * @param {string} url - Absolute URL to fetch.
 * @param {string} destPath - Final location of the downloaded file.
 * @param {number} [maxRedirects=5] - Remaining request budget; each redirect
 *   followed uses one.
 * @returns {Promise<void>} Resolves once the file is in place. Rejects on a
 *   malformed URL, a non-200 status, too many redirects, or any network or
 *   file-system error.
 */
function downloadFile(url, destPath, maxRedirects = 5) {
  return new Promise((resolve, reject) => {
    if (maxRedirects <= 0) {
      reject(new Error('Too many redirects'))
      return
    }

    // Throws on a malformed URL; inside the executor that becomes a rejection.
    const target = new URL(url)
    const requestOptions = {
      headers: {
        'User-Agent': 'Brainy-Model-Downloader/1.0',
      },
    }

    // Passing the URL object (instead of hostname + path) keeps any port intact.
    https.get(target, requestOptions, (response) => {
      const { statusCode, headers } = response

      // Handle redirects
      if (statusCode >= 300 && statusCode < 400 && headers.location) {
        response.resume() // Consume response data to free memory
        // new URL() resolves relative Location values and accepts absolute ones.
        const redirectUrl = new URL(headers.location, target).toString()
        console.log(` ↳ Redirecting to: ${redirectUrl.slice(0, 80)}...`)
        downloadFile(redirectUrl, destPath, maxRedirects - 1).then(resolve, reject)
        return
      }

      if (statusCode !== 200) {
        response.resume() // Drain the body so the socket is released
        reject(new Error(`HTTP ${statusCode}`))
        return
      }

      const partPath = `${destPath}.part`
      const fileStream = fs.createWriteStream(partPath)
      const totalBytes = parseInt(headers['content-length'] || '0', 10)
      let downloadedBytes = 0
      let finished = false // true once every byte has been flushed to disk
      let failure = null // first error seen; set when the download is abandoned

      // Record the first error and tear both streams down. The promise itself
      // is settled in the write stream's 'close' handler, after the file
      // descriptor has been released.
      const abort = (err) => {
        if (failure) return
        failure = err
        response.destroy()
        fileStream.destroy()
      }

      response.on('data', (chunk) => {
        downloadedBytes += chunk.length
        if (totalBytes > 0) {
          const percent = Math.round((downloadedBytes / totalBytes) * 100)
          process.stdout.write(`\r Progress: ${percent}% (${Math.round(downloadedBytes / 1024 / 1024)}MB)`)
        }
      })
      response.on('error', abort)
      response.on('close', () => {
        // The connection went away before the whole message was received.
        if (!response.complete) {
          abort(new Error('Connection closed before the download completed'))
        }
      })

      fileStream.on('error', abort)
      fileStream.on('finish', () => {
        finished = true
      })
      fileStream.on('close', () => {
        if (!failure && !finished) {
          failure = new Error('Write stream closed before the download completed')
        }
        if (failure) {
          // Delete the partial file, then report the original error.
          fs.unlink(partPath, () => reject(failure))
          return
        }
        fs.rename(partPath, destPath, (renameErr) => {
          if (renameErr) {
            fs.unlink(partPath, () => reject(renameErr))
            return
          }
          console.log(`\n ✅ Downloaded: ${path.basename(destPath)} (${Math.round(downloadedBytes / 1024 / 1024)}MB)`)
          resolve()
        })
      })

      response.pipe(fileStream)
    }).on('error', reject)
  })
}
|
|
108
|
+
|
|
109
|
+
/**
 * Convert vocab.txt to vocab.json
 *
 * Reads a vocabulary file with one token per line and writes a JSON object
 * mapping each token to its position among the non-blank lines. The source
 * text file is deleted afterwards.
 *
 * @param {string} vocabTxtPath - Path of the vocab.txt file to read (and remove).
 * @param {string} vocabJsonPath - Path of the vocab.json file to write.
 * @returns {void}
 */
function convertVocabToJson(vocabTxtPath, vocabJsonPath) {
  console.log('📝 Converting vocab.txt to vocab.json...')
  const content = fs.readFileSync(vocabTxtPath, 'utf-8')
  // Accept both LF and CRLF line endings so tokens never carry a trailing '\r'.
  const lines = content.split(/\r?\n/).filter(line => line.trim())

  // Null-prototype object: a token such as "__proto__" is stored as an
  // ordinary key instead of being swallowed by the prototype setter.
  const vocab = Object.create(null)
  lines.forEach((token, index) => {
    vocab[token] = index
  })

  fs.writeFileSync(vocabJsonPath, JSON.stringify(vocab))
  console.log(` ✅ Created vocab.json with ${Object.keys(vocab).length} tokens`)

  // Remove vocab.txt since we have vocab.json
  fs.unlinkSync(vocabTxtPath)
}
|
|
128
|
+
|
|
129
|
+
/**
 * Download every entry of FILES into MODEL_DIR (skipping assets that are
 * already present), convert vocab.txt to vocab.json, then print the size of
 * each file in the model directory.
 *
 * Exits the process with status 1 as soon as a download fails.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('🔽 Downloading all-MiniLM-L6-v2 Q8 model assets...\n')

  // Create model directory
  fs.mkdirSync(MODEL_DIR, { recursive: true })
  console.log(`📁 Model directory: ${MODEL_DIR}\n`)

  const vocabTxtPath = path.join(MODEL_DIR, 'vocab.txt')
  const vocabJsonPath = path.join(MODEL_DIR, 'vocab.json')
  const hasVocabJson = fs.existsSync(vocabJsonPath) && fs.statSync(vocabJsonPath).size > 0

  // Download each file
  for (const file of FILES) {
    const destPath = path.join(MODEL_DIR, file.dest)

    // vocab.txt is deleted after conversion, so its absence does not mean the
    // vocabulary is missing: vocab.json is what decides whether to fetch it.
    if (destPath === vocabTxtPath && hasVocabJson) {
      console.log(`⏭️ Skipping ${file.name} (vocab.json already exists)`)
      continue
    }

    // Check if already exists
    if (fs.existsSync(destPath) && fs.statSync(destPath).size > 0) {
      console.log(`⏭️ Skipping ${file.name} (already exists)`)
      continue
    }

    console.log(`📥 Downloading ${file.name}...`)
    try {
      await downloadFile(file.url, destPath)
    } catch (error) {
      console.error(` ❌ Failed to download ${file.name}: ${error.message}`)
      process.exit(1)
    }
  }

  // Convert vocab.txt to vocab.json
  if (fs.existsSync(vocabTxtPath) && !hasVocabJson) {
    convertVocabToJson(vocabTxtPath, vocabJsonPath)
  }

  console.log('\n✅ All model assets downloaded successfully!')
  console.log('\nModel files:')
  for (const entry of fs.readdirSync(MODEL_DIR)) {
    const stats = fs.statSync(path.join(MODEL_DIR, entry))
    const sizeMB = (stats.size / 1024 / 1024).toFixed(2)
    console.log(` - ${entry}: ${sizeMB}MB`)
  }
}
|
|
174
|
+
|
|
175
|
+
// Run the script. An unexpected failure is logged and reflected in the exit
// status so callers (npm scripts, CI) can detect it.
main().catch((error) => {
  console.error(error)
  process.exitCode = 1
})
|