@soulcraft/brainy 6.5.0 → 6.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37):
  1. package/assets/models/all-MiniLM-L6-v2-q8/config.json +25 -0
  2. package/assets/models/all-MiniLM-L6-v2-q8/model.onnx +0 -0
  3. package/assets/models/all-MiniLM-L6-v2-q8/tokenizer.json +30686 -0
  4. package/assets/models/all-MiniLM-L6-v2-q8/vocab.json +1 -0
  5. package/dist/brainy.js +0 -6
  6. package/dist/config/index.d.ts +1 -3
  7. package/dist/config/index.js +2 -4
  8. package/dist/config/modelAutoConfig.d.ts +10 -17
  9. package/dist/config/modelAutoConfig.js +15 -88
  10. package/dist/config/sharedConfigManager.d.ts +1 -2
  11. package/dist/config/zeroConfig.d.ts +2 -13
  12. package/dist/config/zeroConfig.js +7 -15
  13. package/dist/critical/model-guardian.d.ts +5 -22
  14. package/dist/critical/model-guardian.js +38 -210
  15. package/dist/embeddings/EmbeddingManager.d.ts +7 -17
  16. package/dist/embeddings/EmbeddingManager.js +28 -136
  17. package/dist/embeddings/wasm/AssetLoader.d.ts +67 -0
  18. package/dist/embeddings/wasm/AssetLoader.js +238 -0
  19. package/dist/embeddings/wasm/EmbeddingPostProcessor.d.ts +60 -0
  20. package/dist/embeddings/wasm/EmbeddingPostProcessor.js +123 -0
  21. package/dist/embeddings/wasm/ONNXInferenceEngine.d.ts +55 -0
  22. package/dist/embeddings/wasm/ONNXInferenceEngine.js +154 -0
  23. package/dist/embeddings/wasm/WASMEmbeddingEngine.d.ts +82 -0
  24. package/dist/embeddings/wasm/WASMEmbeddingEngine.js +231 -0
  25. package/dist/embeddings/wasm/WordPieceTokenizer.d.ts +71 -0
  26. package/dist/embeddings/wasm/WordPieceTokenizer.js +264 -0
  27. package/dist/embeddings/wasm/index.d.ts +13 -0
  28. package/dist/embeddings/wasm/index.js +15 -0
  29. package/dist/embeddings/wasm/types.d.ts +114 -0
  30. package/dist/embeddings/wasm/types.js +25 -0
  31. package/dist/setup.d.ts +11 -11
  32. package/dist/setup.js +17 -31
  33. package/dist/types/brainy.types.d.ts +0 -5
  34. package/dist/utils/embedding.d.ts +45 -62
  35. package/dist/utils/embedding.js +61 -440
  36. package/package.json +10 -3
  37. package/scripts/download-model.cjs +175 -0
@@ -0,0 +1,175 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Download Model Assets
4
+ *
5
+ * Downloads the all-MiniLM-L6-v2 Q8 model from Hugging Face.
6
+ * Run: node scripts/download-model.cjs
7
+ */
8
+
9
+ const fs = require('node:fs')
10
+ const path = require('node:path')
11
+ const https = require('node:https')
12
+
13
// Directory that receives the model assets: assets/models/all-MiniLM-L6-v2-q8,
// located one level above the directory containing this script.
const MODEL_DIR = path.resolve(__dirname, '..', 'assets', 'models', 'all-MiniLM-L6-v2-q8')
14
// Hugging Face "resolve" roots for the two repositories the assets come from.
// Every download URL below is derived from these so a repo or revision change
// only has to be made in one place.
const XENOVA_REPO_URL = 'https://huggingface.co/Xenova/all-MiniLM-L6-v2/resolve/main'
const SENTENCE_TRANSFORMERS_REPO_URL = 'https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main'

// Location of the ONNX weights inside the Xenova repository.
const BASE_URL = `${XENOVA_REPO_URL}/onnx`

/**
 * Assets to download.
 *   name - upstream file name (used in log messages)
 *   url  - absolute download URL
 *   dest - file name to store the asset under inside the model directory
 */
const FILES = [
  {
    name: 'model_quantized.onnx',
    url: `${BASE_URL}/model_quantized.onnx`,
    dest: 'model.onnx',
  },
  {
    name: 'tokenizer.json',
    url: `${XENOVA_REPO_URL}/tokenizer.json`,
    dest: 'tokenizer.json',
  },
  {
    name: 'config.json',
    url: `${XENOVA_REPO_URL}/config.json`,
    dest: 'config.json',
  },
  {
    name: 'vocab.txt',
    url: `${SENTENCE_TRANSFORMERS_REPO_URL}/vocab.txt`,
    dest: 'vocab.txt',
  },
]
38
+
39
/**
 * Download a file over HTTPS, following redirects.
 *
 * The body is streamed to `${destPath}.part` and renamed to `destPath` only
 * after the write stream has closed successfully, so an interrupted or failed
 * download never leaves a truncated file at `destPath`.
 *
 * @param {string} url - Absolute https:// URL to fetch.
 * @param {string} destPath - Final path of the downloaded file.
 * @param {number} [maxRedirects=5] - Number of redirect hops still allowed.
 * @returns {Promise<void>} Resolves once the file is completely written.
 *   Rejects on an invalid or non-HTTPS URL, too many redirects, a non-200
 *   status, a network error, a connection closed mid-body, or a write error.
 */
function downloadFile(url, destPath, maxRedirects = 5) {
  return new Promise((resolve, reject) => {
    if (maxRedirects <= 0) {
      reject(new Error('Too many redirects'))
      return
    }

    // Throws (and therefore rejects) on a malformed URL.
    const parsedUrl = new URL(url)
    if (parsedUrl.protocol !== 'https:') {
      reject(new Error(`Unsupported protocol: ${parsedUrl.protocol}`))
      return
    }

    const requestOptions = {
      headers: {
        'User-Agent': 'Brainy-Model-Downloader/1.0',
      },
    }
    const toMB = (bytes) => Math.round(bytes / 1024 / 1024)

    // Passing the URL object keeps host, port, path and query intact.
    https.get(parsedUrl, requestOptions, (response) => {
      const { statusCode, headers } = response

      // Handle redirects
      if (statusCode >= 300 && statusCode < 400 && headers.location) {
        response.resume() // Drain the body so the socket is released
        // new URL(location, base) resolves both absolute and relative targets
        const redirectUrl = new URL(headers.location, parsedUrl).toString()
        console.log(` ↳ Redirecting to: ${redirectUrl.slice(0, 80)}...`)
        downloadFile(redirectUrl, destPath, maxRedirects - 1).then(resolve, reject)
        return
      }

      if (statusCode !== 200) {
        response.resume() // Drain the body so the socket is released
        reject(new Error(`HTTP ${statusCode}`))
        return
      }

      const partPath = `${destPath}.part`
      const fileStream = fs.createWriteStream(partPath)
      const totalBytes = Number.parseInt(headers['content-length'] || '0', 10)
      let downloadedBytes = 0
      let failure = null

      // Record the first error and tear both streams down; the promise is
      // settled from the write stream's 'close' handler below.
      const fail = (err) => {
        if (failure) return
        failure = err
        response.destroy()
        fileStream.destroy()
      }

      response.on('data', (chunk) => {
        downloadedBytes += chunk.length
        if (totalBytes > 0) {
          const percent = Math.round((downloadedBytes / totalBytes) * 100)
          process.stdout.write(`\r Progress: ${percent}% (${toMB(downloadedBytes)}MB)`)
        }
      })
      response.on('error', fail)
      response.on('close', () => {
        // pipe() only ends the file stream on 'end'; a connection dropped
        // mid-body would otherwise leave the promise pending forever.
        if (!response.complete) {
          fail(new Error('Connection closed before the download completed'))
        }
      })
      fileStream.on('error', fail)

      fileStream.on('close', () => {
        if (!failure && totalBytes > 0 && downloadedBytes !== totalBytes) {
          failure = new Error(`Incomplete download: received ${downloadedBytes} of ${totalBytes} bytes`)
        }

        if (failure) {
          fs.unlink(partPath, () => reject(failure)) // Delete partial file
          return
        }

        fs.rename(partPath, destPath, (renameErr) => {
          if (renameErr) {
            reject(renameErr)
            return
          }
          console.log(`\n ✅ Downloaded: ${path.basename(destPath)} (${toMB(downloadedBytes)}MB)`)
          resolve()
        })
      })

      response.pipe(fileStream)
    }).on('error', reject)
  })
}
108
+
109
/**
 * Convert vocab.txt to vocab.json
 *
 * Reads a one-token-per-line vocabulary file and writes a JSON object mapping
 * each token to its zero-based line index, then deletes the text file.
 * Blank lines are skipped without renumbering the tokens that follow them,
 * and both LF and CRLF line endings are accepted.
 *
 * @param {string} vocabTxtPath - Path of the vocab.txt to read (removed afterwards).
 * @param {string} vocabJsonPath - Path of the vocab.json to write.
 */
function convertVocabToJson(vocabTxtPath, vocabJsonPath) {
  console.log('📝 Converting vocab.txt to vocab.json...')
  const content = fs.readFileSync(vocabTxtPath, 'utf-8')
  const lines = content.split(/\r?\n/)

  // Null-prototype object so a token such as "__proto__" is stored as an
  // ordinary key instead of being swallowed by Object.prototype's setter.
  const vocab = Object.create(null)
  lines.forEach((token, id) => {
    // Keep the line index as the id; only blank lines are left out.
    if (token.trim() !== '') {
      vocab[token] = id
    }
  })

  fs.writeFileSync(vocabJsonPath, JSON.stringify(vocab))
  console.log(` ✅ Created vocab.json with ${Object.keys(vocab).length} tokens`)

  // Remove vocab.txt since we have vocab.json
  fs.unlinkSync(vocabTxtPath)
}
128
+
129
/**
 * Entry point: make sure every model asset is present in MODEL_DIR.
 *
 * Creates the directory, downloads each entry of FILES that is missing or
 * empty, converts vocab.txt to vocab.json, and prints a size listing.
 * vocab.txt is only an intermediate file (it is deleted after conversion), so
 * it is not downloaded again once a non-empty vocab.json exists.
 * Exits the process with status 1 as soon as a download fails.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('🔽 Downloading all-MiniLM-L6-v2 Q8 model assets...\n')

  // Create model directory
  fs.mkdirSync(MODEL_DIR, { recursive: true })
  console.log(`📁 Model directory: ${MODEL_DIR}\n`)

  const vocabTxtPath = path.join(MODEL_DIR, 'vocab.txt')
  const vocabJsonPath = path.join(MODEL_DIR, 'vocab.json')
  const hasContent = (filePath) => fs.existsSync(filePath) && fs.statSync(filePath).size > 0

  // Download each file
  for (const file of FILES) {
    const destPath = path.join(MODEL_DIR, file.dest)

    // The converted vocabulary stands in for the deleted vocab.txt.
    if (destPath === vocabTxtPath && hasContent(vocabJsonPath)) {
      console.log(`⏭️ Skipping ${file.name} (vocab.json already exists)`)
      continue
    }

    // Check if already exists
    if (hasContent(destPath)) {
      console.log(`⏭️ Skipping ${file.name} (already exists)`)
      continue
    }

    console.log(`📥 Downloading ${file.name}...`)
    try {
      await downloadFile(file.url, destPath)
    } catch (error) {
      console.error(` ❌ Failed to download ${file.name}: ${error.message}`)
      process.exit(1)
    }
  }

  // Convert vocab.txt to vocab.json
  if (fs.existsSync(vocabTxtPath) && !hasContent(vocabJsonPath)) {
    convertVocabToJson(vocabTxtPath, vocabJsonPath)
  }

  console.log('\n✅ All model assets downloaded successfully!')
  console.log('\nModel files:')
  for (const entry of fs.readdirSync(MODEL_DIR)) {
    const { size } = fs.statSync(path.join(MODEL_DIR, entry))
    const sizeMB = (size / 1024 / 1024).toFixed(2)
    console.log(` - ${entry}: ${sizeMB}MB`)
  }
}

main().catch((error) => {
  console.error(error)
  // Report the failure to the calling shell / CI instead of exiting with 0.
  process.exitCode = 1
})