@soulcraft/brainy 0.45.0 → 0.47.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/OFFLINE_MODELS.md +56 -0
- package/README.md +45 -0
- package/dist/brainyData.js +7 -9
- package/dist/brainyData.js.map +1 -1
- package/dist/demo.js +2 -2
- package/dist/demo.js.map +1 -1
- package/dist/index.d.ts +2 -3
- package/dist/index.js +3 -9
- package/dist/index.js.map +1 -1
- package/dist/setup.d.ts +3 -3
- package/dist/setup.js +6 -6
- package/dist/setup.js.map +1 -1
- package/dist/utils/distance.js +63 -136
- package/dist/utils/distance.js.map +1 -1
- package/dist/utils/embedding.d.ts +48 -84
- package/dist/utils/embedding.js +182 -598
- package/dist/utils/embedding.js.map +1 -1
- package/dist/utils/robustModelLoader.d.ts +4 -0
- package/dist/utils/robustModelLoader.js +105 -53
- package/dist/utils/robustModelLoader.js.map +1 -1
- package/dist/utils/textEncoding.d.ts +2 -3
- package/dist/utils/textEncoding.js +31 -274
- package/dist/utils/textEncoding.js.map +1 -1
- package/package.json +7 -16
- package/scripts/download-models.cjs +190 -0
|
@@ -1,267 +1,33 @@
|
|
|
1
1
|
import { isNode } from './environment.js';
|
|
2
|
+
// Simplified TextEncoder/TextDecoder utilities for Node.js compatibility
|
|
3
|
+
// No longer needs complex TensorFlow.js patches - only basic TextEncoder/TextDecoder
|
|
2
4
|
/**
|
|
3
5
|
* Flag to track if the patch has been applied
|
|
4
6
|
*/
|
|
5
7
|
let patchApplied = false;
|
|
6
8
|
/**
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
+
* Apply TextEncoder/TextDecoder patches for Node.js compatibility
|
|
10
|
+
* Simplified version for Transformers.js/ONNX Runtime
|
|
9
11
|
*/
|
|
10
|
-
|
|
12
|
+
export async function applyTensorFlowPatch() {
|
|
13
|
+
// Apply patches for all non-browser environments that might need TextEncoder/TextDecoder
|
|
14
|
+
const isBrowserEnv = typeof window !== 'undefined' && typeof document !== 'undefined';
|
|
15
|
+
if (isBrowserEnv || patchApplied) {
|
|
16
|
+
return; // Browser environments don't need these patches, and don't patch twice
|
|
17
|
+
}
|
|
18
|
+
if (!isNode()) {
|
|
19
|
+
return; // Only patch Node.js environments
|
|
20
|
+
}
|
|
11
21
|
try {
|
|
12
|
-
|
|
13
|
-
if (typeof globalThis.TextEncoder === 'undefined') {
|
|
14
|
-
globalThis.TextEncoder = TextEncoder;
|
|
15
|
-
}
|
|
16
|
-
if (typeof globalThis.TextDecoder === 'undefined') {
|
|
17
|
-
globalThis.TextDecoder = TextDecoder;
|
|
18
|
-
}
|
|
19
|
-
// Patch global objects to handle the TensorFlow.js constructor issue
|
|
20
|
-
// This is needed because TF accesses TextEncoder/TextDecoder as constructors via this.util
|
|
21
|
-
if (typeof global !== 'undefined') {
|
|
22
|
-
if (!global.TextEncoder) {
|
|
23
|
-
global.TextEncoder = TextEncoder;
|
|
24
|
-
}
|
|
25
|
-
if (!global.TextDecoder) {
|
|
26
|
-
global.TextDecoder = TextDecoder;
|
|
27
|
-
}
|
|
28
|
-
// Also set the special global constructors that TensorFlow can use safely
|
|
29
|
-
global.__TextEncoder__ = TextEncoder;
|
|
30
|
-
global.__TextDecoder__ = TextDecoder;
|
|
31
|
-
}
|
|
32
|
-
// CRITICAL FIX: Create a custom util object that TensorFlow.js can use
|
|
33
|
-
// We'll make this available globally so TensorFlow.js can find it
|
|
34
|
-
const customUtil = {
|
|
35
|
-
TextEncoder: TextEncoder,
|
|
36
|
-
TextDecoder: TextDecoder,
|
|
37
|
-
types: {
|
|
38
|
-
isFloat32Array: (arr) => arr instanceof Float32Array,
|
|
39
|
-
isInt32Array: (arr) => arr instanceof Int32Array,
|
|
40
|
-
isUint8Array: (arr) => arr instanceof Uint8Array,
|
|
41
|
-
isUint8ClampedArray: (arr) => arr instanceof Uint8ClampedArray
|
|
42
|
-
}
|
|
43
|
-
};
|
|
44
|
-
// Make the custom util available globally
|
|
45
|
-
if (typeof global !== 'undefined') {
|
|
46
|
-
global.__brainy_util__ = customUtil;
|
|
47
|
-
}
|
|
48
|
-
// Try to patch the global require cache if possible
|
|
49
|
-
if (typeof global !== 'undefined' &&
|
|
50
|
-
global.require &&
|
|
51
|
-
global.require.cache) {
|
|
52
|
-
// Find the util module in the cache and patch it
|
|
53
|
-
for (const key in global.require.cache) {
|
|
54
|
-
if (key.endsWith('/util.js') || key === 'util') {
|
|
55
|
-
const utilModule = global.require.cache[key];
|
|
56
|
-
if (utilModule && utilModule.exports) {
|
|
57
|
-
Object.assign(utilModule.exports, customUtil);
|
|
58
|
-
}
|
|
59
|
-
}
|
|
60
|
-
}
|
|
61
|
-
}
|
|
62
|
-
// CRITICAL: Patch the Node.js util module directly
|
|
63
|
-
try {
|
|
64
|
-
const util = require('util');
|
|
65
|
-
// Ensure TextEncoder and TextDecoder are available as constructors
|
|
66
|
-
util.TextEncoder = TextEncoder;
|
|
67
|
-
util.TextDecoder = TextDecoder;
|
|
68
|
-
}
|
|
69
|
-
catch (error) {
|
|
70
|
-
// Ignore if util module is not available
|
|
71
|
-
}
|
|
72
|
-
// CRITICAL: Patch Float32Array to handle buffer alignment issues
|
|
73
|
-
// This fixes the "byte length of Float32Array should be a multiple of 4" error
|
|
74
|
-
// Get the appropriate global object for the current environment
|
|
22
|
+
console.log('Brainy: Applying TextEncoder/TextDecoder patch for Node.js');
|
|
23
|
+
// Get the appropriate global object
|
|
75
24
|
const globalObj = (() => {
|
|
76
25
|
if (typeof globalThis !== 'undefined')
|
|
77
26
|
return globalThis;
|
|
78
27
|
if (typeof global !== 'undefined')
|
|
79
28
|
return global;
|
|
80
|
-
if (typeof self !== 'undefined')
|
|
81
|
-
return self;
|
|
82
|
-
if (typeof window !== 'undefined')
|
|
83
|
-
return window;
|
|
84
|
-
return {}; // Fallback for unknown environments
|
|
29
|
+
return {};
|
|
85
30
|
})();
|
|
86
|
-
if (globalObj && globalObj.Float32Array) {
|
|
87
|
-
const originalFloat32Array = globalObj.Float32Array;
|
|
88
|
-
// Create a patched Float32Array class that handles alignment issues
|
|
89
|
-
const PatchedFloat32Array = class extends originalFloat32Array {
|
|
90
|
-
constructor(arg, byteOffset, length) {
|
|
91
|
-
if (arg instanceof ArrayBuffer) {
|
|
92
|
-
// Ensure buffer is properly aligned for Float32Array (multiple of 4 bytes)
|
|
93
|
-
const alignedByteOffset = byteOffset || 0;
|
|
94
|
-
const alignedLength = length !== undefined
|
|
95
|
-
? length
|
|
96
|
-
: (arg.byteLength - alignedByteOffset) / 4;
|
|
97
|
-
// Check if the buffer slice is properly aligned
|
|
98
|
-
if ((arg.byteLength - alignedByteOffset) % 4 !== 0 &&
|
|
99
|
-
length === undefined) {
|
|
100
|
-
try {
|
|
101
|
-
// Create a new aligned buffer if the original isn't properly aligned
|
|
102
|
-
const alignedByteLength = Math.floor((arg.byteLength - alignedByteOffset) / 4) * 4;
|
|
103
|
-
const alignedBuffer = new ArrayBuffer(alignedByteLength);
|
|
104
|
-
const sourceView = new Uint8Array(arg, alignedByteOffset, alignedByteLength);
|
|
105
|
-
const targetView = new Uint8Array(alignedBuffer);
|
|
106
|
-
targetView.set(sourceView);
|
|
107
|
-
super(alignedBuffer);
|
|
108
|
-
}
|
|
109
|
-
catch (error) {
|
|
110
|
-
// If alignment fails, try the original approach
|
|
111
|
-
console.warn('Float32Array alignment failed, using original constructor:', error);
|
|
112
|
-
super(arg, alignedByteOffset, alignedLength);
|
|
113
|
-
}
|
|
114
|
-
}
|
|
115
|
-
else {
|
|
116
|
-
super(arg, alignedByteOffset, alignedLength);
|
|
117
|
-
}
|
|
118
|
-
}
|
|
119
|
-
else {
|
|
120
|
-
super(arg, byteOffset, length);
|
|
121
|
-
}
|
|
122
|
-
}
|
|
123
|
-
};
|
|
124
|
-
// Apply the patch to the global object
|
|
125
|
-
try {
|
|
126
|
-
// Preserve static methods and properties
|
|
127
|
-
Object.setPrototypeOf(PatchedFloat32Array, originalFloat32Array);
|
|
128
|
-
Object.defineProperty(PatchedFloat32Array, 'name', {
|
|
129
|
-
value: 'Float32Array'
|
|
130
|
-
});
|
|
131
|
-
Object.defineProperty(PatchedFloat32Array, 'BYTES_PER_ELEMENT', {
|
|
132
|
-
value: 4
|
|
133
|
-
});
|
|
134
|
-
// Replace the global Float32Array with our patched version
|
|
135
|
-
globalObj.Float32Array = PatchedFloat32Array;
|
|
136
|
-
}
|
|
137
|
-
catch (error) {
|
|
138
|
-
console.warn('Failed to patch Float32Array:', error);
|
|
139
|
-
}
|
|
140
|
-
}
|
|
141
|
-
// CRITICAL: Patch any empty util shims that bundlers might create
|
|
142
|
-
// This handles cases where bundlers provide empty shims for Node.js modules
|
|
143
|
-
if (typeof global !== 'undefined') {
|
|
144
|
-
// Look for common patterns of util shims in bundled code
|
|
145
|
-
const checkAndPatchUtilShim = (obj) => {
|
|
146
|
-
if (obj && typeof obj === 'object' && !obj.TextEncoder) {
|
|
147
|
-
obj.TextEncoder = TextEncoder;
|
|
148
|
-
obj.TextDecoder = TextDecoder;
|
|
149
|
-
obj.types = obj.types || {
|
|
150
|
-
isFloat32Array: (arr) => arr instanceof Float32Array,
|
|
151
|
-
isInt32Array: (arr) => arr instanceof Int32Array,
|
|
152
|
-
isUint8Array: (arr) => arr instanceof Uint8Array,
|
|
153
|
-
isUint8ClampedArray: (arr) => arr instanceof Uint8ClampedArray
|
|
154
|
-
};
|
|
155
|
-
}
|
|
156
|
-
};
|
|
157
|
-
// Patch any existing util-like objects in global scope
|
|
158
|
-
if (global._utilShim) {
|
|
159
|
-
checkAndPatchUtilShim(global._utilShim);
|
|
160
|
-
}
|
|
161
|
-
// CRITICAL: Patch the bundled util shim directly
|
|
162
|
-
// In bundled code, there's often a _utilShim object that needs patching
|
|
163
|
-
if (typeof globalThis !== 'undefined' &&
|
|
164
|
-
globalThis._utilShim) {
|
|
165
|
-
checkAndPatchUtilShim(globalThis._utilShim);
|
|
166
|
-
}
|
|
167
|
-
// CRITICAL: Create and patch a global _utilShim if it doesn't exist
|
|
168
|
-
// This ensures the bundled code will find the patched version
|
|
169
|
-
if (!global._utilShim) {
|
|
170
|
-
global._utilShim = {
|
|
171
|
-
TextEncoder: TextEncoder,
|
|
172
|
-
TextDecoder: TextDecoder,
|
|
173
|
-
types: {
|
|
174
|
-
isFloat32Array: (arr) => arr instanceof Float32Array,
|
|
175
|
-
isInt32Array: (arr) => arr instanceof Int32Array,
|
|
176
|
-
isUint8Array: (arr) => arr instanceof Uint8Array,
|
|
177
|
-
isUint8ClampedArray: (arr) => arr instanceof Uint8ClampedArray
|
|
178
|
-
}
|
|
179
|
-
};
|
|
180
|
-
}
|
|
181
|
-
else {
|
|
182
|
-
checkAndPatchUtilShim(global._utilShim);
|
|
183
|
-
}
|
|
184
|
-
// Also ensure it's available on globalThis
|
|
185
|
-
if (typeof globalThis !== 'undefined' &&
|
|
186
|
-
!globalThis._utilShim) {
|
|
187
|
-
;
|
|
188
|
-
globalThis._utilShim = global._utilShim;
|
|
189
|
-
}
|
|
190
|
-
// Set up a property descriptor to catch util shim assignments
|
|
191
|
-
try {
|
|
192
|
-
Object.defineProperty(global, '_utilShim', {
|
|
193
|
-
get() {
|
|
194
|
-
return this.__utilShim || {};
|
|
195
|
-
},
|
|
196
|
-
set(value) {
|
|
197
|
-
checkAndPatchUtilShim(value);
|
|
198
|
-
this.__utilShim = value;
|
|
199
|
-
},
|
|
200
|
-
configurable: true
|
|
201
|
-
});
|
|
202
|
-
}
|
|
203
|
-
catch (e) {
|
|
204
|
-
// Ignore if property can't be defined
|
|
205
|
-
}
|
|
206
|
-
// Also set up property descriptor on globalThis
|
|
207
|
-
try {
|
|
208
|
-
Object.defineProperty(globalThis, '_utilShim', {
|
|
209
|
-
get() {
|
|
210
|
-
return this.__utilShim || {};
|
|
211
|
-
},
|
|
212
|
-
set(value) {
|
|
213
|
-
checkAndPatchUtilShim(value);
|
|
214
|
-
this.__utilShim = value;
|
|
215
|
-
},
|
|
216
|
-
configurable: true
|
|
217
|
-
});
|
|
218
|
-
}
|
|
219
|
-
catch (e) {
|
|
220
|
-
// Ignore if property can't be defined
|
|
221
|
-
}
|
|
222
|
-
}
|
|
223
|
-
console.log('Brainy: Successfully patched TensorFlow.js PlatformNode at module load time');
|
|
224
|
-
patchApplied = true;
|
|
225
|
-
}
|
|
226
|
-
catch (error) {
|
|
227
|
-
console.warn('Brainy: Failed to apply early TensorFlow.js platform patch:', error);
|
|
228
|
-
}
|
|
229
|
-
}
|
|
230
|
-
/**
|
|
231
|
-
* Apply the TensorFlow.js platform patch if it hasn't been applied already
|
|
232
|
-
* This is a safety measure in case the module-level patch didn't run
|
|
233
|
-
* Now works across all environments: browser, Node.js, and serverless/server
|
|
234
|
-
*/
|
|
235
|
-
export async function applyTensorFlowPatch() {
|
|
236
|
-
// Apply patches for all non-browser environments that might need TensorFlow.js compatibility
|
|
237
|
-
// This includes Node.js, serverless environments, and other server environments
|
|
238
|
-
const isBrowserEnv = typeof window !== 'undefined' && typeof document !== 'undefined';
|
|
239
|
-
if (isBrowserEnv) {
|
|
240
|
-
return; // Browser environments don't need these patches
|
|
241
|
-
}
|
|
242
|
-
// Get the appropriate global object for the current environment
|
|
243
|
-
const globalObj = (() => {
|
|
244
|
-
if (typeof globalThis !== 'undefined')
|
|
245
|
-
return globalThis;
|
|
246
|
-
if (typeof global !== 'undefined')
|
|
247
|
-
return global;
|
|
248
|
-
if (typeof self !== 'undefined')
|
|
249
|
-
return self;
|
|
250
|
-
return {}; // Fallback for unknown environments
|
|
251
|
-
})();
|
|
252
|
-
// Check if the critical globals exist, not just the flag
|
|
253
|
-
// This allows re-patching if globals have been deleted
|
|
254
|
-
const needsPatch = !patchApplied ||
|
|
255
|
-
typeof globalObj.__TextEncoder__ === 'undefined' ||
|
|
256
|
-
typeof globalObj.__TextDecoder__ === 'undefined';
|
|
257
|
-
if (!needsPatch) {
|
|
258
|
-
return;
|
|
259
|
-
}
|
|
260
|
-
try {
|
|
261
|
-
console.log('Brainy: Applying TensorFlow.js platform patch via function call');
|
|
262
|
-
// CRITICAL FIX: Patch the global environment to ensure TextEncoder/TextDecoder are available
|
|
263
|
-
// This approach works by ensuring the global constructors are available before TensorFlow.js loads
|
|
264
|
-
// Now works across all environments: Node.js, serverless, and other server environments
|
|
265
31
|
// Make sure TextEncoder and TextDecoder are available globally
|
|
266
32
|
if (!globalObj.TextEncoder) {
|
|
267
33
|
globalObj.TextEncoder = TextEncoder;
|
|
@@ -269,31 +35,20 @@ export async function applyTensorFlowPatch() {
|
|
|
269
35
|
if (!globalObj.TextDecoder) {
|
|
270
36
|
globalObj.TextDecoder = TextDecoder;
|
|
271
37
|
}
|
|
272
|
-
// Also set the
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
if (!process.versions.node) {
|
|
280
|
-
process.versions.node = process.version;
|
|
38
|
+
// Also set them on the global object for older code
|
|
39
|
+
if (typeof global !== 'undefined') {
|
|
40
|
+
if (!global.TextEncoder) {
|
|
41
|
+
global.TextEncoder = TextEncoder;
|
|
42
|
+
}
|
|
43
|
+
if (!global.TextDecoder) {
|
|
44
|
+
global.TextDecoder = TextDecoder;
|
|
281
45
|
}
|
|
282
|
-
}
|
|
283
|
-
// CRITICAL: Patch the Node.js util module directly
|
|
284
|
-
try {
|
|
285
|
-
const util = await import('util');
|
|
286
|
-
// Ensure TextEncoder and TextDecoder are available as constructors
|
|
287
|
-
util.TextEncoder = TextEncoder;
|
|
288
|
-
util.TextDecoder = TextDecoder;
|
|
289
|
-
}
|
|
290
|
-
catch (error) {
|
|
291
|
-
// Ignore if util module is not available
|
|
292
46
|
}
|
|
293
47
|
patchApplied = true;
|
|
48
|
+
console.log('Brainy: TextEncoder/TextDecoder patches applied successfully');
|
|
294
49
|
}
|
|
295
50
|
catch (error) {
|
|
296
|
-
console.warn('Brainy: Failed to apply
|
|
51
|
+
console.warn('Brainy: Failed to apply TextEncoder/TextDecoder patch:', error);
|
|
297
52
|
}
|
|
298
53
|
}
|
|
299
54
|
export function getTextEncoder() {
|
|
@@ -302,8 +57,10 @@ export function getTextEncoder() {
|
|
|
302
57
|
export function getTextDecoder() {
|
|
303
58
|
return new TextDecoder();
|
|
304
59
|
}
|
|
305
|
-
// Apply patch immediately
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
60
|
+
// Apply patch immediately if in Node.js
|
|
61
|
+
if (isNode()) {
|
|
62
|
+
applyTensorFlowPatch().catch((error) => {
|
|
63
|
+
console.warn('Failed to apply TextEncoder/TextDecoder patch at module load:', error);
|
|
64
|
+
});
|
|
65
|
+
}
|
|
309
66
|
//# sourceMappingURL=textEncoding.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"textEncoding.js","sourceRoot":"","sources":["../../src/utils/textEncoding.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,kBAAkB,CAAA;
|
|
1
|
+
{"version":3,"file":"textEncoding.js","sourceRoot":"","sources":["../../src/utils/textEncoding.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,kBAAkB,CAAA;AAEzC,yEAAyE;AACzE,qFAAqF;AAErF;;GAEG;AACH,IAAI,YAAY,GAAG,KAAK,CAAA;AAExB;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB;IACxC,yFAAyF;IACzF,MAAM,YAAY,GAAG,OAAO,MAAM,KAAK,WAAW,IAAI,OAAO,QAAQ,KAAK,WAAW,CAAA;IACrF,IAAI,YAAY,IAAI,YAAY,EAAE,CAAC;QACjC,OAAM,CAAC,uEAAuE;IAChF,CAAC;IAED,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC;QACd,OAAM,CAAC,kCAAkC;IAC3C,CAAC;IAED,IAAI,CAAC;QACH,OAAO,CAAC,GAAG,CAAC,4DAA4D,CAAC,CAAA;QAEzE,oCAAoC;QACpC,MAAM,SAAS,GAAG,CAAC,GAAG,EAAE;YACtB,IAAI,OAAO,UAAU,KAAK,WAAW;gBAAE,OAAO,UAAU,CAAA;YACxD,IAAI,OAAO,MAAM,KAAK,WAAW;gBAAE,OAAO,MAAM,CAAA;YAChD,OAAO,EAAS,CAAA;QAClB,CAAC,CAAC,EAAE,CAAA;QAEJ,+DAA+D;QAC/D,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,CAAC;YAC3B,SAAS,CAAC,WAAW,GAAG,WAAW,CAAA;QACrC,CAAC;QACD,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,CAAC;YAC3B,SAAS,CAAC,WAAW,GAAG,WAAW,CAAA;QACrC,CAAC;QAED,oDAAoD;QACpD,IAAI,OAAO,MAAM,KAAK,WAAW,EAAE,CAAC;YAClC,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;gBACxB,MAAM,CAAC,WAAW,GAAG,WAAW,CAAA;YAClC,CAAC;YACD,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;gBACxB,MAAM,CAAC,WAAW,GAAG,WAAW,CAAA;YAClC,CAAC;QACH,CAAC;QAED,YAAY,GAAG,IAAI,CAAA;QACnB,OAAO,CAAC,GAAG,CAAC,8DAA8D,CAAC,CAAA;IAC7E,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,IAAI,CAAC,wDAAwD,EAAE,KAAK,CAAC,CAAA;IAC/E,CAAC;AACH,CAAC;AAED,MAAM,UAAU,cAAc;IAC5B,OAAO,IAAI,WAAW,EAAE,CAAA;AAC1B,CAAC;AAED,MAAM,UAAU,cAAc;IAC5B,OAAO,IAAI,WAAW,EAAE,CAAA;AAC1B,CAAC;AAED,wCAAwC;AACxC,IAAI,MAAM,EAAE,EAAE,CAAC;IACb,oBAAoB,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;QACrC,OAAO,CAAC,IAAI,CAAC,+DAA+D,EAAE,KAAK,CAAC,CAAA;IACtF,CAAC,CAAC,CAAA;AACJ,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@soulcraft/brainy",
|
|
3
|
-
"version": "0.45.0",
|
|
3
|
+
"version": "0.47.0",
|
|
4
4
|
"description": "A vector graph database using HNSW indexing with Origin Private File System storage",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.js",
|
|
@@ -92,7 +92,8 @@
|
|
|
92
92
|
"_workflow:minor": "node scripts/release-workflow.js minor",
|
|
93
93
|
"_workflow:major": "node scripts/release-workflow.js major",
|
|
94
94
|
"_workflow:dry-run": "npm run build && npm test && npm run _release:dry-run",
|
|
95
|
-
"_dry-run": "npm pack --dry-run"
|
|
95
|
+
"_dry-run": "npm pack --dry-run",
|
|
96
|
+
"download-models": "node scripts/download-models.cjs"
|
|
96
97
|
},
|
|
97
98
|
"keywords": [
|
|
98
99
|
"vector-database",
|
|
@@ -128,7 +129,9 @@
|
|
|
128
129
|
"!dist/framework.min.js.map",
|
|
129
130
|
"LICENSE",
|
|
130
131
|
"README.md",
|
|
131
|
-
"brainy.png"
|
|
132
|
+
"brainy.png",
|
|
133
|
+
"scripts/download-models.cjs",
|
|
134
|
+
"OFFLINE_MODELS.md"
|
|
132
135
|
],
|
|
133
136
|
"devDependencies": {
|
|
134
137
|
"@rollup/plugin-commonjs": "^25.0.7",
|
|
@@ -159,23 +162,11 @@
|
|
|
159
162
|
},
|
|
160
163
|
"dependencies": {
|
|
161
164
|
"@aws-sdk/client-s3": "^3.540.0",
|
|
162
|
-
"@
|
|
163
|
-
"@tensorflow/tfjs-backend-cpu": "^4.22.0",
|
|
164
|
-
"@tensorflow/tfjs-backend-webgl": "^4.22.0",
|
|
165
|
-
"@tensorflow/tfjs-converter": "^4.22.0",
|
|
166
|
-
"@tensorflow/tfjs-core": "^4.22.0",
|
|
165
|
+
"@huggingface/transformers": "^3.1.0",
|
|
167
166
|
"buffer": "^6.0.3",
|
|
168
167
|
"dotenv": "^16.4.5",
|
|
169
168
|
"uuid": "^9.0.1"
|
|
170
169
|
},
|
|
171
|
-
"peerDependencies": {
|
|
172
|
-
"@soulcraft/brainy-models": ">=0.7.0"
|
|
173
|
-
},
|
|
174
|
-
"peerDependenciesMeta": {
|
|
175
|
-
"@soulcraft/brainy-models": {
|
|
176
|
-
"optional": true
|
|
177
|
-
}
|
|
178
|
-
},
|
|
179
170
|
"prettier": {
|
|
180
171
|
"arrowParens": "always",
|
|
181
172
|
"bracketSameLine": true,
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Download and bundle models for offline usage
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
const fs = require('fs').promises
|
|
7
|
+
const path = require('path')
|
|
8
|
+
|
|
9
|
+
const MODEL_NAME = 'Xenova/all-MiniLM-L6-v2'
|
|
10
|
+
const OUTPUT_DIR = './models'
|
|
11
|
+
|
|
12
|
+
async function downloadModels() {
|
|
13
|
+
// Use dynamic import for ES modules in CommonJS
|
|
14
|
+
const { pipeline, env } = await import('@huggingface/transformers')
|
|
15
|
+
|
|
16
|
+
// Configure transformers.js to use local cache
|
|
17
|
+
env.cacheDir = './models-cache'
|
|
18
|
+
env.allowRemoteModels = true
|
|
19
|
+
try {
|
|
20
|
+
console.log('🔄 Downloading all-MiniLM-L6-v2 model for offline bundling...')
|
|
21
|
+
console.log(` Model: ${MODEL_NAME}`)
|
|
22
|
+
console.log(` Cache: ${env.cacheDir}`)
|
|
23
|
+
|
|
24
|
+
// Create output directory
|
|
25
|
+
await fs.mkdir(OUTPUT_DIR, { recursive: true })
|
|
26
|
+
|
|
27
|
+
// Load the model to force download
|
|
28
|
+
console.log('📥 Loading model pipeline...')
|
|
29
|
+
const extractor = await pipeline('feature-extraction', MODEL_NAME)
|
|
30
|
+
|
|
31
|
+
// Test the model to make sure it works
|
|
32
|
+
console.log('🧪 Testing model...')
|
|
33
|
+
const testResult = await extractor(['Hello world!'], {
|
|
34
|
+
pooling: 'mean',
|
|
35
|
+
normalize: true
|
|
36
|
+
})
|
|
37
|
+
|
|
38
|
+
console.log(`✅ Model test successful! Embedding dimensions: ${testResult.data.length}`)
|
|
39
|
+
|
|
40
|
+
// Copy ALL model files from cache to our models directory
|
|
41
|
+
console.log('📋 Copying ALL model files to bundle directory...')
|
|
42
|
+
|
|
43
|
+
const cacheDir = path.resolve(env.cacheDir)
|
|
44
|
+
const outputDir = path.resolve(OUTPUT_DIR)
|
|
45
|
+
|
|
46
|
+
console.log(` From: ${cacheDir}`)
|
|
47
|
+
console.log(` To: ${outputDir}`)
|
|
48
|
+
|
|
49
|
+
// Copy the entire cache directory structure to ensure we get ALL files
|
|
50
|
+
// including tokenizer.json, config.json, and all ONNX model files
|
|
51
|
+
const modelCacheDir = path.join(cacheDir, 'Xenova', 'all-MiniLM-L6-v2')
|
|
52
|
+
|
|
53
|
+
if (await dirExists(modelCacheDir)) {
|
|
54
|
+
const targetModelDir = path.join(outputDir, 'Xenova', 'all-MiniLM-L6-v2')
|
|
55
|
+
console.log(` Copying complete model: Xenova/all-MiniLM-L6-v2`)
|
|
56
|
+
await copyDirectory(modelCacheDir, targetModelDir)
|
|
57
|
+
} else {
|
|
58
|
+
throw new Error(`Model cache directory not found: ${modelCacheDir}`)
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
console.log('✅ Model bundling complete!')
|
|
62
|
+
console.log(` Total size: ${await calculateDirectorySize(outputDir)} MB`)
|
|
63
|
+
console.log(` Location: ${outputDir}`)
|
|
64
|
+
|
|
65
|
+
// Create a marker file
|
|
66
|
+
await fs.writeFile(
|
|
67
|
+
path.join(outputDir, '.brainy-models-bundled'),
|
|
68
|
+
JSON.stringify({
|
|
69
|
+
model: MODEL_NAME,
|
|
70
|
+
bundledAt: new Date().toISOString(),
|
|
71
|
+
version: '1.0.0'
|
|
72
|
+
}, null, 2)
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
} catch (error) {
|
|
76
|
+
console.error('❌ Error downloading models:', error)
|
|
77
|
+
process.exit(1)
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
async function findModelDirectories(baseDir, modelName) {
|
|
82
|
+
const dirs = []
|
|
83
|
+
|
|
84
|
+
try {
|
|
85
|
+
// Convert model name to expected directory structure
|
|
86
|
+
const modelPath = modelName.replace('/', '--')
|
|
87
|
+
|
|
88
|
+
async function searchDirectory(currentDir) {
|
|
89
|
+
try {
|
|
90
|
+
const entries = await fs.readdir(currentDir, { withFileTypes: true })
|
|
91
|
+
|
|
92
|
+
for (const entry of entries) {
|
|
93
|
+
if (entry.isDirectory()) {
|
|
94
|
+
const fullPath = path.join(currentDir, entry.name)
|
|
95
|
+
|
|
96
|
+
// Check if this directory contains model files
|
|
97
|
+
if (entry.name.includes(modelPath) || entry.name === 'onnx') {
|
|
98
|
+
const hasModelFiles = await containsModelFiles(fullPath)
|
|
99
|
+
if (hasModelFiles) {
|
|
100
|
+
dirs.push(fullPath)
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
// Recursively search subdirectories
|
|
105
|
+
await searchDirectory(fullPath)
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
} catch (error) {
|
|
109
|
+
// Ignore access errors
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
await searchDirectory(baseDir)
|
|
114
|
+
} catch (error) {
|
|
115
|
+
console.warn('Warning: Error searching for model directories:', error)
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
return dirs
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
async function containsModelFiles(dir) {
|
|
122
|
+
try {
|
|
123
|
+
const files = await fs.readdir(dir)
|
|
124
|
+
return files.some(file =>
|
|
125
|
+
file.endsWith('.onnx') ||
|
|
126
|
+
file.endsWith('.json') ||
|
|
127
|
+
file === 'config.json' ||
|
|
128
|
+
file === 'tokenizer.json'
|
|
129
|
+
)
|
|
130
|
+
} catch (error) {
|
|
131
|
+
return false
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
async function dirExists(dir) {
|
|
136
|
+
try {
|
|
137
|
+
const stats = await fs.stat(dir)
|
|
138
|
+
return stats.isDirectory()
|
|
139
|
+
} catch (error) {
|
|
140
|
+
return false
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
async function copyDirectory(src, dest) {
|
|
145
|
+
await fs.mkdir(dest, { recursive: true })
|
|
146
|
+
const entries = await fs.readdir(src, { withFileTypes: true })
|
|
147
|
+
|
|
148
|
+
for (const entry of entries) {
|
|
149
|
+
const srcPath = path.join(src, entry.name)
|
|
150
|
+
const destPath = path.join(dest, entry.name)
|
|
151
|
+
|
|
152
|
+
if (entry.isDirectory()) {
|
|
153
|
+
await copyDirectory(srcPath, destPath)
|
|
154
|
+
} else {
|
|
155
|
+
await fs.copyFile(srcPath, destPath)
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
async function calculateDirectorySize(dir) {
|
|
161
|
+
let size = 0
|
|
162
|
+
|
|
163
|
+
async function calculateSize(currentDir) {
|
|
164
|
+
try {
|
|
165
|
+
const entries = await fs.readdir(currentDir, { withFileTypes: true })
|
|
166
|
+
|
|
167
|
+
for (const entry of entries) {
|
|
168
|
+
const fullPath = path.join(currentDir, entry.name)
|
|
169
|
+
|
|
170
|
+
if (entry.isDirectory()) {
|
|
171
|
+
await calculateSize(fullPath)
|
|
172
|
+
} else {
|
|
173
|
+
const stats = await fs.stat(fullPath)
|
|
174
|
+
size += stats.size
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
} catch (error) {
|
|
178
|
+
// Ignore access errors
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
await calculateSize(dir)
|
|
183
|
+
return Math.round(size / (1024 * 1024))
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
// Run the download
|
|
187
|
+
downloadModels().catch(error => {
|
|
188
|
+
console.error('Fatal error:', error)
|
|
189
|
+
process.exit(1)
|
|
190
|
+
})
|