@soulcraft/cortex 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +16 -0
- package/README.md +125 -0
- package/dist/graph/NativeGraphAdjacencyIndex.d.ts +92 -0
- package/dist/graph/NativeGraphAdjacencyIndex.js +671 -0
- package/dist/index.d.ts +22 -0
- package/dist/index.js +23 -0
- package/dist/license.d.ts +18 -0
- package/dist/license.js +172 -0
- package/dist/native/NativeEmbeddingEngine.d.ts +79 -0
- package/dist/native/NativeEmbeddingEngine.js +302 -0
- package/dist/native/NativeRoaringBitmap32.d.ts +114 -0
- package/dist/native/NativeRoaringBitmap32.js +221 -0
- package/dist/native/ffi.d.ts +20 -0
- package/dist/native/ffi.js +48 -0
- package/dist/native/index.d.ts +30 -0
- package/dist/native/index.js +58 -0
- package/dist/native/napi.d.ts +21 -0
- package/dist/native/napi.js +88 -0
- package/dist/native/types.d.ts +710 -0
- package/dist/native/types.js +16 -0
- package/dist/plugin.d.ts +22 -0
- package/dist/plugin.js +115 -0
- package/dist/storage/mmapFileSystemStorage.d.ts +24 -0
- package/dist/storage/mmapFileSystemStorage.js +73 -0
- package/dist/utils/NativeMetadataIndex.d.ts +185 -0
- package/dist/utils/NativeMetadataIndex.js +1274 -0
- package/dist/utils/nativeEntityIdMapper.d.ts +84 -0
- package/dist/utils/nativeEntityIdMapper.js +134 -0
- package/native/brainy-native.darwin-arm64.node +0 -0
- package/native/brainy-native.darwin-x64.node +0 -0
- package/native/brainy-native.linux-arm64-gnu.node +0 -0
- package/native/brainy-native.linux-x64-gnu.node +0 -0
- package/native/brainy-native.win32-x64-msvc.node +0 -0
- package/native/index.d.ts +1068 -0
- package/package.json +66 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @soulcraft/cortex — Native Rust acceleration for Brainy
|
|
3
|
+
*
|
|
4
|
+
* When installed alongside @soulcraft/brainy, this package automatically
|
|
5
|
+
* provides native Rust implementations for:
|
|
6
|
+
* - SIMD distance calculations (cosine, euclidean, manhattan, dot product)
|
|
7
|
+
* - Metadata index (full query/mutation engine in Rust)
|
|
8
|
+
* - Graph adjacency index (4 LSM-trees + verb tracking)
|
|
9
|
+
* - Entity ID mapper (UUID ↔ integer bidirectional mapping)
|
|
10
|
+
* - Roaring bitmaps (CRoaring bindings)
|
|
11
|
+
* - Embedding engine (Candle ML — CPU, CUDA, Metal)
|
|
12
|
+
* - Msgpack encoding/decoding
|
|
13
|
+
* - MmapFileSystemStorage adapter (zero-copy SSTables)
|
|
14
|
+
*
|
|
15
|
+
* Usage: `npm install @soulcraft/cortex` — auto-detected, zero config.
|
|
16
|
+
*/
|
|
17
|
+
// Default export: BrainyPlugin (auto-detected by brainy init())
|
|
18
|
+
export { default } from './plugin.js';
|
|
19
|
+
export { loadNativeModule, isNativeAvailable } from './native/index.js';
|
|
20
|
+
// Re-export all native wrapper classes
|
|
21
|
+
export { NativeEmbeddingEngine, nativeEmbeddingEngine, cosineSimilarity } from './native/NativeEmbeddingEngine.js';
|
|
22
|
+
export { RoaringBitmap32, RoaringBitmap32Iterator, roaringLibraryInitialize, roaringLibraryIsReady, SerializationFormat, DeserializationFormat } from './native/NativeRoaringBitmap32.js';
|
|
23
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* License validation for @soulcraft/cortex
|
|
3
|
+
*
|
|
4
|
+
* Validation order:
|
|
5
|
+
* 1. SOULCRAFT_LICENSE environment variable
|
|
6
|
+
* 2. ~/.soulcraft/license file
|
|
7
|
+
* 3. 14-day trial (tracked via ~/.soulcraft/.cortex-trial)
|
|
8
|
+
*
|
|
9
|
+
* License keys are Ed25519-signed tokens with embedded payload.
|
|
10
|
+
* Offline-only — no network calls. Soft enforcement for read-only
|
|
11
|
+
* filesystems (containers, CI).
|
|
12
|
+
*/
|
|
13
|
+
/**
|
|
14
|
+
* Validate the cortex license.
|
|
15
|
+
* Returns true if the license is valid or the trial is active.
|
|
16
|
+
*/
|
|
17
|
+
export declare function validateLicense(): Promise<boolean>;
|
|
18
|
+
//# sourceMappingURL=license.d.ts.map
|
package/dist/license.js
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* License validation for @soulcraft/cortex
|
|
3
|
+
*
|
|
4
|
+
* Validation order:
|
|
5
|
+
* 1. SOULCRAFT_LICENSE environment variable
|
|
6
|
+
* 2. ~/.soulcraft/license file
|
|
7
|
+
* 3. 14-day trial (tracked via ~/.soulcraft/.cortex-trial)
|
|
8
|
+
*
|
|
9
|
+
* License keys are Ed25519-signed tokens with embedded payload.
|
|
10
|
+
* Offline-only — no network calls. Soft enforcement for read-only
|
|
11
|
+
* filesystems (containers, CI).
|
|
12
|
+
*/
|
|
13
|
+
import { createPublicKey, verify } from 'node:crypto';
|
|
14
|
+
import { readFile, writeFile, mkdir } from 'node:fs/promises';
|
|
15
|
+
import { homedir } from 'node:os';
|
|
16
|
+
import { join } from 'node:path';
|
|
17
|
+
const TRIAL_DAYS = 14;
|
|
18
|
+
const SOULCRAFT_DIR = '.soulcraft';
|
|
19
|
+
const LICENSE_FILE = 'license';
|
|
20
|
+
const TRIAL_FILE = '.cortex-trial';
|
|
21
|
+
// Ed25519 public key for license verification (private key is NOT in this repo)
|
|
22
|
+
const PUBLIC_KEY_PEM = `-----BEGIN PUBLIC KEY-----
|
|
23
|
+
MCowBQYDK2VwAyEAKaoTwRGx57o0yHgpdB6ESEf0+9p5stIifvbKXoxCHuQ=
|
|
24
|
+
-----END PUBLIC KEY-----`;
|
|
25
|
+
// Ed25519 signature is always 64 bytes
|
|
26
|
+
const ED25519_SIG_SIZE = 64;
|
|
27
|
+
/**
|
|
28
|
+
* Validate the cortex license.
|
|
29
|
+
* Returns true if the license is valid or the trial is active.
|
|
30
|
+
*/
|
|
31
|
+
export async function validateLicense() {
    const pricingHint = `Get a valid license at https://soulcraft.com/pricing?focus=pro`;
    // Renders the optional ": <reason>" suffix used by both warnings below.
    const reasonSuffix = (outcome) => (outcome.reason ? ': ' + outcome.reason : '');

    // Source 1: the SOULCRAFT_LICENSE environment variable.
    const keyFromEnv = process.env.SOULCRAFT_LICENSE;
    if (keyFromEnv) {
        const envOutcome = verifyLicenseToken(keyFromEnv);
        if (envOutcome.valid) {
            return true;
        }
        // A bad environment key is reported but does not block the later sources.
        console.warn(`[cortex] Invalid license key${reasonSuffix(envOutcome)}. ` + pricingHint);
    }

    // Source 2: the license file inside the per-user soulcraft directory.
    const configDir = join(homedir(), SOULCRAFT_DIR);
    try {
        const rawFile = await readFile(join(configDir, LICENSE_FILE), 'utf-8');
        const storedKey = rawFile.trim();
        if (storedKey) {
            const fileOutcome = verifyLicenseToken(storedKey);
            if (fileOutcome.valid) {
                return true;
            }
            console.warn(`[cortex] License file invalid${reasonSuffix(fileOutcome)}. ` + pricingHint);
        }
    }
    catch {
        // Missing or unreadable license file: continue to the trial check.
    }

    // Source 3: the trial marker (created on first run by checkTrial).
    return checkTrial(configDir);
}
|
|
63
|
+
/**
|
|
64
|
+
* Verify an Ed25519-signed license token.
|
|
65
|
+
*
|
|
66
|
+
* Token format: sc_cortex_<base64url(json_payload + ed25519_signature)>
|
|
67
|
+
* - Payload: JSON with sub, tier, exp, iat fields
|
|
68
|
+
* - Signature: 64-byte Ed25519 signature over the payload bytes
|
|
69
|
+
*/
|
|
70
|
+
function verifyLicenseToken(token) {
    // Verifies a license token and reports why it was rejected.
    //
    // Token layout: "sc_cortex_" + base64url(payloadBytes + signatureBytes), where
    // the trailing ED25519_SIG_SIZE bytes are an Ed25519 signature over the
    // preceding payload bytes, and the payload is UTF-8 JSON.
    //
    // Returns { valid: true } on success, otherwise { valid: false, reason }.
    const prefix = 'sc_cortex_';
    if (!token.startsWith(prefix)) {
        return { valid: false, reason: 'invalid prefix' };
    }
    const encoded = token.slice(prefix.length);
    // Cheap sanity check before decoding anything.
    if (encoded.length < 20) {
        return { valid: false, reason: 'token too short' };
    }
    let decoded;
    try {
        decoded = Buffer.from(encoded, 'base64url');
    }
    catch {
        return { valid: false, reason: 'invalid encoding' };
    }
    // There must be at least one payload byte in front of the signature.
    if (decoded.length <= ED25519_SIG_SIZE) {
        return { valid: false, reason: 'token too short' };
    }
    const splitAt = decoded.length - ED25519_SIG_SIZE;
    const payloadBytes = decoded.subarray(0, splitAt);
    const signature = decoded.subarray(splitAt);
    // Ed25519 takes no separate digest algorithm, hence the null first argument.
    let signatureValid;
    try {
        const publicKey = createPublicKey(PUBLIC_KEY_PEM);
        signatureValid = verify(null, payloadBytes, publicKey, signature);
    }
    catch {
        return { valid: false, reason: 'signature verification failed' };
    }
    if (!signatureValid) {
        return { valid: false, reason: 'invalid signature' };
    }
    // The payload is only parsed once its signature has been accepted.
    let payload;
    try {
        payload = JSON.parse(payloadBytes.toString('utf8'));
    }
    catch {
        return { valid: false, reason: 'invalid payload' };
    }
    // JSON such as `null`, `5`, `"x"` or `[]` parses fine but is not a license
    // record. Reject it here instead of throwing a TypeError on `payload.exp`
    // (null) or silently treating it as a perpetual license (other primitives).
    if (payload === null || typeof payload !== 'object' || Array.isArray(payload)) {
        return { valid: false, reason: 'invalid payload' };
    }
    // A falsy `exp` (missing, empty string, 0) means the license never expires.
    // NOTE(review): `exp` is handed to `new Date()` as-is, so a numeric value is
    // read as epoch milliseconds — confirm the issuer does not emit epoch seconds.
    if (payload.exp) {
        const expiry = new Date(payload.exp);
        if (Number.isNaN(expiry.getTime())) {
            return { valid: false, reason: 'invalid expiry date' };
        }
        if (expiry.getTime() < Date.now()) {
            return { valid: false, reason: 'license expired' };
        }
    }
    return { valid: true };
}
|
|
123
|
+
/**
|
|
124
|
+
* Check if the trial period is still active.
|
|
125
|
+
* Creates a trial marker file on first run.
|
|
126
|
+
*/
|
|
127
|
+
async function checkTrial(soulcraftDir) {
    // Decides whether the trial window is still open, based on the ISO timestamp
    // stored in the trial marker file. A missing, unreadable or unparseable
    // marker (re)starts the trial via createTrial.
    const markerPath = join(soulcraftDir, TRIAL_FILE);
    const purchaseHint = `Purchase a license at https://soulcraft.com/pricing?focus=pro`;
    try {
        const rawStamp = await readFile(markerPath, 'utf-8');
        const startedAtMs = new Date(rawStamp.trim()).getTime();
        if (Number.isNaN(startedAtMs)) {
            // Marker content is not a date: start over.
            return createTrial(soulcraftDir, markerPath);
        }
        const msPerDay = 24 * 60 * 60 * 1000;
        const daysUsed = (Date.now() - startedAtMs) / msPerDay;
        if (daysUsed > TRIAL_DAYS) {
            console.warn(`[cortex] Trial expired. ` + purchaseHint);
            return false;
        }
        // Still inside the window; nag only during the final three days.
        const daysLeft = Math.ceil(TRIAL_DAYS - daysUsed);
        if (daysLeft <= 3) {
            const plural = daysLeft === 1 ? '' : 's';
            console.warn(`[cortex] Trial expires in ${daysLeft} day${plural}. ` + purchaseHint);
        }
        return true;
    }
    catch {
        // No readable marker yet: this is treated as the first run.
        return createTrial(soulcraftDir, markerPath);
    }
}
|
|
156
|
+
/**
|
|
157
|
+
* Create a new trial marker file.
|
|
158
|
+
* Soft enforcement: returns true even if filesystem is read-only.
|
|
159
|
+
*/
|
|
160
|
+
async function createTrial(soulcraftDir, trialPath) {
    // Announces the start of the trial and records the start time.
    //
    // soulcraftDir: directory that should hold the marker (created if missing).
    // trialPath:    full path of the marker file; receives the current time as
    //               an ISO-8601 string.
    //
    // Always resolves to true: a failure to persist the marker (for example on a
    // read-only filesystem) is deliberately ignored.
    //
    // The advertised length comes from TRIAL_DAYS so the banner cannot drift
    // from the window that checkTrial enforces.
    console.log(`[cortex] Starting ${TRIAL_DAYS}-day trial. ` +
        `Purchase a license at https://soulcraft.com/pricing?focus=pro`);
    try {
        await mkdir(soulcraftDir, { recursive: true });
        await writeFile(trialPath, new Date().toISOString(), 'utf-8');
    }
    catch {
        // Read-only filesystem (Docker, CI, etc.) — allow silently
    }
    return true;
}
|
|
172
|
+
//# sourceMappingURL=license.js.map
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Native Embedding Engine
|
|
3
|
+
*
|
|
4
|
+
* TypeScript wrapper matching the CandleEmbeddingEngine / WASMEmbeddingEngine API.
|
|
5
|
+
* Uses the native Rust module (napi-rs or bun:ffi) instead of WASM.
|
|
6
|
+
*
|
|
7
|
+
* Drop-in replacement: same singleton pattern, same method signatures,
|
|
8
|
+
* same model loading flow. Consumers don't need to change.
|
|
9
|
+
*/
|
|
10
|
+
import type { EmbeddingResult, EngineStats } from './types.js';
|
|
11
|
+
/**
|
|
12
|
+
* Native embedding engine using Rust (Candle) via napi-rs.
|
|
13
|
+
*
|
|
14
|
+
* Replaces WASMEmbeddingEngine and CandleEmbeddingEngine.
|
|
15
|
+
* Same API surface for backward compatibility.
|
|
16
|
+
*/
|
|
17
|
+
export declare class NativeEmbeddingEngine {
    private engine;
    private initialized;
    private embedCount;
    private totalProcessingTimeMs;
    private constructor();
    /**
     * Return the shared engine instance, creating it on first use.
     */
    static getInstance(): NativeEmbeddingEngine;
    /**
     * Make the engine ready for use. Resolves immediately when already
     * initialized; joins an initialization that is already in progress.
     */
    initialize(): Promise<void>;
    /**
     * Initialization worker: loads the native module, reads the model
     * assets and hands them to the native engine.
     */
    private performInit;
    /**
     * Read the model, tokenizer and config files, picking the Bun or the
     * Node loader depending on the runtime.
     */
    private loadModelAssets;
    private loadBunAssets;
    private loadNodeAssets;
    /**
     * Embed a single text and return only the vector.
     */
    embed(text: string): Promise<number[]>;
    /**
     * Embed a single text and return the vector together with timing
     * metadata.
     */
    embedWithMetadata(text: string): Promise<EmbeddingResult>;
    /**
     * Embed several texts in one call; an empty input yields an empty
     * result.
     */
    embedBatch(texts: string[]): Promise<number[][]>;
    /**
     * Whether initialization has completed successfully.
     */
    isInitialized(): boolean;
    /**
     * Usage statistics for the engine.
     */
    getStats(): EngineStats;
    /**
     * Drop the native engine reference and mark the wrapper uninitialized.
     */
    dispose(): Promise<void>;
    /**
     * Discard the shared instance (intended for tests).
     */
    static resetInstance(): void;
}
|
|
74
|
+
/**
|
|
75
|
+
* Calculate cosine similarity between two embeddings (native)
|
|
76
|
+
*/
|
|
77
|
+
export declare function cosineSimilarity(a: number[], b: number[]): number;
|
|
78
|
+
export declare const nativeEmbeddingEngine: NativeEmbeddingEngine;
|
|
79
|
+
//# sourceMappingURL=NativeEmbeddingEngine.d.ts.map
|
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Native Embedding Engine
|
|
3
|
+
*
|
|
4
|
+
* TypeScript wrapper matching the CandleEmbeddingEngine / WASMEmbeddingEngine API.
|
|
5
|
+
* Uses the native Rust module (napi-rs or bun:ffi) instead of WASM.
|
|
6
|
+
*
|
|
7
|
+
* Drop-in replacement: same singleton pattern, same method signatures,
|
|
8
|
+
* same model loading flow. Consumers don't need to change.
|
|
9
|
+
*/
|
|
10
|
+
import { MODEL_CONSTANTS } from './types.js';
|
|
11
|
+
import { loadNativeModule } from './index.js';
|
|
12
|
+
// Global singleton
|
|
13
|
+
let globalInstance = null;
|
|
14
|
+
let globalInitPromise = null;
|
|
15
|
+
/**
|
|
16
|
+
* Native embedding engine using Rust (Candle) via napi-rs.
|
|
17
|
+
*
|
|
18
|
+
* Replaces WASMEmbeddingEngine and CandleEmbeddingEngine.
|
|
19
|
+
* Same API surface for backward compatibility.
|
|
20
|
+
*/
|
|
21
|
+
export class NativeEmbeddingEngine {
    // Handle to the native engine object; null until initialized and after a failure.
    engine = null;
    // True only after the native engine reported it is ready.
    initialized = false;
    // JS-side counters, reported by getStats() when no native engine is attached.
    embedCount = 0;
    totalProcessingTimeMs = 0;
    constructor() {
        // Private constructor for singleton — use getInstance().
    }
    /**
     * Get the singleton instance, creating it on first use.
     */
    static getInstance() {
        if (!globalInstance) {
            globalInstance = new NativeEmbeddingEngine();
        }
        return globalInstance;
    }
    /**
     * Initialize the embedding engine.
     *
     * Returns immediately when already initialized. Concurrent callers share
     * one in-flight initialization; the shared promise is cleared once it
     * settles so a failed attempt can be retried.
     */
    async initialize() {
        if (this.initialized) {
            return;
        }
        if (globalInitPromise) {
            await globalInitPromise;
            return;
        }
        globalInitPromise = this.performInit();
        try {
            await globalInitPromise;
        }
        finally {
            globalInitPromise = null;
        }
    }
    /**
     * Perform actual initialization: load the native module, read the model
     * assets and hand them to the native engine.
     *
     * @throws {Error} wrapping the underlying failure; the original error is
     *   kept on `cause` so its stack and type are not lost.
     */
    async performInit() {
        const startTime = Date.now();
        console.log('Initializing Native Embedding Engine...');
        try {
            // 1. Load native module
            console.log('Loading native module...');
            const native = loadNativeModule();
            this.engine = new native.NativeEmbeddingEngine();
            const nativeTime = Date.now() - startTime;
            console.log(` Native module loaded in ${nativeTime}ms`);
            // 2. Load model assets
            console.log('Loading model assets (~88MB)...');
            const modelStartTime = Date.now();
            const assets = await this.loadModelAssets();
            const modelTime = Date.now() - modelStartTime;
            console.log(` Model loaded in ${modelTime}ms`);
            // 3. Initialize engine with model
            console.log('Initializing embedding engine...');
            this.engine.load(assets.model, assets.tokenizer, assets.config);
            if (!this.engine.isReady()) {
                throw new Error('Engine failed to initialize');
            }
            this.initialized = true;
            const initTime = Date.now() - startTime;
            console.log(`Native Embedding Engine ready in ${initTime}ms`);
        }
        catch (error) {
            // Leave the wrapper in a clean "not initialized" state for retries.
            this.initialized = false;
            this.engine = null;
            const detail = error instanceof Error ? error.message : String(error);
            throw new Error(`Failed to initialize Native Embedding Engine: ${detail}`, { cause: error });
        }
    }
    /**
     * Load model assets from the filesystem using the loader that matches the
     * current runtime (Bun when the `Bun` global exists, Node otherwise).
     *
     * @returns {Promise<{model: Buffer, tokenizer: Buffer, config: Buffer}>}
     */
    async loadModelAssets() {
        const isBun = typeof Bun !== 'undefined';
        if (isBun) {
            return this.loadBunAssets();
        }
        return this.loadNodeAssets();
    }
    /**
     * Bun loader: tries several candidate locations in order and returns the
     * first one where all three files are readable and non-empty.
     *
     * @throws {Error} when no candidate location yields a complete asset set.
     */
    async loadBunAssets() {
        const pathsToTry = [];
        // Strategy 1: paths resolved relative to this module.
        // fileURLToPath is used instead of URL.pathname because pathname stays
        // percent-encoded (a space becomes "%20") and is not a valid filesystem
        // path on every platform.
        try {
            const { fileURLToPath } = await import('node:url');
            const resolveAsset = (fileName) => fileURLToPath(new URL(`../../assets/models/all-MiniLM-L6-v2/${fileName}`, import.meta.url));
            pathsToTry.push([
                resolveAsset('model.safetensors'),
                resolveAsset('tokenizer.json'),
                resolveAsset('config.json'),
            ]);
        }
        catch {
            // import.meta.url is not a usable file URL — rely on the other strategies.
        }
        // Strategy 2: node_modules path (installed packages)
        const nmPath = './node_modules/@soulcraft/brainy/assets/models/all-MiniLM-L6-v2';
        pathsToTry.push([
            `${nmPath}/model.safetensors`,
            `${nmPath}/tokenizer.json`,
            `${nmPath}/config.json`,
        ]);
        // Strategy 3: assets folder relative to CWD (local development)
        pathsToTry.push([
            './assets/models/all-MiniLM-L6-v2/model.safetensors',
            './assets/models/all-MiniLM-L6-v2/tokenizer.json',
            './assets/models/all-MiniLM-L6-v2/config.json',
        ]);
        for (const [modelPath, tokenizerPath, configPath] of pathsToTry) {
            try {
                const [model, tokenizer, config] = await Promise.all([
                    Bun.file(modelPath).arrayBuffer(),
                    Bun.file(tokenizerPath).arrayBuffer(),
                    Bun.file(configPath).arrayBuffer(),
                ]);
                if (model.byteLength > 0 && tokenizer.byteLength > 0 && config.byteLength > 0) {
                    return {
                        model: Buffer.from(model),
                        tokenizer: Buffer.from(tokenizer),
                        config: Buffer.from(config),
                    };
                }
            }
            catch {
                // This candidate is unreadable — try the next one.
                continue;
            }
        }
        throw new Error('Could not load model assets. Ensure model files are accessible:\n' +
            ' Option 1: npm install @soulcraft/brainy (includes assets)\n' +
            ' Option 2: Copy assets/ folder to your working directory\n' +
            ' Option 3: For bun --compile, use --asset flag');
    }
    /**
     * Node loader: reads the three asset files from the `assets/models`
     * directory two levels above this module.
     *
     * NOTE(review): unlike the Bun loader there is no node_modules / CWD
     * fallback here — confirm the assets really ship at this relative path.
     *
     * @throws {Error} when the assets directory does not exist.
     */
    async loadNodeAssets() {
        const fs = await import('node:fs');
        const nodePath = await import('node:path');
        const { fileURLToPath } = await import('node:url');
        const thisDir = nodePath.dirname(fileURLToPath(import.meta.url));
        const assetsDir = nodePath.resolve(thisDir, '../../assets/models/all-MiniLM-L6-v2');
        if (!fs.existsSync(assetsDir)) {
            throw new Error(`Model assets not found: ${assetsDir}\n` +
                `Ensure @soulcraft/brainy is installed correctly.`);
        }
        // readFile without an encoding already resolves to Buffers, so they are
        // returned as-is rather than copied again (the model alone is ~88MB).
        const [model, tokenizer, config] = await Promise.all([
            fs.promises.readFile(nodePath.join(assetsDir, 'model.safetensors')),
            fs.promises.readFile(nodePath.join(assetsDir, 'tokenizer.json')),
            fs.promises.readFile(nodePath.join(assetsDir, 'config.json')),
        ]);
        return { model, tokenizer, config };
    }
    /**
     * Generate embedding for text.
     *
     * @param {string} text
     * @returns {Promise<number[]>} the embedding vector only.
     */
    async embed(text) {
        const result = await this.embedWithMetadata(text);
        return result.embedding;
    }
    /**
     * Generate embedding with metadata. Initializes the engine on demand.
     *
     * @param {string} text
     * @returns {Promise<{embedding: number[], tokenCount: number, processingTimeMs: number}>}
     * @throws rethrows any native failure after marking the engine for
     *   re-initialization on the next call.
     */
    async embedWithMetadata(text) {
        if (!this.initialized) {
            await this.initialize();
        }
        if (!this.engine) {
            throw new Error('Engine not properly initialized');
        }
        try {
            const startTime = Date.now();
            const embedding = this.engine.embed(text);
            const processingTimeMs = Date.now() - startTime;
            this.embedCount++;
            this.totalProcessingTimeMs += processingTimeMs;
            return {
                embedding,
                tokenCount: 0, // Candle handles tokenization internally
                processingTimeMs,
            };
        }
        catch (error) {
            console.error('Native embed failed, marking engine for re-initialization:', error);
            this.initialized = false;
            this.engine = null;
            throw error;
        }
    }
    /**
     * Batch embed multiple texts. Initializes the engine on demand.
     *
     * @param {string[]} texts
     * @returns {Promise<number[][]>} one vector per input; `[]` for empty input.
     * @throws rethrows any native failure after marking the engine for
     *   re-initialization on the next call.
     */
    async embedBatch(texts) {
        if (!this.initialized) {
            await this.initialize();
        }
        if (!this.engine) {
            throw new Error('Engine not properly initialized');
        }
        if (texts.length === 0) {
            return [];
        }
        try {
            const startTime = Date.now();
            const embeddings = this.engine.embedBatch(texts);
            // Account for the time as well as the count; otherwise the average
            // reported by getStats() is diluted by every batch call.
            this.totalProcessingTimeMs += Date.now() - startTime;
            this.embedCount += texts.length;
            return embeddings;
        }
        catch (error) {
            console.error('Native embedBatch failed, marking engine for re-initialization:', error);
            this.initialized = false;
            this.engine = null;
            throw error;
        }
    }
    /**
     * Check if initialized.
     */
    isInitialized() {
        return this.initialized;
    }
    /**
     * Get engine statistics: the native engine's own stats when an engine is
     * attached, otherwise the counters kept by this wrapper.
     */
    getStats() {
        if (this.engine) {
            return this.engine.getStats();
        }
        return {
            initialized: this.initialized,
            embedCount: this.embedCount,
            totalProcessingTimeMs: this.totalProcessingTimeMs,
            avgProcessingTimeMs: this.embedCount > 0 ? this.totalProcessingTimeMs / this.embedCount : 0,
            modelName: MODEL_CONSTANTS.MODEL_NAME,
        };
    }
    /**
     * Dispose and free resources.
     */
    async dispose() {
        // napi-rs handles cleanup via GC; just clear references
        this.engine = null;
        this.initialized = false;
    }
    /**
     * Reset singleton (for testing).
     */
    static resetInstance() {
        if (globalInstance) {
            // dispose() only clears fields synchronously and cannot reject, so
            // the returned promise is intentionally not awaited.
            void globalInstance.dispose();
        }
        globalInstance = null;
        globalInitPromise = null;
    }
}
|
|
277
|
+
/**
|
|
278
|
+
* Calculate cosine similarity between two embeddings (native)
|
|
279
|
+
*/
|
|
280
|
+
export function cosineSimilarity(a, b) {
    // Preferred path: delegate to the native implementation.
    try {
        return loadNativeModule().cosineSimilarity(a, b);
    }
    catch {
        // Native module unavailable or it rejected the input — compute in JS below.
    }
    // Vectors must have the same, non-zero length to be comparable.
    const comparable = a.length === b.length && a.length > 0;
    if (!comparable) {
        return 0;
    }
    let dotProduct = 0;
    let squaredNormA = 0;
    let squaredNormB = 0;
    let idx = 0;
    while (idx < a.length) {
        const left = a[idx];
        const right = b[idx];
        dotProduct += left * right;
        squaredNormA += left * left;
        squaredNormB += right * right;
        idx += 1;
    }
    // A zero-magnitude vector has no direction; report no similarity.
    const hasZeroVector = squaredNormA === 0 || squaredNormB === 0;
    return hasZeroVector ? 0 : dotProduct / (Math.sqrt(squaredNormA) * Math.sqrt(squaredNormB));
}
|
|
300
|
+
// Export singleton access
|
|
301
|
+
export const nativeEmbeddingEngine = NativeEmbeddingEngine.getInstance();
|
|
302
|
+
//# sourceMappingURL=NativeEmbeddingEngine.js.map
|