@soulcraft/cortex 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/LICENSE +16 -0
  2. package/README.md +125 -0
  3. package/dist/graph/NativeGraphAdjacencyIndex.d.ts +92 -0
  4. package/dist/graph/NativeGraphAdjacencyIndex.js +671 -0
  5. package/dist/index.d.ts +22 -0
  6. package/dist/index.js +23 -0
  7. package/dist/license.d.ts +18 -0
  8. package/dist/license.js +172 -0
  9. package/dist/native/NativeEmbeddingEngine.d.ts +79 -0
  10. package/dist/native/NativeEmbeddingEngine.js +302 -0
  11. package/dist/native/NativeRoaringBitmap32.d.ts +114 -0
  12. package/dist/native/NativeRoaringBitmap32.js +221 -0
  13. package/dist/native/ffi.d.ts +20 -0
  14. package/dist/native/ffi.js +48 -0
  15. package/dist/native/index.d.ts +30 -0
  16. package/dist/native/index.js +58 -0
  17. package/dist/native/napi.d.ts +21 -0
  18. package/dist/native/napi.js +88 -0
  19. package/dist/native/types.d.ts +710 -0
  20. package/dist/native/types.js +16 -0
  21. package/dist/plugin.d.ts +22 -0
  22. package/dist/plugin.js +115 -0
  23. package/dist/storage/mmapFileSystemStorage.d.ts +24 -0
  24. package/dist/storage/mmapFileSystemStorage.js +73 -0
  25. package/dist/utils/NativeMetadataIndex.d.ts +185 -0
  26. package/dist/utils/NativeMetadataIndex.js +1274 -0
  27. package/dist/utils/nativeEntityIdMapper.d.ts +84 -0
  28. package/dist/utils/nativeEntityIdMapper.js +134 -0
  29. package/native/brainy-native.darwin-arm64.node +0 -0
  30. package/native/brainy-native.darwin-x64.node +0 -0
  31. package/native/brainy-native.linux-arm64-gnu.node +0 -0
  32. package/native/brainy-native.linux-x64-gnu.node +0 -0
  33. package/native/brainy-native.win32-x64-msvc.node +0 -0
  34. package/native/index.d.ts +1068 -0
  35. package/package.json +66 -0
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Shared types for the native module integration.
3
+ *
4
+ * These types are used by both the napi-rs and bun:ffi loaders.
5
+ * Ported from src/embeddings/wasm/types.ts — keeps the same API surface.
6
+ */
7
/**
 * Model constants for all-MiniLM-L6-v2.
 *
 * The object is frozen: it is a module-level singleton shared by every
 * importer, so an accidental write from one consumer would otherwise change
 * the values seen by all of them.
 */
export const MODEL_CONSTANTS = Object.freeze({
    HIDDEN_SIZE: 384,
    MAX_SEQUENCE_LENGTH: 256,
    VOCAB_SIZE: 30522,
    MODEL_NAME: 'all-MiniLM-L6-v2',
});
16
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1,22 @@
1
+ /**
2
+ * @soulcraft/cortex Plugin Entry Point
3
+ *
4
+ * Implements the BrainyPlugin interface to register all native providers.
5
+ * Auto-detected by brainy during init() via dynamic import.
6
+ *
7
+ * Storage adapters are ALWAYS registered (even without a license) so users
8
+ * can always access their data. Compute acceleration requires a valid license.
9
+ *
10
+ * Provider registration order:
11
+ * 1. storage:mmap-filesystem — ALWAYS registered (data access)
12
+ * 2. distance — SIMD-accelerated cosine (licensed)
13
+ * 3. metadataIndex — Native Rust query/mutation engine (licensed)
14
+ * 4. graphIndex — Native 4 LSM-trees with verb tracking (licensed)
15
+ * 5. embeddings — Candle ML native engine (CPU/CUDA/Metal) (licensed)
16
+ * 6. roaring — CRoaring bitmap bindings (licensed)
17
+ * 7. msgpack — Native encode/decode (licensed)
18
+ */
19
+ import type { BrainyPlugin } from '@soulcraft/brainy/plugin';
20
/**
 * The plugin instance that this module exposes as its default export.
 * Its shape is the `BrainyPlugin` contract imported from `@soulcraft/brainy/plugin`.
 */
declare const cortexPlugin: BrainyPlugin;
export { cortexPlugin as default };
22
+ //# sourceMappingURL=plugin.d.ts.map
package/dist/plugin.js ADDED
@@ -0,0 +1,115 @@
1
+ /**
2
+ * @soulcraft/cortex Plugin Entry Point
3
+ *
4
+ * Implements the BrainyPlugin interface to register all native providers.
5
+ * Auto-detected by brainy during init() via dynamic import.
6
+ *
7
+ * Storage adapters are ALWAYS registered (even without a license) so users
8
+ * can always access their data. Compute acceleration requires a valid license.
9
+ *
10
+ * Provider registration order:
11
+ * 1. storage:mmap-filesystem — ALWAYS registered (data access)
12
+ * 2. distance — SIMD-accelerated cosine (licensed)
13
+ * 3. metadataIndex — Native Rust query/mutation engine (licensed)
14
+ * 4. graphIndex — Native 4 LSM-trees with verb tracking (licensed)
15
+ * 5. embeddings — Candle ML native engine (CPU/CUDA/Metal) (licensed)
16
+ * 6. roaring — CRoaring bitmap bindings (licensed)
17
+ * 7. msgpack — Native encode/decode (licensed)
18
+ */
19
+ import { loadNativeModule, isNativeAvailable } from './native/index.js';
20
+ import { validateLicense } from './license.js';
21
/**
 * Plugin object exported as the package default.
 *
 * `activate` always registers the mmap filesystem storage adapter. The native
 * compute providers are registered only when the native module is available
 * and the license validates. `activate` resolves to `true` on every path.
 */
const cortexPlugin = {
    name: '@soulcraft/cortex',
    /**
     * Registers this package's providers on the supplied plugin context.
     *
     * @param context - Host-supplied context; only
     *   `context.registerProvider(key, provider)` is called on it.
     * @returns `true` in all cases: either storage only, or storage plus the
     *   native compute providers.
     */
    async activate(context) {
        // Storage adapters are ALWAYS registered — users must always be able
        // to access their data, even if the license expires. Brainy falls back
        // to JS compute but needs the storage adapter to read the files.
        const { MmapFileSystemStorage } = await import('./storage/mmapFileSystemStorage.js');
        context.registerProvider('storage:mmap-filesystem', {
            name: 'mmap-filesystem',
            create: (config) => new MmapFileSystemStorage(config.rootDirectory, {
                compression: config.compression,
                compressionLevel: config.compressionLevel
            })
        });
        // Gate: native module must be available for compute acceleration
        if (!isNativeAvailable()) {
            console.log('[cortex] Native module not available — using JavaScript fallback for compute');
            return true; // storage registered, no native acceleration
        }
        // Gate: license must be valid for compute acceleration
        if (!await validateLicense()) {
            console.log('[cortex] Native acceleration disabled — using JavaScript fallback for compute');
            return true; // storage registered, no native acceleration
        }
        const native = loadNativeModule();
        // Distance: SIMD-accelerated cosine
        // This is the highest-impact provider — every HNSW search uses it
        context.registerProvider('distance', native.cosineDistance);
        // Quantized distance: SQ8 cosine distance on uint8 arrays (no dequantization)
        context.registerProvider('distance:sq8', native.cosineDistanceSq8);
        context.registerProvider('distance:sq8:batch', native.cosineDistanceSq8Batch);
        // Quantized distance: SQ4 cosine distance on nibble-packed arrays
        context.registerProvider('distance:sq4', native.cosineDistanceSq4);
        context.registerProvider('distance:sq4:batch', native.cosineDistanceSq4Batch);
        // Quantization: native SQ8/SQ4 quantize/dequantize
        context.registerProvider('quantization:sq8', {
            quantize: native.quantizeSq8,
            dequantize: native.dequantizeSq8,
            serialize: native.serializeSq8,
            deserialize: native.deserializeSq8,
        });
        context.registerProvider('quantization:sq4', {
            quantize: native.quantizeSq4,
            dequantize: native.dequantizeSq4,
        });
        // Graph compression: delta-varint encoded connection lists
        context.registerProvider('graph:compression', {
            encode: native.encodeConnections,
            decode: native.decodeConnections,
        });
        // Product Quantization: 16-32x compression for large datasets
        context.registerProvider('quantization:pq', native.NativePqCodebook);
        // Metadata index: Native Rust query/mutation engine
        const { MetadataIndexManager: NativeMetadataIndex } = await import('./utils/NativeMetadataIndex.js');
        context.registerProvider('metadataIndex', (storage) => new NativeMetadataIndex(storage));
        // Graph adjacency: Native 4 LSM-trees with verb tracking
        const { GraphAdjacencyIndex: NativeGraphAdjacencyIndex } = await import('./graph/NativeGraphAdjacencyIndex.js');
        context.registerProvider('graphIndex', (storage) => new NativeGraphAdjacencyIndex(storage));
        // Embeddings: Candle ML native engine
        const { NativeEmbeddingEngine } = await import('./native/NativeEmbeddingEngine.js');
        const engine = NativeEmbeddingEngine.getInstance();
        // One shared in-flight initialization. Without it, several embedding
        // requests arriving before the engine is ready would each observe
        // isInitialized() === false and each start their own initialize().
        let pendingInit = null;
        const ensureEngineReady = async () => {
            if (engine.isInitialized()) {
                return;
            }
            if (pendingInit === null) {
                // Cleared on settle so a failed initialization is retried by the
                // next call instead of being cached forever.
                pendingInit = Promise.resolve(engine.initialize()).finally(() => {
                    pendingInit = null;
                });
            }
            await pendingInit;
        };
        context.registerProvider('embeddings', async (text) => {
            await ensureEngineReady();
            if (Array.isArray(text)) {
                return engine.embedBatch(text);
            }
            return engine.embed(text);
        });
        // Roaring bitmaps: CRoaring bindings (binary-compatible with roaring-wasm)
        const { RoaringBitmap32 } = await import('./native/NativeRoaringBitmap32.js');
        context.registerProvider('roaring', RoaringBitmap32);
        // Msgpack: Native encode/decode
        context.registerProvider('msgpack', {
            encode: native.msgpackEncode,
            decode: native.msgpackDecode
        });
        // Mmap Vector Store: zero-copy vector file (mmap-filesystem only)
        context.registerProvider('vectorStore:mmap', native.NativeMmapVectorStore);
        // Batch I/O: direct file reads without mmap (reranking, fallback)
        context.registerProvider('io:batchReadVectors', native.batchReadVectorsDirect);
        context.registerProvider('io:vectorFileInfo', native.readVectorFileInfo);
        // Graph-aware compaction: BFS traversal order for disk locality
        context.registerProvider('compaction:bfsOrder', native.computeBfsOrder);
        context.registerProvider('compaction:hnswOrder', native.computeHnswTraversalOrder);
        return true;
    },
    /**
     * Intentionally a no-op: no explicit cleanup is performed on deactivation.
     */
    async deactivate() {
        // Native resources are cleaned up by the Rust drop() mechanism
        // when the process exits. No explicit cleanup needed.
    }
};
114
+ export default cortexPlugin;
115
+ //# sourceMappingURL=plugin.js.map
@@ -0,0 +1,24 @@
1
+ /**
2
+ * MmapFileSystemStorage — FileSystemStorage with binary blob support for mmap SSTables
3
+ *
4
+ * Extends FileSystemStorage with saveBinaryBlob/loadBinaryBlob/deleteBinaryBlob/getBinaryBlobPath
5
+ * methods that enable the NativeGraphAdjacencyIndex to use adapter-controlled mmap I/O.
6
+ *
7
+ * Binary blobs are stored in a _blobs/ subdirectory with atomic writes (tmp + rename).
8
+ * getBinaryBlobPath returns the filesystem path so Rust can mmap the file directly.
9
+ */
10
+ import { FileSystemStorage } from '@soulcraft/brainy';
11
/**
 * `FileSystemStorage` subclass that adds binary-blob persistence
 * (save / load / delete / path lookup) on top of the base adapter.
 */
export declare class MmapFileSystemStorage extends FileSystemStorage {
    private blobsDir;
    /**
     * @param rootDirectory - Root directory handed to the base `FileSystemStorage`.
     * @param options - Optional compression settings, forwarded to the base class.
     */
    constructor(rootDirectory: string, options?: {
        compression?: boolean;
        compressionLevel?: number;
    });
    /** Initializes the base adapter and then creates the blobs directory. */
    init(): Promise<void>;
    private blobPath;
    /**
     * Writes `data` as the blob stored under `key`.
     * @param key - Slash-separated blob key.
     * @param data - Bytes to persist.
     */
    saveBinaryBlob(key: string, data: Buffer): Promise<void>;
    /**
     * Reads the blob stored under `key`.
     * @returns The blob bytes, or `null` when no blob is stored under `key`.
     */
    loadBinaryBlob(key: string): Promise<Buffer | null>;
    /** Deletes the blob stored under `key`; a blob that does not exist is not an error. */
    deleteBinaryBlob(key: string): Promise<void>;
    /** Returns the filesystem path used for the blob stored under `key`. */
    getBinaryBlobPath(key: string): string | null;
}
24
+ //# sourceMappingURL=mmapFileSystemStorage.d.ts.map
@@ -0,0 +1,73 @@
1
+ /**
2
+ * MmapFileSystemStorage — FileSystemStorage with binary blob support for mmap SSTables
3
+ *
4
+ * Extends FileSystemStorage with saveBinaryBlob/loadBinaryBlob/deleteBinaryBlob/getBinaryBlobPath
5
+ * methods that enable the NativeGraphAdjacencyIndex to use adapter-controlled mmap I/O.
6
+ *
7
+ * Binary blobs are stored in a _blobs/ subdirectory with atomic writes (tmp + rename).
8
+ * getBinaryBlobPath returns the filesystem path so Rust can mmap the file directly.
9
+ */
10
+ import { FileSystemStorage } from '@soulcraft/brainy';
11
// Handles to Node.js built-ins. They stay undefined until ensureModules()
// has completed once.
let fs;
let fsPromises;
let pathMod;
let modulesLoaded = false;
/**
 * Dynamically imports `node:fs` and `node:path` and stores them in the
 * module-level handles above. Once `modulesLoaded` is set, later calls
 * return without importing again.
 */
async function ensureModules() {
    if (!modulesLoaded) {
        const loaded = await Promise.all([
            import('node:fs'),
            import('node:path'),
        ]);
        const nodeFs = loaded[0];
        const nodePath = loaded[1];
        fs = nodeFs;
        fsPromises = nodeFs.promises;
        pathMod = nodePath;
        modulesLoaded = true;
    }
}
28
/**
 * Reports whether `error` carries the Node.js `ENOENT` code, i.e. the
 * failure was "no such file or directory".
 */
function isMissingFileError(error) {
    return typeof error === 'object' && error !== null && error.code === 'ENOENT';
}
/**
 * FileSystemStorage subclass that stores binary blobs as `.bin` files under a
 * `_blobs` directory inside the storage root.
 *
 * The blob methods rely on the lazily loaded `node:fs` / `node:path` handles,
 * which `init()` loads via `ensureModules()`.
 */
export class MmapFileSystemStorage extends FileSystemStorage {
    blobsDir;
    /**
     * @param rootDirectory - Storage root, forwarded to the base class.
     * @param options - Forwarded unchanged to the base class.
     */
    constructor(rootDirectory, options) {
        super(rootDirectory, options);
        // Provisional value; init() recomputes it with path.join once node:path is loaded.
        this.blobsDir = rootDirectory + '/_blobs';
    }
    /**
     * Initializes the base adapter, loads the Node.js modules and creates the
     * blobs directory (including missing parents).
     */
    async init() {
        await super.init();
        await ensureModules();
        // Re-resolve with proper path.join after modules are loaded
        this.blobsDir = pathMod.join(this.rootDir, '_blobs');
        await fsPromises.mkdir(this.blobsDir, { recursive: true });
    }
    /**
     * Maps a slash-separated blob key to its file path.
     *
     * @param key - e.g. "graph-lsm/source/sstable-123"
     * @returns e.g. {rootDir}/_blobs/graph-lsm/source/sstable-123.bin
     */
    blobPath(key) {
        // NOTE(review): key segments are joined as-is, so a ".." segment would
        // resolve outside the blobs directory — confirm keys are never caller-controlled.
        return pathMod.join(this.blobsDir, ...key.split('/')) + '.bin';
    }
    /**
     * Writes `data` as the blob for `key` using a temp file followed by a rename.
     *
     * The temp file name is unique per call so that overlapping saves of the
     * same key cannot write into (or rename away) each other's temp file, and
     * the temp file is removed if the write or rename fails.
     *
     * @param key - Slash-separated blob key.
     * @param data - Bytes to persist.
     */
    async saveBinaryBlob(key, data) {
        const filePath = this.blobPath(key);
        await fsPromises.mkdir(pathMod.dirname(filePath), { recursive: true });
        const uniqueTag = `${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2)}`;
        const tmpPath = `${filePath}.${uniqueTag}.tmp`;
        try {
            await fsPromises.writeFile(tmpPath, data);
            await fsPromises.rename(tmpPath, filePath);
        }
        catch (error) {
            // Best-effort cleanup; the original failure is what the caller needs to see.
            await fsPromises.unlink(tmpPath).catch(() => { });
            throw error;
        }
    }
    /**
     * Reads the blob stored under `key`.
     *
     * @returns The blob bytes, or `null` when the blob file does not exist.
     * @throws Any failure other than a missing file (for example a permission
     *   or I/O error), so that it is not mistaken for "no data stored".
     */
    async loadBinaryBlob(key) {
        try {
            return await fsPromises.readFile(this.blobPath(key));
        }
        catch (error) {
            if (isMissingFileError(error)) {
                return null;
            }
            throw error;
        }
    }
    /**
     * Deletes the blob stored under `key`. A blob file that does not exist is ignored.
     *
     * @throws Any failure other than a missing file.
     */
    async deleteBinaryBlob(key) {
        try {
            await fsPromises.unlink(this.blobPath(key));
        }
        catch (error) {
            /* ignore missing files */
            if (!isMissingFileError(error)) {
                throw error;
            }
        }
    }
    /**
     * Returns the file path used for the blob stored under `key`. The path is
     * computed only; this method does not check that the file exists.
     */
    getBinaryBlobPath(key) {
        return this.blobPath(key);
    }
}
73
+ //# sourceMappingURL=mmapFileSystemStorage.js.map
@@ -0,0 +1,185 @@
1
+ /**
2
+ * NativeMetadataIndex — TypeScript wrapper around the Rust NativeMetadataIndex.
3
+ *
4
+ * Implements the same public API as the old MetadataIndexManager (3,721 lines),
5
+ * delegating core operations (query, mutation, normalization) to Rust.
6
+ *
7
+ * Architecture:
8
+ * - Rust owns: bitmap operations, filter evaluation, value normalization,
9
+ * field extraction, chunk management, text search, entity ID mapping
10
+ * - TS owns: async storage I/O, lazy field loading, rebuild orchestration,
11
+ * flush, getSortedIdsForFilter (loads sort values from storage)
12
+ *
13
+ * Buffer exchange pattern: TS loads data from storage, passes JSON to Rust.
14
+ * Rust operates in-memory, returns serialized state for TS to persist.
15
+ */
16
+ import type { StorageAdapter } from '@soulcraft/brainy';
17
+ import { NounType, VerbType } from '@soulcraft/brainy/types/graphTypes';
18
/**
 * One index entry: a field name, a single primitive value, and a set of ids.
 * Presumably `ids` holds the entities whose `field` equals `value` — confirm
 * against the implementation.
 */
export interface MetadataIndexEntry {
    field: string;
    value: string | number | boolean;
    ids: Set<string>;
    /** Numeric timestamp; unit not visible in this declaration (TODO confirm). */
    lastUpdated: number;
}
24
/**
 * Per-field index data: a string-keyed table of numbers plus a timestamp.
 * What the numbers represent is not visible in this declaration.
 */
export interface FieldIndexData {
    values: Record<string, number>;
    /** Numeric timestamp; unit not visible in this declaration (TODO confirm). */
    lastUpdated: number;
}
28
/**
 * Summary statistics object resolved by `MetadataIndexManager.getStats()`.
 */
export interface MetadataIndexStats {
    totalEntries: number;
    totalIds: number;
    /** Names of the indexed fields. */
    fieldsIndexed: string[];
    /** Numeric timestamp; unit not visible in this declaration (TODO confirm). */
    lastRebuild: number;
    indexSize: number;
}
35
/**
 * Optional settings accepted by the `MetadataIndexManager` constructor.
 * Every property may be omitted; defaults and effects are decided by the
 * implementation and are not visible in this declaration.
 */
export interface MetadataIndexConfig {
    maxIndexSize?: number;
    rebuildThreshold?: number;
    autoOptimize?: boolean;
    /** List of field names (presumably an allow-list — confirm). */
    indexedFields?: string[];
    /** List of field names (presumably a deny-list — confirm). */
    excludeFields?: string[];
}
42
/**
 * Cardinality figures for a single field (module-private type; it is
 * referenced by `FieldStats` and by `getFieldCardinality`).
 */
interface CardinalityInfo {
    uniqueValues: number;
    totalValues: number;
    /** One of three fixed labels. */
    distribution: 'uniform' | 'skewed' | 'sparse';
    updateFrequency: number;
    /** Numeric timestamp; unit not visible in this declaration (TODO confirm). */
    lastAnalyzed: number;
}
49
/**
 * Per-field statistics (module-private type); the value type of the map
 * resolved by `getFieldStatistics()`.
 */
interface FieldStats {
    cardinality: CardinalityInfo;
    queryCount: number;
    rangeQueryCount: number;
    exactQueryCount: number;
    avgQueryTime: number;
    /** Only the literal 'hash' is permitted by this declaration. */
    indexType: 'hash';
    normalizationStrategy?: 'none' | 'precision' | 'bucket';
}
58
/**
 * MetadataIndexManager — native Rust implementation with a TS storage bridge.
 *
 * Declared as a drop-in replacement for the old pure-TS MetadataIndexManager.
 * All bitmap AND/OR/NOT operations execute in Rust without crossing FFI.
 */
export declare class MetadataIndexManager {
    // ---- private state -------------------------------------------------
    private storage;
    private config;
    private native;
    private isRebuilding;
    private lastFlushTime;
    private autoFlushThreshold;
    private dirtyFields;
    private loadedFields;
    private knownFields;
    private fieldStats;
    private typeFieldAffinity;
    private totalEntitiesByType;
    private entityCountsByTypeFixed;
    private verbCountsByTypeFixed;
    private unifiedCache;
    private fieldTypeInference;

    // ---- construction / startup ----------------------------------------
    /**
     * @param storage - Storage adapter used for persistence.
     * @param config - Optional index settings.
     */
    constructor(storage: StorageAdapter, config?: MetadataIndexConfig);
    init(): Promise<void>;
    private loadEntityIdMapper;
    private saveEntityIdMapper;
    private loadFieldRegistry;
    private saveFieldRegistry;
    /**
     * Ensure a field's sparse index + all chunks are loaded into Rust.
     * Matches the lazy-loading pattern of the old UnifiedCache approach.
     */
    private ensureFieldLoaded;
    /** Ensure multiple fields are loaded (parallel). */
    private ensureFieldsLoaded;
    /** Extract filter field names for lazy loading. */
    private extractFilterFields;
    /** Persist dirty chunks, sparse indices, field indexes from a MutationResult. */
    private persistMutationResult;
    private warmCache;
    private warmCacheForTopTypes;
    private lazyLoadCounts;
    private syncTypeCountsToFixed;
    private syncTypeCountsFromFixed;

    // ---- id lookups ----------------------------------------------------
    getIds(field: string, value: any): Promise<string[]>;
    getIdsForFilter(filter: any): Promise<string[]>;
    private filterNeedsAllIds;
    getIdsForMultipleFields(fieldValuePairs: Array<{
        field: string;
        value: any;
    }>): Promise<string[]>;
    getIdsForTextQuery(query: string): Promise<Array<{
        id: string;
        matchCount: number;
    }>>;
    getSortedIdsForFilter(filter: any, orderBy: string, order?: 'asc' | 'desc'): Promise<string[]>;
    getFieldValueForEntity(entityId: string, field: string): Promise<any>;
    private denormalizeValue;
    getFilterValues(field: string): Promise<string[]>;
    getFilterFields(): Promise<string[]>;

    // ---- mutation ------------------------------------------------------
    addToIndex(id: string, entityOrMetadata: any, skipFlush?: boolean, deferWrites?: boolean): Promise<void>;
    removeFromIndex(id: string, metadata?: any): Promise<void>;
    private updateTypeTracking;

    // ---- text helpers --------------------------------------------------
    tokenize(text: string): string[];
    hashWord(word: string): number;
    extractTextContent(data: any): string;

    // ---- counts --------------------------------------------------------
    getEntityCountByType(type: string): number;
    getTotalEntityCount(): number;
    getAllEntityCounts(): Map<string, number>;
    getEntityCountByTypeEnum(type: NounType): number;
    getVerbCountByTypeEnum(type: VerbType): number;
    getTopNounTypes(n: number): NounType[];
    getTopVerbTypes(n: number): VerbType[];
    getAllNounTypeCounts(): Map<NounType, number>;
    getAllVerbTypeCounts(): Map<VerbType, number>;
    getVFSEntityCountByType(type: string): Promise<number>;
    getAllVFSEntityCounts(): Promise<Map<string, number>>;
    getTotalVFSEntityCount(): Promise<number>;
    getCountForCriteria(field: string, value: any): Promise<number>;
    getAllIds(): Promise<string[]>;

    // ---- persistence / maintenance -------------------------------------
    flush(): Promise<void>;
    getStats(): Promise<MetadataIndexStats>;
    validateConsistency(): Promise<{
        healthy: boolean;
        avgEntriesPerEntity: number;
        entityCount: number;
        indexEntryCount: number;
        recommendation: string | null;
    }>;
    detectAndRepairCorruption(): Promise<void>;
    rebuild(): Promise<void>;
    /** During rebuild, persist all dirty data accumulated so far. */
    private flushRebuildDirty;
    clearAllIndexData(): Promise<void>;
    private getPersistedFieldList;
    private deleteFieldChunks;

    // ---- statistics / planning -----------------------------------------
    getFieldStatistics(): Promise<Map<string, FieldStats>>;
    getFieldCardinality(field: string): Promise<CardinalityInfo | null>;
    getFieldsWithCardinality(): Promise<Array<{
        field: string;
        cardinality: number;
        distribution: string;
    }>>;
    getOptimalQueryPlan(filters: Record<string, any>): Promise<{
        strategy: 'exact' | 'range' | 'hybrid';
        fieldOrder: string[];
        estimatedCost: number;
    }>;
    getFieldsForType(nounType: NounType): Promise<Array<{
        field: string;
        affinity: number;
        occurrences: number;
        totalEntities: number;
    }>>;
    getIdsForCriteria(criteria: Record<string, any>): Promise<string[]>;
    private yieldToEventLoop;
}
184
+ export {};
185
+ //# sourceMappingURL=NativeMetadataIndex.d.ts.map