@soulcraft/brainy 3.43.3 → 3.44.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,290 @@
1
+ /**
2
+ * SSTable - Sorted String Table for LSM-Tree
3
+ *
4
+ * Production-grade sorted file format for storing graph relationships:
5
+ * - Binary format using MessagePack (50-70% smaller than JSON)
6
+ * - Sorted by sourceId for O(log n) binary search
7
+ * - Bloom filter for fast negative lookups (90% disk I/O reduction)
8
+ * - Zone maps (min/max keys) for file skipping
9
+ * - Immutable after creation (LSM-tree property)
10
+ *
11
+ * File Structure:
12
+ * - Header: version, metadata, bloom filter, zone map
13
+ * - Data: sorted array of [sourceId, targetIds[]]
14
+ * - Footer: checksum, stats
15
+ */
16
import { createHash } from 'node:crypto';
import { encode, decode } from '@msgpack/msgpack';
import { BloomFilter } from './BloomFilter.js';
18
/**
 * Compare two source IDs by UTF-16 code unit order (the order used by the
 * `<` / `>` operators on strings).
 *
 * Sorting, binary search and the zone-map range check must all agree on one
 * ordering. `localeCompare` is locale-dependent and can report 0 for strings
 * that are not identical, so it is deliberately not used here.
 *
 * @param a First source ID
 * @param b Second source ID
 * @returns Negative if a < b, positive if a > b, 0 if neither
 */
function compareSourceIds(a, b) {
    if (a < b) {
        return -1;
    }
    return a > b ? 1 : 0;
}
/**
 * SSTable - Immutable sorted file for LSM-tree
 *
 * Key Properties:
 * - Immutable: Never modified after creation
 * - Sorted: Entries sorted by sourceId (code unit order) for binary search
 * - Filtered: Bloom filter for fast negative lookups
 * - Zoned: Min/max keys for file skipping
 * - Compact: MessagePack binary format
 *
 * Typical Usage:
 * 1. Create from MemTable entries
 * 2. Serialize and store via StorageAdapter
 * 3. Load from storage when needed
 * 4. Query with binary search
 * 5. Eventually merge via compaction
 */
export class SSTable {
    /**
     * Create a new SSTable from entries
     * @param entries Unsorted entries of shape { sourceId, targets, count }.
     *                The array itself is not modified; a sorted copy is kept.
     *                The entry objects are shared, not cloned.
     * @param level Compaction level
     * @param id Unique ID for this SSTable (generated when falsy)
     */
    constructor(entries, level = 0, id) {
        // Sort a copy so the caller's array keeps its original order.
        this.entries = [...entries].sort((a, b) => compareSourceIds(a.sourceId, b.sourceId));
        const sorted = this.entries;
        // Calculate statistics
        const relationshipCount = sorted.reduce((sum, entry) => sum + entry.count, 0);
        // Create bloom filter for all sourceIds
        this.bloomFilter = BloomFilter.createOptimal(sorted.length, 0.01 // 1% false positive rate
        );
        for (const entry of sorted) {
            this.bloomFilter.add(entry.sourceId);
        }
        // Build metadata; min/max form the zone map consulted by isInRange()
        this.metadata = {
            version: SSTable.VERSION,
            id: id || this.generateId(),
            level,
            createdAt: Date.now(),
            entryCount: sorted.length,
            relationshipCount,
            minSourceId: sorted.length > 0 ? sorted[0].sourceId : '',
            maxSourceId: sorted.length > 0 ? sorted[sorted.length - 1].sourceId : '',
            sizeBytes: 0, // Will be set during serialization
            compressed: false
        };
    }
    /**
     * Generate a unique ID for this SSTable
     * @returns ID built from the current timestamp plus a random base-36 suffix
     */
    generateId() {
        return `sstable-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`;
    }
    /**
     * Check if a sourceId might be in this SSTable (using bloom filter)
     * @param sourceId The source ID to check
     * @returns true if might be present, false if the bloom filter rules it out
     */
    mightContain(sourceId) {
        return this.bloomFilter.contains(sourceId);
    }
    /**
     * Check if a sourceId is in the valid range for this SSTable (zone map)
     * @param sourceId The source ID to check
     * @returns true if minSourceId <= sourceId <= maxSourceId, false otherwise
     *          (always false for an empty table)
     */
    isInRange(sourceId) {
        if (this.metadata.entryCount === 0) {
            return false;
        }
        // String relational operators use code unit order, the same order the
        // entries are sorted by, so min/max always bracket every stored key.
        return (sourceId >= this.metadata.minSourceId &&
            sourceId <= this.metadata.maxSourceId);
    }
    /**
     * Get targets for a sourceId using binary search
     * @param sourceId The source ID to query
     * @returns Array of target IDs, or null if not found
     */
    get(sourceId) {
        // Quick check: Is it in range?
        if (!this.isInRange(sourceId)) {
            return null;
        }
        // Quick check: Does bloom filter say it might be here?
        if (!this.mightContain(sourceId)) {
            return null;
        }
        // Binary search in sorted entries (same comparator as the sort)
        let left = 0;
        let right = this.entries.length - 1;
        while (left <= right) {
            const mid = Math.floor((left + right) / 2);
            const entry = this.entries[mid];
            const cmp = compareSourceIds(entry.sourceId, sourceId);
            if (cmp === 0) {
                return entry.targets;
            }
            if (cmp < 0) {
                left = mid + 1;
            }
            else {
                right = mid - 1;
            }
        }
        // Not found (bloom filter false positive)
        return null;
    }
    /**
     * Get all entries in this SSTable
     * Used for compaction and merging
     * @returns The internal sorted entries array (not a copy; do not mutate)
     */
    getEntries() {
        return this.entries;
    }
    /**
     * Get number of entries
     */
    size() {
        return this.entries.length;
    }
    /**
     * Serialize SSTable to binary format using MessagePack
     *
     * The payload is a single map: { metadata, entries, bloomFilter, checksum }.
     * `metadata.sizeBytes` on this instance is updated after encoding, so the
     * encoded payload carries the value from before this call; deserialize()
     * derives the size from the payload itself instead of trusting that field.
     *
     * @returns Uint8Array of serialized data
     */
    serialize() {
        const data = {
            metadata: this.metadata,
            entries: this.entries,
            bloomFilter: this.bloomFilter.serialize(),
            checksum: this.calculateChecksum(this.entries)
        };
        const serialized = encode(data);
        // Update size in metadata
        this.metadata.sizeBytes = serialized.length;
        return serialized;
    }
    /**
     * Calculate checksum for data integrity
     *
     * SHA-256 over every sourceId followed by its target IDs, in entry order.
     * NOTE: values are hashed without delimiters, so different splits of the
     * same concatenated text hash identically. This is kept as-is because
     * changing it would invalidate checksums of already-written files.
     *
     * @param entries Entries to hash, in the order they are (or were) stored
     * @returns Hex-encoded SHA-256 digest
     */
    calculateChecksum(entries) {
        const hash = createHash('sha256');
        for (const entry of entries) {
            hash.update(entry.sourceId);
            for (const target of entry.targets) {
                hash.update(target);
            }
        }
        return hash.digest('hex');
    }
    /**
     * Deserialize SSTable from binary format
     * @param data Serialized SSTable data
     * @returns SSTable instance
     * @throws Error if the payload lacks entries/metadata or the checksum
     *         does not match
     */
    static deserialize(data) {
        const decoded = decode(data);
        if (!decoded || !Array.isArray(decoded.entries) || !decoded.metadata) {
            throw new Error('Invalid SSTable data: missing entries or metadata');
        }
        const sstable = new SSTable(decoded.entries, decoded.metadata.level, decoded.metadata.id);
        // Verify checksum over the entries in their stored order. The
        // constructor sorts a copy, so decoded.entries is still in the order
        // the checksum was computed with when the file was written.
        const calculatedChecksum = sstable.calculateChecksum(decoded.entries);
        if (calculatedChecksum !== decoded.checksum) {
            throw new Error(`SSTable checksum mismatch: expected ${decoded.checksum}, got ${calculatedChecksum}`);
        }
        // Restore metadata, but keep the zone map and entry count that were
        // just derived from the sorted entries (a stored zone map may have
        // been computed under a different ordering), and take the size from
        // the payload because the stored sizeBytes predates encoding.
        const { entryCount, minSourceId, maxSourceId } = sstable.metadata;
        const sizeBytes = typeof data.byteLength === 'number' ? data.byteLength : data.length;
        Object.assign(sstable.metadata, decoded.metadata, {
            entryCount,
            minSourceId,
            maxSourceId,
            sizeBytes
        });
        // Restore bloom filter
        sstable.bloomFilter = BloomFilter.deserialize(decoded.bloomFilter);
        return sstable;
    }
    /**
     * Merge multiple SSTables into a single sorted SSTable
     * Used during compaction to combine multiple files
     *
     * Targets of the same sourceId are unioned (duplicates removed).
     *
     * @param sstables Array of SSTables to merge
     * @param targetLevel Target compaction level
     * @returns New merged SSTable
     * @throws Error if sstables is empty
     */
    static merge(sstables, targetLevel) {
        if (sstables.length === 0) {
            throw new Error('Cannot merge zero SSTables');
        }
        if (sstables.length === 1) {
            // Nothing to merge, just update level
            return new SSTable(sstables[0].getEntries(), targetLevel);
        }
        // Collect all entries from all SSTables: sourceId -> Set of targets
        const allEntries = new Map();
        for (const sstable of sstables) {
            for (const entry of sstable.getEntries()) {
                let targets = allEntries.get(entry.sourceId);
                if (targets === undefined) {
                    targets = new Set();
                    allEntries.set(entry.sourceId, targets);
                }
                for (const target of entry.targets) {
                    targets.add(target);
                }
            }
        }
        // Convert back to SSTableEntry format
        const mergedEntries = [];
        for (const [sourceId, targets] of allEntries) {
            mergedEntries.push({
                sourceId,
                targets: Array.from(targets),
                count: targets.size
            });
        }
        // Create new merged SSTable (will be sorted in constructor)
        return new SSTable(mergedEntries, targetLevel);
    }
    /**
     * Get statistics about this SSTable
     * @returns Snapshot of the metadata fields plus the bloom filter's stats
     */
    getStats() {
        return {
            id: this.metadata.id,
            level: this.metadata.level,
            entries: this.metadata.entryCount,
            relationships: this.metadata.relationshipCount,
            sizeBytes: this.metadata.sizeBytes,
            minSourceId: this.metadata.minSourceId,
            maxSourceId: this.metadata.maxSourceId,
            bloomFilterStats: this.bloomFilter.getStats()
        };
    }
    /**
     * Create an SSTable from a Map of sourceId → targets
     * Convenience method for creating from MemTable
     *
     * @param sourceMap Map of sourceId to Set of targetIds
     * @param level Compaction level
     * @returns New SSTable
     */
    static fromMap(sourceMap, level = 0) {
        const entries = [];
        sourceMap.forEach((targets, sourceId) => {
            entries.push({
                sourceId,
                targets: Array.from(targets),
                count: targets.size
            });
        });
        return new SSTable(entries, level);
    }
    /**
     * Estimate memory usage of this SSTable when loaded
     * @returns Estimated bytes (rough heuristic, not a measurement)
     */
    estimateMemoryUsage() {
        let bytes = 0;
        // Metadata
        bytes += 500; // Rough estimate for metadata object
        // Entries
        for (const entry of this.entries) {
            bytes += entry.sourceId.length * 2; // UTF-16 encoding
            bytes += entry.targets.length * 40; // ~40 bytes per UUID string
            bytes += 50; // Entry object overhead
        }
        // Bloom filter
        bytes += this.bloomFilter.getStats().memoryBytes;
        return bytes;
    }
}
/**
 * Current format version
 */
SSTable.VERSION = 1;
290
+ //# sourceMappingURL=SSTable.js.map
@@ -38,10 +38,11 @@ export async function createStorage(options = {}) {
38
38
  console.warn('FileSystemStorage is not available in browser environments, falling back to memory storage');
39
39
  return new MemoryStorage();
40
40
  }
41
- console.log('Using file system storage (forced)');
41
+ const fsPath = getFileSystemPath(options);
42
+ console.log(`Using file system storage (forced): ${fsPath}`);
42
43
  try {
43
44
  const { FileSystemStorage } = await import('./adapters/fileSystemStorage.js');
44
- return new FileSystemStorage(getFileSystemPath(options));
45
+ return new FileSystemStorage(fsPath);
45
46
  }
46
47
  catch (error) {
47
48
  console.warn('Failed to load FileSystemStorage, falling back to memory storage:', error);
@@ -77,10 +78,11 @@ export async function createStorage(options = {}) {
77
78
  console.warn('FileSystemStorage is not available in browser environments, falling back to memory storage');
78
79
  return new MemoryStorage();
79
80
  }
80
- console.log('Using file system storage');
81
+ const fsPath = getFileSystemPath(options);
82
+ console.log(`Using file system storage: ${fsPath}`);
81
83
  try {
82
84
  const { FileSystemStorage } = await import('./adapters/fileSystemStorage.js');
83
- return new FileSystemStorage(getFileSystemPath(options));
85
+ return new FileSystemStorage(fsPath);
84
86
  }
85
87
  catch (error) {
86
88
  console.warn('Failed to load FileSystemStorage, falling back to memory storage:', error);
@@ -230,10 +232,11 @@ export async function createStorage(options = {}) {
230
232
  if (typeof process !== 'undefined' &&
231
233
  process.versions &&
232
234
  process.versions.node) {
233
- console.log('Using file system storage (auto-detected)');
235
+ const fsPath = getFileSystemPath(options);
236
+ console.log(`Using file system storage (auto-detected): ${fsPath}`);
234
237
  try {
235
238
  const { FileSystemStorage } = await import('./adapters/fileSystemStorage.js');
236
- return new FileSystemStorage(getFileSystemPath(options));
239
+ return new FileSystemStorage(fsPath);
237
240
  }
238
241
  catch (fsError) {
239
242
  console.warn('Failed to load FileSystemStorage, falling back to memory storage:', fsError);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@soulcraft/brainy",
3
- "version": "3.43.3",
3
+ "version": "3.44.0",
4
4
  "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.js",
@@ -162,6 +162,7 @@
162
162
  "@aws-sdk/client-s3": "^3.540.0",
163
163
  "@google-cloud/storage": "^7.14.0",
164
164
  "@huggingface/transformers": "^3.7.2",
165
+ "@msgpack/msgpack": "^3.1.2",
165
166
  "boxen": "^8.0.1",
166
167
  "chalk": "^5.3.0",
167
168
  "chardet": "^2.0.0",