@soulcraft/brainy 3.44.0 → 3.45.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -31,10 +31,6 @@ export class MetadataIndexManager {
31
31
  this.activeLocks = new Map();
32
32
  this.lockPromises = new Map();
33
33
  this.lockTimers = new Map(); // Track timers for cleanup
34
- // Adaptive Chunked Sparse Indexing (v3.42.0)
35
- // Reduces file count from 560k → 89 files (630x reduction)
36
- // ALL fields now use chunking - no more flat files
37
- this.sparseIndices = new Map(); // field -> sparse index
38
34
  this.storage = storage;
39
35
  this.config = {
40
36
  maxIndexSize: config.maxIndexSize ?? 10000,
@@ -87,6 +83,30 @@ export class MetadataIndexManager {
87
83
  async init() {
88
84
  // Initialize EntityIdMapper (loads UUID ↔ integer mappings from storage)
89
85
  await this.idMapper.init();
86
+ // Warm the cache with common fields (v3.44.1 - lazy loading optimization)
87
+ await this.warmCache();
88
+ }
89
+ /**
90
+ * Warm the cache by preloading common field sparse indices (v3.44.1)
91
+ * This improves cache hit rates by loading frequently-accessed fields at startup
92
+ * Target: >80% cache hit rate for typical workloads
93
+ */
94
+ async warmCache() {
95
+ // Common fields used in most queries
96
+ const commonFields = ['noun', 'type', 'service', 'createdAt'];
97
+ prodLog.debug(`🔥 Warming metadata cache with common fields: ${commonFields.join(', ')}`);
98
+ // Preload in parallel for speed
99
+ await Promise.all(commonFields.map(async (field) => {
100
+ try {
101
+ await this.loadSparseIndex(field);
102
+ }
103
+ catch (error) {
104
+ // Silently ignore if field doesn't exist yet
105
+ // This maintains zero-configuration principle
106
+ prodLog.debug(`Cache warming: field '${field}' not yet indexed`);
107
+ }
108
+ }));
109
+ prodLog.debug('✅ Metadata cache warmed successfully');
90
110
  }
91
111
  /**
92
112
  * Acquire an in-memory lock for coordinating concurrent metadata index writes
@@ -303,16 +323,13 @@ export class MetadataIndexManager {
303
323
  }
304
324
  /**
305
325
  * Get IDs for a value using chunked sparse index with roaring bitmaps (v3.43.0)
326
+ * v3.44.1: Now fully lazy-loaded via UnifiedCache (no local sparseIndices Map)
306
327
  */
307
328
  async getIdsFromChunks(field, value) {
308
- // Load sparse index
309
- let sparseIndex = this.sparseIndices.get(field);
329
+ // Load sparse index via UnifiedCache (lazy loading)
330
+ const sparseIndex = await this.loadSparseIndex(field);
310
331
  if (!sparseIndex) {
311
- sparseIndex = await this.loadSparseIndex(field);
312
- if (!sparseIndex) {
313
- return []; // No chunked index exists yet
314
- }
315
- this.sparseIndices.set(field, sparseIndex);
332
+ return []; // No chunked index exists yet
316
333
  }
317
334
  // Find candidate chunks using zone maps and bloom filters
318
335
  const normalizedValue = this.normalizeValue(value, field);
@@ -339,16 +356,13 @@ export class MetadataIndexManager {
339
356
  }
340
357
  /**
341
358
  * Get IDs for a range using chunked sparse index with zone maps and roaring bitmaps (v3.43.0)
359
+ * v3.44.1: Now fully lazy-loaded via UnifiedCache (no local sparseIndices Map)
342
360
  */
343
361
  async getIdsFromChunksForRange(field, min, max, includeMin = true, includeMax = true) {
344
- // Load sparse index
345
- let sparseIndex = this.sparseIndices.get(field);
362
+ // Load sparse index via UnifiedCache (lazy loading)
363
+ const sparseIndex = await this.loadSparseIndex(field);
346
364
  if (!sparseIndex) {
347
- sparseIndex = await this.loadSparseIndex(field);
348
- if (!sparseIndex) {
349
- return []; // No chunked index exists yet
350
- }
351
- this.sparseIndices.set(field, sparseIndex);
365
+ return []; // No chunked index exists yet
352
366
  }
353
367
  // Find candidate chunks using zone maps
354
368
  const candidateChunkIds = sparseIndex.findChunksForRange(min, max);
@@ -384,17 +398,14 @@ export class MetadataIndexManager {
384
398
  /**
385
399
  * Get roaring bitmap for a field-value pair without converting to UUIDs (v3.43.0)
386
400
  * This is used for fast multi-field intersection queries using hardware-accelerated bitmap AND
401
+ * v3.44.1: Now fully lazy-loaded via UnifiedCache (no local sparseIndices Map)
387
402
  * @returns RoaringBitmap32 containing integer IDs, or null if no matches
388
403
  */
389
404
  async getBitmapFromChunks(field, value) {
390
- // Load sparse index
391
- let sparseIndex = this.sparseIndices.get(field);
405
+ // Load sparse index via UnifiedCache (lazy loading)
406
+ const sparseIndex = await this.loadSparseIndex(field);
392
407
  if (!sparseIndex) {
393
- sparseIndex = await this.loadSparseIndex(field);
394
- if (!sparseIndex) {
395
- return null; // No chunked index exists yet
396
- }
397
- this.sparseIndices.set(field, sparseIndex);
408
+ return null; // No chunked index exists yet
398
409
  }
399
410
  // Find candidate chunks using zone maps and bloom filters
400
411
  const normalizedValue = this.normalizeValue(value, field);
@@ -481,25 +492,22 @@ export class MetadataIndexManager {
481
492
  }
482
493
  /**
483
494
  * Add value-ID mapping to chunked index
495
+ * v3.44.1: Now fully lazy-loaded via UnifiedCache (no local sparseIndices Map)
484
496
  */
485
497
  async addToChunkedIndex(field, value, id) {
486
- // Load or create sparse index
487
- let sparseIndex = this.sparseIndices.get(field);
498
+ // Load or create sparse index via UnifiedCache (lazy loading)
499
+ let sparseIndex = await this.loadSparseIndex(field);
488
500
  if (!sparseIndex) {
489
- sparseIndex = await this.loadSparseIndex(field);
490
- if (!sparseIndex) {
491
- // Create new sparse index
492
- const stats = this.fieldStats.get(field);
493
- const chunkSize = stats
494
- ? this.chunkingStrategy.getOptimalChunkSize({
495
- uniqueValues: stats.cardinality.uniqueValues,
496
- distribution: stats.cardinality.distribution,
497
- avgIdsPerValue: stats.cardinality.totalValues / Math.max(1, stats.cardinality.uniqueValues)
498
- })
499
- : 50;
500
- sparseIndex = new SparseIndex(field, chunkSize);
501
- }
502
- this.sparseIndices.set(field, sparseIndex);
501
+ // Create new sparse index
502
+ const stats = this.fieldStats.get(field);
503
+ const chunkSize = stats
504
+ ? this.chunkingStrategy.getOptimalChunkSize({
505
+ uniqueValues: stats.cardinality.uniqueValues,
506
+ distribution: stats.cardinality.distribution,
507
+ avgIdsPerValue: stats.cardinality.totalValues / Math.max(1, stats.cardinality.uniqueValues)
508
+ })
509
+ : 50;
510
+ sparseIndex = new SparseIndex(field, chunkSize);
503
511
  }
504
512
  const normalizedValue = this.normalizeValue(value, field);
505
513
  // Find existing chunk for this value (check zone maps)
@@ -571,9 +579,11 @@ export class MetadataIndexManager {
571
579
  }
572
580
  /**
573
581
  * Remove ID from chunked index
582
+ * v3.44.1: Now fully lazy-loaded via UnifiedCache (no local sparseIndices Map)
574
583
  */
575
584
  async removeFromChunkedIndex(field, value, id) {
576
- const sparseIndex = this.sparseIndices.get(field) || await this.loadSparseIndex(field);
585
+ // Load sparse index via UnifiedCache (lazy loading)
586
+ const sparseIndex = await this.loadSparseIndex(field);
577
587
  if (!sparseIndex) {
578
588
  return; // No chunked index exists
579
589
  }
@@ -834,21 +844,25 @@ export class MetadataIndexManager {
834
844
  }
835
845
  }
836
846
  else {
837
- // Remove from all indexes (slower, requires scanning all chunks)
847
+ // Remove from all indexes (slower, requires scanning all field indexes)
838
848
  // This should be rare - prefer providing metadata when removing
839
- prodLog.warn(`Removing ID ${id} without metadata requires scanning all sparse indices (slow)`);
840
- // Scan all sparse indices
841
- for (const [field, sparseIndex] of this.sparseIndices.entries()) {
842
- for (const chunkId of sparseIndex.getAllChunkIds()) {
843
- const chunk = await this.chunkManager.loadChunk(field, chunkId);
844
- if (chunk) {
845
- // Convert UUID to integer for bitmap checking
846
- const intId = this.idMapper.getInt(id);
847
- if (intId !== undefined) {
848
- // Check all values in this chunk
849
- for (const [value, bitmap] of chunk.entries) {
850
- if (bitmap.has(intId)) {
851
- await this.removeFromChunkedIndex(field, value, id);
849
+ // v3.44.1: Scan via fieldIndexes, load sparse indices on-demand
850
+ prodLog.warn(`Removing ID ${id} without metadata requires scanning all fields (slow)`);
851
+ // Scan all fields via fieldIndexes
852
+ for (const field of this.fieldIndexes.keys()) {
853
+ const sparseIndex = await this.loadSparseIndex(field);
854
+ if (sparseIndex) {
855
+ for (const chunkId of sparseIndex.getAllChunkIds()) {
856
+ const chunk = await this.chunkManager.loadChunk(field, chunkId);
857
+ if (chunk) {
858
+ // Convert UUID to integer for bitmap checking
859
+ const intId = this.idMapper.getInt(id);
860
+ if (intId !== undefined) {
861
+ // Check all values in this chunk
862
+ for (const [value, bitmap] of chunk.entries) {
863
+ if (bitmap.has(intId)) {
864
+ await this.removeFromChunkedIndex(field, value, id);
865
+ }
852
866
  }
853
867
  }
854
868
  }
@@ -1087,9 +1101,10 @@ export class MetadataIndexManager {
1087
1101
  case 'exists':
1088
1102
  if (operand) {
1089
1103
  // Get all IDs that have this field (any value) from chunked sparse index with roaring bitmaps (v3.43.0)
1104
+ // v3.44.1: Now fully lazy-loaded via UnifiedCache (no local sparseIndices Map)
1090
1105
  const allIntIds = new Set();
1091
- // Load sparse index for this field
1092
- const sparseIndex = this.sparseIndices.get(field) || await this.loadSparseIndex(field);
1106
+ // Load sparse index via UnifiedCache (lazy loading)
1107
+ const sparseIndex = await this.loadSparseIndex(field);
1093
1108
  if (sparseIndex) {
1094
1109
  // Iterate through all chunks for this field
1095
1110
  for (const chunkId of sparseIndex.getAllChunkIds()) {
@@ -1343,29 +1358,31 @@ export class MetadataIndexManager {
1343
1358
  }
1344
1359
  /**
1345
1360
  * Get index statistics with enhanced counting information
1361
+ * v3.44.1: Sparse indices now lazy-loaded via UnifiedCache
1362
+ * Note: This method may load sparse indices to calculate stats
1346
1363
  */
1347
1364
  async getStats() {
1348
1365
  const fields = new Set();
1349
1366
  let totalEntries = 0;
1350
1367
  let totalIds = 0;
1351
- // Collect stats from sparse indices (v3.42.0 - removed indexCache)
1352
- for (const [field, sparseIndex] of this.sparseIndices.entries()) {
1368
+ // Collect stats from field indexes (lightweight - always in memory)
1369
+ for (const field of this.fieldIndexes.keys()) {
1353
1370
  fields.add(field);
1354
- // Count entries and IDs from all chunks
1355
- for (const chunkId of sparseIndex.getAllChunkIds()) {
1356
- const chunk = await this.chunkManager.loadChunk(field, chunkId);
1357
- if (chunk) {
1358
- totalEntries += chunk.entries.size;
1359
- for (const ids of chunk.entries.values()) {
1360
- totalIds += ids.size;
1371
+ // Load sparse index to count entries (may trigger lazy load)
1372
+ const sparseIndex = await this.loadSparseIndex(field);
1373
+ if (sparseIndex) {
1374
+ // Count entries and IDs from all chunks
1375
+ for (const chunkId of sparseIndex.getAllChunkIds()) {
1376
+ const chunk = await this.chunkManager.loadChunk(field, chunkId);
1377
+ if (chunk) {
1378
+ totalEntries += chunk.entries.size;
1379
+ for (const ids of chunk.entries.values()) {
1380
+ totalIds += ids.size;
1381
+ }
1361
1382
  }
1362
1383
  }
1363
1384
  }
1364
1385
  }
1365
- // Also include fields from fieldIndexes that might not have sparse indices yet
1366
- for (const field of this.fieldIndexes.keys()) {
1367
- fields.add(field);
1368
- }
1369
1386
  return {
1370
1387
  totalEntries,
1371
1388
  totalIds,
@@ -1377,6 +1394,7 @@ export class MetadataIndexManager {
1377
1394
  /**
1378
1395
  * Rebuild entire index from scratch using pagination
1379
1396
  * Non-blocking version that yields control back to event loop
1397
+ * v3.44.1: Sparse indices now lazy-loaded via UnifiedCache (no need to clear Map)
1380
1398
  */
1381
1399
  async rebuild() {
1382
1400
  if (this.isRebuilding)
@@ -1387,9 +1405,12 @@ export class MetadataIndexManager {
1387
1405
  prodLog.info(`📊 Storage adapter: ${this.storage.constructor.name}`);
1388
1406
  prodLog.info(`🔧 Batch processing available: ${!!this.storage.getMetadataBatch}`);
1389
1407
  // Clear existing indexes (v3.42.0 - use sparse indices instead of flat files)
1390
- this.sparseIndices.clear();
1408
+ // v3.44.1: No sparseIndices Map to clear - UnifiedCache handles eviction
1391
1409
  this.fieldIndexes.clear();
1392
1410
  this.dirtyFields.clear();
1411
+ // Clear all cached sparse indices in UnifiedCache
1412
+ // This ensures rebuild starts fresh (v3.44.1)
1413
+ this.unifiedCache.clear('metadata');
1393
1414
  // Rebuild noun metadata indexes using pagination
1394
1415
  let nounOffset = 0;
1395
1416
  const nounLimit = 25; // Even smaller batches during initialization to prevent socket exhaustion
@@ -0,0 +1,203 @@
1
+ /**
2
+ * Universal Concept System for VFS
3
+ *
4
+ * Manages concepts that transcend files and exist independently
5
+ * Ideas that can be linked to multiple manifestations across domains
6
+ * PRODUCTION-READY: Real implementation using Brainy
7
+ */
8
+ import { Brainy } from '../brainy.js';
9
+ import { EntityManager, ManagedEntity } from './EntityManager.js';
10
+ /**
11
+ * Universal concept that exists independently of files
12
+ */
13
+ export interface UniversalConcept extends ManagedEntity {
14
+ id: string;
15
+ name: string;
16
+ description?: string;
17
+ domain: string;
18
+ category: string;
19
+ keywords: string[];
20
+ links: ConceptLink[];
21
+ manifestations: ConceptManifestation[];
22
+ strength: number;
23
+ created: number;
24
+ lastUpdated: number;
25
+ version: number;
26
+ metadata: Record<string, any>;
27
+ conceptType?: string;
28
+ }
29
+ /**
30
+ * A link between concepts
31
+ */
32
+ export interface ConceptLink {
33
+ id: string;
34
+ targetConceptId: string;
35
+ relationship: 'extends' | 'implements' | 'uses' | 'opposite' | 'related' | 'contains' | 'part-of';
36
+ strength: number;
37
+ context?: string;
38
+ bidirectional: boolean;
39
+ }
40
+ /**
41
+ * A manifestation of a concept in a specific location
42
+ */
43
+ export interface ConceptManifestation extends ManagedEntity {
44
+ id: string;
45
+ conceptId: string;
46
+ filePath: string;
47
+ context: string;
48
+ form: 'definition' | 'usage' | 'example' | 'discussion' | 'implementation';
49
+ position?: {
50
+ line?: number;
51
+ column?: number;
52
+ offset?: number;
53
+ };
54
+ confidence: number;
55
+ timestamp: number;
56
+ extractedBy: 'manual' | 'auto' | 'ai';
57
+ }
58
+ /**
59
+ * Configuration for concept system
60
+ */
61
+ export interface ConceptSystemConfig {
62
+ autoLink?: boolean;
63
+ similarityThreshold?: number;
64
+ maxManifestations?: number;
65
+ strengthDecay?: number;
66
+ }
67
+ /**
68
+ * Concept graph structure for visualization
69
+ */
70
+ export interface ConceptGraph {
71
+ concepts: Array<{
72
+ id: string;
73
+ name: string;
74
+ domain: string;
75
+ strength: number;
76
+ manifestationCount: number;
77
+ }>;
78
+ links: Array<{
79
+ source: string;
80
+ target: string;
81
+ relationship: string;
82
+ strength: number;
83
+ }>;
84
+ }
85
+ /**
86
+ * Universal Concept System
87
+ *
88
+ * Manages concepts that exist independently of any specific file or context
89
+ * Examples:
90
+ * - "Authentication" concept appearing in docs, code, tests
91
+ * - "Customer Journey" concept in marketing, UX, analytics
92
+ * - "Dependency Injection" pattern across multiple codebases
93
+ * - "Sustainability" theme in various research papers
94
+ */
95
+ export declare class ConceptSystem extends EntityManager {
96
+ private config;
97
+ private conceptCache;
98
+ constructor(brain: Brainy, config?: ConceptSystemConfig);
99
+ /**
100
+ * Create a new universal concept
101
+ */
102
+ createConcept(concept: Omit<UniversalConcept, 'id' | 'created' | 'lastUpdated' | 'version' | 'links' | 'manifestations'>): Promise<string>;
103
+ /**
104
+ * Find concepts by various criteria
105
+ */
106
+ findConcepts(query: {
107
+ name?: string;
108
+ domain?: string;
109
+ category?: string;
110
+ keywords?: string[];
111
+ similar?: string;
112
+ manifestedIn?: string;
113
+ }): Promise<UniversalConcept[]>;
114
+ /**
115
+ * Link two concepts together
116
+ */
117
+ linkConcept(fromConceptId: string, toConceptId: string, relationship: ConceptLink['relationship'], options?: {
118
+ strength?: number;
119
+ context?: string;
120
+ bidirectional?: boolean;
121
+ }): Promise<string>;
122
+ /**
123
+ * Record a manifestation of a concept in a file
124
+ */
125
+ recordManifestation(conceptId: string, filePath: string, context: string, form: ConceptManifestation['form'], options?: {
126
+ position?: ConceptManifestation['position'];
127
+ confidence?: number;
128
+ extractedBy?: ConceptManifestation['extractedBy'];
129
+ }): Promise<string>;
130
+ /**
131
+ * Extract and link concepts from content
132
+ */
133
+ extractAndLinkConcepts(filePath: string, content: Buffer): Promise<string[]>;
134
+ /**
135
+ * Get concept graph for visualization
136
+ */
137
+ getConceptGraph(options?: {
138
+ domain?: string;
139
+ minStrength?: number;
140
+ maxConcepts?: number;
141
+ }): Promise<ConceptGraph>;
142
+ /**
143
+ * Find appearances of a concept
144
+ */
145
+ findAppearances(conceptId: string, options?: {
146
+ filePath?: string;
147
+ form?: ConceptManifestation['form'];
148
+ minConfidence?: number;
149
+ limit?: number;
150
+ }): Promise<ConceptManifestation[]>;
151
+ /**
152
+ * Auto-link concept to similar concepts
153
+ */
154
+ private autoLinkConcept;
155
+ /**
156
+ * Get concept by ID
157
+ */
158
+ private getConcept;
159
+ /**
160
+ * Update stored concept
161
+ */
162
+ private updateConcept;
163
+ /**
164
+ * Calculate similarity between two concepts
165
+ */
166
+ private calculateConceptSimilarity;
167
+ /**
168
+ * Generate embedding for concept
169
+ */
170
+ private generateConceptEmbedding;
171
+ /**
172
+ * Generate embedding for text
173
+ */
174
+ private generateTextEmbedding;
175
+ /**
176
+ * Get reverse relationship type
177
+ */
178
+ private getReverseRelationship;
179
+ /**
180
+ * Map concept relationship to VerbType
181
+ */
182
+ private getVerbType;
183
+ /**
184
+ * Detect concept domain from context
185
+ */
186
+ private detectDomain;
187
+ /**
188
+ * Detect concept category
189
+ */
190
+ private detectCategory;
191
+ /**
192
+ * Detect manifestation form from context
193
+ */
194
+ private detectManifestationForm;
195
+ /**
196
+ * Extract context around a position
197
+ */
198
+ private extractContext;
199
+ /**
200
+ * Clear concept cache
201
+ */
202
+ clearCache(conceptId?: string): void;
203
+ }