@soulcraft/brainy 3.40.2 → 3.41.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,18 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
4
4
 
5
+ ## [3.41.0](https://github.com/soulcraftlabs/brainy/compare/v3.40.3...v3.41.0) (2025-10-13)
6
+
7
+
8
+ ### ✨ Features
9
+
10
+ * automatic temporal bucketing for metadata indexes ([b3edd4b](https://github.com/soulcraftlabs/brainy/commit/b3edd4b60a49d26d1ca776d459aa013736a0db9d))
11
+
12
+ ### [3.40.3](https://github.com/soulcraftlabs/brainy/compare/v3.40.2...v3.40.3) (2025-10-13)
13
+
14
+ - fix: prevent metadata index file pollution by excluding high-cardinality fields (0c86c4f)
15
+
16
+
5
17
  ### [3.40.2](https://github.com/soulcraftlabs/brainy/compare/v3.40.1...v3.40.2) (2025-10-13)
6
18
 
7
19
 
@@ -39,7 +39,25 @@ export class MetadataIndexManager {
39
39
  rebuildThreshold: config.rebuildThreshold ?? 0.1,
40
40
  autoOptimize: config.autoOptimize ?? true,
41
41
  indexedFields: config.indexedFields ?? [],
42
- excludeFields: config.excludeFields ?? ['id', 'createdAt', 'updatedAt', 'embedding', 'vector', 'embeddings', 'vectors']
42
+ excludeFields: config.excludeFields ?? [
43
+ // ONLY exclude truly un-indexable fields (binary data, large content)
44
+ // Timestamps are NOW indexed with automatic bucketing (prevents pollution)
45
+ // Vectors and embeddings (binary data, already have HNSW indexes)
46
+ 'embedding',
47
+ 'vector',
48
+ 'embeddings',
49
+ 'vectors',
50
+ // Large content fields (too large for metadata indexing)
51
+ 'content',
52
+ 'data',
53
+ 'originalData',
54
+ '_data',
55
+ // Primary keys (use direct lookups instead)
56
+ 'id'
57
+ // NOTE: 'accessed', 'modified', 'createdAt', etc. are NO LONGER excluded!
58
+ // They are now indexed with automatic 1-minute bucketing to prevent file pollution
59
+ // This enables range queries like: modified > yesterday
60
+ ]
43
61
  };
44
62
  // Initialize metadata cache with similar config to search cache
45
63
  this.metadataCache = new MetadataIndexCache({
@@ -134,7 +152,7 @@ export class MetadataIndexManager {
134
152
  * Get index key for field and value
135
153
  */
136
154
  getIndexKey(field, value) {
137
- const normalizedValue = this.normalizeValue(value);
155
+ const normalizedValue = this.normalizeValue(value, field); // Pass field for bucketing!
138
156
  return `${field}:${normalizedValue}`;
139
157
  }
140
158
  /**
@@ -267,7 +285,7 @@ export class MetadataIndexManager {
267
285
  });
268
286
  }
269
287
  const sortedIndex = this.sortedIndices.get(field);
270
- const normalizedValue = this.normalizeValue(value);
288
+ const normalizedValue = this.normalizeValue(value, field); // Pass field for bucketing!
271
289
  // Find where this value should be in the sorted array
272
290
  const insertPos = this.findInsertPosition(sortedIndex.values, normalizedValue, sortedIndex.fieldType);
273
291
  if (insertPos < sortedIndex.values.length &&
@@ -289,7 +307,7 @@ export class MetadataIndexManager {
289
307
  const sortedIndex = this.sortedIndices.get(field);
290
308
  if (!sortedIndex || sortedIndex.values.length === 0)
291
309
  return;
292
- const normalizedValue = this.normalizeValue(value);
310
+ const normalizedValue = this.normalizeValue(value, field); // Pass field for bucketing!
293
311
  // Binary search to find the value
294
312
  const pos = this.findInsertPosition(sortedIndex.values, normalizedValue, sortedIndex.fieldType);
295
313
  if (pos < sortedIndex.values.length &&
@@ -429,12 +447,10 @@ export class MetadataIndexManager {
429
447
  else {
430
448
  stats.indexType = 'hash';
431
449
  }
432
- // Determine normalization strategy for high cardinality fields
450
+ // Determine normalization strategy for high cardinality NON-temporal fields
451
+ // (Temporal fields are already bucketed in normalizeValue from the start!)
433
452
  if (hasHighCardinality) {
434
- if (field.toLowerCase().includes('time') || field.toLowerCase().includes('date')) {
435
- stats.normalizationStrategy = 'bucket'; // Time bucketing
436
- }
437
- else if (isNumeric) {
453
+ if (isNumeric) {
438
454
  stats.normalizationStrategy = 'precision'; // Reduce float precision
439
455
  }
440
456
  else {
@@ -494,7 +510,7 @@ export class MetadataIndexManager {
494
510
  * Generate value chunk filename for scalable storage
495
511
  */
496
512
  getValueChunkFilename(field, value, chunkIndex = 0) {
497
- const normalizedValue = this.normalizeValue(value);
513
+ const normalizedValue = this.normalizeValue(value, field); // Pass field for bucketing!
498
514
  const safeValue = this.makeSafeFilename(normalizedValue);
499
515
  return `${field}_${safeValue}_chunk${chunkIndex}`;
500
516
  }
@@ -516,19 +532,25 @@ export class MetadataIndexManager {
516
532
  return '__NULL__';
517
533
  if (typeof value === 'boolean')
518
534
  return value ? '__TRUE__' : '__FALSE__';
519
- // Apply smart normalization based on field statistics
535
+ // ALWAYS apply bucketing to temporal fields (prevents pollution from the start!)
536
+ // This is the key fix: don't wait for cardinality stats, just bucket immediately
537
+ if (field && typeof value === 'number') {
538
+ const fieldLower = field.toLowerCase();
539
+ const isTemporal = fieldLower.includes('time') || fieldLower.includes('date') ||
540
+ fieldLower.includes('accessed') || fieldLower.includes('modified') ||
541
+ fieldLower.includes('created') || fieldLower.includes('updated');
542
+ if (isTemporal) {
543
+ // Apply time bucketing immediately (no need to wait for stats)
544
+ const bucketSize = this.TIMESTAMP_PRECISION_MS; // 1 minute buckets
545
+ const bucketed = Math.floor(value / bucketSize) * bucketSize;
546
+ return bucketed.toString();
547
+ }
548
+ }
549
+ // Apply smart normalization based on field statistics (for non-temporal fields)
520
550
  if (field && this.fieldStats.has(field)) {
521
551
  const stats = this.fieldStats.get(field);
522
552
  const strategy = stats.normalizationStrategy;
523
- if (strategy === 'bucket' && typeof value === 'number') {
524
- // Time bucketing for timestamps
525
- if (field.toLowerCase().includes('time') || field.toLowerCase().includes('date')) {
526
- const bucketSize = this.TIMESTAMP_PRECISION_MS;
527
- const bucketed = Math.floor(value / bucketSize) * bucketSize;
528
- return bucketed.toString();
529
- }
530
- }
531
- else if (strategy === 'precision' && typeof value === 'number') {
553
+ if (strategy === 'precision' && typeof value === 'number') {
532
554
  // Reduce float precision for high cardinality numeric fields
533
555
  const rounded = Math.round(value * Math.pow(10, this.FLOAT_PRECISION)) / Math.pow(10, this.FLOAT_PRECISION);
534
556
  return rounded.toString();
@@ -631,7 +653,7 @@ export class MetadataIndexManager {
631
653
  const loadedEntry = await this.loadIndexEntry(key);
632
654
  entry = loadedEntry ?? {
633
655
  field,
634
- value: this.normalizeValue(value),
656
+ value: this.normalizeValue(value, field), // Pass field for bucketing!
635
657
  ids: new Set(),
636
658
  lastUpdated: Date.now()
637
659
  };
@@ -704,7 +726,7 @@ export class MetadataIndexManager {
704
726
  };
705
727
  this.fieldIndexes.set(field, fieldIndex);
706
728
  }
707
- const normalizedValue = this.normalizeValue(value);
729
+ const normalizedValue = this.normalizeValue(value, field); // Pass field for bucketing!
708
730
  fieldIndex.values[normalizedValue] = (fieldIndex.values[normalizedValue] || 0) + delta;
709
731
  // Remove if count drops to 0
710
732
  if (fieldIndex.values[normalizedValue] <= 0) {
@@ -1800,7 +1822,7 @@ export class MetadataIndexManager {
1800
1822
  let entityType = null;
1801
1823
  if (field === 'noun') {
1802
1824
  // This is the type definition itself
1803
- entityType = this.normalizeValue(value);
1825
+ entityType = this.normalizeValue(value, field); // Pass field for bucketing!
1804
1826
  }
1805
1827
  else {
1806
1828
  // Find the noun type for this entity by looking for entries with this entityId
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@soulcraft/brainy",
3
- "version": "3.40.2",
3
+ "version": "3.41.0",
4
4
  "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.js",