@soulcraft/brainy 2.1.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/dist/augmentations/AugmentationMetadataContract.d.ts +94 -0
  2. package/dist/augmentations/AugmentationMetadataContract.js +306 -0
  3. package/dist/augmentations/apiServerAugmentation.d.ts +1 -0
  4. package/dist/augmentations/apiServerAugmentation.js +1 -0
  5. package/dist/augmentations/batchProcessingAugmentation.d.ts +1 -0
  6. package/dist/augmentations/batchProcessingAugmentation.js +1 -0
  7. package/dist/augmentations/brainyAugmentation.d.ts +16 -0
  8. package/dist/augmentations/cacheAugmentation.d.ts +1 -0
  9. package/dist/augmentations/cacheAugmentation.js +1 -0
  10. package/dist/augmentations/conduitAugmentations.d.ts +1 -0
  11. package/dist/augmentations/conduitAugmentations.js +1 -0
  12. package/dist/augmentations/connectionPoolAugmentation.d.ts +1 -0
  13. package/dist/augmentations/connectionPoolAugmentation.js +1 -0
  14. package/dist/augmentations/entityRegistryAugmentation.d.ts +2 -0
  15. package/dist/augmentations/entityRegistryAugmentation.js +2 -0
  16. package/dist/augmentations/indexAugmentation.d.ts +1 -0
  17. package/dist/augmentations/indexAugmentation.js +1 -0
  18. package/dist/augmentations/intelligentVerbScoringAugmentation.d.ts +4 -0
  19. package/dist/augmentations/intelligentVerbScoringAugmentation.js +4 -0
  20. package/dist/augmentations/metadataEnforcer.d.ts +20 -0
  21. package/dist/augmentations/metadataEnforcer.js +171 -0
  22. package/dist/augmentations/metricsAugmentation.d.ts +2 -7
  23. package/dist/augmentations/metricsAugmentation.js +1 -0
  24. package/dist/augmentations/monitoringAugmentation.d.ts +1 -0
  25. package/dist/augmentations/monitoringAugmentation.js +1 -0
  26. package/dist/augmentations/neuralImport.d.ts +4 -0
  27. package/dist/augmentations/neuralImport.js +4 -0
  28. package/dist/augmentations/requestDeduplicatorAugmentation.d.ts +1 -0
  29. package/dist/augmentations/requestDeduplicatorAugmentation.js +1 -0
  30. package/dist/augmentations/serverSearchAugmentations.d.ts +2 -0
  31. package/dist/augmentations/serverSearchAugmentations.js +2 -0
  32. package/dist/augmentations/storageAugmentation.d.ts +1 -0
  33. package/dist/augmentations/storageAugmentation.js +1 -0
  34. package/dist/augmentations/synapseAugmentation.d.ts +4 -0
  35. package/dist/augmentations/synapseAugmentation.js +4 -0
  36. package/dist/augmentations/walAugmentation.d.ts +1 -0
  37. package/dist/augmentations/walAugmentation.js +1 -0
  38. package/dist/brainyData.d.ts +28 -1
  39. package/dist/brainyData.js +229 -83
  40. package/dist/embeddings/model-manager.d.ts +9 -8
  41. package/dist/embeddings/model-manager.js +105 -85
  42. package/dist/triple/TripleIntelligence.d.ts +4 -0
  43. package/dist/triple/TripleIntelligence.js +39 -9
  44. package/dist/utils/deletedItemsIndex.d.ts +59 -0
  45. package/dist/utils/deletedItemsIndex.js +98 -0
  46. package/dist/utils/ensureDeleted.d.ts +38 -0
  47. package/dist/utils/ensureDeleted.js +79 -0
  48. package/dist/utils/metadataFilter.js +5 -0
  49. package/dist/utils/metadataIndex.d.ts +4 -0
  50. package/dist/utils/metadataIndex.js +45 -0
  51. package/dist/utils/metadataNamespace.d.ts +113 -0
  52. package/dist/utils/metadataNamespace.js +162 -0
  53. package/dist/utils/periodicCleanup.d.ts +87 -0
  54. package/dist/utils/periodicCleanup.js +219 -0
  55. package/package.json +9 -3
package/dist/utils/metadataIndex.js CHANGED
@@ -412,6 +412,36 @@ export class MetadataIndexManager
             }
         }
     }
+    /**
+     * Get all IDs in the index
+     */
+    async getAllIds() {
+        // Collect all unique IDs from all index entries
+        const allIds = new Set();
+        // First, add all IDs from the in-memory cache
+        for (const entry of this.indexCache.values()) {
+            entry.ids.forEach(id => allIds.add(id));
+        }
+        // If storage has a method to get all nouns, use it as the source of truth
+        // This ensures we include items that might not be indexed yet
+        if (this.storage && typeof this.storage.getNouns === 'function') {
+            try {
+                const result = await this.storage.getNouns({
+                    pagination: { limit: 100000 }
+                });
+                if (result && result.items) {
+                    result.items.forEach((item) => {
+                        if (item.id)
+                            allIds.add(item.id);
+                    });
+                }
+            }
+            catch (e) {
+                // Fall back to using only indexed IDs
+            }
+        }
+        return Array.from(allIds);
+    }
     /**
      * Get IDs for a specific field-value combination with caching
      */
@@ -638,6 +668,21 @@ export class MetadataIndexManager
                         fieldResults = Array.from(allIds);
                     }
                     break;
+                // Negation operators
+                case 'notEquals':
+                case 'isNot':
+                case 'ne':
+                    // For notEquals, we need all IDs EXCEPT those matching the value
+                    // This is especially important for soft delete: deleted !== true
+                    // should include items without a deleted field
+                    // First, get all IDs in the database
+                    const allItemIds = await this.getAllIds();
+                    // Then get IDs that match the value we want to exclude
+                    const excludeIds = await this.getIds(field, operand);
+                    const excludeSet = new Set(excludeIds);
+                    // Return all IDs except those to exclude
+                    fieldResults = allItemIds.filter(id => !excludeSet.has(id));
+                    break;
             }
         }
     }
package/dist/utils/metadataNamespace.d.ts ADDED
@@ -0,0 +1,113 @@
+/**
+ * Clean Metadata Architecture for Brainy 2.2
+ * No backward compatibility - doing it RIGHT from the start!
+ */
+export declare const BRAINY_NS: "_brainy";
+export declare const AUG_NS: "_augmentations";
+export declare const AUDIT_NS: "_audit";
+export declare const DELETED_FIELD: "_brainy.deleted";
+export declare const INDEXED_FIELD: "_brainy.indexed";
+export declare const VERSION_FIELD: "_brainy.version";
+/**
+ * Internal Brainy metadata structure
+ * These fields are ALWAYS present and indexed for O(1) access
+ */
+export interface BrainyInternalMetadata {
+    deleted: boolean;
+    indexed: boolean;
+    version: number;
+    created: number;
+    updated: number;
+    partition?: number;
+    domain?: string;
+    priority?: number;
+    ttl?: number;
+}
+/**
+ * Complete metadata structure with namespaces
+ */
+export interface NamespacedMetadata<T = any> {
+    [key: string]: any;
+    [BRAINY_NS]: BrainyInternalMetadata;
+    [AUG_NS]?: {
+        [augmentationName: string]: any;
+    };
+    [AUDIT_NS]?: Array<{
+        timestamp: number;
+        augmentation: string;
+        field: string;
+        oldValue: any;
+        newValue: any;
+    }>;
+}
+/**
+ * Create properly namespaced metadata
+ * This is called for EVERY noun/verb creation
+ */
+export declare function createNamespacedMetadata<T = any>(userMetadata?: T): NamespacedMetadata<T>;
+/**
+ * Update metadata while preserving namespaces
+ */
+export declare function updateNamespacedMetadata<T = any>(existing: NamespacedMetadata<T>, updates: Partial<T>): NamespacedMetadata<T>;
+/**
+ * Soft delete a noun (O(1) operation)
+ */
+export declare function markDeleted<T = any>(metadata: NamespacedMetadata<T>): NamespacedMetadata<T>;
+/**
+ * Restore a soft-deleted noun (O(1) operation)
+ */
+export declare function markRestored<T = any>(metadata: NamespacedMetadata<T>): NamespacedMetadata<T>;
+/**
+ * Check if a noun is deleted (O(1) check)
+ */
+export declare function isDeleted<T = any>(metadata: NamespacedMetadata<T>): boolean;
+/**
+ * Get user metadata without internal fields
+ * Used by augmentations to get clean user data
+ */
+export declare function getUserMetadata<T = any>(metadata: NamespacedMetadata<T>): T;
+/**
+ * Set augmentation data in isolated namespace
+ */
+export declare function setAugmentationData<T = any>(metadata: NamespacedMetadata<T>, augmentationName: string, data: any): NamespacedMetadata<T>;
+/**
+ * Add audit entry for tracking
+ */
+export declare function addAuditEntry<T = any>(metadata: NamespacedMetadata<T>, entry: {
+    augmentation: string;
+    field: string;
+    oldValue: any;
+    newValue: any;
+}): NamespacedMetadata<T>;
+/**
+ * INDEXING EXPLANATION:
+ *
+ * The MetadataIndex flattens nested objects into dot-notation keys:
+ *
+ * Input metadata:
+ * {
+ *   name: "Django",
+ *   _brainy: {
+ *     deleted: false,
+ *     indexed: true
+ *   }
+ * }
+ *
+ * Creates index entries:
+ * - "name" -> "django" -> Set([id1, id2...])
+ * - "_brainy.deleted" -> "false" -> Set([id1, id2...]) // O(1) lookup!
+ * - "_brainy.indexed" -> "true" -> Set([id1, id2...])
+ *
+ * Query: { "_brainy.deleted": false }
+ * Lookup: index["_brainy.deleted"]["false"] -> Set of IDs in O(1)
+ *
+ * This is why namespacing doesn't hurt performance - it's all flattened!
+ */
+/**
+ * Fields that should ALWAYS be indexed for O(1) access
+ */
+export declare const ALWAYS_INDEXED_FIELDS: ("_brainy.deleted" | "_brainy.indexed" | "_brainy.version")[];
+/**
+ * Fields that should use sorted index for O(log n) range queries
+ */
+export declare const SORTED_INDEX_FIELDS: string[];
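The declarations above describe the whole metadata lifecycle. A short usage sketch, assuming the helpers are imported the same way the package's own modules do (see the `./metadataNamespace.js` import in periodicCleanup.js further down); whether a public deep-import path exists is not shown in this diff:

```typescript
import {
  createNamespacedMetadata,
  markDeleted,
  isDeleted,
  getUserMetadata
} from './metadataNamespace.js';

// User fields stay at the top level; the _brainy namespace is added automatically.
const meta = createNamespacedMetadata({ name: 'Django' });
isDeleted(meta);              // false — new items always start with deleted: false

// Soft delete is a pure O(1) metadata update; nothing is removed from storage.
const removed = markDeleted(meta);
isDeleted(removed);           // true
removed._brainy.updated;      // timestamp refreshed by markDeleted

// Augmentations see only user data, with _brainy/_augmentations/_audit stripped.
getUserMetadata(removed);     // { name: 'Django' }
```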
package/dist/utils/metadataNamespace.js ADDED
@@ -0,0 +1,162 @@
+/**
+ * Clean Metadata Architecture for Brainy 2.2
+ * No backward compatibility - doing it RIGHT from the start!
+ */
+// Namespace constants
+export const BRAINY_NS = '_brainy';
+export const AUG_NS = '_augmentations';
+export const AUDIT_NS = '_audit';
+// Field paths for O(1) indexing
+export const DELETED_FIELD = `${BRAINY_NS}.deleted`;
+export const INDEXED_FIELD = `${BRAINY_NS}.indexed`;
+export const VERSION_FIELD = `${BRAINY_NS}.version`;
+/**
+ * Create properly namespaced metadata
+ * This is called for EVERY noun/verb creation
+ */
+export function createNamespacedMetadata(userMetadata) {
+    const now = Date.now();
+    // Start with user metadata or empty object
+    const result = userMetadata ? { ...userMetadata } : {};
+    // ALWAYS add internal namespace with required fields
+    result[BRAINY_NS] = {
+        deleted: false, // CRITICAL: Always false for new items
+        indexed: true, // New items are indexed
+        version: 1, // Current schema version
+        created: now,
+        updated: now
+    };
+    return result;
+}
+/**
+ * Update metadata while preserving namespaces
+ */
+export function updateNamespacedMetadata(existing, updates) {
+    const now = Date.now();
+    // Merge user fields
+    const result = {
+        ...existing,
+        ...updates
+    };
+    // Preserve internal namespace but update timestamp
+    result[BRAINY_NS] = {
+        ...existing[BRAINY_NS],
+        updated: now
+    };
+    // Preserve augmentation namespace
+    if (existing[AUG_NS]) {
+        result[AUG_NS] = existing[AUG_NS];
+    }
+    // Preserve audit trail
+    if (existing[AUDIT_NS]) {
+        result[AUDIT_NS] = existing[AUDIT_NS];
+    }
+    return result;
+}
+/**
+ * Soft delete a noun (O(1) operation)
+ */
+export function markDeleted(metadata) {
+    return {
+        ...metadata,
+        [BRAINY_NS]: {
+            ...metadata[BRAINY_NS],
+            deleted: true,
+            updated: Date.now()
+        }
+    };
+}
+/**
+ * Restore a soft-deleted noun (O(1) operation)
+ */
+export function markRestored(metadata) {
+    return {
+        ...metadata,
+        [BRAINY_NS]: {
+            ...metadata[BRAINY_NS],
+            deleted: false,
+            updated: Date.now()
+        }
+    };
+}
+/**
+ * Check if a noun is deleted (O(1) check)
+ */
+export function isDeleted(metadata) {
+    return metadata[BRAINY_NS]?.deleted === true;
+}
+/**
+ * Get user metadata without internal fields
+ * Used by augmentations to get clean user data
+ */
+export function getUserMetadata(metadata) {
+    const { [BRAINY_NS]: _, [AUG_NS]: __, [AUDIT_NS]: ___, ...userMeta } = metadata;
+    return userMeta;
+}
+/**
+ * Set augmentation data in isolated namespace
+ */
+export function setAugmentationData(metadata, augmentationName, data) {
+    const result = { ...metadata };
+    if (!result[AUG_NS]) {
+        result[AUG_NS] = {};
+    }
+    result[AUG_NS][augmentationName] = data;
+    return result;
+}
+/**
+ * Add audit entry for tracking
+ */
+export function addAuditEntry(metadata, entry) {
+    const result = { ...metadata };
+    if (!result[AUDIT_NS]) {
+        result[AUDIT_NS] = [];
+    }
+    result[AUDIT_NS].push({
+        ...entry,
+        timestamp: Date.now()
+    });
+    return result;
+}
+/**
+ * INDEXING EXPLANATION:
+ *
+ * The MetadataIndex flattens nested objects into dot-notation keys:
+ *
+ * Input metadata:
+ * {
+ *   name: "Django",
+ *   _brainy: {
+ *     deleted: false,
+ *     indexed: true
+ *   }
+ * }
+ *
+ * Creates index entries:
+ * - "name" -> "django" -> Set([id1, id2...])
+ * - "_brainy.deleted" -> "false" -> Set([id1, id2...]) // O(1) lookup!
+ * - "_brainy.indexed" -> "true" -> Set([id1, id2...])
+ *
+ * Query: { "_brainy.deleted": false }
+ * Lookup: index["_brainy.deleted"]["false"] -> Set of IDs in O(1)
+ *
+ * This is why namespacing doesn't hurt performance - it's all flattened!
+ */
+/**
+ * Fields that should ALWAYS be indexed for O(1) access
+ */
+export const ALWAYS_INDEXED_FIELDS = [
+    DELETED_FIELD, // For soft delete filtering
+    INDEXED_FIELD, // For index management
+    VERSION_FIELD // For schema versioning
+];
+/**
+ * Fields that should use sorted index for O(log n) range queries
+ */
+export const SORTED_INDEX_FIELDS = [
+    `${BRAINY_NS}.created`,
+    `${BRAINY_NS}.updated`,
+    `${BRAINY_NS}.priority`,
+    `${BRAINY_NS}.ttl`
+];
+//# sourceMappingURL=metadataNamespace.js.map
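The INDEXING EXPLANATION comment above is the key design argument: nested namespaces cost nothing because the index is keyed by flattened dot-notation paths. A sketch of that flattening, assuming a simple Map-of-Maps index (an illustration, not the actual MetadataIndexManager code):

```typescript
// Illustration of the dot-notation flattening described above; not Brainy's real index.
type IndexMap = Map<string, Map<string, Set<string>>>; // field path -> value -> IDs

function indexMetadata(index: IndexMap, id: string, metadata: Record<string, any>, prefix = ''): void {
  for (const [key, value] of Object.entries(metadata)) {
    const field = prefix ? `${prefix}.${key}` : key;
    if (value !== null && typeof value === 'object' && !Array.isArray(value)) {
      indexMetadata(index, id, value, field); // nested keys become "_brainy.deleted" etc.
      continue;
    }
    const byValue = index.get(field) ?? new Map<string, Set<string>>();
    const ids = byValue.get(String(value).toLowerCase()) ?? new Set<string>();
    ids.add(id);
    byValue.set(String(value).toLowerCase(), ids);
    index.set(field, byValue);
  }
}

const index: IndexMap = new Map();
indexMetadata(index, 'id1', { name: 'Django', _brainy: { deleted: false, indexed: true } });
index.get('_brainy.deleted')?.get('false'); // Set { 'id1' } — the O(1) soft-delete lookup
```

The lowercasing mirrors the "name" -> "django" example in the comment; the real index may normalize values differently.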
package/dist/utils/periodicCleanup.d.ts ADDED
@@ -0,0 +1,87 @@
+/**
+ * Periodic Cleanup for Soft-Deleted Items
+ *
+ * SAFETY-FIRST APPROACH:
+ * - Maintains durability guarantees (storage-first)
+ * - Coordinates HNSW and metadata index consistency
+ * - Isolated from live operations
+ * - Graceful failure handling
+ */
+import type { StorageAdapter } from '../coreTypes.js';
+import type { HNSWIndex } from '../hnsw/hnswIndex.js';
+import type { MetadataIndexManager } from './metadataIndex.js';
+export interface CleanupConfig {
+    /** Age in milliseconds after which soft-deleted items are eligible for cleanup */
+    maxAge: number;
+    /** Maximum number of items to clean up in one batch */
+    batchSize: number;
+    /** Interval between cleanup runs (milliseconds) */
+    cleanupInterval: number;
+    /** Whether to run cleanup automatically */
+    enabled: boolean;
+}
+export interface CleanupStats {
+    itemsProcessed: number;
+    itemsDeleted: number;
+    errors: number;
+    lastRun: number;
+    nextRun: number;
+}
+/**
+ * Coordinates safe cleanup of old soft-deleted items across all indexes
+ *
+ * CRITICAL SAFETY FEATURES:
+ * 1. Storage-first deletion (durability)
+ * 2. Index consistency coordination
+ * 3. Batch processing with limits
+ * 4. Error isolation and recovery
+ */
+export declare class PeriodicCleanup {
+    private storage;
+    private hnswIndex;
+    private metadataIndex;
+    private config;
+    private stats;
+    private cleanupTimer;
+    private running;
+    constructor(storage: StorageAdapter, hnswIndex: HNSWIndex, metadataIndex: MetadataIndexManager | null, config?: Partial<CleanupConfig>);
+    /**
+     * Start periodic cleanup
+     */
+    start(): void;
+    /**
+     * Stop periodic cleanup
+     */
+    stop(): void;
+    /**
+     * Run cleanup manually
+     */
+    runNow(): Promise<CleanupStats>;
+    /**
+     * Get current cleanup statistics
+     */
+    getStats(): CleanupStats;
+    private scheduleNext;
+    /**
+     * CRITICAL: Coordinated cleanup across all indexes
+     *
+     * SAFETY PROTOCOL:
+     * 1. Find eligible items (old + soft-deleted)
+     * 2. Remove from storage FIRST (durability)
+     * 3. Remove from HNSW (graph consistency)
+     * 4. Remove from metadata index (search consistency)
+     * 5. Track stats and errors
+     */
+    private performCleanup;
+    /**
+     * Find items eligible for cleanup (old + soft-deleted)
+     */
+    private findEligibleItems;
+    /**
+     * Process a batch of items for cleanup
+     *
+     * CRITICAL: This maintains the durability-first approach:
+     * Storage → HNSW → Metadata Index
+     */
+    private processBatch;
+}
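The constructor and scheduling surface are declared above and implemented in the .js file that follows. A hedged wiring sketch, assuming you already hold the storage adapter and index instances (obtaining them from a BrainyData instance is not shown in this diff); the config values spelled out here are simply the defaults from the implementation below:

```typescript
import type { StorageAdapter } from '../coreTypes.js';
import type { HNSWIndex } from '../hnsw/hnswIndex.js';
import type { MetadataIndexManager } from './metadataIndex.js';
import { PeriodicCleanup } from './periodicCleanup.js';

declare const storage: StorageAdapter;                    // assumed to exist already
declare const hnswIndex: HNSWIndex;                       // assumed to exist already
declare const metadataIndex: MetadataIndexManager | null; // may be null

const cleanup = new PeriodicCleanup(storage, hnswIndex, metadataIndex, {
  maxAge: 60 * 60 * 1000,          // purge items soft-deleted over an hour ago (default)
  batchSize: 100,                  // cap per-batch work (default)
  cleanupInterval: 15 * 60 * 1000, // run every 15 minutes (default)
  enabled: true
});

cleanup.start();                       // schedules runs on the interval
const stats = await cleanup.runNow();  // or force a pass (throws if one is already running)
console.log(stats.itemsDeleted, stats.errors, new Date(stats.nextRun));
cleanup.stop();
```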
package/dist/utils/periodicCleanup.js ADDED
@@ -0,0 +1,219 @@
+/**
+ * Periodic Cleanup for Soft-Deleted Items
+ *
+ * SAFETY-FIRST APPROACH:
+ * - Maintains durability guarantees (storage-first)
+ * - Coordinates HNSW and metadata index consistency
+ * - Isolated from live operations
+ * - Graceful failure handling
+ */
+import { prodLog } from './logger.js';
+import { isDeleted } from './metadataNamespace.js';
+/**
+ * Coordinates safe cleanup of old soft-deleted items across all indexes
+ *
+ * CRITICAL SAFETY FEATURES:
+ * 1. Storage-first deletion (durability)
+ * 2. Index consistency coordination
+ * 3. Batch processing with limits
+ * 4. Error isolation and recovery
+ */
+export class PeriodicCleanup {
+    constructor(storage, hnswIndex, metadataIndex, config = {}) {
+        this.cleanupTimer = null;
+        this.running = false;
+        this.storage = storage;
+        this.hnswIndex = hnswIndex;
+        this.metadataIndex = metadataIndex;
+        // Default: clean up items deleted more than 1 hour ago
+        this.config = {
+            maxAge: config.maxAge ?? 60 * 60 * 1000, // 1 hour
+            batchSize: config.batchSize ?? 100, // 100 items max per batch
+            cleanupInterval: config.cleanupInterval ?? 15 * 60 * 1000, // Every 15 minutes
+            enabled: config.enabled ?? true
+        };
+        this.stats = {
+            itemsProcessed: 0,
+            itemsDeleted: 0,
+            errors: 0,
+            lastRun: 0,
+            nextRun: 0
+        };
+    }
+    /**
+     * Start periodic cleanup
+     */
+    start() {
+        if (!this.config.enabled || this.cleanupTimer) {
+            return;
+        }
+        prodLog.info(`Starting periodic cleanup: maxAge=${this.config.maxAge}, batchSize=${this.config.batchSize}, interval=${this.config.cleanupInterval}`);
+        this.scheduleNext();
+    }
+    /**
+     * Stop periodic cleanup
+     */
+    stop() {
+        if (this.cleanupTimer) {
+            clearTimeout(this.cleanupTimer);
+            this.cleanupTimer = null;
+        }
+        prodLog.info('Stopped periodic cleanup');
+    }
+    /**
+     * Run cleanup manually
+     */
+    async runNow() {
+        if (this.running) {
+            throw new Error('Cleanup already running');
+        }
+        return this.performCleanup();
+    }
+    /**
+     * Get current cleanup statistics
+     */
+    getStats() {
+        return { ...this.stats };
+    }
+    scheduleNext() {
+        const nextRun = Date.now() + this.config.cleanupInterval;
+        this.stats.nextRun = nextRun;
+        this.cleanupTimer = setTimeout(async () => {
+            await this.performCleanup();
+            this.scheduleNext();
+        }, this.config.cleanupInterval);
+    }
+    /**
+     * CRITICAL: Coordinated cleanup across all indexes
+     *
+     * SAFETY PROTOCOL:
+     * 1. Find eligible items (old + soft-deleted)
+     * 2. Remove from storage FIRST (durability)
+     * 3. Remove from HNSW (graph consistency)
+     * 4. Remove from metadata index (search consistency)
+     * 5. Track stats and errors
+     */
+    async performCleanup() {
+        if (this.running) {
+            prodLog.warn('Cleanup already running, skipping');
+            return this.stats;
+        }
+        this.running = true;
+        const startTime = Date.now();
+        this.stats.lastRun = startTime;
+        try {
+            prodLog.debug(`Starting cleanup run: maxAge=${this.config.maxAge}, cutoffTime=${startTime - this.config.maxAge}`);
+            // Step 1: Find eligible items for cleanup
+            const eligibleItems = await this.findEligibleItems(startTime);
+            if (eligibleItems.length === 0) {
+                prodLog.debug('No items eligible for cleanup');
+                return this.stats;
+            }
+            prodLog.info(`Found ${eligibleItems.length} items eligible for cleanup`);
+            // Step 2: Process in batches for safety
+            let processed = 0;
+            let deleted = 0;
+            let errors = 0;
+            for (let i = 0; i < eligibleItems.length; i += this.config.batchSize) {
+                const batch = eligibleItems.slice(i, i + this.config.batchSize);
+                const batchResult = await this.processBatch(batch);
+                processed += batchResult.processed;
+                deleted += batchResult.deleted;
+                errors += batchResult.errors;
+                // Small delay between batches to avoid overwhelming the system
+                if (i + this.config.batchSize < eligibleItems.length) {
+                    await new Promise(resolve => setTimeout(resolve, 10));
+                }
+            }
+            // Update stats
+            this.stats.itemsProcessed += processed;
+            this.stats.itemsDeleted += deleted;
+            this.stats.errors += errors;
+            prodLog.info(`Cleanup run completed: processed=${processed}, deleted=${deleted}, errors=${errors}, duration=${Date.now() - startTime}ms`);
+        }
+        catch (error) {
+            prodLog.error(`Cleanup run failed: ${error}`);
+            this.stats.errors++;
+        }
+        finally {
+            this.running = false;
+        }
+        return this.stats;
+    }
+    /**
+     * Find items eligible for cleanup (old + soft-deleted)
+     */
+    async findEligibleItems(currentTime) {
+        const cutoffTime = currentTime - this.config.maxAge;
+        const eligibleItems = [];
+        try {
+            // Get all nouns from storage (using pagination to avoid memory issues)
+            const nounsResult = await this.storage.getNouns({
+                pagination: { limit: 1000 } // Process in chunks
+            });
+            for (const noun of nounsResult.items) {
+                try {
+                    if (!noun.metadata || !isDeleted(noun.metadata)) {
+                        continue; // Not deleted, skip
+                    }
+                    // Check if old enough for cleanup
+                    const deletedTime = noun.metadata._brainy?.updated || 0;
+                    if (deletedTime && (currentTime - deletedTime) > this.config.maxAge) {
+                        eligibleItems.push(noun.id);
+                    }
+                }
+                catch (error) {
+                    prodLog.warn(`Failed to check item ${noun.id} for cleanup eligibility: ${error}`);
+                }
+            }
+        }
+        catch (error) {
+            prodLog.error(`Failed to find eligible items: ${error}`);
+            throw error;
+        }
+        return eligibleItems;
+    }
+    /**
+     * Process a batch of items for cleanup
+     *
+     * CRITICAL: This maintains the durability-first approach:
+     * Storage → HNSW → Metadata Index
+     */
+    async processBatch(itemIds) {
+        let processed = 0;
+        let deleted = 0;
+        let errors = 0;
+        for (const id of itemIds) {
+            processed++;
+            try {
+                // STEP 1: Remove from storage FIRST (durability guarantee)
+                try {
+                    await this.storage.deleteNoun(id);
+                }
+                catch (storageError) {
+                    prodLog.warn(`Failed to delete ${id} from storage: ${storageError}`);
+                    errors++;
+                    continue;
+                }
+                // STEP 2: Remove from HNSW index (vector search consistency)
+                const hnswResult = this.hnswIndex.removeItem(id);
+                if (!hnswResult) {
+                    prodLog.warn(`Failed to remove ${id} from HNSW index (may not have been indexed)`);
+                    // Not a critical error - item might not have been in vector index
+                }
+                // STEP 3: Remove from metadata index (faceted search consistency)
+                if (this.metadataIndex) {
+                    await this.metadataIndex.removeFromIndex(id);
+                }
+                deleted++;
+                prodLog.debug(`Successfully cleaned up item ${id}`);
+            }
+            catch (error) {
+                errors++;
+                prodLog.error(`Failed to cleanup item ${id}: ${error}`);
+            }
+        }
+        return { processed, deleted, errors };
+    }
+}
+//# sourceMappingURL=periodicCleanup.js.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@soulcraft/brainy",
-  "version": "2.1.0",
+  "version": "3.0.0",
   "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
   "main": "dist/index.js",
   "module": "dist/index.js",
@@ -77,7 +77,12 @@
     "lint": "eslint --ext .ts,.js src/",
     "lint:fix": "eslint --ext .ts,.js src/ --fix",
     "format": "prettier --write \"src/**/*.{ts,js}\"",
-    "format:check": "prettier --check \"src/**/*.{ts,js}\""
+    "format:check": "prettier --check \"src/**/*.{ts,js}\"",
+    "release": "standard-version",
+    "release:patch": "standard-version --release-as patch",
+    "release:minor": "standard-version --release-as minor",
+    "release:major": "standard-version --release-as major",
+    "release:dry": "standard-version --dry-run"
   },
   "keywords": [
     "ai-database",
@@ -130,13 +135,14 @@
     "@typescript-eslint/eslint-plugin": "^8.0.0",
     "@typescript-eslint/parser": "^8.0.0",
     "@vitest/coverage-v8": "^3.2.4",
+    "standard-version": "^9.5.0",
     "tsx": "^4.19.2",
     "typescript": "^5.4.5",
     "vitest": "^3.2.4"
   },
   "dependencies": {
     "@aws-sdk/client-s3": "^3.540.0",
-    "@huggingface/transformers": "^3.1.0",
+    "@huggingface/transformers": "^3.7.2",
     "boxen": "^8.0.1",
     "chalk": "^5.3.0",
     "cli-table3": "^0.6.5",