@soulcraft/brainy 2.1.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/augmentations/AugmentationMetadataContract.d.ts +94 -0
- package/dist/augmentations/AugmentationMetadataContract.js +306 -0
- package/dist/augmentations/apiServerAugmentation.d.ts +1 -0
- package/dist/augmentations/apiServerAugmentation.js +1 -0
- package/dist/augmentations/batchProcessingAugmentation.d.ts +1 -0
- package/dist/augmentations/batchProcessingAugmentation.js +1 -0
- package/dist/augmentations/brainyAugmentation.d.ts +16 -0
- package/dist/augmentations/cacheAugmentation.d.ts +1 -0
- package/dist/augmentations/cacheAugmentation.js +1 -0
- package/dist/augmentations/conduitAugmentations.d.ts +1 -0
- package/dist/augmentations/conduitAugmentations.js +1 -0
- package/dist/augmentations/connectionPoolAugmentation.d.ts +1 -0
- package/dist/augmentations/connectionPoolAugmentation.js +1 -0
- package/dist/augmentations/entityRegistryAugmentation.d.ts +2 -0
- package/dist/augmentations/entityRegistryAugmentation.js +2 -0
- package/dist/augmentations/indexAugmentation.d.ts +1 -0
- package/dist/augmentations/indexAugmentation.js +1 -0
- package/dist/augmentations/intelligentVerbScoringAugmentation.d.ts +4 -0
- package/dist/augmentations/intelligentVerbScoringAugmentation.js +4 -0
- package/dist/augmentations/metadataEnforcer.d.ts +20 -0
- package/dist/augmentations/metadataEnforcer.js +171 -0
- package/dist/augmentations/metricsAugmentation.d.ts +2 -7
- package/dist/augmentations/metricsAugmentation.js +1 -0
- package/dist/augmentations/monitoringAugmentation.d.ts +1 -0
- package/dist/augmentations/monitoringAugmentation.js +1 -0
- package/dist/augmentations/neuralImport.d.ts +4 -0
- package/dist/augmentations/neuralImport.js +4 -0
- package/dist/augmentations/requestDeduplicatorAugmentation.d.ts +1 -0
- package/dist/augmentations/requestDeduplicatorAugmentation.js +1 -0
- package/dist/augmentations/serverSearchAugmentations.d.ts +2 -0
- package/dist/augmentations/serverSearchAugmentations.js +2 -0
- package/dist/augmentations/storageAugmentation.d.ts +1 -0
- package/dist/augmentations/storageAugmentation.js +1 -0
- package/dist/augmentations/synapseAugmentation.d.ts +4 -0
- package/dist/augmentations/synapseAugmentation.js +4 -0
- package/dist/augmentations/walAugmentation.d.ts +1 -0
- package/dist/augmentations/walAugmentation.js +1 -0
- package/dist/brainyData.d.ts +28 -1
- package/dist/brainyData.js +229 -83
- package/dist/embeddings/model-manager.d.ts +9 -8
- package/dist/embeddings/model-manager.js +105 -85
- package/dist/triple/TripleIntelligence.d.ts +4 -0
- package/dist/triple/TripleIntelligence.js +39 -9
- package/dist/utils/deletedItemsIndex.d.ts +59 -0
- package/dist/utils/deletedItemsIndex.js +98 -0
- package/dist/utils/ensureDeleted.d.ts +38 -0
- package/dist/utils/ensureDeleted.js +79 -0
- package/dist/utils/metadataFilter.js +5 -0
- package/dist/utils/metadataIndex.d.ts +4 -0
- package/dist/utils/metadataIndex.js +45 -0
- package/dist/utils/metadataNamespace.d.ts +113 -0
- package/dist/utils/metadataNamespace.js +162 -0
- package/dist/utils/periodicCleanup.d.ts +87 -0
- package/dist/utils/periodicCleanup.js +219 -0
- package/package.json +9 -3
package/dist/utils/metadataIndex.js
CHANGED

@@ -412,6 +412,36 @@ export class MetadataIndexManager {
             }
         }
     }
+    /**
+     * Get all IDs in the index
+     */
+    async getAllIds() {
+        // Collect all unique IDs from all index entries
+        const allIds = new Set();
+        // First, add all IDs from the in-memory cache
+        for (const entry of this.indexCache.values()) {
+            entry.ids.forEach(id => allIds.add(id));
+        }
+        // If storage has a method to get all nouns, use it as the source of truth
+        // This ensures we include items that might not be indexed yet
+        if (this.storage && typeof this.storage.getNouns === 'function') {
+            try {
+                const result = await this.storage.getNouns({
+                    pagination: { limit: 100000 }
+                });
+                if (result && result.items) {
+                    result.items.forEach((item) => {
+                        if (item.id)
+                            allIds.add(item.id);
+                    });
+                }
+            }
+            catch (e) {
+                // Fall back to using only indexed IDs
+            }
+        }
+        return Array.from(allIds);
+    }
     /**
      * Get IDs for a specific field-value combination with caching
      */
@@ -638,6 +668,21 @@ export class MetadataIndexManager {
                         fieldResults = Array.from(allIds);
                     }
                     break;
+                // Negation operators
+                case 'notEquals':
+                case 'isNot':
+                case 'ne':
+                    // For notEquals, we need all IDs EXCEPT those matching the value
+                    // This is especially important for soft delete: deleted !== true
+                    // should include items without a deleted field
+                    // First, get all IDs in the database
+                    const allItemIds = await this.getAllIds();
+                    // Then get IDs that match the value we want to exclude
+                    const excludeIds = await this.getIds(field, operand);
+                    const excludeSet = new Set(excludeIds);
+                    // Return all IDs except those to exclude
+                    fieldResults = allItemIds.filter(id => !excludeSet.has(id));
+                    break;
             }
         }
     }
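The new negation path is a set difference over the full ID universe: everything returned by getAllIds() minus the IDs that match the excluded value. A minimal standalone TypeScript sketch of the same idea (the function and sample data are illustrative, not part of Brainy's API):

// Sketch of the notEquals strategy above: universe minus matching IDs.
// `allIds` stands in for getAllIds(), `matchingIds` for getIds(field, value).
function filterNotEquals(allIds: string[], matchingIds: string[]): string[] {
  const exclude = new Set(matchingIds)
  // Items that never set the field are kept, which is why
  // "deleted !== true" also returns documents without a deleted flag.
  return allIds.filter(id => !exclude.has(id))
}

// Example: three items, one of which matches the excluded value.
const survivors = filterNotEquals(['a', 'b', 'c'], ['b'])
// survivors === ['a', 'c']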
package/dist/utils/metadataNamespace.d.ts
ADDED

@@ -0,0 +1,113 @@
+/**
+ * Clean Metadata Architecture for Brainy 2.2
+ * No backward compatibility - doing it RIGHT from the start!
+ */
+export declare const BRAINY_NS: "_brainy";
+export declare const AUG_NS: "_augmentations";
+export declare const AUDIT_NS: "_audit";
+export declare const DELETED_FIELD: "_brainy.deleted";
+export declare const INDEXED_FIELD: "_brainy.indexed";
+export declare const VERSION_FIELD: "_brainy.version";
+/**
+ * Internal Brainy metadata structure
+ * These fields are ALWAYS present and indexed for O(1) access
+ */
+export interface BrainyInternalMetadata {
+    deleted: boolean;
+    indexed: boolean;
+    version: number;
+    created: number;
+    updated: number;
+    partition?: number;
+    domain?: string;
+    priority?: number;
+    ttl?: number;
+}
+/**
+ * Complete metadata structure with namespaces
+ */
+export interface NamespacedMetadata<T = any> {
+    [key: string]: any;
+    [BRAINY_NS]: BrainyInternalMetadata;
+    [AUG_NS]?: {
+        [augmentationName: string]: any;
+    };
+    [AUDIT_NS]?: Array<{
+        timestamp: number;
+        augmentation: string;
+        field: string;
+        oldValue: any;
+        newValue: any;
+    }>;
+}
+/**
+ * Create properly namespaced metadata
+ * This is called for EVERY noun/verb creation
+ */
+export declare function createNamespacedMetadata<T = any>(userMetadata?: T): NamespacedMetadata<T>;
+/**
+ * Update metadata while preserving namespaces
+ */
+export declare function updateNamespacedMetadata<T = any>(existing: NamespacedMetadata<T>, updates: Partial<T>): NamespacedMetadata<T>;
+/**
+ * Soft delete a noun (O(1) operation)
+ */
+export declare function markDeleted<T = any>(metadata: NamespacedMetadata<T>): NamespacedMetadata<T>;
+/**
+ * Restore a soft-deleted noun (O(1) operation)
+ */
+export declare function markRestored<T = any>(metadata: NamespacedMetadata<T>): NamespacedMetadata<T>;
+/**
+ * Check if a noun is deleted (O(1) check)
+ */
+export declare function isDeleted<T = any>(metadata: NamespacedMetadata<T>): boolean;
+/**
+ * Get user metadata without internal fields
+ * Used by augmentations to get clean user data
+ */
+export declare function getUserMetadata<T = any>(metadata: NamespacedMetadata<T>): T;
+/**
+ * Set augmentation data in isolated namespace
+ */
+export declare function setAugmentationData<T = any>(metadata: NamespacedMetadata<T>, augmentationName: string, data: any): NamespacedMetadata<T>;
+/**
+ * Add audit entry for tracking
+ */
+export declare function addAuditEntry<T = any>(metadata: NamespacedMetadata<T>, entry: {
+    augmentation: string;
+    field: string;
+    oldValue: any;
+    newValue: any;
+}): NamespacedMetadata<T>;
+/**
+ * INDEXING EXPLANATION:
+ *
+ * The MetadataIndex flattens nested objects into dot-notation keys:
+ *
+ * Input metadata:
+ * {
+ *   name: "Django",
+ *   _brainy: {
+ *     deleted: false,
+ *     indexed: true
+ *   }
+ * }
+ *
+ * Creates index entries:
+ * - "name" -> "django" -> Set([id1, id2...])
+ * - "_brainy.deleted" -> "false" -> Set([id1, id2...]) // O(1) lookup!
+ * - "_brainy.indexed" -> "true" -> Set([id1, id2...])
+ *
+ * Query: { "_brainy.deleted": false }
+ * Lookup: index["_brainy.deleted"]["false"] -> Set of IDs in O(1)
+ *
+ * This is why namespacing doesn't hurt performance - it's all flattened!
+ */
+/**
+ * Fields that should ALWAYS be indexed for O(1) access
+ */
+export declare const ALWAYS_INDEXED_FIELDS: ("_brainy.deleted" | "_brainy.indexed" | "_brainy.version")[];
+/**
+ * Fields that should use sorted index for O(log n) range queries
+ */
+export declare const SORTED_INDEX_FIELDS: string[];
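The INDEXING EXPLANATION above reduces to: nested namespaces are flattened into dot-notation keys, so a soft-delete filter is a single map lookup. A rough TypeScript sketch of that flattening (simplified structures for illustration only, not the actual MetadataIndexManager internals):

// Hypothetical flat index: "path.to.field" -> stringified value -> set of IDs.
type FlatIndex = Map<string, Map<string, Set<string>>>

// Recursively flatten nested metadata into dot-notation entries.
function indexMetadata(index: FlatIndex, id: string, meta: Record<string, unknown>, prefix = ''): void {
  for (const [key, value] of Object.entries(meta)) {
    const path = prefix ? `${prefix}.${key}` : key
    if (value !== null && typeof value === 'object' && !Array.isArray(value)) {
      indexMetadata(index, id, value as Record<string, unknown>, path)
    } else {
      const byValue = index.get(path) ?? new Map<string, Set<string>>()
      const ids = byValue.get(String(value).toLowerCase()) ?? new Set<string>()
      ids.add(id)
      byValue.set(String(value).toLowerCase(), ids)
      index.set(path, byValue)
    }
  }
}

const index: FlatIndex = new Map()
indexMetadata(index, 'id1', { name: 'Django', _brainy: { deleted: false, indexed: true } })
// index.get('_brainy.deleted')?.get('false') -> Set { 'id1' }, a single O(1) lookup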
package/dist/utils/metadataNamespace.js
ADDED

@@ -0,0 +1,162 @@
+/**
+ * Clean Metadata Architecture for Brainy 2.2
+ * No backward compatibility - doing it RIGHT from the start!
+ */
+// Namespace constants
+export const BRAINY_NS = '_brainy';
+export const AUG_NS = '_augmentations';
+export const AUDIT_NS = '_audit';
+// Field paths for O(1) indexing
+export const DELETED_FIELD = `${BRAINY_NS}.deleted`;
+export const INDEXED_FIELD = `${BRAINY_NS}.indexed`;
+export const VERSION_FIELD = `${BRAINY_NS}.version`;
+/**
+ * Create properly namespaced metadata
+ * This is called for EVERY noun/verb creation
+ */
+export function createNamespacedMetadata(userMetadata) {
+    const now = Date.now();
+    // Start with user metadata or empty object
+    const result = userMetadata ? { ...userMetadata } : {};
+    // ALWAYS add internal namespace with required fields
+    result[BRAINY_NS] = {
+        deleted: false, // CRITICAL: Always false for new items
+        indexed: true, // New items are indexed
+        version: 1, // Current schema version
+        created: now,
+        updated: now
+    };
+    return result;
+}
+/**
+ * Update metadata while preserving namespaces
+ */
+export function updateNamespacedMetadata(existing, updates) {
+    const now = Date.now();
+    // Merge user fields
+    const result = {
+        ...existing,
+        ...updates
+    };
+    // Preserve internal namespace but update timestamp
+    result[BRAINY_NS] = {
+        ...existing[BRAINY_NS],
+        updated: now
+    };
+    // Preserve augmentation namespace
+    if (existing[AUG_NS]) {
+        result[AUG_NS] = existing[AUG_NS];
+    }
+    // Preserve audit trail
+    if (existing[AUDIT_NS]) {
+        result[AUDIT_NS] = existing[AUDIT_NS];
+    }
+    return result;
+}
+/**
+ * Soft delete a noun (O(1) operation)
+ */
+export function markDeleted(metadata) {
+    return {
+        ...metadata,
+        [BRAINY_NS]: {
+            ...metadata[BRAINY_NS],
+            deleted: true,
+            updated: Date.now()
+        }
+    };
+}
+/**
+ * Restore a soft-deleted noun (O(1) operation)
+ */
+export function markRestored(metadata) {
+    return {
+        ...metadata,
+        [BRAINY_NS]: {
+            ...metadata[BRAINY_NS],
+            deleted: false,
+            updated: Date.now()
+        }
+    };
+}
+/**
+ * Check if a noun is deleted (O(1) check)
+ */
+export function isDeleted(metadata) {
+    return metadata[BRAINY_NS]?.deleted === true;
+}
+/**
+ * Get user metadata without internal fields
+ * Used by augmentations to get clean user data
+ */
+export function getUserMetadata(metadata) {
+    const { [BRAINY_NS]: _, [AUG_NS]: __, [AUDIT_NS]: ___, ...userMeta } = metadata;
+    return userMeta;
+}
+/**
+ * Set augmentation data in isolated namespace
+ */
+export function setAugmentationData(metadata, augmentationName, data) {
+    const result = { ...metadata };
+    if (!result[AUG_NS]) {
+        result[AUG_NS] = {};
+    }
+    result[AUG_NS][augmentationName] = data;
+    return result;
+}
+/**
+ * Add audit entry for tracking
+ */
+export function addAuditEntry(metadata, entry) {
+    const result = { ...metadata };
+    if (!result[AUDIT_NS]) {
+        result[AUDIT_NS] = [];
+    }
+    result[AUDIT_NS].push({
+        ...entry,
+        timestamp: Date.now()
+    });
+    return result;
+}
+/**
+ * INDEXING EXPLANATION:
+ *
+ * The MetadataIndex flattens nested objects into dot-notation keys:
+ *
+ * Input metadata:
+ * {
+ *   name: "Django",
+ *   _brainy: {
+ *     deleted: false,
+ *     indexed: true
+ *   }
+ * }
+ *
+ * Creates index entries:
+ * - "name" -> "django" -> Set([id1, id2...])
+ * - "_brainy.deleted" -> "false" -> Set([id1, id2...]) // O(1) lookup!
+ * - "_brainy.indexed" -> "true" -> Set([id1, id2...])
+ *
+ * Query: { "_brainy.deleted": false }
+ * Lookup: index["_brainy.deleted"]["false"] -> Set of IDs in O(1)
+ *
+ * This is why namespacing doesn't hurt performance - it's all flattened!
+ */
+/**
+ * Fields that should ALWAYS be indexed for O(1) access
+ */
+export const ALWAYS_INDEXED_FIELDS = [
+    DELETED_FIELD, // For soft delete filtering
+    INDEXED_FIELD, // For index management
+    VERSION_FIELD // For schema versioning
+];
+/**
+ * Fields that should use sorted index for O(log n) range queries
+ */
+export const SORTED_INDEX_FIELDS = [
+    `${BRAINY_NS}.created`,
+    `${BRAINY_NS}.updated`,
+    `${BRAINY_NS}.priority`,
+    `${BRAINY_NS}.ttl`
+];
+//# sourceMappingURL=metadataNamespace.js.map
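Taken together, the helpers above form the soft-delete lifecycle: create namespaced metadata, flip the deleted flag, test it, and hand augmentations only the user fields. A usage sketch (the import path is illustrative; these are internal dist utilities rather than the documented public API):

import {
  createNamespacedMetadata,
  markDeleted,
  isDeleted,
  getUserMetadata
} from './metadataNamespace.js'

// New item: user fields plus the _brainy namespace (deleted: false, version: 1, ...).
let meta = createNamespacedMetadata({ name: 'Django', kind: 'framework' })

// Soft delete is a namespaced flag flip, so it stays O(1).
meta = markDeleted(meta)
console.log(isDeleted(meta))        // true

// Augmentations see only the clean user fields.
console.log(getUserMetadata(meta))  // { name: 'Django', kind: 'framework' }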
package/dist/utils/periodicCleanup.d.ts
ADDED

@@ -0,0 +1,87 @@
+/**
+ * Periodic Cleanup for Soft-Deleted Items
+ *
+ * SAFETY-FIRST APPROACH:
+ * - Maintains durability guarantees (storage-first)
+ * - Coordinates HNSW and metadata index consistency
+ * - Isolated from live operations
+ * - Graceful failure handling
+ */
+import type { StorageAdapter } from '../coreTypes.js';
+import type { HNSWIndex } from '../hnsw/hnswIndex.js';
+import type { MetadataIndexManager } from './metadataIndex.js';
+export interface CleanupConfig {
+    /** Age in milliseconds after which soft-deleted items are eligible for cleanup */
+    maxAge: number;
+    /** Maximum number of items to clean up in one batch */
+    batchSize: number;
+    /** Interval between cleanup runs (milliseconds) */
+    cleanupInterval: number;
+    /** Whether to run cleanup automatically */
+    enabled: boolean;
+}
+export interface CleanupStats {
+    itemsProcessed: number;
+    itemsDeleted: number;
+    errors: number;
+    lastRun: number;
+    nextRun: number;
+}
+/**
+ * Coordinates safe cleanup of old soft-deleted items across all indexes
+ *
+ * CRITICAL SAFETY FEATURES:
+ * 1. Storage-first deletion (durability)
+ * 2. Index consistency coordination
+ * 3. Batch processing with limits
+ * 4. Error isolation and recovery
+ */
+export declare class PeriodicCleanup {
+    private storage;
+    private hnswIndex;
+    private metadataIndex;
+    private config;
+    private stats;
+    private cleanupTimer;
+    private running;
+    constructor(storage: StorageAdapter, hnswIndex: HNSWIndex, metadataIndex: MetadataIndexManager | null, config?: Partial<CleanupConfig>);
+    /**
+     * Start periodic cleanup
+     */
+    start(): void;
+    /**
+     * Stop periodic cleanup
+     */
+    stop(): void;
+    /**
+     * Run cleanup manually
+     */
+    runNow(): Promise<CleanupStats>;
+    /**
+     * Get current cleanup statistics
+     */
+    getStats(): CleanupStats;
+    private scheduleNext;
+    /**
+     * CRITICAL: Coordinated cleanup across all indexes
+     *
+     * SAFETY PROTOCOL:
+     * 1. Find eligible items (old + soft-deleted)
+     * 2. Remove from storage FIRST (durability)
+     * 3. Remove from HNSW (graph consistency)
+     * 4. Remove from metadata index (search consistency)
+     * 5. Track stats and errors
+     */
+    private performCleanup;
+    /**
+     * Find items eligible for cleanup (old + soft-deleted)
+     */
+    private findEligibleItems;
+    /**
+     * Process a batch of items for cleanup
+     *
+     * CRITICAL: This maintains the durability-first approach:
+     * Storage → HNSW → Metadata Index
+     */
+    private processBatch;
+}
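Per the implementation below, the defaults are a one-hour maxAge, batches of 100, and a 15-minute interval; callers can override any subset via Partial<CleanupConfig>. An illustrative override (values chosen for the example, not recommended settings):

import type { CleanupConfig } from './periodicCleanup.js'

const config: Partial<CleanupConfig> = {
  maxAge: 24 * 60 * 60 * 1000,      // eligible once soft-deleted for 24 hours
  batchSize: 50,                    // at most 50 hard deletes per batch
  cleanupInterval: 60 * 60 * 1000,  // run hourly
  enabled: true                     // schedule automatically on start()
}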
package/dist/utils/periodicCleanup.js
ADDED

@@ -0,0 +1,219 @@
+/**
+ * Periodic Cleanup for Soft-Deleted Items
+ *
+ * SAFETY-FIRST APPROACH:
+ * - Maintains durability guarantees (storage-first)
+ * - Coordinates HNSW and metadata index consistency
+ * - Isolated from live operations
+ * - Graceful failure handling
+ */
+import { prodLog } from './logger.js';
+import { isDeleted } from './metadataNamespace.js';
+/**
+ * Coordinates safe cleanup of old soft-deleted items across all indexes
+ *
+ * CRITICAL SAFETY FEATURES:
+ * 1. Storage-first deletion (durability)
+ * 2. Index consistency coordination
+ * 3. Batch processing with limits
+ * 4. Error isolation and recovery
+ */
+export class PeriodicCleanup {
+    constructor(storage, hnswIndex, metadataIndex, config = {}) {
+        this.cleanupTimer = null;
+        this.running = false;
+        this.storage = storage;
+        this.hnswIndex = hnswIndex;
+        this.metadataIndex = metadataIndex;
+        // Default: clean up items deleted more than 1 hour ago
+        this.config = {
+            maxAge: config.maxAge ?? 60 * 60 * 1000, // 1 hour
+            batchSize: config.batchSize ?? 100, // 100 items max per batch
+            cleanupInterval: config.cleanupInterval ?? 15 * 60 * 1000, // Every 15 minutes
+            enabled: config.enabled ?? true
+        };
+        this.stats = {
+            itemsProcessed: 0,
+            itemsDeleted: 0,
+            errors: 0,
+            lastRun: 0,
+            nextRun: 0
+        };
+    }
+    /**
+     * Start periodic cleanup
+     */
+    start() {
+        if (!this.config.enabled || this.cleanupTimer) {
+            return;
+        }
+        prodLog.info(`Starting periodic cleanup: maxAge=${this.config.maxAge}, batchSize=${this.config.batchSize}, interval=${this.config.cleanupInterval}`);
+        this.scheduleNext();
+    }
+    /**
+     * Stop periodic cleanup
+     */
+    stop() {
+        if (this.cleanupTimer) {
+            clearTimeout(this.cleanupTimer);
+            this.cleanupTimer = null;
+        }
+        prodLog.info('Stopped periodic cleanup');
+    }
+    /**
+     * Run cleanup manually
+     */
+    async runNow() {
+        if (this.running) {
+            throw new Error('Cleanup already running');
+        }
+        return this.performCleanup();
+    }
+    /**
+     * Get current cleanup statistics
+     */
+    getStats() {
+        return { ...this.stats };
+    }
+    scheduleNext() {
+        const nextRun = Date.now() + this.config.cleanupInterval;
+        this.stats.nextRun = nextRun;
+        this.cleanupTimer = setTimeout(async () => {
+            await this.performCleanup();
+            this.scheduleNext();
+        }, this.config.cleanupInterval);
+    }
+    /**
+     * CRITICAL: Coordinated cleanup across all indexes
+     *
+     * SAFETY PROTOCOL:
+     * 1. Find eligible items (old + soft-deleted)
+     * 2. Remove from storage FIRST (durability)
+     * 3. Remove from HNSW (graph consistency)
+     * 4. Remove from metadata index (search consistency)
+     * 5. Track stats and errors
+     */
+    async performCleanup() {
+        if (this.running) {
+            prodLog.warn('Cleanup already running, skipping');
+            return this.stats;
+        }
+        this.running = true;
+        const startTime = Date.now();
+        this.stats.lastRun = startTime;
+        try {
+            prodLog.debug(`Starting cleanup run: maxAge=${this.config.maxAge}, cutoffTime=${startTime - this.config.maxAge}`);
+            // Step 1: Find eligible items for cleanup
+            const eligibleItems = await this.findEligibleItems(startTime);
+            if (eligibleItems.length === 0) {
+                prodLog.debug('No items eligible for cleanup');
+                return this.stats;
+            }
+            prodLog.info(`Found ${eligibleItems.length} items eligible for cleanup`);
+            // Step 2: Process in batches for safety
+            let processed = 0;
+            let deleted = 0;
+            let errors = 0;
+            for (let i = 0; i < eligibleItems.length; i += this.config.batchSize) {
+                const batch = eligibleItems.slice(i, i + this.config.batchSize);
+                const batchResult = await this.processBatch(batch);
+                processed += batchResult.processed;
+                deleted += batchResult.deleted;
+                errors += batchResult.errors;
+                // Small delay between batches to avoid overwhelming the system
+                if (i + this.config.batchSize < eligibleItems.length) {
+                    await new Promise(resolve => setTimeout(resolve, 10));
+                }
+            }
+            // Update stats
+            this.stats.itemsProcessed += processed;
+            this.stats.itemsDeleted += deleted;
+            this.stats.errors += errors;
+            prodLog.info(`Cleanup run completed: processed=${processed}, deleted=${deleted}, errors=${errors}, duration=${Date.now() - startTime}ms`);
+        }
+        catch (error) {
+            prodLog.error(`Cleanup run failed: ${error}`);
+            this.stats.errors++;
+        }
+        finally {
+            this.running = false;
+        }
+        return this.stats;
+    }
+    /**
+     * Find items eligible for cleanup (old + soft-deleted)
+     */
+    async findEligibleItems(currentTime) {
+        const cutoffTime = currentTime - this.config.maxAge;
+        const eligibleItems = [];
+        try {
+            // Get all nouns from storage (using pagination to avoid memory issues)
+            const nounsResult = await this.storage.getNouns({
+                pagination: { limit: 1000 } // Process in chunks
+            });
+            for (const noun of nounsResult.items) {
+                try {
+                    if (!noun.metadata || !isDeleted(noun.metadata)) {
+                        continue; // Not deleted, skip
+                    }
+                    // Check if old enough for cleanup
+                    const deletedTime = noun.metadata._brainy?.updated || 0;
+                    if (deletedTime && (currentTime - deletedTime) > this.config.maxAge) {
+                        eligibleItems.push(noun.id);
+                    }
+                }
+                catch (error) {
+                    prodLog.warn(`Failed to check item ${noun.id} for cleanup eligibility: ${error}`);
+                }
+            }
+        }
+        catch (error) {
+            prodLog.error(`Failed to find eligible items: ${error}`);
+            throw error;
+        }
+        return eligibleItems;
+    }
+    /**
+     * Process a batch of items for cleanup
+     *
+     * CRITICAL: This maintains the durability-first approach:
+     * Storage → HNSW → Metadata Index
+     */
+    async processBatch(itemIds) {
+        let processed = 0;
+        let deleted = 0;
+        let errors = 0;
+        for (const id of itemIds) {
+            processed++;
+            try {
+                // STEP 1: Remove from storage FIRST (durability guarantee)
+                try {
+                    await this.storage.deleteNoun(id);
+                }
+                catch (storageError) {
+                    prodLog.warn(`Failed to delete ${id} from storage: ${storageError}`);
+                    errors++;
+                    continue;
+                }
+                // STEP 2: Remove from HNSW index (vector search consistency)
+                const hnswResult = this.hnswIndex.removeItem(id);
+                if (!hnswResult) {
+                    prodLog.warn(`Failed to remove ${id} from HNSW index (may not have been indexed)`);
+                    // Not a critical error - item might not have been in vector index
+                }
+                // STEP 3: Remove from metadata index (faceted search consistency)
+                if (this.metadataIndex) {
+                    await this.metadataIndex.removeFromIndex(id);
+                }
+                deleted++;
+                prodLog.debug(`Successfully cleaned up item ${id}`);
+            }
+            catch (error) {
+                errors++;
+                prodLog.error(`Failed to cleanup item ${id}: ${error}`);
+            }
+        }
+        return { processed, deleted, errors };
+    }
+}
+//# sourceMappingURL=periodicCleanup.js.map
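A usage sketch for the cleanup coordinator, assuming storage, hnswIndex, and metadataIndex already exist (for example from an initialized Brainy instance; their wiring is not shown in this diff):

import { PeriodicCleanup } from './periodicCleanup.js'
import type { StorageAdapter } from '../coreTypes.js'
import type { HNSWIndex } from '../hnsw/hnswIndex.js'
import type { MetadataIndexManager } from './metadataIndex.js'

// Assumed to come from an already-initialized Brainy instance.
declare const storage: StorageAdapter
declare const hnswIndex: HNSWIndex
declare const metadataIndex: MetadataIndexManager

async function runCleanup() {
  const cleanup = new PeriodicCleanup(storage, hnswIndex, metadataIndex, {
    maxAge: 60 * 60 * 1000,          // purge items soft-deleted more than 1h ago (default)
    cleanupInterval: 15 * 60 * 1000  // check every 15 minutes (default)
  })

  cleanup.start()                       // schedule periodic runs
  const stats = await cleanup.runNow()  // or trigger a single pass manually
  console.log(stats.itemsDeleted, stats.errors)
  cleanup.stop()                        // clear the timer on shutdown
}
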
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@soulcraft/brainy",
-  "version": "2.1.0",
+  "version": "3.0.0",
   "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
   "main": "dist/index.js",
   "module": "dist/index.js",
@@ -77,7 +77,12 @@
     "lint": "eslint --ext .ts,.js src/",
     "lint:fix": "eslint --ext .ts,.js src/ --fix",
     "format": "prettier --write \"src/**/*.{ts,js}\"",
-    "format:check": "prettier --check \"src/**/*.{ts,js}\""
+    "format:check": "prettier --check \"src/**/*.{ts,js}\"",
+    "release": "standard-version",
+    "release:patch": "standard-version --release-as patch",
+    "release:minor": "standard-version --release-as minor",
+    "release:major": "standard-version --release-as major",
+    "release:dry": "standard-version --dry-run"
   },
   "keywords": [
     "ai-database",
@@ -130,13 +135,14 @@
     "@typescript-eslint/eslint-plugin": "^8.0.0",
     "@typescript-eslint/parser": "^8.0.0",
     "@vitest/coverage-v8": "^3.2.4",
+    "standard-version": "^9.5.0",
     "tsx": "^4.19.2",
     "typescript": "^5.4.5",
     "vitest": "^3.2.4"
   },
   "dependencies": {
     "@aws-sdk/client-s3": "^3.540.0",
-    "@huggingface/transformers": "^3.
+    "@huggingface/transformers": "^3.7.2",
     "boxen": "^8.0.1",
     "chalk": "^5.3.0",
     "cli-table3": "^0.6.5",