@soulcraft/brainy 1.4.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +188 -0
- package/LICENSE +2 -2
- package/README.md +201 -596
- package/bin/brainy-interactive.js +564 -0
- package/bin/brainy-ts.js +18 -0
- package/bin/brainy.js +672 -81
- package/dist/augmentationPipeline.d.ts +48 -220
- package/dist/augmentationPipeline.js +60 -508
- package/dist/augmentationRegistry.d.ts +22 -31
- package/dist/augmentationRegistry.js +28 -79
- package/dist/augmentations/apiServerAugmentation.d.ts +108 -0
- package/dist/augmentations/apiServerAugmentation.js +502 -0
- package/dist/augmentations/batchProcessingAugmentation.d.ts +95 -0
- package/dist/augmentations/batchProcessingAugmentation.js +567 -0
- package/dist/augmentations/brainyAugmentation.d.ts +153 -0
- package/dist/augmentations/brainyAugmentation.js +145 -0
- package/dist/augmentations/cacheAugmentation.d.ts +105 -0
- package/dist/augmentations/cacheAugmentation.js +238 -0
- package/dist/augmentations/conduitAugmentations.d.ts +54 -156
- package/dist/augmentations/conduitAugmentations.js +156 -1082
- package/dist/augmentations/connectionPoolAugmentation.d.ts +62 -0
- package/dist/augmentations/connectionPoolAugmentation.js +316 -0
- package/dist/augmentations/defaultAugmentations.d.ts +53 -0
- package/dist/augmentations/defaultAugmentations.js +88 -0
- package/dist/augmentations/entityRegistryAugmentation.d.ts +126 -0
- package/dist/augmentations/entityRegistryAugmentation.js +386 -0
- package/dist/augmentations/indexAugmentation.d.ts +117 -0
- package/dist/augmentations/indexAugmentation.js +284 -0
- package/dist/augmentations/intelligentVerbScoringAugmentation.d.ts +152 -0
- package/dist/augmentations/intelligentVerbScoringAugmentation.js +554 -0
- package/dist/augmentations/metricsAugmentation.d.ts +202 -0
- package/dist/augmentations/metricsAugmentation.js +291 -0
- package/dist/augmentations/monitoringAugmentation.d.ts +94 -0
- package/dist/augmentations/monitoringAugmentation.js +227 -0
- package/dist/augmentations/neuralImport.d.ts +50 -117
- package/dist/augmentations/neuralImport.js +255 -629
- package/dist/augmentations/requestDeduplicatorAugmentation.d.ts +52 -0
- package/dist/augmentations/requestDeduplicatorAugmentation.js +162 -0
- package/dist/augmentations/serverSearchAugmentations.d.ts +43 -22
- package/dist/augmentations/serverSearchAugmentations.js +125 -72
- package/dist/augmentations/storageAugmentation.d.ts +54 -0
- package/dist/augmentations/storageAugmentation.js +93 -0
- package/dist/augmentations/storageAugmentations.d.ts +96 -0
- package/dist/augmentations/storageAugmentations.js +182 -0
- package/dist/augmentations/synapseAugmentation.d.ts +156 -0
- package/dist/augmentations/synapseAugmentation.js +312 -0
- package/dist/augmentations/walAugmentation.d.ts +108 -0
- package/dist/augmentations/walAugmentation.js +515 -0
- package/dist/brainyData.d.ts +404 -130
- package/dist/brainyData.js +1336 -855
- package/dist/chat/BrainyChat.d.ts +16 -8
- package/dist/chat/BrainyChat.js +60 -32
- package/dist/chat/ChatCLI.d.ts +1 -1
- package/dist/chat/ChatCLI.js +6 -6
- package/dist/cli/catalog.d.ts +3 -3
- package/dist/cli/catalog.js +116 -70
- package/dist/cli/commands/core.d.ts +61 -0
- package/dist/cli/commands/core.js +348 -0
- package/dist/cli/commands/neural.d.ts +25 -0
- package/dist/cli/commands/neural.js +508 -0
- package/dist/cli/commands/utility.d.ts +37 -0
- package/dist/cli/commands/utility.js +276 -0
- package/dist/cli/index.d.ts +7 -0
- package/dist/cli/index.js +167 -0
- package/dist/cli/interactive.d.ts +164 -0
- package/dist/cli/interactive.js +542 -0
- package/dist/cortex/neuralImport.js +5 -5
- package/dist/critical/model-guardian.js +11 -4
- package/dist/embeddings/lightweight-embedder.d.ts +23 -0
- package/dist/embeddings/lightweight-embedder.js +136 -0
- package/dist/embeddings/universal-memory-manager.d.ts +38 -0
- package/dist/embeddings/universal-memory-manager.js +206 -0
- package/dist/embeddings/worker-embedding.d.ts +7 -0
- package/dist/embeddings/worker-embedding.js +77 -0
- package/dist/embeddings/worker-manager.d.ts +28 -0
- package/dist/embeddings/worker-manager.js +162 -0
- package/dist/examples/basicUsage.js +7 -7
- package/dist/graph/pathfinding.d.ts +78 -0
- package/dist/graph/pathfinding.js +393 -0
- package/dist/hnsw/hnswIndex.d.ts +13 -0
- package/dist/hnsw/hnswIndex.js +35 -0
- package/dist/hnsw/hnswIndexOptimized.d.ts +1 -0
- package/dist/hnsw/hnswIndexOptimized.js +3 -0
- package/dist/index.d.ts +9 -11
- package/dist/index.js +21 -11
- package/dist/indices/fieldIndex.d.ts +76 -0
- package/dist/indices/fieldIndex.js +357 -0
- package/dist/mcp/brainyMCPAdapter.js +3 -2
- package/dist/mcp/mcpAugmentationToolset.js +11 -17
- package/dist/neural/embeddedPatterns.d.ts +41 -0
- package/dist/neural/embeddedPatterns.js +4044 -0
- package/dist/neural/naturalLanguageProcessor.d.ts +94 -0
- package/dist/neural/naturalLanguageProcessor.js +317 -0
- package/dist/neural/naturalLanguageProcessorStatic.d.ts +64 -0
- package/dist/neural/naturalLanguageProcessorStatic.js +151 -0
- package/dist/neural/neuralAPI.d.ts +255 -0
- package/dist/neural/neuralAPI.js +612 -0
- package/dist/neural/patternLibrary.d.ts +101 -0
- package/dist/neural/patternLibrary.js +313 -0
- package/dist/neural/patterns.d.ts +27 -0
- package/dist/neural/patterns.js +68 -0
- package/dist/neural/staticPatternMatcher.d.ts +35 -0
- package/dist/neural/staticPatternMatcher.js +153 -0
- package/dist/scripts/precomputePatternEmbeddings.d.ts +19 -0
- package/dist/scripts/precomputePatternEmbeddings.js +100 -0
- package/dist/storage/adapters/fileSystemStorage.d.ts +5 -0
- package/dist/storage/adapters/fileSystemStorage.js +20 -0
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +5 -0
- package/dist/storage/adapters/s3CompatibleStorage.js +16 -0
- package/dist/storage/enhancedClearOperations.d.ts +83 -0
- package/dist/storage/enhancedClearOperations.js +345 -0
- package/dist/storage/storageFactory.js +31 -27
- package/dist/triple/TripleIntelligence.d.ts +134 -0
- package/dist/triple/TripleIntelligence.js +548 -0
- package/dist/types/augmentations.d.ts +45 -344
- package/dist/types/augmentations.js +5 -2
- package/dist/types/brainyDataInterface.d.ts +20 -10
- package/dist/types/graphTypes.d.ts +46 -0
- package/dist/types/graphTypes.js +16 -2
- package/dist/utils/BoundedRegistry.d.ts +29 -0
- package/dist/utils/BoundedRegistry.js +54 -0
- package/dist/utils/embedding.js +20 -3
- package/dist/utils/hybridModelManager.js +10 -5
- package/dist/utils/metadataFilter.d.ts +33 -19
- package/dist/utils/metadataFilter.js +58 -23
- package/dist/utils/metadataIndex.d.ts +37 -6
- package/dist/utils/metadataIndex.js +427 -64
- package/dist/utils/requestDeduplicator.d.ts +10 -0
- package/dist/utils/requestDeduplicator.js +24 -0
- package/dist/utils/unifiedCache.d.ts +103 -0
- package/dist/utils/unifiedCache.js +311 -0
- package/package.json +43 -128
- package/scripts/ensure-models.js +108 -0
- package/scripts/prepare-models.js +387 -0
- package/OFFLINE_MODELS.md +0 -56
- package/dist/intelligence/neuralEngine.d.ts +0 -207
- package/dist/intelligence/neuralEngine.js +0 -706
- package/dist/utils/modelLoader.d.ts +0 -32
- package/dist/utils/modelLoader.js +0 -219
- package/dist/utils/modelManager.d.ts +0 -77
- package/dist/utils/modelManager.js +0 -219
|
@@ -5,10 +5,7 @@
|
|
|
5
5
|
*/
|
|
6
6
|
import { MetadataIndexCache } from './metadataIndexCache.js';
|
|
7
7
|
import { prodLog } from './logger.js';
|
|
8
|
-
|
|
9
|
-
* Manages metadata indexes for fast filtering
|
|
10
|
-
* Maintains inverted indexes: field+value -> list of IDs
|
|
11
|
-
*/
|
|
8
|
+
import { getGlobalCache } from './unifiedCache.js';
|
|
12
9
|
export class MetadataIndexManager {
|
|
13
10
|
constructor(storage, config = {}) {
|
|
14
11
|
this.indexCache = new Map();
|
|
@@ -18,6 +15,9 @@ export class MetadataIndexManager {
|
|
|
18
15
|
this.dirtyFields = new Set();
|
|
19
16
|
this.lastFlushTime = Date.now();
|
|
20
17
|
this.autoFlushThreshold = 10; // Start with 10 for more frequent non-blocking flushes
|
|
18
|
+
// Sorted indices for range queries (only for numeric/date fields)
|
|
19
|
+
this.sortedIndices = new Map();
|
|
20
|
+
this.numericFields = new Set(); // Track which fields are numeric
|
|
21
21
|
this.storage = storage;
|
|
22
22
|
this.config = {
|
|
23
23
|
maxIndexSize: config.maxIndexSize ?? 10000,
|
|
@@ -32,6 +32,8 @@ export class MetadataIndexManager {
|
|
|
32
32
|
maxSize: 500, // 500 entries (field indexes + value chunks)
|
|
33
33
|
enabled: true
|
|
34
34
|
});
|
|
35
|
+
// Get global unified cache for coordinated memory management
|
|
36
|
+
this.unifiedCache = getGlobalCache();
|
|
35
37
|
}
|
|
36
38
|
/**
|
|
37
39
|
* Get index key for field and value
|
|
@@ -40,6 +42,140 @@ export class MetadataIndexManager {
|
|
|
40
42
|
const normalizedValue = this.normalizeValue(value);
|
|
41
43
|
return `${field}:${normalizedValue}`;
|
|
42
44
|
}
|
|
45
|
+
/**
|
|
46
|
+
* Ensure sorted index exists for a field (for range queries)
|
|
47
|
+
*/
|
|
48
|
+
async ensureSortedIndex(field) {
|
|
49
|
+
if (!this.sortedIndices.has(field)) {
|
|
50
|
+
// Try to load from storage first
|
|
51
|
+
const loaded = await this.loadSortedIndex(field);
|
|
52
|
+
if (loaded) {
|
|
53
|
+
this.sortedIndices.set(field, loaded);
|
|
54
|
+
}
|
|
55
|
+
else {
|
|
56
|
+
// Create new sorted index
|
|
57
|
+
this.sortedIndices.set(field, {
|
|
58
|
+
values: [],
|
|
59
|
+
isDirty: true,
|
|
60
|
+
fieldType: 'mixed'
|
|
61
|
+
});
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
/**
|
|
66
|
+
* Build sorted index for a field from hash index
|
|
67
|
+
*/
|
|
68
|
+
async buildSortedIndex(field) {
|
|
69
|
+
const sortedIndex = this.sortedIndices.get(field);
|
|
70
|
+
if (!sortedIndex || !sortedIndex.isDirty)
|
|
71
|
+
return;
|
|
72
|
+
// Collect all values for this field from hash index
|
|
73
|
+
const valueMap = new Map();
|
|
74
|
+
for (const [key, entry] of this.indexCache.entries()) {
|
|
75
|
+
if (entry.field === field) {
|
|
76
|
+
const existing = valueMap.get(entry.value);
|
|
77
|
+
if (existing) {
|
|
78
|
+
// Merge ID sets
|
|
79
|
+
entry.ids.forEach(id => existing.add(id));
|
|
80
|
+
}
|
|
81
|
+
else {
|
|
82
|
+
valueMap.set(entry.value, new Set(entry.ids));
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
// Convert to sorted array
|
|
87
|
+
const sorted = Array.from(valueMap.entries());
|
|
88
|
+
// Detect field type and sort accordingly
|
|
89
|
+
if (sorted.length > 0) {
|
|
90
|
+
const sampleValue = sorted[0][0];
|
|
91
|
+
if (typeof sampleValue === 'number') {
|
|
92
|
+
sortedIndex.fieldType = 'number';
|
|
93
|
+
sorted.sort((a, b) => a[0] - b[0]);
|
|
94
|
+
}
|
|
95
|
+
else if (sampleValue instanceof Date) {
|
|
96
|
+
sortedIndex.fieldType = 'date';
|
|
97
|
+
sorted.sort((a, b) => a[0].getTime() - b[0].getTime());
|
|
98
|
+
}
|
|
99
|
+
else {
|
|
100
|
+
sortedIndex.fieldType = 'string';
|
|
101
|
+
sorted.sort((a, b) => {
|
|
102
|
+
const aVal = String(a[0]);
|
|
103
|
+
const bVal = String(b[0]);
|
|
104
|
+
return aVal < bVal ? -1 : aVal > bVal ? 1 : 0;
|
|
105
|
+
});
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
sortedIndex.values = sorted;
|
|
109
|
+
sortedIndex.isDirty = false;
|
|
110
|
+
}
|
|
111
|
+
/**
|
|
112
|
+
* Binary search for range start (inclusive or exclusive)
|
|
113
|
+
*/
|
|
114
|
+
binarySearchStart(sorted, target, inclusive) {
|
|
115
|
+
let left = 0;
|
|
116
|
+
let right = sorted.length - 1;
|
|
117
|
+
let result = sorted.length;
|
|
118
|
+
while (left <= right) {
|
|
119
|
+
const mid = Math.floor((left + right) / 2);
|
|
120
|
+
const midVal = sorted[mid][0];
|
|
121
|
+
if (inclusive ? midVal >= target : midVal > target) {
|
|
122
|
+
result = mid;
|
|
123
|
+
right = mid - 1;
|
|
124
|
+
}
|
|
125
|
+
else {
|
|
126
|
+
left = mid + 1;
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
return result;
|
|
130
|
+
}
|
|
131
|
+
/**
|
|
132
|
+
* Binary search for range end (inclusive or exclusive)
|
|
133
|
+
*/
|
|
134
|
+
binarySearchEnd(sorted, target, inclusive) {
|
|
135
|
+
let left = 0;
|
|
136
|
+
let right = sorted.length - 1;
|
|
137
|
+
let result = -1;
|
|
138
|
+
while (left <= right) {
|
|
139
|
+
const mid = Math.floor((left + right) / 2);
|
|
140
|
+
const midVal = sorted[mid][0];
|
|
141
|
+
if (inclusive ? midVal <= target : midVal < target) {
|
|
142
|
+
result = mid;
|
|
143
|
+
left = mid + 1;
|
|
144
|
+
}
|
|
145
|
+
else {
|
|
146
|
+
right = mid - 1;
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
return result;
|
|
150
|
+
}
|
|
151
|
+
/**
|
|
152
|
+
* Get IDs matching a range query
|
|
153
|
+
*/
|
|
154
|
+
async getIdsForRange(field, min, max, includeMin = true, includeMax = true) {
|
|
155
|
+
// Ensure sorted index exists and is up to date
|
|
156
|
+
await this.ensureSortedIndex(field);
|
|
157
|
+
await this.buildSortedIndex(field);
|
|
158
|
+
const sortedIndex = this.sortedIndices.get(field);
|
|
159
|
+
if (!sortedIndex || sortedIndex.values.length === 0)
|
|
160
|
+
return [];
|
|
161
|
+
const sorted = sortedIndex.values;
|
|
162
|
+
const resultSet = new Set();
|
|
163
|
+
// Find range boundaries
|
|
164
|
+
let start = 0;
|
|
165
|
+
let end = sorted.length - 1;
|
|
166
|
+
if (min !== undefined) {
|
|
167
|
+
start = this.binarySearchStart(sorted, min, includeMin);
|
|
168
|
+
}
|
|
169
|
+
if (max !== undefined) {
|
|
170
|
+
end = this.binarySearchEnd(sorted, max, includeMax);
|
|
171
|
+
}
|
|
172
|
+
// Collect all IDs in range
|
|
173
|
+
for (let i = start; i <= end && i < sorted.length; i++) {
|
|
174
|
+
const [, ids] = sorted[i];
|
|
175
|
+
ids.forEach(id => resultSet.add(id));
|
|
176
|
+
}
|
|
177
|
+
return Array.from(resultSet);
|
|
178
|
+
}
|
|
43
179
|
/**
|
|
44
180
|
* Generate field index filename for filter discovery
|
|
45
181
|
*/
|
|
@@ -149,6 +285,13 @@ export class MetadataIndexManager {
|
|
|
149
285
|
*/
|
|
150
286
|
async addToIndex(id, metadata, skipFlush = false) {
|
|
151
287
|
const fields = this.extractIndexableFields(metadata);
|
|
288
|
+
// Mark sorted indices as dirty when adding new data
|
|
289
|
+
for (const { field } of fields) {
|
|
290
|
+
const sortedIndex = this.sortedIndices.get(field);
|
|
291
|
+
if (sortedIndex) {
|
|
292
|
+
sortedIndex.isDirty = true;
|
|
293
|
+
}
|
|
294
|
+
}
|
|
152
295
|
for (let i = 0; i < fields.length; i++) {
|
|
153
296
|
const { field, value } = fields[i];
|
|
154
297
|
const key = this.getIndexKey(field, value);
|
|
@@ -344,7 +487,7 @@ export class MetadataIndexManager {
|
|
|
344
487
|
return fieldsArray;
|
|
345
488
|
}
|
|
346
489
|
/**
|
|
347
|
-
* Convert
|
|
490
|
+
* Convert Brainy Field Operator filter to simple field-value criteria for indexing
|
|
348
491
|
*/
|
|
349
492
|
convertFilterToCriteria(filter) {
|
|
350
493
|
const criteria = [];
|
|
@@ -353,25 +496,34 @@ export class MetadataIndexManager {
|
|
|
353
496
|
}
|
|
354
497
|
for (const [key, value] of Object.entries(filter)) {
|
|
355
498
|
// Skip logical operators for now - handle them separately
|
|
356
|
-
if (key
|
|
499
|
+
if (key === 'allOf' || key === 'anyOf' || key === 'not')
|
|
357
500
|
continue;
|
|
358
501
|
if (value && typeof value === 'object' && !Array.isArray(value)) {
|
|
359
|
-
// Handle
|
|
502
|
+
// Handle Brainy Field Operators
|
|
360
503
|
for (const [op, operand] of Object.entries(value)) {
|
|
361
504
|
switch (op) {
|
|
362
|
-
case '
|
|
505
|
+
case 'oneOf':
|
|
363
506
|
if (Array.isArray(operand)) {
|
|
364
507
|
criteria.push({ field: key, values: operand });
|
|
365
508
|
}
|
|
366
509
|
break;
|
|
367
|
-
case '
|
|
510
|
+
case 'equals':
|
|
511
|
+
case 'is':
|
|
512
|
+
case 'eq':
|
|
368
513
|
criteria.push({ field: key, values: [operand] });
|
|
369
514
|
break;
|
|
370
|
-
case '
|
|
371
|
-
// For
|
|
515
|
+
case 'contains':
|
|
516
|
+
// For contains, the operand is the value we're looking for in an array field
|
|
372
517
|
criteria.push({ field: key, values: [operand] });
|
|
373
518
|
break;
|
|
374
|
-
|
|
519
|
+
case 'greaterThan':
|
|
520
|
+
case 'lessThan':
|
|
521
|
+
case 'greaterEqual':
|
|
522
|
+
case 'lessEqual':
|
|
523
|
+
case 'between':
|
|
524
|
+
// Range queries will be handled separately
|
|
525
|
+
// Sorted index will be created/loaded when needed in getIdsForRange
|
|
526
|
+
break;
|
|
375
527
|
default:
|
|
376
528
|
break;
|
|
377
529
|
}
|
|
@@ -386,17 +538,140 @@ export class MetadataIndexManager {
|
|
|
386
538
|
return criteria;
|
|
387
539
|
}
|
|
388
540
|
/**
|
|
389
|
-
* Get IDs matching
|
|
541
|
+
* Get IDs matching Brainy Field Operator metadata filter using indexes where possible
|
|
390
542
|
*/
|
|
391
543
|
async getIdsForFilter(filter) {
|
|
392
544
|
if (!filter || Object.keys(filter).length === 0) {
|
|
393
545
|
return [];
|
|
394
546
|
}
|
|
395
547
|
// Handle logical operators
|
|
396
|
-
if (filter
|
|
397
|
-
// For
|
|
548
|
+
if (filter.allOf && Array.isArray(filter.allOf)) {
|
|
549
|
+
// For allOf, we need intersection of all sub-filters
|
|
550
|
+
const allIds = [];
|
|
551
|
+
for (const subFilter of filter.allOf) {
|
|
552
|
+
const subIds = await this.getIdsForFilter(subFilter);
|
|
553
|
+
allIds.push(subIds);
|
|
554
|
+
}
|
|
555
|
+
if (allIds.length === 0)
|
|
556
|
+
return [];
|
|
557
|
+
if (allIds.length === 1)
|
|
558
|
+
return allIds[0];
|
|
559
|
+
// Intersection of all sets
|
|
560
|
+
return allIds.reduce((intersection, currentSet) => intersection.filter(id => currentSet.includes(id)));
|
|
561
|
+
}
|
|
562
|
+
if (filter.anyOf && Array.isArray(filter.anyOf)) {
|
|
563
|
+
// For anyOf, we need union of all sub-filters
|
|
564
|
+
const unionIds = new Set();
|
|
565
|
+
for (const subFilter of filter.anyOf) {
|
|
566
|
+
const subIds = await this.getIdsForFilter(subFilter);
|
|
567
|
+
subIds.forEach(id => unionIds.add(id));
|
|
568
|
+
}
|
|
569
|
+
return Array.from(unionIds);
|
|
570
|
+
}
|
|
571
|
+
// Process field filters with range support
|
|
572
|
+
const idSets = [];
|
|
573
|
+
for (const [field, condition] of Object.entries(filter)) {
|
|
574
|
+
// Skip logical operators
|
|
575
|
+
if (field === 'allOf' || field === 'anyOf' || field === 'not')
|
|
576
|
+
continue;
|
|
577
|
+
let fieldResults = [];
|
|
578
|
+
if (condition && typeof condition === 'object' && !Array.isArray(condition)) {
|
|
579
|
+
// Handle Brainy Field Operators
|
|
580
|
+
for (const [op, operand] of Object.entries(condition)) {
|
|
581
|
+
switch (op) {
|
|
582
|
+
// Exact match operators
|
|
583
|
+
case 'equals':
|
|
584
|
+
case 'is':
|
|
585
|
+
case 'eq':
|
|
586
|
+
fieldResults = await this.getIds(field, operand);
|
|
587
|
+
break;
|
|
588
|
+
// Multiple value operators
|
|
589
|
+
case 'oneOf':
|
|
590
|
+
case 'in':
|
|
591
|
+
if (Array.isArray(operand)) {
|
|
592
|
+
const unionIds = new Set();
|
|
593
|
+
for (const value of operand) {
|
|
594
|
+
const ids = await this.getIds(field, value);
|
|
595
|
+
ids.forEach(id => unionIds.add(id));
|
|
596
|
+
}
|
|
597
|
+
fieldResults = Array.from(unionIds);
|
|
598
|
+
}
|
|
599
|
+
break;
|
|
600
|
+
// Range operators
|
|
601
|
+
case 'greaterThan':
|
|
602
|
+
case 'gt':
|
|
603
|
+
fieldResults = await this.getIdsForRange(field, operand, undefined, false, true);
|
|
604
|
+
break;
|
|
605
|
+
case 'greaterEqual':
|
|
606
|
+
case 'gte':
|
|
607
|
+
case 'greaterThanOrEqual':
|
|
608
|
+
fieldResults = await this.getIdsForRange(field, operand, undefined, true, true);
|
|
609
|
+
break;
|
|
610
|
+
case 'lessThan':
|
|
611
|
+
case 'lt':
|
|
612
|
+
fieldResults = await this.getIdsForRange(field, undefined, operand, true, false);
|
|
613
|
+
break;
|
|
614
|
+
case 'lessEqual':
|
|
615
|
+
case 'lte':
|
|
616
|
+
case 'lessThanOrEqual':
|
|
617
|
+
fieldResults = await this.getIdsForRange(field, undefined, operand, true, true);
|
|
618
|
+
break;
|
|
619
|
+
case 'between':
|
|
620
|
+
if (Array.isArray(operand) && operand.length === 2) {
|
|
621
|
+
fieldResults = await this.getIdsForRange(field, operand[0], operand[1], true, true);
|
|
622
|
+
}
|
|
623
|
+
break;
|
|
624
|
+
// Array contains operator
|
|
625
|
+
case 'contains':
|
|
626
|
+
fieldResults = await this.getIds(field, operand);
|
|
627
|
+
break;
|
|
628
|
+
// Existence operator
|
|
629
|
+
case 'exists':
|
|
630
|
+
if (operand) {
|
|
631
|
+
// Get all IDs that have this field (any value)
|
|
632
|
+
const allIds = new Set();
|
|
633
|
+
for (const [key, entry] of this.indexCache.entries()) {
|
|
634
|
+
if (entry.field === field) {
|
|
635
|
+
entry.ids.forEach(id => allIds.add(id));
|
|
636
|
+
}
|
|
637
|
+
}
|
|
638
|
+
fieldResults = Array.from(allIds);
|
|
639
|
+
}
|
|
640
|
+
break;
|
|
641
|
+
}
|
|
642
|
+
}
|
|
643
|
+
}
|
|
644
|
+
else {
|
|
645
|
+
// Direct value match (shorthand for equals)
|
|
646
|
+
fieldResults = await this.getIds(field, condition);
|
|
647
|
+
}
|
|
648
|
+
if (fieldResults.length > 0) {
|
|
649
|
+
idSets.push(fieldResults);
|
|
650
|
+
}
|
|
651
|
+
else {
|
|
652
|
+
// If any field has no matches, intersection will be empty
|
|
653
|
+
return [];
|
|
654
|
+
}
|
|
655
|
+
}
|
|
656
|
+
if (idSets.length === 0)
|
|
657
|
+
return [];
|
|
658
|
+
if (idSets.length === 1)
|
|
659
|
+
return idSets[0];
|
|
660
|
+
// Intersection of all field criteria (implicit AND)
|
|
661
|
+
return idSets.reduce((intersection, currentSet) => intersection.filter(id => currentSet.includes(id)));
|
|
662
|
+
}
|
|
663
|
+
/**
|
|
664
|
+
* DEPRECATED - Old implementation for backward compatibility
|
|
665
|
+
*/
|
|
666
|
+
async getIdsForFilterOld(filter) {
|
|
667
|
+
if (!filter || Object.keys(filter).length === 0) {
|
|
668
|
+
return [];
|
|
669
|
+
}
|
|
670
|
+
// Handle logical operators
|
|
671
|
+
if (filter.allOf && Array.isArray(filter.allOf)) {
|
|
672
|
+
// For allOf, we need intersection of all sub-filters
|
|
398
673
|
const allIds = [];
|
|
399
|
-
for (const subFilter of filter
|
|
674
|
+
for (const subFilter of filter.allOf) {
|
|
400
675
|
const subIds = await this.getIdsForFilter(subFilter);
|
|
401
676
|
allIds.push(subIds);
|
|
402
677
|
}
|
|
@@ -407,10 +682,10 @@ export class MetadataIndexManager {
|
|
|
407
682
|
// Intersection of all sets
|
|
408
683
|
return allIds.reduce((intersection, currentSet) => intersection.filter(id => currentSet.includes(id)));
|
|
409
684
|
}
|
|
410
|
-
if (filter
|
|
411
|
-
// For
|
|
685
|
+
if (filter.anyOf && Array.isArray(filter.anyOf)) {
|
|
686
|
+
// For anyOf, we need union of all sub-filters
|
|
412
687
|
const unionIds = new Set();
|
|
413
|
-
for (const subFilter of filter
|
|
688
|
+
for (const subFilter of filter.anyOf) {
|
|
414
689
|
const subIds = await this.getIdsForFilter(subFilter);
|
|
415
690
|
subIds.forEach(id => unionIds.add(id));
|
|
416
691
|
}
|
|
@@ -445,7 +720,9 @@ export class MetadataIndexManager {
|
|
|
445
720
|
* Flush dirty entries to storage (non-blocking version)
|
|
446
721
|
*/
|
|
447
722
|
async flush() {
|
|
448
|
-
if
|
|
723
|
+
// Check if we have anything to flush (including sorted indices)
|
|
724
|
+
const hasDirtySortedIndices = Array.from(this.sortedIndices.values()).some(idx => idx.isDirty);
|
|
725
|
+
if (this.dirtyEntries.size === 0 && this.dirtyFields.size === 0 && !hasDirtySortedIndices) {
|
|
449
726
|
return; // Nothing to flush
|
|
450
727
|
}
|
|
451
728
|
// Process in smaller batches to avoid blocking
|
|
@@ -479,6 +756,12 @@ export class MetadataIndexManager {
|
|
|
479
756
|
await this.yieldToEventLoop();
|
|
480
757
|
}
|
|
481
758
|
}
|
|
759
|
+
// Flush sorted indices (for range queries)
|
|
760
|
+
for (const [field, sortedIndex] of this.sortedIndices.entries()) {
|
|
761
|
+
if (sortedIndex.isDirty) {
|
|
762
|
+
allPromises.push(this.saveSortedIndex(field, sortedIndex));
|
|
763
|
+
}
|
|
764
|
+
}
|
|
482
765
|
// Wait for all operations to complete
|
|
483
766
|
await Promise.all(allPromises);
|
|
484
767
|
this.dirtyEntries.clear();
|
|
@@ -496,31 +779,41 @@ export class MetadataIndexManager {
|
|
|
496
779
|
* Load field index from storage
|
|
497
780
|
*/
|
|
498
781
|
async loadFieldIndex(field) {
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
}
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
782
|
+
const filename = this.getFieldIndexFilename(field);
|
|
783
|
+
const unifiedKey = `metadata:field:${filename}`;
|
|
784
|
+
// Check unified cache first with loader function
|
|
785
|
+
return await this.unifiedCache.get(unifiedKey, async () => {
|
|
786
|
+
try {
|
|
787
|
+
const cacheKey = `field_index_${filename}`;
|
|
788
|
+
// Check old cache for migration
|
|
789
|
+
const cached = this.metadataCache.get(cacheKey);
|
|
790
|
+
if (cached) {
|
|
791
|
+
// Add to unified cache
|
|
792
|
+
const size = JSON.stringify(cached).length;
|
|
793
|
+
this.unifiedCache.set(unifiedKey, cached, 'metadata', size, 1); // Low rebuild cost
|
|
794
|
+
return cached;
|
|
795
|
+
}
|
|
796
|
+
// Load from storage
|
|
797
|
+
const indexId = `__metadata_field_index__${filename}`;
|
|
798
|
+
const data = await this.storage.getMetadata(indexId);
|
|
799
|
+
if (data) {
|
|
800
|
+
const fieldIndex = {
|
|
801
|
+
values: data.values || {},
|
|
802
|
+
lastUpdated: data.lastUpdated || Date.now()
|
|
803
|
+
};
|
|
804
|
+
// Add to unified cache
|
|
805
|
+
const size = JSON.stringify(fieldIndex).length;
|
|
806
|
+
this.unifiedCache.set(unifiedKey, fieldIndex, 'metadata', size, 1);
|
|
807
|
+
// Also keep in old cache for now (transition period)
|
|
808
|
+
this.metadataCache.set(cacheKey, fieldIndex);
|
|
809
|
+
return fieldIndex;
|
|
810
|
+
}
|
|
518
811
|
}
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
812
|
+
catch (error) {
|
|
813
|
+
// Field index doesn't exist yet
|
|
814
|
+
}
|
|
815
|
+
return null;
|
|
816
|
+
});
|
|
524
817
|
}
|
|
525
818
|
/**
|
|
526
819
|
* Save field index to storage
|
|
@@ -528,13 +821,68 @@ export class MetadataIndexManager {
|
|
|
528
821
|
async saveFieldIndex(field, fieldIndex) {
|
|
529
822
|
const filename = this.getFieldIndexFilename(field);
|
|
530
823
|
const indexId = `__metadata_field_index__${filename}`;
|
|
824
|
+
const unifiedKey = `metadata:field:${filename}`;
|
|
531
825
|
await this.storage.saveMetadata(indexId, {
|
|
532
826
|
values: fieldIndex.values,
|
|
533
827
|
lastUpdated: fieldIndex.lastUpdated
|
|
534
828
|
});
|
|
535
|
-
//
|
|
829
|
+
// Update unified cache
|
|
830
|
+
const size = JSON.stringify(fieldIndex).length;
|
|
831
|
+
this.unifiedCache.set(unifiedKey, fieldIndex, 'metadata', size, 1);
|
|
832
|
+
// Invalidate old cache
|
|
536
833
|
this.metadataCache.invalidatePattern(`field_index_${filename}`);
|
|
537
834
|
}
|
|
835
|
+
/**
|
|
836
|
+
* Save sorted index to storage for range queries
|
|
837
|
+
*/
|
|
838
|
+
async saveSortedIndex(field, sortedIndex) {
|
|
839
|
+
const filename = `sorted_${field}`;
|
|
840
|
+
const indexId = `__metadata_sorted_index__${filename}`;
|
|
841
|
+
const unifiedKey = `metadata:sorted:${field}`;
|
|
842
|
+
// Convert Set to Array for serialization
|
|
843
|
+
const serializable = {
|
|
844
|
+
values: sortedIndex.values.map(([value, ids]) => [value, Array.from(ids)]),
|
|
845
|
+
fieldType: sortedIndex.fieldType,
|
|
846
|
+
lastUpdated: Date.now()
|
|
847
|
+
};
|
|
848
|
+
await this.storage.saveMetadata(indexId, serializable);
|
|
849
|
+
// Mark as clean
|
|
850
|
+
sortedIndex.isDirty = false;
|
|
851
|
+
// Update unified cache (sorted indices are expensive to rebuild)
|
|
852
|
+
const size = JSON.stringify(serializable).length;
|
|
853
|
+
this.unifiedCache.set(unifiedKey, sortedIndex, 'metadata', size, 100); // Higher rebuild cost
|
|
854
|
+
}
|
|
855
|
+
/**
|
|
856
|
+
* Load sorted index from storage
|
|
857
|
+
*/
|
|
858
|
+
async loadSortedIndex(field) {
|
|
859
|
+
const filename = `sorted_${field}`;
|
|
860
|
+
const indexId = `__metadata_sorted_index__${filename}`;
|
|
861
|
+
const unifiedKey = `metadata:sorted:${field}`;
|
|
862
|
+
// Check unified cache first
|
|
863
|
+
const cached = await this.unifiedCache.get(unifiedKey, async () => {
|
|
864
|
+
try {
|
|
865
|
+
const data = await this.storage.getMetadata(indexId);
|
|
866
|
+
if (data) {
|
|
867
|
+
// Convert Arrays back to Sets
|
|
868
|
+
const sortedIndex = {
|
|
869
|
+
values: data.values.map(([value, ids]) => [value, new Set(ids)]),
|
|
870
|
+
fieldType: data.fieldType || 'mixed',
|
|
871
|
+
isDirty: false
|
|
872
|
+
};
|
|
873
|
+
// Add to unified cache
|
|
874
|
+
const size = JSON.stringify(data).length;
|
|
875
|
+
this.unifiedCache.set(unifiedKey, sortedIndex, 'metadata', size, 100);
|
|
876
|
+
return sortedIndex;
|
|
877
|
+
}
|
|
878
|
+
}
|
|
879
|
+
catch (error) {
|
|
880
|
+
// Sorted index doesn't exist yet
|
|
881
|
+
}
|
|
882
|
+
return null;
|
|
883
|
+
});
|
|
884
|
+
return cached;
|
|
885
|
+
}
|
|
538
886
|
/**
|
|
539
887
|
* Get index statistics
|
|
540
888
|
*/
|
|
@@ -714,31 +1062,40 @@ export class MetadataIndexManager {
|
|
|
714
1062
|
* Load index entry from storage using safe filenames
|
|
715
1063
|
*/
|
|
716
1064
|
async loadIndexEntry(key) {
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
1065
|
+
const unifiedKey = `metadata:entry:${key}`;
|
|
1066
|
+
// Use unified cache with loader function
|
|
1067
|
+
return await this.unifiedCache.get(unifiedKey, async () => {
|
|
1068
|
+
try {
|
|
1069
|
+
// Extract field and value from key
|
|
1070
|
+
const [field, value] = key.split(':', 2);
|
|
1071
|
+
const filename = this.getValueChunkFilename(field, value);
|
|
1072
|
+
// Load from metadata indexes directory with safe filename
|
|
1073
|
+
const indexId = `__metadata_index__${filename}`;
|
|
1074
|
+
const data = await this.storage.getMetadata(indexId);
|
|
1075
|
+
if (data) {
|
|
1076
|
+
const entry = {
|
|
1077
|
+
field: data.field,
|
|
1078
|
+
value: data.value,
|
|
1079
|
+
ids: new Set(data.ids || []),
|
|
1080
|
+
lastUpdated: data.lastUpdated || Date.now()
|
|
1081
|
+
};
|
|
1082
|
+
// Add to unified cache (metadata entries are cheap to rebuild)
|
|
1083
|
+
const size = JSON.stringify(Array.from(entry.ids)).length + 100;
|
|
1084
|
+
this.unifiedCache.set(unifiedKey, entry, 'metadata', size, 1);
|
|
1085
|
+
return entry;
|
|
1086
|
+
}
|
|
731
1087
|
}
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
1088
|
+
catch (error) {
|
|
1089
|
+
// Index entry doesn't exist yet
|
|
1090
|
+
}
|
|
1091
|
+
return null;
|
|
1092
|
+
});
|
|
737
1093
|
}
|
|
738
1094
|
/**
|
|
739
1095
|
* Save index entry to storage using safe filenames
|
|
740
1096
|
*/
|
|
741
1097
|
async saveIndexEntry(key, entry) {
|
|
1098
|
+
const unifiedKey = `metadata:entry:${key}`;
|
|
742
1099
|
const data = {
|
|
743
1100
|
field: entry.field,
|
|
744
1101
|
value: entry.value,
|
|
@@ -751,16 +1108,22 @@ export class MetadataIndexManager {
|
|
|
751
1108
|
// Store metadata indexes with safe filename
|
|
752
1109
|
const indexId = `__metadata_index__${filename}`;
|
|
753
1110
|
await this.storage.saveMetadata(indexId, data);
|
|
1111
|
+
// Update unified cache
|
|
1112
|
+
const size = JSON.stringify(data.ids).length + 100;
|
|
1113
|
+
this.unifiedCache.set(unifiedKey, entry, 'metadata', size, 1);
|
|
754
1114
|
}
|
|
755
1115
|
/**
|
|
756
1116
|
* Delete index entry from storage using safe filenames
|
|
757
1117
|
*/
|
|
758
1118
|
async deleteIndexEntry(key) {
|
|
1119
|
+
const unifiedKey = `metadata:entry:${key}`;
|
|
759
1120
|
try {
|
|
760
1121
|
const [field, value] = key.split(':', 2);
|
|
761
1122
|
const filename = this.getValueChunkFilename(field, value);
|
|
762
1123
|
const indexId = `__metadata_index__${filename}`;
|
|
763
1124
|
await this.storage.saveMetadata(indexId, null);
|
|
1125
|
+
// Remove from unified cache
|
|
1126
|
+
this.unifiedCache.delete(unifiedKey);
|
|
764
1127
|
}
|
|
765
1128
|
catch (error) {
|
|
766
1129
|
// Entry might not exist
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
 * Request Deduplicator Utility
 * Provides key generation for request deduplication
 */
export declare class RequestDeduplicator {
    /**
     * Generate a unique key for search requests to enable deduplication
     *
     * @param query - Search query; embedded verbatim in the key.
     * @param k - Embedded verbatim in the key (presumably the requested result count — confirm against callers).
     * @param options - Search options; the implementation reads `metadata`, `service`,
     *   `searchMode`, `threshold`, `includeVectors`, `includeMetadata`, `sortBy` and
     *   `cursor`. A falsy value is treated as "no options".
     * @returns A key of the form `search:<query>:<k>:<options JSON>`.
     */
    static getSearchKey(query: string, k: number, options: any): string;
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
 * Request Deduplicator Utility
 * Provides key generation for request deduplication
 */
/**
 * Return a copy of `value` in which the keys of every plain object are sorted,
 * recursively, so that JSON.stringify output does not depend on property order.
 * Arrays keep their element order. Objects that define `toJSON` (e.g. Date) are
 * returned unchanged so JSON.stringify serializes them as usual.
 */
function canonicalizeForKey(value) {
    if (Array.isArray(value)) {
        return value.map(item => canonicalizeForKey(item));
    }
    if (value !== null && typeof value === 'object') {
        if (typeof value.toJSON === 'function') {
            return value;
        }
        return Object.fromEntries(Object.keys(value)
            .sort()
            .map(name => [name, canonicalizeForKey(value[name])]));
    }
    return value;
}
export class RequestDeduplicator {
    /**
     * Generate a unique key for search requests to enable deduplication.
     *
     * Only the option fields listed below take part in the key. Nested objects
     * (such as a metadata filter) are canonicalized first, so two requests that
     * differ only in property order produce the same key.
     *
     * @param query search query, embedded verbatim in the key
     * @param k embedded verbatim in the key
     * @param options search options; a falsy value is treated as "no options"
     * @returns key of the form `search:<query>:<k>:<options JSON>`
     */
    static getSearchKey(query, k, options) {
        // Create a consistent key from search parameters
        const optionsKey = options ? JSON.stringify({
            metadata: canonicalizeForKey(options.metadata),
            service: canonicalizeForKey(options.service),
            searchMode: canonicalizeForKey(options.searchMode),
            threshold: canonicalizeForKey(options.threshold),
            includeVectors: canonicalizeForKey(options.includeVectors),
            includeMetadata: canonicalizeForKey(options.includeMetadata),
            sortBy: canonicalizeForKey(options.sortBy),
            cursor: canonicalizeForKey(options.cursor)
        }) : '{}';
        return `search:${query}:${k}:${optionsKey}`;
    }
}
|
|
24
|
+
//# sourceMappingURL=requestDeduplicator.js.map
|