@soulcraft/brainy 4.5.2 → 4.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/brainy.js
CHANGED
|
@@ -446,7 +446,13 @@ export class Brainy {
|
|
|
446
446
|
id: noun.id,
|
|
447
447
|
vector: noun.vector,
|
|
448
448
|
type: nounType || NounType.Thing,
|
|
449
|
-
metadata
|
|
449
|
+
// Preserve timestamps in metadata for indexing (v4.5.4 fix)
|
|
450
|
+
// Metadata index needs these fields to enable sorting and range queries
|
|
451
|
+
metadata: {
|
|
452
|
+
...userMetadata,
|
|
453
|
+
...(createdAt !== undefined && { createdAt }),
|
|
454
|
+
...(updatedAt !== undefined && { updatedAt })
|
|
455
|
+
},
|
|
450
456
|
service: service,
|
|
451
457
|
createdAt: createdAt || Date.now(),
|
|
452
458
|
updatedAt: updatedAt
|
|
@@ -1104,8 +1110,17 @@ export class Brainy {
|
|
|
1104
1110
|
};
|
|
1105
1111
|
}
|
|
1106
1112
|
}
|
|
1107
|
-
//
|
|
1108
|
-
|
|
1113
|
+
// v4.5.4: Apply sorting if requested, otherwise just filter
|
|
1114
|
+
let filteredIds;
|
|
1115
|
+
if (params.orderBy) {
|
|
1116
|
+
// Get sorted IDs using production-scale sorted filtering
|
|
1117
|
+
filteredIds = await this.metadataIndex.getSortedIdsForFilter(filter, params.orderBy, params.order || 'asc');
|
|
1118
|
+
}
|
|
1119
|
+
else {
|
|
1120
|
+
// Just filter without sorting
|
|
1121
|
+
filteredIds = await this.metadataIndex.getIdsForFilter(filter);
|
|
1122
|
+
}
|
|
1123
|
+
// Paginate BEFORE loading entities (production-scale!)
|
|
1109
1124
|
const limit = params.limit || 10;
|
|
1110
1125
|
const offset = params.offset || 0;
|
|
1111
1126
|
const pageIds = filteredIds.slice(offset, offset + limit);
|
|
@@ -1252,6 +1267,23 @@ export class Brainy {
|
|
|
1252
1267
|
}
|
|
1253
1268
|
// Early return for metadata-only queries with pagination applied
|
|
1254
1269
|
if (!params.query && !params.connected) {
|
|
1270
|
+
// v4.5.4: Apply sorting if requested for metadata-only queries
|
|
1271
|
+
if (params.orderBy) {
|
|
1272
|
+
const sortedIds = await this.metadataIndex.getSortedIdsForFilter(filter, params.orderBy, params.order || 'asc');
|
|
1273
|
+
// Paginate sorted IDs BEFORE loading entities (production-scale!)
|
|
1274
|
+
const limit = params.limit || 10;
|
|
1275
|
+
const offset = params.offset || 0;
|
|
1276
|
+
const pageIds = sortedIds.slice(offset, offset + limit);
|
|
1277
|
+
// Load entities for paginated results only
|
|
1278
|
+
const sortedResults = [];
|
|
1279
|
+
for (const id of pageIds) {
|
|
1280
|
+
const entity = await this.get(id);
|
|
1281
|
+
if (entity) {
|
|
1282
|
+
sortedResults.push(this.createResult(id, 1.0, entity));
|
|
1283
|
+
}
|
|
1284
|
+
}
|
|
1285
|
+
return sortedResults;
|
|
1286
|
+
}
|
|
1255
1287
|
return results;
|
|
1256
1288
|
}
|
|
1257
1289
|
}
|
|
@@ -1265,7 +1297,35 @@ export class Brainy {
|
|
|
1265
1297
|
results = this.applyFusionScoring(results, params.fusion);
|
|
1266
1298
|
}
|
|
1267
1299
|
// OPTIMIZED: Sort first, then apply efficient pagination
|
|
1268
|
-
|
|
1300
|
+
// v4.5.4: Support custom orderBy for vector + metadata queries
|
|
1301
|
+
if (params.orderBy && results.length > 0) {
|
|
1302
|
+
// For vector + metadata queries, sort by specified field instead of score
|
|
1303
|
+
// Load sort field values for all results (small set, already filtered)
|
|
1304
|
+
const resultsWithValues = await Promise.all(results.map(async (r) => ({
|
|
1305
|
+
result: r,
|
|
1306
|
+
value: await this.metadataIndex.getFieldValueForEntity(r.id, params.orderBy)
|
|
1307
|
+
})));
|
|
1308
|
+
// Sort by field value
|
|
1309
|
+
resultsWithValues.sort((a, b) => {
|
|
1310
|
+
// Handle null/undefined
|
|
1311
|
+
if (a.value == null && b.value == null)
|
|
1312
|
+
return 0;
|
|
1313
|
+
if (a.value == null)
|
|
1314
|
+
return (params.order || 'asc') === 'asc' ? 1 : -1;
|
|
1315
|
+
if (b.value == null)
|
|
1316
|
+
return (params.order || 'asc') === 'asc' ? -1 : 1;
|
|
1317
|
+
// Compare values
|
|
1318
|
+
if (a.value === b.value)
|
|
1319
|
+
return 0;
|
|
1320
|
+
const comparison = a.value < b.value ? -1 : 1;
|
|
1321
|
+
return (params.order || 'asc') === 'asc' ? comparison : -comparison;
|
|
1322
|
+
});
|
|
1323
|
+
results = resultsWithValues.map(({ result }) => result);
|
|
1324
|
+
}
|
|
1325
|
+
else {
|
|
1326
|
+
// Default: sort by relevance score
|
|
1327
|
+
results.sort((a, b) => b.score - a.score);
|
|
1328
|
+
}
|
|
1269
1329
|
const limit = params.limit || 10;
|
|
1270
1330
|
const offset = params.offset || 0;
|
|
1271
1331
|
// Efficient pagination - only slice what we need
|
|
@@ -155,6 +155,7 @@ export declare class MetadataIndexManager {
|
|
|
155
155
|
/**
|
|
156
156
|
* Get IDs for a range using chunked sparse index with zone maps and roaring bitmaps (v3.43.0)
|
|
157
157
|
* v3.44.1: Now fully lazy-loaded via UnifiedCache (no local sparseIndices Map)
|
|
158
|
+
* v4.5.4: Normalize min/max for timestamp bucketing before comparison
|
|
158
159
|
*/
|
|
159
160
|
private getIdsFromChunksForRange;
|
|
160
161
|
/**
|
|
@@ -271,6 +272,87 @@ export declare class MetadataIndexManager {
|
|
|
271
272
|
* Get IDs matching Brainy Field Operator metadata filter using indexes where possible
|
|
272
273
|
*/
|
|
273
274
|
getIdsForFilter(filter: any): Promise<string[]>;
|
|
275
|
+
/**
|
|
276
|
+
* Get filtered IDs sorted by a field (production-scale sorting)
|
|
277
|
+
*
|
|
278
|
+
* **Performance Characteristics** (designed for billions of entities):
|
|
279
|
+
* - **Filtering**: O(log n) using roaring bitmaps with SIMD acceleration
|
|
280
|
+
* - **Field Loading**: O(k) where k = filtered result count (NOT O(n))
|
|
281
|
+
* - **Sorting**: O(k log k) in-memory (IDs + sort values only, NOT full entities)
|
|
282
|
+
* - **Memory**: O(k) for k filtered results, independent of total entity count
|
|
283
|
+
*
|
|
284
|
+
* **Scalability**:
|
|
285
|
+
* - Total entities: Billions (memory usage unaffected)
|
|
286
|
+
* - Filtered set: Up to 10M (reasonable for in-memory sort of ID+value pairs)
|
|
287
|
+
* - Pagination: Happens AFTER sorting, so only page entities are loaded
|
|
288
|
+
*
|
|
289
|
+
* **Example**:
|
|
290
|
+
* ```typescript
|
|
291
|
+
* // Production-scale: 1B entities, 100K match filter, sort by createdAt
|
|
292
|
+
* const sortedIds = await metadataIndex.getSortedIdsForFilter(
|
|
293
|
+
* { status: 'published', category: 'AI' },
|
|
294
|
+
* 'createdAt',
|
|
295
|
+
* 'desc'
|
|
296
|
+
* )
|
|
297
|
+
* // Returns: 100K sorted IDs
|
|
298
|
+
* // Memory: ~5MB (100K IDs + 100K timestamps)
|
|
299
|
+
* // Then caller paginates: sortedIds.slice(0, 20) and loads only 20 entities
|
|
300
|
+
* ```
|
|
301
|
+
*
|
|
302
|
+
* @param filter - Metadata filter criteria (uses roaring bitmaps)
|
|
303
|
+
* @param orderBy - Field name to sort by (e.g., 'createdAt', 'title')
|
|
304
|
+
* @param order - Sort direction: 'asc' (default) or 'desc'
|
|
305
|
+
* @returns Promise<string[]> - Entity IDs sorted by specified field
|
|
306
|
+
*
|
|
307
|
+
* @since v4.5.4
|
|
308
|
+
*/
|
|
309
|
+
getSortedIdsForFilter(filter: any, orderBy: string, order?: 'asc' | 'desc'): Promise<string[]>;
|
|
310
|
+
/**
|
|
311
|
+
* Get field value for a specific entity (helper for sorted queries)
|
|
312
|
+
*
|
|
313
|
+
* **IMPORTANT**: For timestamp fields (createdAt, updatedAt), this loads
|
|
314
|
+
* the ACTUAL value from entity metadata, NOT the bucketed index value.
|
|
315
|
+
* This is required because timestamp bucketing (1-minute precision) loses
|
|
316
|
+
* precision needed for accurate sorting.
|
|
317
|
+
*
|
|
318
|
+
* For non-timestamp fields, loads from the chunked sparse index without
|
|
319
|
+
* loading the full entity. This is critical for production-scale sorting.
|
|
320
|
+
*
|
|
321
|
+
* **Performance**:
|
|
322
|
+
* - Timestamp fields: O(1) metadata load from storage (cached)
|
|
323
|
+
* - Other fields: O(chunks) roaring bitmap lookup (typically 1-10 chunks)
|
|
324
|
+
*
|
|
325
|
+
* @param entityId - Entity UUID to get field value for
|
|
326
|
+
* @param field - Field name to retrieve (e.g., 'createdAt', 'title')
|
|
327
|
+
* @returns Promise<any> - Field value or undefined if not found
|
|
328
|
+
*
|
|
329
|
+
* @public (called from brainy.ts for sorted queries)
|
|
330
|
+
* @since v4.5.4
|
|
331
|
+
*/
|
|
332
|
+
getFieldValueForEntity(entityId: string, field: string): Promise<any>;
|
|
333
|
+
/**
|
|
334
|
+
* Denormalize a value (reverse of normalizeValue)
|
|
335
|
+
*
|
|
336
|
+
* Converts normalized/stringified values back to their original type.
|
|
337
|
+
* For most fields, this just parses numbers or returns strings as-is.
|
|
338
|
+
*
|
|
339
|
+
* **NOTE**: This is NOT used for timestamp sorting! Timestamp fields
|
|
340
|
+
* (createdAt, updatedAt) are loaded directly from entity metadata by
|
|
341
|
+
* getFieldValueForEntity() to avoid precision loss from bucketing.
|
|
342
|
+
*
|
|
343
|
+
* **Timestamp Bucketing (for range queries only)**:
|
|
344
|
+
* - Indexed as: Math.floor(timestamp / 60000) * 60000
|
|
345
|
+
* - Used for: Range queries (gte, lte) where 1-minute precision is acceptable
|
|
346
|
+
* - NOT used for: Sorting (requires exact millisecond precision)
|
|
347
|
+
*
|
|
348
|
+
* @param normalized - Normalized value string from index
|
|
349
|
+
* @param field - Field name (used for type inference)
|
|
350
|
+
* @returns Denormalized value in original type
|
|
351
|
+
*
|
|
352
|
+
* @private
|
|
353
|
+
* @since v4.5.4
|
|
354
|
+
*/
|
|
355
|
+
private denormalizeValue;
|
|
274
356
|
/**
|
|
275
357
|
* DEPRECATED - Old implementation for backward compatibility
|
|
276
358
|
*/
|
|
@@ -463,6 +463,7 @@ export class MetadataIndexManager {
|
|
|
463
463
|
/**
|
|
464
464
|
* Get IDs for a range using chunked sparse index with zone maps and roaring bitmaps (v3.43.0)
|
|
465
465
|
* v3.44.1: Now fully lazy-loaded via UnifiedCache (no local sparseIndices Map)
|
|
466
|
+
* v4.5.4: Normalize min/max for timestamp bucketing before comparison
|
|
466
467
|
*/
|
|
467
468
|
async getIdsFromChunksForRange(field, min, max, includeMin = true, includeMax = true) {
|
|
468
469
|
// Load sparse index via UnifiedCache (lazy loading)
|
|
@@ -470,8 +471,12 @@ export class MetadataIndexManager {
|
|
|
470
471
|
if (!sparseIndex) {
|
|
471
472
|
return []; // No chunked index exists yet
|
|
472
473
|
}
|
|
474
|
+
// v4.5.4: Normalize min/max for consistent comparison with indexed values
|
|
475
|
+
// (indexed values are bucketed for timestamps, so we must bucket the query bounds too)
|
|
476
|
+
const normalizedMin = min !== undefined ? this.normalizeValue(min, field) : undefined;
|
|
477
|
+
const normalizedMax = max !== undefined ? this.normalizeValue(max, field) : undefined;
|
|
473
478
|
// Find candidate chunks using zone maps
|
|
474
|
-
const candidateChunkIds = sparseIndex.findChunksForRange(min, max);
|
|
479
|
+
const candidateChunkIds = sparseIndex.findChunksForRange(normalizedMin, normalizedMax);
|
|
475
480
|
if (candidateChunkIds.length === 0) {
|
|
476
481
|
return [];
|
|
477
482
|
}
|
|
@@ -481,13 +486,13 @@ export class MetadataIndexManager {
|
|
|
481
486
|
const chunk = await this.chunkManager.loadChunk(field, chunkId);
|
|
482
487
|
if (chunk) {
|
|
483
488
|
for (const [value, bitmap] of chunk.entries) {
|
|
484
|
-
// Check if value is in range
|
|
489
|
+
// Check if value is in range (both value and normalized bounds are now bucketed)
|
|
485
490
|
let inRange = true;
|
|
486
|
-
if (min !== undefined) {
|
|
487
|
-
inRange = inRange && (includeMin ? value >= min : value > min);
|
|
491
|
+
if (normalizedMin !== undefined) {
|
|
492
|
+
inRange = inRange && (includeMin ? value >= normalizedMin : value > normalizedMin);
|
|
488
493
|
}
|
|
489
|
-
if (max !== undefined) {
|
|
490
|
-
inRange = inRange && (includeMax ? value <= max : value < max);
|
|
494
|
+
if (normalizedMax !== undefined) {
|
|
495
|
+
inRange = inRange && (includeMax ? value <= normalizedMax : value < normalizedMax);
|
|
491
496
|
}
|
|
492
497
|
if (inRange) {
|
|
493
498
|
// Iterate through roaring bitmap integers
|
|
@@ -1204,17 +1209,36 @@ export class MetadataIndexManager {
|
|
|
1204
1209
|
continue;
|
|
1205
1210
|
let fieldResults = [];
|
|
1206
1211
|
if (condition && typeof condition === 'object' && !Array.isArray(condition)) {
|
|
1207
|
-
// Handle Brainy Field Operators
|
|
1212
|
+
// Handle Brainy Field Operators (v4.5.4: canonical operators defined)
|
|
1213
|
+
// See docs/api/README.md for complete operator reference
|
|
1208
1214
|
for (const [op, operand] of Object.entries(condition)) {
|
|
1209
1215
|
switch (op) {
|
|
1210
|
-
//
|
|
1211
|
-
|
|
1212
|
-
case 'is':
|
|
1216
|
+
// ===== EQUALITY OPERATORS =====
|
|
1217
|
+
// Canonical: 'eq' | Alias: 'equals' | Deprecated: 'is' (remove in v5.0.0)
|
|
1218
|
+
case 'is': // DEPRECATED (v4.5.4): Use 'eq' instead
|
|
1219
|
+
case 'equals': // Alias for 'eq'
|
|
1213
1220
|
case 'eq':
|
|
1214
1221
|
fieldResults = await this.getIds(field, operand);
|
|
1215
1222
|
break;
|
|
1216
|
-
//
|
|
1217
|
-
|
|
1223
|
+
// ===== NEGATION OPERATORS =====
|
|
1224
|
+
// Canonical: 'ne' | Alias: 'notEquals' | Deprecated: 'isNot' (remove in v5.0.0)
|
|
1225
|
+
case 'isNot': // DEPRECATED (v4.5.4): Use 'ne' instead
|
|
1226
|
+
case 'notEquals': // Alias for 'ne'
|
|
1227
|
+
case 'ne':
|
|
1228
|
+
// For notEquals, we need all IDs EXCEPT those matching the value
|
|
1229
|
+
// This is especially important for soft delete: deleted !== true
|
|
1230
|
+
// should include items without a deleted field
|
|
1231
|
+
// First, get all IDs in the database
|
|
1232
|
+
const allItemIds = await this.getAllIds();
|
|
1233
|
+
// Then get IDs that match the value we want to exclude
|
|
1234
|
+
const excludeIds = await this.getIds(field, operand);
|
|
1235
|
+
const excludeSet = new Set(excludeIds);
|
|
1236
|
+
// Return all IDs except those to exclude
|
|
1237
|
+
fieldResults = allItemIds.filter(id => !excludeSet.has(id));
|
|
1238
|
+
break;
|
|
1239
|
+
// ===== MULTI-VALUE OPERATORS =====
|
|
1240
|
+
// Canonical: 'in' | Alias: 'oneOf'
|
|
1241
|
+
case 'oneOf': // Alias for 'in'
|
|
1218
1242
|
case 'in':
|
|
1219
1243
|
if (Array.isArray(operand)) {
|
|
1220
1244
|
const unionIds = new Set();
|
|
@@ -1225,35 +1249,46 @@ export class MetadataIndexManager {
|
|
|
1225
1249
|
fieldResults = Array.from(unionIds);
|
|
1226
1250
|
}
|
|
1227
1251
|
break;
|
|
1228
|
-
//
|
|
1229
|
-
|
|
1252
|
+
// ===== GREATER THAN OPERATORS =====
|
|
1253
|
+
// Canonical: 'gt' | Alias: 'greaterThan'
|
|
1254
|
+
case 'greaterThan': // Alias for 'gt'
|
|
1230
1255
|
case 'gt':
|
|
1231
1256
|
fieldResults = await this.getIdsForRange(field, operand, undefined, false, true);
|
|
1232
1257
|
break;
|
|
1233
|
-
|
|
1258
|
+
// ===== GREATER THAN OR EQUAL OPERATORS =====
|
|
1259
|
+
// Canonical: 'gte' | Alias: 'greaterThanOrEqual' | Deprecated: 'greaterEqual' (remove in v5.0.0)
|
|
1260
|
+
case 'greaterEqual': // DEPRECATED (v4.5.4): Use 'gte' instead
|
|
1261
|
+
case 'greaterThanOrEqual': // Alias for 'gte'
|
|
1234
1262
|
case 'gte':
|
|
1235
|
-
case 'greaterThanOrEqual':
|
|
1236
1263
|
fieldResults = await this.getIdsForRange(field, operand, undefined, true, true);
|
|
1237
1264
|
break;
|
|
1238
|
-
|
|
1265
|
+
// ===== LESS THAN OPERATORS =====
|
|
1266
|
+
// Canonical: 'lt' | Alias: 'lessThan'
|
|
1267
|
+
case 'lessThan': // Alias for 'lt'
|
|
1239
1268
|
case 'lt':
|
|
1240
1269
|
fieldResults = await this.getIdsForRange(field, undefined, operand, true, false);
|
|
1241
1270
|
break;
|
|
1242
|
-
|
|
1271
|
+
// ===== LESS THAN OR EQUAL OPERATORS =====
|
|
1272
|
+
// Canonical: 'lte' | Alias: 'lessThanOrEqual' | Deprecated: 'lessEqual' (remove in v5.0.0)
|
|
1273
|
+
case 'lessEqual': // DEPRECATED (v4.5.4): Use 'lte' instead
|
|
1274
|
+
case 'lessThanOrEqual': // Alias for 'lte'
|
|
1243
1275
|
case 'lte':
|
|
1244
|
-
case 'lessThanOrEqual':
|
|
1245
1276
|
fieldResults = await this.getIdsForRange(field, undefined, operand, true, true);
|
|
1246
1277
|
break;
|
|
1278
|
+
// ===== RANGE OPERATOR =====
|
|
1279
|
+
// between: [min, max] - inclusive range query
|
|
1247
1280
|
case 'between':
|
|
1248
1281
|
if (Array.isArray(operand) && operand.length === 2) {
|
|
1249
1282
|
fieldResults = await this.getIdsForRange(field, operand[0], operand[1], true, true);
|
|
1250
1283
|
}
|
|
1251
1284
|
break;
|
|
1252
|
-
//
|
|
1285
|
+
// ===== ARRAY CONTAINS OPERATOR =====
|
|
1286
|
+
// contains: value - check if array field contains value
|
|
1253
1287
|
case 'contains':
|
|
1254
1288
|
fieldResults = await this.getIds(field, operand);
|
|
1255
1289
|
break;
|
|
1256
|
-
//
|
|
1290
|
+
// ===== EXISTENCE OPERATOR =====
|
|
1291
|
+
// exists: boolean - check if field exists (any value)
|
|
1257
1292
|
case 'exists':
|
|
1258
1293
|
if (operand) {
|
|
1259
1294
|
// Get all IDs that have this field (any value) from chunked sparse index with roaring bitmaps (v3.43.0)
|
|
@@ -1279,26 +1314,11 @@ export class MetadataIndexManager {
|
|
|
1279
1314
|
fieldResults = this.idMapper.intsIterableToUuids(allIntIds);
|
|
1280
1315
|
}
|
|
1281
1316
|
break;
|
|
1282
|
-
// Negation operators
|
|
1283
|
-
case 'notEquals':
|
|
1284
|
-
case 'isNot':
|
|
1285
|
-
case 'ne':
|
|
1286
|
-
// For notEquals, we need all IDs EXCEPT those matching the value
|
|
1287
|
-
// This is especially important for soft delete: deleted !== true
|
|
1288
|
-
// should include items without a deleted field
|
|
1289
|
-
// First, get all IDs in the database
|
|
1290
|
-
const allItemIds = await this.getAllIds();
|
|
1291
|
-
// Then get IDs that match the value we want to exclude
|
|
1292
|
-
const excludeIds = await this.getIds(field, operand);
|
|
1293
|
-
const excludeSet = new Set(excludeIds);
|
|
1294
|
-
// Return all IDs except those to exclude
|
|
1295
|
-
fieldResults = allItemIds.filter(id => !excludeSet.has(id));
|
|
1296
|
-
break;
|
|
1297
1317
|
}
|
|
1298
1318
|
}
|
|
1299
1319
|
}
|
|
1300
1320
|
else {
|
|
1301
|
-
// Direct value match (shorthand for
|
|
1321
|
+
// Direct value match (shorthand for 'eq' operator)
|
|
1302
1322
|
fieldResults = await this.getIds(field, condition);
|
|
1303
1323
|
}
|
|
1304
1324
|
if (fieldResults.length > 0) {
|
|
@@ -1316,6 +1336,169 @@ export class MetadataIndexManager {
|
|
|
1316
1336
|
// Intersection of all field criteria (implicit AND)
|
|
1317
1337
|
return idSets.reduce((intersection, currentSet) => intersection.filter(id => currentSet.includes(id)));
|
|
1318
1338
|
}
|
|
1339
|
+
/**
|
|
1340
|
+
* Get filtered IDs sorted by a field (production-scale sorting)
|
|
1341
|
+
*
|
|
1342
|
+
* **Performance Characteristics** (designed for billions of entities):
|
|
1343
|
+
* - **Filtering**: O(log n) using roaring bitmaps with SIMD acceleration
|
|
1344
|
+
* - **Field Loading**: O(k) where k = filtered result count (NOT O(n))
|
|
1345
|
+
* - **Sorting**: O(k log k) in-memory (IDs + sort values only, NOT full entities)
|
|
1346
|
+
* - **Memory**: O(k) for k filtered results, independent of total entity count
|
|
1347
|
+
*
|
|
1348
|
+
* **Scalability**:
|
|
1349
|
+
* - Total entities: Billions (memory usage unaffected)
|
|
1350
|
+
* - Filtered set: Up to 10M (reasonable for in-memory sort of ID+value pairs)
|
|
1351
|
+
* - Pagination: Happens AFTER sorting, so only page entities are loaded
|
|
1352
|
+
*
|
|
1353
|
+
* **Example**:
|
|
1354
|
+
* ```typescript
|
|
1355
|
+
* // Production-scale: 1B entities, 100K match filter, sort by createdAt
|
|
1356
|
+
* const sortedIds = await metadataIndex.getSortedIdsForFilter(
|
|
1357
|
+
* { status: 'published', category: 'AI' },
|
|
1358
|
+
* 'createdAt',
|
|
1359
|
+
* 'desc'
|
|
1360
|
+
* )
|
|
1361
|
+
* // Returns: 100K sorted IDs
|
|
1362
|
+
* // Memory: ~5MB (100K IDs + 100K timestamps)
|
|
1363
|
+
* // Then caller paginates: sortedIds.slice(0, 20) and loads only 20 entities
|
|
1364
|
+
* ```
|
|
1365
|
+
*
|
|
1366
|
+
* @param filter - Metadata filter criteria (uses roaring bitmaps)
|
|
1367
|
+
* @param orderBy - Field name to sort by (e.g., 'createdAt', 'title')
|
|
1368
|
+
* @param order - Sort direction: 'asc' (default) or 'desc'
|
|
1369
|
+
* @returns Promise<string[]> - Entity IDs sorted by specified field
|
|
1370
|
+
*
|
|
1371
|
+
* @since v4.5.4
|
|
1372
|
+
*/
|
|
1373
|
+
async getSortedIdsForFilter(filter, orderBy, order = 'asc') {
|
|
1374
|
+
// 1. Get filtered IDs using existing roaring bitmap implementation (fast!)
|
|
1375
|
+
const filteredIds = await this.getIdsForFilter(filter);
|
|
1376
|
+
if (filteredIds.length === 0) {
|
|
1377
|
+
return [];
|
|
1378
|
+
}
|
|
1379
|
+
// 2. Load sort field values for filtered IDs ONLY
|
|
1380
|
+
// This is O(k) not O(n) where k = filtered count
|
|
1381
|
+
// We only load the ONE field needed for sorting, not full entities
|
|
1382
|
+
const idValuePairs = [];
|
|
1383
|
+
for (const id of filteredIds) {
|
|
1384
|
+
const value = await this.getFieldValueForEntity(id, orderBy);
|
|
1385
|
+
idValuePairs.push({ id, value });
|
|
1386
|
+
}
|
|
1387
|
+
// 3. Sort by value (in-memory BUT only IDs + sort values)
|
|
1388
|
+
// This is acceptable because we're sorting the FILTERED set, not all entities
|
|
1389
|
+
// Even 1M filtered results = ~50MB (IDs + values), manageable in-memory
|
|
1390
|
+
idValuePairs.sort((a, b) => {
|
|
1391
|
+
// Handle null/undefined (always sort to end)
|
|
1392
|
+
if (a.value == null && b.value == null)
|
|
1393
|
+
return 0;
|
|
1394
|
+
if (a.value == null)
|
|
1395
|
+
return order === 'asc' ? 1 : -1;
|
|
1396
|
+
if (b.value == null)
|
|
1397
|
+
return order === 'asc' ? -1 : 1;
|
|
1398
|
+
// Compare values
|
|
1399
|
+
if (a.value === b.value)
|
|
1400
|
+
return 0;
|
|
1401
|
+
const comparison = a.value < b.value ? -1 : 1;
|
|
1402
|
+
return order === 'asc' ? comparison : -comparison;
|
|
1403
|
+
});
|
|
1404
|
+
// 4. Return sorted IDs (caller handles pagination BEFORE loading entities)
|
|
1405
|
+
return idValuePairs.map(p => p.id);
|
|
1406
|
+
}
|
|
1407
|
+
/**
|
|
1408
|
+
* Get field value for a specific entity (helper for sorted queries)
|
|
1409
|
+
*
|
|
1410
|
+
* **IMPORTANT**: For timestamp fields (createdAt, updatedAt), this loads
|
|
1411
|
+
* the ACTUAL value from entity metadata, NOT the bucketed index value.
|
|
1412
|
+
* This is required because timestamp bucketing (1-minute precision) loses
|
|
1413
|
+
* precision needed for accurate sorting.
|
|
1414
|
+
*
|
|
1415
|
+
* For non-timestamp fields, loads from the chunked sparse index without
|
|
1416
|
+
* loading the full entity. This is critical for production-scale sorting.
|
|
1417
|
+
*
|
|
1418
|
+
* **Performance**:
|
|
1419
|
+
* - Timestamp fields: O(1) metadata load from storage (cached)
|
|
1420
|
+
* - Other fields: O(chunks) roaring bitmap lookup (typically 1-10 chunks)
|
|
1421
|
+
*
|
|
1422
|
+
* @param entityId - Entity UUID to get field value for
|
|
1423
|
+
* @param field - Field name to retrieve (e.g., 'createdAt', 'title')
|
|
1424
|
+
* @returns Promise<any> - Field value or undefined if not found
|
|
1425
|
+
*
|
|
1426
|
+
* @public (called from brainy.ts for sorted queries)
|
|
1427
|
+
* @since v4.5.4
|
|
1428
|
+
*/
|
|
1429
|
+
async getFieldValueForEntity(entityId, field) {
|
|
1430
|
+
// For timestamp fields, load ACTUAL value from entity metadata
|
|
1431
|
+
// (index has bucketed values which lose precision for sorting)
|
|
1432
|
+
if (field === 'createdAt' || field === 'updatedAt' || field === 'accessed' || field === 'modified') {
|
|
1433
|
+
try {
|
|
1434
|
+
const noun = await this.storage.getNoun(entityId);
|
|
1435
|
+
if (noun && noun.metadata) {
|
|
1436
|
+
return noun.metadata[field];
|
|
1437
|
+
}
|
|
1438
|
+
}
|
|
1439
|
+
catch (err) {
|
|
1440
|
+
// If metadata load fails, fall back to index (bucketed value)
|
|
1441
|
+
console.warn(`[MetadataIndex] Failed to load ${field} from metadata for ${entityId}, using bucketed value`);
|
|
1442
|
+
}
|
|
1443
|
+
}
|
|
1444
|
+
// For non-timestamp fields, use the sparse index (no bucketing issues)
|
|
1445
|
+
const intId = this.idMapper.getInt(entityId);
|
|
1446
|
+
if (intId === undefined) {
|
|
1447
|
+
return undefined;
|
|
1448
|
+
}
|
|
1449
|
+
// Load sparse index for this field (cached via UnifiedCache)
|
|
1450
|
+
const sparseIndex = await this.loadSparseIndex(field);
|
|
1451
|
+
if (!sparseIndex) {
|
|
1452
|
+
return undefined;
|
|
1453
|
+
}
|
|
1454
|
+
// Search through chunks to find which value this entity has
|
|
1455
|
+
// Typically 1-10 chunks per field, so this is fast
|
|
1456
|
+
for (const chunkId of sparseIndex.getAllChunkIds()) {
|
|
1457
|
+
const chunk = await this.chunkManager.loadChunk(field, chunkId);
|
|
1458
|
+
if (!chunk)
|
|
1459
|
+
continue;
|
|
1460
|
+
// Check each value's roaring bitmap for our entity ID
|
|
1461
|
+
// Roaring bitmap .has() is O(1) with SIMD optimization
|
|
1462
|
+
for (const [value, bitmap] of chunk.entries) {
|
|
1463
|
+
if (bitmap.has(intId)) {
|
|
1464
|
+
// Found it! Denormalize the value (no bucketing for non-timestamps)
|
|
1465
|
+
return this.denormalizeValue(value, field);
|
|
1466
|
+
}
|
|
1467
|
+
}
|
|
1468
|
+
}
|
|
1469
|
+
return undefined;
|
|
1470
|
+
}
|
|
1471
|
+
/**
|
|
1472
|
+
* Denormalize a value (reverse of normalizeValue)
|
|
1473
|
+
*
|
|
1474
|
+
* Converts normalized/stringified values back to their original type.
|
|
1475
|
+
* For most fields, this just parses numbers or returns strings as-is.
|
|
1476
|
+
*
|
|
1477
|
+
* **NOTE**: This is NOT used for timestamp sorting! Timestamp fields
|
|
1478
|
+
* (createdAt, updatedAt) are loaded directly from entity metadata by
|
|
1479
|
+
* getFieldValueForEntity() to avoid precision loss from bucketing.
|
|
1480
|
+
*
|
|
1481
|
+
* **Timestamp Bucketing (for range queries only)**:
|
|
1482
|
+
* - Indexed as: Math.floor(timestamp / 60000) * 60000
|
|
1483
|
+
* - Used for: Range queries (gte, lte) where 1-minute precision is acceptable
|
|
1484
|
+
* - NOT used for: Sorting (requires exact millisecond precision)
|
|
1485
|
+
*
|
|
1486
|
+
* @param normalized - Normalized value string from index
|
|
1487
|
+
* @param field - Field name (used for type inference)
|
|
1488
|
+
* @returns Denormalized value in original type
|
|
1489
|
+
*
|
|
1490
|
+
* @private
|
|
1491
|
+
* @since v4.5.4
|
|
1492
|
+
*/
|
|
1493
|
+
denormalizeValue(normalized, field) {
|
|
1494
|
+
// Try parsing as number (timestamps, integers, floats)
|
|
1495
|
+
const asNumber = Number(normalized);
|
|
1496
|
+
if (!isNaN(asNumber)) {
|
|
1497
|
+
return asNumber;
|
|
1498
|
+
}
|
|
1499
|
+
// For strings, return as-is (already denormalized)
|
|
1500
|
+
return normalized;
|
|
1501
|
+
}
|
|
1319
1502
|
/**
|
|
1320
1503
|
* DEPRECATED - Old implementation for backward compatibility
|
|
1321
1504
|
*/
|
|
@@ -106,10 +106,11 @@ export class VirtualFileSystem {
|
|
|
106
106
|
if (existing.length > 1) {
|
|
107
107
|
console.warn(`⚠️ Found ${existing.length} root entities! Using first one, consider cleanup.`);
|
|
108
108
|
// Sort by creation time - use oldest root (most likely to have children)
|
|
109
|
-
// v4.5.
|
|
109
|
+
// v4.5.3: FIX - createdAt is in entity object, not at Result level!
|
|
110
|
+
// brain.find() returns Result[], which has entity.createdAt, not top-level createdAt
|
|
110
111
|
existing.sort((a, b) => {
|
|
111
|
-
const aTime = a.createdAt || a.metadata?.modified || 0;
|
|
112
|
-
const bTime = b.createdAt || b.metadata?.modified || 0;
|
|
112
|
+
const aTime = a.entity?.createdAt || a.metadata?.modified || 0;
|
|
113
|
+
const bTime = b.entity?.createdAt || b.metadata?.modified || 0;
|
|
113
114
|
return aTime - bTime;
|
|
114
115
|
});
|
|
115
116
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@soulcraft/brainy",
|
|
3
|
-
"version": "4.5.2",
|
|
3
|
+
"version": "4.6.0",
|
|
4
4
|
"description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.js",
|