@soulcraft/brainy 2.10.0 → 2.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -10
- package/dist/augmentations/apiServerAugmentation.js +2 -2
- package/dist/augmentations/display/fieldPatterns.d.ts +1 -1
- package/dist/augmentations/display/fieldPatterns.js +1 -1
- package/dist/augmentations/display/intelligentComputation.d.ts +2 -2
- package/dist/augmentations/display/intelligentComputation.js +4 -4
- package/dist/augmentations/display/types.d.ts +1 -1
- package/dist/augmentations/neuralImport.js +4 -4
- package/dist/augmentations/synapseAugmentation.js +3 -3
- package/dist/augmentations/typeMatching/brainyTypes.d.ts +83 -0
- package/dist/augmentations/typeMatching/brainyTypes.js +425 -0
- package/dist/augmentations/universalDisplayAugmentation.d.ts +1 -1
- package/dist/augmentations/universalDisplayAugmentation.js +1 -1
- package/dist/brainyData.d.ts +15 -33
- package/dist/brainyData.js +1210 -1203
- package/dist/chat/BrainyChat.js +11 -11
- package/dist/examples/basicUsage.js +4 -1
- package/dist/importManager.js +2 -2
- package/dist/index.d.ts +3 -1
- package/dist/index.js +5 -1
- package/dist/neural/embeddedPatterns.d.ts +1 -1
- package/dist/neural/embeddedPatterns.js +2 -2
- package/dist/storage/adapters/fileSystemStorage.d.ts +2 -2
- package/dist/storage/adapters/fileSystemStorage.js +2 -2
- package/dist/storage/adapters/memoryStorage.d.ts +4 -4
- package/dist/storage/adapters/memoryStorage.js +4 -4
- package/dist/storage/adapters/opfsStorage.d.ts +2 -2
- package/dist/storage/adapters/opfsStorage.js +2 -2
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +2 -2
- package/dist/storage/adapters/s3CompatibleStorage.js +2 -2
- package/dist/storage/baseStorage.d.ts +12 -2
- package/dist/storage/baseStorage.js +32 -0
- package/dist/types/brainyDataInterface.d.ts +2 -5
- package/dist/utils/brainyTypes.d.ts +217 -0
- package/dist/utils/brainyTypes.js +261 -0
- package/dist/utils/typeValidation.d.ts +25 -0
- package/dist/utils/typeValidation.js +127 -0
- package/package.json +1 -1
package/dist/brainyData.js
CHANGED
|
@@ -12,6 +12,7 @@ import { enforceNodeVersion } from './utils/nodeVersionCheck.js';
|
|
|
12
12
|
import { createNamespacedMetadata, updateNamespacedMetadata, markDeleted, markRestored, isDeleted, getUserMetadata } from './utils/metadataNamespace.js';
|
|
13
13
|
import { PeriodicCleanup } from './utils/periodicCleanup.js';
|
|
14
14
|
import { NounType, VerbType } from './types/graphTypes.js';
|
|
15
|
+
import { validateNounType } from './utils/typeValidation.js';
|
|
15
16
|
import { createServerSearchAugmentations } from './augmentations/serverSearchAugmentations.js';
|
|
16
17
|
import { augmentationPipeline } from './augmentationPipeline.js';
|
|
17
18
|
import { prodLog } from './utils/logger.js';
|
|
@@ -127,7 +128,7 @@ export class BrainyData {
|
|
|
127
128
|
this.operationalMode = null;
|
|
128
129
|
this.domainDetector = null;
|
|
129
130
|
// Enforce Node.js version requirement for ONNX stability
|
|
130
|
-
if (typeof process !== 'undefined' && process.version) {
|
|
131
|
+
if (typeof process !== 'undefined' && process.version && !process.env.BRAINY_SKIP_VERSION_CHECK) {
|
|
131
132
|
enforceNodeVersion();
|
|
132
133
|
}
|
|
133
134
|
// Store raw config for processing in init()
|
|
@@ -1248,1317 +1249,1023 @@ export class BrainyData {
|
|
|
1248
1249
|
throw new Error(`Failed to connect to remote server: ${error}`);
|
|
1249
1250
|
}
|
|
1250
1251
|
}
|
|
1252
|
+
// REMOVED: addItem() - Use addNoun() instead (cleaner 2.0 API)
|
|
1253
|
+
// REMOVED: addToBoth() - Remote server functionality moved to post-2.0.0
|
|
1251
1254
|
/**
|
|
1252
|
-
* Add
|
|
1253
|
-
*
|
|
1254
|
-
* @param
|
|
1255
|
-
* @param metadata Optional metadata to associate with the
|
|
1256
|
-
* @
|
|
1257
|
-
* @
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1255
|
+
* Add a vector to the remote server
|
|
1256
|
+
* @param id ID of the vector to add
|
|
1257
|
+
* @param vector Vector to add
|
|
1258
|
+
* @param metadata Optional metadata to associate with the vector
|
|
1259
|
+
* @returns True if successful, false otherwise
|
|
1260
|
+
* @private
|
|
1261
|
+
*/
|
|
1262
|
+
async addToRemote(id, vector, metadata) {
|
|
1263
|
+
if (!this.isConnectedToRemoteServer()) {
|
|
1264
|
+
return false;
|
|
1265
|
+
}
|
|
1266
|
+
try {
|
|
1267
|
+
// TODO: Remote server operations (post-2.0.0 feature)
|
|
1268
|
+
// if (!this.serverSearchConduit || !this.serverConnection) {
|
|
1269
|
+
// throw new Error(
|
|
1270
|
+
// 'Server search conduit or connection is not initialized'
|
|
1271
|
+
// )
|
|
1272
|
+
// }
|
|
1273
|
+
// TODO: Add to remote server
|
|
1274
|
+
// const addResult = await this.serverSearchConduit.addToBoth(
|
|
1275
|
+
// this.serverConnection.connectionId,
|
|
1276
|
+
// vector,
|
|
1277
|
+
// metadata
|
|
1278
|
+
// )
|
|
1279
|
+
throw new Error('Remote server functionality not yet implemented in Brainy 2.0.0');
|
|
1280
|
+
// TODO: Handle remote add result (post-2.0.0 feature)
|
|
1281
|
+
// if (!addResult.success) {
|
|
1282
|
+
// throw new Error(`Remote add failed: ${addResult.error}`)
|
|
1283
|
+
// }
|
|
1284
|
+
return true;
|
|
1285
|
+
}
|
|
1286
|
+
catch (error) {
|
|
1287
|
+
console.error('Failed to add to remote server:', error);
|
|
1288
|
+
throw new Error(`Failed to add to remote server: ${error}`);
|
|
1289
|
+
}
|
|
1290
|
+
}
|
|
1291
|
+
/**
|
|
1292
|
+
* Add multiple vectors or data items to the database
|
|
1293
|
+
* @param items Array of items to add
|
|
1294
|
+
* @param options Additional options
|
|
1295
|
+
* @returns Array of IDs for the added items
|
|
1296
|
+
*/
|
|
1297
|
+
/**
|
|
1298
|
+
* Add multiple nouns in batch with required types
|
|
1299
|
+
* @param items Array of nouns to add (all must have types)
|
|
1300
|
+
* @param options Batch processing options
|
|
1301
|
+
* @returns Array of generated IDs
|
|
1270
1302
|
*/
|
|
1271
|
-
async
|
|
1303
|
+
async addNouns(items, options = {}) {
|
|
1272
1304
|
await this.ensureInitialized();
|
|
1273
1305
|
// Check if database is in read-only mode
|
|
1274
1306
|
this.checkReadOnly();
|
|
1275
|
-
// Validate
|
|
1276
|
-
|
|
1277
|
-
|
|
1307
|
+
// Validate all types upfront for better error handling
|
|
1308
|
+
const invalidItems = [];
|
|
1309
|
+
items.forEach((item, index) => {
|
|
1310
|
+
if (!item.nounType || typeof item.nounType !== 'string') {
|
|
1311
|
+
invalidItems.push(index);
|
|
1312
|
+
}
|
|
1313
|
+
else {
|
|
1314
|
+
// Validate the type is valid
|
|
1315
|
+
try {
|
|
1316
|
+
validateNounType(item.nounType);
|
|
1317
|
+
}
|
|
1318
|
+
catch (error) {
|
|
1319
|
+
invalidItems.push(index);
|
|
1320
|
+
}
|
|
1321
|
+
}
|
|
1322
|
+
});
|
|
1323
|
+
if (invalidItems.length > 0) {
|
|
1324
|
+
throw new Error(`Type validation failed for ${invalidItems.length} items at indices: ${invalidItems.slice(0, 5).join(', ')}${invalidItems.length > 5 ? '...' : ''}\n` +
|
|
1325
|
+
'All items must have valid noun types.\n' +
|
|
1326
|
+
'Example: { vectorOrData: "data", nounType: NounType.Content, metadata: {...} }');
|
|
1278
1327
|
}
|
|
1328
|
+
// Default concurrency to 4 if not specified
|
|
1329
|
+
const concurrency = options.concurrency || 4;
|
|
1330
|
+
// Default batch size to 50 if not specified
|
|
1331
|
+
const batchSize = options.batchSize || 50;
|
|
1279
1332
|
try {
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1333
|
+
// Process items in batches to control concurrency and memory usage
|
|
1334
|
+
const ids = [];
|
|
1335
|
+
const itemsToProcess = [...items]; // Create a copy to avoid modifying the original array
|
|
1336
|
+
while (itemsToProcess.length > 0) {
|
|
1337
|
+
// Take up to 'batchSize' items to process in a batch
|
|
1338
|
+
const batch = itemsToProcess.splice(0, batchSize);
|
|
1339
|
+
// Separate items that are already vectors from those that need embedding
|
|
1340
|
+
const vectorItems = [];
|
|
1341
|
+
const textItems = [];
|
|
1342
|
+
// Categorize items
|
|
1343
|
+
batch.forEach((item, index) => {
|
|
1344
|
+
if (Array.isArray(item.vectorOrData) &&
|
|
1345
|
+
item.vectorOrData.every((val) => typeof val === 'number') &&
|
|
1346
|
+
!options.forceEmbed) {
|
|
1347
|
+
// Item is already a vector
|
|
1348
|
+
vectorItems.push({
|
|
1349
|
+
vectorOrData: item.vectorOrData,
|
|
1350
|
+
nounType: item.nounType,
|
|
1351
|
+
metadata: item.metadata,
|
|
1352
|
+
index
|
|
1353
|
+
});
|
|
1354
|
+
}
|
|
1355
|
+
else if (typeof item.vectorOrData === 'string') {
|
|
1356
|
+
// Item is text that needs embedding
|
|
1357
|
+
textItems.push({
|
|
1358
|
+
text: item.vectorOrData,
|
|
1359
|
+
nounType: item.nounType,
|
|
1360
|
+
metadata: item.metadata,
|
|
1361
|
+
index
|
|
1362
|
+
});
|
|
1363
|
+
}
|
|
1364
|
+
else {
|
|
1365
|
+
// For now, treat other types as text
|
|
1366
|
+
// In a more complete implementation, we might handle other types differently
|
|
1367
|
+
const textRepresentation = String(item.vectorOrData);
|
|
1368
|
+
textItems.push({
|
|
1369
|
+
text: textRepresentation,
|
|
1370
|
+
nounType: item.nounType,
|
|
1371
|
+
metadata: item.metadata,
|
|
1372
|
+
index
|
|
1373
|
+
});
|
|
1286
1374
|
}
|
|
1375
|
+
});
|
|
1376
|
+
// Process vector items (already embedded)
|
|
1377
|
+
const vectorPromises = vectorItems.map((item) => this.addNoun(item.vectorOrData, item.nounType, item.metadata));
|
|
1378
|
+
// Process text items in a single batch embedding operation
|
|
1379
|
+
let textPromises = [];
|
|
1380
|
+
if (textItems.length > 0) {
|
|
1381
|
+
// Extract just the text for batch embedding
|
|
1382
|
+
const texts = textItems.map((item) => item.text);
|
|
1383
|
+
// Perform batch embedding
|
|
1384
|
+
const embeddings = await batchEmbed(texts);
|
|
1385
|
+
// Add each item with its embedding
|
|
1386
|
+
textPromises = textItems.map((item, i) => this.addNoun(embeddings[i], item.nounType, item.metadata));
|
|
1287
1387
|
}
|
|
1388
|
+
// Combine all promises
|
|
1389
|
+
const batchResults = await Promise.all([
|
|
1390
|
+
...vectorPromises,
|
|
1391
|
+
...textPromises
|
|
1392
|
+
]);
|
|
1393
|
+
// Add the results to our ids array
|
|
1394
|
+
ids.push(...batchResults);
|
|
1288
1395
|
}
|
|
1396
|
+
return ids;
|
|
1397
|
+
}
|
|
1398
|
+
catch (error) {
|
|
1399
|
+
console.error('Failed to add batch of items:', error);
|
|
1400
|
+
throw new Error(`Failed to add batch of items: ${error}`);
|
|
1401
|
+
}
|
|
1402
|
+
}
|
|
1403
|
+
/**
|
|
1404
|
+
* Add multiple vectors or data items to both local and remote databases
|
|
1405
|
+
* @param items Array of items to add (with required types)
|
|
1406
|
+
* @param options Additional options
|
|
1407
|
+
* @returns Array of IDs for the added items
|
|
1408
|
+
*/
|
|
1409
|
+
async addBatchToBoth(items, options = {}) {
|
|
1410
|
+
// Check if connected to a remote server
|
|
1411
|
+
if (!this.isConnectedToRemoteServer()) {
|
|
1412
|
+
throw new Error('Not connected to a remote server. Call connectToRemoteServer() first.');
|
|
1413
|
+
}
|
|
1414
|
+
// Add to local with addToRemote option
|
|
1415
|
+
return this.addNouns(items, { ...options, addToRemote: true });
|
|
1416
|
+
}
|
|
1417
|
+
/**
|
|
1418
|
+
* Filter search results by service
|
|
1419
|
+
* @param results Search results to filter
|
|
1420
|
+
* @param service Service to filter by
|
|
1421
|
+
* @returns Filtered search results
|
|
1422
|
+
* @private
|
|
1423
|
+
*/
|
|
1424
|
+
filterResultsByService(results, service) {
|
|
1425
|
+
if (!service)
|
|
1426
|
+
return results;
|
|
1427
|
+
return results.filter((result) => {
|
|
1428
|
+
if (!result.metadata || typeof result.metadata !== 'object')
|
|
1429
|
+
return false;
|
|
1430
|
+
if (!('createdBy' in result.metadata))
|
|
1431
|
+
return false;
|
|
1432
|
+
const createdBy = result.metadata.createdBy;
|
|
1433
|
+
if (!createdBy)
|
|
1434
|
+
return false;
|
|
1435
|
+
return createdBy.augmentation === service;
|
|
1436
|
+
});
|
|
1437
|
+
}
|
|
1438
|
+
/**
|
|
1439
|
+
* Search for similar vectors within specific noun types
|
|
1440
|
+
* @param queryVectorOrData Query vector or data to search for
|
|
1441
|
+
* @param k Number of results to return
|
|
1442
|
+
* @param nounTypes Array of noun types to search within, or null to search all
|
|
1443
|
+
* @param options Additional options
|
|
1444
|
+
* @returns Array of search results
|
|
1445
|
+
*/
|
|
1446
|
+
/**
|
|
1447
|
+
* @deprecated Use search() with nounTypes option instead
|
|
1448
|
+
* @example
|
|
1449
|
+
* // Old way (deprecated)
|
|
1450
|
+
* await brain.searchByNounTypes(query, 10, ['type1', 'type2'])
|
|
1451
|
+
* // New way
|
|
1452
|
+
* await brain.search(query, { limit: 10, nounTypes: ['type1', 'type2'] })
|
|
1453
|
+
*/
|
|
1454
|
+
async searchByNounTypes(queryVectorOrData, k = 10, nounTypes = null, options = {}) {
|
|
1455
|
+
// Helper function to filter results by service
|
|
1456
|
+
const filterByService = (metadata) => {
|
|
1457
|
+
if (!options.service)
|
|
1458
|
+
return true; // No filter, include all
|
|
1459
|
+
// Check if metadata has createdBy field with matching service
|
|
1460
|
+
if (!metadata || typeof metadata !== 'object')
|
|
1461
|
+
return false;
|
|
1462
|
+
if (!('createdBy' in metadata))
|
|
1463
|
+
return false;
|
|
1464
|
+
const createdBy = metadata.createdBy;
|
|
1465
|
+
if (!createdBy)
|
|
1466
|
+
return false;
|
|
1467
|
+
return createdBy.augmentation === options.service;
|
|
1468
|
+
};
|
|
1469
|
+
if (!this.isInitialized) {
|
|
1470
|
+
throw new Error('BrainyData must be initialized before searching. Call init() first.');
|
|
1471
|
+
}
|
|
1472
|
+
// Check if database is in write-only mode
|
|
1473
|
+
this.checkWriteOnly();
|
|
1474
|
+
try {
|
|
1475
|
+
let queryVector;
|
|
1289
1476
|
// Check if input is already a vector
|
|
1290
|
-
if (Array.isArray(
|
|
1291
|
-
|
|
1292
|
-
|
|
1477
|
+
if (Array.isArray(queryVectorOrData) &&
|
|
1478
|
+
queryVectorOrData.every((item) => typeof item === 'number') &&
|
|
1479
|
+
!options.forceEmbed) {
|
|
1480
|
+
// Input is already a vector
|
|
1481
|
+
queryVector = queryVectorOrData;
|
|
1293
1482
|
}
|
|
1294
1483
|
else {
|
|
1295
1484
|
// Input needs to be vectorized
|
|
1296
1485
|
try {
|
|
1297
|
-
|
|
1298
|
-
if (typeof vectorOrData === 'object' &&
|
|
1299
|
-
vectorOrData !== null &&
|
|
1300
|
-
!Array.isArray(vectorOrData)) {
|
|
1301
|
-
// Process JSON object for better vectorization
|
|
1302
|
-
const preparedText = prepareJsonForVectorization(vectorOrData, {
|
|
1303
|
-
// Prioritize common name/title fields if they exist
|
|
1304
|
-
priorityFields: [
|
|
1305
|
-
'name',
|
|
1306
|
-
'title',
|
|
1307
|
-
'company',
|
|
1308
|
-
'organization',
|
|
1309
|
-
'description',
|
|
1310
|
-
'summary'
|
|
1311
|
-
]
|
|
1312
|
-
});
|
|
1313
|
-
vector = await this.embeddingFunction(preparedText);
|
|
1314
|
-
// IMPORTANT: When an object is passed as data and no metadata is provided,
|
|
1315
|
-
// use the object AS the metadata too. This is expected behavior for the API.
|
|
1316
|
-
// Users can pass either:
|
|
1317
|
-
// 1. addNoun(string, metadata) - vectorize string, store metadata
|
|
1318
|
-
// 2. addNoun(object) - vectorize object text, store object as metadata
|
|
1319
|
-
// 3. addNoun(object, metadata) - vectorize object text, store provided metadata
|
|
1320
|
-
if (!metadata) {
|
|
1321
|
-
metadata = vectorOrData;
|
|
1322
|
-
}
|
|
1323
|
-
// Track field names for this JSON document
|
|
1324
|
-
const service = this.getServiceName(options);
|
|
1325
|
-
if (this.storage) {
|
|
1326
|
-
await this.storage.trackFieldNames(vectorOrData, service);
|
|
1327
|
-
}
|
|
1328
|
-
}
|
|
1329
|
-
else {
|
|
1330
|
-
// Use standard embedding for non-JSON data
|
|
1331
|
-
vector = await this.embeddingFunction(vectorOrData);
|
|
1332
|
-
}
|
|
1486
|
+
queryVector = await this.embeddingFunction(queryVectorOrData);
|
|
1333
1487
|
}
|
|
1334
1488
|
catch (embedError) {
|
|
1335
|
-
throw new Error(`Failed to vectorize data: ${embedError}`);
|
|
1489
|
+
throw new Error(`Failed to vectorize query data: ${embedError}`);
|
|
1336
1490
|
}
|
|
1337
1491
|
}
|
|
1338
|
-
// Check if vector is defined
|
|
1339
|
-
if (!
|
|
1340
|
-
throw new Error('
|
|
1492
|
+
// Check if query vector is defined
|
|
1493
|
+
if (!queryVector) {
|
|
1494
|
+
throw new Error('Query vector is undefined or null');
|
|
1341
1495
|
}
|
|
1342
|
-
//
|
|
1343
|
-
if (
|
|
1344
|
-
throw new Error(`
|
|
1496
|
+
// Check if query vector dimensions match the expected dimensions
|
|
1497
|
+
if (queryVector.length !== this._dimensions) {
|
|
1498
|
+
throw new Error(`Query vector dimension mismatch: expected ${this._dimensions}, got ${queryVector.length}`);
|
|
1345
1499
|
}
|
|
1346
|
-
//
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
if (options.id) {
|
|
1354
|
-
try {
|
|
1355
|
-
if (this.writeOnly) {
|
|
1356
|
-
// In write-only mode, check storage directly
|
|
1357
|
-
existingNoun =
|
|
1358
|
-
(await this.storage.getNoun(options.id)) ?? undefined;
|
|
1500
|
+
// If no noun types specified, search all nouns
|
|
1501
|
+
if (!nounTypes || nounTypes.length === 0) {
|
|
1502
|
+
// Check if we're in readonly mode with lazy loading and the index is empty
|
|
1503
|
+
const indexSize = this.index.getNouns().size;
|
|
1504
|
+
if (this.readOnly && this.lazyLoadInReadOnlyMode && indexSize === 0) {
|
|
1505
|
+
if (this.loggingConfig?.verbose) {
|
|
1506
|
+
console.log('Lazy loading mode: Index is empty, loading nodes for search...');
|
|
1359
1507
|
}
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
|
|
1365
|
-
|
|
1508
|
+
// In lazy loading mode, we need to load some nodes to search
|
|
1509
|
+
// Instead of loading all nodes, we'll load a subset of nodes
|
|
1510
|
+
// Load a limited number of nodes from storage using pagination
|
|
1511
|
+
const result = await this.storage.getNouns({
|
|
1512
|
+
pagination: { offset: 0, limit: k * 10 } // Get 10x more nodes than needed
|
|
1513
|
+
});
|
|
1514
|
+
const limitedNouns = result.items;
|
|
1515
|
+
// Add these nodes to the index
|
|
1516
|
+
for (const node of limitedNouns) {
|
|
1517
|
+
// Check if the vector dimensions match the expected dimensions
|
|
1518
|
+
if (node.vector.length !== this._dimensions) {
|
|
1519
|
+
console.warn(`Skipping node ${node.id} due to dimension mismatch: expected ${this._dimensions}, got ${node.vector.length}`);
|
|
1520
|
+
continue;
|
|
1366
1521
|
}
|
|
1522
|
+
// Add to index
|
|
1523
|
+
await this.index.addItem({
|
|
1524
|
+
id: node.id,
|
|
1525
|
+
vector: node.vector
|
|
1526
|
+
});
|
|
1367
1527
|
}
|
|
1368
|
-
if (
|
|
1369
|
-
|
|
1370
|
-
|
|
1371
|
-
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
|
|
1378
|
-
|
|
1528
|
+
if (this.loggingConfig?.verbose) {
|
|
1529
|
+
console.log(`Lazy loading mode: Added ${limitedNouns.length} nodes to index for search`);
|
|
1530
|
+
}
|
|
1531
|
+
}
|
|
1532
|
+
// Create filter function for HNSW search with metadata index optimization
|
|
1533
|
+
const hasMetadataFilter = options.metadata && Object.keys(options.metadata).length > 0;
|
|
1534
|
+
const hasServiceFilter = !!options.service;
|
|
1535
|
+
let filterFunction;
|
|
1536
|
+
let preFilteredIds;
|
|
1537
|
+
// Use metadata index for pre-filtering if available
|
|
1538
|
+
if (hasMetadataFilter && this.metadataIndex) {
|
|
1539
|
+
try {
|
|
1540
|
+
// Ensure metadata index is up to date
|
|
1541
|
+
await this.metadataIndex?.flush?.();
|
|
1542
|
+
// Get candidate IDs from metadata index
|
|
1543
|
+
const candidateIds = await this.metadataIndex?.getIdsForFilter?.(options.metadata) || [];
|
|
1544
|
+
if (candidateIds.length > 0) {
|
|
1545
|
+
preFilteredIds = new Set(candidateIds);
|
|
1546
|
+
// Create a simple filter function that just checks the pre-filtered set
|
|
1547
|
+
filterFunction = async (id) => {
|
|
1548
|
+
if (!preFilteredIds.has(id))
|
|
1549
|
+
return false;
|
|
1550
|
+
// Still apply service filter if needed
|
|
1551
|
+
if (hasServiceFilter) {
|
|
1552
|
+
const metadata = await this.storage.getMetadata(id);
|
|
1553
|
+
const noun = this.index.getNouns().get(id);
|
|
1554
|
+
if (!noun || !metadata)
|
|
1555
|
+
return false;
|
|
1556
|
+
const result = { id, score: 0, vector: noun.vector, metadata };
|
|
1557
|
+
return this.filterResultsByService([result], options.service).length > 0;
|
|
1558
|
+
}
|
|
1559
|
+
return true;
|
|
1560
|
+
};
|
|
1379
1561
|
}
|
|
1380
1562
|
else {
|
|
1381
|
-
//
|
|
1382
|
-
|
|
1383
|
-
console.log(`Updating existing noun ${options.id}`);
|
|
1384
|
-
}
|
|
1563
|
+
// No items match the metadata criteria, return empty results immediately
|
|
1564
|
+
return [];
|
|
1385
1565
|
}
|
|
1386
1566
|
}
|
|
1567
|
+
catch (indexError) {
|
|
1568
|
+
console.warn('Metadata index error, falling back to full filtering:', indexError);
|
|
1569
|
+
// Fall back to full metadata filtering below
|
|
1570
|
+
}
|
|
1387
1571
|
}
|
|
1388
|
-
|
|
1389
|
-
|
|
1390
|
-
|
|
1391
|
-
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
// Create noun object directly without adding to index
|
|
1396
|
-
noun = {
|
|
1397
|
-
id,
|
|
1398
|
-
vector,
|
|
1399
|
-
connections: new Map(),
|
|
1400
|
-
level: 0, // Default level for new nodes
|
|
1401
|
-
metadata: undefined // Will be set separately
|
|
1402
|
-
};
|
|
1403
|
-
}
|
|
1404
|
-
else {
|
|
1405
|
-
// Normal mode: Add to HNSW index first
|
|
1406
|
-
await this.hnswIndex.addItem({ id, vector, metadata });
|
|
1407
|
-
// Get the noun from the HNSW index
|
|
1408
|
-
const indexNoun = this.hnswIndex.getNouns().get(id);
|
|
1409
|
-
if (!indexNoun) {
|
|
1410
|
-
throw new Error(`Failed to retrieve newly created noun with ID ${id}`);
|
|
1411
|
-
}
|
|
1412
|
-
noun = indexNoun;
|
|
1413
|
-
}
|
|
1414
|
-
// Save noun to storage using augmentation system
|
|
1415
|
-
await this.augmentations.execute('saveNoun', { noun, options }, async () => {
|
|
1416
|
-
await this.storage.saveNoun(noun);
|
|
1417
|
-
const service = this.getServiceName(options);
|
|
1418
|
-
await this.storage.incrementStatistic('noun', service);
|
|
1419
|
-
});
|
|
1420
|
-
// Save metadata if provided and not empty
|
|
1421
|
-
if (metadata !== undefined) {
|
|
1422
|
-
// Skip saving if metadata is an empty object
|
|
1423
|
-
if (metadata &&
|
|
1424
|
-
typeof metadata === 'object' &&
|
|
1425
|
-
Object.keys(metadata).length === 0) {
|
|
1426
|
-
// Don't save empty metadata
|
|
1427
|
-
// Explicitly save null to ensure no metadata is stored
|
|
1428
|
-
await this.storage.saveMetadata(id, null);
|
|
1429
|
-
}
|
|
1430
|
-
else {
|
|
1431
|
-
// Validate noun type if metadata is for a GraphNoun
|
|
1432
|
-
if (metadata && typeof metadata === 'object' && 'noun' in metadata) {
|
|
1433
|
-
const nounType = metadata.noun;
|
|
1434
|
-
// Check if the noun type is valid
|
|
1435
|
-
const isValidNounType = Object.values(NounType).includes(nounType);
|
|
1436
|
-
if (!isValidNounType) {
|
|
1437
|
-
console.warn(`Invalid noun type: ${nounType}. Falling back to GraphNoun.`);
|
|
1438
|
-
metadata.noun = NounType.Concept;
|
|
1439
|
-
}
|
|
1440
|
-
// Ensure createdBy field is populated for GraphNoun
|
|
1441
|
-
const service = options.service || this.getCurrentAugmentation();
|
|
1442
|
-
const graphNoun = metadata;
|
|
1443
|
-
// Only set createdBy if it doesn't exist or is being explicitly updated
|
|
1444
|
-
if (!graphNoun.createdBy || options.service) {
|
|
1445
|
-
graphNoun.createdBy = getAugmentationVersion(service);
|
|
1446
|
-
}
|
|
1447
|
-
// Update timestamps
|
|
1448
|
-
const now = new Date();
|
|
1449
|
-
const timestamp = {
|
|
1450
|
-
seconds: Math.floor(now.getTime() / 1000),
|
|
1451
|
-
nanoseconds: (now.getTime() % 1000) * 1000000
|
|
1452
|
-
};
|
|
1453
|
-
// Set createdAt if it doesn't exist
|
|
1454
|
-
if (!graphNoun.createdAt) {
|
|
1455
|
-
graphNoun.createdAt = timestamp;
|
|
1572
|
+
// Fallback to full metadata filtering if index wasn't used
|
|
1573
|
+
if (!filterFunction && (hasMetadataFilter || hasServiceFilter)) {
|
|
1574
|
+
filterFunction = async (id) => {
|
|
1575
|
+
// Get metadata for filtering
|
|
1576
|
+
let metadata = await this.storage.getMetadata(id);
|
|
1577
|
+
if (metadata === null) {
|
|
1578
|
+
metadata = {};
|
|
1456
1579
|
}
|
|
1457
|
-
//
|
|
1458
|
-
|
|
1459
|
-
|
|
1460
|
-
|
|
1461
|
-
|
|
1462
|
-
// Add domain metadata if distributed mode is enabled
|
|
1463
|
-
if (this.domainDetector) {
|
|
1464
|
-
// First check if domain is already in metadata
|
|
1465
|
-
if (metadataToSave.domain) {
|
|
1466
|
-
// Domain already specified, keep it
|
|
1467
|
-
const domainInfo = this.domainDetector.detectDomain(metadataToSave);
|
|
1468
|
-
if (domainInfo.domainMetadata) {
|
|
1469
|
-
;
|
|
1470
|
-
metadataToSave.domainMetadata =
|
|
1471
|
-
domainInfo.domainMetadata;
|
|
1580
|
+
// Apply metadata filter
|
|
1581
|
+
if (hasMetadataFilter) {
|
|
1582
|
+
const matches = matchesMetadataFilter(metadata, options.metadata);
|
|
1583
|
+
if (!matches) {
|
|
1584
|
+
return false;
|
|
1472
1585
|
}
|
|
1473
1586
|
}
|
|
1474
|
-
|
|
1475
|
-
|
|
1476
|
-
const
|
|
1477
|
-
|
|
1478
|
-
|
|
1479
|
-
const
|
|
1480
|
-
if (
|
|
1481
|
-
;
|
|
1482
|
-
metadataToSave.domain = domainInfo.domain;
|
|
1483
|
-
if (domainInfo.domainMetadata) {
|
|
1484
|
-
;
|
|
1485
|
-
metadataToSave.domainMetadata =
|
|
1486
|
-
domainInfo.domainMetadata;
|
|
1487
|
-
}
|
|
1587
|
+
// Apply service filter
|
|
1588
|
+
if (hasServiceFilter) {
|
|
1589
|
+
const noun = this.index.getNouns().get(id);
|
|
1590
|
+
if (!noun)
|
|
1591
|
+
return false;
|
|
1592
|
+
const result = { id, score: 0, vector: noun.vector, metadata };
|
|
1593
|
+
if (!this.filterResultsByService([result], options.service).length) {
|
|
1594
|
+
return false;
|
|
1488
1595
|
}
|
|
1489
1596
|
}
|
|
1597
|
+
return true;
|
|
1598
|
+
};
|
|
1599
|
+
}
|
|
1600
|
+
// When using offset, we need to fetch more results and then slice
|
|
1601
|
+
const offset = options.offset || 0;
|
|
1602
|
+
const totalNeeded = k + offset;
|
|
1603
|
+
// Search in the index with filter
|
|
1604
|
+
const results = await this.index.search(queryVector, totalNeeded, filterFunction);
|
|
1605
|
+
// Skip the offset number of results
|
|
1606
|
+
const paginatedResults = results.slice(offset, offset + k);
|
|
1607
|
+
// Get metadata for each result
|
|
1608
|
+
const searchResults = [];
|
|
1609
|
+
for (const [id, score] of paginatedResults) {
|
|
1610
|
+
const noun = this.index.getNouns().get(id);
|
|
1611
|
+
if (!noun) {
|
|
1612
|
+
continue;
|
|
1490
1613
|
}
|
|
1491
|
-
|
|
1492
|
-
if
|
|
1493
|
-
|
|
1494
|
-
|
|
1495
|
-
}
|
|
1496
|
-
await this.storage.saveMetadata(id, metadataToSave);
|
|
1497
|
-
// Update metadata index (write-only mode should build indices!)
|
|
1498
|
-
if (this.index && !this.frozen) {
|
|
1499
|
-
await this.metadataIndex?.addToIndex?.(id, metadataToSave);
|
|
1500
|
-
}
|
|
1501
|
-
// Track metadata statistics
|
|
1502
|
-
const metadataService = this.getServiceName(options);
|
|
1503
|
-
await this.storage.incrementStatistic('metadata', metadataService);
|
|
1504
|
-
// Track content type if it's a GraphNoun
|
|
1505
|
-
if (metadataToSave &&
|
|
1506
|
-
typeof metadataToSave === 'object' &&
|
|
1507
|
-
'noun' in metadataToSave) {
|
|
1508
|
-
this.metrics.trackContentType(metadataToSave.noun);
|
|
1614
|
+
let metadata = await this.storage.getMetadata(id);
|
|
1615
|
+
// Initialize metadata to an empty object if it's null
|
|
1616
|
+
if (metadata === null) {
|
|
1617
|
+
metadata = {};
|
|
1509
1618
|
}
|
|
1510
|
-
//
|
|
1619
|
+
// Preserve original metadata without overwriting user's custom fields
|
|
1620
|
+
// The search result already has Brainy's UUID in the main 'id' field
|
|
1621
|
+
searchResults.push({
|
|
1622
|
+
id,
|
|
1623
|
+
score: 1 - score, // Convert distance to similarity (higher = more similar)
|
|
1624
|
+
vector: noun.vector,
|
|
1625
|
+
metadata: metadata
|
|
1626
|
+
});
|
|
1511
1627
|
}
|
|
1628
|
+
return searchResults;
|
|
1512
1629
|
}
|
|
1513
|
-
|
|
1514
|
-
|
|
1515
|
-
|
|
1516
|
-
|
|
1517
|
-
|
|
1518
|
-
const
|
|
1519
|
-
|
|
1520
|
-
|
|
1521
|
-
// If addToRemote is true and we're connected to a remote server, add to remote as well
|
|
1522
|
-
if (options.addToRemote && this.isConnectedToRemoteServer()) {
|
|
1523
|
-
try {
|
|
1524
|
-
await this.addToRemote(id, vector, metadata);
|
|
1630
|
+
else {
|
|
1631
|
+
// Get nouns for each noun type in parallel
|
|
1632
|
+
const nounPromises = nounTypes.map((nounType) => this.storage.getNounsByNounType(nounType));
|
|
1633
|
+
const nounArrays = await Promise.all(nounPromises);
|
|
1634
|
+
// Combine all nouns
|
|
1635
|
+
const nouns = [];
|
|
1636
|
+
for (const nounArray of nounArrays) {
|
|
1637
|
+
nouns.push(...nounArray);
|
|
1525
1638
|
}
|
|
1526
|
-
|
|
1527
|
-
|
|
1639
|
+
// Calculate distances for each noun
|
|
1640
|
+
const results = [];
|
|
1641
|
+
for (const noun of nouns) {
|
|
1642
|
+
const distance = this.index.getDistanceFunction()(queryVector, noun.vector);
|
|
1643
|
+
results.push([noun.id, distance]);
|
|
1528
1644
|
}
|
|
1529
|
-
|
|
1530
|
-
|
|
1531
|
-
|
|
1532
|
-
|
|
1533
|
-
|
|
1534
|
-
|
|
1535
|
-
|
|
1536
|
-
|
|
1537
|
-
|
|
1538
|
-
|
|
1539
|
-
|
|
1540
|
-
shouldProcessNeurally = this.shouldAutoProcessNeurally(vectorOrData, metadata);
|
|
1541
|
-
}
|
|
1542
|
-
// 'literal' mode means no neural processing
|
|
1543
|
-
// 🧠 AI Processing (Neural Import) - Based on processing mode
|
|
1544
|
-
if (shouldProcessNeurally) {
|
|
1545
|
-
try {
|
|
1546
|
-
// Execute augmentation pipeline for data processing
|
|
1547
|
-
// Note: Augmentations will be called via this.augmentations.execute during the actual add operation
|
|
1548
|
-
// This replaces the legacy SENSE pipeline
|
|
1549
|
-
if (this.loggingConfig?.verbose) {
|
|
1550
|
-
console.log(`🧠 AI processing completed for data: ${id}`);
|
|
1645
|
+
// Sort by distance (ascending)
|
|
1646
|
+
results.sort((a, b) => a[1] - b[1]);
|
|
1647
|
+
// Apply offset and take k results
|
|
1648
|
+
const offset = options.offset || 0;
|
|
1649
|
+
const topResults = results.slice(offset, offset + k);
|
|
1650
|
+
// Get metadata for each result
|
|
1651
|
+
const searchResults = [];
|
|
1652
|
+
for (const [id, score] of topResults) {
|
|
1653
|
+
const noun = nouns.find((n) => n.id === id);
|
|
1654
|
+
if (!noun) {
|
|
1655
|
+
continue;
|
|
1551
1656
|
}
|
|
1657
|
+
let metadata = await this.storage.getMetadata(id);
|
|
1658
|
+
// Initialize metadata to an empty object if it's null
|
|
1659
|
+
if (metadata === null) {
|
|
1660
|
+
metadata = {};
|
|
1661
|
+
}
|
|
1662
|
+
// Preserve original metadata without overwriting user's custom fields
|
|
1663
|
+
// The search result already has Brainy's UUID in the main 'id' field
|
|
1664
|
+
searchResults.push({
|
|
1665
|
+
id,
|
|
1666
|
+
score: 1 - score, // Convert distance to similarity (higher = more similar)
|
|
1667
|
+
vector: noun.vector,
|
|
1668
|
+
metadata: metadata
|
|
1669
|
+
});
|
|
1552
1670
|
}
|
|
1553
|
-
|
|
1554
|
-
|
|
1555
|
-
console.warn(`🧠 AI processing failed for ${id}:`, processingError);
|
|
1556
|
-
}
|
|
1671
|
+
// Results are already filtered, just return them
|
|
1672
|
+
return searchResults;
|
|
1557
1673
|
}
|
|
1558
|
-
return id;
|
|
1559
1674
|
}
|
|
1560
1675
|
catch (error) {
|
|
1561
|
-
console.error('Failed to
|
|
1562
|
-
|
|
1563
|
-
if (this.monitoring) {
|
|
1564
|
-
this.monitoring.recordRequest(0, true);
|
|
1565
|
-
}
|
|
1566
|
-
throw new Error(`Failed to add vector: ${error}`);
|
|
1676
|
+
console.error('Failed to search vectors by noun types:', error);
|
|
1677
|
+
throw new Error(`Failed to search vectors by noun types: ${error}`);
|
|
1567
1678
|
}
|
|
1568
1679
|
}
|
|
1569
|
-
// REMOVED: addItem() - Use addNoun() instead (cleaner 2.0 API)
|
|
1570
|
-
// REMOVED: addToBoth() - Remote server functionality moved to post-2.0.0
|
|
1571
1680
|
/**
|
|
1572
|
-
*
|
|
1573
|
-
* @param
|
|
1574
|
-
* @param
|
|
1575
|
-
* @param
|
|
1576
|
-
* @returns
|
|
1577
|
-
* @private
|
|
1681
|
+
* Search for similar vectors
|
|
1682
|
+
* @param queryVectorOrData Query vector or data to search for
|
|
1683
|
+
* @param k Number of results to return
|
|
1684
|
+
* @param options Additional options
|
|
1685
|
+
* @returns Array of search results
|
|
1578
1686
|
*/
|
|
1579
|
-
|
|
1580
|
-
|
|
1581
|
-
|
|
1687
|
+
/**
|
|
1688
|
+
* 🔍 SIMPLE VECTOR SEARCH - Clean wrapper around find() for pure vector search
|
|
1689
|
+
*
|
|
1690
|
+
* @param queryVectorOrData Vector or text to search for
|
|
1691
|
+
* @param k Number of results to return
|
|
1692
|
+
* @param options Simple search options (metadata filters only)
|
|
1693
|
+
* @returns Vector search results
|
|
1694
|
+
*/
|
|
1695
|
+
/**
|
|
1696
|
+
* 🔍 Simple Vector Similarity Search - Clean wrapper around find()
|
|
1697
|
+
*
|
|
1698
|
+
* search(query) = find({like: query}) - Pure vector similarity search
|
|
1699
|
+
*
|
|
1700
|
+
* @param queryVectorOrData - Query string, vector, or object to search with
|
|
1701
|
+
* @param options - Search options for filtering and pagination
|
|
1702
|
+
* @returns Array of search results with scores and metadata
|
|
1703
|
+
*
|
|
1704
|
+
* @example
|
|
1705
|
+
* // Simple vector search
|
|
1706
|
+
* await brain.search('machine learning')
|
|
1707
|
+
*
|
|
1708
|
+
* // With filters and pagination
|
|
1709
|
+
* await brain.search('AI', {
|
|
1710
|
+
* limit: 20,
|
|
1711
|
+
* metadata: { type: 'article' },
|
|
1712
|
+
* nounTypes: ['document']
|
|
1713
|
+
* })
|
|
1714
|
+
*/
|
|
1715
|
+
async search(queryVectorOrData, options = {}) {
|
|
1716
|
+
// Build metadata filter from options
|
|
1717
|
+
const metadataFilter = { ...options.metadata };
|
|
1718
|
+
// Add noun type filtering
|
|
1719
|
+
if (options.nounTypes && options.nounTypes.length > 0) {
|
|
1720
|
+
metadataFilter.nounType = { in: options.nounTypes };
|
|
1582
1721
|
}
|
|
1583
|
-
|
|
1584
|
-
|
|
1585
|
-
|
|
1586
|
-
// throw new Error(
|
|
1587
|
-
// 'Server search conduit or connection is not initialized'
|
|
1588
|
-
// )
|
|
1589
|
-
// }
|
|
1590
|
-
// TODO: Add to remote server
|
|
1591
|
-
// const addResult = await this.serverSearchConduit.addToBoth(
|
|
1592
|
-
// this.serverConnection.connectionId,
|
|
1593
|
-
// vector,
|
|
1594
|
-
// metadata
|
|
1595
|
-
// )
|
|
1596
|
-
throw new Error('Remote server functionality not yet implemented in Brainy 2.0.0');
|
|
1597
|
-
// TODO: Handle remote add result (post-2.0.0 feature)
|
|
1598
|
-
// if (!addResult.success) {
|
|
1599
|
-
// throw new Error(`Remote add failed: ${addResult.error}`)
|
|
1600
|
-
// }
|
|
1601
|
-
return true;
|
|
1722
|
+
// Add item ID filtering
|
|
1723
|
+
if (options.itemIds && options.itemIds.length > 0) {
|
|
1724
|
+
metadataFilter.id = { in: options.itemIds };
|
|
1602
1725
|
}
|
|
1603
|
-
|
|
1604
|
-
|
|
1605
|
-
|
|
1726
|
+
// Build simple TripleQuery for vector similarity
|
|
1727
|
+
const tripleQuery = {
|
|
1728
|
+
like: queryVectorOrData
|
|
1729
|
+
};
|
|
1730
|
+
// Add metadata filter if we have conditions
|
|
1731
|
+
if (Object.keys(metadataFilter).length > 0) {
|
|
1732
|
+
tripleQuery.where = metadataFilter;
|
|
1733
|
+
}
|
|
1734
|
+
// Extract find() options
|
|
1735
|
+
const findOptions = {
|
|
1736
|
+
limit: options.limit,
|
|
1737
|
+
offset: options.offset,
|
|
1738
|
+
cursor: options.cursor,
|
|
1739
|
+
excludeDeleted: options.excludeDeleted,
|
|
1740
|
+
timeout: options.timeout
|
|
1741
|
+
};
|
|
1742
|
+
// Call find() with structured query - this is the key simplification!
|
|
1743
|
+
let results = await this.find(tripleQuery, findOptions);
|
|
1744
|
+
// Apply threshold filtering if specified
|
|
1745
|
+
if (options.threshold !== undefined) {
|
|
1746
|
+
results = results.filter(r => (r.fusionScore || r.score || 0) >= options.threshold);
|
|
1606
1747
|
}
|
|
1748
|
+
// Convert to SearchResult format
|
|
1749
|
+
return results.map(r => ({
|
|
1750
|
+
...r,
|
|
1751
|
+
score: r.fusionScore || r.score || 0
|
|
1752
|
+
}));
|
|
1753
|
+
return results;
|
|
1607
1754
|
}
|
|
1608
1755
|
/**
|
|
1609
|
-
*
|
|
1610
|
-
* @
|
|
1611
|
-
* @param options Additional options
|
|
1612
|
-
* @returns Array of IDs for the added items
|
|
1756
|
+
* Helper method to encode cursor for pagination
|
|
1757
|
+
* @internal
|
|
1613
1758
|
*/
|
|
1759
|
+
encodeCursor(data) {
|
|
1760
|
+
return Buffer.from(JSON.stringify(data)).toString('base64');
|
|
1761
|
+
}
|
|
1614
1762
|
/**
|
|
1615
|
-
*
|
|
1616
|
-
* @
|
|
1617
|
-
* @param options Batch processing options
|
|
1618
|
-
* @returns Array of generated IDs
|
|
1763
|
+
* Helper method to decode cursor for pagination
|
|
1764
|
+
* @internal
|
|
1619
1765
|
*/
|
|
1620
|
-
|
|
1621
|
-
await this.ensureInitialized();
|
|
1622
|
-
// Check if database is in read-only mode
|
|
1623
|
-
this.checkReadOnly();
|
|
1624
|
-
// Default concurrency to 4 if not specified
|
|
1625
|
-
const concurrency = options.concurrency || 4;
|
|
1626
|
-
// Default batch size to 50 if not specified
|
|
1627
|
-
const batchSize = options.batchSize || 50;
|
|
1766
|
+
decodeCursor(cursor) {
|
|
1628
1767
|
try {
|
|
1629
|
-
|
|
1630
|
-
const ids = [];
|
|
1631
|
-
const itemsToProcess = [...items]; // Create a copy to avoid modifying the original array
|
|
1632
|
-
while (itemsToProcess.length > 0) {
|
|
1633
|
-
// Take up to 'batchSize' items to process in a batch
|
|
1634
|
-
const batch = itemsToProcess.splice(0, batchSize);
|
|
1635
|
-
// Separate items that are already vectors from those that need embedding
|
|
1636
|
-
const vectorItems = [];
|
|
1637
|
-
const textItems = [];
|
|
1638
|
-
// Categorize items
|
|
1639
|
-
batch.forEach((item, index) => {
|
|
1640
|
-
if (Array.isArray(item.vectorOrData) &&
|
|
1641
|
-
item.vectorOrData.every((val) => typeof val === 'number') &&
|
|
1642
|
-
!options.forceEmbed) {
|
|
1643
|
-
// Item is already a vector
|
|
1644
|
-
vectorItems.push({
|
|
1645
|
-
vectorOrData: item.vectorOrData,
|
|
1646
|
-
metadata: item.metadata,
|
|
1647
|
-
index
|
|
1648
|
-
});
|
|
1649
|
-
}
|
|
1650
|
-
else if (typeof item.vectorOrData === 'string') {
|
|
1651
|
-
// Item is text that needs embedding
|
|
1652
|
-
textItems.push({
|
|
1653
|
-
text: item.vectorOrData,
|
|
1654
|
-
metadata: item.metadata,
|
|
1655
|
-
index
|
|
1656
|
-
});
|
|
1657
|
-
}
|
|
1658
|
-
else {
|
|
1659
|
-
// For now, treat other types as text
|
|
1660
|
-
// In a more complete implementation, we might handle other types differently
|
|
1661
|
-
const textRepresentation = String(item.vectorOrData);
|
|
1662
|
-
textItems.push({
|
|
1663
|
-
text: textRepresentation,
|
|
1664
|
-
metadata: item.metadata,
|
|
1665
|
-
index
|
|
1666
|
-
});
|
|
1667
|
-
}
|
|
1668
|
-
});
|
|
1669
|
-
// Process vector items (already embedded)
|
|
1670
|
-
const vectorPromises = vectorItems.map((item) => this.addNoun(item.vectorOrData, item.metadata));
|
|
1671
|
-
// Process text items in a single batch embedding operation
|
|
1672
|
-
let textPromises = [];
|
|
1673
|
-
if (textItems.length > 0) {
|
|
1674
|
-
// Extract just the text for batch embedding
|
|
1675
|
-
const texts = textItems.map((item) => item.text);
|
|
1676
|
-
// Perform batch embedding
|
|
1677
|
-
const embeddings = await batchEmbed(texts);
|
|
1678
|
-
// Add each item with its embedding
|
|
1679
|
-
textPromises = textItems.map((item, i) => this.addNoun(embeddings[i], item.metadata));
|
|
1680
|
-
}
|
|
1681
|
-
// Combine all promises
|
|
1682
|
-
const batchResults = await Promise.all([
|
|
1683
|
-
...vectorPromises,
|
|
1684
|
-
...textPromises
|
|
1685
|
-
]);
|
|
1686
|
-
// Add the results to our ids array
|
|
1687
|
-
ids.push(...batchResults);
|
|
1688
|
-
}
|
|
1689
|
-
return ids;
|
|
1768
|
+
return JSON.parse(Buffer.from(cursor, 'base64').toString());
|
|
1690
1769
|
}
|
|
1691
|
-
catch
|
|
1692
|
-
|
|
1693
|
-
throw new Error(`Failed to add batch of items: ${error}`);
|
|
1770
|
+
catch {
|
|
1771
|
+
return { offset: 0, timestamp: 0 };
|
|
1694
1772
|
}
|
|
1695
1773
|
}
|
|
1696
1774
|
/**
|
|
1697
|
-
*
|
|
1698
|
-
*
|
|
1699
|
-
*
|
|
1700
|
-
*
|
|
1775
|
+
* Internal method for direct HNSW vector search
|
|
1776
|
+
* Used by TripleIntelligence to avoid circular dependencies
|
|
1777
|
+
* Note: For pure metadata filtering, use metadataIndex.getIdsForFilter() directly - it's O(log n)!
|
|
1778
|
+
* This method is for vector similarity search with optional metadata filtering during search
|
|
1779
|
+
* @internal
|
|
1701
1780
|
*/
|
|
1702
|
-
async
|
|
1703
|
-
//
|
|
1704
|
-
|
|
1705
|
-
|
|
1781
|
+
async _internalVectorSearch(queryVectorOrData, k = 10, options = {}) {
|
|
1782
|
+
// Generate query vector
|
|
1783
|
+
const queryVector = Array.isArray(queryVectorOrData) &&
|
|
1784
|
+
typeof queryVectorOrData[0] === 'number' ?
|
|
1785
|
+
queryVectorOrData :
|
|
1786
|
+
await this.embed(queryVectorOrData);
|
|
1787
|
+
// Apply metadata filter if provided
|
|
1788
|
+
let filterFunction;
|
|
1789
|
+
if (options.metadata) {
|
|
1790
|
+
const matchingIdsArray = await this.metadataIndex?.getIdsForFilter(options.metadata) || [];
|
|
1791
|
+
const matchingIds = new Set(matchingIdsArray);
|
|
1792
|
+
filterFunction = async (id) => matchingIds.has(id);
|
|
1706
1793
|
}
|
|
1707
|
-
//
|
|
1708
|
-
|
|
1794
|
+
// Direct HNSW search
|
|
1795
|
+
const results = await this.index.search(queryVector, k, filterFunction);
|
|
1796
|
+
// Get metadata for results
|
|
1797
|
+
const searchResults = [];
|
|
1798
|
+
for (const [id, similarity] of results) {
|
|
1799
|
+
const metadata = await this.getNoun(id);
|
|
1800
|
+
searchResults.push({
|
|
1801
|
+
id,
|
|
1802
|
+
score: similarity,
|
|
1803
|
+
vector: [],
|
|
1804
|
+
metadata: metadata?.metadata || {}
|
|
1805
|
+
});
|
|
1806
|
+
}
|
|
1807
|
+
return searchResults;
|
|
1709
1808
|
}
|
|
1710
1809
|
/**
|
|
1711
|
-
*
|
|
1712
|
-
*
|
|
1713
|
-
* @param service Service to filter by
|
|
1714
|
-
* @returns Filtered search results
|
|
1715
|
-
* @private
|
|
1810
|
+
* 🎯 LEGACY: Original search implementation (kept for complex cases)
|
|
1811
|
+
* This is the original search method, now used as fallback for edge cases
|
|
1716
1812
|
*/
|
|
1717
|
-
|
|
1718
|
-
|
|
1719
|
-
|
|
1720
|
-
|
|
1721
|
-
|
|
1722
|
-
|
|
1723
|
-
|
|
1724
|
-
|
|
1725
|
-
|
|
1726
|
-
|
|
1727
|
-
|
|
1728
|
-
|
|
1813
|
+
async _legacySearch(queryVectorOrData, k = 10, options = {}) {
|
|
1814
|
+
const startTime = Date.now();
|
|
1815
|
+
// Validate input is not null or undefined
|
|
1816
|
+
if (queryVectorOrData === null || queryVectorOrData === undefined) {
|
|
1817
|
+
throw new Error('Query cannot be null or undefined');
|
|
1818
|
+
}
|
|
1819
|
+
// Validate k parameter first, before any other logic
|
|
1820
|
+
if (k <= 0 || typeof k !== 'number' || isNaN(k)) {
|
|
1821
|
+
throw new Error('Parameter k must be a positive number');
|
|
1822
|
+
}
|
|
1823
|
+
if (!this.isInitialized) {
|
|
1824
|
+
throw new Error('BrainyData must be initialized before searching. Call init() first.');
|
|
1825
|
+
}
|
|
1826
|
+
// Check if database is in write-only mode
|
|
1827
|
+
this.checkWriteOnly();
|
|
1828
|
+
// If searching for verbs directly
|
|
1829
|
+
if (options.searchVerbs) {
|
|
1830
|
+
const verbResults = await this.searchVerbs(queryVectorOrData, k, {
|
|
1831
|
+
forceEmbed: options.forceEmbed,
|
|
1832
|
+
verbTypes: options.verbTypes
|
|
1833
|
+
});
|
|
1834
|
+
// Convert verb results to SearchResult format
|
|
1835
|
+
return verbResults.map((verb) => ({
|
|
1836
|
+
id: verb.id,
|
|
1837
|
+
score: verb.similarity,
|
|
1838
|
+
vector: verb.embedding || [],
|
|
1839
|
+
metadata: {
|
|
1840
|
+
verb: verb.verb,
|
|
1841
|
+
source: verb.source,
|
|
1842
|
+
target: verb.target,
|
|
1843
|
+
...verb.data
|
|
1844
|
+
}
|
|
1845
|
+
}));
|
|
1846
|
+
}
|
|
1847
|
+
// If searching for nouns connected by verbs
|
|
1848
|
+
if (options.searchConnectedNouns) {
|
|
1849
|
+
return this.searchNounsByVerbs(queryVectorOrData, k, {
|
|
1850
|
+
forceEmbed: options.forceEmbed,
|
|
1851
|
+
verbTypes: options.verbTypes,
|
|
1852
|
+
direction: options.verbDirection
|
|
1853
|
+
});
|
|
1854
|
+
}
|
|
1855
|
+
// If a specific search mode is specified, use the appropriate search method
|
|
1856
|
+
if (options.searchMode === 'local') {
|
|
1857
|
+
return this.searchLocal(queryVectorOrData, k, options);
|
|
1858
|
+
}
|
|
1859
|
+
else if (options.searchMode === 'remote') {
|
|
1860
|
+
return this.searchRemote(queryVectorOrData, k, options);
|
|
1861
|
+
}
|
|
1862
|
+
else if (options.searchMode === 'combined') {
|
|
1863
|
+
return this.searchCombined(queryVectorOrData, k, options);
|
|
1864
|
+
}
|
|
1865
|
+
// Generate deduplication key for concurrent request handling
|
|
1866
|
+
const dedupeKey = RequestDeduplicator.getSearchKey(typeof queryVectorOrData === 'string' ? queryVectorOrData : JSON.stringify(queryVectorOrData), k, options);
|
|
1867
|
+
// Use augmentation system for search (includes deduplication, batching, and caching)
|
|
1868
|
+
return this.augmentations.execute('search', { query: queryVectorOrData, k, options, dedupeKey }, async () => {
|
|
1869
|
+
// Default behavior (backward compatible): search locally
|
|
1870
|
+
try {
|
|
1871
|
+
// BEST OF BOTH: Automatically exclude soft-deleted items (Neural Intelligence improvement)
|
|
1872
|
+
// BUT only when there's already metadata filtering happening
|
|
1873
|
+
let metadataFilter = options.metadata;
|
|
1874
|
+
// Only add soft-delete filter if there's already metadata being filtered
|
|
1875
|
+
// This preserves pure vector searches without metadata
|
|
1876
|
+
if (metadataFilter && Object.keys(metadataFilter).length > 0) {
|
|
1877
|
+
// If no explicit deleted filter is provided, exclude soft-deleted items
|
|
1878
|
+
// Use namespaced field for O(1) performance
|
|
1879
|
+
if (!metadataFilter['_brainy.deleted'] && !metadataFilter.anyOf) {
|
|
1880
|
+
metadataFilter = {
|
|
1881
|
+
...metadataFilter,
|
|
1882
|
+
['_brainy.deleted']: false // O(1) positive match instead of notEquals
|
|
1883
|
+
};
|
|
1884
|
+
}
|
|
1885
|
+
}
|
|
1886
|
+
const hasMetadataFilter = metadataFilter && Object.keys(metadataFilter).length > 0;
|
|
1887
|
+
// Check cache first (transparent to user) - but skip cache if we have metadata filters
|
|
1888
|
+
if (!hasMetadataFilter) {
|
|
1889
|
+
const cacheKey = this.cache?.getCacheKey(queryVectorOrData, k, options);
|
|
1890
|
+
const cachedResults = this.cache?.get(cacheKey);
|
|
1891
|
+
if (cachedResults) {
|
|
1892
|
+
// Track cache hit in health monitor
|
|
1893
|
+
if (this.monitoring) {
|
|
1894
|
+
const latency = Date.now() - startTime;
|
|
1895
|
+
this.monitoring.recordRequest(latency, false);
|
|
1896
|
+
this.monitoring.recordCacheAccess(true);
|
|
1897
|
+
}
|
|
1898
|
+
return cachedResults;
|
|
1899
|
+
}
|
|
1900
|
+
}
|
|
1901
|
+
// Cache miss - perform actual search
|
|
1902
|
+
const results = await this.searchLocal(queryVectorOrData, k, {
|
|
1903
|
+
...options,
|
|
1904
|
+
metadata: metadataFilter
|
|
1905
|
+
});
|
|
1906
|
+
// Cache results for future queries (unless explicitly disabled or has metadata filter)
|
|
1907
|
+
if (!options.skipCache && !hasMetadataFilter) {
|
|
1908
|
+
const cacheKey = this.cache?.getCacheKey(queryVectorOrData, k, options);
|
|
1909
|
+
this.cache?.set(cacheKey, results);
|
|
1910
|
+
}
|
|
1911
|
+
// Track successful search in health monitor
|
|
1912
|
+
if (this.monitoring) {
|
|
1913
|
+
const latency = Date.now() - startTime;
|
|
1914
|
+
this.monitoring.recordRequest(latency, false);
|
|
1915
|
+
this.monitoring.recordCacheAccess(false);
|
|
1916
|
+
}
|
|
1917
|
+
return results;
|
|
1918
|
+
}
|
|
1919
|
+
catch (error) {
|
|
1920
|
+
// Track error in health monitor
|
|
1921
|
+
if (this.monitoring) {
|
|
1922
|
+
const latency = Date.now() - startTime;
|
|
1923
|
+
this.monitoring.recordRequest(latency, true);
|
|
1924
|
+
}
|
|
1925
|
+
throw error;
|
|
1926
|
+
}
|
|
1729
1927
|
});
|
|
1730
1928
|
}
|
|
1731
1929
|
/**
|
|
1732
|
-
* Search for
|
|
1930
|
+
* Search with cursor-based pagination for better performance on large datasets
|
|
1733
1931
|
* @param queryVectorOrData Query vector or data to search for
|
|
1734
1932
|
* @param k Number of results to return
|
|
1735
|
-
* @param
|
|
1736
|
-
* @
|
|
1737
|
-
* @returns Array of search results
|
|
1933
|
+
* @param options Additional options including cursor for pagination
|
|
1934
|
+
* @returns Paginated search results with cursor for next page
|
|
1738
1935
|
*/
|
|
1739
1936
|
/**
|
|
1740
|
-
* @deprecated Use search() with
|
|
1937
|
+
* @deprecated Use search() with cursor option instead
|
|
1741
1938
|
* @example
|
|
1742
1939
|
* // Old way (deprecated)
|
|
1743
|
-
* await brain.
|
|
1940
|
+
* await brain.searchWithCursor(query, 10, { cursor: 'abc123' })
|
|
1744
1941
|
* // New way
|
|
1745
|
-
* await brain.search(query, { limit: 10,
|
|
1942
|
+
* await brain.search(query, { limit: 10, cursor: 'abc123' })
|
|
1746
1943
|
*/
|
|
1747
|
-
async
|
|
1748
|
-
//
|
|
1749
|
-
const
|
|
1750
|
-
|
|
1751
|
-
|
|
1752
|
-
|
|
1753
|
-
|
|
1754
|
-
|
|
1755
|
-
|
|
1756
|
-
|
|
1757
|
-
|
|
1758
|
-
|
|
1759
|
-
|
|
1760
|
-
|
|
1761
|
-
|
|
1762
|
-
|
|
1763
|
-
|
|
1764
|
-
|
|
1765
|
-
|
|
1766
|
-
this.checkWriteOnly();
|
|
1767
|
-
try {
|
|
1768
|
-
let queryVector;
|
|
1769
|
-
// Check if input is already a vector
|
|
1770
|
-
if (Array.isArray(queryVectorOrData) &&
|
|
1771
|
-
queryVectorOrData.every((item) => typeof item === 'number') &&
|
|
1772
|
-
!options.forceEmbed) {
|
|
1773
|
-
// Input is already a vector
|
|
1774
|
-
queryVector = queryVectorOrData;
|
|
1944
|
+
async searchWithCursor(queryVectorOrData, k = 10, options = {}) {
|
|
1945
|
+
// For cursor-based search, we need to fetch more results and filter
|
|
1946
|
+
const searchK = options.cursor ? k + 20 : k; // Get extra results for filtering
|
|
1947
|
+
// Perform regular search
|
|
1948
|
+
const { cursor, ...searchOptions } = options;
|
|
1949
|
+
const allResults = await this.search(queryVectorOrData, {
|
|
1950
|
+
limit: searchK,
|
|
1951
|
+
nounTypes: searchOptions.nounTypes,
|
|
1952
|
+
metadata: searchOptions.filter
|
|
1953
|
+
});
|
|
1954
|
+
let results = allResults;
|
|
1955
|
+
let startIndex = 0;
|
|
1956
|
+
// If cursor provided, find starting position
|
|
1957
|
+
if (options.cursor) {
|
|
1958
|
+
startIndex = allResults.findIndex((r) => r.id === options.cursor.lastId &&
|
|
1959
|
+
Math.abs(r.score - options.cursor.lastScore) < 0.0001);
|
|
1960
|
+
if (startIndex >= 0) {
|
|
1961
|
+
startIndex += 1; // Start after the cursor position
|
|
1962
|
+
results = allResults.slice(startIndex, startIndex + k);
|
|
1775
1963
|
}
|
|
1776
1964
|
else {
|
|
1777
|
-
//
|
|
1778
|
-
|
|
1779
|
-
|
|
1780
|
-
}
|
|
1781
|
-
catch (embedError) {
|
|
1782
|
-
throw new Error(`Failed to vectorize query data: ${embedError}`);
|
|
1783
|
-
}
|
|
1784
|
-
}
|
|
1785
|
-
// Check if query vector is defined
|
|
1786
|
-
if (!queryVector) {
|
|
1787
|
-
throw new Error('Query vector is undefined or null');
|
|
1788
|
-
}
|
|
1789
|
-
// Check if query vector dimensions match the expected dimensions
|
|
1790
|
-
if (queryVector.length !== this._dimensions) {
|
|
1791
|
-
throw new Error(`Query vector dimension mismatch: expected ${this._dimensions}, got ${queryVector.length}`);
|
|
1792
|
-
}
|
|
1793
|
-
// If no noun types specified, search all nouns
|
|
1794
|
-
if (!nounTypes || nounTypes.length === 0) {
|
|
1795
|
-
// Check if we're in readonly mode with lazy loading and the index is empty
|
|
1796
|
-
const indexSize = this.index.getNouns().size;
|
|
1797
|
-
if (this.readOnly && this.lazyLoadInReadOnlyMode && indexSize === 0) {
|
|
1798
|
-
if (this.loggingConfig?.verbose) {
|
|
1799
|
-
console.log('Lazy loading mode: Index is empty, loading nodes for search...');
|
|
1800
|
-
}
|
|
1801
|
-
// In lazy loading mode, we need to load some nodes to search
|
|
1802
|
-
// Instead of loading all nodes, we'll load a subset of nodes
|
|
1803
|
-
// Load a limited number of nodes from storage using pagination
|
|
1804
|
-
const result = await this.storage.getNouns({
|
|
1805
|
-
pagination: { offset: 0, limit: k * 10 } // Get 10x more nodes than needed
|
|
1806
|
-
});
|
|
1807
|
-
const limitedNouns = result.items;
|
|
1808
|
-
// Add these nodes to the index
|
|
1809
|
-
for (const node of limitedNouns) {
|
|
1810
|
-
// Check if the vector dimensions match the expected dimensions
|
|
1811
|
-
if (node.vector.length !== this._dimensions) {
|
|
1812
|
-
console.warn(`Skipping node ${node.id} due to dimension mismatch: expected ${this._dimensions}, got ${node.vector.length}`);
|
|
1813
|
-
continue;
|
|
1814
|
-
}
|
|
1815
|
-
// Add to index
|
|
1816
|
-
await this.index.addItem({
|
|
1817
|
-
id: node.id,
|
|
1818
|
-
vector: node.vector
|
|
1819
|
-
});
|
|
1820
|
-
}
|
|
1821
|
-
if (this.loggingConfig?.verbose) {
|
|
1822
|
-
console.log(`Lazy loading mode: Added ${limitedNouns.length} nodes to index for search`);
|
|
1823
|
-
}
|
|
1824
|
-
}
|
|
1825
|
-
// Create filter function for HNSW search with metadata index optimization
|
|
1826
|
-
const hasMetadataFilter = options.metadata && Object.keys(options.metadata).length > 0;
|
|
1827
|
-
const hasServiceFilter = !!options.service;
|
|
1828
|
-
let filterFunction;
|
|
1829
|
-
let preFilteredIds;
|
|
1830
|
-
// Use metadata index for pre-filtering if available
|
|
1831
|
-
if (hasMetadataFilter && this.metadataIndex) {
|
|
1832
|
-
try {
|
|
1833
|
-
// Ensure metadata index is up to date
|
|
1834
|
-
await this.metadataIndex?.flush?.();
|
|
1835
|
-
// Get candidate IDs from metadata index
|
|
1836
|
-
const candidateIds = await this.metadataIndex?.getIdsForFilter?.(options.metadata) || [];
|
|
1837
|
-
if (candidateIds.length > 0) {
|
|
1838
|
-
preFilteredIds = new Set(candidateIds);
|
|
1839
|
-
// Create a simple filter function that just checks the pre-filtered set
|
|
1840
|
-
filterFunction = async (id) => {
|
|
1841
|
-
if (!preFilteredIds.has(id))
|
|
1842
|
-
return false;
|
|
1843
|
-
// Still apply service filter if needed
|
|
1844
|
-
if (hasServiceFilter) {
|
|
1845
|
-
const metadata = await this.storage.getMetadata(id);
|
|
1846
|
-
const noun = this.index.getNouns().get(id);
|
|
1847
|
-
if (!noun || !metadata)
|
|
1848
|
-
return false;
|
|
1849
|
-
const result = { id, score: 0, vector: noun.vector, metadata };
|
|
1850
|
-
return this.filterResultsByService([result], options.service).length > 0;
|
|
1851
|
-
}
|
|
1852
|
-
return true;
|
|
1853
|
-
};
|
|
1854
|
-
}
|
|
1855
|
-
else {
|
|
1856
|
-
// No items match the metadata criteria, return empty results immediately
|
|
1857
|
-
return [];
|
|
1858
|
-
}
|
|
1859
|
-
}
|
|
1860
|
-
catch (indexError) {
|
|
1861
|
-
console.warn('Metadata index error, falling back to full filtering:', indexError);
|
|
1862
|
-
// Fall back to full metadata filtering below
|
|
1863
|
-
}
|
|
1864
|
-
}
|
|
1865
|
-
// Fallback to full metadata filtering if index wasn't used
|
|
1866
|
-
if (!filterFunction && (hasMetadataFilter || hasServiceFilter)) {
|
|
1867
|
-
filterFunction = async (id) => {
|
|
1868
|
-
// Get metadata for filtering
|
|
1869
|
-
let metadata = await this.storage.getMetadata(id);
|
|
1870
|
-
if (metadata === null) {
|
|
1871
|
-
metadata = {};
|
|
1872
|
-
}
|
|
1873
|
-
// Apply metadata filter
|
|
1874
|
-
if (hasMetadataFilter) {
|
|
1875
|
-
const matches = matchesMetadataFilter(metadata, options.metadata);
|
|
1876
|
-
if (!matches) {
|
|
1877
|
-
return false;
|
|
1878
|
-
}
|
|
1879
|
-
}
|
|
1880
|
-
// Apply service filter
|
|
1881
|
-
if (hasServiceFilter) {
|
|
1882
|
-
const noun = this.index.getNouns().get(id);
|
|
1883
|
-
if (!noun)
|
|
1884
|
-
return false;
|
|
1885
|
-
const result = { id, score: 0, vector: noun.vector, metadata };
|
|
1886
|
-
if (!this.filterResultsByService([result], options.service).length) {
|
|
1887
|
-
return false;
|
|
1888
|
-
}
|
|
1889
|
-
}
|
|
1890
|
-
return true;
|
|
1891
|
-
};
|
|
1892
|
-
}
|
|
1893
|
-
// When using offset, we need to fetch more results and then slice
|
|
1894
|
-
const offset = options.offset || 0;
|
|
1895
|
-
const totalNeeded = k + offset;
|
|
1896
|
-
// Search in the index with filter
|
|
1897
|
-
const results = await this.index.search(queryVector, totalNeeded, filterFunction);
|
|
1898
|
-
// Skip the offset number of results
|
|
1899
|
-
const paginatedResults = results.slice(offset, offset + k);
|
|
1900
|
-
// Get metadata for each result
|
|
1901
|
-
const searchResults = [];
|
|
1902
|
-
for (const [id, score] of paginatedResults) {
|
|
1903
|
-
const noun = this.index.getNouns().get(id);
|
|
1904
|
-
if (!noun) {
|
|
1905
|
-
continue;
|
|
1906
|
-
}
|
|
1907
|
-
let metadata = await this.storage.getMetadata(id);
|
|
1908
|
-
// Initialize metadata to an empty object if it's null
|
|
1909
|
-
if (metadata === null) {
|
|
1910
|
-
metadata = {};
|
|
1911
|
-
}
|
|
1912
|
-
// Preserve original metadata without overwriting user's custom fields
|
|
1913
|
-
// The search result already has Brainy's UUID in the main 'id' field
|
|
1914
|
-
searchResults.push({
|
|
1915
|
-
id,
|
|
1916
|
-
score: 1 - score, // Convert distance to similarity (higher = more similar)
|
|
1917
|
-
vector: noun.vector,
|
|
1918
|
-
metadata: metadata
|
|
1919
|
-
});
|
|
1920
|
-
}
|
|
1921
|
-
return searchResults;
|
|
1922
|
-
}
|
|
1923
|
-
else {
|
|
1924
|
-
// Get nouns for each noun type in parallel
|
|
1925
|
-
const nounPromises = nounTypes.map((nounType) => this.storage.getNounsByNounType(nounType));
|
|
1926
|
-
const nounArrays = await Promise.all(nounPromises);
|
|
1927
|
-
// Combine all nouns
|
|
1928
|
-
const nouns = [];
|
|
1929
|
-
for (const nounArray of nounArrays) {
|
|
1930
|
-
nouns.push(...nounArray);
|
|
1931
|
-
}
|
|
1932
|
-
// Calculate distances for each noun
|
|
1933
|
-
const results = [];
|
|
1934
|
-
for (const noun of nouns) {
|
|
1935
|
-
const distance = this.index.getDistanceFunction()(queryVector, noun.vector);
|
|
1936
|
-
results.push([noun.id, distance]);
|
|
1937
|
-
}
|
|
1938
|
-
// Sort by distance (ascending)
|
|
1939
|
-
results.sort((a, b) => a[1] - b[1]);
|
|
1940
|
-
// Apply offset and take k results
|
|
1941
|
-
const offset = options.offset || 0;
|
|
1942
|
-
const topResults = results.slice(offset, offset + k);
|
|
1943
|
-
// Get metadata for each result
|
|
1944
|
-
const searchResults = [];
|
|
1945
|
-
for (const [id, score] of topResults) {
|
|
1946
|
-
const noun = nouns.find((n) => n.id === id);
|
|
1947
|
-
if (!noun) {
|
|
1948
|
-
continue;
|
|
1949
|
-
}
|
|
1950
|
-
let metadata = await this.storage.getMetadata(id);
|
|
1951
|
-
// Initialize metadata to an empty object if it's null
|
|
1952
|
-
if (metadata === null) {
|
|
1953
|
-
metadata = {};
|
|
1954
|
-
}
|
|
1955
|
-
// Preserve original metadata without overwriting user's custom fields
|
|
1956
|
-
// The search result already has Brainy's UUID in the main 'id' field
|
|
1957
|
-
searchResults.push({
|
|
1958
|
-
id,
|
|
1959
|
-
score: 1 - score, // Convert distance to similarity (higher = more similar)
|
|
1960
|
-
vector: noun.vector,
|
|
1961
|
-
metadata: metadata
|
|
1962
|
-
});
|
|
1963
|
-
}
|
|
1964
|
-
// Results are already filtered, just return them
|
|
1965
|
-
return searchResults;
|
|
1965
|
+
// Cursor not found, might be stale - return from beginning
|
|
1966
|
+
results = allResults.slice(0, k);
|
|
1967
|
+
startIndex = 0;
|
|
1966
1968
|
}
|
|
1967
1969
|
}
|
|
1968
|
-
|
|
1969
|
-
|
|
1970
|
-
|
|
1970
|
+
else {
|
|
1971
|
+
results = allResults.slice(0, k);
|
|
1972
|
+
}
|
|
1973
|
+
// Create cursor for next page
|
|
1974
|
+
let nextCursor;
|
|
1975
|
+
const hasMoreResults = startIndex + results.length < allResults.length ||
|
|
1976
|
+
allResults.length >= searchK;
|
|
1977
|
+
if (results.length > 0 && hasMoreResults) {
|
|
1978
|
+
const lastResult = results[results.length - 1];
|
|
1979
|
+
nextCursor = {
|
|
1980
|
+
lastId: lastResult.id,
|
|
1981
|
+
lastScore: lastResult.score,
|
|
1982
|
+
position: startIndex + results.length
|
|
1983
|
+
};
|
|
1971
1984
|
}
|
|
1985
|
+
return {
|
|
1986
|
+
results,
|
|
1987
|
+
cursor: nextCursor,
|
|
1988
|
+
hasMore: !!nextCursor,
|
|
1989
|
+
totalEstimate: allResults.length > searchK ? undefined : allResults.length
|
|
1990
|
+
};
|
|
1972
1991
|
}
|
|
1973
1992
|
/**
|
|
1974
|
-
* Search for similar vectors
|
|
1993
|
+
* Search the local database for similar vectors
|
|
1975
1994
|
* @param queryVectorOrData Query vector or data to search for
|
|
1976
1995
|
* @param k Number of results to return
|
|
1977
1996
|
* @param options Additional options
|
|
1978
1997
|
* @returns Array of search results
|
|
1979
1998
|
*/
|
|
1980
|
-
|
|
1981
|
-
|
|
1982
|
-
|
|
1983
|
-
* @param queryVectorOrData Vector or text to search for
|
|
1984
|
-
* @param k Number of results to return
|
|
1985
|
-
* @param options Simple search options (metadata filters only)
|
|
1986
|
-
* @returns Vector search results
|
|
1987
|
-
*/
|
|
1988
|
-
/**
|
|
1989
|
-
* 🔍 Simple Vector Similarity Search - Clean wrapper around find()
|
|
1990
|
-
*
|
|
1991
|
-
* search(query) = find({like: query}) - Pure vector similarity search
|
|
1992
|
-
*
|
|
1993
|
-
* @param queryVectorOrData - Query string, vector, or object to search with
|
|
1994
|
-
* @param options - Search options for filtering and pagination
|
|
1995
|
-
* @returns Array of search results with scores and metadata
|
|
1996
|
-
*
|
|
1997
|
-
* @example
|
|
1998
|
-
* // Simple vector search
|
|
1999
|
-
* await brain.search('machine learning')
|
|
2000
|
-
*
|
|
2001
|
-
* // With filters and pagination
|
|
2002
|
-
* await brain.search('AI', {
|
|
2003
|
-
* limit: 20,
|
|
2004
|
-
* metadata: { type: 'article' },
|
|
2005
|
-
* nounTypes: ['document']
|
|
2006
|
-
* })
|
|
2007
|
-
*/
|
|
2008
|
-
async search(queryVectorOrData, options = {}) {
|
|
2009
|
-
// Build metadata filter from options
|
|
2010
|
-
const metadataFilter = { ...options.metadata };
|
|
2011
|
-
// Add noun type filtering
|
|
2012
|
-
if (options.nounTypes && options.nounTypes.length > 0) {
|
|
2013
|
-
metadataFilter.nounType = { in: options.nounTypes };
|
|
2014
|
-
}
|
|
2015
|
-
// Add item ID filtering
|
|
2016
|
-
if (options.itemIds && options.itemIds.length > 0) {
|
|
2017
|
-
metadataFilter.id = { in: options.itemIds };
|
|
1999
|
+
async searchLocal(queryVectorOrData, k = 10, options = {}) {
|
|
2000
|
+
if (!this.isInitialized) {
|
|
2001
|
+
throw new Error('BrainyData must be initialized before searching. Call init() first.');
|
|
2018
2002
|
}
|
|
2019
|
-
//
|
|
2020
|
-
|
|
2021
|
-
|
|
2022
|
-
|
|
2023
|
-
//
|
|
2024
|
-
if (
|
|
2025
|
-
|
|
2003
|
+
// Check if database is in write-only mode
|
|
2004
|
+
this.checkWriteOnly();
|
|
2005
|
+
// Process the query input for vectorization
|
|
2006
|
+
let queryToUse = queryVectorOrData;
|
|
2007
|
+
// Handle string queries
|
|
2008
|
+
if (typeof queryVectorOrData === 'string' && !options.forceEmbed) {
|
|
2009
|
+
queryToUse = await this.embed(queryVectorOrData);
|
|
2010
|
+
options.forceEmbed = false; // Already embedded, don't force again
|
|
2026
2011
|
}
|
|
2027
|
-
//
|
|
2028
|
-
|
|
2029
|
-
|
|
2030
|
-
|
|
2031
|
-
|
|
2032
|
-
|
|
2033
|
-
|
|
2034
|
-
|
|
2035
|
-
|
|
2036
|
-
|
|
2037
|
-
|
|
2038
|
-
|
|
2039
|
-
|
|
2012
|
+
// Handle JSON object queries with special processing
|
|
2013
|
+
else if (typeof queryVectorOrData === 'object' &&
|
|
2014
|
+
queryVectorOrData !== null &&
|
|
2015
|
+
!Array.isArray(queryVectorOrData) &&
|
|
2016
|
+
!options.forceEmbed) {
|
|
2017
|
+
// If searching within a specific field
|
|
2018
|
+
if (options.searchField) {
|
|
2019
|
+
// Extract text from the specific field
|
|
2020
|
+
const fieldText = extractFieldFromJson(queryVectorOrData, options.searchField);
|
|
2021
|
+
if (fieldText) {
|
|
2022
|
+
queryToUse = await this.embeddingFunction(fieldText);
|
|
2023
|
+
options.forceEmbed = false; // Already embedded, don't force again
|
|
2024
|
+
}
|
|
2025
|
+
}
|
|
2026
|
+
// Otherwise process the entire object with priority fields
|
|
2027
|
+
else {
|
|
2028
|
+
const preparedText = prepareJsonForVectorization(queryVectorOrData, {
|
|
2029
|
+
priorityFields: options.priorityFields || [
|
|
2030
|
+
'name',
|
|
2031
|
+
'title',
|
|
2032
|
+
'company',
|
|
2033
|
+
'organization',
|
|
2034
|
+
'description',
|
|
2035
|
+
'summary'
|
|
2036
|
+
]
|
|
2037
|
+
});
|
|
2038
|
+
queryToUse = await this.embeddingFunction(preparedText);
|
|
2039
|
+
options.forceEmbed = false; // Already embedded, don't force again
|
|
2040
|
+
}
|
|
2040
2041
|
}
|
|
2041
|
-
//
|
|
2042
|
-
|
|
2043
|
-
|
|
2044
|
-
|
|
2045
|
-
|
|
2046
|
-
|
|
2042
|
+
// If noun types are specified, use searchByNounTypes
|
|
2043
|
+
let searchResults;
|
|
2044
|
+
if (options.nounTypes && options.nounTypes.length > 0) {
|
|
2045
|
+
searchResults = await this.searchByNounTypes(queryToUse, k, options.nounTypes, {
|
|
2046
|
+
forceEmbed: options.forceEmbed,
|
|
2047
|
+
service: options.service,
|
|
2048
|
+
metadata: options.metadata,
|
|
2049
|
+
offset: options.offset
|
|
2050
|
+
});
|
|
2051
|
+
}
|
|
2052
|
+
else {
|
|
2053
|
+
// Otherwise, search all GraphNouns
|
|
2054
|
+
searchResults = await this.searchByNounTypes(queryToUse, k, null, {
|
|
2055
|
+
forceEmbed: options.forceEmbed,
|
|
2056
|
+
service: options.service,
|
|
2057
|
+
metadata: options.metadata,
|
|
2058
|
+
offset: options.offset
|
|
2059
|
+
});
|
|
2060
|
+
}
|
|
2061
|
+
// Filter out placeholder nouns and deleted items from search results
|
|
2062
|
+
searchResults = searchResults.filter((result) => {
|
|
2063
|
+
if (result.metadata && typeof result.metadata === 'object') {
|
|
2064
|
+
const metadata = result.metadata;
|
|
2065
|
+
// Exclude deleted items from search results (soft delete)
|
|
2066
|
+
// Check namespaced field
|
|
2067
|
+
if (metadata._brainy?.deleted === true) {
|
|
2068
|
+
return false;
|
|
2069
|
+
}
|
|
2070
|
+
// Exclude placeholder nouns from search results
|
|
2071
|
+
if (metadata.isPlaceholder) {
|
|
2072
|
+
return false;
|
|
2073
|
+
}
|
|
2074
|
+
// Apply domain filter if specified
|
|
2075
|
+
if (options.filter?.domain) {
|
|
2076
|
+
if (metadata.domain !== options.filter.domain) {
|
|
2077
|
+
return false;
|
|
2078
|
+
}
|
|
2079
|
+
}
|
|
2080
|
+
}
|
|
2081
|
+
return true;
|
|
2082
|
+
});
|
|
2083
|
+
// If includeVerbs is true, retrieve associated GraphVerbs for each result
|
|
2084
|
+
if (options.includeVerbs && this.storage) {
|
|
2085
|
+
for (const result of searchResults) {
|
|
2086
|
+
try {
|
|
2087
|
+
// Get outgoing verbs for this noun
|
|
2088
|
+
const outgoingVerbs = await this.storage.getVerbsBySource(result.id);
|
|
2089
|
+
// Get incoming verbs for this noun
|
|
2090
|
+
const incomingVerbs = await this.storage.getVerbsByTarget(result.id);
|
|
2091
|
+
// Combine all verbs
|
|
2092
|
+
const allVerbs = [...outgoingVerbs, ...incomingVerbs];
|
|
2093
|
+
// Add verbs to the result metadata
|
|
2094
|
+
if (!result.metadata) {
|
|
2095
|
+
result.metadata = {};
|
|
2096
|
+
}
|
|
2097
|
+
// Add the verbs to the metadata
|
|
2098
|
+
;
|
|
2099
|
+
result.metadata.associatedVerbs = allVerbs;
|
|
2100
|
+
}
|
|
2101
|
+
catch (error) {
|
|
2102
|
+
console.warn(`Failed to retrieve verbs for noun ${result.id}:`, error);
|
|
2103
|
+
}
|
|
2104
|
+
}
|
|
2105
|
+
}
|
|
2106
|
+
return searchResults;
|
|
2047
2107
|
}
|
|
2048
2108
|
/**
|
|
2049
|
-
*
|
|
2050
|
-
* @
|
|
2109
|
+
* Find entities similar to a given entity ID
|
|
2110
|
+
* @param id ID of the entity to find similar entities for
|
|
2111
|
+
* @param options Additional options
|
|
2112
|
+
* @returns Array of search results with similarity scores
|
|
2051
2113
|
*/
|
|
2052
|
-
|
|
2053
|
-
|
|
2114
|
+
async findSimilar(id, options = {}) {
|
|
2115
|
+
await this.ensureInitialized();
|
|
2116
|
+
// Get the entity by ID
|
|
2117
|
+
const entity = await this.getNoun(id);
|
|
2118
|
+
if (!entity) {
|
|
2119
|
+
throw new Error(`Entity with ID ${id} not found`);
|
|
2120
|
+
}
|
|
2121
|
+
// If relationType is specified, directly get related entities by that type
|
|
2122
|
+
if (options.relationType) {
|
|
2123
|
+
// Get all verbs (relationships) from the source entity
|
|
2124
|
+
const outgoingVerbs = await this.storage.getVerbsBySource(id);
|
|
2125
|
+
// Filter to only include verbs of the specified type
|
|
2126
|
+
const verbsOfType = outgoingVerbs.filter((verb) => verb.type === options.relationType);
|
|
2127
|
+
// Get the target IDs
|
|
2128
|
+
const targetIds = verbsOfType.map((verb) => verb.target);
|
|
2129
|
+
// Get the actual entities for these IDs
|
|
2130
|
+
const results = [];
|
|
2131
|
+
for (const targetId of targetIds) {
|
|
2132
|
+
// Skip undefined targetIds
|
|
2133
|
+
if (typeof targetId !== 'string')
|
|
2134
|
+
continue;
|
|
2135
|
+
const targetEntity = await this.getNoun(targetId);
|
|
2136
|
+
if (targetEntity) {
|
|
2137
|
+
results.push({
|
|
2138
|
+
id: targetId,
|
|
2139
|
+
score: 1.0, // Default similarity score
|
|
2140
|
+
vector: targetEntity.vector,
|
|
2141
|
+
metadata: targetEntity.metadata
|
|
2142
|
+
});
|
|
2143
|
+
}
|
|
2144
|
+
}
|
|
2145
|
+
// Return the results, limited to the requested number
|
|
2146
|
+
return results.slice(0, options.limit || 10);
|
|
2147
|
+
}
|
|
2148
|
+
// If no relationType is specified, use the original vector similarity search
|
|
2149
|
+
const k = (options.limit || 10) + 1; // Add 1 to account for the original entity
|
|
2150
|
+
const searchResults = await this.search(entity.vector, {
|
|
2151
|
+
limit: k,
|
|
2152
|
+
excludeDeleted: false,
|
|
2153
|
+
nounTypes: options.nounTypes
|
|
2154
|
+
});
|
|
2155
|
+
// Filter out the original entity and limit to the requested number
|
|
2156
|
+
return searchResults
|
|
2157
|
+
.filter((result) => result.id !== id)
|
|
2158
|
+
.slice(0, options.limit || 10);
|
|
2054
2159
|
}
|
|
2055
2160
|
/**
|
|
2056
|
-
*
|
|
2057
|
-
* @internal
|
|
2161
|
+
* Get a vector by ID
|
|
2058
2162
|
*/
|
|
2059
|
-
|
|
2163
|
+
// Legacy get() method removed - use getNoun() instead
|
|
2164
|
+
/**
|
|
2165
|
+
* Check if a document with the given ID exists
|
|
2166
|
+
* This is a direct storage operation that works in write-only mode when allowDirectReads is enabled
|
|
2167
|
+
* @param id The ID to check for existence
|
|
2168
|
+
* @returns Promise<boolean> True if the document exists, false otherwise
|
|
2169
|
+
*/
|
|
2170
|
+
async has(id) {
|
|
2171
|
+
if (id === null || id === undefined) {
|
|
2172
|
+
throw new Error('ID cannot be null or undefined');
|
|
2173
|
+
}
|
|
2174
|
+
await this.ensureInitialized();
|
|
2175
|
+
// This is a direct storage operation - check if allowed in write-only mode
|
|
2176
|
+
if (this.writeOnly && !this.allowDirectReads) {
|
|
2177
|
+
throw new Error('Cannot perform has() operation: database is in write-only mode. Enable allowDirectReads for direct storage operations.');
|
|
2178
|
+
}
|
|
2060
2179
|
try {
|
|
2061
|
-
|
|
2180
|
+
// Always query storage directly for existence check
|
|
2181
|
+
const noun = await this.storage.getNoun(id);
|
|
2182
|
+
return noun !== null;
|
|
2062
2183
|
}
|
|
2063
|
-
catch {
|
|
2064
|
-
|
|
2184
|
+
catch (error) {
|
|
2185
|
+
// If storage lookup fails, the item doesn't exist
|
|
2186
|
+
return false;
|
|
2065
2187
|
}
|
|
2066
2188
|
}
|
|
2067
2189
|
/**
|
|
2068
|
-
*
|
|
2069
|
-
*
|
|
2070
|
-
*
|
|
2071
|
-
*
|
|
2072
|
-
* @internal
|
|
2190
|
+
* Check if a document with the given ID exists (alias for has)
|
|
2191
|
+
* This is a direct storage operation that works in write-only mode when allowDirectReads is enabled
|
|
2192
|
+
* @param id The ID to check for existence
|
|
2193
|
+
* @returns Promise<boolean> True if the document exists, false otherwise
|
|
2073
2194
|
*/
|
|
2074
|
-
async _internalVectorSearch(queryVectorOrData, k = 10, options = {}) {
|
|
2075
|
-
// Generate query vector
|
|
2076
|
-
const queryVector = Array.isArray(queryVectorOrData) &&
|
|
2077
|
-
typeof queryVectorOrData[0] === 'number' ?
|
|
2078
|
-
queryVectorOrData :
|
|
2079
|
-
await this.embed(queryVectorOrData);
|
|
2080
|
-
// Apply metadata filter if provided
|
|
2081
|
-
let filterFunction;
|
|
2082
|
-
if (options.metadata) {
|
|
2083
|
-
const matchingIdsArray = await this.metadataIndex?.getIdsForFilter(options.metadata) || [];
|
|
2084
|
-
const matchingIds = new Set(matchingIdsArray);
|
|
2085
|
-
filterFunction = async (id) => matchingIds.has(id);
|
|
2086
|
-
}
|
|
2087
|
-
// Direct HNSW search
|
|
2088
|
-
const results = await this.index.search(queryVector, k, filterFunction);
|
|
2089
|
-
// Get metadata for results
|
|
2090
|
-
const searchResults = [];
|
|
2091
|
-
for (const [id, similarity] of results) {
|
|
2092
|
-
const metadata = await this.getNoun(id);
|
|
2093
|
-
searchResults.push({
|
|
2094
|
-
id,
|
|
2095
|
-
score: similarity,
|
|
2096
|
-
vector: [],
|
|
2097
|
-
metadata: metadata?.metadata || {}
|
|
2098
|
-
});
|
|
2099
|
-
}
|
|
2100
|
-
return searchResults;
|
|
2101
|
-
}
|
|
2102
2195
|
/**
|
|
2103
|
-
*
|
|
2104
|
-
*
|
|
2196
|
+
* Check if a noun exists
|
|
2197
|
+
* @param id The noun ID
|
|
2198
|
+
* @returns True if exists
|
|
2105
2199
|
*/
|
|
2106
|
-
async
|
|
2107
|
-
|
|
2108
|
-
// Validate input is not null or undefined
|
|
2109
|
-
if (queryVectorOrData === null || queryVectorOrData === undefined) {
|
|
2110
|
-
throw new Error('Query cannot be null or undefined');
|
|
2111
|
-
}
|
|
2112
|
-
// Validate k parameter first, before any other logic
|
|
2113
|
-
if (k <= 0 || typeof k !== 'number' || isNaN(k)) {
|
|
2114
|
-
throw new Error('Parameter k must be a positive number');
|
|
2115
|
-
}
|
|
2116
|
-
if (!this.isInitialized) {
|
|
2117
|
-
throw new Error('BrainyData must be initialized before searching. Call init() first.');
|
|
2118
|
-
}
|
|
2119
|
-
// Check if database is in write-only mode
|
|
2120
|
-
this.checkWriteOnly();
|
|
2121
|
-
// If searching for verbs directly
|
|
2122
|
-
if (options.searchVerbs) {
|
|
2123
|
-
const verbResults = await this.searchVerbs(queryVectorOrData, k, {
|
|
2124
|
-
forceEmbed: options.forceEmbed,
|
|
2125
|
-
verbTypes: options.verbTypes
|
|
2126
|
-
});
|
|
2127
|
-
// Convert verb results to SearchResult format
|
|
2128
|
-
return verbResults.map((verb) => ({
|
|
2129
|
-
id: verb.id,
|
|
2130
|
-
score: verb.similarity,
|
|
2131
|
-
vector: verb.embedding || [],
|
|
2132
|
-
metadata: {
|
|
2133
|
-
verb: verb.verb,
|
|
2134
|
-
source: verb.source,
|
|
2135
|
-
target: verb.target,
|
|
2136
|
-
...verb.data
|
|
2137
|
-
}
|
|
2138
|
-
}));
|
|
2139
|
-
}
|
|
2140
|
-
// If searching for nouns connected by verbs
|
|
2141
|
-
if (options.searchConnectedNouns) {
|
|
2142
|
-
return this.searchNounsByVerbs(queryVectorOrData, k, {
|
|
2143
|
-
forceEmbed: options.forceEmbed,
|
|
2144
|
-
verbTypes: options.verbTypes,
|
|
2145
|
-
direction: options.verbDirection
|
|
2146
|
-
});
|
|
2147
|
-
}
|
|
2148
|
-
// If a specific search mode is specified, use the appropriate search method
|
|
2149
|
-
if (options.searchMode === 'local') {
|
|
2150
|
-
return this.searchLocal(queryVectorOrData, k, options);
|
|
2151
|
-
}
|
|
2152
|
-
else if (options.searchMode === 'remote') {
|
|
2153
|
-
return this.searchRemote(queryVectorOrData, k, options);
|
|
2154
|
-
}
|
|
2155
|
-
else if (options.searchMode === 'combined') {
|
|
2156
|
-
return this.searchCombined(queryVectorOrData, k, options);
|
|
2157
|
-
}
|
|
2158
|
-
// Generate deduplication key for concurrent request handling
|
|
2159
|
-
const dedupeKey = RequestDeduplicator.getSearchKey(typeof queryVectorOrData === 'string' ? queryVectorOrData : JSON.stringify(queryVectorOrData), k, options);
|
|
2160
|
-
// Use augmentation system for search (includes deduplication, batching, and caching)
|
|
2161
|
-
return this.augmentations.execute('search', { query: queryVectorOrData, k, options, dedupeKey }, async () => {
|
|
2162
|
-
// Default behavior (backward compatible): search locally
|
|
2163
|
-
try {
|
|
2164
|
-
// BEST OF BOTH: Automatically exclude soft-deleted items (Neural Intelligence improvement)
|
|
2165
|
-
// BUT only when there's already metadata filtering happening
|
|
2166
|
-
let metadataFilter = options.metadata;
|
|
2167
|
-
// Only add soft-delete filter if there's already metadata being filtered
|
|
2168
|
-
// This preserves pure vector searches without metadata
|
|
2169
|
-
if (metadataFilter && Object.keys(metadataFilter).length > 0) {
|
|
2170
|
-
// If no explicit deleted filter is provided, exclude soft-deleted items
|
|
2171
|
-
// Use namespaced field for O(1) performance
|
|
2172
|
-
if (!metadataFilter['_brainy.deleted'] && !metadataFilter.anyOf) {
|
|
2173
|
-
metadataFilter = {
|
|
2174
|
-
...metadataFilter,
|
|
2175
|
-
['_brainy.deleted']: false // O(1) positive match instead of notEquals
|
|
2176
|
-
};
|
|
2177
|
-
}
|
|
2178
|
-
}
|
|
2179
|
-
const hasMetadataFilter = metadataFilter && Object.keys(metadataFilter).length > 0;
|
|
2180
|
-
// Check cache first (transparent to user) - but skip cache if we have metadata filters
|
|
2181
|
-
if (!hasMetadataFilter) {
|
|
2182
|
-
const cacheKey = this.cache?.getCacheKey(queryVectorOrData, k, options);
|
|
2183
|
-
const cachedResults = this.cache?.get(cacheKey);
|
|
2184
|
-
if (cachedResults) {
|
|
2185
|
-
// Track cache hit in health monitor
|
|
2186
|
-
if (this.monitoring) {
|
|
2187
|
-
const latency = Date.now() - startTime;
|
|
2188
|
-
this.monitoring.recordRequest(latency, false);
|
|
2189
|
-
this.monitoring.recordCacheAccess(true);
|
|
2190
|
-
}
|
|
2191
|
-
return cachedResults;
|
|
2192
|
-
}
|
|
2193
|
-
}
|
|
2194
|
-
// Cache miss - perform actual search
|
|
2195
|
-
const results = await this.searchLocal(queryVectorOrData, k, {
|
|
2196
|
-
...options,
|
|
2197
|
-
metadata: metadataFilter
|
|
2198
|
-
});
|
|
2199
|
-
// Cache results for future queries (unless explicitly disabled or has metadata filter)
|
|
2200
|
-
if (!options.skipCache && !hasMetadataFilter) {
|
|
2201
|
-
const cacheKey = this.cache?.getCacheKey(queryVectorOrData, k, options);
|
|
2202
|
-
this.cache?.set(cacheKey, results);
|
|
2203
|
-
}
|
|
2204
|
-
// Track successful search in health monitor
|
|
2205
|
-
if (this.monitoring) {
|
|
2206
|
-
const latency = Date.now() - startTime;
|
|
2207
|
-
this.monitoring.recordRequest(latency, false);
|
|
2208
|
-
this.monitoring.recordCacheAccess(false);
|
|
2209
|
-
}
|
|
2210
|
-
return results;
|
|
2211
|
-
}
|
|
2212
|
-
catch (error) {
|
|
2213
|
-
// Track error in health monitor
|
|
2214
|
-
if (this.monitoring) {
|
|
2215
|
-
const latency = Date.now() - startTime;
|
|
2216
|
-
this.monitoring.recordRequest(latency, true);
|
|
2217
|
-
}
|
|
2218
|
-
throw error;
|
|
2219
|
-
}
|
|
2220
|
-
});
|
|
2200
|
+
async hasNoun(id) {
|
|
2201
|
+
return this.hasNoun(id);
|
|
2221
2202
|
}
|
|
2222
2203
|
/**
|
|
2223
|
-
*
|
|
2224
|
-
*
|
|
2225
|
-
* @param
|
|
2226
|
-
* @
|
|
2227
|
-
* @returns Paginated search results with cursor for next page
|
|
2204
|
+
* Get metadata for a document by ID
|
|
2205
|
+
* This is a direct storage operation that works in write-only mode when allowDirectReads is enabled
|
|
2206
|
+
* @param id The ID of the document
|
|
2207
|
+
* @returns Promise<T | null> The metadata object or null if not found
|
|
2228
2208
|
*/
|
|
2209
|
+
// Legacy getMetadata() method removed - use getNounMetadata() instead
|
|
2229
2210
|
/**
|
|
2230
|
-
*
|
|
2211
|
+
* Get multiple documents by their IDs
|
|
2212
|
+
* This is a direct storage operation that works in write-only mode when allowDirectReads is enabled
|
|
2213
|
+
* @param ids Array of IDs to retrieve
|
|
2214
|
+
* @returns Promise<Array<VectorDocument<T> | null>> Array of documents (null for missing IDs)
|
|
2215
|
+
*/
|
|
2216
|
+
/**
|
|
2217
|
+
* Get multiple nouns - by IDs, filters, or pagination
|
|
2218
|
+
* @param idsOrOptions Array of IDs or query options
|
|
2219
|
+
* @returns Array of noun documents
|
|
2220
|
+
*
|
|
2231
2221
|
* @example
|
|
2232
|
-
* //
|
|
2233
|
-
* await brain.
|
|
2234
|
-
*
|
|
2235
|
-
*
|
|
2222
|
+
* // Get by IDs
|
|
2223
|
+
* await brain.getNouns(['id1', 'id2'])
|
|
2224
|
+
*
|
|
2225
|
+
* // Get with filters
|
|
2226
|
+
* await brain.getNouns({
|
|
2227
|
+
* filter: { type: 'article' },
|
|
2228
|
+
* limit: 10
|
|
2229
|
+
* })
|
|
2230
|
+
*
|
|
2231
|
+
* // Get with pagination
|
|
2232
|
+
* await brain.getNouns({
|
|
2233
|
+
* offset: 20,
|
|
2234
|
+
* limit: 10
|
|
2235
|
+
* })
|
|
2236
2236
|
*/
|
|
2237
|
-
async
|
|
2238
|
-
//
|
|
2239
|
-
|
|
2240
|
-
|
|
2241
|
-
const { cursor, ...searchOptions } = options;
|
|
2242
|
-
const allResults = await this.search(queryVectorOrData, {
|
|
2243
|
-
limit: searchK,
|
|
2244
|
-
nounTypes: searchOptions.nounTypes,
|
|
2245
|
-
metadata: searchOptions.filter
|
|
2246
|
-
});
|
|
2247
|
-
let results = allResults;
|
|
2248
|
-
let startIndex = 0;
|
|
2249
|
-
// If cursor provided, find starting position
|
|
2250
|
-
if (options.cursor) {
|
|
2251
|
-
startIndex = allResults.findIndex((r) => r.id === options.cursor.lastId &&
|
|
2252
|
-
Math.abs(r.score - options.cursor.lastScore) < 0.0001);
|
|
2253
|
-
if (startIndex >= 0) {
|
|
2254
|
-
startIndex += 1; // Start after the cursor position
|
|
2255
|
-
results = allResults.slice(startIndex, startIndex + k);
|
|
2256
|
-
}
|
|
2257
|
-
else {
|
|
2258
|
-
// Cursor not found, might be stale - return from beginning
|
|
2259
|
-
results = allResults.slice(0, k);
|
|
2260
|
-
startIndex = 0;
|
|
2261
|
-
}
|
|
2262
|
-
}
|
|
2263
|
-
else {
|
|
2264
|
-
results = allResults.slice(0, k);
|
|
2237
|
+
async getNouns(idsOrOptions) {
|
|
2238
|
+
// Handle array of IDs
|
|
2239
|
+
if (Array.isArray(idsOrOptions)) {
|
|
2240
|
+
return this.getNounsByIds(idsOrOptions);
|
|
2265
2241
|
}
|
|
2266
|
-
//
|
|
2267
|
-
|
|
2268
|
-
|
|
2269
|
-
|
|
2270
|
-
|
|
2271
|
-
const lastResult = results[results.length - 1];
|
|
2272
|
-
nextCursor = {
|
|
2273
|
-
lastId: lastResult.id,
|
|
2274
|
-
lastScore: lastResult.score,
|
|
2275
|
-
position: startIndex + results.length
|
|
2276
|
-
};
|
|
2242
|
+
// Handle options object
|
|
2243
|
+
const options = idsOrOptions || {};
|
|
2244
|
+
// If ids are provided in options, get by IDs
|
|
2245
|
+
if (options.ids) {
|
|
2246
|
+
return this.getNounsByIds(options.ids);
|
|
2277
2247
|
}
|
|
2278
|
-
|
|
2279
|
-
|
|
2280
|
-
|
|
2281
|
-
hasMore: !!nextCursor,
|
|
2282
|
-
totalEstimate: allResults.length > searchK ? undefined : allResults.length
|
|
2283
|
-
};
|
|
2248
|
+
// Otherwise, do a filtered/paginated query and extract items
|
|
2249
|
+
const result = await this.queryNounsByFilter(options);
|
|
2250
|
+
return result.items;
|
|
2284
2251
|
}
|
|
2285
2252
|
/**
|
|
2286
|
-
*
|
|
2287
|
-
* @param queryVectorOrData Query vector or data to search for
|
|
2288
|
-
* @param k Number of results to return
|
|
2289
|
-
* @param options Additional options
|
|
2290
|
-
* @returns Array of search results
|
|
2253
|
+
* Internal: Get nouns by IDs
|
|
2291
2254
|
*/
|
|
2292
|
-
async
|
|
2293
|
-
if (!
|
|
2294
|
-
throw new Error('
|
|
2255
|
+
async getNounsByIds(ids) {
|
|
2256
|
+
if (!Array.isArray(ids)) {
|
|
2257
|
+
throw new Error('IDs must be provided as an array');
|
|
2295
2258
|
}
|
|
2296
|
-
|
|
2297
|
-
|
|
2298
|
-
|
|
2299
|
-
|
|
2300
|
-
// Handle string queries
|
|
2301
|
-
if (typeof queryVectorOrData === 'string' && !options.forceEmbed) {
|
|
2302
|
-
queryToUse = await this.embed(queryVectorOrData);
|
|
2303
|
-
options.forceEmbed = false; // Already embedded, don't force again
|
|
2259
|
+
await this.ensureInitialized();
|
|
2260
|
+
// This is a direct storage operation - check if allowed in write-only mode
|
|
2261
|
+
if (this.writeOnly && !this.allowDirectReads) {
|
|
2262
|
+
throw new Error('Cannot perform getBatch() operation: database is in write-only mode. Enable allowDirectReads for direct storage operations.');
|
|
2304
2263
|
}
|
|
2305
|
-
|
|
2306
|
-
|
|
2307
|
-
|
|
2308
|
-
|
|
2309
|
-
|
|
2310
|
-
// If searching within a specific field
|
|
2311
|
-
if (options.searchField) {
|
|
2312
|
-
// Extract text from the specific field
|
|
2313
|
-
const fieldText = extractFieldFromJson(queryVectorOrData, options.searchField);
|
|
2314
|
-
if (fieldText) {
|
|
2315
|
-
queryToUse = await this.embeddingFunction(fieldText);
|
|
2316
|
-
options.forceEmbed = false; // Already embedded, don't force again
|
|
2317
|
-
}
|
|
2318
|
-
}
|
|
2319
|
-
// Otherwise process the entire object with priority fields
|
|
2320
|
-
else {
|
|
2321
|
-
const preparedText = prepareJsonForVectorization(queryVectorOrData, {
|
|
2322
|
-
priorityFields: options.priorityFields || [
|
|
2323
|
-
'name',
|
|
2324
|
-
'title',
|
|
2325
|
-
'company',
|
|
2326
|
-
'organization',
|
|
2327
|
-
'description',
|
|
2328
|
-
'summary'
|
|
2329
|
-
]
|
|
2330
|
-
});
|
|
2331
|
-
queryToUse = await this.embeddingFunction(preparedText);
|
|
2332
|
-
options.forceEmbed = false; // Already embedded, don't force again
|
|
2333
|
-
}
|
|
2334
|
-
}
|
|
2335
|
-
// If noun types are specified, use searchByNounTypes
|
|
2336
|
-
let searchResults;
|
|
2337
|
-
if (options.nounTypes && options.nounTypes.length > 0) {
|
|
2338
|
-
searchResults = await this.searchByNounTypes(queryToUse, k, options.nounTypes, {
|
|
2339
|
-
forceEmbed: options.forceEmbed,
|
|
2340
|
-
service: options.service,
|
|
2341
|
-
metadata: options.metadata,
|
|
2342
|
-
offset: options.offset
|
|
2343
|
-
});
|
|
2344
|
-
}
|
|
2345
|
-
else {
|
|
2346
|
-
// Otherwise, search all GraphNouns
|
|
2347
|
-
searchResults = await this.searchByNounTypes(queryToUse, k, null, {
|
|
2348
|
-
forceEmbed: options.forceEmbed,
|
|
2349
|
-
service: options.service,
|
|
2350
|
-
metadata: options.metadata,
|
|
2351
|
-
offset: options.offset
|
|
2352
|
-
});
|
|
2353
|
-
}
|
|
2354
|
-
// Filter out placeholder nouns and deleted items from search results
|
|
2355
|
-
searchResults = searchResults.filter((result) => {
|
|
2356
|
-
if (result.metadata && typeof result.metadata === 'object') {
|
|
2357
|
-
const metadata = result.metadata;
|
|
2358
|
-
// Exclude deleted items from search results (soft delete)
|
|
2359
|
-
// Check namespaced field
|
|
2360
|
-
if (metadata._brainy?.deleted === true) {
|
|
2361
|
-
return false;
|
|
2362
|
-
}
|
|
2363
|
-
// Exclude placeholder nouns from search results
|
|
2364
|
-
if (metadata.isPlaceholder) {
|
|
2365
|
-
return false;
|
|
2366
|
-
}
|
|
2367
|
-
// Apply domain filter if specified
|
|
2368
|
-
if (options.filter?.domain) {
|
|
2369
|
-
if (metadata.domain !== options.filter.domain) {
|
|
2370
|
-
return false;
|
|
2371
|
-
}
|
|
2372
|
-
}
|
|
2373
|
-
}
|
|
2374
|
-
return true;
|
|
2375
|
-
});
|
|
2376
|
-
// If includeVerbs is true, retrieve associated GraphVerbs for each result
|
|
2377
|
-
if (options.includeVerbs && this.storage) {
|
|
2378
|
-
for (const result of searchResults) {
|
|
2379
|
-
try {
|
|
2380
|
-
// Get outgoing verbs for this noun
|
|
2381
|
-
const outgoingVerbs = await this.storage.getVerbsBySource(result.id);
|
|
2382
|
-
// Get incoming verbs for this noun
|
|
2383
|
-
const incomingVerbs = await this.storage.getVerbsByTarget(result.id);
|
|
2384
|
-
// Combine all verbs
|
|
2385
|
-
const allVerbs = [...outgoingVerbs, ...incomingVerbs];
|
|
2386
|
-
// Add verbs to the result metadata
|
|
2387
|
-
if (!result.metadata) {
|
|
2388
|
-
result.metadata = {};
|
|
2389
|
-
}
|
|
2390
|
-
// Add the verbs to the metadata
|
|
2391
|
-
;
|
|
2392
|
-
result.metadata.associatedVerbs = allVerbs;
|
|
2393
|
-
}
|
|
2394
|
-
catch (error) {
|
|
2395
|
-
console.warn(`Failed to retrieve verbs for noun ${result.id}:`, error);
|
|
2396
|
-
}
|
|
2397
|
-
}
|
|
2398
|
-
}
|
|
2399
|
-
return searchResults;
|
|
2400
|
-
}
|
|
2401
|
-
/**
|
|
2402
|
-
* Find entities similar to a given entity ID
|
|
2403
|
-
* @param id ID of the entity to find similar entities for
|
|
2404
|
-
* @param options Additional options
|
|
2405
|
-
* @returns Array of search results with similarity scores
|
|
2406
|
-
*/
|
|
2407
|
-
async findSimilar(id, options = {}) {
|
|
2408
|
-
await this.ensureInitialized();
|
|
2409
|
-
// Get the entity by ID
|
|
2410
|
-
const entity = await this.getNoun(id);
|
|
2411
|
-
if (!entity) {
|
|
2412
|
-
throw new Error(`Entity with ID ${id} not found`);
|
|
2413
|
-
}
|
|
2414
|
-
// If relationType is specified, directly get related entities by that type
|
|
2415
|
-
if (options.relationType) {
|
|
2416
|
-
// Get all verbs (relationships) from the source entity
|
|
2417
|
-
const outgoingVerbs = await this.storage.getVerbsBySource(id);
|
|
2418
|
-
// Filter to only include verbs of the specified type
|
|
2419
|
-
const verbsOfType = outgoingVerbs.filter((verb) => verb.type === options.relationType);
|
|
2420
|
-
// Get the target IDs
|
|
2421
|
-
const targetIds = verbsOfType.map((verb) => verb.target);
|
|
2422
|
-
// Get the actual entities for these IDs
|
|
2423
|
-
const results = [];
|
|
2424
|
-
for (const targetId of targetIds) {
|
|
2425
|
-
// Skip undefined targetIds
|
|
2426
|
-
if (typeof targetId !== 'string')
|
|
2427
|
-
continue;
|
|
2428
|
-
const targetEntity = await this.getNoun(targetId);
|
|
2429
|
-
if (targetEntity) {
|
|
2430
|
-
results.push({
|
|
2431
|
-
id: targetId,
|
|
2432
|
-
score: 1.0, // Default similarity score
|
|
2433
|
-
vector: targetEntity.vector,
|
|
2434
|
-
metadata: targetEntity.metadata
|
|
2435
|
-
});
|
|
2436
|
-
}
|
|
2437
|
-
}
|
|
2438
|
-
// Return the results, limited to the requested number
|
|
2439
|
-
return results.slice(0, options.limit || 10);
|
|
2440
|
-
}
|
|
2441
|
-
// If no relationType is specified, use the original vector similarity search
|
|
2442
|
-
const k = (options.limit || 10) + 1; // Add 1 to account for the original entity
|
|
2443
|
-
const searchResults = await this.search(entity.vector, {
|
|
2444
|
-
limit: k,
|
|
2445
|
-
excludeDeleted: false,
|
|
2446
|
-
nounTypes: options.nounTypes
|
|
2447
|
-
});
|
|
2448
|
-
// Filter out the original entity and limit to the requested number
|
|
2449
|
-
return searchResults
|
|
2450
|
-
.filter((result) => result.id !== id)
|
|
2451
|
-
.slice(0, options.limit || 10);
|
|
2452
|
-
}
|
|
2453
|
-
/**
|
|
2454
|
-
* Get a vector by ID
|
|
2455
|
-
*/
|
|
2456
|
-
// Legacy get() method removed - use getNoun() instead
|
|
2457
|
-
/**
|
|
2458
|
-
* Check if a document with the given ID exists
|
|
2459
|
-
* This is a direct storage operation that works in write-only mode when allowDirectReads is enabled
|
|
2460
|
-
* @param id The ID to check for existence
|
|
2461
|
-
* @returns Promise<boolean> True if the document exists, false otherwise
|
|
2462
|
-
*/
|
|
2463
|
-
async has(id) {
|
|
2464
|
-
if (id === null || id === undefined) {
|
|
2465
|
-
throw new Error('ID cannot be null or undefined');
|
|
2466
|
-
}
|
|
2467
|
-
await this.ensureInitialized();
|
|
2468
|
-
// This is a direct storage operation - check if allowed in write-only mode
|
|
2469
|
-
if (this.writeOnly && !this.allowDirectReads) {
|
|
2470
|
-
throw new Error('Cannot perform has() operation: database is in write-only mode. Enable allowDirectReads for direct storage operations.');
|
|
2471
|
-
}
|
|
2472
|
-
try {
|
|
2473
|
-
// Always query storage directly for existence check
|
|
2474
|
-
const noun = await this.storage.getNoun(id);
|
|
2475
|
-
return noun !== null;
|
|
2476
|
-
}
|
|
2477
|
-
catch (error) {
|
|
2478
|
-
// If storage lookup fails, the item doesn't exist
|
|
2479
|
-
return false;
|
|
2480
|
-
}
|
|
2481
|
-
}
|
|
2482
|
-
/**
|
|
2483
|
-
* Check if a document with the given ID exists (alias for has)
|
|
2484
|
-
* This is a direct storage operation that works in write-only mode when allowDirectReads is enabled
|
|
2485
|
-
* @param id The ID to check for existence
|
|
2486
|
-
* @returns Promise<boolean> True if the document exists, false otherwise
|
|
2487
|
-
*/
|
|
2488
|
-
/**
|
|
2489
|
-
* Check if a noun exists
|
|
2490
|
-
* @param id The noun ID
|
|
2491
|
-
* @returns True if exists
|
|
2492
|
-
*/
|
|
2493
|
-
async hasNoun(id) {
|
|
2494
|
-
return this.hasNoun(id);
|
|
2495
|
-
}
|
|
2496
|
-
/**
|
|
2497
|
-
* Get metadata for a document by ID
|
|
2498
|
-
* This is a direct storage operation that works in write-only mode when allowDirectReads is enabled
|
|
2499
|
-
* @param id The ID of the document
|
|
2500
|
-
* @returns Promise<T | null> The metadata object or null if not found
|
|
2501
|
-
*/
|
|
2502
|
-
// Legacy getMetadata() method removed - use getNounMetadata() instead
|
|
2503
|
-
/**
|
|
2504
|
-
* Get multiple documents by their IDs
|
|
2505
|
-
* This is a direct storage operation that works in write-only mode when allowDirectReads is enabled
|
|
2506
|
-
* @param ids Array of IDs to retrieve
|
|
2507
|
-
* @returns Promise<Array<VectorDocument<T> | null>> Array of documents (null for missing IDs)
|
|
2508
|
-
*/
|
|
2509
|
-
/**
|
|
2510
|
-
* Get multiple nouns - by IDs, filters, or pagination
|
|
2511
|
-
* @param idsOrOptions Array of IDs or query options
|
|
2512
|
-
* @returns Array of noun documents
|
|
2513
|
-
*
|
|
2514
|
-
* @example
|
|
2515
|
-
* // Get by IDs
|
|
2516
|
-
* await brain.getNouns(['id1', 'id2'])
|
|
2517
|
-
*
|
|
2518
|
-
* // Get with filters
|
|
2519
|
-
* await brain.getNouns({
|
|
2520
|
-
* filter: { type: 'article' },
|
|
2521
|
-
* limit: 10
|
|
2522
|
-
* })
|
|
2523
|
-
*
|
|
2524
|
-
* // Get with pagination
|
|
2525
|
-
* await brain.getNouns({
|
|
2526
|
-
* offset: 20,
|
|
2527
|
-
* limit: 10
|
|
2528
|
-
* })
|
|
2529
|
-
*/
|
|
2530
|
-
async getNouns(idsOrOptions) {
|
|
2531
|
-
// Handle array of IDs
|
|
2532
|
-
if (Array.isArray(idsOrOptions)) {
|
|
2533
|
-
return this.getNounsByIds(idsOrOptions);
|
|
2534
|
-
}
|
|
2535
|
-
// Handle options object
|
|
2536
|
-
const options = idsOrOptions || {};
|
|
2537
|
-
// If ids are provided in options, get by IDs
|
|
2538
|
-
if (options.ids) {
|
|
2539
|
-
return this.getNounsByIds(options.ids);
|
|
2540
|
-
}
|
|
2541
|
-
// Otherwise, do a filtered/paginated query and extract items
|
|
2542
|
-
const result = await this.queryNounsByFilter(options);
|
|
2543
|
-
return result.items;
|
|
2544
|
-
}
|
|
2545
|
-
/**
|
|
2546
|
-
* Internal: Get nouns by IDs
|
|
2547
|
-
*/
|
|
2548
|
-
async getNounsByIds(ids) {
|
|
2549
|
-
if (!Array.isArray(ids)) {
|
|
2550
|
-
throw new Error('IDs must be provided as an array');
|
|
2551
|
-
}
|
|
2552
|
-
await this.ensureInitialized();
|
|
2553
|
-
// This is a direct storage operation - check if allowed in write-only mode
|
|
2554
|
-
if (this.writeOnly && !this.allowDirectReads) {
|
|
2555
|
-
throw new Error('Cannot perform getBatch() operation: database is in write-only mode. Enable allowDirectReads for direct storage operations.');
|
|
2556
|
-
}
|
|
2557
|
-
const results = [];
|
|
2558
|
-
for (const id of ids) {
|
|
2559
|
-
if (id === null || id === undefined) {
|
|
2560
|
-
results.push(null);
|
|
2561
|
-
continue;
|
|
2264
|
+
const results = [];
|
|
2265
|
+
for (const id of ids) {
|
|
2266
|
+
if (id === null || id === undefined) {
|
|
2267
|
+
results.push(null);
|
|
2268
|
+
continue;
|
|
2562
2269
|
}
|
|
2563
2270
|
try {
|
|
2564
2271
|
const result = await this.getNoun(id);
|
|
@@ -4690,8 +4397,12 @@ export class BrainyData {
|
|
|
4690
4397
|
noun.vector = await this.embeddingFunction(noun.metadata);
|
|
4691
4398
|
}
|
|
4692
4399
|
}
|
|
4400
|
+
// Extract type from metadata or default to Content
|
|
4401
|
+
const nounType = (noun.metadata && typeof noun.metadata === 'object' && 'noun' in noun.metadata)
|
|
4402
|
+
? noun.metadata.noun
|
|
4403
|
+
: NounType.Content;
|
|
4693
4404
|
// Add the noun with its vector and metadata (custom ID not supported)
|
|
4694
|
-
await this.addNoun(noun.vector, noun.metadata);
|
|
4405
|
+
await this.addNoun(noun.vector, nounType, noun.metadata);
|
|
4695
4406
|
nounsRestored++;
|
|
4696
4407
|
}
|
|
4697
4408
|
catch (error) {
|
|
@@ -4847,8 +4558,8 @@ export class BrainyData {
|
|
|
4847
4558
|
tags: [`tag-${i % 5}`, `category-${i % 3}`]
|
|
4848
4559
|
}
|
|
4849
4560
|
};
|
|
4850
|
-
// Add the noun
|
|
4851
|
-
const id = await this.addNoun(metadata.description, metadata);
|
|
4561
|
+
// Add the noun with explicit type
|
|
4562
|
+
const id = await this.addNoun(metadata.description, nounType, metadata);
|
|
4852
4563
|
nounIds.push(id);
|
|
4853
4564
|
}
|
|
4854
4565
|
// Generate random verbs between nouns
|
|
@@ -5038,8 +4749,7 @@ export class BrainyData {
|
|
|
5038
4749
|
const configValue = options?.encrypt ? await this.encryptData(JSON.stringify(value)) : value;
|
|
5039
4750
|
// Use simple text for vectorization
|
|
5040
4751
|
const searchableText = `Configuration setting for ${key}`;
|
|
5041
|
-
await this.addNoun(searchableText, {
|
|
5042
|
-
nounType: NounType.State,
|
|
4752
|
+
await this.addNoun(searchableText, NounType.State, {
|
|
5043
4753
|
configKey: key,
|
|
5044
4754
|
configValue: configValue,
|
|
5045
4755
|
encrypted: !!options?.encrypt,
|
|
@@ -5171,15 +4881,312 @@ export class BrainyData {
|
|
|
5171
4881
|
* @returns Created noun ID
|
|
5172
4882
|
*/
|
|
5173
4883
|
/**
|
|
5174
|
-
* Add a noun to the database
|
|
4884
|
+
* Add a noun to the database with required type
|
|
5175
4885
|
* Clean 2.0 API - primary method for adding data
|
|
5176
4886
|
*
|
|
5177
4887
|
* @param vectorOrData Vector array or data to embed
|
|
5178
|
-
* @param
|
|
4888
|
+
* @param nounType Required noun type (one of 31 types)
|
|
4889
|
+
* @param metadata Optional metadata object
|
|
5179
4890
|
* @returns The generated ID
|
|
5180
4891
|
*/
|
|
5181
|
-
async addNoun(vectorOrData, metadata) {
|
|
5182
|
-
|
|
4892
|
+
async addNoun(vectorOrData, nounType, metadata, options = {}) {
|
|
4893
|
+
// Validate noun type
|
|
4894
|
+
const validatedType = validateNounType(nounType);
|
|
4895
|
+
// Enrich metadata with validated type
|
|
4896
|
+
let enrichedMetadata = {
|
|
4897
|
+
...metadata,
|
|
4898
|
+
noun: validatedType
|
|
4899
|
+
};
|
|
4900
|
+
await this.ensureInitialized();
|
|
4901
|
+
// Check if database is in read-only mode
|
|
4902
|
+
this.checkReadOnly();
|
|
4903
|
+
// Validate input is not null or undefined
|
|
4904
|
+
if (vectorOrData === null || vectorOrData === undefined) {
|
|
4905
|
+
throw new Error('Input cannot be null or undefined');
|
|
4906
|
+
}
|
|
4907
|
+
try {
|
|
4908
|
+
let vector;
|
|
4909
|
+
if (Array.isArray(vectorOrData)) {
|
|
4910
|
+
for (let i = 0; i < vectorOrData.length; i++) {
|
|
4911
|
+
if (typeof vectorOrData[i] !== 'number') {
|
|
4912
|
+
throw new Error('Vector contains non-numeric values');
|
|
4913
|
+
}
|
|
4914
|
+
}
|
|
4915
|
+
}
|
|
4916
|
+
// Check if input is already a vector
|
|
4917
|
+
if (Array.isArray(vectorOrData) && !options.forceEmbed) {
|
|
4918
|
+
// Input is already a vector (and we've validated it contains only numbers)
|
|
4919
|
+
vector = vectorOrData;
|
|
4920
|
+
}
|
|
4921
|
+
else {
|
|
4922
|
+
// Input needs to be vectorized
|
|
4923
|
+
try {
|
|
4924
|
+
// Check if input is a JSON object and process it specially
|
|
4925
|
+
if (typeof vectorOrData === 'object' &&
|
|
4926
|
+
vectorOrData !== null &&
|
|
4927
|
+
!Array.isArray(vectorOrData)) {
|
|
4928
|
+
// Process JSON object for better vectorization
|
|
4929
|
+
const preparedText = prepareJsonForVectorization(vectorOrData, {
|
|
4930
|
+
// Prioritize common name/title fields if they exist
|
|
4931
|
+
priorityFields: [
|
|
4932
|
+
'name',
|
|
4933
|
+
'title',
|
|
4934
|
+
'company',
|
|
4935
|
+
'organization',
|
|
4936
|
+
'description',
|
|
4937
|
+
'summary'
|
|
4938
|
+
]
|
|
4939
|
+
});
|
|
4940
|
+
vector = await this.embeddingFunction(preparedText);
|
|
4941
|
+
// IMPORTANT: When an object is passed as data and no metadata is provided,
|
|
4942
|
+
// use the object AS the metadata too. This is expected behavior for the API.
|
|
4943
|
+
// Users can pass either:
|
|
4944
|
+
// 1. addNoun(string, metadata) - vectorize string, store metadata
|
|
4945
|
+
// 2. addNoun(object) - vectorize object text, store object as metadata
|
|
4946
|
+
// 3. addNoun(object, metadata) - vectorize object text, store provided metadata
|
|
4947
|
+
if (!enrichedMetadata || Object.keys(enrichedMetadata).length === 1) { // Only has 'noun' key
|
|
4948
|
+
enrichedMetadata = { ...vectorOrData, noun: validatedType };
|
|
4949
|
+
}
|
|
4950
|
+
// Track field names for this JSON document
|
|
4951
|
+
const service = this.getServiceName(options);
|
|
4952
|
+
if (this.storage) {
|
|
4953
|
+
await this.storage.trackFieldNames(vectorOrData, service);
|
|
4954
|
+
}
|
|
4955
|
+
}
|
|
4956
|
+
else {
|
|
4957
|
+
// Use standard embedding for non-JSON data
|
|
4958
|
+
vector = await this.embeddingFunction(vectorOrData);
|
|
4959
|
+
}
|
|
4960
|
+
}
|
|
4961
|
+
catch (embedError) {
|
|
4962
|
+
throw new Error(`Failed to vectorize data: ${embedError}`);
|
|
4963
|
+
}
|
|
4964
|
+
}
|
|
4965
|
+
// Check if vector is defined
|
|
4966
|
+
if (!vector) {
|
|
4967
|
+
throw new Error('Vector is undefined or null');
|
|
4968
|
+
}
|
|
4969
|
+
// Validate vector dimensions
|
|
4970
|
+
if (vector.length !== this._dimensions) {
|
|
4971
|
+
throw new Error(`Vector dimension mismatch: expected ${this._dimensions}, got ${vector.length}`);
|
|
4972
|
+
}
|
|
4973
|
+
// Use ID from options if it exists, otherwise from metadata, otherwise generate a new UUID
|
|
4974
|
+
const id = options.id ||
|
|
4975
|
+
(enrichedMetadata && typeof enrichedMetadata === 'object' && 'id' in enrichedMetadata
|
|
4976
|
+
? enrichedMetadata.id
|
|
4977
|
+
: uuidv4());
|
|
4978
|
+
// Check for existing noun (both write-only and normal modes)
|
|
4979
|
+
let existingNoun;
|
|
4980
|
+
if (options.id) {
|
|
4981
|
+
try {
|
|
4982
|
+
if (this.writeOnly) {
|
|
4983
|
+
// In write-only mode, check storage directly
|
|
4984
|
+
existingNoun =
|
|
4985
|
+
(await this.storage.getNoun(options.id)) ?? undefined;
|
|
4986
|
+
}
|
|
4987
|
+
else {
|
|
4988
|
+
// In normal mode, check index first, then storage
|
|
4989
|
+
existingNoun = this.index.getNouns().get(options.id);
|
|
4990
|
+
if (!existingNoun) {
|
|
4991
|
+
existingNoun =
|
|
4992
|
+
(await this.storage.getNoun(options.id)) ?? undefined;
|
|
4993
|
+
}
|
|
4994
|
+
}
|
|
4995
|
+
if (existingNoun) {
|
|
4996
|
+
// Check if existing noun is a placeholder
|
|
4997
|
+
const existingMetadata = await this.storage.getMetadata(options.id);
|
|
4998
|
+
const isPlaceholder = existingMetadata &&
|
|
4999
|
+
typeof existingMetadata === 'object' &&
|
|
5000
|
+
existingMetadata.isPlaceholder;
|
|
5001
|
+
if (isPlaceholder) {
|
|
5002
|
+
// Replace placeholder with real data
|
|
5003
|
+
if (this.loggingConfig?.verbose) {
|
|
5004
|
+
console.log(`Replacing placeholder noun ${options.id} with real data`);
|
|
5005
|
+
}
|
|
5006
|
+
}
|
|
5007
|
+
else {
|
|
5008
|
+
// Real noun already exists, update it
|
|
5009
|
+
if (this.loggingConfig?.verbose) {
|
|
5010
|
+
console.log(`Updating existing noun ${options.id}`);
|
|
5011
|
+
}
|
|
5012
|
+
}
|
|
5013
|
+
}
|
|
5014
|
+
}
|
|
5015
|
+
catch (storageError) {
|
|
5016
|
+
// Item doesn't exist, continue with add operation
|
|
5017
|
+
}
|
|
5018
|
+
}
|
|
5019
|
+
let noun;
|
|
5020
|
+
// In write-only mode, skip index operations since index is not loaded
|
|
5021
|
+
if (this.writeOnly) {
|
|
5022
|
+
// Create noun object directly without adding to index
|
|
5023
|
+
noun = {
|
|
5024
|
+
id,
|
|
5025
|
+
vector,
|
|
5026
|
+
connections: new Map(),
|
|
5027
|
+
level: 0, // Default level for new nodes
|
|
5028
|
+
metadata: undefined // Will be set separately
|
|
5029
|
+
};
|
|
5030
|
+
}
|
|
5031
|
+
else {
|
|
5032
|
+
// Normal mode: Add to HNSW index first
|
|
5033
|
+
await this.hnswIndex.addItem({ id, vector, metadata: enrichedMetadata });
|
|
5034
|
+
// Get the noun from the HNSW index
|
|
5035
|
+
const indexNoun = this.hnswIndex.getNouns().get(id);
|
|
5036
|
+
if (!indexNoun) {
|
|
5037
|
+
throw new Error(`Failed to retrieve newly created noun with ID ${id}`);
|
|
5038
|
+
}
|
|
5039
|
+
noun = indexNoun;
|
|
5040
|
+
}
|
|
5041
|
+
// Save noun to storage using augmentation system
|
|
5042
|
+
await this.augmentations.execute('saveNoun', { noun, options }, async () => {
|
|
5043
|
+
await this.storage.saveNoun(noun);
|
|
5044
|
+
const service = this.getServiceName(options);
|
|
5045
|
+
await this.storage.incrementStatistic('noun', service);
|
|
5046
|
+
});
|
|
5047
|
+
// Save metadata if provided and not empty
|
|
5048
|
+
if (enrichedMetadata !== undefined) {
|
|
5049
|
+
// Skip saving if metadata is an empty object
|
|
5050
|
+
if (enrichedMetadata &&
|
|
5051
|
+
typeof enrichedMetadata === 'object' &&
|
|
5052
|
+
Object.keys(enrichedMetadata).length === 0) {
|
|
5053
|
+
// Don't save empty metadata
|
|
5054
|
+
// Explicitly save null to ensure no metadata is stored
|
|
5055
|
+
await this.storage.saveMetadata(id, null);
|
|
5056
|
+
}
|
|
5057
|
+
else {
|
|
5058
|
+
// Validate noun type if metadata is for a GraphNoun
|
|
5059
|
+
if (enrichedMetadata && typeof enrichedMetadata === 'object' && 'noun' in enrichedMetadata) {
|
|
5060
|
+
const nounType = enrichedMetadata.noun;
|
|
5061
|
+
// Check if the noun type is valid
|
|
5062
|
+
const isValidNounType = Object.values(NounType).includes(nounType);
|
|
5063
|
+
if (!isValidNounType) {
|
|
5064
|
+
console.warn(`Invalid noun type: ${nounType}. Falling back to GraphNoun.`);
|
|
5065
|
+
enrichedMetadata.noun = NounType.Concept;
|
|
5066
|
+
}
|
|
5067
|
+
// Ensure createdBy field is populated for GraphNoun
|
|
5068
|
+
const service = options.service || this.getCurrentAugmentation();
|
|
5069
|
+
const graphNoun = enrichedMetadata;
|
|
5070
|
+
// Only set createdBy if it doesn't exist or is being explicitly updated
|
|
5071
|
+
if (!graphNoun.createdBy || options.service) {
|
|
5072
|
+
graphNoun.createdBy = getAugmentationVersion(service);
|
|
5073
|
+
}
|
|
5074
|
+
// Update timestamps
|
|
5075
|
+
const now = new Date();
|
|
5076
|
+
const timestamp = {
|
|
5077
|
+
seconds: Math.floor(now.getTime() / 1000),
|
|
5078
|
+
nanoseconds: (now.getTime() % 1000) * 1000000
|
|
5079
|
+
};
|
|
5080
|
+
// Set createdAt if it doesn't exist
|
|
5081
|
+
if (!graphNoun.createdAt) {
|
|
5082
|
+
graphNoun.createdAt = timestamp;
|
|
5083
|
+
}
|
|
5084
|
+
// Always update updatedAt
|
|
5085
|
+
graphNoun.updatedAt = timestamp;
|
|
5086
|
+
}
|
|
5087
|
+
// Create properly namespaced metadata for new items
|
|
5088
|
+
let metadataToSave = createNamespacedMetadata(enrichedMetadata);
|
|
5089
|
+
// Add domain metadata if distributed mode is enabled
|
|
5090
|
+
if (this.domainDetector) {
|
|
5091
|
+
// First check if domain is already in metadata
|
|
5092
|
+
if (metadataToSave.domain) {
|
|
5093
|
+
// Domain already specified, keep it
|
|
5094
|
+
const domainInfo = this.domainDetector.detectDomain(metadataToSave);
|
|
5095
|
+
if (domainInfo.domainMetadata) {
|
|
5096
|
+
;
|
|
5097
|
+
metadataToSave.domainMetadata =
|
|
5098
|
+
domainInfo.domainMetadata;
|
|
5099
|
+
}
|
|
5100
|
+
}
|
|
5101
|
+
else {
|
|
5102
|
+
// Try to detect domain from the data
|
|
5103
|
+
const dataToAnalyze = Array.isArray(vectorOrData)
|
|
5104
|
+
? enrichedMetadata
|
|
5105
|
+
: vectorOrData;
|
|
5106
|
+
const domainInfo = this.domainDetector.detectDomain(dataToAnalyze);
|
|
5107
|
+
if (domainInfo.domain) {
|
|
5108
|
+
;
|
|
5109
|
+
metadataToSave.domain = domainInfo.domain;
|
|
5110
|
+
if (domainInfo.domainMetadata) {
|
|
5111
|
+
;
|
|
5112
|
+
metadataToSave.domainMetadata =
|
|
5113
|
+
domainInfo.domainMetadata;
|
|
5114
|
+
}
|
|
5115
|
+
}
|
|
5116
|
+
}
|
|
5117
|
+
}
|
|
5118
|
+
// Add partition information if distributed mode is enabled
|
|
5119
|
+
if (this.partitioner) {
|
|
5120
|
+
const partition = this.partitioner.getPartition(id);
|
|
5121
|
+
metadataToSave.partition = partition;
|
|
5122
|
+
}
|
|
5123
|
+
await this.storage.saveMetadata(id, metadataToSave);
|
|
5124
|
+
// Update metadata index (write-only mode should build indices!)
|
|
5125
|
+
if (this.index && !this.frozen) {
|
|
5126
|
+
await this.metadataIndex?.addToIndex?.(id, metadataToSave);
|
|
5127
|
+
}
|
|
5128
|
+
// Track metadata statistics
|
|
5129
|
+
const metadataService = this.getServiceName(options);
|
|
5130
|
+
await this.storage.incrementStatistic('metadata', metadataService);
|
|
5131
|
+
// Content type tracking removed - metrics system not initialized
|
|
5132
|
+
// Track update timestamp (handled by metrics augmentation)
|
|
5133
|
+
}
|
|
5134
|
+
}
|
|
5135
|
+
// Update HNSW index size with actual index size
|
|
5136
|
+
const indexSize = this.index.size();
|
|
5137
|
+
await this.storage.updateHnswIndexSize(indexSize);
|
|
5138
|
+
// Update health metrics if in distributed mode
|
|
5139
|
+
if (this.monitoring) {
|
|
5140
|
+
const vectorCount = await this.getNounCount();
|
|
5141
|
+
this.monitoring.updateVectorCount(vectorCount);
|
|
5142
|
+
}
|
|
5143
|
+
// If addToRemote is true and we're connected to a remote server, add to remote as well
|
|
5144
|
+
if (options.addToRemote && this.isConnectedToRemoteServer()) {
|
|
5145
|
+
try {
|
|
5146
|
+
await this.addToRemote(id, vector, enrichedMetadata);
|
|
5147
|
+
}
|
|
5148
|
+
catch (remoteError) {
|
|
5149
|
+
console.warn(`Failed to add to remote server: ${remoteError}. Continuing with local add.`);
|
|
5150
|
+
}
|
|
5151
|
+
}
|
|
5152
|
+
// Invalidate search cache since data has changed
|
|
5153
|
+
this.cache?.invalidateOnDataChange('add');
|
|
5154
|
+
// Determine processing mode
|
|
5155
|
+
const processingMode = options.process || 'auto';
|
|
5156
|
+
let shouldProcessNeurally = false;
|
|
5157
|
+
if (processingMode === 'neural') {
|
|
5158
|
+
shouldProcessNeurally = true;
|
|
5159
|
+
}
|
|
5160
|
+
else if (processingMode === 'auto') {
|
|
5161
|
+
// Auto-detect whether to use neural processing
|
|
5162
|
+
shouldProcessNeurally = this.shouldAutoProcessNeurally(vectorOrData, enrichedMetadata);
|
|
5163
|
+
}
|
|
5164
|
+
// 'literal' mode means no neural processing
|
|
5165
|
+
// 🧠 AI Processing (Neural Import) - Based on processing mode
|
|
5166
|
+
if (shouldProcessNeurally) {
|
|
5167
|
+
try {
|
|
5168
|
+
// Execute augmentation pipeline for data processing
|
|
5169
|
+
// Note: Augmentations will be called via this.augmentations.execute during the actual add operation
|
|
5170
|
+
// This replaces the legacy SENSE pipeline
|
|
5171
|
+
if (this.loggingConfig?.verbose) {
|
|
5172
|
+
console.log(`🧠 AI processing completed for data: ${id}`);
|
|
5173
|
+
}
|
|
5174
|
+
}
|
|
5175
|
+
catch (processingError) {
|
|
5176
|
+
// Don't fail the add operation if processing fails
|
|
5177
|
+
console.warn(`🧠 AI processing failed for ${id}:`, processingError);
|
|
5178
|
+
}
|
|
5179
|
+
}
|
|
5180
|
+
return id;
|
|
5181
|
+
}
|
|
5182
|
+
catch (error) {
|
|
5183
|
+
console.error('Failed to add vector:', error);
|
|
5184
|
+
// Track error in health monitor
|
|
5185
|
+
if (this.monitoring) {
|
|
5186
|
+
this.monitoring.recordRequest(0, true);
|
|
5187
|
+
}
|
|
5188
|
+
throw new Error(`Failed to add vector: ${error}`);
|
|
5189
|
+
}
|
|
5183
5190
|
}
|
|
5184
5191
|
/**
|
|
5185
5192
|
* Add Verb - Unified relationship creation between nouns
|
|
@@ -5514,10 +5521,10 @@ export class BrainyData {
|
|
|
5514
5521
|
}
|
|
5515
5522
|
};
|
|
5516
5523
|
// Store coordination plan in _system directory
|
|
5517
|
-
await this.addNoun({
|
|
5524
|
+
await this.addNoun('Cortex coordination plan', NounType.Process, {
|
|
5518
5525
|
id: '_system/coordination',
|
|
5519
5526
|
type: 'cortex_coordination',
|
|
5520
|
-
|
|
5527
|
+
...coordinationPlan
|
|
5521
5528
|
});
|
|
5522
5529
|
prodLog.info('📋 Storage migration coordination plan created');
|
|
5523
5530
|
prodLog.info('All services will automatically detect and execute the migration');
|