@soulcraft/brainy 2.10.0 → 2.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/README.md +10 -10
  2. package/dist/augmentations/apiServerAugmentation.js +2 -2
  3. package/dist/augmentations/display/fieldPatterns.d.ts +1 -1
  4. package/dist/augmentations/display/fieldPatterns.js +1 -1
  5. package/dist/augmentations/display/intelligentComputation.d.ts +2 -2
  6. package/dist/augmentations/display/intelligentComputation.js +4 -4
  7. package/dist/augmentations/display/types.d.ts +1 -1
  8. package/dist/augmentations/neuralImport.js +4 -4
  9. package/dist/augmentations/synapseAugmentation.js +3 -3
  10. package/dist/augmentations/typeMatching/brainyTypes.d.ts +83 -0
  11. package/dist/augmentations/typeMatching/brainyTypes.js +425 -0
  12. package/dist/augmentations/universalDisplayAugmentation.d.ts +1 -1
  13. package/dist/augmentations/universalDisplayAugmentation.js +1 -1
  14. package/dist/brainyData.d.ts +15 -33
  15. package/dist/brainyData.js +1210 -1203
  16. package/dist/chat/BrainyChat.js +11 -11
  17. package/dist/examples/basicUsage.js +4 -1
  18. package/dist/importManager.js +2 -2
  19. package/dist/index.d.ts +3 -1
  20. package/dist/index.js +5 -1
  21. package/dist/neural/embeddedPatterns.d.ts +1 -1
  22. package/dist/neural/embeddedPatterns.js +2 -2
  23. package/dist/storage/adapters/fileSystemStorage.d.ts +2 -2
  24. package/dist/storage/adapters/fileSystemStorage.js +2 -2
  25. package/dist/storage/adapters/memoryStorage.d.ts +4 -4
  26. package/dist/storage/adapters/memoryStorage.js +4 -4
  27. package/dist/storage/adapters/opfsStorage.d.ts +2 -2
  28. package/dist/storage/adapters/opfsStorage.js +2 -2
  29. package/dist/storage/adapters/s3CompatibleStorage.d.ts +2 -2
  30. package/dist/storage/adapters/s3CompatibleStorage.js +2 -2
  31. package/dist/storage/baseStorage.d.ts +12 -2
  32. package/dist/storage/baseStorage.js +32 -0
  33. package/dist/types/brainyDataInterface.d.ts +2 -5
  34. package/dist/utils/brainyTypes.d.ts +217 -0
  35. package/dist/utils/brainyTypes.js +261 -0
  36. package/dist/utils/typeValidation.d.ts +25 -0
  37. package/dist/utils/typeValidation.js +127 -0
  38. package/package.json +1 -1
@@ -12,6 +12,7 @@ import { enforceNodeVersion } from './utils/nodeVersionCheck.js';
12
12
  import { createNamespacedMetadata, updateNamespacedMetadata, markDeleted, markRestored, isDeleted, getUserMetadata } from './utils/metadataNamespace.js';
13
13
  import { PeriodicCleanup } from './utils/periodicCleanup.js';
14
14
  import { NounType, VerbType } from './types/graphTypes.js';
15
+ import { validateNounType } from './utils/typeValidation.js';
15
16
  import { createServerSearchAugmentations } from './augmentations/serverSearchAugmentations.js';
16
17
  import { augmentationPipeline } from './augmentationPipeline.js';
17
18
  import { prodLog } from './utils/logger.js';
@@ -127,7 +128,7 @@ export class BrainyData {
127
128
  this.operationalMode = null;
128
129
  this.domainDetector = null;
129
130
  // Enforce Node.js version requirement for ONNX stability
130
- if (typeof process !== 'undefined' && process.version) {
131
+ if (typeof process !== 'undefined' && process.version && !process.env.BRAINY_SKIP_VERSION_CHECK) {
131
132
  enforceNodeVersion();
132
133
  }
133
134
  // Store raw config for processing in init()
@@ -1248,1317 +1249,1023 @@ export class BrainyData {
1248
1249
  throw new Error(`Failed to connect to remote server: ${error}`);
1249
1250
  }
1250
1251
  }
1252
+ // REMOVED: addItem() - Use addNoun() instead (cleaner 2.0 API)
1253
+ // REMOVED: addToBoth() - Remote server functionality moved to post-2.0.0
1251
1254
  /**
1252
- * Add data to the database with intelligent processing
1253
- *
1254
- * @param vectorOrData Vector or data to add
1255
- * @param metadata Optional metadata to associate with the data
1256
- * @param options Additional options for processing
1257
- * @returns The ID of the added data
1258
- *
1259
- * @example
1260
- * // Auto mode - intelligently decides processing
1261
- * await brainy.add("Customer feedback: Great product!")
1262
- *
1263
- * @example
1264
- * // Explicit literal mode for sensitive data
1265
- * await brainy.add("API_KEY=secret123", null, { process: 'literal' })
1266
- *
1267
- * @example
1268
- * // Force neural processing
1269
- * await brainy.add("John works at Acme Corp", null, { process: 'neural' })
1255
+ * Add a vector to the remote server
1256
+ * @param id ID of the vector to add
1257
+ * @param vector Vector to add
1258
+ * @param metadata Optional metadata to associate with the vector
1259
+ * @returns True if successful, false otherwise
1260
+ * @private
1261
+ */
1262
+ async addToRemote(id, vector, metadata) {
1263
+ if (!this.isConnectedToRemoteServer()) {
1264
+ return false;
1265
+ }
1266
+ try {
1267
+ // TODO: Remote server operations (post-2.0.0 feature)
1268
+ // if (!this.serverSearchConduit || !this.serverConnection) {
1269
+ // throw new Error(
1270
+ // 'Server search conduit or connection is not initialized'
1271
+ // )
1272
+ // }
1273
+ // TODO: Add to remote server
1274
+ // const addResult = await this.serverSearchConduit.addToBoth(
1275
+ // this.serverConnection.connectionId,
1276
+ // vector,
1277
+ // metadata
1278
+ // )
1279
+ throw new Error('Remote server functionality not yet implemented in Brainy 2.0.0');
1280
+ // TODO: Handle remote add result (post-2.0.0 feature)
1281
+ // if (!addResult.success) {
1282
+ // throw new Error(`Remote add failed: ${addResult.error}`)
1283
+ // }
1284
+ return true;
1285
+ }
1286
+ catch (error) {
1287
+ console.error('Failed to add to remote server:', error);
1288
+ throw new Error(`Failed to add to remote server: ${error}`);
1289
+ }
1290
+ }
1291
+ /**
1292
+ * Add multiple vectors or data items to the database
1293
+ * @param items Array of items to add
1294
+ * @param options Additional options
1295
+ * @returns Array of IDs for the added items
1296
+ */
1297
+ /**
1298
+ * Add multiple nouns in batch with required types
1299
+ * @param items Array of nouns to add (all must have types)
1300
+ * @param options Batch processing options
1301
+ * @returns Array of generated IDs
1270
1302
  */
1271
- async add(vectorOrData, metadata, options = {}) {
1303
+ async addNouns(items, options = {}) {
1272
1304
  await this.ensureInitialized();
1273
1305
  // Check if database is in read-only mode
1274
1306
  this.checkReadOnly();
1275
- // Validate input is not null or undefined
1276
- if (vectorOrData === null || vectorOrData === undefined) {
1277
- throw new Error('Input cannot be null or undefined');
1307
+ // Validate all types upfront for better error handling
1308
+ const invalidItems = [];
1309
+ items.forEach((item, index) => {
1310
+ if (!item.nounType || typeof item.nounType !== 'string') {
1311
+ invalidItems.push(index);
1312
+ }
1313
+ else {
1314
+ // Validate the type is valid
1315
+ try {
1316
+ validateNounType(item.nounType);
1317
+ }
1318
+ catch (error) {
1319
+ invalidItems.push(index);
1320
+ }
1321
+ }
1322
+ });
1323
+ if (invalidItems.length > 0) {
1324
+ throw new Error(`Type validation failed for ${invalidItems.length} items at indices: ${invalidItems.slice(0, 5).join(', ')}${invalidItems.length > 5 ? '...' : ''}\n` +
1325
+ 'All items must have valid noun types.\n' +
1326
+ 'Example: { vectorOrData: "data", nounType: NounType.Content, metadata: {...} }');
1278
1327
  }
1328
+ // Default concurrency to 4 if not specified
1329
+ const concurrency = options.concurrency || 4;
1330
+ // Default batch size to 50 if not specified
1331
+ const batchSize = options.batchSize || 50;
1279
1332
  try {
1280
- let vector;
1281
- // First validate if input is an array but contains non-numeric values
1282
- if (Array.isArray(vectorOrData)) {
1283
- for (let i = 0; i < vectorOrData.length; i++) {
1284
- if (typeof vectorOrData[i] !== 'number') {
1285
- throw new Error('Vector contains non-numeric values');
1333
+ // Process items in batches to control concurrency and memory usage
1334
+ const ids = [];
1335
+ const itemsToProcess = [...items]; // Create a copy to avoid modifying the original array
1336
+ while (itemsToProcess.length > 0) {
1337
+ // Take up to 'batchSize' items to process in a batch
1338
+ const batch = itemsToProcess.splice(0, batchSize);
1339
+ // Separate items that are already vectors from those that need embedding
1340
+ const vectorItems = [];
1341
+ const textItems = [];
1342
+ // Categorize items
1343
+ batch.forEach((item, index) => {
1344
+ if (Array.isArray(item.vectorOrData) &&
1345
+ item.vectorOrData.every((val) => typeof val === 'number') &&
1346
+ !options.forceEmbed) {
1347
+ // Item is already a vector
1348
+ vectorItems.push({
1349
+ vectorOrData: item.vectorOrData,
1350
+ nounType: item.nounType,
1351
+ metadata: item.metadata,
1352
+ index
1353
+ });
1354
+ }
1355
+ else if (typeof item.vectorOrData === 'string') {
1356
+ // Item is text that needs embedding
1357
+ textItems.push({
1358
+ text: item.vectorOrData,
1359
+ nounType: item.nounType,
1360
+ metadata: item.metadata,
1361
+ index
1362
+ });
1363
+ }
1364
+ else {
1365
+ // For now, treat other types as text
1366
+ // In a more complete implementation, we might handle other types differently
1367
+ const textRepresentation = String(item.vectorOrData);
1368
+ textItems.push({
1369
+ text: textRepresentation,
1370
+ nounType: item.nounType,
1371
+ metadata: item.metadata,
1372
+ index
1373
+ });
1286
1374
  }
1375
+ });
1376
+ // Process vector items (already embedded)
1377
+ const vectorPromises = vectorItems.map((item) => this.addNoun(item.vectorOrData, item.nounType, item.metadata));
1378
+ // Process text items in a single batch embedding operation
1379
+ let textPromises = [];
1380
+ if (textItems.length > 0) {
1381
+ // Extract just the text for batch embedding
1382
+ const texts = textItems.map((item) => item.text);
1383
+ // Perform batch embedding
1384
+ const embeddings = await batchEmbed(texts);
1385
+ // Add each item with its embedding
1386
+ textPromises = textItems.map((item, i) => this.addNoun(embeddings[i], item.nounType, item.metadata));
1287
1387
  }
1388
+ // Combine all promises
1389
+ const batchResults = await Promise.all([
1390
+ ...vectorPromises,
1391
+ ...textPromises
1392
+ ]);
1393
+ // Add the results to our ids array
1394
+ ids.push(...batchResults);
1288
1395
  }
1396
+ return ids;
1397
+ }
1398
+ catch (error) {
1399
+ console.error('Failed to add batch of items:', error);
1400
+ throw new Error(`Failed to add batch of items: ${error}`);
1401
+ }
1402
+ }
1403
+ /**
1404
+ * Add multiple vectors or data items to both local and remote databases
1405
+ * @param items Array of items to add (with required types)
1406
+ * @param options Additional options
1407
+ * @returns Array of IDs for the added items
1408
+ */
1409
+ async addBatchToBoth(items, options = {}) {
1410
+ // Check if connected to a remote server
1411
+ if (!this.isConnectedToRemoteServer()) {
1412
+ throw new Error('Not connected to a remote server. Call connectToRemoteServer() first.');
1413
+ }
1414
+ // Add to local with addToRemote option
1415
+ return this.addNouns(items, { ...options, addToRemote: true });
1416
+ }
1417
+ /**
1418
+ * Filter search results by service
1419
+ * @param results Search results to filter
1420
+ * @param service Service to filter by
1421
+ * @returns Filtered search results
1422
+ * @private
1423
+ */
1424
+ filterResultsByService(results, service) {
1425
+ if (!service)
1426
+ return results;
1427
+ return results.filter((result) => {
1428
+ if (!result.metadata || typeof result.metadata !== 'object')
1429
+ return false;
1430
+ if (!('createdBy' in result.metadata))
1431
+ return false;
1432
+ const createdBy = result.metadata.createdBy;
1433
+ if (!createdBy)
1434
+ return false;
1435
+ return createdBy.augmentation === service;
1436
+ });
1437
+ }
1438
+ /**
1439
+ * Search for similar vectors within specific noun types
1440
+ * @param queryVectorOrData Query vector or data to search for
1441
+ * @param k Number of results to return
1442
+ * @param nounTypes Array of noun types to search within, or null to search all
1443
+ * @param options Additional options
1444
+ * @returns Array of search results
1445
+ */
1446
+ /**
1447
+ * @deprecated Use search() with nounTypes option instead
1448
+ * @example
1449
+ * // Old way (deprecated)
1450
+ * await brain.searchByNounTypes(query, 10, ['type1', 'type2'])
1451
+ * // New way
1452
+ * await brain.search(query, { limit: 10, nounTypes: ['type1', 'type2'] })
1453
+ */
1454
+ async searchByNounTypes(queryVectorOrData, k = 10, nounTypes = null, options = {}) {
1455
+ // Helper function to filter results by service
1456
+ const filterByService = (metadata) => {
1457
+ if (!options.service)
1458
+ return true; // No filter, include all
1459
+ // Check if metadata has createdBy field with matching service
1460
+ if (!metadata || typeof metadata !== 'object')
1461
+ return false;
1462
+ if (!('createdBy' in metadata))
1463
+ return false;
1464
+ const createdBy = metadata.createdBy;
1465
+ if (!createdBy)
1466
+ return false;
1467
+ return createdBy.augmentation === options.service;
1468
+ };
1469
+ if (!this.isInitialized) {
1470
+ throw new Error('BrainyData must be initialized before searching. Call init() first.');
1471
+ }
1472
+ // Check if database is in write-only mode
1473
+ this.checkWriteOnly();
1474
+ try {
1475
+ let queryVector;
1289
1476
  // Check if input is already a vector
1290
- if (Array.isArray(vectorOrData) && !options.forceEmbed) {
1291
- // Input is already a vector (and we've validated it contains only numbers)
1292
- vector = vectorOrData;
1477
+ if (Array.isArray(queryVectorOrData) &&
1478
+ queryVectorOrData.every((item) => typeof item === 'number') &&
1479
+ !options.forceEmbed) {
1480
+ // Input is already a vector
1481
+ queryVector = queryVectorOrData;
1293
1482
  }
1294
1483
  else {
1295
1484
  // Input needs to be vectorized
1296
1485
  try {
1297
- // Check if input is a JSON object and process it specially
1298
- if (typeof vectorOrData === 'object' &&
1299
- vectorOrData !== null &&
1300
- !Array.isArray(vectorOrData)) {
1301
- // Process JSON object for better vectorization
1302
- const preparedText = prepareJsonForVectorization(vectorOrData, {
1303
- // Prioritize common name/title fields if they exist
1304
- priorityFields: [
1305
- 'name',
1306
- 'title',
1307
- 'company',
1308
- 'organization',
1309
- 'description',
1310
- 'summary'
1311
- ]
1312
- });
1313
- vector = await this.embeddingFunction(preparedText);
1314
- // IMPORTANT: When an object is passed as data and no metadata is provided,
1315
- // use the object AS the metadata too. This is expected behavior for the API.
1316
- // Users can pass either:
1317
- // 1. addNoun(string, metadata) - vectorize string, store metadata
1318
- // 2. addNoun(object) - vectorize object text, store object as metadata
1319
- // 3. addNoun(object, metadata) - vectorize object text, store provided metadata
1320
- if (!metadata) {
1321
- metadata = vectorOrData;
1322
- }
1323
- // Track field names for this JSON document
1324
- const service = this.getServiceName(options);
1325
- if (this.storage) {
1326
- await this.storage.trackFieldNames(vectorOrData, service);
1327
- }
1328
- }
1329
- else {
1330
- // Use standard embedding for non-JSON data
1331
- vector = await this.embeddingFunction(vectorOrData);
1332
- }
1486
+ queryVector = await this.embeddingFunction(queryVectorOrData);
1333
1487
  }
1334
1488
  catch (embedError) {
1335
- throw new Error(`Failed to vectorize data: ${embedError}`);
1489
+ throw new Error(`Failed to vectorize query data: ${embedError}`);
1336
1490
  }
1337
1491
  }
1338
- // Check if vector is defined
1339
- if (!vector) {
1340
- throw new Error('Vector is undefined or null');
1492
+ // Check if query vector is defined
1493
+ if (!queryVector) {
1494
+ throw new Error('Query vector is undefined or null');
1341
1495
  }
1342
- // Validate vector dimensions
1343
- if (vector.length !== this._dimensions) {
1344
- throw new Error(`Vector dimension mismatch: expected ${this._dimensions}, got ${vector.length}`);
1496
+ // Check if query vector dimensions match the expected dimensions
1497
+ if (queryVector.length !== this._dimensions) {
1498
+ throw new Error(`Query vector dimension mismatch: expected ${this._dimensions}, got ${queryVector.length}`);
1345
1499
  }
1346
- // Use ID from options if it exists, otherwise from metadata, otherwise generate a new UUID
1347
- const id = options.id ||
1348
- (metadata && typeof metadata === 'object' && 'id' in metadata
1349
- ? metadata.id
1350
- : uuidv4());
1351
- // Check for existing noun (both write-only and normal modes)
1352
- let existingNoun;
1353
- if (options.id) {
1354
- try {
1355
- if (this.writeOnly) {
1356
- // In write-only mode, check storage directly
1357
- existingNoun =
1358
- (await this.storage.getNoun(options.id)) ?? undefined;
1500
+ // If no noun types specified, search all nouns
1501
+ if (!nounTypes || nounTypes.length === 0) {
1502
+ // Check if we're in readonly mode with lazy loading and the index is empty
1503
+ const indexSize = this.index.getNouns().size;
1504
+ if (this.readOnly && this.lazyLoadInReadOnlyMode && indexSize === 0) {
1505
+ if (this.loggingConfig?.verbose) {
1506
+ console.log('Lazy loading mode: Index is empty, loading nodes for search...');
1359
1507
  }
1360
- else {
1361
- // In normal mode, check index first, then storage
1362
- existingNoun = this.index.getNouns().get(options.id);
1363
- if (!existingNoun) {
1364
- existingNoun =
1365
- (await this.storage.getNoun(options.id)) ?? undefined;
1508
+ // In lazy loading mode, we need to load some nodes to search
1509
+ // Instead of loading all nodes, we'll load a subset of nodes
1510
+ // Load a limited number of nodes from storage using pagination
1511
+ const result = await this.storage.getNouns({
1512
+ pagination: { offset: 0, limit: k * 10 } // Get 10x more nodes than needed
1513
+ });
1514
+ const limitedNouns = result.items;
1515
+ // Add these nodes to the index
1516
+ for (const node of limitedNouns) {
1517
+ // Check if the vector dimensions match the expected dimensions
1518
+ if (node.vector.length !== this._dimensions) {
1519
+ console.warn(`Skipping node ${node.id} due to dimension mismatch: expected ${this._dimensions}, got ${node.vector.length}`);
1520
+ continue;
1366
1521
  }
1522
+ // Add to index
1523
+ await this.index.addItem({
1524
+ id: node.id,
1525
+ vector: node.vector
1526
+ });
1367
1527
  }
1368
- if (existingNoun) {
1369
- // Check if existing noun is a placeholder
1370
- const existingMetadata = await this.storage.getMetadata(options.id);
1371
- const isPlaceholder = existingMetadata &&
1372
- typeof existingMetadata === 'object' &&
1373
- existingMetadata.isPlaceholder;
1374
- if (isPlaceholder) {
1375
- // Replace placeholder with real data
1376
- if (this.loggingConfig?.verbose) {
1377
- console.log(`Replacing placeholder noun ${options.id} with real data`);
1378
- }
1528
+ if (this.loggingConfig?.verbose) {
1529
+ console.log(`Lazy loading mode: Added ${limitedNouns.length} nodes to index for search`);
1530
+ }
1531
+ }
1532
+ // Create filter function for HNSW search with metadata index optimization
1533
+ const hasMetadataFilter = options.metadata && Object.keys(options.metadata).length > 0;
1534
+ const hasServiceFilter = !!options.service;
1535
+ let filterFunction;
1536
+ let preFilteredIds;
1537
+ // Use metadata index for pre-filtering if available
1538
+ if (hasMetadataFilter && this.metadataIndex) {
1539
+ try {
1540
+ // Ensure metadata index is up to date
1541
+ await this.metadataIndex?.flush?.();
1542
+ // Get candidate IDs from metadata index
1543
+ const candidateIds = await this.metadataIndex?.getIdsForFilter?.(options.metadata) || [];
1544
+ if (candidateIds.length > 0) {
1545
+ preFilteredIds = new Set(candidateIds);
1546
+ // Create a simple filter function that just checks the pre-filtered set
1547
+ filterFunction = async (id) => {
1548
+ if (!preFilteredIds.has(id))
1549
+ return false;
1550
+ // Still apply service filter if needed
1551
+ if (hasServiceFilter) {
1552
+ const metadata = await this.storage.getMetadata(id);
1553
+ const noun = this.index.getNouns().get(id);
1554
+ if (!noun || !metadata)
1555
+ return false;
1556
+ const result = { id, score: 0, vector: noun.vector, metadata };
1557
+ return this.filterResultsByService([result], options.service).length > 0;
1558
+ }
1559
+ return true;
1560
+ };
1379
1561
  }
1380
1562
  else {
1381
- // Real noun already exists, update it
1382
- if (this.loggingConfig?.verbose) {
1383
- console.log(`Updating existing noun ${options.id}`);
1384
- }
1563
+ // No items match the metadata criteria, return empty results immediately
1564
+ return [];
1385
1565
  }
1386
1566
  }
1567
+ catch (indexError) {
1568
+ console.warn('Metadata index error, falling back to full filtering:', indexError);
1569
+ // Fall back to full metadata filtering below
1570
+ }
1387
1571
  }
1388
- catch (storageError) {
1389
- // Item doesn't exist, continue with add operation
1390
- }
1391
- }
1392
- let noun;
1393
- // In write-only mode, skip index operations since index is not loaded
1394
- if (this.writeOnly) {
1395
- // Create noun object directly without adding to index
1396
- noun = {
1397
- id,
1398
- vector,
1399
- connections: new Map(),
1400
- level: 0, // Default level for new nodes
1401
- metadata: undefined // Will be set separately
1402
- };
1403
- }
1404
- else {
1405
- // Normal mode: Add to HNSW index first
1406
- await this.hnswIndex.addItem({ id, vector, metadata });
1407
- // Get the noun from the HNSW index
1408
- const indexNoun = this.hnswIndex.getNouns().get(id);
1409
- if (!indexNoun) {
1410
- throw new Error(`Failed to retrieve newly created noun with ID ${id}`);
1411
- }
1412
- noun = indexNoun;
1413
- }
1414
- // Save noun to storage using augmentation system
1415
- await this.augmentations.execute('saveNoun', { noun, options }, async () => {
1416
- await this.storage.saveNoun(noun);
1417
- const service = this.getServiceName(options);
1418
- await this.storage.incrementStatistic('noun', service);
1419
- });
1420
- // Save metadata if provided and not empty
1421
- if (metadata !== undefined) {
1422
- // Skip saving if metadata is an empty object
1423
- if (metadata &&
1424
- typeof metadata === 'object' &&
1425
- Object.keys(metadata).length === 0) {
1426
- // Don't save empty metadata
1427
- // Explicitly save null to ensure no metadata is stored
1428
- await this.storage.saveMetadata(id, null);
1429
- }
1430
- else {
1431
- // Validate noun type if metadata is for a GraphNoun
1432
- if (metadata && typeof metadata === 'object' && 'noun' in metadata) {
1433
- const nounType = metadata.noun;
1434
- // Check if the noun type is valid
1435
- const isValidNounType = Object.values(NounType).includes(nounType);
1436
- if (!isValidNounType) {
1437
- console.warn(`Invalid noun type: ${nounType}. Falling back to GraphNoun.`);
1438
- metadata.noun = NounType.Concept;
1439
- }
1440
- // Ensure createdBy field is populated for GraphNoun
1441
- const service = options.service || this.getCurrentAugmentation();
1442
- const graphNoun = metadata;
1443
- // Only set createdBy if it doesn't exist or is being explicitly updated
1444
- if (!graphNoun.createdBy || options.service) {
1445
- graphNoun.createdBy = getAugmentationVersion(service);
1446
- }
1447
- // Update timestamps
1448
- const now = new Date();
1449
- const timestamp = {
1450
- seconds: Math.floor(now.getTime() / 1000),
1451
- nanoseconds: (now.getTime() % 1000) * 1000000
1452
- };
1453
- // Set createdAt if it doesn't exist
1454
- if (!graphNoun.createdAt) {
1455
- graphNoun.createdAt = timestamp;
1572
+ // Fallback to full metadata filtering if index wasn't used
1573
+ if (!filterFunction && (hasMetadataFilter || hasServiceFilter)) {
1574
+ filterFunction = async (id) => {
1575
+ // Get metadata for filtering
1576
+ let metadata = await this.storage.getMetadata(id);
1577
+ if (metadata === null) {
1578
+ metadata = {};
1456
1579
  }
1457
- // Always update updatedAt
1458
- graphNoun.updatedAt = timestamp;
1459
- }
1460
- // Create properly namespaced metadata for new items
1461
- let metadataToSave = createNamespacedMetadata(metadata);
1462
- // Add domain metadata if distributed mode is enabled
1463
- if (this.domainDetector) {
1464
- // First check if domain is already in metadata
1465
- if (metadataToSave.domain) {
1466
- // Domain already specified, keep it
1467
- const domainInfo = this.domainDetector.detectDomain(metadataToSave);
1468
- if (domainInfo.domainMetadata) {
1469
- ;
1470
- metadataToSave.domainMetadata =
1471
- domainInfo.domainMetadata;
1580
+ // Apply metadata filter
1581
+ if (hasMetadataFilter) {
1582
+ const matches = matchesMetadataFilter(metadata, options.metadata);
1583
+ if (!matches) {
1584
+ return false;
1472
1585
  }
1473
1586
  }
1474
- else {
1475
- // Try to detect domain from the data
1476
- const dataToAnalyze = Array.isArray(vectorOrData)
1477
- ? metadata
1478
- : vectorOrData;
1479
- const domainInfo = this.domainDetector.detectDomain(dataToAnalyze);
1480
- if (domainInfo.domain) {
1481
- ;
1482
- metadataToSave.domain = domainInfo.domain;
1483
- if (domainInfo.domainMetadata) {
1484
- ;
1485
- metadataToSave.domainMetadata =
1486
- domainInfo.domainMetadata;
1487
- }
1587
+ // Apply service filter
1588
+ if (hasServiceFilter) {
1589
+ const noun = this.index.getNouns().get(id);
1590
+ if (!noun)
1591
+ return false;
1592
+ const result = { id, score: 0, vector: noun.vector, metadata };
1593
+ if (!this.filterResultsByService([result], options.service).length) {
1594
+ return false;
1488
1595
  }
1489
1596
  }
1597
+ return true;
1598
+ };
1599
+ }
1600
+ // When using offset, we need to fetch more results and then slice
1601
+ const offset = options.offset || 0;
1602
+ const totalNeeded = k + offset;
1603
+ // Search in the index with filter
1604
+ const results = await this.index.search(queryVector, totalNeeded, filterFunction);
1605
+ // Skip the offset number of results
1606
+ const paginatedResults = results.slice(offset, offset + k);
1607
+ // Get metadata for each result
1608
+ const searchResults = [];
1609
+ for (const [id, score] of paginatedResults) {
1610
+ const noun = this.index.getNouns().get(id);
1611
+ if (!noun) {
1612
+ continue;
1490
1613
  }
1491
- // Add partition information if distributed mode is enabled
1492
- if (this.partitioner) {
1493
- const partition = this.partitioner.getPartition(id);
1494
- metadataToSave.partition = partition;
1495
- }
1496
- await this.storage.saveMetadata(id, metadataToSave);
1497
- // Update metadata index (write-only mode should build indices!)
1498
- if (this.index && !this.frozen) {
1499
- await this.metadataIndex?.addToIndex?.(id, metadataToSave);
1500
- }
1501
- // Track metadata statistics
1502
- const metadataService = this.getServiceName(options);
1503
- await this.storage.incrementStatistic('metadata', metadataService);
1504
- // Track content type if it's a GraphNoun
1505
- if (metadataToSave &&
1506
- typeof metadataToSave === 'object' &&
1507
- 'noun' in metadataToSave) {
1508
- this.metrics.trackContentType(metadataToSave.noun);
1614
+ let metadata = await this.storage.getMetadata(id);
1615
+ // Initialize metadata to an empty object if it's null
1616
+ if (metadata === null) {
1617
+ metadata = {};
1509
1618
  }
1510
- // Track update timestamp (handled by metrics augmentation)
1619
+ // Preserve original metadata without overwriting user's custom fields
1620
+ // The search result already has Brainy's UUID in the main 'id' field
1621
+ searchResults.push({
1622
+ id,
1623
+ score: 1 - score, // Convert distance to similarity (higher = more similar)
1624
+ vector: noun.vector,
1625
+ metadata: metadata
1626
+ });
1511
1627
  }
1628
+ return searchResults;
1512
1629
  }
1513
- // Update HNSW index size with actual index size
1514
- const indexSize = this.index.size();
1515
- await this.storage.updateHnswIndexSize(indexSize);
1516
- // Update health metrics if in distributed mode
1517
- if (this.monitoring) {
1518
- const vectorCount = await this.getNounCount();
1519
- this.monitoring.updateVectorCount(vectorCount);
1520
- }
1521
- // If addToRemote is true and we're connected to a remote server, add to remote as well
1522
- if (options.addToRemote && this.isConnectedToRemoteServer()) {
1523
- try {
1524
- await this.addToRemote(id, vector, metadata);
1630
+ else {
1631
+ // Get nouns for each noun type in parallel
1632
+ const nounPromises = nounTypes.map((nounType) => this.storage.getNounsByNounType(nounType));
1633
+ const nounArrays = await Promise.all(nounPromises);
1634
+ // Combine all nouns
1635
+ const nouns = [];
1636
+ for (const nounArray of nounArrays) {
1637
+ nouns.push(...nounArray);
1525
1638
  }
1526
- catch (remoteError) {
1527
- console.warn(`Failed to add to remote server: ${remoteError}. Continuing with local add.`);
1639
+ // Calculate distances for each noun
1640
+ const results = [];
1641
+ for (const noun of nouns) {
1642
+ const distance = this.index.getDistanceFunction()(queryVector, noun.vector);
1643
+ results.push([noun.id, distance]);
1528
1644
  }
1529
- }
1530
- // Invalidate search cache since data has changed
1531
- this.cache?.invalidateOnDataChange('add');
1532
- // Determine processing mode
1533
- const processingMode = options.process || 'auto';
1534
- let shouldProcessNeurally = false;
1535
- if (processingMode === 'neural') {
1536
- shouldProcessNeurally = true;
1537
- }
1538
- else if (processingMode === 'auto') {
1539
- // Auto-detect whether to use neural processing
1540
- shouldProcessNeurally = this.shouldAutoProcessNeurally(vectorOrData, metadata);
1541
- }
1542
- // 'literal' mode means no neural processing
1543
- // 🧠 AI Processing (Neural Import) - Based on processing mode
1544
- if (shouldProcessNeurally) {
1545
- try {
1546
- // Execute augmentation pipeline for data processing
1547
- // Note: Augmentations will be called via this.augmentations.execute during the actual add operation
1548
- // This replaces the legacy SENSE pipeline
1549
- if (this.loggingConfig?.verbose) {
1550
- console.log(`🧠 AI processing completed for data: ${id}`);
1645
+ // Sort by distance (ascending)
1646
+ results.sort((a, b) => a[1] - b[1]);
1647
+ // Apply offset and take k results
1648
+ const offset = options.offset || 0;
1649
+ const topResults = results.slice(offset, offset + k);
1650
+ // Get metadata for each result
1651
+ const searchResults = [];
1652
+ for (const [id, score] of topResults) {
1653
+ const noun = nouns.find((n) => n.id === id);
1654
+ if (!noun) {
1655
+ continue;
1551
1656
  }
1657
+ let metadata = await this.storage.getMetadata(id);
1658
+ // Initialize metadata to an empty object if it's null
1659
+ if (metadata === null) {
1660
+ metadata = {};
1661
+ }
1662
+ // Preserve original metadata without overwriting user's custom fields
1663
+ // The search result already has Brainy's UUID in the main 'id' field
1664
+ searchResults.push({
1665
+ id,
1666
+ score: 1 - score, // Convert distance to similarity (higher = more similar)
1667
+ vector: noun.vector,
1668
+ metadata: metadata
1669
+ });
1552
1670
  }
1553
- catch (processingError) {
1554
- // Don't fail the add operation if processing fails
1555
- console.warn(`🧠 AI processing failed for ${id}:`, processingError);
1556
- }
1671
+ // Results are already filtered, just return them
1672
+ return searchResults;
1557
1673
  }
1558
- return id;
1559
1674
  }
1560
1675
  catch (error) {
1561
- console.error('Failed to add vector:', error);
1562
- // Track error in health monitor
1563
- if (this.monitoring) {
1564
- this.monitoring.recordRequest(0, true);
1565
- }
1566
- throw new Error(`Failed to add vector: ${error}`);
1676
+ console.error('Failed to search vectors by noun types:', error);
1677
+ throw new Error(`Failed to search vectors by noun types: ${error}`);
1567
1678
  }
1568
1679
  }
1569
- // REMOVED: addItem() - Use addNoun() instead (cleaner 2.0 API)
1570
- // REMOVED: addToBoth() - Remote server functionality moved to post-2.0.0
1571
1680
  /**
1572
- * Add a vector to the remote server
1573
- * @param id ID of the vector to add
1574
- * @param vector Vector to add
1575
- * @param metadata Optional metadata to associate with the vector
1576
- * @returns True if successful, false otherwise
1577
- * @private
1681
+ * Search for similar vectors
1682
+ * @param queryVectorOrData Query vector or data to search for
1683
+ * @param k Number of results to return
1684
+ * @param options Additional options
1685
+ * @returns Array of search results
1578
1686
  */
1579
- async addToRemote(id, vector, metadata) {
1580
- if (!this.isConnectedToRemoteServer()) {
1581
- return false;
1687
+ /**
1688
+ * 🔍 SIMPLE VECTOR SEARCH - Clean wrapper around find() for pure vector search
1689
+ *
1690
+ * @param queryVectorOrData Vector or text to search for
1691
+ * @param k Number of results to return
1692
+ * @param options Simple search options (metadata filters only)
1693
+ * @returns Vector search results
1694
+ */
1695
+ /**
1696
+ * 🔍 Simple Vector Similarity Search - Clean wrapper around find()
1697
+ *
1698
+ * search(query) = find({like: query}) - Pure vector similarity search
1699
+ *
1700
+ * @param queryVectorOrData - Query string, vector, or object to search with
1701
+ * @param options - Search options for filtering and pagination
1702
+ * @returns Array of search results with scores and metadata
1703
+ *
1704
+ * @example
1705
+ * // Simple vector search
1706
+ * await brain.search('machine learning')
1707
+ *
1708
+ * // With filters and pagination
1709
+ * await brain.search('AI', {
1710
+ * limit: 20,
1711
+ * metadata: { type: 'article' },
1712
+ * nounTypes: ['document']
1713
+ * })
1714
+ */
1715
+ async search(queryVectorOrData, options = {}) {
1716
+ // Build metadata filter from options
1717
+ const metadataFilter = { ...options.metadata };
1718
+ // Add noun type filtering
1719
+ if (options.nounTypes && options.nounTypes.length > 0) {
1720
+ metadataFilter.nounType = { in: options.nounTypes };
1582
1721
  }
1583
- try {
1584
- // TODO: Remote server operations (post-2.0.0 feature)
1585
- // if (!this.serverSearchConduit || !this.serverConnection) {
1586
- // throw new Error(
1587
- // 'Server search conduit or connection is not initialized'
1588
- // )
1589
- // }
1590
- // TODO: Add to remote server
1591
- // const addResult = await this.serverSearchConduit.addToBoth(
1592
- // this.serverConnection.connectionId,
1593
- // vector,
1594
- // metadata
1595
- // )
1596
- throw new Error('Remote server functionality not yet implemented in Brainy 2.0.0');
1597
- // TODO: Handle remote add result (post-2.0.0 feature)
1598
- // if (!addResult.success) {
1599
- // throw new Error(`Remote add failed: ${addResult.error}`)
1600
- // }
1601
- return true;
1722
+ // Add item ID filtering
1723
+ if (options.itemIds && options.itemIds.length > 0) {
1724
+ metadataFilter.id = { in: options.itemIds };
1602
1725
  }
1603
- catch (error) {
1604
- console.error('Failed to add to remote server:', error);
1605
- throw new Error(`Failed to add to remote server: ${error}`);
1726
+ // Build simple TripleQuery for vector similarity
1727
+ const tripleQuery = {
1728
+ like: queryVectorOrData
1729
+ };
1730
+ // Add metadata filter if we have conditions
1731
+ if (Object.keys(metadataFilter).length > 0) {
1732
+ tripleQuery.where = metadataFilter;
1733
+ }
1734
+ // Extract find() options
1735
+ const findOptions = {
1736
+ limit: options.limit,
1737
+ offset: options.offset,
1738
+ cursor: options.cursor,
1739
+ excludeDeleted: options.excludeDeleted,
1740
+ timeout: options.timeout
1741
+ };
1742
+ // Call find() with structured query - this is the key simplification!
1743
+ let results = await this.find(tripleQuery, findOptions);
1744
+ // Apply threshold filtering if specified
1745
+ if (options.threshold !== undefined) {
1746
+ results = results.filter(r => (r.fusionScore || r.score || 0) >= options.threshold);
1606
1747
  }
1748
+ // Convert to SearchResult format
1749
+ return results.map(r => ({
1750
+ ...r,
1751
+ score: r.fusionScore || r.score || 0
1752
+ }));
1753
+ return results;
1607
1754
  }
1608
1755
  /**
1609
- * Add multiple vectors or data items to the database
1610
- * @param items Array of items to add
1611
- * @param options Additional options
1612
- * @returns Array of IDs for the added items
1756
+ * Helper method to encode cursor for pagination
1757
+ * @internal
1613
1758
  */
1759
+ encodeCursor(data) {
1760
+ return Buffer.from(JSON.stringify(data)).toString('base64');
1761
+ }
1614
1762
  /**
1615
- * Add multiple nouns in batch
1616
- * @param items Array of nouns to add
1617
- * @param options Batch processing options
1618
- * @returns Array of generated IDs
1763
+ * Helper method to decode cursor for pagination
1764
+ * @internal
1619
1765
  */
1620
- async addNouns(items, options = {}) {
1621
- await this.ensureInitialized();
1622
- // Check if database is in read-only mode
1623
- this.checkReadOnly();
1624
- // Default concurrency to 4 if not specified
1625
- const concurrency = options.concurrency || 4;
1626
- // Default batch size to 50 if not specified
1627
- const batchSize = options.batchSize || 50;
1766
+ decodeCursor(cursor) {
1628
1767
  try {
1629
- // Process items in batches to control concurrency and memory usage
1630
- const ids = [];
1631
- const itemsToProcess = [...items]; // Create a copy to avoid modifying the original array
1632
- while (itemsToProcess.length > 0) {
1633
- // Take up to 'batchSize' items to process in a batch
1634
- const batch = itemsToProcess.splice(0, batchSize);
1635
- // Separate items that are already vectors from those that need embedding
1636
- const vectorItems = [];
1637
- const textItems = [];
1638
- // Categorize items
1639
- batch.forEach((item, index) => {
1640
- if (Array.isArray(item.vectorOrData) &&
1641
- item.vectorOrData.every((val) => typeof val === 'number') &&
1642
- !options.forceEmbed) {
1643
- // Item is already a vector
1644
- vectorItems.push({
1645
- vectorOrData: item.vectorOrData,
1646
- metadata: item.metadata,
1647
- index
1648
- });
1649
- }
1650
- else if (typeof item.vectorOrData === 'string') {
1651
- // Item is text that needs embedding
1652
- textItems.push({
1653
- text: item.vectorOrData,
1654
- metadata: item.metadata,
1655
- index
1656
- });
1657
- }
1658
- else {
1659
- // For now, treat other types as text
1660
- // In a more complete implementation, we might handle other types differently
1661
- const textRepresentation = String(item.vectorOrData);
1662
- textItems.push({
1663
- text: textRepresentation,
1664
- metadata: item.metadata,
1665
- index
1666
- });
1667
- }
1668
- });
1669
- // Process vector items (already embedded)
1670
- const vectorPromises = vectorItems.map((item) => this.addNoun(item.vectorOrData, item.metadata));
1671
- // Process text items in a single batch embedding operation
1672
- let textPromises = [];
1673
- if (textItems.length > 0) {
1674
- // Extract just the text for batch embedding
1675
- const texts = textItems.map((item) => item.text);
1676
- // Perform batch embedding
1677
- const embeddings = await batchEmbed(texts);
1678
- // Add each item with its embedding
1679
- textPromises = textItems.map((item, i) => this.addNoun(embeddings[i], item.metadata));
1680
- }
1681
- // Combine all promises
1682
- const batchResults = await Promise.all([
1683
- ...vectorPromises,
1684
- ...textPromises
1685
- ]);
1686
- // Add the results to our ids array
1687
- ids.push(...batchResults);
1688
- }
1689
- return ids;
1768
+ return JSON.parse(Buffer.from(cursor, 'base64').toString());
1690
1769
  }
1691
- catch (error) {
1692
- console.error('Failed to add batch of items:', error);
1693
- throw new Error(`Failed to add batch of items: ${error}`);
1770
+ catch {
1771
+ return { offset: 0, timestamp: 0 };
1694
1772
  }
1695
1773
  }
1696
1774
  /**
1697
- * Add multiple vectors or data items to both local and remote databases
1698
- * @param items Array of items to add
1699
- * @param options Additional options
1700
- * @returns Array of IDs for the added items
1775
+ * Internal method for direct HNSW vector search
1776
+ * Used by TripleIntelligence to avoid circular dependencies
1777
+ * Note: For pure metadata filtering, use metadataIndex.getIdsForFilter() directly - it's O(log n)!
1778
+ * This method is for vector similarity search with optional metadata filtering during search
1779
+ * @internal
1701
1780
  */
1702
- async addBatchToBoth(items, options = {}) {
1703
- // Check if connected to a remote server
1704
- if (!this.isConnectedToRemoteServer()) {
1705
- throw new Error('Not connected to a remote server. Call connectToRemoteServer() first.');
1781
+ async _internalVectorSearch(queryVectorOrData, k = 10, options = {}) {
1782
+ // Generate query vector
1783
+ const queryVector = Array.isArray(queryVectorOrData) &&
1784
+ typeof queryVectorOrData[0] === 'number' ?
1785
+ queryVectorOrData :
1786
+ await this.embed(queryVectorOrData);
1787
+ // Apply metadata filter if provided
1788
+ let filterFunction;
1789
+ if (options.metadata) {
1790
+ const matchingIdsArray = await this.metadataIndex?.getIdsForFilter(options.metadata) || [];
1791
+ const matchingIds = new Set(matchingIdsArray);
1792
+ filterFunction = async (id) => matchingIds.has(id);
1706
1793
  }
1707
- // Add to local with addToRemote option
1708
- return this.addNouns(items, { ...options, addToRemote: true });
1794
+ // Direct HNSW search
1795
+ const results = await this.index.search(queryVector, k, filterFunction);
1796
+ // Get metadata for results
1797
+ const searchResults = [];
1798
+ for (const [id, similarity] of results) {
1799
+ const metadata = await this.getNoun(id);
1800
+ searchResults.push({
1801
+ id,
1802
+ score: similarity,
1803
+ vector: [],
1804
+ metadata: metadata?.metadata || {}
1805
+ });
1806
+ }
1807
+ return searchResults;
1709
1808
  }
1710
1809
  /**
1711
- * Filter search results by service
1712
- * @param results Search results to filter
1713
- * @param service Service to filter by
1714
- * @returns Filtered search results
1715
- * @private
1810
+ * 🎯 LEGACY: Original search implementation (kept for complex cases)
1811
+ * This is the original search method, now used as fallback for edge cases
1716
1812
  */
1717
- filterResultsByService(results, service) {
1718
- if (!service)
1719
- return results;
1720
- return results.filter((result) => {
1721
- if (!result.metadata || typeof result.metadata !== 'object')
1722
- return false;
1723
- if (!('createdBy' in result.metadata))
1724
- return false;
1725
- const createdBy = result.metadata.createdBy;
1726
- if (!createdBy)
1727
- return false;
1728
- return createdBy.augmentation === service;
1813
+ async _legacySearch(queryVectorOrData, k = 10, options = {}) {
1814
+ const startTime = Date.now();
1815
+ // Validate input is not null or undefined
1816
+ if (queryVectorOrData === null || queryVectorOrData === undefined) {
1817
+ throw new Error('Query cannot be null or undefined');
1818
+ }
1819
+ // Validate k parameter first, before any other logic
1820
+ if (k <= 0 || typeof k !== 'number' || isNaN(k)) {
1821
+ throw new Error('Parameter k must be a positive number');
1822
+ }
1823
+ if (!this.isInitialized) {
1824
+ throw new Error('BrainyData must be initialized before searching. Call init() first.');
1825
+ }
1826
+ // Check if database is in write-only mode
1827
+ this.checkWriteOnly();
1828
+ // If searching for verbs directly
1829
+ if (options.searchVerbs) {
1830
+ const verbResults = await this.searchVerbs(queryVectorOrData, k, {
1831
+ forceEmbed: options.forceEmbed,
1832
+ verbTypes: options.verbTypes
1833
+ });
1834
+ // Convert verb results to SearchResult format
1835
+ return verbResults.map((verb) => ({
1836
+ id: verb.id,
1837
+ score: verb.similarity,
1838
+ vector: verb.embedding || [],
1839
+ metadata: {
1840
+ verb: verb.verb,
1841
+ source: verb.source,
1842
+ target: verb.target,
1843
+ ...verb.data
1844
+ }
1845
+ }));
1846
+ }
1847
+ // If searching for nouns connected by verbs
1848
+ if (options.searchConnectedNouns) {
1849
+ return this.searchNounsByVerbs(queryVectorOrData, k, {
1850
+ forceEmbed: options.forceEmbed,
1851
+ verbTypes: options.verbTypes,
1852
+ direction: options.verbDirection
1853
+ });
1854
+ }
1855
+ // If a specific search mode is specified, use the appropriate search method
1856
+ if (options.searchMode === 'local') {
1857
+ return this.searchLocal(queryVectorOrData, k, options);
1858
+ }
1859
+ else if (options.searchMode === 'remote') {
1860
+ return this.searchRemote(queryVectorOrData, k, options);
1861
+ }
1862
+ else if (options.searchMode === 'combined') {
1863
+ return this.searchCombined(queryVectorOrData, k, options);
1864
+ }
1865
+ // Generate deduplication key for concurrent request handling
1866
+ const dedupeKey = RequestDeduplicator.getSearchKey(typeof queryVectorOrData === 'string' ? queryVectorOrData : JSON.stringify(queryVectorOrData), k, options);
1867
+ // Use augmentation system for search (includes deduplication, batching, and caching)
1868
+ return this.augmentations.execute('search', { query: queryVectorOrData, k, options, dedupeKey }, async () => {
1869
+ // Default behavior (backward compatible): search locally
1870
+ try {
1871
+ // BEST OF BOTH: Automatically exclude soft-deleted items (Neural Intelligence improvement)
1872
+ // BUT only when there's already metadata filtering happening
1873
+ let metadataFilter = options.metadata;
1874
+ // Only add soft-delete filter if there's already metadata being filtered
1875
+ // This preserves pure vector searches without metadata
1876
+ if (metadataFilter && Object.keys(metadataFilter).length > 0) {
1877
+ // If no explicit deleted filter is provided, exclude soft-deleted items
1878
+ // Use namespaced field for O(1) performance
1879
+ if (!metadataFilter['_brainy.deleted'] && !metadataFilter.anyOf) {
1880
+ metadataFilter = {
1881
+ ...metadataFilter,
1882
+ ['_brainy.deleted']: false // O(1) positive match instead of notEquals
1883
+ };
1884
+ }
1885
+ }
1886
+ const hasMetadataFilter = metadataFilter && Object.keys(metadataFilter).length > 0;
1887
+ // Check cache first (transparent to user) - but skip cache if we have metadata filters
1888
+ if (!hasMetadataFilter) {
1889
+ const cacheKey = this.cache?.getCacheKey(queryVectorOrData, k, options);
1890
+ const cachedResults = this.cache?.get(cacheKey);
1891
+ if (cachedResults) {
1892
+ // Track cache hit in health monitor
1893
+ if (this.monitoring) {
1894
+ const latency = Date.now() - startTime;
1895
+ this.monitoring.recordRequest(latency, false);
1896
+ this.monitoring.recordCacheAccess(true);
1897
+ }
1898
+ return cachedResults;
1899
+ }
1900
+ }
1901
+ // Cache miss - perform actual search
1902
+ const results = await this.searchLocal(queryVectorOrData, k, {
1903
+ ...options,
1904
+ metadata: metadataFilter
1905
+ });
1906
+ // Cache results for future queries (unless explicitly disabled or has metadata filter)
1907
+ if (!options.skipCache && !hasMetadataFilter) {
1908
+ const cacheKey = this.cache?.getCacheKey(queryVectorOrData, k, options);
1909
+ this.cache?.set(cacheKey, results);
1910
+ }
1911
+ // Track successful search in health monitor
1912
+ if (this.monitoring) {
1913
+ const latency = Date.now() - startTime;
1914
+ this.monitoring.recordRequest(latency, false);
1915
+ this.monitoring.recordCacheAccess(false);
1916
+ }
1917
+ return results;
1918
+ }
1919
+ catch (error) {
1920
+ // Track error in health monitor
1921
+ if (this.monitoring) {
1922
+ const latency = Date.now() - startTime;
1923
+ this.monitoring.recordRequest(latency, true);
1924
+ }
1925
+ throw error;
1926
+ }
1729
1927
  });
1730
1928
  }
1731
1929
  /**
1732
- * Search for similar vectors within specific noun types
1930
+ * Search with cursor-based pagination for better performance on large datasets
1733
1931
  * @param queryVectorOrData Query vector or data to search for
1734
1932
  * @param k Number of results to return
1735
- * @param nounTypes Array of noun types to search within, or null to search all
1736
- * @param options Additional options
1737
- * @returns Array of search results
1933
+ * @param options Additional options including cursor for pagination
1934
+ * @returns Paginated search results with cursor for next page
1738
1935
  */
1739
1936
  /**
1740
- * @deprecated Use search() with nounTypes option instead
1937
+ * @deprecated Use search() with cursor option instead
1741
1938
  * @example
1742
1939
  * // Old way (deprecated)
1743
- * await brain.searchByNounTypes(query, 10, ['type1', 'type2'])
1940
+ * await brain.searchWithCursor(query, 10, { cursor: 'abc123' })
1744
1941
  * // New way
1745
- * await brain.search(query, { limit: 10, nounTypes: ['type1', 'type2'] })
1942
+ * await brain.search(query, { limit: 10, cursor: 'abc123' })
1746
1943
  */
1747
- async searchByNounTypes(queryVectorOrData, k = 10, nounTypes = null, options = {}) {
1748
- // Helper function to filter results by service
1749
- const filterByService = (metadata) => {
1750
- if (!options.service)
1751
- return true; // No filter, include all
1752
- // Check if metadata has createdBy field with matching service
1753
- if (!metadata || typeof metadata !== 'object')
1754
- return false;
1755
- if (!('createdBy' in metadata))
1756
- return false;
1757
- const createdBy = metadata.createdBy;
1758
- if (!createdBy)
1759
- return false;
1760
- return createdBy.augmentation === options.service;
1761
- };
1762
- if (!this.isInitialized) {
1763
- throw new Error('BrainyData must be initialized before searching. Call init() first.');
1764
- }
1765
- // Check if database is in write-only mode
1766
- this.checkWriteOnly();
1767
- try {
1768
- let queryVector;
1769
- // Check if input is already a vector
1770
- if (Array.isArray(queryVectorOrData) &&
1771
- queryVectorOrData.every((item) => typeof item === 'number') &&
1772
- !options.forceEmbed) {
1773
- // Input is already a vector
1774
- queryVector = queryVectorOrData;
1944
+ async searchWithCursor(queryVectorOrData, k = 10, options = {}) {
1945
+ // For cursor-based search, we need to fetch more results and filter
1946
+ const searchK = options.cursor ? k + 20 : k; // Get extra results for filtering
1947
+ // Perform regular search
1948
+ const { cursor, ...searchOptions } = options;
1949
+ const allResults = await this.search(queryVectorOrData, {
1950
+ limit: searchK,
1951
+ nounTypes: searchOptions.nounTypes,
1952
+ metadata: searchOptions.filter
1953
+ });
1954
+ let results = allResults;
1955
+ let startIndex = 0;
1956
+ // If cursor provided, find starting position
1957
+ if (options.cursor) {
1958
+ startIndex = allResults.findIndex((r) => r.id === options.cursor.lastId &&
1959
+ Math.abs(r.score - options.cursor.lastScore) < 0.0001);
1960
+ if (startIndex >= 0) {
1961
+ startIndex += 1; // Start after the cursor position
1962
+ results = allResults.slice(startIndex, startIndex + k);
1775
1963
  }
1776
1964
  else {
1777
- // Input needs to be vectorized
1778
- try {
1779
- queryVector = await this.embeddingFunction(queryVectorOrData);
1780
- }
1781
- catch (embedError) {
1782
- throw new Error(`Failed to vectorize query data: ${embedError}`);
1783
- }
1784
- }
1785
- // Check if query vector is defined
1786
- if (!queryVector) {
1787
- throw new Error('Query vector is undefined or null');
1788
- }
1789
- // Check if query vector dimensions match the expected dimensions
1790
- if (queryVector.length !== this._dimensions) {
1791
- throw new Error(`Query vector dimension mismatch: expected ${this._dimensions}, got ${queryVector.length}`);
1792
- }
1793
- // If no noun types specified, search all nouns
1794
- if (!nounTypes || nounTypes.length === 0) {
1795
- // Check if we're in readonly mode with lazy loading and the index is empty
1796
- const indexSize = this.index.getNouns().size;
1797
- if (this.readOnly && this.lazyLoadInReadOnlyMode && indexSize === 0) {
1798
- if (this.loggingConfig?.verbose) {
1799
- console.log('Lazy loading mode: Index is empty, loading nodes for search...');
1800
- }
1801
- // In lazy loading mode, we need to load some nodes to search
1802
- // Instead of loading all nodes, we'll load a subset of nodes
1803
- // Load a limited number of nodes from storage using pagination
1804
- const result = await this.storage.getNouns({
1805
- pagination: { offset: 0, limit: k * 10 } // Get 10x more nodes than needed
1806
- });
1807
- const limitedNouns = result.items;
1808
- // Add these nodes to the index
1809
- for (const node of limitedNouns) {
1810
- // Check if the vector dimensions match the expected dimensions
1811
- if (node.vector.length !== this._dimensions) {
1812
- console.warn(`Skipping node ${node.id} due to dimension mismatch: expected ${this._dimensions}, got ${node.vector.length}`);
1813
- continue;
1814
- }
1815
- // Add to index
1816
- await this.index.addItem({
1817
- id: node.id,
1818
- vector: node.vector
1819
- });
1820
- }
1821
- if (this.loggingConfig?.verbose) {
1822
- console.log(`Lazy loading mode: Added ${limitedNouns.length} nodes to index for search`);
1823
- }
1824
- }
1825
- // Create filter function for HNSW search with metadata index optimization
1826
- const hasMetadataFilter = options.metadata && Object.keys(options.metadata).length > 0;
1827
- const hasServiceFilter = !!options.service;
1828
- let filterFunction;
1829
- let preFilteredIds;
1830
- // Use metadata index for pre-filtering if available
1831
- if (hasMetadataFilter && this.metadataIndex) {
1832
- try {
1833
- // Ensure metadata index is up to date
1834
- await this.metadataIndex?.flush?.();
1835
- // Get candidate IDs from metadata index
1836
- const candidateIds = await this.metadataIndex?.getIdsForFilter?.(options.metadata) || [];
1837
- if (candidateIds.length > 0) {
1838
- preFilteredIds = new Set(candidateIds);
1839
- // Create a simple filter function that just checks the pre-filtered set
1840
- filterFunction = async (id) => {
1841
- if (!preFilteredIds.has(id))
1842
- return false;
1843
- // Still apply service filter if needed
1844
- if (hasServiceFilter) {
1845
- const metadata = await this.storage.getMetadata(id);
1846
- const noun = this.index.getNouns().get(id);
1847
- if (!noun || !metadata)
1848
- return false;
1849
- const result = { id, score: 0, vector: noun.vector, metadata };
1850
- return this.filterResultsByService([result], options.service).length > 0;
1851
- }
1852
- return true;
1853
- };
1854
- }
1855
- else {
1856
- // No items match the metadata criteria, return empty results immediately
1857
- return [];
1858
- }
1859
- }
1860
- catch (indexError) {
1861
- console.warn('Metadata index error, falling back to full filtering:', indexError);
1862
- // Fall back to full metadata filtering below
1863
- }
1864
- }
1865
- // Fallback to full metadata filtering if index wasn't used
1866
- if (!filterFunction && (hasMetadataFilter || hasServiceFilter)) {
1867
- filterFunction = async (id) => {
1868
- // Get metadata for filtering
1869
- let metadata = await this.storage.getMetadata(id);
1870
- if (metadata === null) {
1871
- metadata = {};
1872
- }
1873
- // Apply metadata filter
1874
- if (hasMetadataFilter) {
1875
- const matches = matchesMetadataFilter(metadata, options.metadata);
1876
- if (!matches) {
1877
- return false;
1878
- }
1879
- }
1880
- // Apply service filter
1881
- if (hasServiceFilter) {
1882
- const noun = this.index.getNouns().get(id);
1883
- if (!noun)
1884
- return false;
1885
- const result = { id, score: 0, vector: noun.vector, metadata };
1886
- if (!this.filterResultsByService([result], options.service).length) {
1887
- return false;
1888
- }
1889
- }
1890
- return true;
1891
- };
1892
- }
1893
- // When using offset, we need to fetch more results and then slice
1894
- const offset = options.offset || 0;
1895
- const totalNeeded = k + offset;
1896
- // Search in the index with filter
1897
- const results = await this.index.search(queryVector, totalNeeded, filterFunction);
1898
- // Skip the offset number of results
1899
- const paginatedResults = results.slice(offset, offset + k);
1900
- // Get metadata for each result
1901
- const searchResults = [];
1902
- for (const [id, score] of paginatedResults) {
1903
- const noun = this.index.getNouns().get(id);
1904
- if (!noun) {
1905
- continue;
1906
- }
1907
- let metadata = await this.storage.getMetadata(id);
1908
- // Initialize metadata to an empty object if it's null
1909
- if (metadata === null) {
1910
- metadata = {};
1911
- }
1912
- // Preserve original metadata without overwriting user's custom fields
1913
- // The search result already has Brainy's UUID in the main 'id' field
1914
- searchResults.push({
1915
- id,
1916
- score: 1 - score, // Convert distance to similarity (higher = more similar)
1917
- vector: noun.vector,
1918
- metadata: metadata
1919
- });
1920
- }
1921
- return searchResults;
1922
- }
1923
- else {
1924
- // Get nouns for each noun type in parallel
1925
- const nounPromises = nounTypes.map((nounType) => this.storage.getNounsByNounType(nounType));
1926
- const nounArrays = await Promise.all(nounPromises);
1927
- // Combine all nouns
1928
- const nouns = [];
1929
- for (const nounArray of nounArrays) {
1930
- nouns.push(...nounArray);
1931
- }
1932
- // Calculate distances for each noun
1933
- const results = [];
1934
- for (const noun of nouns) {
1935
- const distance = this.index.getDistanceFunction()(queryVector, noun.vector);
1936
- results.push([noun.id, distance]);
1937
- }
1938
- // Sort by distance (ascending)
1939
- results.sort((a, b) => a[1] - b[1]);
1940
- // Apply offset and take k results
1941
- const offset = options.offset || 0;
1942
- const topResults = results.slice(offset, offset + k);
1943
- // Get metadata for each result
1944
- const searchResults = [];
1945
- for (const [id, score] of topResults) {
1946
- const noun = nouns.find((n) => n.id === id);
1947
- if (!noun) {
1948
- continue;
1949
- }
1950
- let metadata = await this.storage.getMetadata(id);
1951
- // Initialize metadata to an empty object if it's null
1952
- if (metadata === null) {
1953
- metadata = {};
1954
- }
1955
- // Preserve original metadata without overwriting user's custom fields
1956
- // The search result already has Brainy's UUID in the main 'id' field
1957
- searchResults.push({
1958
- id,
1959
- score: 1 - score, // Convert distance to similarity (higher = more similar)
1960
- vector: noun.vector,
1961
- metadata: metadata
1962
- });
1963
- }
1964
- // Results are already filtered, just return them
1965
- return searchResults;
1965
+ // Cursor not found, might be stale - return from beginning
1966
+ results = allResults.slice(0, k);
1967
+ startIndex = 0;
1966
1968
  }
1967
1969
  }
1968
- catch (error) {
1969
- console.error('Failed to search vectors by noun types:', error);
1970
- throw new Error(`Failed to search vectors by noun types: ${error}`);
1970
+ else {
1971
+ results = allResults.slice(0, k);
1972
+ }
1973
+ // Create cursor for next page
1974
+ let nextCursor;
1975
+ const hasMoreResults = startIndex + results.length < allResults.length ||
1976
+ allResults.length >= searchK;
1977
+ if (results.length > 0 && hasMoreResults) {
1978
+ const lastResult = results[results.length - 1];
1979
+ nextCursor = {
1980
+ lastId: lastResult.id,
1981
+ lastScore: lastResult.score,
1982
+ position: startIndex + results.length
1983
+ };
1971
1984
  }
1985
+ return {
1986
+ results,
1987
+ cursor: nextCursor,
1988
+ hasMore: !!nextCursor,
1989
+ totalEstimate: allResults.length > searchK ? undefined : allResults.length
1990
+ };
1972
1991
  }
1973
1992
  /**
1974
- * Search for similar vectors
1993
+ * Search the local database for similar vectors
1975
1994
  * @param queryVectorOrData Query vector or data to search for
1976
1995
  * @param k Number of results to return
1977
1996
  * @param options Additional options
1978
1997
  * @returns Array of search results
1979
1998
  */
1980
- /**
1981
- * 🔍 SIMPLE VECTOR SEARCH - Clean wrapper around find() for pure vector search
1982
- *
1983
- * @param queryVectorOrData Vector or text to search for
1984
- * @param k Number of results to return
1985
- * @param options Simple search options (metadata filters only)
1986
- * @returns Vector search results
1987
- */
1988
- /**
1989
- * 🔍 Simple Vector Similarity Search - Clean wrapper around find()
1990
- *
1991
- * search(query) = find({like: query}) - Pure vector similarity search
1992
- *
1993
- * @param queryVectorOrData - Query string, vector, or object to search with
1994
- * @param options - Search options for filtering and pagination
1995
- * @returns Array of search results with scores and metadata
1996
- *
1997
- * @example
1998
- * // Simple vector search
1999
- * await brain.search('machine learning')
2000
- *
2001
- * // With filters and pagination
2002
- * await brain.search('AI', {
2003
- * limit: 20,
2004
- * metadata: { type: 'article' },
2005
- * nounTypes: ['document']
2006
- * })
2007
- */
2008
- async search(queryVectorOrData, options = {}) {
2009
- // Build metadata filter from options
2010
- const metadataFilter = { ...options.metadata };
2011
- // Add noun type filtering
2012
- if (options.nounTypes && options.nounTypes.length > 0) {
2013
- metadataFilter.nounType = { in: options.nounTypes };
2014
- }
2015
- // Add item ID filtering
2016
- if (options.itemIds && options.itemIds.length > 0) {
2017
- metadataFilter.id = { in: options.itemIds };
1999
+ async searchLocal(queryVectorOrData, k = 10, options = {}) {
2000
+ if (!this.isInitialized) {
2001
+ throw new Error('BrainyData must be initialized before searching. Call init() first.');
2018
2002
  }
2019
- // Build simple TripleQuery for vector similarity
2020
- const tripleQuery = {
2021
- like: queryVectorOrData
2022
- };
2023
- // Add metadata filter if we have conditions
2024
- if (Object.keys(metadataFilter).length > 0) {
2025
- tripleQuery.where = metadataFilter;
2003
+ // Check if database is in write-only mode
2004
+ this.checkWriteOnly();
2005
+ // Process the query input for vectorization
2006
+ let queryToUse = queryVectorOrData;
2007
+ // Handle string queries
2008
+ if (typeof queryVectorOrData === 'string' && !options.forceEmbed) {
2009
+ queryToUse = await this.embed(queryVectorOrData);
2010
+ options.forceEmbed = false; // Already embedded, don't force again
2026
2011
  }
2027
- // Extract find() options
2028
- const findOptions = {
2029
- limit: options.limit,
2030
- offset: options.offset,
2031
- cursor: options.cursor,
2032
- excludeDeleted: options.excludeDeleted,
2033
- timeout: options.timeout
2034
- };
2035
- // Call find() with structured query - this is the key simplification!
2036
- let results = await this.find(tripleQuery, findOptions);
2037
- // Apply threshold filtering if specified
2038
- if (options.threshold !== undefined) {
2039
- results = results.filter(r => (r.fusionScore || r.score || 0) >= options.threshold);
2012
+ // Handle JSON object queries with special processing
2013
+ else if (typeof queryVectorOrData === 'object' &&
2014
+ queryVectorOrData !== null &&
2015
+ !Array.isArray(queryVectorOrData) &&
2016
+ !options.forceEmbed) {
2017
+ // If searching within a specific field
2018
+ if (options.searchField) {
2019
+ // Extract text from the specific field
2020
+ const fieldText = extractFieldFromJson(queryVectorOrData, options.searchField);
2021
+ if (fieldText) {
2022
+ queryToUse = await this.embeddingFunction(fieldText);
2023
+ options.forceEmbed = false; // Already embedded, don't force again
2024
+ }
2025
+ }
2026
+ // Otherwise process the entire object with priority fields
2027
+ else {
2028
+ const preparedText = prepareJsonForVectorization(queryVectorOrData, {
2029
+ priorityFields: options.priorityFields || [
2030
+ 'name',
2031
+ 'title',
2032
+ 'company',
2033
+ 'organization',
2034
+ 'description',
2035
+ 'summary'
2036
+ ]
2037
+ });
2038
+ queryToUse = await this.embeddingFunction(preparedText);
2039
+ options.forceEmbed = false; // Already embedded, don't force again
2040
+ }
2040
2041
  }
2041
- // Convert to SearchResult format
2042
- return results.map(r => ({
2043
- ...r,
2044
- score: r.fusionScore || r.score || 0
2045
- }));
2046
- return results;
2042
+ // If noun types are specified, use searchByNounTypes
2043
+ let searchResults;
2044
+ if (options.nounTypes && options.nounTypes.length > 0) {
2045
+ searchResults = await this.searchByNounTypes(queryToUse, k, options.nounTypes, {
2046
+ forceEmbed: options.forceEmbed,
2047
+ service: options.service,
2048
+ metadata: options.metadata,
2049
+ offset: options.offset
2050
+ });
2051
+ }
2052
+ else {
2053
+ // Otherwise, search all GraphNouns
2054
+ searchResults = await this.searchByNounTypes(queryToUse, k, null, {
2055
+ forceEmbed: options.forceEmbed,
2056
+ service: options.service,
2057
+ metadata: options.metadata,
2058
+ offset: options.offset
2059
+ });
2060
+ }
2061
+ // Filter out placeholder nouns and deleted items from search results
2062
+ searchResults = searchResults.filter((result) => {
2063
+ if (result.metadata && typeof result.metadata === 'object') {
2064
+ const metadata = result.metadata;
2065
+ // Exclude deleted items from search results (soft delete)
2066
+ // Check namespaced field
2067
+ if (metadata._brainy?.deleted === true) {
2068
+ return false;
2069
+ }
2070
+ // Exclude placeholder nouns from search results
2071
+ if (metadata.isPlaceholder) {
2072
+ return false;
2073
+ }
2074
+ // Apply domain filter if specified
2075
+ if (options.filter?.domain) {
2076
+ if (metadata.domain !== options.filter.domain) {
2077
+ return false;
2078
+ }
2079
+ }
2080
+ }
2081
+ return true;
2082
+ });
2083
+ // If includeVerbs is true, retrieve associated GraphVerbs for each result
2084
+ if (options.includeVerbs && this.storage) {
2085
+ for (const result of searchResults) {
2086
+ try {
2087
+ // Get outgoing verbs for this noun
2088
+ const outgoingVerbs = await this.storage.getVerbsBySource(result.id);
2089
+ // Get incoming verbs for this noun
2090
+ const incomingVerbs = await this.storage.getVerbsByTarget(result.id);
2091
+ // Combine all verbs
2092
+ const allVerbs = [...outgoingVerbs, ...incomingVerbs];
2093
+ // Add verbs to the result metadata
2094
+ if (!result.metadata) {
2095
+ result.metadata = {};
2096
+ }
2097
+ // Add the verbs to the metadata
2098
+ ;
2099
+ result.metadata.associatedVerbs = allVerbs;
2100
+ }
2101
+ catch (error) {
2102
+ console.warn(`Failed to retrieve verbs for noun ${result.id}:`, error);
2103
+ }
2104
+ }
2105
+ }
2106
+ return searchResults;
2047
2107
  }
2048
2108
  /**
2049
- * Helper method to encode cursor for pagination
2050
- * @internal
2109
+ * Find entities similar to a given entity ID
2110
+ * @param id ID of the entity to find similar entities for
2111
+ * @param options Additional options
2112
+ * @returns Array of search results with similarity scores
2051
2113
  */
2052
- encodeCursor(data) {
2053
- return Buffer.from(JSON.stringify(data)).toString('base64');
2114
+ async findSimilar(id, options = {}) {
2115
+ await this.ensureInitialized();
2116
+ // Get the entity by ID
2117
+ const entity = await this.getNoun(id);
2118
+ if (!entity) {
2119
+ throw new Error(`Entity with ID ${id} not found`);
2120
+ }
2121
+ // If relationType is specified, directly get related entities by that type
2122
+ if (options.relationType) {
2123
+ // Get all verbs (relationships) from the source entity
2124
+ const outgoingVerbs = await this.storage.getVerbsBySource(id);
2125
+ // Filter to only include verbs of the specified type
2126
+ const verbsOfType = outgoingVerbs.filter((verb) => verb.type === options.relationType);
2127
+ // Get the target IDs
2128
+ const targetIds = verbsOfType.map((verb) => verb.target);
2129
+ // Get the actual entities for these IDs
2130
+ const results = [];
2131
+ for (const targetId of targetIds) {
2132
+ // Skip undefined targetIds
2133
+ if (typeof targetId !== 'string')
2134
+ continue;
2135
+ const targetEntity = await this.getNoun(targetId);
2136
+ if (targetEntity) {
2137
+ results.push({
2138
+ id: targetId,
2139
+ score: 1.0, // Default similarity score
2140
+ vector: targetEntity.vector,
2141
+ metadata: targetEntity.metadata
2142
+ });
2143
+ }
2144
+ }
2145
+ // Return the results, limited to the requested number
2146
+ return results.slice(0, options.limit || 10);
2147
+ }
2148
+ // If no relationType is specified, use the original vector similarity search
2149
+ const k = (options.limit || 10) + 1; // Add 1 to account for the original entity
2150
+ const searchResults = await this.search(entity.vector, {
2151
+ limit: k,
2152
+ excludeDeleted: false,
2153
+ nounTypes: options.nounTypes
2154
+ });
2155
+ // Filter out the original entity and limit to the requested number
2156
+ return searchResults
2157
+ .filter((result) => result.id !== id)
2158
+ .slice(0, options.limit || 10);
2054
2159
  }
2055
2160
  /**
2056
- * Helper method to decode cursor for pagination
2057
- * @internal
2161
+ * Get a vector by ID
2058
2162
  */
2059
- decodeCursor(cursor) {
2163
+ // Legacy get() method removed - use getNoun() instead
2164
+ /**
2165
+ * Check if a document with the given ID exists
2166
+ * This is a direct storage operation that works in write-only mode when allowDirectReads is enabled
2167
+ * @param id The ID to check for existence
2168
+ * @returns Promise<boolean> True if the document exists, false otherwise
2169
+ */
2170
+ async has(id) {
2171
+ if (id === null || id === undefined) {
2172
+ throw new Error('ID cannot be null or undefined');
2173
+ }
2174
+ await this.ensureInitialized();
2175
+ // This is a direct storage operation - check if allowed in write-only mode
2176
+ if (this.writeOnly && !this.allowDirectReads) {
2177
+ throw new Error('Cannot perform has() operation: database is in write-only mode. Enable allowDirectReads for direct storage operations.');
2178
+ }
2060
2179
  try {
2061
- return JSON.parse(Buffer.from(cursor, 'base64').toString());
2180
+ // Always query storage directly for existence check
2181
+ const noun = await this.storage.getNoun(id);
2182
+ return noun !== null;
2062
2183
  }
2063
- catch {
2064
- return { offset: 0, timestamp: 0 };
2184
+ catch (error) {
2185
+ // If storage lookup fails, the item doesn't exist
2186
+ return false;
2065
2187
  }
2066
2188
  }
2067
2189
  /**
2068
- * Internal method for direct HNSW vector search
2069
- * Used by TripleIntelligence to avoid circular dependencies
2070
- * Note: For pure metadata filtering, use metadataIndex.getIdsForFilter() directly - it's O(log n)!
2071
- * This method is for vector similarity search with optional metadata filtering during search
2072
- * @internal
2190
+ * Check if a document with the given ID exists (alias for has)
2191
+ * This is a direct storage operation that works in write-only mode when allowDirectReads is enabled
2192
+ * @param id The ID to check for existence
2193
+ * @returns Promise<boolean> True if the document exists, false otherwise
2073
2194
  */
2074
- async _internalVectorSearch(queryVectorOrData, k = 10, options = {}) {
2075
- // Generate query vector
2076
- const queryVector = Array.isArray(queryVectorOrData) &&
2077
- typeof queryVectorOrData[0] === 'number' ?
2078
- queryVectorOrData :
2079
- await this.embed(queryVectorOrData);
2080
- // Apply metadata filter if provided
2081
- let filterFunction;
2082
- if (options.metadata) {
2083
- const matchingIdsArray = await this.metadataIndex?.getIdsForFilter(options.metadata) || [];
2084
- const matchingIds = new Set(matchingIdsArray);
2085
- filterFunction = async (id) => matchingIds.has(id);
2086
- }
2087
- // Direct HNSW search
2088
- const results = await this.index.search(queryVector, k, filterFunction);
2089
- // Get metadata for results
2090
- const searchResults = [];
2091
- for (const [id, similarity] of results) {
2092
- const metadata = await this.getNoun(id);
2093
- searchResults.push({
2094
- id,
2095
- score: similarity,
2096
- vector: [],
2097
- metadata: metadata?.metadata || {}
2098
- });
2099
- }
2100
- return searchResults;
2101
- }
2102
2195
  /**
2103
- * 🎯 LEGACY: Original search implementation (kept for complex cases)
2104
- * This is the original search method, now used as fallback for edge cases
2196
+ * Check if a noun exists
2197
+ * @param id The noun ID
2198
+ * @returns True if exists
2105
2199
  */
2106
- async _legacySearch(queryVectorOrData, k = 10, options = {}) {
2107
- const startTime = Date.now();
2108
- // Validate input is not null or undefined
2109
- if (queryVectorOrData === null || queryVectorOrData === undefined) {
2110
- throw new Error('Query cannot be null or undefined');
2111
- }
2112
- // Validate k parameter first, before any other logic
2113
- if (k <= 0 || typeof k !== 'number' || isNaN(k)) {
2114
- throw new Error('Parameter k must be a positive number');
2115
- }
2116
- if (!this.isInitialized) {
2117
- throw new Error('BrainyData must be initialized before searching. Call init() first.');
2118
- }
2119
- // Check if database is in write-only mode
2120
- this.checkWriteOnly();
2121
- // If searching for verbs directly
2122
- if (options.searchVerbs) {
2123
- const verbResults = await this.searchVerbs(queryVectorOrData, k, {
2124
- forceEmbed: options.forceEmbed,
2125
- verbTypes: options.verbTypes
2126
- });
2127
- // Convert verb results to SearchResult format
2128
- return verbResults.map((verb) => ({
2129
- id: verb.id,
2130
- score: verb.similarity,
2131
- vector: verb.embedding || [],
2132
- metadata: {
2133
- verb: verb.verb,
2134
- source: verb.source,
2135
- target: verb.target,
2136
- ...verb.data
2137
- }
2138
- }));
2139
- }
2140
- // If searching for nouns connected by verbs
2141
- if (options.searchConnectedNouns) {
2142
- return this.searchNounsByVerbs(queryVectorOrData, k, {
2143
- forceEmbed: options.forceEmbed,
2144
- verbTypes: options.verbTypes,
2145
- direction: options.verbDirection
2146
- });
2147
- }
2148
- // If a specific search mode is specified, use the appropriate search method
2149
- if (options.searchMode === 'local') {
2150
- return this.searchLocal(queryVectorOrData, k, options);
2151
- }
2152
- else if (options.searchMode === 'remote') {
2153
- return this.searchRemote(queryVectorOrData, k, options);
2154
- }
2155
- else if (options.searchMode === 'combined') {
2156
- return this.searchCombined(queryVectorOrData, k, options);
2157
- }
2158
- // Generate deduplication key for concurrent request handling
2159
- const dedupeKey = RequestDeduplicator.getSearchKey(typeof queryVectorOrData === 'string' ? queryVectorOrData : JSON.stringify(queryVectorOrData), k, options);
2160
- // Use augmentation system for search (includes deduplication, batching, and caching)
2161
- return this.augmentations.execute('search', { query: queryVectorOrData, k, options, dedupeKey }, async () => {
2162
- // Default behavior (backward compatible): search locally
2163
- try {
2164
- // BEST OF BOTH: Automatically exclude soft-deleted items (Neural Intelligence improvement)
2165
- // BUT only when there's already metadata filtering happening
2166
- let metadataFilter = options.metadata;
2167
- // Only add soft-delete filter if there's already metadata being filtered
2168
- // This preserves pure vector searches without metadata
2169
- if (metadataFilter && Object.keys(metadataFilter).length > 0) {
2170
- // If no explicit deleted filter is provided, exclude soft-deleted items
2171
- // Use namespaced field for O(1) performance
2172
- if (!metadataFilter['_brainy.deleted'] && !metadataFilter.anyOf) {
2173
- metadataFilter = {
2174
- ...metadataFilter,
2175
- ['_brainy.deleted']: false // O(1) positive match instead of notEquals
2176
- };
2177
- }
2178
- }
2179
- const hasMetadataFilter = metadataFilter && Object.keys(metadataFilter).length > 0;
2180
- // Check cache first (transparent to user) - but skip cache if we have metadata filters
2181
- if (!hasMetadataFilter) {
2182
- const cacheKey = this.cache?.getCacheKey(queryVectorOrData, k, options);
2183
- const cachedResults = this.cache?.get(cacheKey);
2184
- if (cachedResults) {
2185
- // Track cache hit in health monitor
2186
- if (this.monitoring) {
2187
- const latency = Date.now() - startTime;
2188
- this.monitoring.recordRequest(latency, false);
2189
- this.monitoring.recordCacheAccess(true);
2190
- }
2191
- return cachedResults;
2192
- }
2193
- }
2194
- // Cache miss - perform actual search
2195
- const results = await this.searchLocal(queryVectorOrData, k, {
2196
- ...options,
2197
- metadata: metadataFilter
2198
- });
2199
- // Cache results for future queries (unless explicitly disabled or has metadata filter)
2200
- if (!options.skipCache && !hasMetadataFilter) {
2201
- const cacheKey = this.cache?.getCacheKey(queryVectorOrData, k, options);
2202
- this.cache?.set(cacheKey, results);
2203
- }
2204
- // Track successful search in health monitor
2205
- if (this.monitoring) {
2206
- const latency = Date.now() - startTime;
2207
- this.monitoring.recordRequest(latency, false);
2208
- this.monitoring.recordCacheAccess(false);
2209
- }
2210
- return results;
2211
- }
2212
- catch (error) {
2213
- // Track error in health monitor
2214
- if (this.monitoring) {
2215
- const latency = Date.now() - startTime;
2216
- this.monitoring.recordRequest(latency, true);
2217
- }
2218
- throw error;
2219
- }
2220
- });
2200
+ async hasNoun(id) {
2201
+ return this.hasNoun(id);
2221
2202
  }
2222
2203
  /**
2223
- * Search with cursor-based pagination for better performance on large datasets
2224
- * @param queryVectorOrData Query vector or data to search for
2225
- * @param k Number of results to return
2226
- * @param options Additional options including cursor for pagination
2227
- * @returns Paginated search results with cursor for next page
2204
+ * Get metadata for a document by ID
2205
+ * This is a direct storage operation that works in write-only mode when allowDirectReads is enabled
2206
+ * @param id The ID of the document
2207
+ * @returns Promise<T | null> The metadata object or null if not found
2228
2208
  */
2209
+ // Legacy getMetadata() method removed - use getNounMetadata() instead
2229
2210
  /**
2230
- * @deprecated Use search() with cursor option instead
2211
+ * Get multiple documents by their IDs
2212
+ * This is a direct storage operation that works in write-only mode when allowDirectReads is enabled
2213
+ * @param ids Array of IDs to retrieve
2214
+ * @returns Promise<Array<VectorDocument<T> | null>> Array of documents (null for missing IDs)
2215
+ */
2216
+ /**
2217
+ * Get multiple nouns - by IDs, filters, or pagination
2218
+ * @param idsOrOptions Array of IDs or query options
2219
+ * @returns Array of noun documents
2220
+ *
2231
2221
  * @example
2232
- * // Old way (deprecated)
2233
- * await brain.searchWithCursor(query, 10, { cursor: 'abc123' })
2234
- * // New way
2235
- * await brain.search(query, { limit: 10, cursor: 'abc123' })
2222
+ * // Get by IDs
2223
+ * await brain.getNouns(['id1', 'id2'])
2224
+ *
2225
+ * // Get with filters
2226
+ * await brain.getNouns({
2227
+ * filter: { type: 'article' },
2228
+ * limit: 10
2229
+ * })
2230
+ *
2231
+ * // Get with pagination
2232
+ * await brain.getNouns({
2233
+ * offset: 20,
2234
+ * limit: 10
2235
+ * })
2236
2236
  */
2237
- async searchWithCursor(queryVectorOrData, k = 10, options = {}) {
2238
- // For cursor-based search, we need to fetch more results and filter
2239
- const searchK = options.cursor ? k + 20 : k; // Get extra results for filtering
2240
- // Perform regular search
2241
- const { cursor, ...searchOptions } = options;
2242
- const allResults = await this.search(queryVectorOrData, {
2243
- limit: searchK,
2244
- nounTypes: searchOptions.nounTypes,
2245
- metadata: searchOptions.filter
2246
- });
2247
- let results = allResults;
2248
- let startIndex = 0;
2249
- // If cursor provided, find starting position
2250
- if (options.cursor) {
2251
- startIndex = allResults.findIndex((r) => r.id === options.cursor.lastId &&
2252
- Math.abs(r.score - options.cursor.lastScore) < 0.0001);
2253
- if (startIndex >= 0) {
2254
- startIndex += 1; // Start after the cursor position
2255
- results = allResults.slice(startIndex, startIndex + k);
2256
- }
2257
- else {
2258
- // Cursor not found, might be stale - return from beginning
2259
- results = allResults.slice(0, k);
2260
- startIndex = 0;
2261
- }
2262
- }
2263
- else {
2264
- results = allResults.slice(0, k);
2237
+ async getNouns(idsOrOptions) {
2238
+ // Handle array of IDs
2239
+ if (Array.isArray(idsOrOptions)) {
2240
+ return this.getNounsByIds(idsOrOptions);
2265
2241
  }
2266
- // Create cursor for next page
2267
- let nextCursor;
2268
- const hasMoreResults = startIndex + results.length < allResults.length ||
2269
- allResults.length >= searchK;
2270
- if (results.length > 0 && hasMoreResults) {
2271
- const lastResult = results[results.length - 1];
2272
- nextCursor = {
2273
- lastId: lastResult.id,
2274
- lastScore: lastResult.score,
2275
- position: startIndex + results.length
2276
- };
2242
+ // Handle options object
2243
+ const options = idsOrOptions || {};
2244
+ // If ids are provided in options, get by IDs
2245
+ if (options.ids) {
2246
+ return this.getNounsByIds(options.ids);
2277
2247
  }
2278
- return {
2279
- results,
2280
- cursor: nextCursor,
2281
- hasMore: !!nextCursor,
2282
- totalEstimate: allResults.length > searchK ? undefined : allResults.length
2283
- };
2248
+ // Otherwise, do a filtered/paginated query and extract items
2249
+ const result = await this.queryNounsByFilter(options);
2250
+ return result.items;
2284
2251
  }
2285
2252
  /**
2286
- * Search the local database for similar vectors
2287
- * @param queryVectorOrData Query vector or data to search for
2288
- * @param k Number of results to return
2289
- * @param options Additional options
2290
- * @returns Array of search results
2253
+ * Internal: Get nouns by IDs
2291
2254
  */
2292
- async searchLocal(queryVectorOrData, k = 10, options = {}) {
2293
- if (!this.isInitialized) {
2294
- throw new Error('BrainyData must be initialized before searching. Call init() first.');
2255
+ async getNounsByIds(ids) {
2256
+ if (!Array.isArray(ids)) {
2257
+ throw new Error('IDs must be provided as an array');
2295
2258
  }
2296
- // Check if database is in write-only mode
2297
- this.checkWriteOnly();
2298
- // Process the query input for vectorization
2299
- let queryToUse = queryVectorOrData;
2300
- // Handle string queries
2301
- if (typeof queryVectorOrData === 'string' && !options.forceEmbed) {
2302
- queryToUse = await this.embed(queryVectorOrData);
2303
- options.forceEmbed = false; // Already embedded, don't force again
2259
+ await this.ensureInitialized();
2260
+ // This is a direct storage operation - check if allowed in write-only mode
2261
+ if (this.writeOnly && !this.allowDirectReads) {
2262
+ throw new Error('Cannot perform getBatch() operation: database is in write-only mode. Enable allowDirectReads for direct storage operations.');
2304
2263
  }
2305
- // Handle JSON object queries with special processing
2306
- else if (typeof queryVectorOrData === 'object' &&
2307
- queryVectorOrData !== null &&
2308
- !Array.isArray(queryVectorOrData) &&
2309
- !options.forceEmbed) {
2310
- // If searching within a specific field
2311
- if (options.searchField) {
2312
- // Extract text from the specific field
2313
- const fieldText = extractFieldFromJson(queryVectorOrData, options.searchField);
2314
- if (fieldText) {
2315
- queryToUse = await this.embeddingFunction(fieldText);
2316
- options.forceEmbed = false; // Already embedded, don't force again
2317
- }
2318
- }
2319
- // Otherwise process the entire object with priority fields
2320
- else {
2321
- const preparedText = prepareJsonForVectorization(queryVectorOrData, {
2322
- priorityFields: options.priorityFields || [
2323
- 'name',
2324
- 'title',
2325
- 'company',
2326
- 'organization',
2327
- 'description',
2328
- 'summary'
2329
- ]
2330
- });
2331
- queryToUse = await this.embeddingFunction(preparedText);
2332
- options.forceEmbed = false; // Already embedded, don't force again
2333
- }
2334
- }
2335
- // If noun types are specified, use searchByNounTypes
2336
- let searchResults;
2337
- if (options.nounTypes && options.nounTypes.length > 0) {
2338
- searchResults = await this.searchByNounTypes(queryToUse, k, options.nounTypes, {
2339
- forceEmbed: options.forceEmbed,
2340
- service: options.service,
2341
- metadata: options.metadata,
2342
- offset: options.offset
2343
- });
2344
- }
2345
- else {
2346
- // Otherwise, search all GraphNouns
2347
- searchResults = await this.searchByNounTypes(queryToUse, k, null, {
2348
- forceEmbed: options.forceEmbed,
2349
- service: options.service,
2350
- metadata: options.metadata,
2351
- offset: options.offset
2352
- });
2353
- }
2354
- // Filter out placeholder nouns and deleted items from search results
2355
- searchResults = searchResults.filter((result) => {
2356
- if (result.metadata && typeof result.metadata === 'object') {
2357
- const metadata = result.metadata;
2358
- // Exclude deleted items from search results (soft delete)
2359
- // Check namespaced field
2360
- if (metadata._brainy?.deleted === true) {
2361
- return false;
2362
- }
2363
- // Exclude placeholder nouns from search results
2364
- if (metadata.isPlaceholder) {
2365
- return false;
2366
- }
2367
- // Apply domain filter if specified
2368
- if (options.filter?.domain) {
2369
- if (metadata.domain !== options.filter.domain) {
2370
- return false;
2371
- }
2372
- }
2373
- }
2374
- return true;
2375
- });
2376
- // If includeVerbs is true, retrieve associated GraphVerbs for each result
2377
- if (options.includeVerbs && this.storage) {
2378
- for (const result of searchResults) {
2379
- try {
2380
- // Get outgoing verbs for this noun
2381
- const outgoingVerbs = await this.storage.getVerbsBySource(result.id);
2382
- // Get incoming verbs for this noun
2383
- const incomingVerbs = await this.storage.getVerbsByTarget(result.id);
2384
- // Combine all verbs
2385
- const allVerbs = [...outgoingVerbs, ...incomingVerbs];
2386
- // Add verbs to the result metadata
2387
- if (!result.metadata) {
2388
- result.metadata = {};
2389
- }
2390
- // Add the verbs to the metadata
2391
- ;
2392
- result.metadata.associatedVerbs = allVerbs;
2393
- }
2394
- catch (error) {
2395
- console.warn(`Failed to retrieve verbs for noun ${result.id}:`, error);
2396
- }
2397
- }
2398
- }
2399
- return searchResults;
2400
- }
2401
- /**
2402
- * Find entities similar to a given entity ID
2403
- * @param id ID of the entity to find similar entities for
2404
- * @param options Additional options
2405
- * @returns Array of search results with similarity scores
2406
- */
2407
- async findSimilar(id, options = {}) {
2408
- await this.ensureInitialized();
2409
- // Get the entity by ID
2410
- const entity = await this.getNoun(id);
2411
- if (!entity) {
2412
- throw new Error(`Entity with ID ${id} not found`);
2413
- }
2414
- // If relationType is specified, directly get related entities by that type
2415
- if (options.relationType) {
2416
- // Get all verbs (relationships) from the source entity
2417
- const outgoingVerbs = await this.storage.getVerbsBySource(id);
2418
- // Filter to only include verbs of the specified type
2419
- const verbsOfType = outgoingVerbs.filter((verb) => verb.type === options.relationType);
2420
- // Get the target IDs
2421
- const targetIds = verbsOfType.map((verb) => verb.target);
2422
- // Get the actual entities for these IDs
2423
- const results = [];
2424
- for (const targetId of targetIds) {
2425
- // Skip undefined targetIds
2426
- if (typeof targetId !== 'string')
2427
- continue;
2428
- const targetEntity = await this.getNoun(targetId);
2429
- if (targetEntity) {
2430
- results.push({
2431
- id: targetId,
2432
- score: 1.0, // Default similarity score
2433
- vector: targetEntity.vector,
2434
- metadata: targetEntity.metadata
2435
- });
2436
- }
2437
- }
2438
- // Return the results, limited to the requested number
2439
- return results.slice(0, options.limit || 10);
2440
- }
2441
- // If no relationType is specified, use the original vector similarity search
2442
- const k = (options.limit || 10) + 1; // Add 1 to account for the original entity
2443
- const searchResults = await this.search(entity.vector, {
2444
- limit: k,
2445
- excludeDeleted: false,
2446
- nounTypes: options.nounTypes
2447
- });
2448
- // Filter out the original entity and limit to the requested number
2449
- return searchResults
2450
- .filter((result) => result.id !== id)
2451
- .slice(0, options.limit || 10);
2452
- }
2453
- /**
2454
- * Get a vector by ID
2455
- */
2456
- // Legacy get() method removed - use getNoun() instead
2457
- /**
2458
- * Check if a document with the given ID exists
2459
- * This is a direct storage operation that works in write-only mode when allowDirectReads is enabled
2460
- * @param id The ID to check for existence
2461
- * @returns Promise<boolean> True if the document exists, false otherwise
2462
- */
2463
- async has(id) {
2464
- if (id === null || id === undefined) {
2465
- throw new Error('ID cannot be null or undefined');
2466
- }
2467
- await this.ensureInitialized();
2468
- // This is a direct storage operation - check if allowed in write-only mode
2469
- if (this.writeOnly && !this.allowDirectReads) {
2470
- throw new Error('Cannot perform has() operation: database is in write-only mode. Enable allowDirectReads for direct storage operations.');
2471
- }
2472
- try {
2473
- // Always query storage directly for existence check
2474
- const noun = await this.storage.getNoun(id);
2475
- return noun !== null;
2476
- }
2477
- catch (error) {
2478
- // If storage lookup fails, the item doesn't exist
2479
- return false;
2480
- }
2481
- }
2482
- /**
2483
- * Check if a document with the given ID exists (alias for has)
2484
- * This is a direct storage operation that works in write-only mode when allowDirectReads is enabled
2485
- * @param id The ID to check for existence
2486
- * @returns Promise<boolean> True if the document exists, false otherwise
2487
- */
2488
- /**
2489
- * Check if a noun exists
2490
- * @param id The noun ID
2491
- * @returns True if exists
2492
- */
2493
- async hasNoun(id) {
2494
- return this.hasNoun(id);
2495
- }
2496
- /**
2497
- * Get metadata for a document by ID
2498
- * This is a direct storage operation that works in write-only mode when allowDirectReads is enabled
2499
- * @param id The ID of the document
2500
- * @returns Promise<T | null> The metadata object or null if not found
2501
- */
2502
- // Legacy getMetadata() method removed - use getNounMetadata() instead
2503
- /**
2504
- * Get multiple documents by their IDs
2505
- * This is a direct storage operation that works in write-only mode when allowDirectReads is enabled
2506
- * @param ids Array of IDs to retrieve
2507
- * @returns Promise<Array<VectorDocument<T> | null>> Array of documents (null for missing IDs)
2508
- */
2509
- /**
2510
- * Get multiple nouns - by IDs, filters, or pagination
2511
- * @param idsOrOptions Array of IDs or query options
2512
- * @returns Array of noun documents
2513
- *
2514
- * @example
2515
- * // Get by IDs
2516
- * await brain.getNouns(['id1', 'id2'])
2517
- *
2518
- * // Get with filters
2519
- * await brain.getNouns({
2520
- * filter: { type: 'article' },
2521
- * limit: 10
2522
- * })
2523
- *
2524
- * // Get with pagination
2525
- * await brain.getNouns({
2526
- * offset: 20,
2527
- * limit: 10
2528
- * })
2529
- */
2530
- async getNouns(idsOrOptions) {
2531
- // Handle array of IDs
2532
- if (Array.isArray(idsOrOptions)) {
2533
- return this.getNounsByIds(idsOrOptions);
2534
- }
2535
- // Handle options object
2536
- const options = idsOrOptions || {};
2537
- // If ids are provided in options, get by IDs
2538
- if (options.ids) {
2539
- return this.getNounsByIds(options.ids);
2540
- }
2541
- // Otherwise, do a filtered/paginated query and extract items
2542
- const result = await this.queryNounsByFilter(options);
2543
- return result.items;
2544
- }
2545
- /**
2546
- * Internal: Get nouns by IDs
2547
- */
2548
- async getNounsByIds(ids) {
2549
- if (!Array.isArray(ids)) {
2550
- throw new Error('IDs must be provided as an array');
2551
- }
2552
- await this.ensureInitialized();
2553
- // This is a direct storage operation - check if allowed in write-only mode
2554
- if (this.writeOnly && !this.allowDirectReads) {
2555
- throw new Error('Cannot perform getBatch() operation: database is in write-only mode. Enable allowDirectReads for direct storage operations.');
2556
- }
2557
- const results = [];
2558
- for (const id of ids) {
2559
- if (id === null || id === undefined) {
2560
- results.push(null);
2561
- continue;
2264
+ const results = [];
2265
+ for (const id of ids) {
2266
+ if (id === null || id === undefined) {
2267
+ results.push(null);
2268
+ continue;
2562
2269
  }
2563
2270
  try {
2564
2271
  const result = await this.getNoun(id);
@@ -4690,8 +4397,12 @@ export class BrainyData {
4690
4397
  noun.vector = await this.embeddingFunction(noun.metadata);
4691
4398
  }
4692
4399
  }
4400
+ // Extract type from metadata or default to Content
4401
+ const nounType = (noun.metadata && typeof noun.metadata === 'object' && 'noun' in noun.metadata)
4402
+ ? noun.metadata.noun
4403
+ : NounType.Content;
4693
4404
  // Add the noun with its vector and metadata (custom ID not supported)
4694
- await this.addNoun(noun.vector, noun.metadata);
4405
+ await this.addNoun(noun.vector, nounType, noun.metadata);
4695
4406
  nounsRestored++;
4696
4407
  }
4697
4408
  catch (error) {
@@ -4847,8 +4558,8 @@ export class BrainyData {
4847
4558
  tags: [`tag-${i % 5}`, `category-${i % 3}`]
4848
4559
  }
4849
4560
  };
4850
- // Add the noun
4851
- const id = await this.addNoun(metadata.description, metadata);
4561
+ // Add the noun with explicit type
4562
+ const id = await this.addNoun(metadata.description, nounType, metadata);
4852
4563
  nounIds.push(id);
4853
4564
  }
4854
4565
  // Generate random verbs between nouns
@@ -5038,8 +4749,7 @@ export class BrainyData {
5038
4749
  const configValue = options?.encrypt ? await this.encryptData(JSON.stringify(value)) : value;
5039
4750
  // Use simple text for vectorization
5040
4751
  const searchableText = `Configuration setting for ${key}`;
5041
- await this.addNoun(searchableText, {
5042
- nounType: NounType.State,
4752
+ await this.addNoun(searchableText, NounType.State, {
5043
4753
  configKey: key,
5044
4754
  configValue: configValue,
5045
4755
  encrypted: !!options?.encrypt,
@@ -5171,15 +4881,312 @@ export class BrainyData {
5171
4881
  * @returns Created noun ID
5172
4882
  */
5173
4883
  /**
5174
- * Add a noun to the database
4884
+ * Add a noun to the database with required type
5175
4885
  * Clean 2.0 API - primary method for adding data
5176
4886
  *
5177
4887
  * @param vectorOrData Vector array or data to embed
5178
- * @param metadata Metadata to store with the noun
4888
+ * @param nounType Required noun type (one of 31 types)
4889
+ * @param metadata Optional metadata object
5179
4890
  * @returns The generated ID
5180
4891
  */
5181
- async addNoun(vectorOrData, metadata) {
5182
- return await this.add(vectorOrData, metadata);
4892
+ async addNoun(vectorOrData, nounType, metadata, options = {}) {
4893
+ // Validate noun type
4894
+ const validatedType = validateNounType(nounType);
4895
+ // Enrich metadata with validated type
4896
+ let enrichedMetadata = {
4897
+ ...metadata,
4898
+ noun: validatedType
4899
+ };
4900
+ await this.ensureInitialized();
4901
+ // Check if database is in read-only mode
4902
+ this.checkReadOnly();
4903
+ // Validate input is not null or undefined
4904
+ if (vectorOrData === null || vectorOrData === undefined) {
4905
+ throw new Error('Input cannot be null or undefined');
4906
+ }
4907
+ try {
4908
+ let vector;
4909
+ if (Array.isArray(vectorOrData)) {
4910
+ for (let i = 0; i < vectorOrData.length; i++) {
4911
+ if (typeof vectorOrData[i] !== 'number') {
4912
+ throw new Error('Vector contains non-numeric values');
4913
+ }
4914
+ }
4915
+ }
4916
+ // Check if input is already a vector
4917
+ if (Array.isArray(vectorOrData) && !options.forceEmbed) {
4918
+ // Input is already a vector (and we've validated it contains only numbers)
4919
+ vector = vectorOrData;
4920
+ }
4921
+ else {
4922
+ // Input needs to be vectorized
4923
+ try {
4924
+ // Check if input is a JSON object and process it specially
4925
+ if (typeof vectorOrData === 'object' &&
4926
+ vectorOrData !== null &&
4927
+ !Array.isArray(vectorOrData)) {
4928
+ // Process JSON object for better vectorization
4929
+ const preparedText = prepareJsonForVectorization(vectorOrData, {
4930
+ // Prioritize common name/title fields if they exist
4931
+ priorityFields: [
4932
+ 'name',
4933
+ 'title',
4934
+ 'company',
4935
+ 'organization',
4936
+ 'description',
4937
+ 'summary'
4938
+ ]
4939
+ });
4940
+ vector = await this.embeddingFunction(preparedText);
4941
+ // IMPORTANT: When an object is passed as data and no metadata is provided,
4942
+ // use the object AS the metadata too. This is expected behavior for the API.
4943
+ // Users can pass either:
4944
+ // 1. addNoun(string, metadata) - vectorize string, store metadata
4945
+ // 2. addNoun(object) - vectorize object text, store object as metadata
4946
+ // 3. addNoun(object, metadata) - vectorize object text, store provided metadata
4947
+ if (!enrichedMetadata || Object.keys(enrichedMetadata).length === 1) { // Only has 'noun' key
4948
+ enrichedMetadata = { ...vectorOrData, noun: validatedType };
4949
+ }
4950
+ // Track field names for this JSON document
4951
+ const service = this.getServiceName(options);
4952
+ if (this.storage) {
4953
+ await this.storage.trackFieldNames(vectorOrData, service);
4954
+ }
4955
+ }
4956
+ else {
4957
+ // Use standard embedding for non-JSON data
4958
+ vector = await this.embeddingFunction(vectorOrData);
4959
+ }
4960
+ }
4961
+ catch (embedError) {
4962
+ throw new Error(`Failed to vectorize data: ${embedError}`);
4963
+ }
4964
+ }
4965
+ // Check if vector is defined
4966
+ if (!vector) {
4967
+ throw new Error('Vector is undefined or null');
4968
+ }
4969
+ // Validate vector dimensions
4970
+ if (vector.length !== this._dimensions) {
4971
+ throw new Error(`Vector dimension mismatch: expected ${this._dimensions}, got ${vector.length}`);
4972
+ }
4973
+ // Use ID from options if it exists, otherwise from metadata, otherwise generate a new UUID
4974
+ const id = options.id ||
4975
+ (enrichedMetadata && typeof enrichedMetadata === 'object' && 'id' in enrichedMetadata
4976
+ ? enrichedMetadata.id
4977
+ : uuidv4());
4978
+ // Check for existing noun (both write-only and normal modes)
4979
+ let existingNoun;
4980
+ if (options.id) {
4981
+ try {
4982
+ if (this.writeOnly) {
4983
+ // In write-only mode, check storage directly
4984
+ existingNoun =
4985
+ (await this.storage.getNoun(options.id)) ?? undefined;
4986
+ }
4987
+ else {
4988
+ // In normal mode, check index first, then storage
4989
+ existingNoun = this.index.getNouns().get(options.id);
4990
+ if (!existingNoun) {
4991
+ existingNoun =
4992
+ (await this.storage.getNoun(options.id)) ?? undefined;
4993
+ }
4994
+ }
4995
+ if (existingNoun) {
4996
+ // Check if existing noun is a placeholder
4997
+ const existingMetadata = await this.storage.getMetadata(options.id);
4998
+ const isPlaceholder = existingMetadata &&
4999
+ typeof existingMetadata === 'object' &&
5000
+ existingMetadata.isPlaceholder;
5001
+ if (isPlaceholder) {
5002
+ // Replace placeholder with real data
5003
+ if (this.loggingConfig?.verbose) {
5004
+ console.log(`Replacing placeholder noun ${options.id} with real data`);
5005
+ }
5006
+ }
5007
+ else {
5008
+ // Real noun already exists, update it
5009
+ if (this.loggingConfig?.verbose) {
5010
+ console.log(`Updating existing noun ${options.id}`);
5011
+ }
5012
+ }
5013
+ }
5014
+ }
5015
+ catch (storageError) {
5016
+ // Item doesn't exist, continue with add operation
5017
+ }
5018
+ }
5019
+ let noun;
5020
+ // In write-only mode, skip index operations since index is not loaded
5021
+ if (this.writeOnly) {
5022
+ // Create noun object directly without adding to index
5023
+ noun = {
5024
+ id,
5025
+ vector,
5026
+ connections: new Map(),
5027
+ level: 0, // Default level for new nodes
5028
+ metadata: undefined // Will be set separately
5029
+ };
5030
+ }
5031
+ else {
5032
+ // Normal mode: Add to HNSW index first
5033
+ await this.hnswIndex.addItem({ id, vector, metadata: enrichedMetadata });
5034
+ // Get the noun from the HNSW index
5035
+ const indexNoun = this.hnswIndex.getNouns().get(id);
5036
+ if (!indexNoun) {
5037
+ throw new Error(`Failed to retrieve newly created noun with ID ${id}`);
5038
+ }
5039
+ noun = indexNoun;
5040
+ }
5041
+ // Save noun to storage using augmentation system
5042
+ await this.augmentations.execute('saveNoun', { noun, options }, async () => {
5043
+ await this.storage.saveNoun(noun);
5044
+ const service = this.getServiceName(options);
5045
+ await this.storage.incrementStatistic('noun', service);
5046
+ });
5047
+ // Save metadata if provided and not empty
5048
+ if (enrichedMetadata !== undefined) {
5049
+ // Skip saving if metadata is an empty object
5050
+ if (enrichedMetadata &&
5051
+ typeof enrichedMetadata === 'object' &&
5052
+ Object.keys(enrichedMetadata).length === 0) {
5053
+ // Don't save empty metadata
5054
+ // Explicitly save null to ensure no metadata is stored
5055
+ await this.storage.saveMetadata(id, null);
5056
+ }
5057
+ else {
5058
+ // Validate noun type if metadata is for a GraphNoun
5059
+ if (enrichedMetadata && typeof enrichedMetadata === 'object' && 'noun' in enrichedMetadata) {
5060
+ const nounType = enrichedMetadata.noun;
5061
+ // Check if the noun type is valid
5062
+ const isValidNounType = Object.values(NounType).includes(nounType);
5063
+ if (!isValidNounType) {
5064
+ console.warn(`Invalid noun type: ${nounType}. Falling back to GraphNoun.`);
5065
+ enrichedMetadata.noun = NounType.Concept;
5066
+ }
5067
+ // Ensure createdBy field is populated for GraphNoun
5068
+ const service = options.service || this.getCurrentAugmentation();
5069
+ const graphNoun = enrichedMetadata;
5070
+ // Only set createdBy if it doesn't exist or is being explicitly updated
5071
+ if (!graphNoun.createdBy || options.service) {
5072
+ graphNoun.createdBy = getAugmentationVersion(service);
5073
+ }
5074
+ // Update timestamps
5075
+ const now = new Date();
5076
+ const timestamp = {
5077
+ seconds: Math.floor(now.getTime() / 1000),
5078
+ nanoseconds: (now.getTime() % 1000) * 1000000
5079
+ };
5080
+ // Set createdAt if it doesn't exist
5081
+ if (!graphNoun.createdAt) {
5082
+ graphNoun.createdAt = timestamp;
5083
+ }
5084
+ // Always update updatedAt
5085
+ graphNoun.updatedAt = timestamp;
5086
+ }
5087
+ // Create properly namespaced metadata for new items
5088
+ let metadataToSave = createNamespacedMetadata(enrichedMetadata);
5089
+ // Add domain metadata if distributed mode is enabled
5090
+ if (this.domainDetector) {
5091
+ // First check if domain is already in metadata
5092
+ if (metadataToSave.domain) {
5093
+ // Domain already specified, keep it
5094
+ const domainInfo = this.domainDetector.detectDomain(metadataToSave);
5095
+ if (domainInfo.domainMetadata) {
5096
+ ;
5097
+ metadataToSave.domainMetadata =
5098
+ domainInfo.domainMetadata;
5099
+ }
5100
+ }
5101
+ else {
5102
+ // Try to detect domain from the data
5103
+ const dataToAnalyze = Array.isArray(vectorOrData)
5104
+ ? enrichedMetadata
5105
+ : vectorOrData;
5106
+ const domainInfo = this.domainDetector.detectDomain(dataToAnalyze);
5107
+ if (domainInfo.domain) {
5108
+ ;
5109
+ metadataToSave.domain = domainInfo.domain;
5110
+ if (domainInfo.domainMetadata) {
5111
+ ;
5112
+ metadataToSave.domainMetadata =
5113
+ domainInfo.domainMetadata;
5114
+ }
5115
+ }
5116
+ }
5117
+ }
5118
+ // Add partition information if distributed mode is enabled
5119
+ if (this.partitioner) {
5120
+ const partition = this.partitioner.getPartition(id);
5121
+ metadataToSave.partition = partition;
5122
+ }
5123
+ await this.storage.saveMetadata(id, metadataToSave);
5124
+ // Update metadata index (write-only mode should build indices!)
5125
+ if (this.index && !this.frozen) {
5126
+ await this.metadataIndex?.addToIndex?.(id, metadataToSave);
5127
+ }
5128
+ // Track metadata statistics
5129
+ const metadataService = this.getServiceName(options);
5130
+ await this.storage.incrementStatistic('metadata', metadataService);
5131
+ // Content type tracking removed - metrics system not initialized
5132
+ // Track update timestamp (handled by metrics augmentation)
5133
+ }
5134
+ }
5135
+ // Update HNSW index size with actual index size
5136
+ const indexSize = this.index.size();
5137
+ await this.storage.updateHnswIndexSize(indexSize);
5138
+ // Update health metrics if in distributed mode
5139
+ if (this.monitoring) {
5140
+ const vectorCount = await this.getNounCount();
5141
+ this.monitoring.updateVectorCount(vectorCount);
5142
+ }
5143
+ // If addToRemote is true and we're connected to a remote server, add to remote as well
5144
+ if (options.addToRemote && this.isConnectedToRemoteServer()) {
5145
+ try {
5146
+ await this.addToRemote(id, vector, enrichedMetadata);
5147
+ }
5148
+ catch (remoteError) {
5149
+ console.warn(`Failed to add to remote server: ${remoteError}. Continuing with local add.`);
5150
+ }
5151
+ }
5152
+ // Invalidate search cache since data has changed
5153
+ this.cache?.invalidateOnDataChange('add');
5154
+ // Determine processing mode
5155
+ const processingMode = options.process || 'auto';
5156
+ let shouldProcessNeurally = false;
5157
+ if (processingMode === 'neural') {
5158
+ shouldProcessNeurally = true;
5159
+ }
5160
+ else if (processingMode === 'auto') {
5161
+ // Auto-detect whether to use neural processing
5162
+ shouldProcessNeurally = this.shouldAutoProcessNeurally(vectorOrData, enrichedMetadata);
5163
+ }
5164
+ // 'literal' mode means no neural processing
5165
+ // 🧠 AI Processing (Neural Import) - Based on processing mode
5166
+ if (shouldProcessNeurally) {
5167
+ try {
5168
+ // Execute augmentation pipeline for data processing
5169
+ // Note: Augmentations will be called via this.augmentations.execute during the actual add operation
5170
+ // This replaces the legacy SENSE pipeline
5171
+ if (this.loggingConfig?.verbose) {
5172
+ console.log(`🧠 AI processing completed for data: ${id}`);
5173
+ }
5174
+ }
5175
+ catch (processingError) {
5176
+ // Don't fail the add operation if processing fails
5177
+ console.warn(`🧠 AI processing failed for ${id}:`, processingError);
5178
+ }
5179
+ }
5180
+ return id;
5181
+ }
5182
+ catch (error) {
5183
+ console.error('Failed to add vector:', error);
5184
+ // Track error in health monitor
5185
+ if (this.monitoring) {
5186
+ this.monitoring.recordRequest(0, true);
5187
+ }
5188
+ throw new Error(`Failed to add vector: ${error}`);
5189
+ }
5183
5190
  }
5184
5191
  /**
5185
5192
  * Add Verb - Unified relationship creation between nouns
@@ -5514,10 +5521,10 @@ export class BrainyData {
5514
5521
  }
5515
5522
  };
5516
5523
  // Store coordination plan in _system directory
5517
- await this.addNoun({
5524
+ await this.addNoun('Cortex coordination plan', NounType.Process, {
5518
5525
  id: '_system/coordination',
5519
5526
  type: 'cortex_coordination',
5520
- metadata: coordinationPlan
5527
+ ...coordinationPlan
5521
5528
  });
5522
5529
  prodLog.info('📋 Storage migration coordination plan created');
5523
5530
  prodLog.info('All services will automatically detect and execute the migration');