@soulcraft/brainy 2.10.1 → 2.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/README.md +10 -10
  2. package/dist/augmentations/apiServerAugmentation.js +2 -2
  3. package/dist/augmentations/display/fieldPatterns.d.ts +1 -1
  4. package/dist/augmentations/display/fieldPatterns.js +1 -1
  5. package/dist/augmentations/display/intelligentComputation.d.ts +2 -2
  6. package/dist/augmentations/display/intelligentComputation.js +4 -4
  7. package/dist/augmentations/display/types.d.ts +1 -1
  8. package/dist/augmentations/neuralImport.js +4 -4
  9. package/dist/augmentations/synapseAugmentation.js +3 -3
  10. package/dist/augmentations/typeMatching/brainyTypes.d.ts +83 -0
  11. package/dist/augmentations/typeMatching/brainyTypes.js +425 -0
  12. package/dist/augmentations/universalDisplayAugmentation.d.ts +1 -1
  13. package/dist/augmentations/universalDisplayAugmentation.js +1 -1
  14. package/dist/brainyData.d.ts +20 -41
  15. package/dist/brainyData.js +1467 -1430
  16. package/dist/chat/BrainyChat.js +11 -11
  17. package/dist/examples/basicUsage.js +4 -1
  18. package/dist/importManager.js +2 -2
  19. package/dist/index.d.ts +3 -1
  20. package/dist/index.js +5 -1
  21. package/dist/neural/embeddedPatterns.d.ts +1 -1
  22. package/dist/neural/embeddedPatterns.js +2 -2
  23. package/dist/neural/improvedNeuralAPI.d.ts +346 -0
  24. package/dist/neural/improvedNeuralAPI.js +2439 -0
  25. package/dist/neural/types.d.ts +267 -0
  26. package/dist/neural/types.js +24 -0
  27. package/dist/storage/adapters/fileSystemStorage.d.ts +2 -2
  28. package/dist/storage/adapters/fileSystemStorage.js +2 -2
  29. package/dist/storage/adapters/memoryStorage.d.ts +4 -4
  30. package/dist/storage/adapters/memoryStorage.js +4 -4
  31. package/dist/storage/adapters/opfsStorage.d.ts +2 -2
  32. package/dist/storage/adapters/opfsStorage.js +2 -2
  33. package/dist/storage/adapters/s3CompatibleStorage.d.ts +2 -2
  34. package/dist/storage/adapters/s3CompatibleStorage.js +2 -2
  35. package/dist/storage/baseStorage.d.ts +12 -2
  36. package/dist/storage/baseStorage.js +32 -0
  37. package/dist/types/brainyDataInterface.d.ts +2 -5
  38. package/dist/utils/brainyTypes.d.ts +217 -0
  39. package/dist/utils/brainyTypes.js +261 -0
  40. package/dist/utils/typeValidation.d.ts +25 -0
  41. package/dist/utils/typeValidation.js +127 -0
  42. package/package.json +1 -1
@@ -12,6 +12,7 @@ import { enforceNodeVersion } from './utils/nodeVersionCheck.js';
12
12
  import { createNamespacedMetadata, updateNamespacedMetadata, markDeleted, markRestored, isDeleted, getUserMetadata } from './utils/metadataNamespace.js';
13
13
  import { PeriodicCleanup } from './utils/periodicCleanup.js';
14
14
  import { NounType, VerbType } from './types/graphTypes.js';
15
+ import { validateNounType } from './utils/typeValidation.js';
15
16
  import { createServerSearchAugmentations } from './augmentations/serverSearchAugmentations.js';
16
17
  import { augmentationPipeline } from './augmentationPipeline.js';
17
18
  import { prodLog } from './utils/logger.js';
@@ -28,7 +29,7 @@ import { EntityRegistryAugmentation, AutoRegisterEntitiesAugmentation } from './
28
29
  import { createDefaultAugmentations } from './augmentations/defaultAugmentations.js';
29
30
  // import { RealtimeStreamingAugmentation } from './augmentations/realtimeStreamingAugmentation.js'
30
31
  import { IntelligentVerbScoringAugmentation } from './augmentations/intelligentVerbScoringAugmentation.js';
31
- import { NeuralAPI } from './neural/neuralAPI.js';
32
+ import { ImprovedNeuralAPI } from './neural/improvedNeuralAPI.js';
32
33
  import { TripleIntelligenceEngine } from './triple/TripleIntelligence.js';
33
34
  export class BrainyData {
34
35
  // REMOVED: HealthMonitor is now handled by MonitoringAugmentation
@@ -127,7 +128,7 @@ export class BrainyData {
127
128
  this.operationalMode = null;
128
129
  this.domainDetector = null;
129
130
  // Enforce Node.js version requirement for ONNX stability
130
- if (typeof process !== 'undefined' && process.version) {
131
+ if (typeof process !== 'undefined' && process.version && !process.env.BRAINY_SKIP_VERSION_CHECK) {
131
132
  enforceNodeVersion();
132
133
  }
133
134
  // Store raw config for processing in init()
@@ -1248,1317 +1249,1023 @@ export class BrainyData {
1248
1249
  throw new Error(`Failed to connect to remote server: ${error}`);
1249
1250
  }
1250
1251
  }
1252
+ // REMOVED: addItem() - Use addNoun() instead (cleaner 2.0 API)
1253
+ // REMOVED: addToBoth() - Remote server functionality moved to post-2.0.0
1251
1254
  /**
1252
- * Add data to the database with intelligent processing
1253
- *
1254
- * @param vectorOrData Vector or data to add
1255
- * @param metadata Optional metadata to associate with the data
1256
- * @param options Additional options for processing
1257
- * @returns The ID of the added data
1258
- *
1259
- * @example
1260
- * // Auto mode - intelligently decides processing
1261
- * await brainy.add("Customer feedback: Great product!")
1262
- *
1263
- * @example
1264
- * // Explicit literal mode for sensitive data
1265
- * await brainy.add("API_KEY=secret123", null, { process: 'literal' })
1266
- *
1267
- * @example
1268
- * // Force neural processing
1269
- * await brainy.add("John works at Acme Corp", null, { process: 'neural' })
1255
+ * Add a vector to the remote server
1256
+ * @param id ID of the vector to add
1257
+ * @param vector Vector to add
1258
+ * @param metadata Optional metadata to associate with the vector
1259
+ * @returns True if successful, false otherwise
1260
+ * @private
1261
+ */
1262
+ async addToRemote(id, vector, metadata) {
1263
+ if (!this.isConnectedToRemoteServer()) {
1264
+ return false;
1265
+ }
1266
+ try {
1267
+ // TODO: Remote server operations (post-2.0.0 feature)
1268
+ // if (!this.serverSearchConduit || !this.serverConnection) {
1269
+ // throw new Error(
1270
+ // 'Server search conduit or connection is not initialized'
1271
+ // )
1272
+ // }
1273
+ // TODO: Add to remote server
1274
+ // const addResult = await this.serverSearchConduit.addToBoth(
1275
+ // this.serverConnection.connectionId,
1276
+ // vector,
1277
+ // metadata
1278
+ // )
1279
+ throw new Error('Remote server functionality not yet implemented in Brainy 2.0.0');
1280
+ // TODO: Handle remote add result (post-2.0.0 feature)
1281
+ // if (!addResult.success) {
1282
+ // throw new Error(`Remote add failed: ${addResult.error}`)
1283
+ // }
1284
+ return true;
1285
+ }
1286
+ catch (error) {
1287
+ console.error('Failed to add to remote server:', error);
1288
+ throw new Error(`Failed to add to remote server: ${error}`);
1289
+ }
1290
+ }
1291
+ /**
1292
+ * Add multiple vectors or data items to the database
1293
+ * @param items Array of items to add
1294
+ * @param options Additional options
1295
+ * @returns Array of IDs for the added items
1296
+ */
1297
+ /**
1298
+ * Add multiple nouns in batch with required types
1299
+ * @param items Array of nouns to add (all must have types)
1300
+ * @param options Batch processing options
1301
+ * @returns Array of generated IDs
1270
1302
  */
1271
- async add(vectorOrData, metadata, options = {}) {
1303
+ async addNouns(items, options = {}) {
1272
1304
  await this.ensureInitialized();
1273
1305
  // Check if database is in read-only mode
1274
1306
  this.checkReadOnly();
1275
- // Validate input is not null or undefined
1276
- if (vectorOrData === null || vectorOrData === undefined) {
1277
- throw new Error('Input cannot be null or undefined');
1307
+ // Validate all types upfront for better error handling
1308
+ const invalidItems = [];
1309
+ items.forEach((item, index) => {
1310
+ if (!item.nounType || typeof item.nounType !== 'string') {
1311
+ invalidItems.push(index);
1312
+ }
1313
+ else {
1314
+ // Validate the type is valid
1315
+ try {
1316
+ validateNounType(item.nounType);
1317
+ }
1318
+ catch (error) {
1319
+ invalidItems.push(index);
1320
+ }
1321
+ }
1322
+ });
1323
+ if (invalidItems.length > 0) {
1324
+ throw new Error(`Type validation failed for ${invalidItems.length} items at indices: ${invalidItems.slice(0, 5).join(', ')}${invalidItems.length > 5 ? '...' : ''}\n` +
1325
+ 'All items must have valid noun types.\n' +
1326
+ 'Example: { vectorOrData: "data", nounType: NounType.Content, metadata: {...} }');
1278
1327
  }
1328
+ // Default concurrency to 4 if not specified
1329
+ const concurrency = options.concurrency || 4;
1330
+ // Default batch size to 50 if not specified
1331
+ const batchSize = options.batchSize || 50;
1279
1332
  try {
1280
- let vector;
1281
- // First validate if input is an array but contains non-numeric values
1282
- if (Array.isArray(vectorOrData)) {
1283
- for (let i = 0; i < vectorOrData.length; i++) {
1284
- if (typeof vectorOrData[i] !== 'number') {
1285
- throw new Error('Vector contains non-numeric values');
1333
+ // Process items in batches to control concurrency and memory usage
1334
+ const ids = [];
1335
+ const itemsToProcess = [...items]; // Create a copy to avoid modifying the original array
1336
+ while (itemsToProcess.length > 0) {
1337
+ // Take up to 'batchSize' items to process in a batch
1338
+ const batch = itemsToProcess.splice(0, batchSize);
1339
+ // Separate items that are already vectors from those that need embedding
1340
+ const vectorItems = [];
1341
+ const textItems = [];
1342
+ // Categorize items
1343
+ batch.forEach((item, index) => {
1344
+ if (Array.isArray(item.vectorOrData) &&
1345
+ item.vectorOrData.every((val) => typeof val === 'number') &&
1346
+ !options.forceEmbed) {
1347
+ // Item is already a vector
1348
+ vectorItems.push({
1349
+ vectorOrData: item.vectorOrData,
1350
+ nounType: item.nounType,
1351
+ metadata: item.metadata,
1352
+ index
1353
+ });
1354
+ }
1355
+ else if (typeof item.vectorOrData === 'string') {
1356
+ // Item is text that needs embedding
1357
+ textItems.push({
1358
+ text: item.vectorOrData,
1359
+ nounType: item.nounType,
1360
+ metadata: item.metadata,
1361
+ index
1362
+ });
1363
+ }
1364
+ else {
1365
+ // For now, treat other types as text
1366
+ // In a more complete implementation, we might handle other types differently
1367
+ const textRepresentation = String(item.vectorOrData);
1368
+ textItems.push({
1369
+ text: textRepresentation,
1370
+ nounType: item.nounType,
1371
+ metadata: item.metadata,
1372
+ index
1373
+ });
1286
1374
  }
1375
+ });
1376
+ // Process vector items (already embedded)
1377
+ const vectorPromises = vectorItems.map((item) => this.addNoun(item.vectorOrData, item.nounType, item.metadata));
1378
+ // Process text items in a single batch embedding operation
1379
+ let textPromises = [];
1380
+ if (textItems.length > 0) {
1381
+ // Extract just the text for batch embedding
1382
+ const texts = textItems.map((item) => item.text);
1383
+ // Perform batch embedding
1384
+ const embeddings = await batchEmbed(texts);
1385
+ // Add each item with its embedding
1386
+ textPromises = textItems.map((item, i) => this.addNoun(embeddings[i], item.nounType, item.metadata));
1287
1387
  }
1388
+ // Combine all promises
1389
+ const batchResults = await Promise.all([
1390
+ ...vectorPromises,
1391
+ ...textPromises
1392
+ ]);
1393
+ // Add the results to our ids array
1394
+ ids.push(...batchResults);
1288
1395
  }
1396
+ return ids;
1397
+ }
1398
+ catch (error) {
1399
+ console.error('Failed to add batch of items:', error);
1400
+ throw new Error(`Failed to add batch of items: ${error}`);
1401
+ }
1402
+ }
1403
+ /**
1404
+ * Add multiple vectors or data items to both local and remote databases
1405
+ * @param items Array of items to add (with required types)
1406
+ * @param options Additional options
1407
+ * @returns Array of IDs for the added items
1408
+ */
1409
+ async addBatchToBoth(items, options = {}) {
1410
+ // Check if connected to a remote server
1411
+ if (!this.isConnectedToRemoteServer()) {
1412
+ throw new Error('Not connected to a remote server. Call connectToRemoteServer() first.');
1413
+ }
1414
+ // Add to local with addToRemote option
1415
+ return this.addNouns(items, { ...options, addToRemote: true });
1416
+ }
1417
+ /**
1418
+ * Filter search results by service
1419
+ * @param results Search results to filter
1420
+ * @param service Service to filter by
1421
+ * @returns Filtered search results
1422
+ * @private
1423
+ */
1424
+ filterResultsByService(results, service) {
1425
+ if (!service)
1426
+ return results;
1427
+ return results.filter((result) => {
1428
+ if (!result.metadata || typeof result.metadata !== 'object')
1429
+ return false;
1430
+ if (!('createdBy' in result.metadata))
1431
+ return false;
1432
+ const createdBy = result.metadata.createdBy;
1433
+ if (!createdBy)
1434
+ return false;
1435
+ return createdBy.augmentation === service;
1436
+ });
1437
+ }
1438
+ /**
1439
+ * Search for similar vectors within specific noun types
1440
+ * @param queryVectorOrData Query vector or data to search for
1441
+ * @param k Number of results to return
1442
+ * @param nounTypes Array of noun types to search within, or null to search all
1443
+ * @param options Additional options
1444
+ * @returns Array of search results
1445
+ */
1446
+ /**
1447
+ * @deprecated Use search() with nounTypes option instead
1448
+ * @example
1449
+ * // Old way (deprecated)
1450
+ * await brain.searchByNounTypes(query, 10, ['type1', 'type2'])
1451
+ * // New way
1452
+ * await brain.search(query, { limit: 10, nounTypes: ['type1', 'type2'] })
1453
+ */
1454
+ async searchByNounTypes(queryVectorOrData, k = 10, nounTypes = null, options = {}) {
1455
+ // Helper function to filter results by service
1456
+ const filterByService = (metadata) => {
1457
+ if (!options.service)
1458
+ return true; // No filter, include all
1459
+ // Check if metadata has createdBy field with matching service
1460
+ if (!metadata || typeof metadata !== 'object')
1461
+ return false;
1462
+ if (!('createdBy' in metadata))
1463
+ return false;
1464
+ const createdBy = metadata.createdBy;
1465
+ if (!createdBy)
1466
+ return false;
1467
+ return createdBy.augmentation === options.service;
1468
+ };
1469
+ if (!this.isInitialized) {
1470
+ throw new Error('BrainyData must be initialized before searching. Call init() first.');
1471
+ }
1472
+ // Check if database is in write-only mode
1473
+ this.checkWriteOnly();
1474
+ try {
1475
+ let queryVector;
1289
1476
  // Check if input is already a vector
1290
- if (Array.isArray(vectorOrData) && !options.forceEmbed) {
1291
- // Input is already a vector (and we've validated it contains only numbers)
1292
- vector = vectorOrData;
1477
+ if (Array.isArray(queryVectorOrData) &&
1478
+ queryVectorOrData.every((item) => typeof item === 'number') &&
1479
+ !options.forceEmbed) {
1480
+ // Input is already a vector
1481
+ queryVector = queryVectorOrData;
1293
1482
  }
1294
1483
  else {
1295
1484
  // Input needs to be vectorized
1296
1485
  try {
1297
- // Check if input is a JSON object and process it specially
1298
- if (typeof vectorOrData === 'object' &&
1299
- vectorOrData !== null &&
1300
- !Array.isArray(vectorOrData)) {
1301
- // Process JSON object for better vectorization
1302
- const preparedText = prepareJsonForVectorization(vectorOrData, {
1303
- // Prioritize common name/title fields if they exist
1304
- priorityFields: [
1305
- 'name',
1306
- 'title',
1307
- 'company',
1308
- 'organization',
1309
- 'description',
1310
- 'summary'
1311
- ]
1312
- });
1313
- vector = await this.embeddingFunction(preparedText);
1314
- // IMPORTANT: When an object is passed as data and no metadata is provided,
1315
- // use the object AS the metadata too. This is expected behavior for the API.
1316
- // Users can pass either:
1317
- // 1. addNoun(string, metadata) - vectorize string, store metadata
1318
- // 2. addNoun(object) - vectorize object text, store object as metadata
1319
- // 3. addNoun(object, metadata) - vectorize object text, store provided metadata
1320
- if (!metadata) {
1321
- metadata = vectorOrData;
1322
- }
1323
- // Track field names for this JSON document
1324
- const service = this.getServiceName(options);
1325
- if (this.storage) {
1326
- await this.storage.trackFieldNames(vectorOrData, service);
1327
- }
1328
- }
1329
- else {
1330
- // Use standard embedding for non-JSON data
1331
- vector = await this.embeddingFunction(vectorOrData);
1332
- }
1486
+ queryVector = await this.embeddingFunction(queryVectorOrData);
1333
1487
  }
1334
1488
  catch (embedError) {
1335
- throw new Error(`Failed to vectorize data: ${embedError}`);
1489
+ throw new Error(`Failed to vectorize query data: ${embedError}`);
1336
1490
  }
1337
1491
  }
1338
- // Check if vector is defined
1339
- if (!vector) {
1340
- throw new Error('Vector is undefined or null');
1492
+ // Check if query vector is defined
1493
+ if (!queryVector) {
1494
+ throw new Error('Query vector is undefined or null');
1341
1495
  }
1342
- // Validate vector dimensions
1343
- if (vector.length !== this._dimensions) {
1344
- throw new Error(`Vector dimension mismatch: expected ${this._dimensions}, got ${vector.length}`);
1496
+ // Check if query vector dimensions match the expected dimensions
1497
+ if (queryVector.length !== this._dimensions) {
1498
+ throw new Error(`Query vector dimension mismatch: expected ${this._dimensions}, got ${queryVector.length}`);
1345
1499
  }
1346
- // Use ID from options if it exists, otherwise from metadata, otherwise generate a new UUID
1347
- const id = options.id ||
1348
- (metadata && typeof metadata === 'object' && 'id' in metadata
1349
- ? metadata.id
1350
- : uuidv4());
1351
- // Check for existing noun (both write-only and normal modes)
1352
- let existingNoun;
1353
- if (options.id) {
1354
- try {
1355
- if (this.writeOnly) {
1356
- // In write-only mode, check storage directly
1357
- existingNoun =
1358
- (await this.storage.getNoun(options.id)) ?? undefined;
1500
+ // If no noun types specified, search all nouns
1501
+ if (!nounTypes || nounTypes.length === 0) {
1502
+ // Check if we're in readonly mode with lazy loading and the index is empty
1503
+ const indexSize = this.index.getNouns().size;
1504
+ if (this.readOnly && this.lazyLoadInReadOnlyMode && indexSize === 0) {
1505
+ if (this.loggingConfig?.verbose) {
1506
+ console.log('Lazy loading mode: Index is empty, loading nodes for search...');
1359
1507
  }
1360
- else {
1361
- // In normal mode, check index first, then storage
1362
- existingNoun = this.index.getNouns().get(options.id);
1363
- if (!existingNoun) {
1364
- existingNoun =
1365
- (await this.storage.getNoun(options.id)) ?? undefined;
1508
+ // In lazy loading mode, we need to load some nodes to search
1509
+ // Instead of loading all nodes, we'll load a subset of nodes
1510
+ // Load a limited number of nodes from storage using pagination
1511
+ const result = await this.storage.getNouns({
1512
+ pagination: { offset: 0, limit: k * 10 } // Get 10x more nodes than needed
1513
+ });
1514
+ const limitedNouns = result.items;
1515
+ // Add these nodes to the index
1516
+ for (const node of limitedNouns) {
1517
+ // Check if the vector dimensions match the expected dimensions
1518
+ if (node.vector.length !== this._dimensions) {
1519
+ console.warn(`Skipping node ${node.id} due to dimension mismatch: expected ${this._dimensions}, got ${node.vector.length}`);
1520
+ continue;
1366
1521
  }
1522
+ // Add to index
1523
+ await this.index.addItem({
1524
+ id: node.id,
1525
+ vector: node.vector
1526
+ });
1367
1527
  }
1368
- if (existingNoun) {
1369
- // Check if existing noun is a placeholder
1370
- const existingMetadata = await this.storage.getMetadata(options.id);
1371
- const isPlaceholder = existingMetadata &&
1372
- typeof existingMetadata === 'object' &&
1373
- existingMetadata.isPlaceholder;
1374
- if (isPlaceholder) {
1375
- // Replace placeholder with real data
1376
- if (this.loggingConfig?.verbose) {
1377
- console.log(`Replacing placeholder noun ${options.id} with real data`);
1378
- }
1528
+ if (this.loggingConfig?.verbose) {
1529
+ console.log(`Lazy loading mode: Added ${limitedNouns.length} nodes to index for search`);
1530
+ }
1531
+ }
1532
+ // Create filter function for HNSW search with metadata index optimization
1533
+ const hasMetadataFilter = options.metadata && Object.keys(options.metadata).length > 0;
1534
+ const hasServiceFilter = !!options.service;
1535
+ let filterFunction;
1536
+ let preFilteredIds;
1537
+ // Use metadata index for pre-filtering if available
1538
+ if (hasMetadataFilter && this.metadataIndex) {
1539
+ try {
1540
+ // Ensure metadata index is up to date
1541
+ await this.metadataIndex?.flush?.();
1542
+ // Get candidate IDs from metadata index
1543
+ const candidateIds = await this.metadataIndex?.getIdsForFilter?.(options.metadata) || [];
1544
+ if (candidateIds.length > 0) {
1545
+ preFilteredIds = new Set(candidateIds);
1546
+ // Create a simple filter function that just checks the pre-filtered set
1547
+ filterFunction = async (id) => {
1548
+ if (!preFilteredIds.has(id))
1549
+ return false;
1550
+ // Still apply service filter if needed
1551
+ if (hasServiceFilter) {
1552
+ const metadata = await this.storage.getMetadata(id);
1553
+ const noun = this.index.getNouns().get(id);
1554
+ if (!noun || !metadata)
1555
+ return false;
1556
+ const result = { id, score: 0, vector: noun.vector, metadata };
1557
+ return this.filterResultsByService([result], options.service).length > 0;
1558
+ }
1559
+ return true;
1560
+ };
1379
1561
  }
1380
1562
  else {
1381
- // Real noun already exists, update it
1382
- if (this.loggingConfig?.verbose) {
1383
- console.log(`Updating existing noun ${options.id}`);
1384
- }
1563
+ // No items match the metadata criteria, return empty results immediately
1564
+ return [];
1385
1565
  }
1386
1566
  }
1567
+ catch (indexError) {
1568
+ console.warn('Metadata index error, falling back to full filtering:', indexError);
1569
+ // Fall back to full metadata filtering below
1570
+ }
1387
1571
  }
1388
- catch (storageError) {
1389
- // Item doesn't exist, continue with add operation
1390
- }
1391
- }
1392
- let noun;
1393
- // In write-only mode, skip index operations since index is not loaded
1394
- if (this.writeOnly) {
1395
- // Create noun object directly without adding to index
1396
- noun = {
1397
- id,
1398
- vector,
1399
- connections: new Map(),
1400
- level: 0, // Default level for new nodes
1401
- metadata: undefined // Will be set separately
1402
- };
1403
- }
1404
- else {
1405
- // Normal mode: Add to HNSW index first
1406
- await this.hnswIndex.addItem({ id, vector, metadata });
1407
- // Get the noun from the HNSW index
1408
- const indexNoun = this.hnswIndex.getNouns().get(id);
1409
- if (!indexNoun) {
1410
- throw new Error(`Failed to retrieve newly created noun with ID ${id}`);
1411
- }
1412
- noun = indexNoun;
1413
- }
1414
- // Save noun to storage using augmentation system
1415
- await this.augmentations.execute('saveNoun', { noun, options }, async () => {
1416
- await this.storage.saveNoun(noun);
1417
- const service = this.getServiceName(options);
1418
- await this.storage.incrementStatistic('noun', service);
1419
- });
1420
- // Save metadata if provided and not empty
1421
- if (metadata !== undefined) {
1422
- // Skip saving if metadata is an empty object
1423
- if (metadata &&
1424
- typeof metadata === 'object' &&
1425
- Object.keys(metadata).length === 0) {
1426
- // Don't save empty metadata
1427
- // Explicitly save null to ensure no metadata is stored
1428
- await this.storage.saveMetadata(id, null);
1429
- }
1430
- else {
1431
- // Validate noun type if metadata is for a GraphNoun
1432
- if (metadata && typeof metadata === 'object' && 'noun' in metadata) {
1433
- const nounType = metadata.noun;
1434
- // Check if the noun type is valid
1435
- const isValidNounType = Object.values(NounType).includes(nounType);
1436
- if (!isValidNounType) {
1437
- console.warn(`Invalid noun type: ${nounType}. Falling back to GraphNoun.`);
1438
- metadata.noun = NounType.Concept;
1439
- }
1440
- // Ensure createdBy field is populated for GraphNoun
1441
- const service = options.service || this.getCurrentAugmentation();
1442
- const graphNoun = metadata;
1443
- // Only set createdBy if it doesn't exist or is being explicitly updated
1444
- if (!graphNoun.createdBy || options.service) {
1445
- graphNoun.createdBy = getAugmentationVersion(service);
1446
- }
1447
- // Update timestamps
1448
- const now = new Date();
1449
- const timestamp = {
1450
- seconds: Math.floor(now.getTime() / 1000),
1451
- nanoseconds: (now.getTime() % 1000) * 1000000
1452
- };
1453
- // Set createdAt if it doesn't exist
1454
- if (!graphNoun.createdAt) {
1455
- graphNoun.createdAt = timestamp;
1572
+ // Fallback to full metadata filtering if index wasn't used
1573
+ if (!filterFunction && (hasMetadataFilter || hasServiceFilter)) {
1574
+ filterFunction = async (id) => {
1575
+ // Get metadata for filtering
1576
+ let metadata = await this.storage.getMetadata(id);
1577
+ if (metadata === null) {
1578
+ metadata = {};
1456
1579
  }
1457
- // Always update updatedAt
1458
- graphNoun.updatedAt = timestamp;
1459
- }
1460
- // Create properly namespaced metadata for new items
1461
- let metadataToSave = createNamespacedMetadata(metadata);
1462
- // Add domain metadata if distributed mode is enabled
1463
- if (this.domainDetector) {
1464
- // First check if domain is already in metadata
1465
- if (metadataToSave.domain) {
1466
- // Domain already specified, keep it
1467
- const domainInfo = this.domainDetector.detectDomain(metadataToSave);
1468
- if (domainInfo.domainMetadata) {
1469
- ;
1470
- metadataToSave.domainMetadata =
1471
- domainInfo.domainMetadata;
1580
+ // Apply metadata filter
1581
+ if (hasMetadataFilter) {
1582
+ const matches = matchesMetadataFilter(metadata, options.metadata);
1583
+ if (!matches) {
1584
+ return false;
1472
1585
  }
1473
1586
  }
1474
- else {
1475
- // Try to detect domain from the data
1476
- const dataToAnalyze = Array.isArray(vectorOrData)
1477
- ? metadata
1478
- : vectorOrData;
1479
- const domainInfo = this.domainDetector.detectDomain(dataToAnalyze);
1480
- if (domainInfo.domain) {
1481
- ;
1482
- metadataToSave.domain = domainInfo.domain;
1483
- if (domainInfo.domainMetadata) {
1484
- ;
1485
- metadataToSave.domainMetadata =
1486
- domainInfo.domainMetadata;
1487
- }
1587
+ // Apply service filter
1588
+ if (hasServiceFilter) {
1589
+ const noun = this.index.getNouns().get(id);
1590
+ if (!noun)
1591
+ return false;
1592
+ const result = { id, score: 0, vector: noun.vector, metadata };
1593
+ if (!this.filterResultsByService([result], options.service).length) {
1594
+ return false;
1488
1595
  }
1489
1596
  }
1597
+ return true;
1598
+ };
1599
+ }
1600
+ // When using offset, we need to fetch more results and then slice
1601
+ const offset = options.offset || 0;
1602
+ const totalNeeded = k + offset;
1603
+ // Search in the index with filter
1604
+ const results = await this.index.search(queryVector, totalNeeded, filterFunction);
1605
+ // Skip the offset number of results
1606
+ const paginatedResults = results.slice(offset, offset + k);
1607
+ // Get metadata for each result
1608
+ const searchResults = [];
1609
+ for (const [id, score] of paginatedResults) {
1610
+ const noun = this.index.getNouns().get(id);
1611
+ if (!noun) {
1612
+ continue;
1490
1613
  }
1491
- // Add partition information if distributed mode is enabled
1492
- if (this.partitioner) {
1493
- const partition = this.partitioner.getPartition(id);
1494
- metadataToSave.partition = partition;
1495
- }
1496
- await this.storage.saveMetadata(id, metadataToSave);
1497
- // Update metadata index (write-only mode should build indices!)
1498
- if (this.index && !this.frozen) {
1499
- await this.metadataIndex?.addToIndex?.(id, metadataToSave);
1500
- }
1501
- // Track metadata statistics
1502
- const metadataService = this.getServiceName(options);
1503
- await this.storage.incrementStatistic('metadata', metadataService);
1504
- // Track content type if it's a GraphNoun
1505
- if (metadataToSave &&
1506
- typeof metadataToSave === 'object' &&
1507
- 'noun' in metadataToSave) {
1508
- this.metrics.trackContentType(metadataToSave.noun);
1614
+ let metadata = await this.storage.getMetadata(id);
1615
+ // Initialize metadata to an empty object if it's null
1616
+ if (metadata === null) {
1617
+ metadata = {};
1509
1618
  }
1510
- // Track update timestamp (handled by metrics augmentation)
1619
+ // Preserve original metadata without overwriting user's custom fields
1620
+ // The search result already has Brainy's UUID in the main 'id' field
1621
+ searchResults.push({
1622
+ id,
1623
+ score: 1 - score, // Convert distance to similarity (higher = more similar)
1624
+ vector: noun.vector,
1625
+ metadata: metadata
1626
+ });
1511
1627
  }
1628
+ return searchResults;
1512
1629
  }
1513
- // Update HNSW index size with actual index size
1514
- const indexSize = this.index.size();
1515
- await this.storage.updateHnswIndexSize(indexSize);
1516
- // Update health metrics if in distributed mode
1517
- if (this.monitoring) {
1518
- const vectorCount = await this.getNounCount();
1519
- this.monitoring.updateVectorCount(vectorCount);
1520
- }
1521
- // If addToRemote is true and we're connected to a remote server, add to remote as well
1522
- if (options.addToRemote && this.isConnectedToRemoteServer()) {
1523
- try {
1524
- await this.addToRemote(id, vector, metadata);
1630
+ else {
1631
+ // Get nouns for each noun type in parallel
1632
+ const nounPromises = nounTypes.map((nounType) => this.storage.getNounsByNounType(nounType));
1633
+ const nounArrays = await Promise.all(nounPromises);
1634
+ // Combine all nouns
1635
+ const nouns = [];
1636
+ for (const nounArray of nounArrays) {
1637
+ nouns.push(...nounArray);
1525
1638
  }
1526
- catch (remoteError) {
1527
- console.warn(`Failed to add to remote server: ${remoteError}. Continuing with local add.`);
1639
+ // Calculate distances for each noun
1640
+ const results = [];
1641
+ for (const noun of nouns) {
1642
+ const distance = this.index.getDistanceFunction()(queryVector, noun.vector);
1643
+ results.push([noun.id, distance]);
1528
1644
  }
1529
- }
1530
- // Invalidate search cache since data has changed
1531
- this.cache?.invalidateOnDataChange('add');
1532
- // Determine processing mode
1533
- const processingMode = options.process || 'auto';
1534
- let shouldProcessNeurally = false;
1535
- if (processingMode === 'neural') {
1536
- shouldProcessNeurally = true;
1537
- }
1538
- else if (processingMode === 'auto') {
1539
- // Auto-detect whether to use neural processing
1540
- shouldProcessNeurally = this.shouldAutoProcessNeurally(vectorOrData, metadata);
1541
- }
1542
- // 'literal' mode means no neural processing
1543
- // 🧠 AI Processing (Neural Import) - Based on processing mode
1544
- if (shouldProcessNeurally) {
1545
- try {
1546
- // Execute augmentation pipeline for data processing
1547
- // Note: Augmentations will be called via this.augmentations.execute during the actual add operation
1548
- // This replaces the legacy SENSE pipeline
1549
- if (this.loggingConfig?.verbose) {
1550
- console.log(`🧠 AI processing completed for data: ${id}`);
1645
+ // Sort by distance (ascending)
1646
+ results.sort((a, b) => a[1] - b[1]);
1647
+ // Apply offset and take k results
1648
+ const offset = options.offset || 0;
1649
+ const topResults = results.slice(offset, offset + k);
1650
+ // Get metadata for each result
1651
+ const searchResults = [];
1652
+ for (const [id, score] of topResults) {
1653
+ const noun = nouns.find((n) => n.id === id);
1654
+ if (!noun) {
1655
+ continue;
1551
1656
  }
1657
+ let metadata = await this.storage.getMetadata(id);
1658
+ // Initialize metadata to an empty object if it's null
1659
+ if (metadata === null) {
1660
+ metadata = {};
1661
+ }
1662
+ // Preserve original metadata without overwriting user's custom fields
1663
+ // The search result already has Brainy's UUID in the main 'id' field
1664
+ searchResults.push({
1665
+ id,
1666
+ score: 1 - score, // Convert distance to similarity (higher = more similar)
1667
+ vector: noun.vector,
1668
+ metadata: metadata
1669
+ });
1552
1670
  }
1553
- catch (processingError) {
1554
- // Don't fail the add operation if processing fails
1555
- console.warn(`🧠 AI processing failed for ${id}:`, processingError);
1556
- }
1671
+ // Results are already filtered, just return them
1672
+ return searchResults;
1557
1673
  }
1558
- return id;
1559
1674
  }
1560
1675
  catch (error) {
1561
- console.error('Failed to add vector:', error);
1562
- // Track error in health monitor
1563
- if (this.monitoring) {
1564
- this.monitoring.recordRequest(0, true);
1565
- }
1566
- throw new Error(`Failed to add vector: ${error}`);
1676
+ console.error('Failed to search vectors by noun types:', error);
1677
+ throw new Error(`Failed to search vectors by noun types: ${error}`);
1567
1678
  }
1568
1679
  }
1569
- // REMOVED: addItem() - Use addNoun() instead (cleaner 2.0 API)
1570
- // REMOVED: addToBoth() - Remote server functionality moved to post-2.0.0
1571
1680
  /**
1572
- * Add a vector to the remote server
1573
- * @param id ID of the vector to add
1574
- * @param vector Vector to add
1575
- * @param metadata Optional metadata to associate with the vector
1576
- * @returns True if successful, false otherwise
1577
- * @private
1681
+ * Search for similar vectors
1682
+ * @param queryVectorOrData Query vector or data to search for
1683
+ * @param k Number of results to return
1684
+ * @param options Additional options
1685
+ * @returns Array of search results
1578
1686
  */
1579
- async addToRemote(id, vector, metadata) {
1580
- if (!this.isConnectedToRemoteServer()) {
1581
- return false;
1687
+ /**
1688
+ * 🔍 SIMPLE VECTOR SEARCH - Clean wrapper around find() for pure vector search
1689
+ *
1690
+ * @param queryVectorOrData Vector or text to search for
1691
+ * @param k Number of results to return
1692
+ * @param options Simple search options (metadata filters only)
1693
+ * @returns Vector search results
1694
+ */
1695
+ /**
1696
+ * 🔍 Simple Vector Similarity Search - Clean wrapper around find()
1697
+ *
1698
+ * search(query) = find({like: query}) - Pure vector similarity search
1699
+ *
1700
+ * @param queryVectorOrData - Query string, vector, or object to search with
1701
+ * @param options - Search options for filtering and pagination
1702
+ * @returns Array of search results with scores and metadata
1703
+ *
1704
+ * @example
1705
+ * // Simple vector search
1706
+ * await brain.search('machine learning')
1707
+ *
1708
+ * // With filters and pagination
1709
+ * await brain.search('AI', {
1710
+ * limit: 20,
1711
+ * metadata: { type: 'article' },
1712
+ * nounTypes: ['document']
1713
+ * })
1714
+ */
1715
+ async search(queryVectorOrData, options = {}) {
1716
+ // Build metadata filter from options
1717
+ const metadataFilter = { ...options.metadata };
1718
+ // Add noun type filtering
1719
+ if (options.nounTypes && options.nounTypes.length > 0) {
1720
+ metadataFilter.nounType = { in: options.nounTypes };
1582
1721
  }
1583
- try {
1584
- // TODO: Remote server operations (post-2.0.0 feature)
1585
- // if (!this.serverSearchConduit || !this.serverConnection) {
1586
- // throw new Error(
1587
- // 'Server search conduit or connection is not initialized'
1588
- // )
1589
- // }
1590
- // TODO: Add to remote server
1591
- // const addResult = await this.serverSearchConduit.addToBoth(
1592
- // this.serverConnection.connectionId,
1593
- // vector,
1594
- // metadata
1595
- // )
1596
- throw new Error('Remote server functionality not yet implemented in Brainy 2.0.0');
1597
- // TODO: Handle remote add result (post-2.0.0 feature)
1598
- // if (!addResult.success) {
1599
- // throw new Error(`Remote add failed: ${addResult.error}`)
1600
- // }
1601
- return true;
1722
+ // Add item ID filtering
1723
+ if (options.itemIds && options.itemIds.length > 0) {
1724
+ metadataFilter.id = { in: options.itemIds };
1602
1725
  }
1603
- catch (error) {
1604
- console.error('Failed to add to remote server:', error);
1605
- throw new Error(`Failed to add to remote server: ${error}`);
1726
+ // Build simple TripleQuery for vector similarity
1727
+ const tripleQuery = {
1728
+ like: queryVectorOrData
1729
+ };
1730
+ // Add metadata filter if we have conditions
1731
+ if (Object.keys(metadataFilter).length > 0) {
1732
+ tripleQuery.where = metadataFilter;
1733
+ }
1734
+ // Extract find() options
1735
+ const findOptions = {
1736
+ limit: options.limit,
1737
+ offset: options.offset,
1738
+ cursor: options.cursor,
1739
+ excludeDeleted: options.excludeDeleted,
1740
+ timeout: options.timeout
1741
+ };
1742
+ // Call find() with structured query - this is the key simplification!
1743
+ let results = await this.find(tripleQuery, findOptions);
1744
+ // Apply threshold filtering if specified
1745
+ if (options.threshold !== undefined) {
1746
+ results = results.filter(r => (r.fusionScore || r.score || 0) >= options.threshold);
1606
1747
  }
1748
+ // Convert to SearchResult format
1749
+ return results.map(r => ({
1750
+ ...r,
1751
+ score: r.fusionScore || r.score || 0
1752
+ }));
1753
+ return results;
1607
1754
  }
1608
1755
  /**
1609
- * Add multiple vectors or data items to the database
1610
- * @param items Array of items to add
1611
- * @param options Additional options
1612
- * @returns Array of IDs for the added items
1756
+ * Helper method to encode cursor for pagination
1757
+ * @internal
1613
1758
  */
1759
+ encodeCursor(data) {
1760
+ return Buffer.from(JSON.stringify(data)).toString('base64');
1761
+ }
1614
1762
  /**
1615
- * Add multiple nouns in batch
1616
- * @param items Array of nouns to add
1617
- * @param options Batch processing options
1618
- * @returns Array of generated IDs
1763
+ * Helper method to decode cursor for pagination
1764
+ * @internal
1619
1765
  */
1620
- async addNouns(items, options = {}) {
1621
- await this.ensureInitialized();
1622
- // Check if database is in read-only mode
1623
- this.checkReadOnly();
1624
- // Default concurrency to 4 if not specified
1625
- const concurrency = options.concurrency || 4;
1626
- // Default batch size to 50 if not specified
1627
- const batchSize = options.batchSize || 50;
1766
+ decodeCursor(cursor) {
1628
1767
  try {
1629
- // Process items in batches to control concurrency and memory usage
1630
- const ids = [];
1631
- const itemsToProcess = [...items]; // Create a copy to avoid modifying the original array
1632
- while (itemsToProcess.length > 0) {
1633
- // Take up to 'batchSize' items to process in a batch
1634
- const batch = itemsToProcess.splice(0, batchSize);
1635
- // Separate items that are already vectors from those that need embedding
1636
- const vectorItems = [];
1637
- const textItems = [];
1638
- // Categorize items
1639
- batch.forEach((item, index) => {
1640
- if (Array.isArray(item.vectorOrData) &&
1641
- item.vectorOrData.every((val) => typeof val === 'number') &&
1642
- !options.forceEmbed) {
1643
- // Item is already a vector
1644
- vectorItems.push({
1645
- vectorOrData: item.vectorOrData,
1646
- metadata: item.metadata,
1647
- index
1648
- });
1649
- }
1650
- else if (typeof item.vectorOrData === 'string') {
1651
- // Item is text that needs embedding
1652
- textItems.push({
1653
- text: item.vectorOrData,
1654
- metadata: item.metadata,
1655
- index
1656
- });
1657
- }
1658
- else {
1659
- // For now, treat other types as text
1660
- // In a more complete implementation, we might handle other types differently
1661
- const textRepresentation = String(item.vectorOrData);
1662
- textItems.push({
1663
- text: textRepresentation,
1664
- metadata: item.metadata,
1665
- index
1666
- });
1667
- }
1668
- });
1669
- // Process vector items (already embedded)
1670
- const vectorPromises = vectorItems.map((item) => this.addNoun(item.vectorOrData, item.metadata));
1671
- // Process text items in a single batch embedding operation
1672
- let textPromises = [];
1673
- if (textItems.length > 0) {
1674
- // Extract just the text for batch embedding
1675
- const texts = textItems.map((item) => item.text);
1676
- // Perform batch embedding
1677
- const embeddings = await batchEmbed(texts);
1678
- // Add each item with its embedding
1679
- textPromises = textItems.map((item, i) => this.addNoun(embeddings[i], item.metadata));
1680
- }
1681
- // Combine all promises
1682
- const batchResults = await Promise.all([
1683
- ...vectorPromises,
1684
- ...textPromises
1685
- ]);
1686
- // Add the results to our ids array
1687
- ids.push(...batchResults);
1688
- }
1689
- return ids;
1768
+ return JSON.parse(Buffer.from(cursor, 'base64').toString());
1690
1769
  }
1691
- catch (error) {
1692
- console.error('Failed to add batch of items:', error);
1693
- throw new Error(`Failed to add batch of items: ${error}`);
1770
+ catch {
1771
+ return { offset: 0, timestamp: 0 };
1694
1772
  }
1695
1773
  }
1696
1774
  /**
1697
- * Add multiple vectors or data items to both local and remote databases
1698
- * @param items Array of items to add
1699
- * @param options Additional options
1700
- * @returns Array of IDs for the added items
1775
+ * Internal method for direct HNSW vector search
1776
+ * Used by TripleIntelligence to avoid circular dependencies
1777
+ * Note: For pure metadata filtering, use metadataIndex.getIdsForFilter() directly - it's O(log n)!
1778
+ * This method is for vector similarity search with optional metadata filtering during search
1779
+ * @internal
1701
1780
  */
1702
- async addBatchToBoth(items, options = {}) {
1703
- // Check if connected to a remote server
1704
- if (!this.isConnectedToRemoteServer()) {
1705
- throw new Error('Not connected to a remote server. Call connectToRemoteServer() first.');
1781
+ async _internalVectorSearch(queryVectorOrData, k = 10, options = {}) {
1782
+ // Generate query vector
1783
+ const queryVector = Array.isArray(queryVectorOrData) &&
1784
+ typeof queryVectorOrData[0] === 'number' ?
1785
+ queryVectorOrData :
1786
+ await this.embed(queryVectorOrData);
1787
+ // Apply metadata filter if provided
1788
+ let filterFunction;
1789
+ if (options.metadata) {
1790
+ const matchingIdsArray = await this.metadataIndex?.getIdsForFilter(options.metadata) || [];
1791
+ const matchingIds = new Set(matchingIdsArray);
1792
+ filterFunction = async (id) => matchingIds.has(id);
1706
1793
  }
1707
- // Add to local with addToRemote option
1708
- return this.addNouns(items, { ...options, addToRemote: true });
1794
+ // Direct HNSW search
1795
+ const results = await this.index.search(queryVector, k, filterFunction);
1796
+ // Get metadata for results
1797
+ const searchResults = [];
1798
+ for (const [id, similarity] of results) {
1799
+ const metadata = await this.getNoun(id);
1800
+ searchResults.push({
1801
+ id,
1802
+ score: similarity,
1803
+ vector: [],
1804
+ metadata: metadata?.metadata || {}
1805
+ });
1806
+ }
1807
+ return searchResults;
1709
1808
  }
1710
1809
  /**
1711
- * Filter search results by service
1712
- * @param results Search results to filter
1713
- * @param service Service to filter by
1714
- * @returns Filtered search results
1715
- * @private
1810
+ * 🎯 LEGACY: Original search implementation (kept for complex cases)
1811
+ * This is the original search method, now used as fallback for edge cases
1716
1812
  */
1717
- filterResultsByService(results, service) {
1718
- if (!service)
1719
- return results;
1720
- return results.filter((result) => {
1721
- if (!result.metadata || typeof result.metadata !== 'object')
1722
- return false;
1723
- if (!('createdBy' in result.metadata))
1724
- return false;
1725
- const createdBy = result.metadata.createdBy;
1726
- if (!createdBy)
1727
- return false;
1728
- return createdBy.augmentation === service;
1813
+ async _legacySearch(queryVectorOrData, k = 10, options = {}) {
1814
+ const startTime = Date.now();
1815
+ // Validate input is not null or undefined
1816
+ if (queryVectorOrData === null || queryVectorOrData === undefined) {
1817
+ throw new Error('Query cannot be null or undefined');
1818
+ }
1819
+ // Validate k parameter first, before any other logic
1820
+ if (k <= 0 || typeof k !== 'number' || isNaN(k)) {
1821
+ throw new Error('Parameter k must be a positive number');
1822
+ }
1823
+ if (!this.isInitialized) {
1824
+ throw new Error('BrainyData must be initialized before searching. Call init() first.');
1825
+ }
1826
+ // Check if database is in write-only mode
1827
+ this.checkWriteOnly();
1828
+ // If searching for verbs directly
1829
+ if (options.searchVerbs) {
1830
+ const verbResults = await this.searchVerbs(queryVectorOrData, k, {
1831
+ forceEmbed: options.forceEmbed,
1832
+ verbTypes: options.verbTypes
1833
+ });
1834
+ // Convert verb results to SearchResult format
1835
+ return verbResults.map((verb) => ({
1836
+ id: verb.id,
1837
+ score: verb.similarity,
1838
+ vector: verb.embedding || [],
1839
+ metadata: {
1840
+ verb: verb.verb,
1841
+ source: verb.source,
1842
+ target: verb.target,
1843
+ ...verb.data
1844
+ }
1845
+ }));
1846
+ }
1847
+ // If searching for nouns connected by verbs
1848
+ if (options.searchConnectedNouns) {
1849
+ return this.searchNounsByVerbs(queryVectorOrData, k, {
1850
+ forceEmbed: options.forceEmbed,
1851
+ verbTypes: options.verbTypes,
1852
+ direction: options.verbDirection
1853
+ });
1854
+ }
1855
+ // If a specific search mode is specified, use the appropriate search method
1856
+ if (options.searchMode === 'local') {
1857
+ return this.searchLocal(queryVectorOrData, k, options);
1858
+ }
1859
+ else if (options.searchMode === 'remote') {
1860
+ return this.searchRemote(queryVectorOrData, k, options);
1861
+ }
1862
+ else if (options.searchMode === 'combined') {
1863
+ return this.searchCombined(queryVectorOrData, k, options);
1864
+ }
1865
+ // Generate deduplication key for concurrent request handling
1866
+ const dedupeKey = RequestDeduplicator.getSearchKey(typeof queryVectorOrData === 'string' ? queryVectorOrData : JSON.stringify(queryVectorOrData), k, options);
1867
+ // Use augmentation system for search (includes deduplication, batching, and caching)
1868
+ return this.augmentations.execute('search', { query: queryVectorOrData, k, options, dedupeKey }, async () => {
1869
+ // Default behavior (backward compatible): search locally
1870
+ try {
1871
+ // BEST OF BOTH: Automatically exclude soft-deleted items (Neural Intelligence improvement)
1872
+ // BUT only when there's already metadata filtering happening
1873
+ let metadataFilter = options.metadata;
1874
+ // Only add soft-delete filter if there's already metadata being filtered
1875
+ // This preserves pure vector searches without metadata
1876
+ if (metadataFilter && Object.keys(metadataFilter).length > 0) {
1877
+ // If no explicit deleted filter is provided, exclude soft-deleted items
1878
+ // Use namespaced field for O(1) performance
1879
+ if (!metadataFilter['_brainy.deleted'] && !metadataFilter.anyOf) {
1880
+ metadataFilter = {
1881
+ ...metadataFilter,
1882
+ ['_brainy.deleted']: false // O(1) positive match instead of notEquals
1883
+ };
1884
+ }
1885
+ }
1886
+ const hasMetadataFilter = metadataFilter && Object.keys(metadataFilter).length > 0;
1887
+ // Check cache first (transparent to user) - but skip cache if we have metadata filters
1888
+ if (!hasMetadataFilter) {
1889
+ const cacheKey = this.cache?.getCacheKey(queryVectorOrData, k, options);
1890
+ const cachedResults = this.cache?.get(cacheKey);
1891
+ if (cachedResults) {
1892
+ // Track cache hit in health monitor
1893
+ if (this.monitoring) {
1894
+ const latency = Date.now() - startTime;
1895
+ this.monitoring.recordRequest(latency, false);
1896
+ this.monitoring.recordCacheAccess(true);
1897
+ }
1898
+ return cachedResults;
1899
+ }
1900
+ }
1901
+ // Cache miss - perform actual search
1902
+ const results = await this.searchLocal(queryVectorOrData, k, {
1903
+ ...options,
1904
+ metadata: metadataFilter
1905
+ });
1906
+ // Cache results for future queries (unless explicitly disabled or has metadata filter)
1907
+ if (!options.skipCache && !hasMetadataFilter) {
1908
+ const cacheKey = this.cache?.getCacheKey(queryVectorOrData, k, options);
1909
+ this.cache?.set(cacheKey, results);
1910
+ }
1911
+ // Track successful search in health monitor
1912
+ if (this.monitoring) {
1913
+ const latency = Date.now() - startTime;
1914
+ this.monitoring.recordRequest(latency, false);
1915
+ this.monitoring.recordCacheAccess(false);
1916
+ }
1917
+ return results;
1918
+ }
1919
+ catch (error) {
1920
+ // Track error in health monitor
1921
+ if (this.monitoring) {
1922
+ const latency = Date.now() - startTime;
1923
+ this.monitoring.recordRequest(latency, true);
1924
+ }
1925
+ throw error;
1926
+ }
1729
1927
  });
1730
1928
  }
1731
1929
  /**
1732
- * Search for similar vectors within specific noun types
1930
+ * Search with cursor-based pagination for better performance on large datasets
1733
1931
  * @param queryVectorOrData Query vector or data to search for
1734
1932
  * @param k Number of results to return
1735
- * @param nounTypes Array of noun types to search within, or null to search all
1736
- * @param options Additional options
1737
- * @returns Array of search results
1933
+ * @param options Additional options including cursor for pagination
1934
+ * @returns Paginated search results with cursor for next page
1738
1935
  */
1739
1936
  /**
1740
- * @deprecated Use search() with nounTypes option instead
1937
+ * @deprecated Use search() with cursor option instead
1741
1938
  * @example
1742
1939
  * // Old way (deprecated)
1743
- * await brain.searchByNounTypes(query, 10, ['type1', 'type2'])
1940
+ * await brain.searchWithCursor(query, 10, { cursor: 'abc123' })
1744
1941
  * // New way
1745
- * await brain.search(query, { limit: 10, nounTypes: ['type1', 'type2'] })
1942
+ * await brain.search(query, { limit: 10, cursor: 'abc123' })
1746
1943
  */
1747
- async searchByNounTypes(queryVectorOrData, k = 10, nounTypes = null, options = {}) {
1748
- // Helper function to filter results by service
1749
- const filterByService = (metadata) => {
1750
- if (!options.service)
1751
- return true; // No filter, include all
1752
- // Check if metadata has createdBy field with matching service
1753
- if (!metadata || typeof metadata !== 'object')
1754
- return false;
1755
- if (!('createdBy' in metadata))
1756
- return false;
1757
- const createdBy = metadata.createdBy;
1758
- if (!createdBy)
1759
- return false;
1760
- return createdBy.augmentation === options.service;
1761
- };
1762
- if (!this.isInitialized) {
1763
- throw new Error('BrainyData must be initialized before searching. Call init() first.');
1764
- }
1765
- // Check if database is in write-only mode
1766
- this.checkWriteOnly();
1767
- try {
1768
- let queryVector;
1769
- // Check if input is already a vector
1770
- if (Array.isArray(queryVectorOrData) &&
1771
- queryVectorOrData.every((item) => typeof item === 'number') &&
1772
- !options.forceEmbed) {
1773
- // Input is already a vector
1774
- queryVector = queryVectorOrData;
1944
+ async searchWithCursor(queryVectorOrData, k = 10, options = {}) {
1945
+ // For cursor-based search, we need to fetch more results and filter
1946
+ const searchK = options.cursor ? k + 20 : k; // Get extra results for filtering
1947
+ // Perform regular search
1948
+ const { cursor, ...searchOptions } = options;
1949
+ const allResults = await this.search(queryVectorOrData, {
1950
+ limit: searchK,
1951
+ nounTypes: searchOptions.nounTypes,
1952
+ metadata: searchOptions.filter
1953
+ });
1954
+ let results = allResults;
1955
+ let startIndex = 0;
1956
+ // If cursor provided, find starting position
1957
+ if (options.cursor) {
1958
+ startIndex = allResults.findIndex((r) => r.id === options.cursor.lastId &&
1959
+ Math.abs(r.score - options.cursor.lastScore) < 0.0001);
1960
+ if (startIndex >= 0) {
1961
+ startIndex += 1; // Start after the cursor position
1962
+ results = allResults.slice(startIndex, startIndex + k);
1775
1963
  }
1776
1964
  else {
1777
- // Input needs to be vectorized
1778
- try {
1779
- queryVector = await this.embeddingFunction(queryVectorOrData);
1780
- }
1781
- catch (embedError) {
1782
- throw new Error(`Failed to vectorize query data: ${embedError}`);
1783
- }
1784
- }
1785
- // Check if query vector is defined
1786
- if (!queryVector) {
1787
- throw new Error('Query vector is undefined or null');
1788
- }
1789
- // Check if query vector dimensions match the expected dimensions
1790
- if (queryVector.length !== this._dimensions) {
1791
- throw new Error(`Query vector dimension mismatch: expected ${this._dimensions}, got ${queryVector.length}`);
1792
- }
1793
- // If no noun types specified, search all nouns
1794
- if (!nounTypes || nounTypes.length === 0) {
1795
- // Check if we're in readonly mode with lazy loading and the index is empty
1796
- const indexSize = this.index.getNouns().size;
1797
- if (this.readOnly && this.lazyLoadInReadOnlyMode && indexSize === 0) {
1798
- if (this.loggingConfig?.verbose) {
1799
- console.log('Lazy loading mode: Index is empty, loading nodes for search...');
1800
- }
1801
- // In lazy loading mode, we need to load some nodes to search
1802
- // Instead of loading all nodes, we'll load a subset of nodes
1803
- // Load a limited number of nodes from storage using pagination
1804
- const result = await this.storage.getNouns({
1805
- pagination: { offset: 0, limit: k * 10 } // Get 10x more nodes than needed
1806
- });
1807
- const limitedNouns = result.items;
1808
- // Add these nodes to the index
1809
- for (const node of limitedNouns) {
1810
- // Check if the vector dimensions match the expected dimensions
1811
- if (node.vector.length !== this._dimensions) {
1812
- console.warn(`Skipping node ${node.id} due to dimension mismatch: expected ${this._dimensions}, got ${node.vector.length}`);
1813
- continue;
1814
- }
1815
- // Add to index
1816
- await this.index.addItem({
1817
- id: node.id,
1818
- vector: node.vector
1819
- });
1820
- }
1821
- if (this.loggingConfig?.verbose) {
1822
- console.log(`Lazy loading mode: Added ${limitedNouns.length} nodes to index for search`);
1823
- }
1824
- }
1825
- // Create filter function for HNSW search with metadata index optimization
1826
- const hasMetadataFilter = options.metadata && Object.keys(options.metadata).length > 0;
1827
- const hasServiceFilter = !!options.service;
1828
- let filterFunction;
1829
- let preFilteredIds;
1830
- // Use metadata index for pre-filtering if available
1831
- if (hasMetadataFilter && this.metadataIndex) {
1832
- try {
1833
- // Ensure metadata index is up to date
1834
- await this.metadataIndex?.flush?.();
1835
- // Get candidate IDs from metadata index
1836
- const candidateIds = await this.metadataIndex?.getIdsForFilter?.(options.metadata) || [];
1837
- if (candidateIds.length > 0) {
1838
- preFilteredIds = new Set(candidateIds);
1839
- // Create a simple filter function that just checks the pre-filtered set
1840
- filterFunction = async (id) => {
1841
- if (!preFilteredIds.has(id))
1842
- return false;
1843
- // Still apply service filter if needed
1844
- if (hasServiceFilter) {
1845
- const metadata = await this.storage.getMetadata(id);
1846
- const noun = this.index.getNouns().get(id);
1847
- if (!noun || !metadata)
1848
- return false;
1849
- const result = { id, score: 0, vector: noun.vector, metadata };
1850
- return this.filterResultsByService([result], options.service).length > 0;
1851
- }
1852
- return true;
1853
- };
1854
- }
1855
- else {
1856
- // No items match the metadata criteria, return empty results immediately
1857
- return [];
1858
- }
1859
- }
1860
- catch (indexError) {
1861
- console.warn('Metadata index error, falling back to full filtering:', indexError);
1862
- // Fall back to full metadata filtering below
1863
- }
1864
- }
1865
- // Fallback to full metadata filtering if index wasn't used
1866
- if (!filterFunction && (hasMetadataFilter || hasServiceFilter)) {
1867
- filterFunction = async (id) => {
1868
- // Get metadata for filtering
1869
- let metadata = await this.storage.getMetadata(id);
1870
- if (metadata === null) {
1871
- metadata = {};
1872
- }
1873
- // Apply metadata filter
1874
- if (hasMetadataFilter) {
1875
- const matches = matchesMetadataFilter(metadata, options.metadata);
1876
- if (!matches) {
1877
- return false;
1878
- }
1879
- }
1880
- // Apply service filter
1881
- if (hasServiceFilter) {
1882
- const noun = this.index.getNouns().get(id);
1883
- if (!noun)
1884
- return false;
1885
- const result = { id, score: 0, vector: noun.vector, metadata };
1886
- if (!this.filterResultsByService([result], options.service).length) {
1887
- return false;
1888
- }
1889
- }
1890
- return true;
1891
- };
1892
- }
1893
- // When using offset, we need to fetch more results and then slice
1894
- const offset = options.offset || 0;
1895
- const totalNeeded = k + offset;
1896
- // Search in the index with filter
1897
- const results = await this.index.search(queryVector, totalNeeded, filterFunction);
1898
- // Skip the offset number of results
1899
- const paginatedResults = results.slice(offset, offset + k);
1900
- // Get metadata for each result
1901
- const searchResults = [];
1902
- for (const [id, score] of paginatedResults) {
1903
- const noun = this.index.getNouns().get(id);
1904
- if (!noun) {
1905
- continue;
1906
- }
1907
- let metadata = await this.storage.getMetadata(id);
1908
- // Initialize metadata to an empty object if it's null
1909
- if (metadata === null) {
1910
- metadata = {};
1911
- }
1912
- // Preserve original metadata without overwriting user's custom fields
1913
- // The search result already has Brainy's UUID in the main 'id' field
1914
- searchResults.push({
1915
- id,
1916
- score: 1 - score, // Convert distance to similarity (higher = more similar)
1917
- vector: noun.vector,
1918
- metadata: metadata
1919
- });
1920
- }
1921
- return searchResults;
1922
- }
1923
- else {
1924
- // Get nouns for each noun type in parallel
1925
- const nounPromises = nounTypes.map((nounType) => this.storage.getNounsByNounType(nounType));
1926
- const nounArrays = await Promise.all(nounPromises);
1927
- // Combine all nouns
1928
- const nouns = [];
1929
- for (const nounArray of nounArrays) {
1930
- nouns.push(...nounArray);
1931
- }
1932
- // Calculate distances for each noun
1933
- const results = [];
1934
- for (const noun of nouns) {
1935
- const distance = this.index.getDistanceFunction()(queryVector, noun.vector);
1936
- results.push([noun.id, distance]);
1937
- }
1938
- // Sort by distance (ascending)
1939
- results.sort((a, b) => a[1] - b[1]);
1940
- // Apply offset and take k results
1941
- const offset = options.offset || 0;
1942
- const topResults = results.slice(offset, offset + k);
1943
- // Get metadata for each result
1944
- const searchResults = [];
1945
- for (const [id, score] of topResults) {
1946
- const noun = nouns.find((n) => n.id === id);
1947
- if (!noun) {
1948
- continue;
1949
- }
1950
- let metadata = await this.storage.getMetadata(id);
1951
- // Initialize metadata to an empty object if it's null
1952
- if (metadata === null) {
1953
- metadata = {};
1954
- }
1955
- // Preserve original metadata without overwriting user's custom fields
1956
- // The search result already has Brainy's UUID in the main 'id' field
1957
- searchResults.push({
1958
- id,
1959
- score: 1 - score, // Convert distance to similarity (higher = more similar)
1960
- vector: noun.vector,
1961
- metadata: metadata
1962
- });
1963
- }
1964
- // Results are already filtered, just return them
1965
- return searchResults;
1965
+ // Cursor not found, might be stale - return from beginning
1966
+ results = allResults.slice(0, k);
1967
+ startIndex = 0;
1966
1968
  }
1967
1969
  }
1968
- catch (error) {
1969
- console.error('Failed to search vectors by noun types:', error);
1970
- throw new Error(`Failed to search vectors by noun types: ${error}`);
1970
+ else {
1971
+ results = allResults.slice(0, k);
1972
+ }
1973
+ // Create cursor for next page
1974
+ let nextCursor;
1975
+ const hasMoreResults = startIndex + results.length < allResults.length ||
1976
+ allResults.length >= searchK;
1977
+ if (results.length > 0 && hasMoreResults) {
1978
+ const lastResult = results[results.length - 1];
1979
+ nextCursor = {
1980
+ lastId: lastResult.id,
1981
+ lastScore: lastResult.score,
1982
+ position: startIndex + results.length
1983
+ };
1971
1984
  }
1985
+ return {
1986
+ results,
1987
+ cursor: nextCursor,
1988
+ hasMore: !!nextCursor,
1989
+ totalEstimate: allResults.length > searchK ? undefined : allResults.length
1990
+ };
1972
1991
  }
1973
1992
  /**
1974
- * Search for similar vectors
1993
+ * Search the local database for similar vectors
1975
1994
  * @param queryVectorOrData Query vector or data to search for
1976
1995
  * @param k Number of results to return
1977
1996
  * @param options Additional options
1978
1997
  * @returns Array of search results
1979
1998
  */
1980
- /**
1981
- * 🔍 SIMPLE VECTOR SEARCH - Clean wrapper around find() for pure vector search
1982
- *
1983
- * @param queryVectorOrData Vector or text to search for
1984
- * @param k Number of results to return
1985
- * @param options Simple search options (metadata filters only)
1986
- * @returns Vector search results
1987
- */
1988
- /**
1989
- * 🔍 Simple Vector Similarity Search - Clean wrapper around find()
1990
- *
1991
- * search(query) = find({like: query}) - Pure vector similarity search
1992
- *
1993
- * @param queryVectorOrData - Query string, vector, or object to search with
1994
- * @param options - Search options for filtering and pagination
1995
- * @returns Array of search results with scores and metadata
1996
- *
1997
- * @example
1998
- * // Simple vector search
1999
- * await brain.search('machine learning')
2000
- *
2001
- * // With filters and pagination
2002
- * await brain.search('AI', {
2003
- * limit: 20,
2004
- * metadata: { type: 'article' },
2005
- * nounTypes: ['document']
2006
- * })
2007
- */
2008
- async search(queryVectorOrData, options = {}) {
2009
- // Build metadata filter from options
2010
- const metadataFilter = { ...options.metadata };
2011
- // Add noun type filtering
2012
- if (options.nounTypes && options.nounTypes.length > 0) {
2013
- metadataFilter.nounType = { in: options.nounTypes };
2014
- }
2015
- // Add item ID filtering
2016
- if (options.itemIds && options.itemIds.length > 0) {
2017
- metadataFilter.id = { in: options.itemIds };
1999
+ async searchLocal(queryVectorOrData, k = 10, options = {}) {
2000
+ if (!this.isInitialized) {
2001
+ throw new Error('BrainyData must be initialized before searching. Call init() first.');
2018
2002
  }
2019
- // Build simple TripleQuery for vector similarity
2020
- const tripleQuery = {
2021
- like: queryVectorOrData
2022
- };
2023
- // Add metadata filter if we have conditions
2024
- if (Object.keys(metadataFilter).length > 0) {
2025
- tripleQuery.where = metadataFilter;
2003
+ // Check if database is in write-only mode
2004
+ this.checkWriteOnly();
2005
+ // Process the query input for vectorization
2006
+ let queryToUse = queryVectorOrData;
2007
+ // Handle string queries
2008
+ if (typeof queryVectorOrData === 'string' && !options.forceEmbed) {
2009
+ queryToUse = await this.embed(queryVectorOrData);
2010
+ options.forceEmbed = false; // Already embedded, don't force again
2026
2011
  }
2027
- // Extract find() options
2028
- const findOptions = {
2029
- limit: options.limit,
2030
- offset: options.offset,
2031
- cursor: options.cursor,
2032
- excludeDeleted: options.excludeDeleted,
2033
- timeout: options.timeout
2034
- };
2035
- // Call find() with structured query - this is the key simplification!
2036
- let results = await this.find(tripleQuery, findOptions);
2037
- // Apply threshold filtering if specified
2038
- if (options.threshold !== undefined) {
2039
- results = results.filter(r => (r.fusionScore || r.score || 0) >= options.threshold);
2012
+ // Handle JSON object queries with special processing
2013
+ else if (typeof queryVectorOrData === 'object' &&
2014
+ queryVectorOrData !== null &&
2015
+ !Array.isArray(queryVectorOrData) &&
2016
+ !options.forceEmbed) {
2017
+ // If searching within a specific field
2018
+ if (options.searchField) {
2019
+ // Extract text from the specific field
2020
+ const fieldText = extractFieldFromJson(queryVectorOrData, options.searchField);
2021
+ if (fieldText) {
2022
+ queryToUse = await this.embeddingFunction(fieldText);
2023
+ options.forceEmbed = false; // Already embedded, don't force again
2024
+ }
2025
+ }
2026
+ // Otherwise process the entire object with priority fields
2027
+ else {
2028
+ const preparedText = prepareJsonForVectorization(queryVectorOrData, {
2029
+ priorityFields: options.priorityFields || [
2030
+ 'name',
2031
+ 'title',
2032
+ 'company',
2033
+ 'organization',
2034
+ 'description',
2035
+ 'summary'
2036
+ ]
2037
+ });
2038
+ queryToUse = await this.embeddingFunction(preparedText);
2039
+ options.forceEmbed = false; // Already embedded, don't force again
2040
+ }
2040
2041
  }
2041
- // Convert to SearchResult format
2042
- return results.map(r => ({
2043
- ...r,
2044
- score: r.fusionScore || r.score || 0
2045
- }));
2046
- return results;
2042
+ // If noun types are specified, use searchByNounTypes
2043
+ let searchResults;
2044
+ if (options.nounTypes && options.nounTypes.length > 0) {
2045
+ searchResults = await this.searchByNounTypes(queryToUse, k, options.nounTypes, {
2046
+ forceEmbed: options.forceEmbed,
2047
+ service: options.service,
2048
+ metadata: options.metadata,
2049
+ offset: options.offset
2050
+ });
2051
+ }
2052
+ else {
2053
+ // Otherwise, search all GraphNouns
2054
+ searchResults = await this.searchByNounTypes(queryToUse, k, null, {
2055
+ forceEmbed: options.forceEmbed,
2056
+ service: options.service,
2057
+ metadata: options.metadata,
2058
+ offset: options.offset
2059
+ });
2060
+ }
2061
+ // Filter out placeholder nouns and deleted items from search results
2062
+ searchResults = searchResults.filter((result) => {
2063
+ if (result.metadata && typeof result.metadata === 'object') {
2064
+ const metadata = result.metadata;
2065
+ // Exclude deleted items from search results (soft delete)
2066
+ // Check namespaced field
2067
+ if (metadata._brainy?.deleted === true) {
2068
+ return false;
2069
+ }
2070
+ // Exclude placeholder nouns from search results
2071
+ if (metadata.isPlaceholder) {
2072
+ return false;
2073
+ }
2074
+ // Apply domain filter if specified
2075
+ if (options.filter?.domain) {
2076
+ if (metadata.domain !== options.filter.domain) {
2077
+ return false;
2078
+ }
2079
+ }
2080
+ }
2081
+ return true;
2082
+ });
2083
+ // If includeVerbs is true, retrieve associated GraphVerbs for each result
2084
+ if (options.includeVerbs && this.storage) {
2085
+ for (const result of searchResults) {
2086
+ try {
2087
+ // Get outgoing verbs for this noun
2088
+ const outgoingVerbs = await this.storage.getVerbsBySource(result.id);
2089
+ // Get incoming verbs for this noun
2090
+ const incomingVerbs = await this.storage.getVerbsByTarget(result.id);
2091
+ // Combine all verbs
2092
+ const allVerbs = [...outgoingVerbs, ...incomingVerbs];
2093
+ // Add verbs to the result metadata
2094
+ if (!result.metadata) {
2095
+ result.metadata = {};
2096
+ }
2097
+ // Add the verbs to the metadata
2098
+ ;
2099
+ result.metadata.associatedVerbs = allVerbs;
2100
+ }
2101
+ catch (error) {
2102
+ console.warn(`Failed to retrieve verbs for noun ${result.id}:`, error);
2103
+ }
2104
+ }
2105
+ }
2106
+ return searchResults;
2047
2107
  }
2048
2108
  /**
2049
- * Helper method to encode cursor for pagination
2050
- * @internal
2109
+ * Find entities similar to a given entity ID
2110
+ * @param id ID of the entity to find similar entities for
2111
+ * @param options Additional options
2112
+ * @returns Array of search results with similarity scores
2051
2113
  */
2052
- encodeCursor(data) {
2053
- return Buffer.from(JSON.stringify(data)).toString('base64');
2114
+ async findSimilar(id, options = {}) {
2115
+ await this.ensureInitialized();
2116
+ // Get the entity by ID
2117
+ const entity = await this.getNoun(id);
2118
+ if (!entity) {
2119
+ throw new Error(`Entity with ID ${id} not found`);
2120
+ }
2121
+ // If relationType is specified, directly get related entities by that type
2122
+ if (options.relationType) {
2123
+ // Get all verbs (relationships) from the source entity
2124
+ const outgoingVerbs = await this.storage.getVerbsBySource(id);
2125
+ // Filter to only include verbs of the specified type
2126
+ const verbsOfType = outgoingVerbs.filter((verb) => verb.type === options.relationType);
2127
+ // Get the target IDs
2128
+ const targetIds = verbsOfType.map((verb) => verb.target);
2129
+ // Get the actual entities for these IDs
2130
+ const results = [];
2131
+ for (const targetId of targetIds) {
2132
+ // Skip undefined targetIds
2133
+ if (typeof targetId !== 'string')
2134
+ continue;
2135
+ const targetEntity = await this.getNoun(targetId);
2136
+ if (targetEntity) {
2137
+ results.push({
2138
+ id: targetId,
2139
+ score: 1.0, // Default similarity score
2140
+ vector: targetEntity.vector,
2141
+ metadata: targetEntity.metadata
2142
+ });
2143
+ }
2144
+ }
2145
+ // Return the results, limited to the requested number
2146
+ return results.slice(0, options.limit || 10);
2147
+ }
2148
+ // If no relationType is specified, use the original vector similarity search
2149
+ const k = (options.limit || 10) + 1; // Add 1 to account for the original entity
2150
+ const searchResults = await this.search(entity.vector, {
2151
+ limit: k,
2152
+ excludeDeleted: false,
2153
+ nounTypes: options.nounTypes
2154
+ });
2155
+ // Filter out the original entity and limit to the requested number
2156
+ return searchResults
2157
+ .filter((result) => result.id !== id)
2158
+ .slice(0, options.limit || 10);
2054
2159
  }
2055
2160
  /**
2056
- * Helper method to decode cursor for pagination
2057
- * @internal
2161
+ * Get a vector by ID
2058
2162
  */
2059
- decodeCursor(cursor) {
2163
+ // Legacy get() method removed - use getNoun() instead
2164
+ /**
2165
+ * Check if a document with the given ID exists
2166
+ * This is a direct storage operation that works in write-only mode when allowDirectReads is enabled
2167
+ * @param id The ID to check for existence
2168
+ * @returns Promise<boolean> True if the document exists, false otherwise
2169
+ */
2170
+ async has(id) {
2171
+ if (id === null || id === undefined) {
2172
+ throw new Error('ID cannot be null or undefined');
2173
+ }
2174
+ await this.ensureInitialized();
2175
+ // This is a direct storage operation - check if allowed in write-only mode
2176
+ if (this.writeOnly && !this.allowDirectReads) {
2177
+ throw new Error('Cannot perform has() operation: database is in write-only mode. Enable allowDirectReads for direct storage operations.');
2178
+ }
2060
2179
  try {
2061
- return JSON.parse(Buffer.from(cursor, 'base64').toString());
2180
+ // Always query storage directly for existence check
2181
+ const noun = await this.storage.getNoun(id);
2182
+ return noun !== null;
2062
2183
  }
2063
- catch {
2064
- return { offset: 0, timestamp: 0 };
2184
+ catch (error) {
2185
+ // If storage lookup fails, the item doesn't exist
2186
+ return false;
2065
2187
  }
2066
2188
  }
2067
2189
  /**
2068
- * Internal method for direct HNSW vector search
2069
- * Used by TripleIntelligence to avoid circular dependencies
2070
- * Note: For pure metadata filtering, use metadataIndex.getIdsForFilter() directly - it's O(log n)!
2071
- * This method is for vector similarity search with optional metadata filtering during search
2072
- * @internal
2190
+ * Check if a document with the given ID exists (alias for has)
2191
+ * This is a direct storage operation that works in write-only mode when allowDirectReads is enabled
2192
+ * @param id The ID to check for existence
2193
+ * @returns Promise<boolean> True if the document exists, false otherwise
2073
2194
  */
2074
- async _internalVectorSearch(queryVectorOrData, k = 10, options = {}) {
2075
- // Generate query vector
2076
- const queryVector = Array.isArray(queryVectorOrData) &&
2077
- typeof queryVectorOrData[0] === 'number' ?
2078
- queryVectorOrData :
2079
- await this.embed(queryVectorOrData);
2080
- // Apply metadata filter if provided
2081
- let filterFunction;
2082
- if (options.metadata) {
2083
- const matchingIdsArray = await this.metadataIndex?.getIdsForFilter(options.metadata) || [];
2084
- const matchingIds = new Set(matchingIdsArray);
2085
- filterFunction = async (id) => matchingIds.has(id);
2086
- }
2087
- // Direct HNSW search
2088
- const results = await this.index.search(queryVector, k, filterFunction);
2089
- // Get metadata for results
2090
- const searchResults = [];
2091
- for (const [id, similarity] of results) {
2092
- const metadata = await this.getNoun(id);
2093
- searchResults.push({
2094
- id,
2095
- score: similarity,
2096
- vector: [],
2097
- metadata: metadata?.metadata || {}
2098
- });
2099
- }
2100
- return searchResults;
2101
- }
2102
2195
  /**
2103
- * 🎯 LEGACY: Original search implementation (kept for complex cases)
2104
- * This is the original search method, now used as fallback for edge cases
2196
+ * Check if a noun exists
2197
+ * @param id The noun ID
2198
+ * @returns True if exists
2105
2199
  */
2106
- async _legacySearch(queryVectorOrData, k = 10, options = {}) {
2107
- const startTime = Date.now();
2108
- // Validate input is not null or undefined
2109
- if (queryVectorOrData === null || queryVectorOrData === undefined) {
2110
- throw new Error('Query cannot be null or undefined');
2111
- }
2112
- // Validate k parameter first, before any other logic
2113
- if (k <= 0 || typeof k !== 'number' || isNaN(k)) {
2114
- throw new Error('Parameter k must be a positive number');
2115
- }
2116
- if (!this.isInitialized) {
2117
- throw new Error('BrainyData must be initialized before searching. Call init() first.');
2118
- }
2119
- // Check if database is in write-only mode
2120
- this.checkWriteOnly();
2121
- // If searching for verbs directly
2122
- if (options.searchVerbs) {
2123
- const verbResults = await this.searchVerbs(queryVectorOrData, k, {
2124
- forceEmbed: options.forceEmbed,
2125
- verbTypes: options.verbTypes
2126
- });
2127
- // Convert verb results to SearchResult format
2128
- return verbResults.map((verb) => ({
2129
- id: verb.id,
2130
- score: verb.similarity,
2131
- vector: verb.embedding || [],
2132
- metadata: {
2133
- verb: verb.verb,
2134
- source: verb.source,
2135
- target: verb.target,
2136
- ...verb.data
2137
- }
2138
- }));
2139
- }
2140
- // If searching for nouns connected by verbs
2141
- if (options.searchConnectedNouns) {
2142
- return this.searchNounsByVerbs(queryVectorOrData, k, {
2143
- forceEmbed: options.forceEmbed,
2144
- verbTypes: options.verbTypes,
2145
- direction: options.verbDirection
2146
- });
2147
- }
2148
- // If a specific search mode is specified, use the appropriate search method
2149
- if (options.searchMode === 'local') {
2150
- return this.searchLocal(queryVectorOrData, k, options);
2151
- }
2152
- else if (options.searchMode === 'remote') {
2153
- return this.searchRemote(queryVectorOrData, k, options);
2154
- }
2155
- else if (options.searchMode === 'combined') {
2156
- return this.searchCombined(queryVectorOrData, k, options);
2157
- }
2158
- // Generate deduplication key for concurrent request handling
2159
- const dedupeKey = RequestDeduplicator.getSearchKey(typeof queryVectorOrData === 'string' ? queryVectorOrData : JSON.stringify(queryVectorOrData), k, options);
2160
- // Use augmentation system for search (includes deduplication, batching, and caching)
2161
- return this.augmentations.execute('search', { query: queryVectorOrData, k, options, dedupeKey }, async () => {
2162
- // Default behavior (backward compatible): search locally
2163
- try {
2164
- // BEST OF BOTH: Automatically exclude soft-deleted items (Neural Intelligence improvement)
2165
- // BUT only when there's already metadata filtering happening
2166
- let metadataFilter = options.metadata;
2167
- // Only add soft-delete filter if there's already metadata being filtered
2168
- // This preserves pure vector searches without metadata
2169
- if (metadataFilter && Object.keys(metadataFilter).length > 0) {
2170
- // If no explicit deleted filter is provided, exclude soft-deleted items
2171
- // Use namespaced field for O(1) performance
2172
- if (!metadataFilter['_brainy.deleted'] && !metadataFilter.anyOf) {
2173
- metadataFilter = {
2174
- ...metadataFilter,
2175
- ['_brainy.deleted']: false // O(1) positive match instead of notEquals
2176
- };
2177
- }
2178
- }
2179
- const hasMetadataFilter = metadataFilter && Object.keys(metadataFilter).length > 0;
2180
- // Check cache first (transparent to user) - but skip cache if we have metadata filters
2181
- if (!hasMetadataFilter) {
2182
- const cacheKey = this.cache?.getCacheKey(queryVectorOrData, k, options);
2183
- const cachedResults = this.cache?.get(cacheKey);
2184
- if (cachedResults) {
2185
- // Track cache hit in health monitor
2186
- if (this.monitoring) {
2187
- const latency = Date.now() - startTime;
2188
- this.monitoring.recordRequest(latency, false);
2189
- this.monitoring.recordCacheAccess(true);
2190
- }
2191
- return cachedResults;
2192
- }
2193
- }
2194
- // Cache miss - perform actual search
2195
- const results = await this.searchLocal(queryVectorOrData, k, {
2196
- ...options,
2197
- metadata: metadataFilter
2198
- });
2199
- // Cache results for future queries (unless explicitly disabled or has metadata filter)
2200
- if (!options.skipCache && !hasMetadataFilter) {
2201
- const cacheKey = this.cache?.getCacheKey(queryVectorOrData, k, options);
2202
- this.cache?.set(cacheKey, results);
2203
- }
2204
- // Track successful search in health monitor
2205
- if (this.monitoring) {
2206
- const latency = Date.now() - startTime;
2207
- this.monitoring.recordRequest(latency, false);
2208
- this.monitoring.recordCacheAccess(false);
2209
- }
2210
- return results;
2211
- }
2212
- catch (error) {
2213
- // Track error in health monitor
2214
- if (this.monitoring) {
2215
- const latency = Date.now() - startTime;
2216
- this.monitoring.recordRequest(latency, true);
2217
- }
2218
- throw error;
2219
- }
2220
- });
2200
+ async hasNoun(id) {
2201
+ return this.hasNoun(id);
2221
2202
  }
2222
2203
  /**
2223
- * Search with cursor-based pagination for better performance on large datasets
2224
- * @param queryVectorOrData Query vector or data to search for
2225
- * @param k Number of results to return
2226
- * @param options Additional options including cursor for pagination
2227
- * @returns Paginated search results with cursor for next page
2204
+ * Get metadata for a document by ID
2205
+ * This is a direct storage operation that works in write-only mode when allowDirectReads is enabled
2206
+ * @param id The ID of the document
2207
+ * @returns Promise<T | null> The metadata object or null if not found
2228
2208
  */
2209
+ // Legacy getMetadata() method removed - use getNounMetadata() instead
2229
2210
  /**
2230
- * @deprecated Use search() with cursor option instead
2211
+ * Get multiple documents by their IDs
2212
+ * This is a direct storage operation that works in write-only mode when allowDirectReads is enabled
2213
+ * @param ids Array of IDs to retrieve
2214
+ * @returns Promise<Array<VectorDocument<T> | null>> Array of documents (null for missing IDs)
2215
+ */
2216
+ /**
2217
+ * Get multiple nouns - by IDs, filters, or pagination
2218
+ * @param idsOrOptions Array of IDs or query options
2219
+ * @returns Array of noun documents
2220
+ *
2231
2221
  * @example
2232
- * // Old way (deprecated)
2233
- * await brain.searchWithCursor(query, 10, { cursor: 'abc123' })
2234
- * // New way
2235
- * await brain.search(query, { limit: 10, cursor: 'abc123' })
2222
+ * // Get by IDs
2223
+ * await brain.getNouns(['id1', 'id2'])
2224
+ *
2225
+ * // Get with filters
2226
+ * await brain.getNouns({
2227
+ * filter: { type: 'article' },
2228
+ * limit: 10
2229
+ * })
2230
+ *
2231
+ * // Get with pagination
2232
+ * await brain.getNouns({
2233
+ * offset: 20,
2234
+ * limit: 10
2235
+ * })
2236
2236
  */
2237
- async searchWithCursor(queryVectorOrData, k = 10, options = {}) {
2238
- // For cursor-based search, we need to fetch more results and filter
2239
- const searchK = options.cursor ? k + 20 : k; // Get extra results for filtering
2240
- // Perform regular search
2241
- const { cursor, ...searchOptions } = options;
2242
- const allResults = await this.search(queryVectorOrData, {
2243
- limit: searchK,
2244
- nounTypes: searchOptions.nounTypes,
2245
- metadata: searchOptions.filter
2246
- });
2247
- let results = allResults;
2248
- let startIndex = 0;
2249
- // If cursor provided, find starting position
2250
- if (options.cursor) {
2251
- startIndex = allResults.findIndex((r) => r.id === options.cursor.lastId &&
2252
- Math.abs(r.score - options.cursor.lastScore) < 0.0001);
2253
- if (startIndex >= 0) {
2254
- startIndex += 1; // Start after the cursor position
2255
- results = allResults.slice(startIndex, startIndex + k);
2256
- }
2257
- else {
2258
- // Cursor not found, might be stale - return from beginning
2259
- results = allResults.slice(0, k);
2260
- startIndex = 0;
2261
- }
2262
- }
2263
- else {
2264
- results = allResults.slice(0, k);
2237
+ async getNouns(idsOrOptions) {
2238
+ // Handle array of IDs
2239
+ if (Array.isArray(idsOrOptions)) {
2240
+ return this.getNounsByIds(idsOrOptions);
2265
2241
  }
2266
- // Create cursor for next page
2267
- let nextCursor;
2268
- const hasMoreResults = startIndex + results.length < allResults.length ||
2269
- allResults.length >= searchK;
2270
- if (results.length > 0 && hasMoreResults) {
2271
- const lastResult = results[results.length - 1];
2272
- nextCursor = {
2273
- lastId: lastResult.id,
2274
- lastScore: lastResult.score,
2275
- position: startIndex + results.length
2276
- };
2242
+ // Handle options object
2243
+ const options = idsOrOptions || {};
2244
+ // If ids are provided in options, get by IDs
2245
+ if (options.ids) {
2246
+ return this.getNounsByIds(options.ids);
2277
2247
  }
2278
- return {
2279
- results,
2280
- cursor: nextCursor,
2281
- hasMore: !!nextCursor,
2282
- totalEstimate: allResults.length > searchK ? undefined : allResults.length
2283
- };
2248
+ // Otherwise, do a filtered/paginated query and extract items
2249
+ const result = await this.queryNounsByFilter(options);
2250
+ return result.items;
2284
2251
  }
2285
2252
  /**
2286
- * Search the local database for similar vectors
2287
- * @param queryVectorOrData Query vector or data to search for
2288
- * @param k Number of results to return
2289
- * @param options Additional options
2290
- * @returns Array of search results
2253
+ * Internal: Get nouns by IDs
2291
2254
  */
2292
- async searchLocal(queryVectorOrData, k = 10, options = {}) {
2293
- if (!this.isInitialized) {
2294
- throw new Error('BrainyData must be initialized before searching. Call init() first.');
2255
+ async getNounsByIds(ids) {
2256
+ if (!Array.isArray(ids)) {
2257
+ throw new Error('IDs must be provided as an array');
2295
2258
  }
2296
- // Check if database is in write-only mode
2297
- this.checkWriteOnly();
2298
- // Process the query input for vectorization
2299
- let queryToUse = queryVectorOrData;
2300
- // Handle string queries
2301
- if (typeof queryVectorOrData === 'string' && !options.forceEmbed) {
2302
- queryToUse = await this.embed(queryVectorOrData);
2303
- options.forceEmbed = false; // Already embedded, don't force again
2259
+ await this.ensureInitialized();
2260
+ // This is a direct storage operation - check if allowed in write-only mode
2261
+ if (this.writeOnly && !this.allowDirectReads) {
2262
+ throw new Error('Cannot perform getBatch() operation: database is in write-only mode. Enable allowDirectReads for direct storage operations.');
2304
2263
  }
2305
- // Handle JSON object queries with special processing
2306
- else if (typeof queryVectorOrData === 'object' &&
2307
- queryVectorOrData !== null &&
2308
- !Array.isArray(queryVectorOrData) &&
2309
- !options.forceEmbed) {
2310
- // If searching within a specific field
2311
- if (options.searchField) {
2312
- // Extract text from the specific field
2313
- const fieldText = extractFieldFromJson(queryVectorOrData, options.searchField);
2314
- if (fieldText) {
2315
- queryToUse = await this.embeddingFunction(fieldText);
2316
- options.forceEmbed = false; // Already embedded, don't force again
2317
- }
2318
- }
2319
- // Otherwise process the entire object with priority fields
2320
- else {
2321
- const preparedText = prepareJsonForVectorization(queryVectorOrData, {
2322
- priorityFields: options.priorityFields || [
2323
- 'name',
2324
- 'title',
2325
- 'company',
2326
- 'organization',
2327
- 'description',
2328
- 'summary'
2329
- ]
2330
- });
2331
- queryToUse = await this.embeddingFunction(preparedText);
2332
- options.forceEmbed = false; // Already embedded, don't force again
2333
- }
2334
- }
2335
- // If noun types are specified, use searchByNounTypes
2336
- let searchResults;
2337
- if (options.nounTypes && options.nounTypes.length > 0) {
2338
- searchResults = await this.searchByNounTypes(queryToUse, k, options.nounTypes, {
2339
- forceEmbed: options.forceEmbed,
2340
- service: options.service,
2341
- metadata: options.metadata,
2342
- offset: options.offset
2343
- });
2344
- }
2345
- else {
2346
- // Otherwise, search all GraphNouns
2347
- searchResults = await this.searchByNounTypes(queryToUse, k, null, {
2348
- forceEmbed: options.forceEmbed,
2349
- service: options.service,
2350
- metadata: options.metadata,
2351
- offset: options.offset
2352
- });
2353
- }
2354
- // Filter out placeholder nouns and deleted items from search results
2355
- searchResults = searchResults.filter((result) => {
2356
- if (result.metadata && typeof result.metadata === 'object') {
2357
- const metadata = result.metadata;
2358
- // Exclude deleted items from search results (soft delete)
2359
- // Check namespaced field
2360
- if (metadata._brainy?.deleted === true) {
2361
- return false;
2362
- }
2363
- // Exclude placeholder nouns from search results
2364
- if (metadata.isPlaceholder) {
2365
- return false;
2366
- }
2367
- // Apply domain filter if specified
2368
- if (options.filter?.domain) {
2369
- if (metadata.domain !== options.filter.domain) {
2370
- return false;
2371
- }
2372
- }
2373
- }
2374
- return true;
2375
- });
2376
- // If includeVerbs is true, retrieve associated GraphVerbs for each result
2377
- if (options.includeVerbs && this.storage) {
2378
- for (const result of searchResults) {
2379
- try {
2380
- // Get outgoing verbs for this noun
2381
- const outgoingVerbs = await this.storage.getVerbsBySource(result.id);
2382
- // Get incoming verbs for this noun
2383
- const incomingVerbs = await this.storage.getVerbsByTarget(result.id);
2384
- // Combine all verbs
2385
- const allVerbs = [...outgoingVerbs, ...incomingVerbs];
2386
- // Add verbs to the result metadata
2387
- if (!result.metadata) {
2388
- result.metadata = {};
2389
- }
2390
- // Add the verbs to the metadata
2391
- ;
2392
- result.metadata.associatedVerbs = allVerbs;
2393
- }
2394
- catch (error) {
2395
- console.warn(`Failed to retrieve verbs for noun ${result.id}:`, error);
2396
- }
2397
- }
2398
- }
2399
- return searchResults;
2400
- }
2401
- /**
2402
- * Find entities similar to a given entity ID
2403
- * @param id ID of the entity to find similar entities for
2404
- * @param options Additional options
2405
- * @returns Array of search results with similarity scores
2406
- */
2407
- async findSimilar(id, options = {}) {
2408
- await this.ensureInitialized();
2409
- // Get the entity by ID
2410
- const entity = await this.getNoun(id);
2411
- if (!entity) {
2412
- throw new Error(`Entity with ID ${id} not found`);
2413
- }
2414
- // If relationType is specified, directly get related entities by that type
2415
- if (options.relationType) {
2416
- // Get all verbs (relationships) from the source entity
2417
- const outgoingVerbs = await this.storage.getVerbsBySource(id);
2418
- // Filter to only include verbs of the specified type
2419
- const verbsOfType = outgoingVerbs.filter((verb) => verb.type === options.relationType);
2420
- // Get the target IDs
2421
- const targetIds = verbsOfType.map((verb) => verb.target);
2422
- // Get the actual entities for these IDs
2423
- const results = [];
2424
- for (const targetId of targetIds) {
2425
- // Skip undefined targetIds
2426
- if (typeof targetId !== 'string')
2427
- continue;
2428
- const targetEntity = await this.getNoun(targetId);
2429
- if (targetEntity) {
2430
- results.push({
2431
- id: targetId,
2432
- score: 1.0, // Default similarity score
2433
- vector: targetEntity.vector,
2434
- metadata: targetEntity.metadata
2435
- });
2436
- }
2437
- }
2438
- // Return the results, limited to the requested number
2439
- return results.slice(0, options.limit || 10);
2440
- }
2441
- // If no relationType is specified, use the original vector similarity search
2442
- const k = (options.limit || 10) + 1; // Add 1 to account for the original entity
2443
- const searchResults = await this.search(entity.vector, {
2444
- limit: k,
2445
- excludeDeleted: false,
2446
- nounTypes: options.nounTypes
2447
- });
2448
- // Filter out the original entity and limit to the requested number
2449
- return searchResults
2450
- .filter((result) => result.id !== id)
2451
- .slice(0, options.limit || 10);
2452
- }
2453
- /**
2454
- * Get a vector by ID
2455
- */
2456
- // Legacy get() method removed - use getNoun() instead
2457
- /**
2458
- * Check if a document with the given ID exists
2459
- * This is a direct storage operation that works in write-only mode when allowDirectReads is enabled
2460
- * @param id The ID to check for existence
2461
- * @returns Promise<boolean> True if the document exists, false otherwise
2462
- */
2463
- async has(id) {
2464
- if (id === null || id === undefined) {
2465
- throw new Error('ID cannot be null or undefined');
2466
- }
2467
- await this.ensureInitialized();
2468
- // This is a direct storage operation - check if allowed in write-only mode
2469
- if (this.writeOnly && !this.allowDirectReads) {
2470
- throw new Error('Cannot perform has() operation: database is in write-only mode. Enable allowDirectReads for direct storage operations.');
2471
- }
2472
- try {
2473
- // Always query storage directly for existence check
2474
- const noun = await this.storage.getNoun(id);
2475
- return noun !== null;
2476
- }
2477
- catch (error) {
2478
- // If storage lookup fails, the item doesn't exist
2479
- return false;
2480
- }
2481
- }
2482
- /**
2483
- * Check if a document with the given ID exists (alias for has)
2484
- * This is a direct storage operation that works in write-only mode when allowDirectReads is enabled
2485
- * @param id The ID to check for existence
2486
- * @returns Promise<boolean> True if the document exists, false otherwise
2487
- */
2488
- /**
2489
- * Check if a noun exists
2490
- * @param id The noun ID
2491
- * @returns True if exists
2492
- */
2493
- async hasNoun(id) {
2494
- return this.hasNoun(id);
2495
- }
2496
- /**
2497
- * Get metadata for a document by ID
2498
- * This is a direct storage operation that works in write-only mode when allowDirectReads is enabled
2499
- * @param id The ID of the document
2500
- * @returns Promise<T | null> The metadata object or null if not found
2501
- */
2502
- // Legacy getMetadata() method removed - use getNounMetadata() instead
2503
- /**
2504
- * Get multiple documents by their IDs
2505
- * This is a direct storage operation that works in write-only mode when allowDirectReads is enabled
2506
- * @param ids Array of IDs to retrieve
2507
- * @returns Promise<Array<VectorDocument<T> | null>> Array of documents (null for missing IDs)
2508
- */
2509
- /**
2510
- * Get multiple nouns - by IDs, filters, or pagination
2511
- * @param idsOrOptions Array of IDs or query options
2512
- * @returns Array of noun documents
2513
- *
2514
- * @example
2515
- * // Get by IDs
2516
- * await brain.getNouns(['id1', 'id2'])
2517
- *
2518
- * // Get with filters
2519
- * await brain.getNouns({
2520
- * filter: { type: 'article' },
2521
- * limit: 10
2522
- * })
2523
- *
2524
- * // Get with pagination
2525
- * await brain.getNouns({
2526
- * offset: 20,
2527
- * limit: 10
2528
- * })
2529
- */
2530
- async getNouns(idsOrOptions) {
2531
- // Handle array of IDs
2532
- if (Array.isArray(idsOrOptions)) {
2533
- return this.getNounsByIds(idsOrOptions);
2534
- }
2535
- // Handle options object
2536
- const options = idsOrOptions || {};
2537
- // If ids are provided in options, get by IDs
2538
- if (options.ids) {
2539
- return this.getNounsByIds(options.ids);
2540
- }
2541
- // Otherwise, do a filtered/paginated query and extract items
2542
- const result = await this.queryNounsByFilter(options);
2543
- return result.items;
2544
- }
2545
- /**
2546
- * Internal: Get nouns by IDs
2547
- */
2548
- async getNounsByIds(ids) {
2549
- if (!Array.isArray(ids)) {
2550
- throw new Error('IDs must be provided as an array');
2551
- }
2552
- await this.ensureInitialized();
2553
- // This is a direct storage operation - check if allowed in write-only mode
2554
- if (this.writeOnly && !this.allowDirectReads) {
2555
- throw new Error('Cannot perform getBatch() operation: database is in write-only mode. Enable allowDirectReads for direct storage operations.');
2556
- }
2557
- const results = [];
2558
- for (const id of ids) {
2559
- if (id === null || id === undefined) {
2560
- results.push(null);
2561
- continue;
2264
+ const results = [];
2265
+ for (const id of ids) {
2266
+ if (id === null || id === undefined) {
2267
+ results.push(null);
2268
+ continue;
2562
2269
  }
2563
2270
  try {
2564
2271
  const result = await this.getNoun(id);
@@ -3304,9 +3011,16 @@ export class BrainyData {
3304
3011
  */
3305
3012
  async addVerbs(verbs) {
3306
3013
  const ids = [];
3307
- for (const verb of verbs) {
3308
- const id = await this.addVerb(verb.source, verb.target, verb.type, verb.metadata);
3309
- ids.push(id);
3014
+ const chunkSize = 10; // Conservative chunk size for parallel processing
3015
+ // Process verbs in parallel chunks to improve performance
3016
+ for (let i = 0; i < verbs.length; i += chunkSize) {
3017
+ const chunk = verbs.slice(i, i + chunkSize);
3018
+ // Process chunk in parallel
3019
+ const chunkPromises = chunk.map(verb => this.addVerb(verb.source, verb.target, verb.type, verb.metadata));
3020
+ // Wait for all in chunk to complete
3021
+ const chunkIds = await Promise.all(chunkPromises);
3022
+ // Maintain order by adding chunk results
3023
+ ids.push(...chunkIds);
3310
3024
  }
3311
3025
  return ids;
3312
3026
  }
@@ -3317,8 +3031,16 @@ export class BrainyData {
3317
3031
  */
3318
3032
  async deleteVerbs(ids) {
3319
3033
  const results = [];
3320
- for (const id of ids) {
3321
- results.push(await this.deleteVerb(id));
3034
+ const chunkSize = 10; // Conservative chunk size for parallel processing
3035
+ // Process deletions in parallel chunks to improve performance
3036
+ for (let i = 0; i < ids.length; i += chunkSize) {
3037
+ const chunk = ids.slice(i, i + chunkSize);
3038
+ // Process chunk in parallel
3039
+ const chunkPromises = chunk.map(id => this.deleteVerb(id));
3040
+ // Wait for all in chunk to complete
3041
+ const chunkResults = await Promise.all(chunkPromises);
3042
+ // Maintain order by adding chunk results
3043
+ results.push(...chunkResults);
3322
3044
  }
3323
3045
  return results;
3324
3046
  }
@@ -4690,8 +4412,12 @@ export class BrainyData {
4690
4412
  noun.vector = await this.embeddingFunction(noun.metadata);
4691
4413
  }
4692
4414
  }
4415
+ // Extract type from metadata or default to Content
4416
+ const nounType = (noun.metadata && typeof noun.metadata === 'object' && 'noun' in noun.metadata)
4417
+ ? noun.metadata.noun
4418
+ : NounType.Content;
4693
4419
  // Add the noun with its vector and metadata (custom ID not supported)
4694
- await this.addNoun(noun.vector, noun.metadata);
4420
+ await this.addNoun(noun.vector, nounType, noun.metadata);
4695
4421
  nounsRestored++;
4696
4422
  }
4697
4423
  catch (error) {
@@ -4847,8 +4573,8 @@ export class BrainyData {
4847
4573
  tags: [`tag-${i % 5}`, `category-${i % 3}`]
4848
4574
  }
4849
4575
  };
4850
- // Add the noun
4851
- const id = await this.addNoun(metadata.description, metadata);
4576
+ // Add the noun with explicit type
4577
+ const id = await this.addNoun(metadata.description, nounType, metadata);
4852
4578
  nounIds.push(id);
4853
4579
  }
4854
4580
  // Generate random verbs between nouns
@@ -4961,225 +4687,521 @@ export class BrainyData {
4961
4687
  filteredMappings[service] = serviceFieldMappings[service];
4962
4688
  }
4963
4689
  }
4964
- serviceFieldMappings = filteredMappings;
4965
- }
4966
- // If no mappings after filtering, return empty results
4967
- if (Object.keys(serviceFieldMappings).length === 0) {
4968
- return [];
4969
- }
4970
- // Search in each service's fields and combine results
4971
- const allResults = [];
4972
- for (const [service, fieldNames] of Object.entries(serviceFieldMappings)) {
4973
- for (const fieldName of fieldNames) {
4974
- // Search using the specific field name for this service
4975
- const results = await this.search(searchTerm, {
4976
- limit: k
4977
- });
4978
- // Add results to the combined list
4979
- allResults.push(...results);
4980
- }
4981
- }
4982
- // Sort by score and limit to k results
4983
- return allResults.sort((a, b) => b.score - a.score).slice(0, k);
4984
- }
4985
- /**
4986
- * Cleanup distributed resources
4987
- * Should be called when shutting down the instance
4988
- */
4989
- async cleanup() {
4990
- // Stop real-time updates
4991
- if (this.updateTimerId) {
4992
- clearInterval(this.updateTimerId);
4993
- this.updateTimerId = null;
4994
- }
4995
- // Stop maintenance intervals
4996
- for (const intervalId of this.maintenanceIntervals) {
4997
- clearInterval(intervalId);
4998
- }
4999
- this.maintenanceIntervals = [];
5000
- // Flush metadata index one last time
5001
- if (this.metadataIndex) {
5002
- try {
5003
- await this.metadataIndex?.flush?.();
4690
+ serviceFieldMappings = filteredMappings;
4691
+ }
4692
+ // If no mappings after filtering, return empty results
4693
+ if (Object.keys(serviceFieldMappings).length === 0) {
4694
+ return [];
4695
+ }
4696
+ // Search in each service's fields and combine results
4697
+ const allResults = [];
4698
+ for (const [service, fieldNames] of Object.entries(serviceFieldMappings)) {
4699
+ for (const fieldName of fieldNames) {
4700
+ // Search using the specific field name for this service
4701
+ const results = await this.search(searchTerm, {
4702
+ limit: k
4703
+ });
4704
+ // Add results to the combined list
4705
+ allResults.push(...results);
4706
+ }
4707
+ }
4708
+ // Sort by score and limit to k results
4709
+ return allResults.sort((a, b) => b.score - a.score).slice(0, k);
4710
+ }
4711
+ /**
4712
+ * Cleanup distributed resources
4713
+ * Should be called when shutting down the instance
4714
+ */
4715
+ async cleanup() {
4716
+ // Stop real-time updates
4717
+ if (this.updateTimerId) {
4718
+ clearInterval(this.updateTimerId);
4719
+ this.updateTimerId = null;
4720
+ }
4721
+ // Stop maintenance intervals
4722
+ for (const intervalId of this.maintenanceIntervals) {
4723
+ clearInterval(intervalId);
4724
+ }
4725
+ this.maintenanceIntervals = [];
4726
+ // Flush metadata index one last time
4727
+ if (this.metadataIndex) {
4728
+ try {
4729
+ await this.metadataIndex?.flush?.();
4730
+ }
4731
+ catch (error) {
4732
+ console.warn('Error flushing metadata index during cleanup:', error);
4733
+ }
4734
+ }
4735
+ // Clean up distributed mode resources
4736
+ if (this.monitoring) {
4737
+ this.monitoring.stop();
4738
+ }
4739
+ if (this.configManager) {
4740
+ await this.configManager.cleanup();
4741
+ }
4742
+ // Clean up worker pools
4743
+ await cleanupWorkerPools();
4744
+ }
4745
+ /**
4746
+ * Load environment variables from Cortex configuration
4747
+ * This enables services to automatically load all their configs from Brainy
4748
+ * @returns Promise that resolves when environment is loaded
4749
+ */
4750
+ async loadEnvironment() {
4751
+ // Cortex integration coming in next release
4752
+ prodLog.debug('Cortex integration coming soon');
4753
+ }
4754
+ /**
4755
+ * Set a configuration value with optional encryption
4756
+ * @param key Configuration key
4757
+ * @param value Configuration value
4758
+ * @param options Options including encryption
4759
+ */
4760
+ async setConfig(key, value, options) {
4761
+ // Use a predictable ID based on the config key
4762
+ const configId = `config-${key}`;
4763
+ // Store the config data in metadata (not as vectorized data)
4764
+ const configValue = options?.encrypt ? await this.encryptData(JSON.stringify(value)) : value;
4765
+ // Use simple text for vectorization
4766
+ const searchableText = `Configuration setting for ${key}`;
4767
+ await this.addNoun(searchableText, NounType.State, {
4768
+ configKey: key,
4769
+ configValue: configValue,
4770
+ encrypted: !!options?.encrypt,
4771
+ timestamp: new Date().toISOString()
4772
+ });
4773
+ }
4774
+ /**
4775
+ * Get a configuration value with automatic decryption
4776
+ * @param key Configuration key
4777
+ * @param options Options including decryption (auto-detected by default)
4778
+ * @returns Configuration value or undefined
4779
+ */
4780
+ async getConfig(key, options) {
4781
+ try {
4782
+ // Use the predictable ID to get the config directly
4783
+ const configId = `config-${key}`;
4784
+ const storedNoun = await this.getNoun(configId);
4785
+ if (!storedNoun)
4786
+ return undefined;
4787
+ // The config data is now stored in metadata
4788
+ const value = storedNoun.metadata?.configValue;
4789
+ const encrypted = storedNoun.metadata?.encrypted;
4790
+ // BEST OF BOTH: Respect explicit decrypt option OR auto-decrypt if encrypted
4791
+ const shouldDecrypt = options?.decrypt !== undefined ? options.decrypt : encrypted;
4792
+ if (shouldDecrypt && encrypted && typeof value === 'string') {
4793
+ const decrypted = await this.decryptData(value);
4794
+ return JSON.parse(decrypted);
4795
+ }
4796
+ return value;
4797
+ }
4798
+ catch (error) {
4799
+ prodLog.debug('Config retrieval failed:', error);
4800
+ return undefined;
4801
+ }
4802
+ }
4803
+ /**
4804
+ * Encrypt data using universal crypto utilities
4805
+ */
4806
+ async encryptData(data) {
4807
+ const crypto = await import('./universal/crypto.js');
4808
+ const key = crypto.randomBytes(32);
4809
+ const iv = crypto.randomBytes(16);
4810
+ const cipher = crypto.createCipheriv('aes-256-cbc', key, iv);
4811
+ let encrypted = cipher.update(data, 'utf8', 'hex');
4812
+ encrypted += cipher.final('hex');
4813
+ // Store key and iv with encrypted data (in production, manage keys separately)
4814
+ return JSON.stringify({
4815
+ encrypted,
4816
+ key: Array.from(key).map(b => b.toString(16).padStart(2, '0')).join(''),
4817
+ iv: Array.from(iv).map(b => b.toString(16).padStart(2, '0')).join('')
4818
+ });
4819
+ }
4820
+ /**
4821
+ * Decrypt data using universal crypto utilities
4822
+ */
4823
+ async decryptData(encryptedData) {
4824
+ const crypto = await import('./universal/crypto.js');
4825
+ const { encrypted, key: keyHex, iv: ivHex } = JSON.parse(encryptedData);
4826
+ const key = new Uint8Array(keyHex.match(/.{1,2}/g).map((byte) => parseInt(byte, 16)));
4827
+ const iv = new Uint8Array(ivHex.match(/.{1,2}/g).map((byte) => parseInt(byte, 16)));
4828
+ const decipher = crypto.createDecipheriv('aes-256-cbc', key, iv);
4829
+ let decrypted = decipher.update(encrypted, 'hex', 'utf8');
4830
+ decrypted += decipher.final('utf8');
4831
+ return decrypted;
4832
+ }
4833
+ // ========================================
4834
+ // UNIFIED API - Core Methods (7 total)
4835
+ // ONE way to do everything! 🧠⚛️
4836
+ //
4837
+ // 1. add() - Smart data addition (auto/guided/explicit/literal)
4838
+ // 2. search() - Triple-power search (vector + graph + facets)
4839
+ // 3. import() - Neural import with semantic type detection
4840
+ // 4. addNoun() - Explicit noun creation with NounType
4841
+ // 5. addVerb() - Relationship creation between nouns
4842
+ // 6. update() - Update noun data/metadata with index sync
4843
+ // 7. delete() - Smart delete with soft delete default (enhanced original)
4844
+ // ========================================
4845
+ /**
4846
+ * Neural Import - Smart bulk data import with semantic type detection
4847
+ * Uses transformer embeddings to automatically detect and classify data types
4848
+ * @param data Array of data items or single item to import
4849
+ * @param options Import options including type hints and processing mode
4850
+ * @returns Array of created IDs
4851
+ */
4852
+ async import(source, options) {
4853
+ // Lazy-load import manager for zero overhead when not used
4854
+ if (!this._importManager) {
4855
+ const { ImportManager } = await import('./importManager.js');
4856
+ this._importManager = new ImportManager(this);
4857
+ await this._importManager.init();
4858
+ }
4859
+ // AUTO-DETECT: Is it a URL or file path?
4860
+ if (typeof source === 'string') {
4861
+ // URL detection
4862
+ if (source.startsWith('http://') || source.startsWith('https://')) {
4863
+ const result = await this._importManager.importUrl(source, options || {});
4864
+ return result.nouns;
4865
+ }
4866
+ // File path detection
4867
+ try {
4868
+ const { exists } = await import('./universal/fs.js');
4869
+ if (await exists(source)) {
4870
+ const result = await this._importManager.importFile(source, options || {});
4871
+ return result.nouns;
4872
+ }
4873
+ }
4874
+ catch { }
4875
+ }
4876
+ // Regular data import (objects, arrays, or raw text)
4877
+ const result = await this._importManager.import(source, {
4878
+ format: options?.format || 'auto',
4879
+ batchSize: options?.batchSize || 50,
4880
+ extractRelationships: options?.relationships !== false,
4881
+ autoDetect: true, // Always intelligent
4882
+ parallel: true // Always fast
4883
+ });
4884
+ if (result.errors.length > 0) {
4885
+ prodLog.warn(`Import had ${result.errors.length} errors:`, result.errors[0]);
4886
+ }
4887
+ prodLog.info(`✨ Imported ${result.stats.imported} items, ${result.stats.relationships} relationships`);
4888
+ return result.nouns;
4889
+ }
4890
+ /**
4891
+ * Add Noun - Explicit noun creation with strongly-typed NounType
4892
+ * For when you know exactly what type of noun you're creating
4893
+ * @param data The noun data
4894
+ * @param nounType The explicit noun type from NounType enum
4895
+ * @param metadata Additional metadata
4896
+ * @returns Created noun ID
4897
+ */
4898
+ /**
4899
+ * Add a noun to the database with required type
4900
+ * Clean 2.0 API - primary method for adding data
4901
+ *
4902
+ * @param vectorOrData Vector array or data to embed
4903
+ * @param nounType Required noun type (one of 31 types)
4904
+ * @param metadata Optional metadata object
4905
+ * @returns The generated ID
4906
+ */
4907
+ async addNoun(vectorOrData, nounType, metadata, options = {}) {
4908
+ // Validate noun type
4909
+ const validatedType = validateNounType(nounType);
4910
+ // Enrich metadata with validated type
4911
+ let enrichedMetadata = {
4912
+ ...metadata,
4913
+ noun: validatedType
4914
+ };
4915
+ await this.ensureInitialized();
4916
+ // Check if database is in read-only mode
4917
+ this.checkReadOnly();
4918
+ // Validate input is not null or undefined
4919
+ if (vectorOrData === null || vectorOrData === undefined) {
4920
+ throw new Error('Input cannot be null or undefined');
4921
+ }
4922
+ try {
4923
+ let vector;
4924
+ if (Array.isArray(vectorOrData)) {
4925
+ for (let i = 0; i < vectorOrData.length; i++) {
4926
+ if (typeof vectorOrData[i] !== 'number') {
4927
+ throw new Error('Vector contains non-numeric values');
4928
+ }
4929
+ }
4930
+ }
4931
+ // Check if input is already a vector
4932
+ if (Array.isArray(vectorOrData) && !options.forceEmbed) {
4933
+ // Input is already a vector (and we've validated it contains only numbers)
4934
+ vector = vectorOrData;
4935
+ }
4936
+ else {
4937
+ // Input needs to be vectorized
4938
+ try {
4939
+ // Check if input is a JSON object and process it specially
4940
+ if (typeof vectorOrData === 'object' &&
4941
+ vectorOrData !== null &&
4942
+ !Array.isArray(vectorOrData)) {
4943
+ // Process JSON object for better vectorization
4944
+ const preparedText = prepareJsonForVectorization(vectorOrData, {
4945
+ // Prioritize common name/title fields if they exist
4946
+ priorityFields: [
4947
+ 'name',
4948
+ 'title',
4949
+ 'company',
4950
+ 'organization',
4951
+ 'description',
4952
+ 'summary'
4953
+ ]
4954
+ });
4955
+ vector = await this.embeddingFunction(preparedText);
4956
+ // IMPORTANT: When an object is passed as data and no metadata is provided,
4957
+ // use the object AS the metadata too. This is expected behavior for the API.
4958
+ // Users can pass either:
4959
+ // 1. addNoun(string, metadata) - vectorize string, store metadata
4960
+ // 2. addNoun(object) - vectorize object text, store object as metadata
4961
+ // 3. addNoun(object, metadata) - vectorize object text, store provided metadata
4962
+ if (!enrichedMetadata || Object.keys(enrichedMetadata).length === 1) { // Only has 'noun' key
4963
+ enrichedMetadata = { ...vectorOrData, noun: validatedType };
4964
+ }
4965
+ // Track field names for this JSON document
4966
+ const service = this.getServiceName(options);
4967
+ if (this.storage) {
4968
+ await this.storage.trackFieldNames(vectorOrData, service);
4969
+ }
4970
+ }
4971
+ else {
4972
+ // Use standard embedding for non-JSON data
4973
+ vector = await this.embeddingFunction(vectorOrData);
4974
+ }
4975
+ }
4976
+ catch (embedError) {
4977
+ throw new Error(`Failed to vectorize data: ${embedError}`);
4978
+ }
4979
+ }
4980
+ // Check if vector is defined
4981
+ if (!vector) {
4982
+ throw new Error('Vector is undefined or null');
4983
+ }
4984
+ // Validate vector dimensions
4985
+ if (vector.length !== this._dimensions) {
4986
+ throw new Error(`Vector dimension mismatch: expected ${this._dimensions}, got ${vector.length}`);
4987
+ }
4988
+ // Use ID from options if it exists, otherwise from metadata, otherwise generate a new UUID
4989
+ const id = options.id ||
4990
+ (enrichedMetadata && typeof enrichedMetadata === 'object' && 'id' in enrichedMetadata
4991
+ ? enrichedMetadata.id
4992
+ : uuidv4());
4993
+ // Check for existing noun (both write-only and normal modes)
4994
+ let existingNoun;
4995
+ if (options.id) {
4996
+ try {
4997
+ if (this.writeOnly) {
4998
+ // In write-only mode, check storage directly
4999
+ existingNoun =
5000
+ (await this.storage.getNoun(options.id)) ?? undefined;
5001
+ }
5002
+ else {
5003
+ // In normal mode, check index first, then storage
5004
+ existingNoun = this.index.getNouns().get(options.id);
5005
+ if (!existingNoun) {
5006
+ existingNoun =
5007
+ (await this.storage.getNoun(options.id)) ?? undefined;
5008
+ }
5009
+ }
5010
+ if (existingNoun) {
5011
+ // Check if existing noun is a placeholder
5012
+ const existingMetadata = await this.storage.getMetadata(options.id);
5013
+ const isPlaceholder = existingMetadata &&
5014
+ typeof existingMetadata === 'object' &&
5015
+ existingMetadata.isPlaceholder;
5016
+ if (isPlaceholder) {
5017
+ // Replace placeholder with real data
5018
+ if (this.loggingConfig?.verbose) {
5019
+ console.log(`Replacing placeholder noun ${options.id} with real data`);
5020
+ }
5021
+ }
5022
+ else {
5023
+ // Real noun already exists, update it
5024
+ if (this.loggingConfig?.verbose) {
5025
+ console.log(`Updating existing noun ${options.id}`);
5026
+ }
5027
+ }
5028
+ }
5029
+ }
5030
+ catch (storageError) {
5031
+ // Item doesn't exist, continue with add operation
5032
+ }
5033
+ }
5034
+ let noun;
5035
+ // In write-only mode, skip index operations since index is not loaded
5036
+ if (this.writeOnly) {
5037
+ // Create noun object directly without adding to index
5038
+ noun = {
5039
+ id,
5040
+ vector,
5041
+ connections: new Map(),
5042
+ level: 0, // Default level for new nodes
5043
+ metadata: undefined // Will be set separately
5044
+ };
5045
+ }
5046
+ else {
5047
+ // Normal mode: Add to HNSW index first
5048
+ await this.hnswIndex.addItem({ id, vector, metadata: enrichedMetadata });
5049
+ // Get the noun from the HNSW index
5050
+ const indexNoun = this.hnswIndex.getNouns().get(id);
5051
+ if (!indexNoun) {
5052
+ throw new Error(`Failed to retrieve newly created noun with ID ${id}`);
5053
+ }
5054
+ noun = indexNoun;
5055
+ }
5056
+ // Save noun to storage using augmentation system
5057
+ await this.augmentations.execute('saveNoun', { noun, options }, async () => {
5058
+ await this.storage.saveNoun(noun);
5059
+ const service = this.getServiceName(options);
5060
+ await this.storage.incrementStatistic('noun', service);
5061
+ });
5062
+ // Save metadata if provided and not empty
5063
+ if (enrichedMetadata !== undefined) {
5064
+ // Skip saving if metadata is an empty object
5065
+ if (enrichedMetadata &&
5066
+ typeof enrichedMetadata === 'object' &&
5067
+ Object.keys(enrichedMetadata).length === 0) {
5068
+ // Don't save empty metadata
5069
+ // Explicitly save null to ensure no metadata is stored
5070
+ await this.storage.saveMetadata(id, null);
5071
+ }
5072
+ else {
5073
+ // Validate noun type if metadata is for a GraphNoun
5074
+ if (enrichedMetadata && typeof enrichedMetadata === 'object' && 'noun' in enrichedMetadata) {
5075
+ const nounType = enrichedMetadata.noun;
5076
+ // Check if the noun type is valid
5077
+ const isValidNounType = Object.values(NounType).includes(nounType);
5078
+ if (!isValidNounType) {
5079
+ console.warn(`Invalid noun type: ${nounType}. Falling back to GraphNoun.`);
5080
+ enrichedMetadata.noun = NounType.Concept;
5081
+ }
5082
+ // Ensure createdBy field is populated for GraphNoun
5083
+ const service = options.service || this.getCurrentAugmentation();
5084
+ const graphNoun = enrichedMetadata;
5085
+ // Only set createdBy if it doesn't exist or is being explicitly updated
5086
+ if (!graphNoun.createdBy || options.service) {
5087
+ graphNoun.createdBy = getAugmentationVersion(service);
5088
+ }
5089
+ // Update timestamps
5090
+ const now = new Date();
5091
+ const timestamp = {
5092
+ seconds: Math.floor(now.getTime() / 1000),
5093
+ nanoseconds: (now.getTime() % 1000) * 1000000
5094
+ };
5095
+ // Set createdAt if it doesn't exist
5096
+ if (!graphNoun.createdAt) {
5097
+ graphNoun.createdAt = timestamp;
5098
+ }
5099
+ // Always update updatedAt
5100
+ graphNoun.updatedAt = timestamp;
5101
+ }
5102
+ // Create properly namespaced metadata for new items
5103
+ let metadataToSave = createNamespacedMetadata(enrichedMetadata);
5104
+ // Add domain metadata if distributed mode is enabled
5105
+ if (this.domainDetector) {
5106
+ // First check if domain is already in metadata
5107
+ if (metadataToSave.domain) {
5108
+ // Domain already specified, keep it
5109
+ const domainInfo = this.domainDetector.detectDomain(metadataToSave);
5110
+ if (domainInfo.domainMetadata) {
5111
+ ;
5112
+ metadataToSave.domainMetadata =
5113
+ domainInfo.domainMetadata;
5114
+ }
5115
+ }
5116
+ else {
5117
+ // Try to detect domain from the data
5118
+ const dataToAnalyze = Array.isArray(vectorOrData)
5119
+ ? enrichedMetadata
5120
+ : vectorOrData;
5121
+ const domainInfo = this.domainDetector.detectDomain(dataToAnalyze);
5122
+ if (domainInfo.domain) {
5123
+ ;
5124
+ metadataToSave.domain = domainInfo.domain;
5125
+ if (domainInfo.domainMetadata) {
5126
+ ;
5127
+ metadataToSave.domainMetadata =
5128
+ domainInfo.domainMetadata;
5129
+ }
5130
+ }
5131
+ }
5132
+ }
5133
+ // Add partition information if distributed mode is enabled
5134
+ if (this.partitioner) {
5135
+ const partition = this.partitioner.getPartition(id);
5136
+ metadataToSave.partition = partition;
5137
+ }
5138
+ await this.storage.saveMetadata(id, metadataToSave);
5139
+ // Update metadata index (write-only mode should build indices!)
5140
+ if (this.index && !this.frozen) {
5141
+ await this.metadataIndex?.addToIndex?.(id, metadataToSave);
5142
+ }
5143
+ // Track metadata statistics
5144
+ const metadataService = this.getServiceName(options);
5145
+ await this.storage.incrementStatistic('metadata', metadataService);
5146
+ // Content type tracking removed - metrics system not initialized
5147
+ // Track update timestamp (handled by metrics augmentation)
5148
+ }
5149
+ }
5150
+ // Update HNSW index size with actual index size
5151
+ const indexSize = this.index.size();
5152
+ await this.storage.updateHnswIndexSize(indexSize);
5153
+ // Update health metrics if in distributed mode
5154
+ if (this.monitoring) {
5155
+ const vectorCount = await this.getNounCount();
5156
+ this.monitoring.updateVectorCount(vectorCount);
5004
5157
  }
5005
- catch (error) {
5006
- console.warn('Error flushing metadata index during cleanup:', error);
5158
+ // If addToRemote is true and we're connected to a remote server, add to remote as well
5159
+ if (options.addToRemote && this.isConnectedToRemoteServer()) {
5160
+ try {
5161
+ await this.addToRemote(id, vector, enrichedMetadata);
5162
+ }
5163
+ catch (remoteError) {
5164
+ console.warn(`Failed to add to remote server: ${remoteError}. Continuing with local add.`);
5165
+ }
5007
5166
  }
5008
- }
5009
- // Clean up distributed mode resources
5010
- if (this.monitoring) {
5011
- this.monitoring.stop();
5012
- }
5013
- if (this.configManager) {
5014
- await this.configManager.cleanup();
5015
- }
5016
- // Clean up worker pools
5017
- await cleanupWorkerPools();
5018
- }
5019
- /**
5020
- * Load environment variables from Cortex configuration
5021
- * This enables services to automatically load all their configs from Brainy
5022
- * @returns Promise that resolves when environment is loaded
5023
- */
5024
- async loadEnvironment() {
5025
- // Cortex integration coming in next release
5026
- prodLog.debug('Cortex integration coming soon');
5027
- }
5028
- /**
5029
- * Set a configuration value with optional encryption
5030
- * @param key Configuration key
5031
- * @param value Configuration value
5032
- * @param options Options including encryption
5033
- */
5034
- async setConfig(key, value, options) {
5035
- // Use a predictable ID based on the config key
5036
- const configId = `config-${key}`;
5037
- // Store the config data in metadata (not as vectorized data)
5038
- const configValue = options?.encrypt ? await this.encryptData(JSON.stringify(value)) : value;
5039
- // Use simple text for vectorization
5040
- const searchableText = `Configuration setting for ${key}`;
5041
- await this.addNoun(searchableText, {
5042
- nounType: NounType.State,
5043
- configKey: key,
5044
- configValue: configValue,
5045
- encrypted: !!options?.encrypt,
5046
- timestamp: new Date().toISOString()
5047
- });
5048
- }
5049
- /**
5050
- * Get a configuration value with automatic decryption
5051
- * @param key Configuration key
5052
- * @param options Options including decryption (auto-detected by default)
5053
- * @returns Configuration value or undefined
5054
- */
5055
- async getConfig(key, options) {
5056
- try {
5057
- // Use the predictable ID to get the config directly
5058
- const configId = `config-${key}`;
5059
- const storedNoun = await this.getNoun(configId);
5060
- if (!storedNoun)
5061
- return undefined;
5062
- // The config data is now stored in metadata
5063
- const value = storedNoun.metadata?.configValue;
5064
- const encrypted = storedNoun.metadata?.encrypted;
5065
- // BEST OF BOTH: Respect explicit decrypt option OR auto-decrypt if encrypted
5066
- const shouldDecrypt = options?.decrypt !== undefined ? options.decrypt : encrypted;
5067
- if (shouldDecrypt && encrypted && typeof value === 'string') {
5068
- const decrypted = await this.decryptData(value);
5069
- return JSON.parse(decrypted);
5167
+ // Invalidate search cache since data has changed
5168
+ this.cache?.invalidateOnDataChange('add');
5169
+ // Determine processing mode
5170
+ const processingMode = options.process || 'auto';
5171
+ let shouldProcessNeurally = false;
5172
+ if (processingMode === 'neural') {
5173
+ shouldProcessNeurally = true;
5070
5174
  }
5071
- return value;
5072
- }
5073
- catch (error) {
5074
- prodLog.debug('Config retrieval failed:', error);
5075
- return undefined;
5076
- }
5077
- }
5078
- /**
5079
- * Encrypt data using universal crypto utilities
5080
- */
5081
- async encryptData(data) {
5082
- const crypto = await import('./universal/crypto.js');
5083
- const key = crypto.randomBytes(32);
5084
- const iv = crypto.randomBytes(16);
5085
- const cipher = crypto.createCipheriv('aes-256-cbc', key, iv);
5086
- let encrypted = cipher.update(data, 'utf8', 'hex');
5087
- encrypted += cipher.final('hex');
5088
- // Store key and iv with encrypted data (in production, manage keys separately)
5089
- return JSON.stringify({
5090
- encrypted,
5091
- key: Array.from(key).map(b => b.toString(16).padStart(2, '0')).join(''),
5092
- iv: Array.from(iv).map(b => b.toString(16).padStart(2, '0')).join('')
5093
- });
5094
- }
5095
- /**
5096
- * Decrypt data using universal crypto utilities
5097
- */
5098
- async decryptData(encryptedData) {
5099
- const crypto = await import('./universal/crypto.js');
5100
- const { encrypted, key: keyHex, iv: ivHex } = JSON.parse(encryptedData);
5101
- const key = new Uint8Array(keyHex.match(/.{1,2}/g).map((byte) => parseInt(byte, 16)));
5102
- const iv = new Uint8Array(ivHex.match(/.{1,2}/g).map((byte) => parseInt(byte, 16)));
5103
- const decipher = crypto.createDecipheriv('aes-256-cbc', key, iv);
5104
- let decrypted = decipher.update(encrypted, 'hex', 'utf8');
5105
- decrypted += decipher.final('utf8');
5106
- return decrypted;
5107
- }
5108
- // ========================================
5109
- // UNIFIED API - Core Methods (7 total)
5110
- // ONE way to do everything! 🧠⚛️
5111
- //
5112
- // 1. add() - Smart data addition (auto/guided/explicit/literal)
5113
- // 2. search() - Triple-power search (vector + graph + facets)
5114
- // 3. import() - Neural import with semantic type detection
5115
- // 4. addNoun() - Explicit noun creation with NounType
5116
- // 5. addVerb() - Relationship creation between nouns
5117
- // 6. update() - Update noun data/metadata with index sync
5118
- // 7. delete() - Smart delete with soft delete default (enhanced original)
5119
- // ========================================
5120
- /**
5121
- * Neural Import - Smart bulk data import with semantic type detection
5122
- * Uses transformer embeddings to automatically detect and classify data types
5123
- * @param data Array of data items or single item to import
5124
- * @param options Import options including type hints and processing mode
5125
- * @returns Array of created IDs
5126
- */
5127
- async import(source, options) {
5128
- // Lazy-load import manager for zero overhead when not used
5129
- if (!this._importManager) {
5130
- const { ImportManager } = await import('./importManager.js');
5131
- this._importManager = new ImportManager(this);
5132
- await this._importManager.init();
5133
- }
5134
- // AUTO-DETECT: Is it a URL or file path?
5135
- if (typeof source === 'string') {
5136
- // URL detection
5137
- if (source.startsWith('http://') || source.startsWith('https://')) {
5138
- const result = await this._importManager.importUrl(source, options || {});
5139
- return result.nouns;
5175
+ else if (processingMode === 'auto') {
5176
+ // Auto-detect whether to use neural processing
5177
+ shouldProcessNeurally = this.shouldAutoProcessNeurally(vectorOrData, enrichedMetadata);
5140
5178
  }
5141
- // File path detection
5142
- try {
5143
- const { exists } = await import('./universal/fs.js');
5144
- if (await exists(source)) {
5145
- const result = await this._importManager.importFile(source, options || {});
5146
- return result.nouns;
5179
+ // 'literal' mode means no neural processing
5180
+ // 🧠 AI Processing (Neural Import) - Based on processing mode
5181
+ if (shouldProcessNeurally) {
5182
+ try {
5183
+ // Execute augmentation pipeline for data processing
5184
+ // Note: Augmentations will be called via this.augmentations.execute during the actual add operation
5185
+ // This replaces the legacy SENSE pipeline
5186
+ if (this.loggingConfig?.verbose) {
5187
+ console.log(`🧠 AI processing completed for data: ${id}`);
5188
+ }
5189
+ }
5190
+ catch (processingError) {
5191
+ // Don't fail the add operation if processing fails
5192
+ console.warn(`🧠 AI processing failed for ${id}:`, processingError);
5147
5193
  }
5148
5194
  }
5149
- catch { }
5195
+ return id;
5150
5196
  }
5151
- // Regular data import (objects, arrays, or raw text)
5152
- const result = await this._importManager.import(source, {
5153
- format: options?.format || 'auto',
5154
- batchSize: options?.batchSize || 50,
5155
- extractRelationships: options?.relationships !== false,
5156
- autoDetect: true, // Always intelligent
5157
- parallel: true // Always fast
5158
- });
5159
- if (result.errors.length > 0) {
5160
- prodLog.warn(`Import had ${result.errors.length} errors:`, result.errors[0]);
5197
+ catch (error) {
5198
+ console.error('Failed to add vector:', error);
5199
+ // Track error in health monitor
5200
+ if (this.monitoring) {
5201
+ this.monitoring.recordRequest(0, true);
5202
+ }
5203
+ throw new Error(`Failed to add vector: ${error}`);
5161
5204
  }
5162
- prodLog.info(`✨ Imported ${result.stats.imported} items, ${result.stats.relationships} relationships`);
5163
- return result.nouns;
5164
- }
5165
- /**
5166
- * Add Noun - Explicit noun creation with strongly-typed NounType
5167
- * For when you know exactly what type of noun you're creating
5168
- * @param data The noun data
5169
- * @param nounType The explicit noun type from NounType enum
5170
- * @param metadata Additional metadata
5171
- * @returns Created noun ID
5172
- */
5173
- /**
5174
- * Add a noun to the database
5175
- * Clean 2.0 API - primary method for adding data
5176
- *
5177
- * @param vectorOrData Vector array or data to embed
5178
- * @param metadata Metadata to store with the noun
5179
- * @returns The generated ID
5180
- */
5181
- async addNoun(vectorOrData, metadata) {
5182
- return await this.add(vectorOrData, metadata);
5183
5205
  }
5184
5206
  /**
5185
5207
  * Add Verb - Unified relationship creation between nouns
@@ -5514,10 +5536,10 @@ export class BrainyData {
5514
5536
  }
5515
5537
  };
5516
5538
  // Store coordination plan in _system directory
5517
- await this.addNoun({
5539
+ await this.addNoun('Cortex coordination plan', NounType.Process, {
5518
5540
  id: '_system/coordination',
5519
5541
  type: 'cortex_coordination',
5520
- metadata: coordinationPlan
5542
+ ...coordinationPlan
5521
5543
  });
5522
5544
  prodLog.info('📋 Storage migration coordination plan created');
5523
5545
  prodLog.info('All services will automatically detect and execute the migration');
@@ -5742,8 +5764,16 @@ export class BrainyData {
5742
5764
  */
5743
5765
  async deleteNouns(ids) {
5744
5766
  const results = [];
5745
- for (const id of ids) {
5746
- results.push(await this.deleteNoun(id));
5767
+ const chunkSize = 10; // Conservative chunk size for parallel processing
5768
+ // Process deletions in parallel chunks to improve performance
5769
+ for (let i = 0; i < ids.length; i += chunkSize) {
5770
+ const chunk = ids.slice(i, i + chunkSize);
5771
+ // Process chunk in parallel
5772
+ const chunkPromises = chunk.map(id => this.deleteNoun(id));
5773
+ // Wait for all in chunk to complete
5774
+ const chunkResults = await Promise.all(chunkPromises);
5775
+ // Maintain order by adding chunk results
5776
+ results.push(...chunkResults);
5747
5777
  }
5748
5778
  return results;
5749
5779
  }
@@ -5924,34 +5954,41 @@ export class BrainyData {
5924
5954
  get neural() {
5925
5955
  if (!this._neural) {
5926
5956
  // Create the unified Neural API instance
5927
- this._neural = new NeuralAPI(this);
5957
+ this._neural = new ImprovedNeuralAPI(this);
5928
5958
  }
5929
5959
  return this._neural;
5930
5960
  }
5931
5961
  /**
5932
5962
  * Simple similarity check (shorthand for neural.similar)
5933
5963
  */
5934
- async similar(a, b) {
5935
- return this.neural.similar(a, b);
5964
+ async similar(a, b, options) {
5965
+ const result = await this.neural.similar(a, b, options);
5966
+ // Always return simple number for main class shortcut
5967
+ return typeof result === 'object' ? result.score : result;
5936
5968
  }
5937
5969
  /**
5938
5970
  * Get semantic clusters (shorthand for neural.clusters)
5939
5971
  */
5940
- async clusters(options) {
5941
- return this.neural.clusters(options);
5972
+ async clusters(items, options) {
5973
+ // Support both (items, options) and (options) patterns
5974
+ if (typeof items === 'object' && !Array.isArray(items) && options === undefined) {
5975
+ // First argument is options object
5976
+ return this.neural.clusters(items);
5977
+ }
5978
+ // Standard (items, options) pattern
5979
+ if (options) {
5980
+ return this.neural.clusters({ ...options, items });
5981
+ }
5982
+ return this.neural.clusters(items);
5942
5983
  }
5943
5984
  /**
5944
5985
  * Get related items (shorthand for neural.neighbors)
5945
5986
  */
5946
- async related(id, limit) {
5947
- const result = await this.neural.neighbors(id, { limit });
5948
- return result.neighbors;
5949
- }
5950
- /**
5951
- * Get visualization data (shorthand for neural.visualize)
5952
- */
5953
- async visualize(options) {
5954
- return this.neural.visualize(options);
5987
+ async related(id, options) {
5988
+ const limit = typeof options === 'number' ? options : options?.limit;
5989
+ const fullOptions = typeof options === 'number' ? { limit } : options;
5990
+ const result = await this.neural.neighbors(id, fullOptions);
5991
+ return result.neighbors || [];
5955
5992
  }
5956
5993
  /**
5957
5994
  * 🚀 TRIPLE INTELLIGENCE SEARCH - Natural Language & Complex Queries