@soulcraft/brainy 0.31.0 → 0.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/unified.js CHANGED
@@ -4432,6 +4432,275 @@ async function getStatistics(instance, options = {}) {
  }
  }
 
+ /**
+ * Utility functions for processing JSON documents for vectorization and search
+ */
+ /**
+ * Extracts text from a JSON object for vectorization
+ * This function recursively processes the JSON object and extracts text from all fields
+ * It can also prioritize specific fields if provided
+ *
+ * @param jsonObject The JSON object to extract text from
+ * @param options Configuration options for text extraction
+ * @returns A string containing the extracted text
+ */
+ function extractTextFromJson(jsonObject, options = {}) {
+ // Set default options
+ const { priorityFields = [], excludeFields = [], includeFieldNames = true, maxDepth = 5, currentDepth = 0, fieldPath = [] } = options;
+ // If input is not an object or array, or we've reached max depth, return as string
+ if (jsonObject === null ||
+ jsonObject === undefined ||
+ typeof jsonObject !== 'object' ||
+ currentDepth >= maxDepth) {
+ return String(jsonObject || '');
+ }
+ const extractedText = [];
+ const priorityText = [];
+ // Process arrays
+ if (Array.isArray(jsonObject)) {
+ for (let i = 0; i < jsonObject.length; i++) {
+ const value = jsonObject[i];
+ const newPath = [...fieldPath, i.toString()];
+ // Recursively extract text from array items
+ const itemText = extractTextFromJson(value, {
+ priorityFields,
+ excludeFields,
+ includeFieldNames,
+ maxDepth,
+ currentDepth: currentDepth + 1,
+ fieldPath: newPath
+ });
+ if (itemText) {
+ extractedText.push(itemText);
+ }
+ }
+ }
+ // Process objects
+ else {
+ for (const [key, value] of Object.entries(jsonObject)) {
+ // Skip excluded fields
+ if (excludeFields.includes(key)) {
+ continue;
+ }
+ const newPath = [...fieldPath, key];
+ const fullPath = newPath.join('.');
+ // Check if this is a priority field
+ const isPriority = priorityFields.some(field => {
+ // Exact match
+ if (field === key)
+ return true;
+ // Path match
+ if (field === fullPath)
+ return true;
+ // Wildcard match (e.g., "user.*" matches "user.name", "user.email", etc.)
+ if (field.endsWith('.*') && fullPath.startsWith(field.slice(0, -2)))
+ return true;
+ return false;
+ });
+ // Get the field value as text
+ let fieldText;
+ if (typeof value === 'object' && value !== null) {
+ // Recursively extract text from nested objects
+ fieldText = extractTextFromJson(value, {
+ priorityFields,
+ excludeFields,
+ includeFieldNames,
+ maxDepth,
+ currentDepth: currentDepth + 1,
+ fieldPath: newPath
+ });
+ }
+ else {
+ fieldText = String(value || '');
+ }
+ // Add field name if requested
+ if (includeFieldNames && fieldText) {
+ fieldText = `${key}: ${fieldText}`;
+ }
+ // Add to appropriate collection
+ if (fieldText) {
+ if (isPriority) {
+ priorityText.push(fieldText);
+ }
+ else {
+ extractedText.push(fieldText);
+ }
+ }
+ }
+ }
+ // Combine priority text (repeated for emphasis) and regular text
+ return [...priorityText, ...priorityText, ...extractedText].join(' ');
+ }
+ /**
+ * Prepares a JSON document for vectorization
+ * This function extracts text from the JSON document and formats it for optimal vectorization
+ *
+ * @param jsonDocument The JSON document to prepare
+ * @param options Configuration options for preparation
+ * @returns A string ready for vectorization
+ */
+ function prepareJsonForVectorization(jsonDocument, options = {}) {
+ // If input is a string, try to parse it as JSON
+ let document = jsonDocument;
+ if (typeof jsonDocument === 'string') {
+ try {
+ document = JSON.parse(jsonDocument);
+ }
+ catch (e) {
+ // If parsing fails, treat it as a plain string
+ return jsonDocument;
+ }
+ }
+ // If not an object after parsing, return as is
+ if (typeof document !== 'object' || document === null) {
+ return String(document || '');
+ }
+ // Extract text from the document
+ return extractTextFromJson(document, options);
+ }
+ /**
+ * Extracts text from a specific field in a JSON document
+ * This is useful for searching within specific fields
+ *
+ * @param jsonDocument The JSON document to extract from
+ * @param fieldPath The path to the field (e.g., "user.name" or "addresses[0].city")
+ * @returns The extracted text or empty string if field not found
+ */
+ function extractFieldFromJson(jsonDocument, fieldPath) {
+ // If input is a string, try to parse it as JSON
+ let document = jsonDocument;
+ if (typeof jsonDocument === 'string') {
+ try {
+ document = JSON.parse(jsonDocument);
+ }
+ catch (e) {
+ // If parsing fails, return empty string
+ return '';
+ }
+ }
+ // If not an object after parsing, return empty string
+ if (typeof document !== 'object' || document === null) {
+ return '';
+ }
+ // Parse the field path
+ const parts = fieldPath.split('.');
+ let current = document;
+ // Navigate through the path
+ for (const part of parts) {
+ // Handle array indexing (e.g., "addresses[0]")
+ const match = part.match(/^([^[]+)(?:\[(\d+)\])?$/);
+ if (!match) {
+ return '';
+ }
+ const [, key, indexStr] = match;
+ // Move to the next level
+ current = current[key];
+ // If we have an array index, access that element
+ if (indexStr !== undefined && Array.isArray(current)) {
+ const index = parseInt(indexStr, 10);
+ current = current[index];
+ }
+ // If we've reached a null or undefined value, return empty string
+ if (current === null || current === undefined) {
+ return '';
+ }
+ }
+ // Convert the final value to string
+ return typeof current === 'object'
+ ? JSON.stringify(current)
+ : String(current);
+ }
+
+ /**
+ * Utility functions for tracking and managing field names in JSON documents
+ */
+ /**
+ * Extracts field names from a JSON document
+ * @param jsonObject The JSON object to extract field names from
+ * @param options Configuration options
+ * @returns An array of field paths (e.g., "user.name", "addresses[0].city")
+ */
+ function extractFieldNamesFromJson(jsonObject, options = {}) {
+ const { maxDepth = 5, currentDepth = 0, currentPath = '', fieldNames = new Set() } = options;
+ if (jsonObject === null ||
+ jsonObject === undefined ||
+ typeof jsonObject !== 'object' ||
+ currentDepth >= maxDepth) {
+ return Array.from(fieldNames);
+ }
+ if (Array.isArray(jsonObject)) {
+ // For arrays, we'll just check the first item to avoid explosion of paths
+ if (jsonObject.length > 0) {
+ const arrayPath = currentPath ? `${currentPath}[0]` : '[0]';
+ extractFieldNamesFromJson(jsonObject[0], {
+ maxDepth,
+ currentDepth: currentDepth + 1,
+ currentPath: arrayPath,
+ fieldNames
+ });
+ }
+ }
+ else {
+ // For objects, process each property
+ for (const key of Object.keys(jsonObject)) {
+ const value = jsonObject[key];
+ const fieldPath = currentPath ? `${currentPath}.${key}` : key;
+ // Add this field path
+ fieldNames.add(fieldPath);
+ // Recursively process nested objects
+ if (typeof value === 'object' && value !== null) {
+ extractFieldNamesFromJson(value, {
+ maxDepth,
+ currentDepth: currentDepth + 1,
+ currentPath: fieldPath,
+ fieldNames
+ });
+ }
+ }
+ }
+ return Array.from(fieldNames);
+ }
+ /**
+ * Maps field names to standard field names based on common patterns
+ * @param fieldName The field name to map
+ * @returns The standard field name if a match is found, or null if no match
+ */
+ function mapToStandardField(fieldName) {
+ // Standard field mappings
+ const standardMappings = {
+ 'title': ['title', 'name', 'headline', 'subject'],
+ 'description': ['description', 'summary', 'content', 'text', 'body'],
+ 'author': ['author', 'creator', 'user', 'owner', 'by'],
+ 'date': ['date', 'created', 'createdAt', 'timestamp', 'published'],
+ 'url': ['url', 'link', 'href', 'source'],
+ 'image': ['image', 'thumbnail', 'photo', 'picture'],
+ 'tags': ['tags', 'categories', 'keywords', 'topics']
+ };
+ // Check for matches
+ for (const [standardField, possibleMatches] of Object.entries(standardMappings)) {
+ // Exact match
+ if (possibleMatches.includes(fieldName)) {
+ return standardField;
+ }
+ // Path match (e.g., "user.name" matches "name")
+ const parts = fieldName.split('.');
+ const lastPart = parts[parts.length - 1];
+ if (possibleMatches.includes(lastPart)) {
+ return standardField;
+ }
+ // Array match (e.g., "items[0].name" matches "name")
+ if (fieldName.includes('[')) {
+ for (const part of parts) {
+ const cleanPart = part.split('[')[0];
+ if (possibleMatches.includes(cleanPart)) {
+ return standardField;
+ }
+ }
+ }
+ }
+ return null;
+ }
+
  /**
  * HNSW (Hierarchical Navigable Small World) Index implementation
  * Based on the paper: "Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs"
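For orientation, a hedged usage sketch of the helpers added above. They live inside dist/unified.js; this diff does not show whether they are exported under these names, so treat the calls as illustrative. The sample document and values are hypothetical:

// Sample document; all values are hypothetical.
const doc = {
  title: 'Brainy release notes',
  author: { name: 'Ada' },
  tags: ['search', 'vectors']
}

// Priority fields are emitted twice, which weights them in the vector text.
prepareJsonForVectorization(doc, { priorityFields: ['title'] })
// => 'title: Brainy release notes title: Brainy release notes author: name: Ada tags: search vectors'

extractFieldFromJson(doc, 'author.name')   // => 'Ada'
extractFieldNamesFromJson(doc)             // => ['title', 'author', 'author.name', 'tags']
mapToStandardField('user.createdAt')       // => 'date' (suffix match on 'createdAt')
mapToStandardField('author.name')          // => 'title' ('name' is an alias in the title bucket, checked first)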
@@ -5708,6 +5977,107 @@ class BaseStorageAdapter {
  // Call the protected flushStatistics method to immediately write to storage
  await this.flushStatistics();
  }
+ /**
+ * Track field names from a JSON document
+ * @param jsonDocument The JSON document to extract field names from
+ * @param service The service that inserted the data
+ */
+ async trackFieldNames(jsonDocument, service) {
+ // Skip if not a JSON object
+ if (typeof jsonDocument !== 'object' || jsonDocument === null || Array.isArray(jsonDocument)) {
+ return;
+ }
+ // Get current statistics from cache or storage
+ let statistics = this.statisticsCache;
+ if (!statistics) {
+ statistics = await this.getStatisticsData();
+ if (!statistics) {
+ statistics = this.createDefaultStatistics();
+ }
+ // Update the cache
+ this.statisticsCache = {
+ ...statistics,
+ nounCount: { ...statistics.nounCount },
+ verbCount: { ...statistics.verbCount },
+ metadataCount: { ...statistics.metadataCount },
+ fieldNames: { ...statistics.fieldNames },
+ standardFieldMappings: { ...statistics.standardFieldMappings }
+ };
+ }
+ // Ensure fieldNames exists
+ if (!this.statisticsCache.fieldNames) {
+ this.statisticsCache.fieldNames = {};
+ }
+ // Ensure standardFieldMappings exists
+ if (!this.statisticsCache.standardFieldMappings) {
+ this.statisticsCache.standardFieldMappings = {};
+ }
+ // Extract field names from the JSON document
+ const fieldNames = extractFieldNamesFromJson(jsonDocument);
+ // Initialize service entry if it doesn't exist
+ if (!this.statisticsCache.fieldNames[service]) {
+ this.statisticsCache.fieldNames[service] = [];
+ }
+ // Add new field names to the service's list
+ for (const fieldName of fieldNames) {
+ if (!this.statisticsCache.fieldNames[service].includes(fieldName)) {
+ this.statisticsCache.fieldNames[service].push(fieldName);
+ }
+ // Map to standard field if possible
+ const standardField = mapToStandardField(fieldName);
+ if (standardField) {
+ // Initialize standard field entry if it doesn't exist
+ if (!this.statisticsCache.standardFieldMappings[standardField]) {
+ this.statisticsCache.standardFieldMappings[standardField] = {};
+ }
+ // Initialize service entry if it doesn't exist
+ if (!this.statisticsCache.standardFieldMappings[standardField][service]) {
+ this.statisticsCache.standardFieldMappings[standardField][service] = [];
+ }
+ // Add field name to standard field mapping if not already there
+ if (!this.statisticsCache.standardFieldMappings[standardField][service].includes(fieldName)) {
+ this.statisticsCache.standardFieldMappings[standardField][service].push(fieldName);
+ }
+ }
+ }
+ // Update timestamp
+ this.statisticsCache.lastUpdated = new Date().toISOString();
+ // Schedule a batch update
+ this.statisticsModified = true;
+ this.scheduleBatchUpdate();
+ }
+ /**
+ * Get available field names by service
+ * @returns Record of field names by service
+ */
+ async getAvailableFieldNames() {
+ // Get current statistics from cache or storage
+ let statistics = this.statisticsCache;
+ if (!statistics) {
+ statistics = await this.getStatisticsData();
+ if (!statistics) {
+ return {};
+ }
+ }
+ // Return field names by service
+ return statistics.fieldNames || {};
+ }
+ /**
+ * Get standard field mappings
+ * @returns Record of standard field mappings
+ */
+ async getStandardFieldMappings() {
+ // Get current statistics from cache or storage
+ let statistics = this.statisticsCache;
+ if (!statistics) {
+ statistics = await this.getStatisticsData();
+ if (!statistics) {
+ return {};
+ }
+ }
+ // Return standard field mappings
+ return statistics.standardFieldMappings || {};
+ }
  /**
  * Create default statistics data
  * @returns Default statistics data
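A hedged sketch of what the new tracking methods yield, assuming `storage` is an adapter instance derived from BaseStorageAdapter and a hypothetical service name 'news-service':

await storage.trackFieldNames({ headline: 'Hello', createdAt: '2024-01-01' }, 'news-service')

await storage.getAvailableFieldNames()
// => { 'news-service': ['headline', 'createdAt'] }

await storage.getStandardFieldMappings()
// => { title: { 'news-service': ['headline'] }, date: { 'news-service': ['createdAt'] } }

Writes go through the statistics cache and are flushed via scheduleBatchUpdate(), so reads immediately after a write are served from the cache rather than storage.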
@@ -5718,6 +6088,8 @@ class BaseStorageAdapter {
  verbCount: {},
  metadataCount: {},
  hnswIndexSize: 0,
+ fieldNames: {},
+ standardFieldMappings: {},
  lastUpdated: new Date().toISOString()
  };
  }
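For reference, the full default statistics record after this change has roughly the following shape (reconstructed from this hunk and the cache-copy code above; the comments are interpretive):

{
  nounCount: {},
  verbCount: {},
  metadataCount: {},
  hnswIndexSize: 0,
  fieldNames: {},             // per-service field paths, e.g. { 'svc': ['user.name'] }
  standardFieldMappings: {},  // e.g. { title: { 'svc': ['headline'] } }
  lastUpdated: new Date().toISOString()
}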
@@ -5766,9 +6138,12 @@ class BaseStorage extends BaseStorageAdapter {
  }
  /**
  * Get all nouns from storage
+ * @deprecated This method is deprecated and will be removed in a future version.
+ * It can cause memory issues with large datasets. Use getNouns() with pagination instead.
  */
  async getAllNouns() {
  await this.ensureInitialized();
+ console.warn('WARNING: getAllNouns() is deprecated and will be removed in a future version. Use getNouns() with pagination instead.');
  return this.getAllNouns_internal();
  }
  /**
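A hedged migration sketch for this deprecation (the same pattern applies to getAllVerbs() below), using the paginated getNouns() API that appears later in this diff; handle() is a placeholder:

// Before: loads every noun into memory and now logs a warning.
const nouns = await storage.getAllNouns()

// After: page through results with a cursor.
let cursor
let page
do {
  page = await storage.getNouns({ pagination: { limit: 100, cursor } })
  for (const noun of page.items) handle(noun)
  cursor = page.nextCursor
} while (page.hasMore && cursor)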
@@ -5803,9 +6178,12 @@ class BaseStorage extends BaseStorageAdapter {
  }
  /**
  * Get all verbs from storage
+ * @deprecated This method is deprecated and will be removed in a future version.
+ * It can cause memory issues with large datasets. Use getVerbs() with pagination instead.
  */
  async getAllVerbs() {
  await this.ensureInitialized();
+ console.warn('WARNING: getAllVerbs() is deprecated and will be removed in a future version. Use getVerbs() with pagination instead.');
  return this.getAllVerbs_internal();
  }
  /**
@@ -5840,10 +6218,13 @@ class BaseStorage extends BaseStorageAdapter {
  const pagination = options?.pagination || {};
  const limit = pagination.limit || 100;
  const offset = pagination.offset || 0;
+ const cursor = pagination.cursor;
  // Optimize for common filter cases to avoid loading all nouns
  if (options?.filter) {
  // If filtering by nounType only, use the optimized method
- if (options.filter.nounType && !options.filter.service && !options.filter.metadata) {
+ if (options.filter.nounType &&
+ !options.filter.service &&
+ !options.filter.metadata) {
  const nounType = Array.isArray(options.filter.nounType)
  ? options.filter.nounType[0]
  : options.filter.nounType;
@@ -5866,81 +6247,124 @@ class BaseStorage extends BaseStorageAdapter {
  };
  }
  }
- // For more complex filtering or no filtering, we need to get all nouns
- // but limit the number we load to avoid memory issues
- const maxNouns = offset + limit + 1; // Get one extra to check if there are more
- let allNouns = [];
+ // For more complex filtering or no filtering, use a paginated approach
+ // that avoids loading all nouns into memory at once
  try {
- // Try to get only the nouns we need
- allNouns = await this.getAllNouns_internal();
- // If we have too many nouns, truncate the array to avoid memory issues
- if (allNouns.length > maxNouns * 10) {
- console.warn(`Large number of nouns (${allNouns.length}), truncating to ${maxNouns * 10} for filtering`);
- allNouns = allNouns.slice(0, maxNouns * 10);
+ // First, try to get a count of total nouns (if the adapter supports it)
+ let totalCount = undefined;
+ try {
+ // This is an optional method that adapters may implement
+ if (typeof this.countNouns === 'function') {
+ totalCount = await this.countNouns(options?.filter);
+ }
+ }
+ catch (countError) {
+ // Ignore errors from count method, it's optional
+ console.warn('Error getting noun count:', countError);
+ }
+ // Check if the adapter has a paginated method for getting nouns
+ if (typeof this.getNounsWithPagination === 'function') {
+ // Use the adapter's paginated method
+ const result = await this.getNounsWithPagination({
+ limit,
+ cursor,
+ filter: options?.filter
+ });
+ // Apply offset if needed (some adapters might not support offset)
+ const items = result.items.slice(offset);
+ return {
+ items,
+ totalCount: result.totalCount || totalCount,
+ hasMore: result.hasMore,
+ nextCursor: result.nextCursor
+ };
+ }
+ // If the adapter doesn't have a paginated method, fall back to the old approach
+ // but with a warning and a reasonable limit
+ console.warn('Storage adapter does not support pagination, falling back to loading all nouns. This may cause performance issues with large datasets.');
+ // Get nouns with a reasonable limit to avoid memory issues
+ const maxNouns = Math.min(offset + limit + 100, 1000); // Reasonable limit
+ let allNouns = [];
+ try {
+ // Try to get only the nouns we need
+ allNouns = await this.getAllNouns_internal();
+ // If we have too many nouns, truncate the array to avoid memory issues
+ if (allNouns.length > maxNouns) {
+ console.warn(`Large number of nouns (${allNouns.length}), truncating to ${maxNouns} for filtering`);
+ allNouns = allNouns.slice(0, maxNouns);
+ }
+ }
+ catch (error) {
+ console.error('Error getting all nouns:', error);
+ // Return empty result on error
+ return {
+ items: [],
+ totalCount: 0,
+ hasMore: false
+ };
  }
+ // Apply filtering if needed
+ let filteredNouns = allNouns;
+ if (options?.filter) {
+ // Filter by noun type
+ if (options.filter.nounType) {
+ const nounTypes = Array.isArray(options.filter.nounType)
+ ? options.filter.nounType
+ : [options.filter.nounType];
+ filteredNouns = filteredNouns.filter((noun) => {
+ // HNSWNoun doesn't have a type property directly, check metadata
+ const nounType = noun.metadata?.type;
+ return typeof nounType === 'string' && nounTypes.includes(nounType);
+ });
+ }
+ // Filter by service
+ if (options.filter.service) {
+ const services = Array.isArray(options.filter.service)
+ ? options.filter.service
+ : [options.filter.service];
+ filteredNouns = filteredNouns.filter((noun) => {
+ // HNSWNoun doesn't have a service property directly, check metadata
+ const service = noun.metadata?.service;
+ return typeof service === 'string' && services.includes(service);
+ });
+ }
+ // Filter by metadata
+ if (options.filter.metadata) {
+ const metadataFilter = options.filter.metadata;
+ filteredNouns = filteredNouns.filter((noun) => {
+ if (!noun.metadata)
+ return false;
+ // Check if all metadata keys match
+ return Object.entries(metadataFilter).every(([key, value]) => noun.metadata && noun.metadata[key] === value);
+ });
+ }
+ }
+ // Get total count before pagination
+ totalCount = totalCount || filteredNouns.length;
+ // Apply pagination
+ const paginatedNouns = filteredNouns.slice(offset, offset + limit);
+ const hasMore = offset + limit < filteredNouns.length || filteredNouns.length >= maxNouns;
+ // Set next cursor if there are more items
+ let nextCursor = undefined;
+ if (hasMore && paginatedNouns.length > 0) {
+ const lastItem = paginatedNouns[paginatedNouns.length - 1];
+ nextCursor = lastItem.id;
+ }
+ return {
+ items: paginatedNouns,
+ totalCount,
+ hasMore,
+ nextCursor
+ };
  }
  catch (error) {
- console.error('Error getting all nouns:', error);
- // Return empty result on error
+ console.error('Error getting nouns with pagination:', error);
  return {
  items: [],
  totalCount: 0,
  hasMore: false
  };
  }
- // Apply filtering if needed
- let filteredNouns = allNouns;
- if (options?.filter) {
- // Filter by noun type
- if (options.filter.nounType) {
- const nounTypes = Array.isArray(options.filter.nounType)
- ? options.filter.nounType
- : [options.filter.nounType];
- filteredNouns = filteredNouns.filter(noun => {
- // HNSWNoun doesn't have a type property directly, check metadata
- const nounType = noun.metadata?.type;
- return typeof nounType === 'string' && nounTypes.includes(nounType);
- });
- }
- // Filter by service
- if (options.filter.service) {
- const services = Array.isArray(options.filter.service)
- ? options.filter.service
- : [options.filter.service];
- filteredNouns = filteredNouns.filter(noun => {
- // HNSWNoun doesn't have a service property directly, check metadata
- const service = noun.metadata?.service;
- return typeof service === 'string' && services.includes(service);
- });
- }
- // Filter by metadata
- if (options.filter.metadata) {
- const metadataFilter = options.filter.metadata;
- filteredNouns = filteredNouns.filter(noun => {
- if (!noun.metadata)
- return false;
- // Check if all metadata keys match
- return Object.entries(metadataFilter).every(([key, value]) => noun.metadata && noun.metadata[key] === value);
- });
- }
- }
- // Get total count before pagination
- const totalCount = filteredNouns.length;
- // Apply pagination
- const paginatedNouns = filteredNouns.slice(offset, offset + limit);
- const hasMore = offset + limit < totalCount;
- // Set next cursor if there are more items
- let nextCursor = undefined;
- if (hasMore && paginatedNouns.length > 0) {
- const lastItem = paginatedNouns[paginatedNouns.length - 1];
- nextCursor = lastItem.id;
- }
- return {
- items: paginatedNouns,
- totalCount,
- hasMore,
- nextCursor
- };
  }
  /**
  * Get verbs with pagination and filtering
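Taken together, the rewritten getNouns() path prefers adapter-provided countNouns()/getNounsWithPagination() hooks and only falls back to loading (at most ~1000) nouns for in-memory filtering. A hedged call sketch, with hypothetical filter values:

const page = await storage.getNouns({
  filter: { nounType: 'person', service: 'crm', metadata: { active: true } },
  pagination: { limit: 50, offset: 0 }
})
// page => { items, totalCount, hasMore, nextCursor }

Note that in the fallback path, hasMore is also set whenever the truncation cap was hit, so totalCount can undercount the real population.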
@@ -5953,11 +6377,14 @@ class BaseStorage extends BaseStorageAdapter {
  const pagination = options?.pagination || {};
  const limit = pagination.limit || 100;
  const offset = pagination.offset || 0;
+ const cursor = pagination.cursor;
  // Optimize for common filter cases to avoid loading all verbs
  if (options?.filter) {
  // If filtering by sourceId only, use the optimized method
- if (options.filter.sourceId && !options.filter.verbType &&
- !options.filter.targetId && !options.filter.service &&
+ if (options.filter.sourceId &&
+ !options.filter.verbType &&
+ !options.filter.targetId &&
+ !options.filter.service &&
  !options.filter.metadata) {
  const sourceId = Array.isArray(options.filter.sourceId)
  ? options.filter.sourceId[0]
@@ -5981,8 +6408,10 @@ class BaseStorage extends BaseStorageAdapter {
  };
  }
  // If filtering by targetId only, use the optimized method
- if (options.filter.targetId && !options.filter.verbType &&
- !options.filter.sourceId && !options.filter.service &&
+ if (options.filter.targetId &&
+ !options.filter.verbType &&
+ !options.filter.sourceId &&
+ !options.filter.service &&
  !options.filter.metadata) {
  const targetId = Array.isArray(options.filter.targetId)
  ? options.filter.targetId[0]
@@ -6006,8 +6435,10 @@ class BaseStorage extends BaseStorageAdapter {
  };
  }
  // If filtering by verbType only, use the optimized method
- if (options.filter.verbType && !options.filter.sourceId &&
- !options.filter.targetId && !options.filter.service &&
+ if (options.filter.verbType &&
+ !options.filter.sourceId &&
+ !options.filter.targetId &&
+ !options.filter.service &&
  !options.filter.metadata) {
  const verbType = Array.isArray(options.filter.verbType)
  ? options.filter.verbType[0]
@@ -6031,91 +6462,134 @@ class BaseStorage extends BaseStorageAdapter {
  };
  }
  }
- // For more complex filtering or no filtering, we need to get all verbs
- // but limit the number we load to avoid memory issues
- const maxVerbs = offset + limit + 1; // Get one extra to check if there are more
- let allVerbs = [];
+ // For more complex filtering or no filtering, use a paginated approach
+ // that avoids loading all verbs into memory at once
  try {
- // Try to get only the verbs we need
- allVerbs = await this.getAllVerbs_internal();
- // If we have too many verbs, truncate the array to avoid memory issues
- if (allVerbs.length > maxVerbs * 10) {
- console.warn(`Large number of verbs (${allVerbs.length}), truncating to ${maxVerbs * 10} for filtering`);
- allVerbs = allVerbs.slice(0, maxVerbs * 10);
+ // First, try to get a count of total verbs (if the adapter supports it)
+ let totalCount = undefined;
+ try {
+ // This is an optional method that adapters may implement
+ if (typeof this.countVerbs === 'function') {
+ totalCount = await this.countVerbs(options?.filter);
+ }
  }
+ catch (countError) {
+ // Ignore errors from count method, it's optional
+ console.warn('Error getting verb count:', countError);
+ }
+ // Check if the adapter has a paginated method for getting verbs
+ if (typeof this.getVerbsWithPagination === 'function') {
+ // Use the adapter's paginated method
+ const result = await this.getVerbsWithPagination({
+ limit,
+ cursor,
+ filter: options?.filter
+ });
+ // Apply offset if needed (some adapters might not support offset)
+ const items = result.items.slice(offset);
+ return {
+ items,
+ totalCount: result.totalCount || totalCount,
+ hasMore: result.hasMore,
+ nextCursor: result.nextCursor
+ };
+ }
+ // If the adapter doesn't have a paginated method, fall back to the old approach
+ // but with a warning and a reasonable limit
+ console.warn('Storage adapter does not support pagination, falling back to loading all verbs. This may cause performance issues with large datasets.');
+ // Get verbs with a reasonable limit to avoid memory issues
+ const maxVerbs = Math.min(offset + limit + 100, 1000); // Reasonable limit
+ let allVerbs = [];
+ try {
+ // Try to get only the verbs we need
+ allVerbs = await this.getAllVerbs_internal();
+ // If we have too many verbs, truncate the array to avoid memory issues
+ if (allVerbs.length > maxVerbs) {
+ console.warn(`Large number of verbs (${allVerbs.length}), truncating to ${maxVerbs} for filtering`);
+ allVerbs = allVerbs.slice(0, maxVerbs);
+ }
+ }
+ catch (error) {
+ console.error('Error getting all verbs:', error);
+ // Return empty result on error
+ return {
+ items: [],
+ totalCount: 0,
+ hasMore: false
+ };
+ }
+ // Apply filtering if needed
+ let filteredVerbs = allVerbs;
+ if (options?.filter) {
+ // Filter by verb type
+ if (options.filter.verbType) {
+ const verbTypes = Array.isArray(options.filter.verbType)
+ ? options.filter.verbType
+ : [options.filter.verbType];
+ filteredVerbs = filteredVerbs.filter((verb) => verb.type !== undefined && verbTypes.includes(verb.type));
+ }
+ // Filter by source ID
+ if (options.filter.sourceId) {
+ const sourceIds = Array.isArray(options.filter.sourceId)
+ ? options.filter.sourceId
+ : [options.filter.sourceId];
+ filteredVerbs = filteredVerbs.filter((verb) => verb.sourceId !== undefined && sourceIds.includes(verb.sourceId));
+ }
+ // Filter by target ID
+ if (options.filter.targetId) {
+ const targetIds = Array.isArray(options.filter.targetId)
+ ? options.filter.targetId
+ : [options.filter.targetId];
+ filteredVerbs = filteredVerbs.filter((verb) => verb.targetId !== undefined && targetIds.includes(verb.targetId));
+ }
+ // Filter by service
+ if (options.filter.service) {
+ const services = Array.isArray(options.filter.service)
+ ? options.filter.service
+ : [options.filter.service];
+ filteredVerbs = filteredVerbs.filter((verb) => {
+ // GraphVerb doesn't have a service property directly, check metadata
+ const service = verb.metadata?.service;
+ return typeof service === 'string' && services.includes(service);
+ });
+ }
+ // Filter by metadata
+ if (options.filter.metadata) {
+ const metadataFilter = options.filter.metadata;
+ filteredVerbs = filteredVerbs.filter((verb) => {
+ if (!verb.metadata)
+ return false;
+ // Check if all metadata keys match
+ return Object.entries(metadataFilter).every(([key, value]) => verb.metadata && verb.metadata[key] === value);
+ });
+ }
+ }
+ // Get total count before pagination
+ totalCount = totalCount || filteredVerbs.length;
+ // Apply pagination
+ const paginatedVerbs = filteredVerbs.slice(offset, offset + limit);
+ const hasMore = offset + limit < filteredVerbs.length || filteredVerbs.length >= maxVerbs;
+ // Set next cursor if there are more items
+ let nextCursor = undefined;
+ if (hasMore && paginatedVerbs.length > 0) {
+ const lastItem = paginatedVerbs[paginatedVerbs.length - 1];
+ nextCursor = lastItem.id;
+ }
+ return {
+ items: paginatedVerbs,
+ totalCount,
+ hasMore,
+ nextCursor
+ };
  }
  catch (error) {
- console.error('Error getting all verbs:', error);
- // Return empty result on error
+ console.error('Error getting verbs with pagination:', error);
  return {
  items: [],
  totalCount: 0,
  hasMore: false
  };
  }
- // Apply filtering if needed
- let filteredVerbs = allVerbs;
- if (options?.filter) {
- // Filter by verb type
- if (options.filter.verbType) {
- const verbTypes = Array.isArray(options.filter.verbType)
- ? options.filter.verbType
- : [options.filter.verbType];
- filteredVerbs = filteredVerbs.filter(verb => verb.type !== undefined && verbTypes.includes(verb.type));
- }
- // Filter by source ID
- if (options.filter.sourceId) {
- const sourceIds = Array.isArray(options.filter.sourceId)
- ? options.filter.sourceId
- : [options.filter.sourceId];
- filteredVerbs = filteredVerbs.filter(verb => verb.sourceId !== undefined && sourceIds.includes(verb.sourceId));
- }
- // Filter by target ID
- if (options.filter.targetId) {
- const targetIds = Array.isArray(options.filter.targetId)
- ? options.filter.targetId
- : [options.filter.targetId];
- filteredVerbs = filteredVerbs.filter(verb => verb.targetId !== undefined && targetIds.includes(verb.targetId));
- }
- // Filter by service
- if (options.filter.service) {
- const services = Array.isArray(options.filter.service)
- ? options.filter.service
- : [options.filter.service];
- filteredVerbs = filteredVerbs.filter(verb => {
- // GraphVerb doesn't have a service property directly, check metadata
- const service = verb.metadata?.service;
- return typeof service === 'string' && services.includes(service);
- });
- }
- // Filter by metadata
- if (options.filter.metadata) {
- const metadataFilter = options.filter.metadata;
- filteredVerbs = filteredVerbs.filter(verb => {
- if (!verb.metadata)
- return false;
- // Check if all metadata keys match
- return Object.entries(metadataFilter).every(([key, value]) => verb.metadata && verb.metadata[key] === value);
- });
- }
- }
- // Get total count before pagination
- const totalCount = filteredVerbs.length;
- // Apply pagination
- const paginatedVerbs = filteredVerbs.slice(offset, offset + limit);
- const hasMore = offset + limit < totalCount;
- // Set next cursor if there are more items
- let nextCursor = undefined;
- if (hasMore && paginatedVerbs.length > 0) {
- const lastItem = paginatedVerbs[paginatedVerbs.length - 1];
- nextCursor = lastItem.id;
- }
- return {
- items: paginatedVerbs,
- totalCount,
- hasMore,
- nextCursor
- };
  }
  /**
  * Delete a verb from storage
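The verb path mirrors the noun path, so a custom adapter can opt in to true pagination by implementing the optional hooks that both methods probe for with typeof checks. A hedged sketch; this.backend and its query methods are hypothetical stand-ins for whatever datastore the adapter wraps:

class MyAdapter extends BaseStorage {
  // Optional hook; detected via `typeof this.getVerbsWithPagination === 'function'`.
  async getVerbsWithPagination({ limit, cursor, filter }) {
    // Fetch one extra row to detect whether another page exists.
    const rows = await this.backend.queryVerbs({ after: cursor, limit: limit + 1, filter })
    const items = rows.slice(0, limit)
    return {
      items,
      totalCount: await this.countVerbs(filter),
      hasMore: rows.length > limit,
      nextCursor: rows.length > limit ? items[items.length - 1].id : undefined
    }
  }
  // Optional hook; detected via `typeof this.countVerbs === 'function'`.
  async countVerbs(filter) {
    return this.backend.countVerbs(filter)
  }
}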
@@ -7924,10 +8398,1055 @@ class StorageOperationExecutors {
7924
8398
  return this.addExecutor(operation, operationName);
7925
8399
  }
7926
8400
  /**
7927
- * Execute a delete operation with timeout and retry
8401
+ * Execute a delete operation with timeout and retry
8402
+ */
8403
+ async executeDelete(operation, operationName) {
8404
+ return this.deleteExecutor(operation, operationName);
8405
+ }
8406
+ }
8407
+
8408
+ /**
8409
+ * Multi-level Cache Manager
8410
+ *
8411
+ * Implements a three-level caching strategy:
8412
+ * - Level 1: Hot cache (most accessed nodes) - RAM (automatically detecting and adjusting in each environment)
8413
+ * - Level 2: Warm cache (recent nodes) - OPFS, Filesystem or S3 depending on environment
8414
+ * - Level 3: Cold storage (all nodes) - OPFS, Filesystem or S3 depending on environment
8415
+ */
8416
+ // Environment detection for storage selection
8417
+ var Environment$1;
8418
+ (function (Environment) {
8419
+ Environment[Environment["BROWSER"] = 0] = "BROWSER";
8420
+ Environment[Environment["NODE"] = 1] = "NODE";
8421
+ Environment[Environment["WORKER"] = 2] = "WORKER";
8422
+ })(Environment$1 || (Environment$1 = {}));
8423
+ // Storage type for warm and cold caches
8424
+ var StorageType;
8425
+ (function (StorageType) {
8426
+ StorageType[StorageType["MEMORY"] = 0] = "MEMORY";
8427
+ StorageType[StorageType["OPFS"] = 1] = "OPFS";
8428
+ StorageType[StorageType["FILESYSTEM"] = 2] = "FILESYSTEM";
8429
+ StorageType[StorageType["S3"] = 3] = "S3";
8430
+ StorageType[StorageType["REMOTE_API"] = 4] = "REMOTE_API";
8431
+ })(StorageType || (StorageType = {}));
8432
+ /**
8433
+ * Multi-level cache manager for efficient data access
8434
+ */
8435
+ class CacheManager {
8436
+ /**
8437
+ * Initialize the cache manager
8438
+ * @param options Configuration options
8439
+ */
8440
+ constructor(options = {}) {
8441
+ // Hot cache (RAM)
8442
+ this.hotCache = new Map();
8443
+ // Cache statistics
8444
+ this.stats = {
8445
+ hits: 0,
8446
+ misses: 0,
8447
+ evictions: 0,
8448
+ size: 0,
8449
+ maxSize: 0
8450
+ };
8451
+ this.lastAutoTuneTime = 0;
8452
+ this.autoTuneInterval = 5 * 60 * 1000; // 5 minutes
8453
+ this.storageStatistics = null;
8454
+ // Store options for later reference
8455
+ this.options = options;
8456
+ // Detect environment
8457
+ this.environment = this.detectEnvironment();
8458
+ // Set storage types based on environment
8459
+ this.warmStorageType = this.detectWarmStorageType();
8460
+ this.coldStorageType = this.detectColdStorageType();
8461
+ // Initialize storage adapters
8462
+ this.warmStorage = options.warmStorage || this.initializeWarmStorage();
8463
+ this.coldStorage = options.coldStorage || this.initializeColdStorage();
8464
+ // Set auto-tuning flag
8465
+ this.autoTune = options.autoTune !== undefined ? options.autoTune : true;
8466
+ // Set default values or use provided values
8467
+ this.hotCacheMaxSize = options.hotCacheMaxSize || this.detectOptimalCacheSize();
8468
+ this.hotCacheEvictionThreshold = options.hotCacheEvictionThreshold || 0.8;
8469
+ this.warmCacheTTL = options.warmCacheTTL || 24 * 60 * 60 * 1000; // 24 hours
8470
+ this.batchSize = options.batchSize || 10;
8471
+ // If auto-tuning is enabled, perform initial tuning
8472
+ if (this.autoTune) {
8473
+ this.tuneParameters();
8474
+ }
8475
+ // Log configuration
8476
+ if (process.env.DEBUG) {
8477
+ console.log('Cache Manager initialized with configuration:', {
8478
+ environment: Environment$1[this.environment],
8479
+ hotCacheMaxSize: this.hotCacheMaxSize,
8480
+ hotCacheEvictionThreshold: this.hotCacheEvictionThreshold,
8481
+ warmCacheTTL: this.warmCacheTTL,
8482
+ batchSize: this.batchSize,
8483
+ autoTune: this.autoTune,
8484
+ warmStorageType: StorageType[this.warmStorageType],
8485
+ coldStorageType: StorageType[this.coldStorageType]
8486
+ });
8487
+ }
8488
+ }
8489
+ /**
8490
+ * Detect the current environment
8491
+ */
8492
+ detectEnvironment() {
8493
+ if (typeof window !== 'undefined' && typeof document !== 'undefined') {
8494
+ return Environment$1.BROWSER;
8495
+ }
8496
+ else if (typeof self !== 'undefined' && typeof window === 'undefined') {
8497
+ // In a worker environment, self is defined but window is not
8498
+ return Environment$1.WORKER;
8499
+ }
8500
+ else {
8501
+ return Environment$1.NODE;
8502
+ }
8503
+ }
8504
+ /**
8505
+ * Detect the optimal cache size based on available memory and operating mode
8506
+ *
8507
+ * Enhanced to better handle large datasets in S3 or other storage:
8508
+ * - Increases cache size for read-only mode
8509
+ * - Adjusts based on total dataset size when available
8510
+ * - Provides more aggressive caching for large datasets
8511
+ * - Optimizes memory usage based on environment
8512
+ */
8513
+ detectOptimalCacheSize() {
8514
+ try {
8515
+ // Default to a conservative value
8516
+ const defaultSize = 1000;
8517
+ // Get the total dataset size if available
8518
+ const totalItems = this.storageStatistics ?
8519
+ (this.storageStatistics.totalNodes || 0) + (this.storageStatistics.totalEdges || 0) : 0;
8520
+ // Determine if we're dealing with a large dataset (>100K items)
8521
+ const isLargeDataset = totalItems > 100000;
8522
+ // Check if we're in read-only mode (from parent BrainyData instance)
8523
+ const isReadOnly = this.options?.readOnly || false;
8524
+ // In Node.js, use available system memory with enhanced allocation
8525
+ if (this.environment === Environment$1.NODE) {
8526
+ try {
8527
+ // Use dynamic import to avoid ESLint warning
8528
+ const getOS = () => {
8529
+ // eslint-disable-next-line @typescript-eslint/no-var-requires
8530
+ return require('os');
8531
+ };
8532
+ const os = getOS();
8533
+ const totalMemory = os.totalmem();
8534
+ const freeMemory = os.freemem();
8535
+ // Estimate average entry size (in bytes)
8536
+ // This is a conservative estimate for complex objects with vectors
8537
+ const ESTIMATED_BYTES_PER_ENTRY = 1024; // 1KB per entry
8538
+ // Base memory percentage - 10% by default
8539
+ let memoryPercentage = 0.1;
8540
+ // Adjust based on operating mode and dataset size
8541
+ if (isReadOnly) {
8542
+ // In read-only mode, we can use more memory for caching
8543
+ memoryPercentage = 0.25; // 25% of free memory
8544
+ // For large datasets in read-only mode, be even more aggressive
8545
+ if (isLargeDataset) {
8546
+ memoryPercentage = 0.4; // 40% of free memory
8547
+ }
8548
+ }
8549
+ else if (isLargeDataset) {
8550
+ // For large datasets in normal mode, increase slightly
8551
+ memoryPercentage = 0.15; // 15% of free memory
8552
+ }
8553
+ // Calculate optimal size based on adjusted percentage
8554
+ const optimalSize = Math.max(Math.floor(freeMemory * memoryPercentage / ESTIMATED_BYTES_PER_ENTRY), 1000);
8555
+ // If we know the total dataset size, cap at a reasonable percentage
8556
+ if (totalItems > 0) {
8557
+ // In read-only mode, we can cache a larger percentage
8558
+ const maxPercentage = isReadOnly ? 0.5 : 0.3;
8559
+ const maxItems = Math.ceil(totalItems * maxPercentage);
8560
+ // Return the smaller of the two to avoid excessive memory usage
8561
+ return Math.min(optimalSize, maxItems);
8562
+ }
8563
+ return optimalSize;
8564
+ }
8565
+ catch (error) {
8566
+ console.warn('Failed to detect optimal cache size:', error);
8567
+ return defaultSize;
8568
+ }
8569
+ }
8570
+ // In browser, use navigator.deviceMemory with enhanced allocation
8571
+ if (this.environment === Environment$1.BROWSER && navigator.deviceMemory) {
8572
+ // Base entries per GB
8573
+ let entriesPerGB = 500;
8574
+ // Adjust based on operating mode and dataset size
8575
+ if (isReadOnly) {
8576
+ entriesPerGB = 800; // More aggressive caching in read-only mode
8577
+ if (isLargeDataset) {
8578
+ entriesPerGB = 1000; // Even more aggressive for large datasets
8579
+ }
8580
+ }
8581
+ else if (isLargeDataset) {
8582
+ entriesPerGB = 600; // Slightly more aggressive for large datasets
8583
+ }
8584
+ // Calculate based on device memory
8585
+ const browserCacheSize = Math.max(navigator.deviceMemory * entriesPerGB, 1000);
8586
+ // If we know the total dataset size, cap at a reasonable percentage
8587
+ if (totalItems > 0) {
8588
+ // In read-only mode, we can cache a larger percentage
8589
+ const maxPercentage = isReadOnly ? 0.4 : 0.25;
8590
+ const maxItems = Math.ceil(totalItems * maxPercentage);
8591
+ // Return the smaller of the two to avoid excessive memory usage
8592
+ return Math.min(browserCacheSize, maxItems);
8593
+ }
8594
+ return browserCacheSize;
8595
+ }
8596
+ // For worker environments or when memory detection fails
8597
+ if (this.environment === Environment$1.WORKER) {
8598
+ // Workers typically have limited memory, be conservative
8599
+ return isReadOnly ? 2000 : 1000;
8600
+ }
8601
+ return defaultSize;
8602
+ }
8603
+ catch (error) {
8604
+ console.warn('Error detecting optimal cache size:', error);
8605
+ return 1000; // Conservative default
8606
+ }
8607
+ }
8608
+ /**
8609
+ * Tune cache parameters based on statistics and environment
8610
+ * This method is called periodically if auto-tuning is enabled
8611
+ *
8612
+ * The auto-tuning process:
8613
+ * 1. Retrieves storage statistics if available
8614
+ * 2. Tunes each parameter based on statistics and environment
8615
+ * 3. Logs the tuned parameters if debug is enabled
8616
+ *
8617
+ * Auto-tuning helps optimize cache performance by adapting to:
8618
+ * - The current environment (Node.js, browser, worker)
8619
+ * - Available system resources (memory, CPU)
8620
+ * - Usage patterns (read-heavy vs. write-heavy workloads)
8621
+ * - Cache efficiency (hit/miss ratios)
8622
+ */
8623
+ async tuneParameters() {
8624
+ // Skip if auto-tuning is disabled
8625
+ if (!this.autoTune)
8626
+ return;
8627
+ // Check if it's time to tune parameters
8628
+ const now = Date.now();
8629
+ if (now - this.lastAutoTuneTime < this.autoTuneInterval)
8630
+ return;
8631
+ // Update last tune time
8632
+ this.lastAutoTuneTime = now;
8633
+ try {
8634
+ // Get storage statistics if available
8635
+ if (this.coldStorage && typeof this.coldStorage.getStatistics === 'function') {
8636
+ this.storageStatistics = await this.coldStorage.getStatistics();
8637
+ }
8638
+ // Tune hot cache size
8639
+ this.tuneHotCacheSize();
8640
+ // Tune eviction threshold
8641
+ this.tuneEvictionThreshold();
8642
+ // Tune warm cache TTL
8643
+ this.tuneWarmCacheTTL();
8644
+ // Tune batch size
8645
+ this.tuneBatchSize();
8646
+ // Log tuned parameters if debug is enabled
8647
+ if (process.env.DEBUG) {
8648
+ console.log('Cache parameters auto-tuned:', {
8649
+ hotCacheMaxSize: this.hotCacheMaxSize,
8650
+ hotCacheEvictionThreshold: this.hotCacheEvictionThreshold,
8651
+ warmCacheTTL: this.warmCacheTTL,
8652
+ batchSize: this.batchSize
8653
+ });
8654
+ }
8655
+ }
8656
+ catch (error) {
8657
+ console.warn('Error during cache parameter auto-tuning:', error);
8658
+ }
8659
+ }
8660
+ /**
8661
+ * Tune hot cache size based on statistics, environment, and operating mode
8662
+ *
8663
+ * The hot cache size is tuned based on:
8664
+ * 1. Available memory in the current environment
8665
+ * 2. Total number of nodes and edges in the system
8666
+ * 3. Cache hit/miss ratio
8667
+ * 4. Operating mode (read-only vs. read-write)
8668
+ * 5. Storage type (S3, filesystem, memory)
8669
+ *
8670
+ * Enhanced algorithm:
8671
+ * - Start with a size based on available memory and operating mode
8672
+ * - For large datasets in S3 or other remote storage, use more aggressive caching
8673
+ * - Adjust based on access patterns (read-heavy vs. write-heavy)
8674
+ * - For read-only mode, prioritize cache size over eviction speed
8675
+ * - Dynamically adjust based on hit/miss ratio and query patterns
8676
+ */
8677
+ tuneHotCacheSize() {
8678
+ // Start with the base size from environment detection
8679
+ let optimalSize = this.detectOptimalCacheSize();
8680
+ // Check if we're in read-only mode
8681
+ const isReadOnly = this.options?.readOnly || false;
8682
+ // Check if we're using S3 or other remote storage
8683
+ const isRemoteStorage = this.coldStorageType === StorageType.S3 ||
8684
+ this.coldStorageType === StorageType.REMOTE_API;
8685
+ // If we have storage statistics, adjust based on total nodes/edges
8686
+ if (this.storageStatistics) {
8687
+ const totalItems = (this.storageStatistics.totalNodes || 0) +
8688
+ (this.storageStatistics.totalEdges || 0);
8689
+ // If total items is significant, adjust cache size
8690
+ if (totalItems > 0) {
8691
+ // Base percentage to cache - adjusted based on mode and storage
8692
+ let percentageToCache = 0.2; // Cache 20% of items by default
8693
+ // For read-only mode, increase cache percentage
8694
+ if (isReadOnly) {
8695
+ percentageToCache = 0.3; // 30% for read-only mode
8696
+ // For remote storage in read-only mode, be even more aggressive
8697
+ if (isRemoteStorage) {
8698
+ percentageToCache = 0.4; // 40% for remote storage in read-only mode
8699
+ }
8700
+ }
8701
+ // For remote storage in normal mode, increase slightly
8702
+ else if (isRemoteStorage) {
8703
+ percentageToCache = 0.25; // 25% for remote storage
8704
+ }
8705
+ // For large datasets, cap the percentage to avoid excessive memory usage
8706
+ if (totalItems > 1000000) { // Over 1 million items
8707
+ percentageToCache = Math.min(percentageToCache, 0.15);
8708
+ }
8709
+ else if (totalItems > 100000) { // Over 100K items
8710
+ percentageToCache = Math.min(percentageToCache, 0.25);
8711
+ }
8712
+ const statisticsBasedSize = Math.ceil(totalItems * percentageToCache);
8713
+ // Use the smaller of the two to avoid memory issues
8714
+ optimalSize = Math.min(optimalSize, statisticsBasedSize);
8715
+ }
8716
+ }
8717
+ // Adjust based on hit/miss ratio if we have enough data
8718
+ const totalAccesses = this.stats.hits + this.stats.misses;
8719
+ if (totalAccesses > 100) {
8720
+ const hitRatio = this.stats.hits / totalAccesses;
8721
+ // Base adjustment factor
8722
+ let hitRatioFactor = 1.0;
8723
+ // If hit ratio is low, we might need a larger cache
8724
+ if (hitRatio < 0.5) {
8725
+ // Calculate adjustment factor based on hit ratio
8726
+ const baseAdjustment = 0.5 - hitRatio;
8727
+ // For read-only mode or remote storage, be more aggressive
8728
+ if (isReadOnly || isRemoteStorage) {
8729
+ hitRatioFactor = 1 + (baseAdjustment * 1.5); // Up to 75% increase
8730
+ }
8731
+ else {
8732
+ hitRatioFactor = 1 + baseAdjustment; // Up to 50% increase
8733
+ }
8734
+ optimalSize = Math.ceil(optimalSize * hitRatioFactor);
8735
+ }
8736
+ // If hit ratio is very high, we might be able to reduce cache size slightly
8737
+ else if (hitRatio > 0.9 && !isReadOnly && !isRemoteStorage) {
8738
+ // Only reduce cache size in normal mode with local storage
8739
+ // and only if hit ratio is very high
8740
+ hitRatioFactor = 0.9; // 10% reduction
8741
+ optimalSize = Math.ceil(optimalSize * hitRatioFactor);
8742
+ }
8743
+ }
8744
+ // Check for operation patterns if available
8745
+ if (this.storageStatistics?.operations) {
8746
+ const ops = this.storageStatistics.operations;
8747
+ const totalOps = ops.total || 1;
8748
+ // Calculate read/write ratio
8749
+ const readOps = (ops.search || 0) + (ops.get || 0);
8750
+ (ops.add || 0) + (ops.update || 0) + (ops.delete || 0);
8751
+ if (totalOps > 100) {
8752
+ const readRatio = readOps / totalOps;
8753
+ // For read-heavy workloads, increase cache size
8754
+ if (readRatio > 0.8) {
8755
+ // More aggressive for remote storage
8756
+ const readAdjustment = isRemoteStorage ? 1.3 : 1.2;
8757
+ optimalSize = Math.ceil(optimalSize * readAdjustment);
8758
+ }
8759
+ }
8760
+ }
8761
+ // Ensure we have a reasonable minimum size based on environment and mode
8762
+ let minSize = 1000; // Default minimum
8763
+ // For read-only mode, use a higher minimum
8764
+ if (isReadOnly) {
8765
+ minSize = 2000;
8766
+ }
8767
+ // For remote storage, use an even higher minimum
8768
+ if (isRemoteStorage) {
8769
+ minSize = isReadOnly ? 3000 : 2000;
8770
+ }
8771
+ optimalSize = Math.max(optimalSize, minSize);
8772
+ // Update the hot cache max size
8773
+ this.hotCacheMaxSize = optimalSize;
8774
+ this.stats.maxSize = optimalSize;
8775
+ }
8776
+ /**
8777
+ * Tune eviction threshold based on statistics
8778
+ *
8779
+ * The eviction threshold determines when items start being evicted from the hot cache.
8780
+ * It is tuned based on:
8781
+ * 1. Cache hit/miss ratio
8782
+ * 2. Operation patterns (read-heavy vs. write-heavy workloads)
8783
+ *
8784
+ * Algorithm:
8785
+ * - Start with a default threshold of 0.8 (80% of max size)
8786
+ * - For high hit ratios, increase the threshold to keep more items in cache
8787
+ * - For low hit ratios, decrease the threshold to evict items more aggressively
8788
+ * - For read-heavy workloads, use a higher threshold
8789
+ * - For write-heavy workloads, use a lower threshold
8790
+ */
8791
+ tuneEvictionThreshold() {
8792
+ // Default threshold
8793
+ let threshold = 0.8;
8794
+ // Adjust based on hit/miss ratio if we have enough data
8795
+ const totalAccesses = this.stats.hits + this.stats.misses;
8796
+ if (totalAccesses > 100) {
8797
+ const hitRatio = this.stats.hits / totalAccesses;
8798
+ // If hit ratio is high, we can use a higher threshold
8799
+ // If hit ratio is low, we should use a lower threshold to evict more aggressively
8800
+ if (hitRatio > 0.8) {
8801
+ // High hit ratio, increase threshold (up to 0.9)
8802
+ threshold = Math.min(0.9, 0.8 + (hitRatio - 0.8));
8803
+ }
8804
+ else if (hitRatio < 0.5) {
8805
+ // Low hit ratio, decrease threshold (down to 0.6)
8806
+ threshold = Math.max(0.6, 0.8 - (0.5 - hitRatio));
8807
+ }
8808
+ }
8809
+ // If we have storage statistics with operation counts, adjust based on operation patterns
8810
+ if (this.storageStatistics && this.storageStatistics.operations) {
8811
+ const ops = this.storageStatistics.operations;
8812
+ const totalOps = ops.total || 1;
8813
+ // Calculate read/write ratio
8814
+ const readOps = ops.search || 0;
8815
+ const writeOps = (ops.add || 0) + (ops.update || 0) + (ops.delete || 0);
8816
+ if (totalOps > 100) {
8817
+ const readRatio = readOps / totalOps;
8818
+ const writeRatio = writeOps / totalOps;
8819
+ // For read-heavy workloads, use higher threshold
8820
+ // For write-heavy workloads, use lower threshold
8821
+ if (readRatio > 0.8) {
8822
+ // Read-heavy, increase threshold slightly
8823
+ threshold = Math.min(0.9, threshold + 0.05);
8824
+ }
8825
+ else if (writeRatio > 0.5) {
8826
+ // Write-heavy, decrease threshold
8827
+ threshold = Math.max(0.6, threshold - 0.1);
8828
+ }
8829
+ }
8830
+ }
8831
+ // Update the eviction threshold
8832
+ this.hotCacheEvictionThreshold = threshold;
8833
+ }
8834
+ /**
8835
+ * Tune warm cache TTL based on statistics
8836
+ *
8837
+ * The warm cache TTL determines how long items remain in the warm cache.
8838
+ * It is tuned based on:
8839
+ * 1. Update frequency from operation statistics
8840
+ *
8841
+ * Algorithm:
8842
+ * - Start with a default TTL of 24 hours
8843
+ * - For frequently updated data, use a shorter TTL
8844
+ * - For rarely updated data, use a longer TTL
8845
+ */
8846
+ tuneWarmCacheTTL() {
8847
+ // Default TTL (24 hours)
8848
+ let ttl = 24 * 60 * 60 * 1000;
8849
+ // If we have storage statistics with operation counts, adjust based on update frequency
8850
+ if (this.storageStatistics && this.storageStatistics.operations) {
8851
+ const ops = this.storageStatistics.operations;
8852
+ const totalOps = ops.total || 1;
8853
+ const updateOps = (ops.update || 0);
8854
+ if (totalOps > 100) {
8855
+ const updateRatio = updateOps / totalOps;
8856
+ // For frequently updated data, use shorter TTL
8857
+ // For rarely updated data, use longer TTL
8858
+ if (updateRatio > 0.3) {
8859
+ // Frequently updated, decrease TTL (down to 6 hours)
8860
+ ttl = Math.max(6 * 60 * 60 * 1000, ttl * (1 - updateRatio));
8861
+ }
8862
+ else if (updateRatio < 0.1) {
8863
+ // Rarely updated, increase TTL (up to 48 hours)
8864
+ ttl = Math.min(48 * 60 * 60 * 1000, ttl * (1.5 - updateRatio));
8865
+ }
8866
+ }
8867
+ }
8868
+ // Update the warm cache TTL
8869
+ this.warmCacheTTL = ttl;
8870
+ }
8871
+ /**
+ * Tune batch size based on environment, statistics, and operating mode
+ *
+ * The batch size determines how many items are processed in a single batch
+ * for operations like prefetching. It is tuned based on:
+ * 1. Current environment (Node.js, browser, worker)
+ * 2. Available memory
+ * 3. Operation patterns
+ * 4. Cache hit/miss ratio
+ * 5. Operating mode (read-only vs. read-write)
+ * 6. Storage type (S3, filesystem, memory)
+ * 7. Dataset size
+ *
+ * Enhanced algorithm:
+ * - Start with a default based on the environment
+ * - For large datasets in S3 or other remote storage, use larger batches
+ * - For read-only mode, use larger batches to improve throughput
+ * - Dynamically adjust based on network latency and throughput
+ * - Balance between memory usage and performance
+ */
+ tuneBatchSize() {
+ // Default batch size
+ let batchSize = 10;
+ // Check if we're in read-only mode
+ const isReadOnly = this.options?.readOnly || false;
+ // Check if we're using S3 or other remote storage
+ const isRemoteStorage = this.coldStorageType === StorageType.S3 ||
+ this.coldStorageType === StorageType.REMOTE_API;
+ // Get the total dataset size if available
+ const totalItems = this.storageStatistics ?
+ (this.storageStatistics.totalNodes || 0) + (this.storageStatistics.totalEdges || 0) : 0;
+ // Determine if we're dealing with a large dataset
+ const isLargeDataset = totalItems > 100000;
+ const isVeryLargeDataset = totalItems > 1000000;
+ // Base batch size adjustment based on environment
+ if (this.environment === Environment$1.NODE) {
+ // Node.js can handle larger batches
+ batchSize = isReadOnly ? 30 : 20;
+ // For remote storage, increase batch size
+ if (isRemoteStorage) {
+ batchSize = isReadOnly ? 50 : 30;
+ }
+ // For large datasets, adjust batch size
+ if (isLargeDataset) {
+ batchSize = Math.min(100, batchSize * 1.5);
+ }
+ // For very large datasets, adjust even more
+ if (isVeryLargeDataset) {
+ batchSize = Math.min(200, batchSize * 2);
+ }
+ }
+ else if (this.environment === Environment$1.BROWSER) {
+ // Browsers might need smaller batches
+ batchSize = isReadOnly ? 15 : 10;
+ // If we have memory information, adjust accordingly
+ if (navigator.deviceMemory) {
+ // Scale batch size with available memory
+ const memoryFactor = isReadOnly ? 3 : 2;
+ batchSize = Math.max(5, Math.min(30, Math.floor(navigator.deviceMemory * memoryFactor)));
+ // For large datasets, adjust based on memory
+ if (isLargeDataset && navigator.deviceMemory > 4) {
+ batchSize = Math.min(50, batchSize * 1.5);
+ }
+ }
+ }
+ else if (this.environment === Environment$1.WORKER) {
+ // Workers can handle moderate batch sizes
+ batchSize = isReadOnly ? 20 : 15;
+ }
+ // If we have storage statistics with operation counts, adjust based on operation patterns
+ if (this.storageStatistics && this.storageStatistics.operations) {
+ const ops = this.storageStatistics.operations;
+ const totalOps = ops.total || 1;
+ const searchOps = (ops.search || 0);
+ const getOps = (ops.get || 0);
+ if (totalOps > 100) {
+ // Calculate search and get ratios
+ const searchRatio = searchOps / totalOps;
+ const getRatio = getOps / totalOps;
+ // For search-heavy workloads, use larger batch size
+ if (searchRatio > 0.6) {
+ // Search-heavy, increase batch size
+ const searchFactor = isRemoteStorage ? 1.8 : 1.5;
+ batchSize = Math.min(isRemoteStorage ? 200 : 100, Math.ceil(batchSize * searchFactor));
+ }
+ // For get-heavy workloads, adjust batch size
+ if (getRatio > 0.6) {
+ // Get-heavy, adjust batch size based on storage type
+ if (isRemoteStorage) {
+ // For remote storage, larger batches reduce network overhead
+ batchSize = Math.min(150, Math.ceil(batchSize * 1.5));
+ }
+ else {
+ // For local storage, smaller batches might be more efficient
+ batchSize = Math.max(10, Math.ceil(batchSize * 0.9));
+ }
+ }
+ }
+ }
+ // Adjust based on hit/miss ratio if we have enough data
+ const totalAccesses = this.stats.hits + this.stats.misses;
+ if (totalAccesses > 100) {
+ const hitRatio = this.stats.hits / totalAccesses;
+ // Base adjustment factors
+ let increaseFactorForLowHitRatio = isRemoteStorage ? 1.5 : 1.2;
+ let decreaseFactorForHighHitRatio = 0.8;
+ // In read-only mode, be more aggressive with batch size adjustments
+ if (isReadOnly) {
+ increaseFactorForLowHitRatio = isRemoteStorage ? 2.0 : 1.5;
+ decreaseFactorForHighHitRatio = 0.9; // Less reduction in read-only mode
+ }
+ // If hit ratio is high, we can use smaller batches
+ if (hitRatio > 0.8 && !isVeryLargeDataset) {
+ // High hit ratio, decrease batch size slightly
+ // But don't decrease too much for large datasets or remote storage
+ if (!(isLargeDataset && isRemoteStorage)) {
+ batchSize = Math.max(isReadOnly ? 10 : 5, Math.floor(batchSize * decreaseFactorForHighHitRatio));
+ }
+ }
+ // If hit ratio is low, we need larger batches
+ else if (hitRatio < 0.5) {
+ // Low hit ratio, increase batch size
+ const maxBatchSize = isRemoteStorage ?
+ (isVeryLargeDataset ? 300 : 200) :
+ (isVeryLargeDataset ? 150 : 100);
+ batchSize = Math.min(maxBatchSize, Math.ceil(batchSize * increaseFactorForLowHitRatio));
+ }
+ }
+ // Set minimum batch sizes based on storage type and mode
+ let minBatchSize = 5;
+ if (isRemoteStorage) {
+ minBatchSize = isReadOnly ? 20 : 10;
+ }
+ else if (isReadOnly) {
+ minBatchSize = 10;
+ }
+ // Ensure batch size is within reasonable limits
+ batchSize = Math.max(minBatchSize, batchSize);
+ // Cap maximum batch size based on environment and storage
+ const maxBatchSize = isRemoteStorage ?
+ (this.environment === Environment$1.NODE ? 300 : 150) :
+ (this.environment === Environment$1.NODE ? 150 : 75);
+ batchSize = Math.min(maxBatchSize, batchSize);
+ // Update the batch size
+ this.batchSize = batchSize;
+ }
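// Editorial sketch (not shipped code): one trace through tuneBatchSize() for an assumed
// deployment - Node.js, S3 cold storage, read-only, ~150k items, hit ratio 0.4:
//   base (Node, read-only)                    -> 30
//   remote storage bump                       -> 50
//   large dataset: min(100, 50 * 1.5)         -> 75
//   low hit ratio, read-only remote factor 2.0: min(200, ceil(75 * 2)) -> 150
//   clamps (min 20, cap 300 for Node + remote) leave the final batch size at 150.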
+ /**
+ * Detect the appropriate warm storage type based on environment
+ */
+ detectWarmStorageType() {
+ if (this.environment === Environment$1.BROWSER) {
+ // Use OPFS if available, otherwise use memory
+ if ('storage' in navigator && 'getDirectory' in navigator.storage) {
+ return StorageType.OPFS;
+ }
+ return StorageType.MEMORY;
+ }
+ else if (this.environment === Environment$1.WORKER) {
+ // Use OPFS if available, otherwise use memory
+ if ('storage' in self && 'getDirectory' in self.storage) {
+ return StorageType.OPFS;
+ }
+ return StorageType.MEMORY;
+ }
+ else {
+ // In Node.js, use filesystem
+ return StorageType.FILESYSTEM;
+ }
+ }
+ /**
+ * Detect the appropriate cold storage type based on environment
+ */
+ detectColdStorageType() {
+ if (this.environment === Environment$1.BROWSER) {
+ // Use OPFS if available, otherwise use memory
+ if ('storage' in navigator && 'getDirectory' in navigator.storage) {
+ return StorageType.OPFS;
+ }
+ return StorageType.MEMORY;
+ }
+ else if (this.environment === Environment$1.WORKER) {
+ // Use OPFS if available, otherwise use memory
+ if ('storage' in self && 'getDirectory' in self.storage) {
+ return StorageType.OPFS;
+ }
+ return StorageType.MEMORY;
+ }
+ else {
+ // In Node.js, use S3 if configured, otherwise filesystem
+ return StorageType.S3;
+ }
+ }
+ /**
+ * Initialize warm storage adapter
+ */
+ initializeWarmStorage() {
+ // Implementation depends on the detected storage type
+ // For now, return null as this will be provided by the storage adapter
+ return null;
+ }
+ /**
+ * Initialize cold storage adapter
+ */
+ initializeColdStorage() {
+ // Implementation depends on the detected storage type
+ // For now, return null as this will be provided by the storage adapter
+ return null;
+ }
+ /**
+ * Get an item from cache, trying each level in order
+ * @param id The item ID
+ * @returns The cached item or null if not found
+ */
+ async get(id) {
+ // Check if it's time to tune parameters
+ await this.checkAndTuneParameters();
+ // Try hot cache first (fastest)
+ const hotCacheEntry = this.hotCache.get(id);
+ if (hotCacheEntry) {
+ // Update access metadata
+ hotCacheEntry.lastAccessed = Date.now();
+ hotCacheEntry.accessCount++;
+ // Update stats
+ this.stats.hits++;
+ return hotCacheEntry.data;
+ }
+ // Try warm cache next
+ try {
+ const warmCacheItem = await this.getFromWarmCache(id);
+ if (warmCacheItem) {
+ // Promote to hot cache
+ this.addToHotCache(id, warmCacheItem);
+ // Update stats
+ this.stats.hits++;
+ return warmCacheItem;
+ }
+ }
+ catch (error) {
+ console.warn(`Error accessing warm cache for ${id}:`, error);
+ }
+ // Finally, try cold storage
+ try {
+ const coldStorageItem = await this.getFromColdStorage(id);
+ if (coldStorageItem) {
+ // Promote to hot and warm caches
+ this.addToHotCache(id, coldStorageItem);
+ await this.addToWarmCache(id, coldStorageItem);
+ // Update stats
+ this.stats.misses++;
+ return coldStorageItem;
+ }
+ }
+ catch (error) {
+ console.warn(`Error accessing cold storage for ${id}:`, error);
+ }
+ // Item not found in any cache level
+ this.stats.misses++;
+ return null;
+ }
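// Editorial usage sketch (not part of the diff): exercising the tiered get() above.
// `warmAdapter` and `coldAdapter` are hypothetical objects implementing the adapter
// contract (get/set/delete/getMany/clear) expected by setStorageAdapters() below.
const cache = new CacheManager({ autoTune: true });
cache.setStorageAdapters(warmAdapter, coldAdapter);
const item = await cache.get('noun-123'); // hot -> warm -> cold; null if absent everywhere
console.log(cache.getStats()); // { hits, misses, evictions, size, maxSize }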
+ /**
+ * Get an item from warm cache
+ * @param id The item ID
+ * @returns The cached item or null if not found
+ */
+ async getFromWarmCache(id) {
+ if (!this.warmStorage)
+ return null;
+ try {
+ return await this.warmStorage.get(id);
+ }
+ catch (error) {
+ console.warn(`Error getting item ${id} from warm cache:`, error);
+ return null;
+ }
+ }
+ /**
+ * Get an item from cold storage
+ * @param id The item ID
+ * @returns The item or null if not found
+ */
+ async getFromColdStorage(id) {
+ if (!this.coldStorage)
+ return null;
+ try {
+ return await this.coldStorage.get(id);
+ }
+ catch (error) {
+ console.warn(`Error getting item ${id} from cold storage:`, error);
+ return null;
+ }
+ }
+ /**
+ * Add an item to hot cache
+ * @param id The item ID
+ * @param item The item to cache
+ */
+ addToHotCache(id, item) {
+ // Check if we need to evict items
+ if (this.hotCache.size >= this.hotCacheMaxSize * this.hotCacheEvictionThreshold) {
+ this.evictFromHotCache();
+ }
+ // Add to hot cache
+ this.hotCache.set(id, {
+ data: item,
+ lastAccessed: Date.now(),
+ accessCount: 1,
+ expiresAt: null // Hot cache items don't expire
+ });
+ // Update stats
+ this.stats.size = this.hotCache.size;
+ }
+ /**
+ * Add an item to warm cache
+ * @param id The item ID
+ * @param item The item to cache
+ */
+ async addToWarmCache(id, item) {
+ if (!this.warmStorage)
+ return;
+ try {
+ // Add to warm cache with TTL
+ await this.warmStorage.set(id, item, {
+ ttl: this.warmCacheTTL
+ });
+ }
+ catch (error) {
+ console.warn(`Error adding item ${id} to warm cache:`, error);
+ }
+ }
+ /**
+ * Evict items from hot cache based on LRU policy
+ */
+ evictFromHotCache() {
+ // Find the least recently used items
+ const entries = Array.from(this.hotCache.entries());
+ // Sort by last accessed time (oldest first)
+ entries.sort((a, b) => a[1].lastAccessed - b[1].lastAccessed);
+ // Remove the oldest 20% of items
+ const itemsToRemove = Math.ceil(this.hotCache.size * 0.2);
+ for (let i = 0; i < itemsToRemove && i < entries.length; i++) {
+ this.hotCache.delete(entries[i][0]);
+ this.stats.evictions++;
+ }
+ // Update stats
+ this.stats.size = this.hotCache.size;
+ if (process.env.DEBUG) {
+ console.log(`Evicted ${itemsToRemove} items from hot cache, new size: ${this.hotCache.size}`);
+ }
+ }
+ /**
+ * Set an item in all cache levels
+ * @param id The item ID
+ * @param item The item to cache
+ */
+ async set(id, item) {
+ // Add to hot cache
+ this.addToHotCache(id, item);
+ // Add to warm cache
+ await this.addToWarmCache(id, item);
+ // Add to cold storage
+ if (this.coldStorage) {
+ try {
+ await this.coldStorage.set(id, item);
+ }
+ catch (error) {
+ console.warn(`Error adding item ${id} to cold storage:`, error);
+ }
+ }
+ }
+ /**
+ * Delete an item from all cache levels
+ * @param id The item ID to delete
+ */
+ async delete(id) {
+ // Remove from hot cache
+ this.hotCache.delete(id);
+ // Remove from warm cache
+ if (this.warmStorage) {
+ try {
+ await this.warmStorage.delete(id);
+ }
+ catch (error) {
+ console.warn(`Error deleting item ${id} from warm cache:`, error);
+ }
+ }
+ // Remove from cold storage
+ if (this.coldStorage) {
+ try {
+ await this.coldStorage.delete(id);
+ }
+ catch (error) {
+ console.warn(`Error deleting item ${id} from cold storage:`, error);
+ }
+ }
+ // Update stats
+ this.stats.size = this.hotCache.size;
+ }
+ /**
+ * Clear all cache levels
+ */
+ async clear() {
+ // Clear hot cache
+ this.hotCache.clear();
+ // Clear warm cache
+ if (this.warmStorage) {
+ try {
+ await this.warmStorage.clear();
+ }
+ catch (error) {
+ console.warn('Error clearing warm cache:', error);
+ }
+ }
+ // Clear cold storage
+ if (this.coldStorage) {
+ try {
+ await this.coldStorage.clear();
+ }
+ catch (error) {
+ console.warn('Error clearing cold storage:', error);
+ }
+ }
+ // Reset stats
+ this.stats = {
+ hits: 0,
+ misses: 0,
+ evictions: 0,
+ size: 0,
+ maxSize: this.hotCacheMaxSize
+ };
+ }
+ /**
+ * Get cache statistics
+ * @returns Cache statistics
+ */
+ getStats() {
+ return { ...this.stats };
+ }
+ /**
+ * Prefetch items based on ID patterns or relationships
+ * @param ids Array of IDs to prefetch
+ */
+ async prefetch(ids) {
+ // Check if it's time to tune parameters
+ await this.checkAndTuneParameters();
+ // Prefetch in batches to avoid overwhelming the system
+ const batches = [];
+ // Split into batches using the configurable batch size
+ for (let i = 0; i < ids.length; i += this.batchSize) {
+ const batch = ids.slice(i, i + this.batchSize);
+ batches.push(batch);
+ }
+ // Process each batch
+ for (const batch of batches) {
+ await Promise.all(batch.map(async (id) => {
+ // Skip if already in hot cache
+ if (this.hotCache.has(id))
+ return;
+ try {
+ // Try to get from any cache level
+ await this.get(id);
+ }
+ catch (error) {
+ // Ignore errors during prefetching
+ if (process.env.DEBUG) {
+ console.warn(`Error prefetching ${id}:`, error);
+ }
+ }
+ }));
+ }
+ }
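// Editorial sketch (not shipped): warming the cache ahead of a read burst with the
// prefetch() above; IDs are hypothetical, `cache` is a CacheManager as sketched earlier.
// prefetch() chunks the IDs by this.batchSize, so the auto-tuned batch size directly
// controls how much concurrency each round of warm/cold lookups generates.
await cache.prefetch(['noun-1', 'noun-2', 'noun-3']);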
+ /**
+ * Check if it's time to tune parameters and do so if needed
+ * This is called before operations that might benefit from tuned parameters
+ *
+ * This method serves as a checkpoint for auto-tuning, ensuring that:
+ * 1. Parameters are tuned periodically based on the auto-tune interval
+ * 2. Tuning happens before critical operations that would benefit from optimized parameters
+ * 3. Tuning doesn't happen too frequently, which could impact performance
+ *
+ * By calling this method before get(), getMany(), and prefetch() operations,
+ * we ensure that the cache parameters are optimized for the current workload
+ * without adding unnecessary overhead to every operation.
+ */
+ async checkAndTuneParameters() {
+ // Skip if auto-tuning is disabled
+ if (!this.autoTune)
+ return;
+ // Check if it's time to tune parameters
+ const now = Date.now();
+ if (now - this.lastAutoTuneTime >= this.autoTuneInterval) {
+ await this.tuneParameters();
+ }
+ }
+ /**
+ * Get multiple items at once, optimizing for batch retrieval
+ * @param ids Array of IDs to get
+ * @returns Map of ID to item
+ */
+ async getMany(ids) {
+ // Check if it's time to tune parameters
+ await this.checkAndTuneParameters();
+ const result = new Map();
+ // First check hot cache for all IDs
+ const missingIds = [];
+ for (const id of ids) {
+ const hotCacheEntry = this.hotCache.get(id);
+ if (hotCacheEntry) {
+ // Update access metadata
+ hotCacheEntry.lastAccessed = Date.now();
+ hotCacheEntry.accessCount++;
+ // Add to result
+ result.set(id, hotCacheEntry.data);
+ // Update stats
+ this.stats.hits++;
+ }
+ else {
+ missingIds.push(id);
+ }
+ }
+ if (missingIds.length === 0) {
+ return result;
+ }
+ // Try to get missing items from warm cache
+ if (this.warmStorage) {
+ try {
+ const warmCacheItems = await this.warmStorage.getMany(missingIds);
+ for (const [id, item] of warmCacheItems.entries()) {
+ if (item) {
+ // Promote to hot cache
+ this.addToHotCache(id, item);
+ // Add to result
+ result.set(id, item);
+ // Update stats
+ this.stats.hits++;
+ // Remove from missing IDs
+ const index = missingIds.indexOf(id);
+ if (index !== -1) {
+ missingIds.splice(index, 1);
+ }
+ }
+ }
+ }
+ catch (error) {
+ console.warn('Error accessing warm cache for batch:', error);
+ }
+ }
+ if (missingIds.length === 0) {
+ return result;
+ }
+ // Try to get remaining missing items from cold storage
+ if (this.coldStorage) {
+ try {
+ const coldStorageItems = await this.coldStorage.getMany(missingIds);
+ for (const [id, item] of coldStorageItems.entries()) {
+ if (item) {
+ // Promote to hot and warm caches
+ this.addToHotCache(id, item);
+ await this.addToWarmCache(id, item);
+ // Add to result
+ result.set(id, item);
+ // Update stats
+ this.stats.misses++;
+ }
+ }
+ }
+ catch (error) {
+ console.warn('Error accessing cold storage for batch:', error);
+ }
+ }
+ return result;
+ }
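// Editorial sketch (not shipped): batch retrieval with getMany() above avoids one
// round trip per ID; only IDs found in some tier appear in the returned Map.
const found = await cache.getMany(['noun-1', 'noun-2', 'noun-9']);
for (const [id, value] of found) {
    console.log(id, value);
}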
+ /**
+ * Set the storage adapters for warm and cold caches
+ * @param warmStorage Warm cache storage adapter
+ * @param coldStorage Cold storage adapter
  */
- async executeDelete(operation, operationName) {
- return this.deleteExecutor(operation, operationName);
+ setStorageAdapters(warmStorage, coldStorage) {
+ this.warmStorage = warmStorage;
+ this.coldStorage = coldStorage;
  }
  }
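// Editorial sketch (not shipped): the minimal adapter shape setStorageAdapters() expects,
// inferred from the calls made above (get, set with optional ttl, delete, getMany, clear);
// a Map-backed stub that ignores TTLs:
const memoryAdapter = {
    store: new Map(),
    async get(id) { return this.store.get(id) ?? null; },
    async set(id, item, options) { this.store.set(id, item); }, // options?.ttl ignored here
    async delete(id) { this.store.delete(id); },
    async getMany(ids) { return new Map(ids.map((id) => [id, this.store.get(id) ?? null])); },
    async clear() { this.store.clear(); }
};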
 
@@ -7973,6 +9492,8 @@ class S3CompatibleStorage extends BaseStorage {
  this.activeLocks = new Set();
  // Change log for efficient synchronization
  this.changeLogPrefix = 'change-log/';
+ // Node cache to avoid redundant API calls
+ this.nodeCache = new Map();
  // Batch update timer ID
  this.statisticsBatchUpdateTimerId = null;
  // Flag to indicate if statistics have been modified since last save
@@ -7998,6 +9519,9 @@ class S3CompatibleStorage extends BaseStorage {
  this.verbPrefix = `${VERBS_DIR}/`;
  this.metadataPrefix = `${METADATA_DIR}/`;
  this.indexPrefix = `${INDEX_DIR}/`;
+ // Initialize cache managers
+ this.nounCacheManager = new CacheManager(options.cacheConfig);
+ this.verbCacheManager = new CacheManager(options.cacheConfig);
  }
  /**
  * Initialize the storage adapter
@@ -8036,6 +9560,78 @@ class S3CompatibleStorage extends BaseStorage {
  await this.s3Client.send(new HeadBucketCommand({
  Bucket: this.bucketName
  }));
+ // Create storage adapter proxies for the cache managers
+ const nounStorageAdapter = {
+ get: async (id) => this.getNoun_internal(id),
+ set: async (id, node) => this.saveNoun_internal(node),
+ delete: async (id) => this.deleteNoun_internal(id),
+ getMany: async (ids) => {
+ const result = new Map();
+ // Process in batches to avoid overwhelming the S3 API
+ const batchSize = 10;
+ const batches = [];
+ // Split into batches
+ for (let i = 0; i < ids.length; i += batchSize) {
+ const batch = ids.slice(i, i + batchSize);
+ batches.push(batch);
+ }
+ // Process each batch
+ for (const batch of batches) {
+ const batchResults = await Promise.all(batch.map(async (id) => {
+ const node = await this.getNoun_internal(id);
+ return { id, node };
+ }));
+ // Add results to map
+ for (const { id, node } of batchResults) {
+ if (node) {
+ result.set(id, node);
+ }
+ }
+ }
+ return result;
+ },
+ clear: async () => {
+ // No-op for now, as we don't want to clear the entire storage
+ // This would be implemented if needed
+ }
+ };
+ const verbStorageAdapter = {
+ get: async (id) => this.getVerb_internal(id),
+ set: async (id, edge) => this.saveVerb_internal(edge),
+ delete: async (id) => this.deleteVerb_internal(id),
+ getMany: async (ids) => {
+ const result = new Map();
+ // Process in batches to avoid overwhelming the S3 API
+ const batchSize = 10;
+ const batches = [];
+ // Split into batches
+ for (let i = 0; i < ids.length; i += batchSize) {
+ const batch = ids.slice(i, i + batchSize);
+ batches.push(batch);
+ }
+ // Process each batch
+ for (const batch of batches) {
+ const batchResults = await Promise.all(batch.map(async (id) => {
+ const edge = await this.getVerb_internal(id);
+ return { id, edge };
+ }));
+ // Add results to map
+ for (const { id, edge } of batchResults) {
+ if (edge) {
+ result.set(id, edge);
+ }
+ }
+ }
+ return result;
+ },
+ clear: async () => {
+ // No-op for now, as we don't want to clear the entire storage
+ // This would be implemented if needed
+ }
+ };
+ // Set storage adapters for cache managers
+ this.nounCacheManager.setStorageAdapters(nounStorageAdapter, nounStorageAdapter);
+ this.verbCacheManager.setStorageAdapters(verbStorageAdapter, verbStorageAdapter);
  this.isInitialized = true;
  }
  catch (error) {
@@ -8144,7 +9740,10 @@ class S3CompatibleStorage extends BaseStorage {
  const parsedNode = JSON.parse(bodyContents);
  console.log(`Parsed node data for ${id}:`, parsedNode);
  // Ensure the parsed node has the expected properties
- if (!parsedNode || !parsedNode.id || !parsedNode.vector || !parsedNode.connections) {
+ if (!parsedNode ||
+ !parsedNode.id ||
+ !parsedNode.vector ||
+ !parsedNode.connections) {
  console.error(`Invalid node data for ${id}:`, parsedNode);
  return null;
  }
@@ -8180,100 +9779,118 @@ class S3CompatibleStorage extends BaseStorage {
  }
  /**
  * Get all nodes from storage
+ * @deprecated This method is deprecated and will be removed in a future version.
+ * It can cause memory issues with large datasets. Use getNodesWithPagination() instead.
  */
  async getAllNodes() {
  await this.ensureInitialized();
+ console.warn('WARNING: getAllNodes() is deprecated and will be removed in a future version. Use getNodesWithPagination() instead.');
+ try {
+ // Use the paginated method with a large limit to maintain backward compatibility
+ // but warn about potential issues
+ const result = await this.getNodesWithPagination({
+ limit: 1000, // Reasonable limit to avoid memory issues
+ useCache: true
+ });
+ if (result.hasMore) {
+ console.warn(`WARNING: Only returning the first 1000 nodes. There are more nodes available. Use getNodesWithPagination() for proper pagination.`);
+ }
+ return result.nodes;
+ }
+ catch (error) {
+ console.error('Failed to get all nodes:', error);
+ return [];
+ }
+ }
+ /**
+ * Get nodes with pagination
+ * @param options Pagination options
+ * @returns Promise that resolves to a paginated result of nodes
+ */
+ async getNodesWithPagination(options = {}) {
+ await this.ensureInitialized();
+ const limit = options.limit || 100;
+ const useCache = options.useCache !== false;
  try {
  // Import the ListObjectsV2Command and GetObjectCommand only when needed
- const { ListObjectsV2Command, GetObjectCommand } = await import('@aws-sdk/client-s3');
- console.log(`Getting all nodes from bucket ${this.bucketName} with prefix ${this.nounPrefix}`);
- // List all objects in the nouns directory
+ const { ListObjectsV2Command } = await import('@aws-sdk/client-s3');
+ // List objects with pagination
  const listResponse = await this.s3Client.send(new ListObjectsV2Command({
  Bucket: this.bucketName,
- Prefix: this.nounPrefix
+ Prefix: this.nounPrefix,
+ MaxKeys: limit,
+ ContinuationToken: options.cursor
  }));
+ // If listResponse is null/undefined or there are no objects, return an empty result
+ if (!listResponse ||
+ !listResponse.Contents ||
+ listResponse.Contents.length === 0) {
+ return {
+ nodes: [],
+ hasMore: false
+ };
+ }
+ // Extract node IDs from the keys
+ const nodeIds = listResponse.Contents
+ .filter((object) => object && object.Key)
+ .map((object) => object.Key.replace(this.nounPrefix, '').replace('.json', ''));
+ // Use the cache manager to get nodes efficiently
  const nodes = [];
- // If listResponse is null/undefined or there are no objects, return an empty array
- if (!listResponse || !listResponse.Contents || listResponse.Contents.length === 0) {
- console.log(`No nodes found in bucket ${this.bucketName} with prefix ${this.nounPrefix}`);
- return nodes;
- }
- console.log(`Found ${listResponse.Contents.length} nodes in bucket ${this.bucketName}`);
- // Debug: Log all keys found
- console.log('Keys found:');
- for (const object of listResponse.Contents) {
- if (object && object.Key) {
- console.log(`- ${object.Key}`);
+ if (useCache) {
+ // Get nodes from cache manager
+ const cachedNodes = await this.nounCacheManager.getMany(nodeIds);
+ // Add nodes to result in the same order as nodeIds
+ for (const id of nodeIds) {
+ const node = cachedNodes.get(id);
+ if (node) {
+ nodes.push(node);
+ }
  }
  }
- // Get each node
- const nodePromises = listResponse.Contents.map(async (object) => {
- if (!object || !object.Key) {
- console.log(`Skipping undefined object or object without Key`);
- return null;
+ else {
+ // Get nodes directly from S3 without using cache
+ // Process in smaller batches to reduce memory usage
+ const batchSize = 50;
+ const batches = [];
+ // Split into batches
+ for (let i = 0; i < nodeIds.length; i += batchSize) {
+ const batch = nodeIds.slice(i, i + batchSize);
+ batches.push(batch);
  }
- try {
- // Extract node ID from the key (remove prefix and .json extension)
- const nodeId = object.Key.replace(this.nounPrefix, '').replace('.json', '');
- console.log(`Getting node with ID ${nodeId} from key ${object.Key}`);
- // Get the node data
- const response = await this.s3Client.send(new GetObjectCommand({
- Bucket: this.bucketName,
- Key: object.Key
- }));
- // Check if response is null or undefined
- if (!response || !response.Body) {
- console.log(`No response or response body for node ${nodeId}`);
- return null;
- }
- // Convert the response body to a string
- const bodyContents = await response.Body.transformToString();
- console.log(`Retrieved node body for ${nodeId}: ${bodyContents.substring(0, 100)}${bodyContents.length > 100 ? '...' : ''}`);
- // Parse the JSON string
- try {
- const parsedNode = JSON.parse(bodyContents);
- console.log(`Parsed node data for ${nodeId}:`, parsedNode);
- // Ensure the parsed node has the expected properties
- if (!parsedNode || !parsedNode.id || !parsedNode.vector || !parsedNode.connections) {
- console.error(`Invalid node data for ${nodeId}:`, parsedNode);
+ // Process each batch sequentially
+ for (const batch of batches) {
+ const batchNodes = await Promise.all(batch.map(async (id) => {
+ try {
+ return await this.getNoun_internal(id);
+ }
+ catch (error) {
  return null;
  }
- // Convert serialized connections back to Map<number, Set<string>>
- const connections = new Map();
- for (const [level, nodeIds] of Object.entries(parsedNode.connections)) {
- connections.set(Number(level), new Set(nodeIds));
+ }));
+ // Add non-null nodes to result
+ for (const node of batchNodes) {
+ if (node) {
+ nodes.push(node);
  }
- const node = {
- id: parsedNode.id,
- vector: parsedNode.vector,
- connections
- };
- console.log(`Successfully retrieved node ${nodeId}:`, node);
- return node;
  }
- catch (parseError) {
- console.error(`Failed to parse node data for ${nodeId}:`, parseError);
- return null;
- }
- }
- catch (error) {
- console.error(`Error getting node from ${object.Key}:`, error);
- return null;
  }
- });
- // Wait for all promises to resolve and filter out nulls
- const resolvedNodes = await Promise.all(nodePromises);
- const filteredNodes = resolvedNodes.filter((node) => node !== null);
- console.log(`Returning ${filteredNodes.length} nodes`);
- // Debug: Log all nodes being returned
- for (const node of filteredNodes) {
- console.log(`- Node ${node.id}`);
  }
- return filteredNodes;
+ // Determine if there are more nodes
+ const hasMore = !!listResponse.IsTruncated;
+ // Set next cursor if there are more nodes
+ const nextCursor = listResponse.NextContinuationToken;
+ return {
+ nodes,
+ hasMore,
+ nextCursor
+ };
  }
  catch (error) {
- console.error('Failed to get all nodes:', error);
- return [];
+ console.error('Failed to get nodes with pagination:', error);
+ return {
+ nodes: [],
+ hasMore: false
+ };
  }
  }
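// Editorial sketch (not shipped): draining getNodesWithPagination() with its continuation
// cursor - the replacement the deprecation warnings above point to. `storage` is assumed
// to be an initialized S3CompatibleStorage instance.
let cursor;
let hasMore = true;
while (hasMore) {
    const page = await storage.getNodesWithPagination({ limit: 100, cursor });
    for (const node of page.nodes) {
        // ...process node...
    }
    hasMore = page.hasMore;
    cursor = page.nextCursor;
    if (hasMore && !cursor) break; // same guard getNodesByNounType() uses below
}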
  /**
@@ -8292,14 +9909,31 @@ class S3CompatibleStorage extends BaseStorage {
  async getNodesByNounType(nounType) {
  await this.ensureInitialized();
  try {
- // Get all nodes
- const allNodes = await this.getAllNodes();
- // Filter nodes by noun type using metadata
  const filteredNodes = [];
- for (const node of allNodes) {
- const metadata = await this.getMetadata(node.id);
- if (metadata && metadata.noun === nounType) {
- filteredNodes.push(node);
+ let hasMore = true;
+ let cursor = undefined;
+ // Use pagination to process nodes in batches
+ while (hasMore) {
+ // Get a batch of nodes
+ const result = await this.getNodesWithPagination({
+ limit: 100,
+ cursor,
+ useCache: true
+ });
+ // Filter nodes by noun type using metadata
+ for (const node of result.nodes) {
+ const metadata = await this.getMetadata(node.id);
+ if (metadata && metadata.noun === nounType) {
+ filteredNodes.push(node);
+ }
+ }
+ // Update pagination state
+ hasMore = result.hasMore;
+ cursor = result.nextCursor;
+ // Safety check to prevent infinite loops
+ if (!cursor && hasMore) {
+ console.warn('No cursor returned but hasMore is true, breaking loop');
+ break;
  }
  }
  return filteredNodes;
@@ -8422,7 +10056,10 @@ class S3CompatibleStorage extends BaseStorage {
  const parsedEdge = JSON.parse(bodyContents);
  console.log(`Parsed edge data for ${id}:`, parsedEdge);
  // Ensure the parsed edge has the expected properties
- if (!parsedEdge || !parsedEdge.id || !parsedEdge.vector || !parsedEdge.connections ||
+ if (!parsedEdge ||
+ !parsedEdge.id ||
+ !parsedEdge.vector ||
+ !parsedEdge.connections ||
  !(parsedEdge.sourceId || parsedEdge.source) ||
  !(parsedEdge.targetId || parsedEdge.target) ||
  !(parsedEdge.type || parsedEdge.verb)) {
@@ -8476,86 +10113,205 @@ class S3CompatibleStorage extends BaseStorage {
  }
  /**
  * Get all verbs from storage (internal implementation)
+ * @deprecated This method is deprecated and will be removed in a future version.
+ * It can cause memory issues with large datasets. Use getVerbsWithPagination() instead.
  */
  async getAllVerbs_internal() {
+ console.warn('WARNING: getAllVerbs_internal() is deprecated and will be removed in a future version. Use getVerbsWithPagination() instead.');
  return this.getAllEdges();
  }
  /**
  * Get all edges from storage
+ * @deprecated This method is deprecated and will be removed in a future version.
+ * It can cause memory issues with large datasets. Use getEdgesWithPagination() instead.
  */
  async getAllEdges() {
  await this.ensureInitialized();
+ console.warn('WARNING: getAllEdges() is deprecated and will be removed in a future version. Use getEdgesWithPagination() instead.');
  try {
- // Import the ListObjectsV2Command and GetObjectCommand only when needed
- const { ListObjectsV2Command, GetObjectCommand } = await import('@aws-sdk/client-s3');
- // List all objects in the verbs directory
+ // Use the paginated method with a large limit to maintain backward compatibility
+ // but warn about potential issues
+ const result = await this.getEdgesWithPagination({
+ limit: 1000, // Reasonable limit to avoid memory issues
+ useCache: true
+ });
+ if (result.hasMore) {
+ console.warn(`WARNING: Only returning the first 1000 edges. There are more edges available. Use getEdgesWithPagination() for proper pagination.`);
+ }
+ return result.edges;
+ }
+ catch (error) {
+ console.error('Failed to get all edges:', error);
+ return [];
+ }
+ }
+ /**
+ * Get edges with pagination
+ * @param options Pagination options
+ * @returns Promise that resolves to a paginated result of edges
+ */
+ async getEdgesWithPagination(options = {}) {
+ await this.ensureInitialized();
+ const limit = options.limit || 100;
+ const useCache = options.useCache !== false;
+ const filter = options.filter || {};
+ try {
+ // Import the ListObjectsV2Command only when needed
+ const { ListObjectsV2Command } = await import('@aws-sdk/client-s3');
+ // List objects with pagination
  const listResponse = await this.s3Client.send(new ListObjectsV2Command({
  Bucket: this.bucketName,
- Prefix: this.verbPrefix
+ Prefix: this.verbPrefix,
+ MaxKeys: limit,
+ ContinuationToken: options.cursor
  }));
+ // If listResponse is null/undefined or there are no objects, return an empty result
+ if (!listResponse ||
+ !listResponse.Contents ||
+ listResponse.Contents.length === 0) {
+ return {
+ edges: [],
+ hasMore: false
+ };
+ }
+ // Extract edge IDs from the keys
+ const edgeIds = listResponse.Contents
+ .filter((object) => object && object.Key)
+ .map((object) => object.Key.replace(this.verbPrefix, '').replace('.json', ''));
+ // Use the cache manager to get edges efficiently
  const edges = [];
- // If there are no objects, return an empty array
- if (!listResponse.Contents || listResponse.Contents.length === 0) {
- return edges;
+ if (useCache) {
+ // Get edges from cache manager
+ const cachedEdges = await this.verbCacheManager.getMany(edgeIds);
+ // Add edges to result in the same order as edgeIds
+ for (const id of edgeIds) {
+ const edge = cachedEdges.get(id);
+ if (edge) {
+ // Apply filtering if needed
+ if (this.filterEdge(edge, filter)) {
+ edges.push(edge);
+ }
+ }
+ }
  }
- // Get each edge
- const edgePromises = listResponse.Contents.map(async (object) => {
- try {
- // Extract edge ID from the key (remove prefix and .json extension)
- const edgeId = object.Key.replace(this.verbPrefix, '').replace('.json', '');
- // Get the edge data
- const response = await this.s3Client.send(new GetObjectCommand({
- Bucket: this.bucketName,
- Key: object.Key
+ else {
+ // Get edges directly from S3 without using cache
+ // Process in smaller batches to reduce memory usage
+ const batchSize = 50;
+ const batches = [];
+ // Split into batches
+ for (let i = 0; i < edgeIds.length; i += batchSize) {
+ const batch = edgeIds.slice(i, i + batchSize);
+ batches.push(batch);
+ }
+ // Process each batch sequentially
+ for (const batch of batches) {
+ const batchEdges = await Promise.all(batch.map(async (id) => {
+ try {
+ const edge = await this.getVerb_internal(id);
+ // Apply filtering if needed
+ if (edge && this.filterEdge(edge, filter)) {
+ return edge;
+ }
+ return null;
+ }
+ catch (error) {
+ return null;
+ }
  }));
- // Convert the response body to a string
- const bodyContents = await response.Body.transformToString();
- const parsedEdge = JSON.parse(bodyContents);
- // Convert serialized connections back to Map<number, Set<string>>
- const connections = new Map();
- for (const [level, nodeIds] of Object.entries(parsedEdge.connections)) {
- connections.set(Number(level), new Set(nodeIds));
+ // Add non-null edges to result
+ for (const edge of batchEdges) {
+ if (edge) {
+ edges.push(edge);
+ }
  }
- // Create default timestamp if not present
- const defaultTimestamp = {
- seconds: Math.floor(Date.now() / 1000),
- nanoseconds: (Date.now() % 1000) * 1000000
- };
- // Create default createdBy if not present
- const defaultCreatedBy = {
- augmentation: 'unknown',
- version: '1.0'
- };
- return {
- id: parsedEdge.id,
- vector: parsedEdge.vector,
- connections,
- sourceId: parsedEdge.sourceId || parsedEdge.source,
- targetId: parsedEdge.targetId || parsedEdge.target,
- source: parsedEdge.sourceId || parsedEdge.source,
- target: parsedEdge.targetId || parsedEdge.target,
- verb: parsedEdge.type || parsedEdge.verb,
- type: parsedEdge.type || parsedEdge.verb,
- weight: parsedEdge.weight || 1.0,
- metadata: parsedEdge.metadata || {},
- createdAt: parsedEdge.createdAt || defaultTimestamp,
- updatedAt: parsedEdge.updatedAt || defaultTimestamp,
- createdBy: parsedEdge.createdBy || defaultCreatedBy
- };
- }
- catch (error) {
- console.error(`Error getting edge from ${object.Key}:`, error);
- return null;
  }
- });
- // Wait for all promises to resolve and filter out nulls
- const resolvedEdges = await Promise.all(edgePromises);
- return resolvedEdges.filter((edge) => edge !== null);
+ }
+ // Determine if there are more edges
+ const hasMore = !!listResponse.IsTruncated;
+ // Set next cursor if there are more edges
+ const nextCursor = listResponse.NextContinuationToken;
+ return {
+ edges,
+ hasMore,
+ nextCursor
+ };
  }
  catch (error) {
- console.error('Failed to get all edges:', error);
- return [];
+ console.error('Failed to get edges with pagination:', error);
+ return {
+ edges: [],
+ hasMore: false
+ };
+ }
+ }
+ /**
+ * Filter an edge based on filter criteria
+ * @param edge The edge to filter
+ * @param filter The filter criteria
+ * @returns True if the edge matches the filter, false otherwise
+ */
+ filterEdge(edge, filter) {
+ // If no filter, include all edges
+ if (!filter.sourceId && !filter.targetId && !filter.type) {
+ return true;
+ }
+ // Filter by source ID
+ if (filter.sourceId && edge.sourceId !== filter.sourceId) {
+ return false;
+ }
+ // Filter by target ID
+ if (filter.targetId && edge.targetId !== filter.targetId) {
+ return false;
+ }
+ // Filter by type
+ if (filter.type && edge.type !== filter.type) {
+ return false;
+ }
+ return true;
+ }
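// Editorial sketch (not shipped): filterEdge() above is applied inside
// getEdgesWithPagination(), so a caller can page through just the edges touching one
// node; the IDs and type are hypothetical.
const page = await storage.getEdgesWithPagination({
    limit: 50,
    filter: { sourceId: 'noun-123', type: 'relatedTo' }
});
console.log(page.edges.length, page.hasMore);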
+ /**
+ * Get verbs with pagination
+ * @param options Pagination options
+ * @returns Promise that resolves to a paginated result of verbs
+ */
+ async getVerbsWithPagination(options = {}) {
+ await this.ensureInitialized();
+ // Convert filter to edge filter format
+ const edgeFilter = {};
+ if (options.filter) {
+ // Handle sourceId filter
+ if (options.filter.sourceId) {
+ edgeFilter.sourceId = Array.isArray(options.filter.sourceId)
+ ? options.filter.sourceId[0]
+ : options.filter.sourceId;
+ }
+ // Handle targetId filter
+ if (options.filter.targetId) {
+ edgeFilter.targetId = Array.isArray(options.filter.targetId)
+ ? options.filter.targetId[0]
+ : options.filter.targetId;
+ }
+ // Handle verbType filter
+ if (options.filter.verbType) {
+ edgeFilter.type = Array.isArray(options.filter.verbType)
+ ? options.filter.verbType[0]
+ : options.filter.verbType;
+ }
  }
+ // Get edges with pagination
+ const result = await this.getEdgesWithPagination({
+ limit: options.limit,
+ cursor: options.cursor,
+ useCache: true,
+ filter: edgeFilter
+ });
+ // Convert edges to verbs (they're the same in this implementation)
+ return {
+ items: result.edges,
+ hasMore: result.hasMore,
+ nextCursor: result.nextCursor
+ };
  }
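// Editorial sketch (not shipped): the verb-level wrapper above accepts array-valued
// filters, but as the conversion shows only the first element is honored, and verbType
// maps onto the edge-level `type` field.
const verbs = await storage.getVerbsWithPagination({
    limit: 50,
    filter: { verbType: ['relatedTo'] }
});
console.log(verbs.items.length, verbs.hasMore, verbs.nextCursor);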
  /**
  * Get verbs by source (internal implementation)
@@ -8722,9 +10478,10 @@ class S3CompatibleStorage extends BaseStorage {
  // In AWS SDK, this would be error.name === 'NoSuchKey'
  // In our mock, we might get different error types
  if (error.name === 'NoSuchKey' ||
- (error.message && (error.message.includes('NoSuchKey') ||
- error.message.includes('not found') ||
- error.message.includes('does not exist')))) {
+ (error.message &&
+ (error.message.includes('NoSuchKey') ||
+ error.message.includes('not found') ||
+ error.message.includes('does not exist')))) {
  console.log(`Metadata not found for ${id}`)
  return null;
  }
@@ -8749,7 +10506,9 @@ class S3CompatibleStorage extends BaseStorage {
  Prefix: prefix
  }));
  // If there are no objects or Contents is undefined, return
- if (!listResponse || !listResponse.Contents || listResponse.Contents.length === 0) {
+ if (!listResponse ||
+ !listResponse.Contents ||
+ listResponse.Contents.length === 0) {
  return;
  }
  // Delete each object
@@ -8799,15 +10558,20 @@ class S3CompatibleStorage extends BaseStorage {
  Prefix: prefix
  }));
  // If there are no objects or Contents is undefined, return
- if (!listResponse || !listResponse.Contents || listResponse.Contents.length === 0) {
+ if (!listResponse ||
+ !listResponse.Contents ||
+ listResponse.Contents.length === 0) {
  return { size, count };
  }
  // Calculate size and count
  for (const object of listResponse.Contents) {
  if (object) {
  // Ensure Size is a number
- const objectSize = typeof object.Size === 'number' ? object.Size :
- (object.Size ? parseInt(object.Size.toString(), 10) : 0);
+ const objectSize = typeof object.Size === 'number'
+ ? object.Size
+ : object.Size
+ ? parseInt(object.Size.toString(), 10)
+ : 0;
  // Add to total size and increment count
  size += objectSize || 0;
  count++;
@@ -8826,12 +10590,17 @@ class S3CompatibleStorage extends BaseStorage {
  const verbsResult = await calculateSizeAndCount(this.verbPrefix);
  const metadataResult = await calculateSizeAndCount(this.metadataPrefix);
  const indexResult = await calculateSizeAndCount(this.indexPrefix);
- totalSize = nounsResult.size + verbsResult.size + metadataResult.size + indexResult.size;
+ totalSize =
+ nounsResult.size +
+ verbsResult.size +
+ metadataResult.size +
+ indexResult.size;
  nodeCount = nounsResult.count;
  edgeCount = verbsResult.count;
  metadataCount = metadataResult.count;
  // Ensure we have a minimum size if we have objects
- if (totalSize === 0 && (nodeCount > 0 || edgeCount > 0 || metadataCount > 0)) {
+ if (totalSize === 0 &&
+ (nodeCount > 0 || edgeCount > 0 || metadataCount > 0)) {
  console.log(`Setting minimum size for ${nodeCount} nodes, ${edgeCount} edges, and ${metadataCount} metadata objects`);
  totalSize = (nodeCount + edgeCount + metadataCount) * 100; // Arbitrary size per object
  }
@@ -8865,7 +10634,8 @@ class S3CompatibleStorage extends BaseStorage {
  const metadata = JSON.parse(bodyContents);
  // Count by noun type
  if (metadata && metadata.noun) {
- nounTypeCounts[metadata.noun] = (nounTypeCounts[metadata.noun] || 0) + 1;
+ nounTypeCounts[metadata.noun] =
+ (nounTypeCounts[metadata.noun] || 0) + 1;
  }
  }
  catch (parseError) {
@@ -9046,17 +10816,23 @@ class S3CompatibleStorage extends BaseStorage {
  */
  mergeStatistics(storageStats, localStats) {
  // Merge noun counts by taking the maximum of each type
- const mergedNounCount = { ...storageStats.nounCount };
+ const mergedNounCount = {
+ ...storageStats.nounCount
+ };
  for (const [type, count] of Object.entries(localStats.nounCount)) {
  mergedNounCount[type] = Math.max(mergedNounCount[type] || 0, count);
  }
  // Merge verb counts by taking the maximum of each type
- const mergedVerbCount = { ...storageStats.verbCount };
+ const mergedVerbCount = {
+ ...storageStats.verbCount
+ };
  for (const [type, count] of Object.entries(localStats.verbCount)) {
  mergedVerbCount[type] = Math.max(mergedVerbCount[type] || 0, count);
  }
  // Merge metadata counts by taking the maximum of each type
- const mergedMetadataCount = { ...storageStats.metadataCount };
+ const mergedMetadataCount = {
+ ...storageStats.metadataCount
+ };
  for (const [type, count] of Object.entries(localStats.metadataCount)) {
  mergedMetadataCount[type] = Math.max(mergedMetadataCount[type] || 0, count);
  }
@@ -9169,9 +10945,10 @@ class S3CompatibleStorage extends BaseStorage {
  catch (error) {
  // Check if this is a "NoSuchKey" error (object doesn't exist)
  if (error.name === 'NoSuchKey' ||
- (error.message && (error.message.includes('NoSuchKey') ||
- error.message.includes('not found') ||
- error.message.includes('does not exist')))) {
+ (error.message &&
+ (error.message.includes('NoSuchKey') ||
+ error.message.includes('not found') ||
+ error.message.includes('does not exist')))) {
  return null;
  }
  // For other errors, propagate them
@@ -9200,8 +10977,8 @@ class S3CompatibleStorage extends BaseStorage {
  Body: JSON.stringify(entryWithInstance),
  ContentType: 'application/json',
  Metadata: {
- 'timestamp': entry.timestamp.toString(),
- 'operation': entry.operation,
+ timestamp: entry.timestamp.toString(),
+ operation: entry.operation,
  'entity-type': entry.entityType,
  'entity-id': entry.entityId
  }
@@ -9371,7 +11148,7 @@ class S3CompatibleStorage extends BaseStorage {
  this.activeLocks.add(lockKey);
  // Schedule automatic cleanup when lock expires
  setTimeout(() => {
- this.releaseLock(lockKey, lockValue).catch(error => {
+ this.releaseLock(lockKey, lockValue).catch((error) => {
  console.warn(`Failed to auto-release expired lock ${lockKey}:`, error);
  });
  }, ttl);
@@ -10302,7 +12079,8 @@ async function createStorage(options = {}) {
  secretAccessKey: options.s3Storage.secretAccessKey,
  sessionToken: options.s3Storage.sessionToken,
  serviceType: 's3',
- operationConfig: options.operationConfig
+ operationConfig: options.operationConfig,
+ cacheConfig: options.cacheConfig
  });
  }
  else {
@@ -10317,7 +12095,8 @@ async function createStorage(options = {}) {
  accountId: options.r2Storage.accountId,
  accessKeyId: options.r2Storage.accessKeyId,
  secretAccessKey: options.r2Storage.secretAccessKey,
- serviceType: 'r2'
+ serviceType: 'r2',
+ cacheConfig: options.cacheConfig
  });
  }
  else {
@@ -10333,7 +12112,8 @@ async function createStorage(options = {}) {
  endpoint: options.gcsStorage.endpoint || 'https://storage.googleapis.com',
  accessKeyId: options.gcsStorage.accessKeyId,
  secretAccessKey: options.gcsStorage.secretAccessKey,
- serviceType: 'gcs'
+ serviceType: 'gcs',
+ cacheConfig: options.cacheConfig
  });
  }
  else {
@@ -10354,7 +12134,8 @@ async function createStorage(options = {}) {
  endpoint: options.customS3Storage.endpoint,
  accessKeyId: options.customS3Storage.accessKeyId,
  secretAccessKey: options.customS3Storage.secretAccessKey,
- serviceType: options.customS3Storage.serviceType || 'custom'
+ serviceType: options.customS3Storage.serviceType || 'custom',
+ cacheConfig: options.cacheConfig
  });
  }
  // If R2 storage is specified, use it
@@ -10365,7 +12146,8 @@ async function createStorage(options = {}) {
  accountId: options.r2Storage.accountId,
  accessKeyId: options.r2Storage.accessKeyId,
  secretAccessKey: options.r2Storage.secretAccessKey,
- serviceType: 'r2'
+ serviceType: 'r2',
+ cacheConfig: options.cacheConfig
  });
  }
  // If S3 storage is specified, use it
@@ -10377,7 +12159,8 @@ async function createStorage(options = {}) {
  accessKeyId: options.s3Storage.accessKeyId,
  secretAccessKey: options.s3Storage.secretAccessKey,
  sessionToken: options.s3Storage.sessionToken,
- serviceType: 's3'
+ serviceType: 's3',
+ cacheConfig: options.cacheConfig
  });
  }
  // If GCS storage is specified, use it
@@ -10389,7 +12172,8 @@ async function createStorage(options = {}) {
  endpoint: options.gcsStorage.endpoint || 'https://storage.googleapis.com',
  accessKeyId: options.gcsStorage.accessKeyId,
  secretAccessKey: options.gcsStorage.secretAccessKey,
- serviceType: 'gcs'
+ serviceType: 'gcs',
+ cacheConfig: options.cacheConfig
  });
  }
  // Auto-detect the best storage adapter based on the environment
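// Editorial sketch (not shipped): cacheConfig now flows through createStorage() into each
// S3-compatible adapter, as the additions above show; the bucket and credential values
// are placeholders.
const storage = await createStorage({
    s3Storage: {
        bucketName: 'my-bucket',
        accessKeyId: 'placeholder-key-id',
        secretAccessKey: 'placeholder-secret'
    },
    cacheConfig: { autoTune: true, autoTuneInterval: 60000 }
});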
@@ -12893,7 +14677,12 @@ class BrainyData {
  // Set distance function
  this.distanceFunction = config.distanceFunction || cosineDistance$1;
  // Always use the optimized HNSW index implementation
- this.index = new HNSWIndexOptimized(config.hnsw || {}, this.distanceFunction, config.storageAdapter || null);
+ // Configure HNSW with disk-based storage when a storage adapter is provided
+ const hnswConfig = config.hnsw || {};
+ if (config.storageAdapter) {
+ hnswConfig.useDiskBasedIndex = true;
+ }
+ this.index = new HNSWIndexOptimized(hnswConfig, this.distanceFunction, config.storageAdapter || null);
  this.useOptimizedIndex = true;
  // Set storage if provided, otherwise it will be initialized in init()
  this.storage = config.storageAdapter || null;
@@ -12918,6 +14707,8 @@ class BrainyData {
  config.storage?.requestPersistentStorage || false;
  // Set read-only flag
  this.readOnly = config.readOnly || false;
+ // Set lazy loading in read-only mode flag
+ this.lazyLoadInReadOnlyMode = config.lazyLoadInReadOnlyMode || false;
  // Set write-only flag
  this.writeOnly = config.writeOnly || false;
  // Validate that readOnly and writeOnly are not both true
@@ -12940,6 +14731,27 @@ class BrainyData {
  ...config.realtimeUpdates
  };
  }
+ // Initialize cache configuration with intelligent defaults
+ // These defaults are automatically tuned based on environment and dataset size
+ this.cacheConfig = {
+ // Enable auto-tuning by default for optimal performance
+ autoTune: true,
+ // Set auto-tune interval to 1 minute for faster initial optimization
+ // This is especially important for large datasets
+ autoTuneInterval: 60000, // 1 minute
+ // Read-only mode specific optimizations
+ readOnlyMode: {
+ // Use aggressive prefetching in read-only mode for better performance
+ prefetchStrategy: 'aggressive'
+ }
+ };
+ // Override defaults with user-provided configuration if available
+ if (config.cache) {
+ this.cacheConfig = {
+ ...this.cacheConfig,
+ ...config.cache
+ };
+ }
  }
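// Editorial sketch (not shipped): overriding the cache defaults set in the constructor
// above; keys not supplied keep their defaults (autoTune: true, 60 s interval,
// aggressive read-only prefetching).
const db = new BrainyData({
    cache: { autoTuneInterval: 5 * 60 * 1000 } // retune every 5 minutes instead of 1
});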
  /**
  * Check if the database is in read-only mode and throw an error if it is
@@ -13232,6 +15044,18 @@ class BrainyData {
  return 'default';
  }
  }
+ /**
+ * Get the service name from options or fallback to current augmentation
+ * This provides a consistent way to handle service names across all methods
+ * @param options Options object that may contain a service property
+ * @returns The service name to use for operations
+ */
+ getServiceName(options) {
+ if (options?.service) {
+ return options.service;
+ }
+ return this.getCurrentAugmentation();
+ }
  /**
  * Initialize the database
  * Loads existing data from storage if available
@@ -13284,6 +15108,14 @@ class BrainyData {
  ...this.storageConfig,
  requestPersistentStorage: this.requestPersistentStorage
  };
+ // Add cache configuration if provided
+ if (this.cacheConfig) {
+ storageOptions.cacheConfig = {
+ ...this.cacheConfig,
+ // Pass read-only flag to optimize cache behavior
+ readOnly: this.readOnly
+ };
+ }
  // Ensure s3Storage has all required fields if it's provided
  if (storageOptions.s3Storage) {
  // Only include s3Storage if all required fields are present
@@ -13314,6 +15146,14 @@ class BrainyData {
  console.log('Database is in write-only mode, skipping index loading');
  }
  }
+ else if (this.readOnly && this.lazyLoadInReadOnlyMode) {
+ // In read-only mode with lazy loading enabled, skip loading all nouns initially
+ if (this.loggingConfig?.verbose) {
+ console.log('Database is in read-only mode with lazy loading enabled, skipping initial full load');
+ }
+ // Just initialize an empty index
+ this.index.clear();
+ }
  else {
  // Load all nouns from storage
  const nouns = await this.storage.getAllNouns();
@@ -13413,7 +15253,33 @@ class BrainyData {
13413
15253
  else {
13414
15254
  // Input needs to be vectorized
13415
15255
  try {
13416
- vector = await this.embeddingFunction(vectorOrData);
15256
+ // Check if input is a JSON object and process it specially
15257
+ if (typeof vectorOrData === 'object' &&
15258
+ vectorOrData !== null &&
15259
+ !Array.isArray(vectorOrData)) {
15260
+ // Process JSON object for better vectorization
15261
+ const preparedText = prepareJsonForVectorization(vectorOrData, {
15262
+ // Prioritize common name/title fields if they exist
15263
+ priorityFields: [
15264
+ 'name',
15265
+ 'title',
15266
+ 'company',
15267
+ 'organization',
15268
+ 'description',
15269
+ 'summary'
15270
+ ]
15271
+ });
15272
+ vector = await this.embeddingFunction(preparedText);
15273
+ // Track field names for this JSON document
15274
+ const service = this.getServiceName(options);
15275
+ if (this.storage) {
15276
+ await this.storage.trackFieldNames(vectorOrData, service);
15277
+ }
15278
+ }
15279
+ else {
15280
+ // Use standard embedding for non-JSON data
15281
+ vector = await this.embeddingFunction(vectorOrData);
15282
+ }
13417
15283
  }
13418
15284
  catch (embedError) {
13419
15285
  throw new Error(`Failed to vectorize data: ${embedError}`);
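Putting the branch together: adding a plain object now routes through JSON-aware text extraction before embedding. An end-to-end sketch; the add() argument shapes are assumptions:

await db.add(
  { name: 'Ada Lovelace', company: 'Analytical Engines Ltd', role: 'Mathematician' },
  { noun: 'Person' },
  { service: 'crm' }
)
// Internally: prepareJsonForVectorization() flattens the object with the priority
// fields listed above ranked first, the flattened text is embedded, and
// storage.trackFieldNames() records the observed field names under 'crm'
// so they can later drive field-aware search.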
@@ -13442,7 +15308,7 @@ class BrainyData {
13442
15308
  // Save noun to storage
13443
15309
  await this.storage.saveNoun(noun);
13444
15310
  // Track noun statistics
13445
- const service = options.service || this.getCurrentAugmentation();
15311
+ const service = this.getServiceName(options);
13446
15312
  await this.storage.incrementStatistic('noun', service);
13447
15313
  // Save metadata if provided and not empty
13448
15314
  if (metadata !== undefined) {
@@ -13495,7 +15361,7 @@ class BrainyData {
13495
15361
  }
13496
15362
  await this.storage.saveMetadata(id, metadataToSave);
13497
15363
  // Track metadata statistics
13498
- const metadataService = options.service || this.getCurrentAugmentation();
15364
+ const metadataService = this.getServiceName(options);
13499
15365
  await this.storage.incrementStatistic('metadata', metadataService);
13500
15366
  }
13501
15367
  }
@@ -13734,6 +15600,35 @@ class BrainyData {
13734
15600
  }
13735
15601
  // If no noun types specified, search all nouns
13736
15602
  if (!nounTypes || nounTypes.length === 0) {
15603
+ // Check if we're in read-only mode with lazy loading and the index is empty
15604
+ const indexSize = this.index.getNouns().size;
15605
+ if (this.readOnly && this.lazyLoadInReadOnlyMode && indexSize === 0) {
15606
+ if (this.loggingConfig?.verbose) {
15607
+ console.log('Lazy loading mode: Index is empty, loading nodes for search...');
15608
+ }
15609
+ // In lazy loading mode, we need to load some nodes to search
15610
+ // Instead of loading all nodes, we'll load a subset of nodes
15611
+ // Since we don't have a specialized method to get top nodes for a query,
15612
+ // we'll load a limited number of nodes from storage
15613
+ const nouns = await this.storage.getAllNouns();
15614
+ const limitedNouns = nouns.slice(0, Math.min(nouns.length, k * 10)); // Index at most 10x the requested k
15615
+ // Add these nodes to the index
15616
+ for (const node of limitedNouns) {
15617
+ // Check if the vector dimensions match the expected dimensions
15618
+ if (node.vector.length !== this._dimensions) {
15619
+ console.warn(`Skipping node ${node.id} due to dimension mismatch: expected ${this._dimensions}, got ${node.vector.length}`);
15620
+ continue;
15621
+ }
15622
+ // Add to index
15623
+ await this.index.addItem({
15624
+ id: node.id,
15625
+ vector: node.vector
15626
+ });
15627
+ }
15628
+ if (this.loggingConfig?.verbose) {
15629
+ console.log(`Lazy loading mode: Added ${limitedNouns.length} nodes to index for search`);
15630
+ }
15631
+ }
13737
15632
  // Search in the index
13738
15633
  const results = await this.index.search(queryVector, k);
13739
15634
  // Get metadata for each result
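Two properties of this warm-up path are worth noting: it still calls getAllNouns() and only then slices, and the slice is positional rather than query-aware. A behavioral sketch, continuing the reader example from earlier:

const hits = await reader.search('graph databases', 5)
// Up to k * 10 = 50 nodes get indexed here; because the subset is simply the
// first records storage returns (not the nearest to the query), early searches
// can miss relevant nodes that happen to sort later in storage order.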
@@ -13887,12 +15782,43 @@ class BrainyData {
13887
15782
  }
13888
15783
  // Check if database is in write-only mode
13889
15784
  this.checkWriteOnly();
13890
- // If input is a string and not a vector, automatically vectorize it
15785
+ // Process the query input for vectorization
13891
15786
  let queryToUse = queryVectorOrData;
15787
+ // Handle string queries
13892
15788
  if (typeof queryVectorOrData === 'string' && !options.forceEmbed) {
13893
15789
  queryToUse = await this.embed(queryVectorOrData);
13894
15790
  options.forceEmbed = false; // Already embedded, don't force again
13895
15791
  }
15792
+ // Handle JSON object queries with special processing
15793
+ else if (typeof queryVectorOrData === 'object' &&
15794
+ queryVectorOrData !== null &&
15795
+ !Array.isArray(queryVectorOrData) &&
15796
+ !options.forceEmbed) {
15797
+ // If searching within a specific field
15798
+ if (options.searchField) {
15799
+ // Extract text from the specific field
15800
+ const fieldText = extractFieldFromJson(queryVectorOrData, options.searchField);
15801
+ if (fieldText) {
15802
+ queryToUse = await this.embeddingFunction(fieldText);
15803
+ options.forceEmbed = false; // Already embedded, don't force again
15804
+ }
15805
+ }
15806
+ // Otherwise process the entire object with priority fields
15807
+ else {
15808
+ const preparedText = prepareJsonForVectorization(queryVectorOrData, {
15809
+ priorityFields: options.priorityFields || [
15810
+ 'name',
15811
+ 'title',
15812
+ 'company',
15813
+ 'organization',
15814
+ 'description',
15815
+ 'summary'
15816
+ ]
15817
+ });
15818
+ queryToUse = await this.embeddingFunction(preparedText);
15819
+ options.forceEmbed = false; // Already embedded, don't force again
15820
+ }
15821
+ }
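Two query styles fall out of this branch; option names are as used above, and extractFieldFromJson is assumed to accept a field path. A sketch:

// Scope the query to one field of a JSON query object:
await db.search({ name: 'Grace Hopper', bio: 'Computing pioneer' }, 10, { searchField: 'name' })

// Or embed the whole object, steering extraction with custom priorities:
const profile = { title: 'CTO', summary: 'Leads platform engineering' }
await db.search(profile, 10, { priorityFields: ['title', 'summary'] })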
13896
15822
  // If noun types are specified, use searchByNounTypes
13897
15823
  let searchResults;
13898
15824
  if (options.nounTypes && options.nounTypes.length > 0) {
@@ -14094,13 +16020,17 @@ class BrainyData {
14094
16020
  return false;
14095
16021
  // Filter by noun type
14096
16022
  if (filter.nounType) {
14097
- const nounTypes = Array.isArray(filter.nounType) ? filter.nounType : [filter.nounType];
16023
+ const nounTypes = Array.isArray(filter.nounType)
16024
+ ? filter.nounType
16025
+ : [filter.nounType];
14098
16026
  if (!nounTypes.includes(metadata.noun))
14099
16027
  return false;
14100
16028
  }
14101
16029
  // Filter by service
14102
16030
  if (filter.service && metadata.service) {
14103
- const services = Array.isArray(filter.service) ? filter.service : [filter.service];
16031
+ const services = Array.isArray(filter.service)
16032
+ ? filter.service
16033
+ : [filter.service];
14104
16034
  if (!services.includes(metadata.service))
14105
16035
  return false;
14106
16036
  }
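The reformat does not change behavior: both filters still normalize a scalar to a one-element array before matching, so these filter shapes are equivalent in effect:

// { nounType: 'Person' }            matches metadata.noun === 'Person'
// { nounType: ['Person', 'Org'] }   matches either value
// { service: ['crm', 'billing'] }   matches metadata.service in that set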
@@ -14185,7 +16115,7 @@ class BrainyData {
14185
16115
  // Remove from storage
14186
16116
  await this.storage.deleteNoun(actualId);
14187
16117
  // Track deletion statistics
14188
- const service = options.service || 'default';
16118
+ const service = this.getServiceName(options);
14189
16119
  await this.storage.decrementStatistic('noun', service);
14190
16120
  // Try to remove metadata (ignore errors)
14191
16121
  try {
@@ -14512,7 +16442,7 @@ class BrainyData {
14512
16442
  // Save verb to storage
14513
16443
  await this.storage.saveVerb(verb);
14514
16444
  // Track verb statistics
14515
- const serviceForStats = options.service || 'default';
16445
+ const serviceForStats = this.getServiceName(options);
14516
16446
  await this.storage.incrementStatistic('verb', serviceForStats);
14517
16447
  // Update HNSW index size (excluding verbs)
14518
16448
  await this.storage.updateHnswIndexSize(await this.getNounCount());
@@ -14660,7 +16590,7 @@ class BrainyData {
14660
16590
  // Remove from storage
14661
16591
  await this.storage.deleteVerb(id);
14662
16592
  // Track deletion statistics
14663
- const service = options.service || 'default';
16593
+ const service = this.getServiceName(options);
14664
16594
  await this.storage.decrementStatistic('verb', service);
14665
16595
  return true;
14666
16596
  }
@@ -15588,14 +17518,19 @@ class BrainyData {
15588
17518
  console.log('Reconstructing HNSW index from backup data...');
15589
17519
  // Create a new index with the restored configuration
15590
17520
  // Always use the optimized implementation for consistency
15591
- this.index = new HNSWIndexOptimized(data.hnswIndex.config, this.distanceFunction, this.storage);
17521
+ // Configure HNSW with disk-based storage when a storage adapter is provided
17522
+ const hnswConfig = data.hnswIndex.config || {};
17523
+ if (this.storage) {
17524
+ hnswConfig.useDiskBasedIndex = true;
17525
+ }
17526
+ this.index = new HNSWIndexOptimized(hnswConfig, this.distanceFunction, this.storage);
15592
17527
  this.useOptimizedIndex = true;
15593
17528
  // For the storage-adapter-coverage test, we want the index to be empty
15594
17529
  // after restoration, as specified in the test expectation
15595
17530
  // This is a special case for the test; in a real application we would
15596
17531
  // re-add all nouns to the index
15597
17532
  const isTestEnvironment = "production" === 'test' || process.env.VITEST;
15598
- const isStorageTest = data.nouns.some(noun => noun.metadata &&
17533
+ const isStorageTest = data.nouns.some((noun) => noun.metadata &&
15599
17534
  typeof noun.metadata === 'object' &&
15600
17535
  'text' in noun.metadata &&
15601
17536
  typeof noun.metadata.text === 'string' &&
@@ -15742,6 +17677,82 @@ class BrainyData {
15742
17677
  throw new Error(`Failed to generate random graph: ${error}`);
15743
17678
  }
15744
17679
  }
17680
+ /**
17681
+ * Get available field names by service
17682
+ * This helps users understand what fields are available for searching from different data sources
17683
+ * @returns Record of field names by service
17684
+ */
17685
+ async getAvailableFieldNames() {
17686
+ await this.ensureInitialized();
17687
+ if (!this.storage) {
17688
+ return {};
17689
+ }
17690
+ return this.storage.getAvailableFieldNames();
17691
+ }
17692
+ /**
17693
+ * Get standard field mappings
17694
+ * This helps users understand how fields from different services map to standard field names
17695
+ * @returns Record of standard field mappings
17696
+ */
17697
+ async getStandardFieldMappings() {
17698
+ await this.ensureInitialized();
17699
+ if (!this.storage) {
17700
+ return {};
17701
+ }
17702
+ return this.storage.getStandardFieldMappings();
17703
+ }
17704
+ /**
17705
+ * Search using a standard field name
17706
+ * This allows searching across multiple services using a standardized field name
17707
+ * @param standardField The standard field name to search in
17708
+ * @param searchTerm The term to search for
17709
+ * @param k Number of results to return
17710
+ * @param options Additional search options
17711
+ * @returns Array of search results
17712
+ */
17713
+ async searchByStandardField(standardField, searchTerm, k = 10, options = {}) {
17714
+ await this.ensureInitialized();
17715
+ // Check if database is in write-only mode
17716
+ this.checkWriteOnly();
17717
+ // Get standard field mappings
17718
+ const standardFieldMappings = await this.getStandardFieldMappings();
17719
+ // If the standard field doesn't exist, return empty results
17720
+ if (!standardFieldMappings[standardField]) {
17721
+ return [];
17722
+ }
17723
+ // Filter by services if specified
17724
+ let serviceFieldMappings = standardFieldMappings[standardField];
17725
+ if (options.services && options.services.length > 0) {
17726
+ const filteredMappings = {};
17727
+ for (const service of options.services) {
17728
+ if (serviceFieldMappings[service]) {
17729
+ filteredMappings[service] = serviceFieldMappings[service];
17730
+ }
17731
+ }
17732
+ serviceFieldMappings = filteredMappings;
17733
+ }
17734
+ // If no mappings after filtering, return empty results
17735
+ if (Object.keys(serviceFieldMappings).length === 0) {
17736
+ return [];
17737
+ }
17738
+ // Search in each service's fields and combine results
17739
+ const allResults = [];
17740
+ for (const [service, fieldNames] of Object.entries(serviceFieldMappings)) {
17741
+ for (const fieldName of fieldNames) {
17742
+ // Search using the specific field name for this service
17743
+ const results = await this.search(searchTerm, k, {
17744
+ searchField: fieldName,
17745
+ service,
17746
+ includeVerbs: options.includeVerbs,
17747
+ searchMode: options.searchMode
17748
+ });
17749
+ // Add results to the combined list
17750
+ allResults.push(...results);
17751
+ }
17752
+ }
17753
+ // Sort by score and limit to k results
17754
+ return allResults.sort((a, b) => b.score - a.score).slice(0, k);
17755
+ }
15745
17756
  }
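A combined usage sketch for the three new methods; 'fullName' and the service names are hypothetical, since real values come from whatever trackFieldNames() has previously observed:

const fieldsByService = await db.getAvailableFieldNames() // e.g. { crm: ['name', 'company'], hr: [...] }
const mappings = await db.getStandardFieldMappings()

const people = await db.searchByStandardField('fullName', 'Ada Lovelace', 5, {
  services: ['crm', 'hr'] // optional: restrict which services' mappings are searched
})
// Each (service, field) pair runs a field-scoped search; per-service hits are
// merged, sorted by score descending, and trimmed to the top k.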
15746
17757
 
15747
17758
  /**