@soulcraft/brainy 0.31.0 → 0.33.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +552 -357
- package/dist/brainyData.d.ts +123 -0
- package/dist/coreTypes.d.ts +27 -0
- package/dist/storage/adapters/baseStorageAdapter.d.ts +16 -0
- package/dist/storage/adapters/baseStorageAdapter.d.ts.map +1 -1
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +75 -0
- package/dist/storage/adapters/s3CompatibleStorage.d.ts.map +1 -1
- package/dist/storage/baseStorage.d.ts +4 -0
- package/dist/storage/baseStorage.d.ts.map +1 -1
- package/dist/storage/cacheManager.d.ts +264 -0
- package/dist/storage/cacheManager.d.ts.map +1 -0
- package/dist/storage/storageFactory.d.ts +44 -0
- package/dist/storage/storageFactory.d.ts.map +1 -1
- package/dist/unified.js +2341 -330
- package/dist/unified.min.js +748 -748
- package/dist/utils/fieldNameTracking.d.ts +21 -0
- package/dist/utils/fieldNameTracking.d.ts.map +1 -0
- package/dist/utils/index.d.ts +2 -0
- package/dist/utils/index.d.ts.map +1 -1
- package/dist/utils/jsonProcessing.d.ts +43 -0
- package/dist/utils/jsonProcessing.d.ts.map +1 -0
- package/package.json +1 -1
package/dist/unified.js
CHANGED
@@ -4432,6 +4432,275 @@ async function getStatistics(instance, options = {}) {
     }
 }
 
+/**
+ * Utility functions for processing JSON documents for vectorization and search
+ */
+/**
+ * Extracts text from a JSON object for vectorization
+ * This function recursively processes the JSON object and extracts text from all fields
+ * It can also prioritize specific fields if provided
+ *
+ * @param jsonObject The JSON object to extract text from
+ * @param options Configuration options for text extraction
+ * @returns A string containing the extracted text
+ */
+function extractTextFromJson(jsonObject, options = {}) {
+    // Set default options
+    const { priorityFields = [], excludeFields = [], includeFieldNames = true, maxDepth = 5, currentDepth = 0, fieldPath = [] } = options;
+    // If input is not an object or array, or we've reached max depth, return as string
+    if (jsonObject === null ||
+        jsonObject === undefined ||
+        typeof jsonObject !== 'object' ||
+        currentDepth >= maxDepth) {
+        return String(jsonObject || '');
+    }
+    const extractedText = [];
+    const priorityText = [];
+    // Process arrays
+    if (Array.isArray(jsonObject)) {
+        for (let i = 0; i < jsonObject.length; i++) {
+            const value = jsonObject[i];
+            const newPath = [...fieldPath, i.toString()];
+            // Recursively extract text from array items
+            const itemText = extractTextFromJson(value, {
+                priorityFields,
+                excludeFields,
+                includeFieldNames,
+                maxDepth,
+                currentDepth: currentDepth + 1,
+                fieldPath: newPath
+            });
+            if (itemText) {
+                extractedText.push(itemText);
+            }
+        }
+    }
+    // Process objects
+    else {
+        for (const [key, value] of Object.entries(jsonObject)) {
+            // Skip excluded fields
+            if (excludeFields.includes(key)) {
+                continue;
+            }
+            const newPath = [...fieldPath, key];
+            const fullPath = newPath.join('.');
+            // Check if this is a priority field
+            const isPriority = priorityFields.some(field => {
+                // Exact match
+                if (field === key)
+                    return true;
+                // Path match
+                if (field === fullPath)
+                    return true;
+                // Wildcard match (e.g., "user.*" matches "user.name", "user.email", etc.)
+                if (field.endsWith('.*') && fullPath.startsWith(field.slice(0, -2)))
+                    return true;
+                return false;
+            });
+            // Get the field value as text
+            let fieldText;
+            if (typeof value === 'object' && value !== null) {
+                // Recursively extract text from nested objects
+                fieldText = extractTextFromJson(value, {
+                    priorityFields,
+                    excludeFields,
+                    includeFieldNames,
+                    maxDepth,
+                    currentDepth: currentDepth + 1,
+                    fieldPath: newPath
+                });
+            }
+            else {
+                fieldText = String(value || '');
+            }
+            // Add field name if requested
+            if (includeFieldNames && fieldText) {
+                fieldText = `${key}: ${fieldText}`;
+            }
+            // Add to appropriate collection
+            if (fieldText) {
+                if (isPriority) {
+                    priorityText.push(fieldText);
+                }
+                else {
+                    extractedText.push(fieldText);
+                }
+            }
+        }
+    }
+    // Combine priority text (repeated for emphasis) and regular text
+    return [...priorityText, ...priorityText, ...extractedText].join(' ');
+}
+/**
+ * Prepares a JSON document for vectorization
+ * This function extracts text from the JSON document and formats it for optimal vectorization
+ *
+ * @param jsonDocument The JSON document to prepare
+ * @param options Configuration options for preparation
+ * @returns A string ready for vectorization
+ */
+function prepareJsonForVectorization(jsonDocument, options = {}) {
+    // If input is a string, try to parse it as JSON
+    let document = jsonDocument;
+    if (typeof jsonDocument === 'string') {
+        try {
+            document = JSON.parse(jsonDocument);
+        }
+        catch (e) {
+            // If parsing fails, treat it as a plain string
+            return jsonDocument;
+        }
+    }
+    // If not an object after parsing, return as is
+    if (typeof document !== 'object' || document === null) {
+        return String(document || '');
+    }
+    // Extract text from the document
+    return extractTextFromJson(document, options);
+}
+/**
+ * Extracts text from a specific field in a JSON document
+ * This is useful for searching within specific fields
+ *
+ * @param jsonDocument The JSON document to extract from
+ * @param fieldPath The path to the field (e.g., "user.name" or "addresses[0].city")
+ * @returns The extracted text or empty string if field not found
+ */
+function extractFieldFromJson(jsonDocument, fieldPath) {
+    // If input is a string, try to parse it as JSON
+    let document = jsonDocument;
+    if (typeof jsonDocument === 'string') {
+        try {
+            document = JSON.parse(jsonDocument);
+        }
+        catch (e) {
+            // If parsing fails, return empty string
+            return '';
+        }
+    }
+    // If not an object after parsing, return empty string
+    if (typeof document !== 'object' || document === null) {
+        return '';
+    }
+    // Parse the field path
+    const parts = fieldPath.split('.');
+    let current = document;
+    // Navigate through the path
+    for (const part of parts) {
+        // Handle array indexing (e.g., "addresses[0]")
+        const match = part.match(/^([^[]+)(?:\[(\d+)\])?$/);
+        if (!match) {
+            return '';
+        }
+        const [, key, indexStr] = match;
+        // Move to the next level
+        current = current[key];
+        // If we have an array index, access that element
+        if (indexStr !== undefined && Array.isArray(current)) {
+            const index = parseInt(indexStr, 10);
+            current = current[index];
+        }
+        // If we've reached a null or undefined value, return empty string
+        if (current === null || current === undefined) {
+            return '';
+        }
+    }
+    // Convert the final value to string
+    return typeof current === 'object'
+        ? JSON.stringify(current)
+        : String(current);
+}
+
+/**
+ * Utility functions for tracking and managing field names in JSON documents
+ */
+/**
+ * Extracts field names from a JSON document
+ * @param jsonObject The JSON object to extract field names from
+ * @param options Configuration options
+ * @returns An array of field paths (e.g., "user.name", "addresses[0].city")
+ */
+function extractFieldNamesFromJson(jsonObject, options = {}) {
+    const { maxDepth = 5, currentDepth = 0, currentPath = '', fieldNames = new Set() } = options;
+    if (jsonObject === null ||
+        jsonObject === undefined ||
+        typeof jsonObject !== 'object' ||
+        currentDepth >= maxDepth) {
+        return Array.from(fieldNames);
+    }
+    if (Array.isArray(jsonObject)) {
+        // For arrays, we'll just check the first item to avoid explosion of paths
+        if (jsonObject.length > 0) {
+            const arrayPath = currentPath ? `${currentPath}[0]` : '[0]';
+            extractFieldNamesFromJson(jsonObject[0], {
+                maxDepth,
+                currentDepth: currentDepth + 1,
+                currentPath: arrayPath,
+                fieldNames
+            });
+        }
+    }
+    else {
+        // For objects, process each property
+        for (const key of Object.keys(jsonObject)) {
+            const value = jsonObject[key];
+            const fieldPath = currentPath ? `${currentPath}.${key}` : key;
+            // Add this field path
+            fieldNames.add(fieldPath);
+            // Recursively process nested objects
+            if (typeof value === 'object' && value !== null) {
+                extractFieldNamesFromJson(value, {
+                    maxDepth,
+                    currentDepth: currentDepth + 1,
+                    currentPath: fieldPath,
+                    fieldNames
+                });
+            }
+        }
+    }
+    return Array.from(fieldNames);
+}
+/**
+ * Maps field names to standard field names based on common patterns
+ * @param fieldName The field name to map
+ * @returns The standard field name if a match is found, or null if no match
+ */
+function mapToStandardField(fieldName) {
+    // Standard field mappings
+    const standardMappings = {
+        'title': ['title', 'name', 'headline', 'subject'],
+        'description': ['description', 'summary', 'content', 'text', 'body'],
+        'author': ['author', 'creator', 'user', 'owner', 'by'],
+        'date': ['date', 'created', 'createdAt', 'timestamp', 'published'],
+        'url': ['url', 'link', 'href', 'source'],
+        'image': ['image', 'thumbnail', 'photo', 'picture'],
+        'tags': ['tags', 'categories', 'keywords', 'topics']
+    };
+    // Check for matches
+    for (const [standardField, possibleMatches] of Object.entries(standardMappings)) {
+        // Exact match
+        if (possibleMatches.includes(fieldName)) {
+            return standardField;
+        }
+        // Path match (e.g., "user.name" matches "name")
+        const parts = fieldName.split('.');
+        const lastPart = parts[parts.length - 1];
+        if (possibleMatches.includes(lastPart)) {
+            return standardField;
+        }
+        // Array match (e.g., "items[0].name" matches "name")
+        if (fieldName.includes('[')) {
+            for (const part of parts) {
+                const cleanPart = part.split('[')[0];
+                if (possibleMatches.includes(cleanPart)) {
+                    return standardField;
+                }
+            }
+        }
+    }
+    return null;
+}
+
 /**
  * HNSW (Hierarchical Navigable Small World) Index implementation
  * Based on the paper: "Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs"
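The extraction utilities added above are plain functions over parsed JSON, so their behavior is easy to check in isolation. A minimal sketch of how they might be exercised; the document shape here is hypothetical, not taken from the package:

    const doc = {
      title: 'Hello',
      author: { name: 'Ada' },
      tags: ['intro', 'demo']
    };
    // Priority fields are emitted first and repeated for emphasis in the vector text
    const text = extractTextFromJson(doc, { priorityFields: ['title'], excludeFields: ['tags'] });
    // -> "title: Hello title: Hello author: name: Ada"
    extractFieldFromJson(doc, 'author.name'); // -> "Ada"
    mapToStandardField('user.name');          // -> "title", since the last path segment "name" is in the title bucket

Note that the standard-field mapping matches on the last path segment, so a nested "user.name" lands in the "title" bucket rather than "author"; priority fields are deliberately duplicated in the output to weight them during vectorization.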
@@ -5708,6 +5977,107 @@ class BaseStorageAdapter {
         // Call the protected flushStatistics method to immediately write to storage
         await this.flushStatistics();
     }
+    /**
+     * Track field names from a JSON document
+     * @param jsonDocument The JSON document to extract field names from
+     * @param service The service that inserted the data
+     */
+    async trackFieldNames(jsonDocument, service) {
+        // Skip if not a JSON object
+        if (typeof jsonDocument !== 'object' || jsonDocument === null || Array.isArray(jsonDocument)) {
+            return;
+        }
+        // Get current statistics from cache or storage
+        let statistics = this.statisticsCache;
+        if (!statistics) {
+            statistics = await this.getStatisticsData();
+            if (!statistics) {
+                statistics = this.createDefaultStatistics();
+            }
+            // Update the cache
+            this.statisticsCache = {
+                ...statistics,
+                nounCount: { ...statistics.nounCount },
+                verbCount: { ...statistics.verbCount },
+                metadataCount: { ...statistics.metadataCount },
+                fieldNames: { ...statistics.fieldNames },
+                standardFieldMappings: { ...statistics.standardFieldMappings }
+            };
+        }
+        // Ensure fieldNames exists
+        if (!this.statisticsCache.fieldNames) {
+            this.statisticsCache.fieldNames = {};
+        }
+        // Ensure standardFieldMappings exists
+        if (!this.statisticsCache.standardFieldMappings) {
+            this.statisticsCache.standardFieldMappings = {};
+        }
+        // Extract field names from the JSON document
+        const fieldNames = extractFieldNamesFromJson(jsonDocument);
+        // Initialize service entry if it doesn't exist
+        if (!this.statisticsCache.fieldNames[service]) {
+            this.statisticsCache.fieldNames[service] = [];
+        }
+        // Add new field names to the service's list
+        for (const fieldName of fieldNames) {
+            if (!this.statisticsCache.fieldNames[service].includes(fieldName)) {
+                this.statisticsCache.fieldNames[service].push(fieldName);
+            }
+            // Map to standard field if possible
+            const standardField = mapToStandardField(fieldName);
+            if (standardField) {
+                // Initialize standard field entry if it doesn't exist
+                if (!this.statisticsCache.standardFieldMappings[standardField]) {
+                    this.statisticsCache.standardFieldMappings[standardField] = {};
+                }
+                // Initialize service entry if it doesn't exist
+                if (!this.statisticsCache.standardFieldMappings[standardField][service]) {
+                    this.statisticsCache.standardFieldMappings[standardField][service] = [];
+                }
+                // Add field name to standard field mapping if not already there
+                if (!this.statisticsCache.standardFieldMappings[standardField][service].includes(fieldName)) {
+                    this.statisticsCache.standardFieldMappings[standardField][service].push(fieldName);
+                }
+            }
+        }
+        // Update timestamp
+        this.statisticsCache.lastUpdated = new Date().toISOString();
+        // Schedule a batch update
+        this.statisticsModified = true;
+        this.scheduleBatchUpdate();
+    }
+    /**
+     * Get available field names by service
+     * @returns Record of field names by service
+     */
+    async getAvailableFieldNames() {
+        // Get current statistics from cache or storage
+        let statistics = this.statisticsCache;
+        if (!statistics) {
+            statistics = await this.getStatisticsData();
+            if (!statistics) {
+                return {};
+            }
+        }
+        // Return field names by service
+        return statistics.fieldNames || {};
+    }
+    /**
+     * Get standard field mappings
+     * @returns Record of standard field mappings
+     */
+    async getStandardFieldMappings() {
+        // Get current statistics from cache or storage
+        let statistics = this.statisticsCache;
+        if (!statistics) {
+            statistics = await this.getStatisticsData();
+            if (!statistics) {
+                return {};
+            }
+        }
+        // Return standard field mappings
+        return statistics.standardFieldMappings || {};
+    }
     /**
      * Create default statistics data
      * @returns Default statistics data
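With these additions, any adapter extending BaseStorageAdapter accumulates a per-service catalog of observed field paths plus their standard-field groupings inside the statistics object. A sketch of the intended flow; the adapter instance and the 'crm-import' service label are illustrative:

    // Hypothetical adapter instance extending BaseStorageAdapter
    await adapter.trackFieldNames({ headline: 'Q3 report', creator: 'ops' }, 'crm-import');
    const byService = await adapter.getAvailableFieldNames();
    // roughly -> { 'crm-import': ['headline', 'creator'] }
    const mappings = await adapter.getStandardFieldMappings();
    // roughly -> { title: { 'crm-import': ['headline'] }, author: { 'crm-import': ['creator'] } }

Tracking piggybacks on the existing statistics cache and batch-update scheduling, so field-name writes are coalesced rather than flushed per document.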
@@ -5718,6 +6088,8 @@ class BaseStorageAdapter {
             verbCount: {},
             metadataCount: {},
             hnswIndexSize: 0,
+            fieldNames: {},
+            standardFieldMappings: {},
             lastUpdated: new Date().toISOString()
         };
     }
@@ -5766,9 +6138,12 @@ class BaseStorage extends BaseStorageAdapter {
     }
     /**
     * Get all nouns from storage
+    * @deprecated This method is deprecated and will be removed in a future version.
+    * It can cause memory issues with large datasets. Use getNouns() with pagination instead.
     */
    async getAllNouns() {
        await this.ensureInitialized();
+        console.warn('WARNING: getAllNouns() is deprecated and will be removed in a future version. Use getNouns() with pagination instead.');
        return this.getAllNouns_internal();
    }
    /**
@@ -5803,9 +6178,12 @@ class BaseStorage extends BaseStorageAdapter {
    }
    /**
     * Get all verbs from storage
+    * @deprecated This method is deprecated and will be removed in a future version.
+    * It can cause memory issues with large datasets. Use getVerbs() with pagination instead.
     */
    async getAllVerbs() {
        await this.ensureInitialized();
+        console.warn('WARNING: getAllVerbs() is deprecated and will be removed in a future version. Use getVerbs() with pagination instead.');
        return this.getAllVerbs_internal();
    }
    /**
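Callers can migrate off the deprecated accessors by paging instead of materializing the full set. A sketch assuming a storage instance that exposes the getNouns() contract shown in the hunks that follow; process() is a hypothetical per-item handler:

    // Before (deprecated, loads everything into memory):
    // const nouns = await storage.getAllNouns();

    // After: walk pages of 100 using the offset-based pagination options
    let offset = 0;
    let page;
    do {
      page = await storage.getNouns({ pagination: { limit: 100, offset } });
      for (const noun of page.items) {
        process(noun); // hypothetical per-item handler
      }
      offset += page.items.length;
    } while (page.hasMore);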
@@ -5840,10 +6218,13 @@ class BaseStorage extends BaseStorageAdapter {
        const pagination = options?.pagination || {};
        const limit = pagination.limit || 100;
        const offset = pagination.offset || 0;
+        const cursor = pagination.cursor;
        // Optimize for common filter cases to avoid loading all nouns
        if (options?.filter) {
            // If filtering by nounType only, use the optimized method
-            if (options.filter.nounType &&
+            if (options.filter.nounType &&
+                !options.filter.service &&
+                !options.filter.metadata) {
                const nounType = Array.isArray(options.filter.nounType)
                    ? options.filter.nounType[0]
                    : options.filter.nounType;
@@ -5866,81 +6247,124 @@ class BaseStorage extends BaseStorageAdapter {
                };
            }
        }
-        // For more complex filtering or no filtering,
-        //
-        const maxNouns = offset + limit + 1; // Get one extra to check if there are more
-        let allNouns = [];
+        // For more complex filtering or no filtering, use a paginated approach
+        // that avoids loading all nouns into memory at once
        try {
-            //
-
-
-
-
-
+            // First, try to get a count of total nouns (if the adapter supports it)
+            let totalCount = undefined;
+            try {
+                // This is an optional method that adapters may implement
+                if (typeof this.countNouns === 'function') {
+                    totalCount = await this.countNouns(options?.filter);
+                }
+            }
+            catch (countError) {
+                // Ignore errors from count method, it's optional
+                console.warn('Error getting noun count:', countError);
+            }
+            // Check if the adapter has a paginated method for getting nouns
+            if (typeof this.getNounsWithPagination === 'function') {
+                // Use the adapter's paginated method
+                const result = await this.getNounsWithPagination({
+                    limit,
+                    cursor,
+                    filter: options?.filter
+                });
+                // Apply offset if needed (some adapters might not support offset)
+                const items = result.items.slice(offset);
+                return {
+                    items,
+                    totalCount: result.totalCount || totalCount,
+                    hasMore: result.hasMore,
+                    nextCursor: result.nextCursor
+                };
+            }
+            // If the adapter doesn't have a paginated method, fall back to the old approach
+            // but with a warning and a reasonable limit
+            console.warn('Storage adapter does not support pagination, falling back to loading all nouns. This may cause performance issues with large datasets.');
+            // Get nouns with a reasonable limit to avoid memory issues
+            const maxNouns = Math.min(offset + limit + 100, 1000); // Reasonable limit
+            let allNouns = [];
+            try {
+                // Try to get only the nouns we need
+                allNouns = await this.getAllNouns_internal();
+                // If we have too many nouns, truncate the array to avoid memory issues
+                if (allNouns.length > maxNouns) {
+                    console.warn(`Large number of nouns (${allNouns.length}), truncating to ${maxNouns} for filtering`);
+                    allNouns = allNouns.slice(0, maxNouns);
+                }
+            }
+            catch (error) {
+                console.error('Error getting all nouns:', error);
+                // Return empty result on error
+                return {
+                    items: [],
+                    totalCount: 0,
+                    hasMore: false
+                };
            }
+            // Apply filtering if needed
+            let filteredNouns = allNouns;
+            if (options?.filter) {
+                // Filter by noun type
+                if (options.filter.nounType) {
+                    const nounTypes = Array.isArray(options.filter.nounType)
+                        ? options.filter.nounType
+                        : [options.filter.nounType];
+                    filteredNouns = filteredNouns.filter((noun) => {
+                        // HNSWNoun doesn't have a type property directly, check metadata
+                        const nounType = noun.metadata?.type;
+                        return typeof nounType === 'string' && nounTypes.includes(nounType);
+                    });
+                }
+                // Filter by service
+                if (options.filter.service) {
+                    const services = Array.isArray(options.filter.service)
+                        ? options.filter.service
+                        : [options.filter.service];
+                    filteredNouns = filteredNouns.filter((noun) => {
+                        // HNSWNoun doesn't have a service property directly, check metadata
+                        const service = noun.metadata?.service;
+                        return typeof service === 'string' && services.includes(service);
+                    });
+                }
+                // Filter by metadata
+                if (options.filter.metadata) {
+                    const metadataFilter = options.filter.metadata;
+                    filteredNouns = filteredNouns.filter((noun) => {
+                        if (!noun.metadata)
+                            return false;
+                        // Check if all metadata keys match
+                        return Object.entries(metadataFilter).every(([key, value]) => noun.metadata && noun.metadata[key] === value);
+                    });
+                }
+            }
+            // Get total count before pagination
+            totalCount = totalCount || filteredNouns.length;
+            // Apply pagination
+            const paginatedNouns = filteredNouns.slice(offset, offset + limit);
+            const hasMore = offset + limit < filteredNouns.length || filteredNouns.length >= maxNouns;
+            // Set next cursor if there are more items
+            let nextCursor = undefined;
+            if (hasMore && paginatedNouns.length > 0) {
+                const lastItem = paginatedNouns[paginatedNouns.length - 1];
+                nextCursor = lastItem.id;
+            }
+            return {
+                items: paginatedNouns,
+                totalCount,
+                hasMore,
+                nextCursor
+            };
        }
        catch (error) {
-            console.error('Error getting
-            // Return empty result on error
+            console.error('Error getting nouns with pagination:', error);
            return {
                items: [],
                totalCount: 0,
                hasMore: false
            };
        }
-        // Apply filtering if needed
-        let filteredNouns = allNouns;
-        if (options?.filter) {
-            // Filter by noun type
-            if (options.filter.nounType) {
-                const nounTypes = Array.isArray(options.filter.nounType)
-                    ? options.filter.nounType
-                    : [options.filter.nounType];
-                filteredNouns = filteredNouns.filter(noun => {
-                    // HNSWNoun doesn't have a type property directly, check metadata
-                    const nounType = noun.metadata?.type;
-                    return typeof nounType === 'string' && nounTypes.includes(nounType);
-                });
-            }
-            // Filter by service
-            if (options.filter.service) {
-                const services = Array.isArray(options.filter.service)
-                    ? options.filter.service
-                    : [options.filter.service];
-                filteredNouns = filteredNouns.filter(noun => {
-                    // HNSWNoun doesn't have a service property directly, check metadata
-                    const service = noun.metadata?.service;
-                    return typeof service === 'string' && services.includes(service);
-                });
-            }
-            // Filter by metadata
-            if (options.filter.metadata) {
-                const metadataFilter = options.filter.metadata;
-                filteredNouns = filteredNouns.filter(noun => {
-                    if (!noun.metadata)
-                        return false;
-                    // Check if all metadata keys match
-                    return Object.entries(metadataFilter).every(([key, value]) => noun.metadata && noun.metadata[key] === value);
-                });
-            }
-        }
-        // Get total count before pagination
-        const totalCount = filteredNouns.length;
-        // Apply pagination
-        const paginatedNouns = filteredNouns.slice(offset, offset + limit);
-        const hasMore = offset + limit < totalCount;
-        // Set next cursor if there are more items
-        let nextCursor = undefined;
-        if (hasMore && paginatedNouns.length > 0) {
-            const lastItem = paginatedNouns[paginatedNouns.length - 1];
-            nextCursor = lastItem.id;
-        }
-        return {
-            items: paginatedNouns,
-            totalCount,
-            hasMore,
-            nextCursor
-        };
    }
    /**
     * Get verbs with pagination and filtering
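The rewritten path prefers an adapter-provided getNounsWithPagination() and threads a cursor through each result, so consumers can iterate without offsets. A sketch of cursor-style consumption, assuming an adapter that implements getNounsWithPagination() (the fallback path ignores the cursor); the filter value and handle() callback are illustrative:

    let cursor = undefined;
    do {
      const page = await storage.getNouns({
        filter: { nounType: 'person' },   // illustrative filter
        pagination: { limit: 100, cursor }
      });
      for (const noun of page.items) {
        handle(noun); // hypothetical consumer
      }
      cursor = page.nextCursor; // id of the last item on the page, undefined when exhausted
    } while (cursor);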
@@ -5953,11 +6377,14 @@ class BaseStorage extends BaseStorageAdapter {
        const pagination = options?.pagination || {};
        const limit = pagination.limit || 100;
        const offset = pagination.offset || 0;
+        const cursor = pagination.cursor;
        // Optimize for common filter cases to avoid loading all verbs
        if (options?.filter) {
            // If filtering by sourceId only, use the optimized method
-            if (options.filter.sourceId &&
-                !options.filter.
+            if (options.filter.sourceId &&
+                !options.filter.verbType &&
+                !options.filter.targetId &&
+                !options.filter.service &&
                !options.filter.metadata) {
                const sourceId = Array.isArray(options.filter.sourceId)
                    ? options.filter.sourceId[0]
|
|
|
5981
6408
|
};
|
|
5982
6409
|
}
|
|
5983
6410
|
// If filtering by targetId only, use the optimized method
|
|
5984
|
-
if (options.filter.targetId &&
|
|
5985
|
-
!options.filter.
|
|
6411
|
+
if (options.filter.targetId &&
|
|
6412
|
+
!options.filter.verbType &&
|
|
6413
|
+
!options.filter.sourceId &&
|
|
6414
|
+
!options.filter.service &&
|
|
5986
6415
|
!options.filter.metadata) {
|
|
5987
6416
|
const targetId = Array.isArray(options.filter.targetId)
|
|
5988
6417
|
? options.filter.targetId[0]
|
|
@@ -6006,8 +6435,10 @@ class BaseStorage extends BaseStorageAdapter {
                };
            }
            // If filtering by verbType only, use the optimized method
-            if (options.filter.verbType &&
-                !options.filter.
+            if (options.filter.verbType &&
+                !options.filter.sourceId &&
+                !options.filter.targetId &&
+                !options.filter.service &&
                !options.filter.metadata) {
                const verbType = Array.isArray(options.filter.verbType)
                    ? options.filter.verbType[0]
@@ -6031,91 +6462,134 @@ class BaseStorage extends BaseStorageAdapter {
                };
            }
        }
-        // For more complex filtering or no filtering,
-        //
-        const maxVerbs = offset + limit + 1; // Get one extra to check if there are more
-        let allVerbs = [];
+        // For more complex filtering or no filtering, use a paginated approach
+        // that avoids loading all verbs into memory at once
        try {
-            //
-
-
-
-
+            // First, try to get a count of total verbs (if the adapter supports it)
+            let totalCount = undefined;
+            try {
+                // This is an optional method that adapters may implement
+                if (typeof this.countVerbs === 'function') {
+                    totalCount = await this.countVerbs(options?.filter);
+                }
            }
+            catch (countError) {
+                // Ignore errors from count method, it's optional
+                console.warn('Error getting verb count:', countError);
+            }
+            // Check if the adapter has a paginated method for getting verbs
+            if (typeof this.getVerbsWithPagination === 'function') {
+                // Use the adapter's paginated method
+                const result = await this.getVerbsWithPagination({
+                    limit,
+                    cursor,
+                    filter: options?.filter
+                });
+                // Apply offset if needed (some adapters might not support offset)
+                const items = result.items.slice(offset);
+                return {
+                    items,
+                    totalCount: result.totalCount || totalCount,
+                    hasMore: result.hasMore,
+                    nextCursor: result.nextCursor
+                };
+            }
+            // If the adapter doesn't have a paginated method, fall back to the old approach
+            // but with a warning and a reasonable limit
+            console.warn('Storage adapter does not support pagination, falling back to loading all verbs. This may cause performance issues with large datasets.');
+            // Get verbs with a reasonable limit to avoid memory issues
+            const maxVerbs = Math.min(offset + limit + 100, 1000); // Reasonable limit
+            let allVerbs = [];
+            try {
+                // Try to get only the verbs we need
+                allVerbs = await this.getAllVerbs_internal();
+                // If we have too many verbs, truncate the array to avoid memory issues
+                if (allVerbs.length > maxVerbs) {
+                    console.warn(`Large number of verbs (${allVerbs.length}), truncating to ${maxVerbs} for filtering`);
+                    allVerbs = allVerbs.slice(0, maxVerbs);
+                }
+            }
+            catch (error) {
+                console.error('Error getting all verbs:', error);
+                // Return empty result on error
+                return {
+                    items: [],
+                    totalCount: 0,
+                    hasMore: false
+                };
+            }
+            // Apply filtering if needed
+            let filteredVerbs = allVerbs;
+            if (options?.filter) {
+                // Filter by verb type
+                if (options.filter.verbType) {
+                    const verbTypes = Array.isArray(options.filter.verbType)
+                        ? options.filter.verbType
+                        : [options.filter.verbType];
+                    filteredVerbs = filteredVerbs.filter((verb) => verb.type !== undefined && verbTypes.includes(verb.type));
+                }
+                // Filter by source ID
+                if (options.filter.sourceId) {
+                    const sourceIds = Array.isArray(options.filter.sourceId)
+                        ? options.filter.sourceId
+                        : [options.filter.sourceId];
+                    filteredVerbs = filteredVerbs.filter((verb) => verb.sourceId !== undefined && sourceIds.includes(verb.sourceId));
+                }
+                // Filter by target ID
+                if (options.filter.targetId) {
+                    const targetIds = Array.isArray(options.filter.targetId)
+                        ? options.filter.targetId
+                        : [options.filter.targetId];
+                    filteredVerbs = filteredVerbs.filter((verb) => verb.targetId !== undefined && targetIds.includes(verb.targetId));
+                }
+                // Filter by service
+                if (options.filter.service) {
+                    const services = Array.isArray(options.filter.service)
+                        ? options.filter.service
+                        : [options.filter.service];
+                    filteredVerbs = filteredVerbs.filter((verb) => {
+                        // GraphVerb doesn't have a service property directly, check metadata
+                        const service = verb.metadata?.service;
+                        return typeof service === 'string' && services.includes(service);
+                    });
+                }
+                // Filter by metadata
+                if (options.filter.metadata) {
+                    const metadataFilter = options.filter.metadata;
+                    filteredVerbs = filteredVerbs.filter((verb) => {
+                        if (!verb.metadata)
+                            return false;
+                        // Check if all metadata keys match
+                        return Object.entries(metadataFilter).every(([key, value]) => verb.metadata && verb.metadata[key] === value);
+                    });
+                }
+            }
+            // Get total count before pagination
+            totalCount = totalCount || filteredVerbs.length;
+            // Apply pagination
+            const paginatedVerbs = filteredVerbs.slice(offset, offset + limit);
+            const hasMore = offset + limit < filteredVerbs.length || filteredVerbs.length >= maxVerbs;
+            // Set next cursor if there are more items
+            let nextCursor = undefined;
+            if (hasMore && paginatedVerbs.length > 0) {
+                const lastItem = paginatedVerbs[paginatedVerbs.length - 1];
+                nextCursor = lastItem.id;
+            }
+            return {
+                items: paginatedVerbs,
+                totalCount,
+                hasMore,
+                nextCursor
+            };
        }
        catch (error) {
-            console.error('Error getting
-            // Return empty result on error
+            console.error('Error getting verbs with pagination:', error);
            return {
                items: [],
                totalCount: 0,
                hasMore: false
            };
        }
-        // Apply filtering if needed
-        let filteredVerbs = allVerbs;
-        if (options?.filter) {
-            // Filter by verb type
-            if (options.filter.verbType) {
-                const verbTypes = Array.isArray(options.filter.verbType)
-                    ? options.filter.verbType
-                    : [options.filter.verbType];
-                filteredVerbs = filteredVerbs.filter(verb => verb.type !== undefined && verbTypes.includes(verb.type));
-            }
-            // Filter by source ID
-            if (options.filter.sourceId) {
-                const sourceIds = Array.isArray(options.filter.sourceId)
-                    ? options.filter.sourceId
-                    : [options.filter.sourceId];
-                filteredVerbs = filteredVerbs.filter(verb => verb.sourceId !== undefined && sourceIds.includes(verb.sourceId));
-            }
-            // Filter by target ID
-            if (options.filter.targetId) {
-                const targetIds = Array.isArray(options.filter.targetId)
-                    ? options.filter.targetId
-                    : [options.filter.targetId];
-                filteredVerbs = filteredVerbs.filter(verb => verb.targetId !== undefined && targetIds.includes(verb.targetId));
-            }
-            // Filter by service
-            if (options.filter.service) {
-                const services = Array.isArray(options.filter.service)
-                    ? options.filter.service
-                    : [options.filter.service];
-                filteredVerbs = filteredVerbs.filter(verb => {
-                    // GraphVerb doesn't have a service property directly, check metadata
-                    const service = verb.metadata?.service;
-                    return typeof service === 'string' && services.includes(service);
-                });
-            }
-            // Filter by metadata
-            if (options.filter.metadata) {
-                const metadataFilter = options.filter.metadata;
-                filteredVerbs = filteredVerbs.filter(verb => {
-                    if (!verb.metadata)
-                        return false;
-                    // Check if all metadata keys match
-                    return Object.entries(metadataFilter).every(([key, value]) => verb.metadata && verb.metadata[key] === value);
-                });
-            }
-        }
-        // Get total count before pagination
-        const totalCount = filteredVerbs.length;
-        // Apply pagination
-        const paginatedVerbs = filteredVerbs.slice(offset, offset + limit);
-        const hasMore = offset + limit < totalCount;
-        // Set next cursor if there are more items
-        let nextCursor = undefined;
-        if (hasMore && paginatedVerbs.length > 0) {
-            const lastItem = paginatedVerbs[paginatedVerbs.length - 1];
-            nextCursor = lastItem.id;
-        }
-        return {
-            items: paginatedVerbs,
-            totalCount,
-            hasMore,
-            nextCursor
-        };
    }
    /**
     * Delete a verb from storage
@@ -7924,10 +8398,1055 @@ class StorageOperationExecutors {
|
|
|
7924
8398
|
return this.addExecutor(operation, operationName);
|
|
7925
8399
|
}
|
|
7926
8400
|
/**
|
|
7927
|
-
* Execute a delete operation with timeout and retry
|
|
8401
|
+
* Execute a delete operation with timeout and retry
|
|
8402
|
+
*/
|
|
8403
|
+
async executeDelete(operation, operationName) {
|
|
8404
|
+
return this.deleteExecutor(operation, operationName);
|
|
8405
|
+
}
|
|
8406
|
+
}
|
|
8407
|
+
|
|
8408
|
+
/**
|
|
8409
|
+
* Multi-level Cache Manager
|
|
8410
|
+
*
|
|
8411
|
+
* Implements a three-level caching strategy:
|
|
8412
|
+
* - Level 1: Hot cache (most accessed nodes) - RAM (automatically detecting and adjusting in each environment)
|
|
8413
|
+
* - Level 2: Warm cache (recent nodes) - OPFS, Filesystem or S3 depending on environment
|
|
8414
|
+
* - Level 3: Cold storage (all nodes) - OPFS, Filesystem or S3 depending on environment
|
|
8415
|
+
*/
|
|
8416
|
+
// Environment detection for storage selection
|
|
8417
|
+
var Environment$1;
|
|
8418
|
+
(function (Environment) {
|
|
8419
|
+
Environment[Environment["BROWSER"] = 0] = "BROWSER";
|
|
8420
|
+
Environment[Environment["NODE"] = 1] = "NODE";
|
|
8421
|
+
Environment[Environment["WORKER"] = 2] = "WORKER";
|
|
8422
|
+
})(Environment$1 || (Environment$1 = {}));
|
|
8423
|
+
// Storage type for warm and cold caches
|
|
8424
|
+
var StorageType;
|
|
8425
|
+
(function (StorageType) {
|
|
8426
|
+
StorageType[StorageType["MEMORY"] = 0] = "MEMORY";
|
|
8427
|
+
StorageType[StorageType["OPFS"] = 1] = "OPFS";
|
|
8428
|
+
StorageType[StorageType["FILESYSTEM"] = 2] = "FILESYSTEM";
|
|
8429
|
+
StorageType[StorageType["S3"] = 3] = "S3";
|
|
8430
|
+
StorageType[StorageType["REMOTE_API"] = 4] = "REMOTE_API";
|
|
8431
|
+
})(StorageType || (StorageType = {}));
|
|
8432
|
+
/**
|
|
8433
|
+
* Multi-level cache manager for efficient data access
|
|
8434
|
+
*/
|
|
8435
|
+
class CacheManager {
|
|
8436
|
+
/**
|
|
8437
|
+
* Initialize the cache manager
|
|
8438
|
+
* @param options Configuration options
|
|
8439
|
+
*/
|
|
8440
|
+
constructor(options = {}) {
|
|
8441
|
+
// Hot cache (RAM)
|
|
8442
|
+
this.hotCache = new Map();
|
|
8443
|
+
// Cache statistics
|
|
8444
|
+
this.stats = {
|
|
8445
|
+
hits: 0,
|
|
8446
|
+
misses: 0,
|
|
8447
|
+
evictions: 0,
|
|
8448
|
+
size: 0,
|
|
8449
|
+
maxSize: 0
|
|
8450
|
+
};
|
|
8451
|
+
this.lastAutoTuneTime = 0;
|
|
8452
|
+
this.autoTuneInterval = 5 * 60 * 1000; // 5 minutes
|
|
8453
|
+
this.storageStatistics = null;
|
|
8454
|
+
// Store options for later reference
|
|
8455
|
+
this.options = options;
|
|
8456
|
+
// Detect environment
|
|
8457
|
+
this.environment = this.detectEnvironment();
|
|
8458
|
+
// Set storage types based on environment
|
|
8459
|
+
this.warmStorageType = this.detectWarmStorageType();
|
|
8460
|
+
this.coldStorageType = this.detectColdStorageType();
|
|
8461
|
+
// Initialize storage adapters
|
|
8462
|
+
this.warmStorage = options.warmStorage || this.initializeWarmStorage();
|
|
8463
|
+
this.coldStorage = options.coldStorage || this.initializeColdStorage();
|
|
8464
|
+
// Set auto-tuning flag
|
|
8465
|
+
this.autoTune = options.autoTune !== undefined ? options.autoTune : true;
|
|
8466
|
+
// Set default values or use provided values
|
|
8467
|
+
this.hotCacheMaxSize = options.hotCacheMaxSize || this.detectOptimalCacheSize();
|
|
8468
|
+
this.hotCacheEvictionThreshold = options.hotCacheEvictionThreshold || 0.8;
|
|
8469
|
+
this.warmCacheTTL = options.warmCacheTTL || 24 * 60 * 60 * 1000; // 24 hours
|
|
8470
|
+
this.batchSize = options.batchSize || 10;
|
|
8471
|
+
// If auto-tuning is enabled, perform initial tuning
|
|
8472
|
+
if (this.autoTune) {
|
|
8473
|
+
this.tuneParameters();
|
|
8474
|
+
}
|
|
8475
|
+
// Log configuration
|
|
8476
|
+
if (process.env.DEBUG) {
|
|
8477
|
+
console.log('Cache Manager initialized with configuration:', {
|
|
8478
|
+
environment: Environment$1[this.environment],
|
|
8479
|
+
hotCacheMaxSize: this.hotCacheMaxSize,
|
|
8480
|
+
hotCacheEvictionThreshold: this.hotCacheEvictionThreshold,
|
|
8481
|
+
warmCacheTTL: this.warmCacheTTL,
|
|
8482
|
+
batchSize: this.batchSize,
|
|
8483
|
+
autoTune: this.autoTune,
|
|
8484
|
+
warmStorageType: StorageType[this.warmStorageType],
|
|
8485
|
+
coldStorageType: StorageType[this.coldStorageType]
|
|
8486
|
+
});
|
|
8487
|
+
}
|
|
8488
|
+
}
|
|
8489
|
+
/**
|
|
8490
|
+
* Detect the current environment
|
|
8491
|
+
*/
|
|
8492
|
+
detectEnvironment() {
|
|
8493
|
+
if (typeof window !== 'undefined' && typeof document !== 'undefined') {
|
|
8494
|
+
return Environment$1.BROWSER;
|
|
8495
|
+
}
|
|
8496
|
+
else if (typeof self !== 'undefined' && typeof window === 'undefined') {
|
|
8497
|
+
// In a worker environment, self is defined but window is not
|
|
8498
|
+
return Environment$1.WORKER;
|
|
8499
|
+
}
|
|
8500
|
+
else {
|
|
8501
|
+
return Environment$1.NODE;
|
|
8502
|
+
}
|
|
8503
|
+
}
|
|
8504
|
+
/**
|
|
8505
|
+
* Detect the optimal cache size based on available memory and operating mode
|
|
8506
|
+
*
|
|
8507
|
+
* Enhanced to better handle large datasets in S3 or other storage:
|
|
8508
|
+
* - Increases cache size for read-only mode
|
|
8509
|
+
* - Adjusts based on total dataset size when available
|
|
8510
|
+
* - Provides more aggressive caching for large datasets
|
|
8511
|
+
* - Optimizes memory usage based on environment
|
|
8512
|
+
*/
|
|
8513
|
+
detectOptimalCacheSize() {
|
|
8514
|
+
try {
|
|
8515
|
+
// Default to a conservative value
|
|
8516
|
+
const defaultSize = 1000;
|
|
8517
|
+
// Get the total dataset size if available
|
|
8518
|
+
const totalItems = this.storageStatistics ?
|
|
8519
|
+
(this.storageStatistics.totalNodes || 0) + (this.storageStatistics.totalEdges || 0) : 0;
|
|
8520
|
+
// Determine if we're dealing with a large dataset (>100K items)
|
|
8521
|
+
const isLargeDataset = totalItems > 100000;
|
|
8522
|
+
// Check if we're in read-only mode (from parent BrainyData instance)
|
|
8523
|
+
const isReadOnly = this.options?.readOnly || false;
|
|
8524
|
+
// In Node.js, use available system memory with enhanced allocation
|
|
8525
|
+
if (this.environment === Environment$1.NODE) {
|
|
8526
|
+
try {
|
|
8527
|
+
// Use dynamic import to avoid ESLint warning
|
|
8528
|
+
const getOS = () => {
|
|
8529
|
+
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
|
8530
|
+
return require('os');
|
|
8531
|
+
};
|
|
8532
|
+
const os = getOS();
|
|
8533
|
+
const totalMemory = os.totalmem();
|
|
8534
|
+
const freeMemory = os.freemem();
|
|
8535
|
+
// Estimate average entry size (in bytes)
|
|
8536
|
+
// This is a conservative estimate for complex objects with vectors
|
|
8537
|
+
const ESTIMATED_BYTES_PER_ENTRY = 1024; // 1KB per entry
|
|
8538
|
+
// Base memory percentage - 10% by default
|
|
8539
|
+
let memoryPercentage = 0.1;
|
|
8540
|
+
// Adjust based on operating mode and dataset size
|
|
8541
|
+
if (isReadOnly) {
|
|
8542
|
+
// In read-only mode, we can use more memory for caching
|
|
8543
|
+
memoryPercentage = 0.25; // 25% of free memory
|
|
8544
|
+
// For large datasets in read-only mode, be even more aggressive
|
|
8545
|
+
if (isLargeDataset) {
|
|
8546
|
+
memoryPercentage = 0.4; // 40% of free memory
|
|
8547
|
+
}
|
|
8548
|
+
}
|
|
8549
|
+
else if (isLargeDataset) {
|
|
8550
|
+
// For large datasets in normal mode, increase slightly
|
|
8551
|
+
memoryPercentage = 0.15; // 15% of free memory
|
|
8552
|
+
}
|
|
8553
|
+
// Calculate optimal size based on adjusted percentage
|
|
8554
|
+
const optimalSize = Math.max(Math.floor(freeMemory * memoryPercentage / ESTIMATED_BYTES_PER_ENTRY), 1000);
|
|
8555
|
+
// If we know the total dataset size, cap at a reasonable percentage
|
|
8556
|
+
if (totalItems > 0) {
|
|
8557
|
+
// In read-only mode, we can cache a larger percentage
|
|
8558
|
+
const maxPercentage = isReadOnly ? 0.5 : 0.3;
|
|
8559
|
+
const maxItems = Math.ceil(totalItems * maxPercentage);
|
|
8560
|
+
// Return the smaller of the two to avoid excessive memory usage
|
|
8561
|
+
return Math.min(optimalSize, maxItems);
|
|
8562
|
+
}
|
|
8563
|
+
return optimalSize;
|
|
8564
|
+
}
|
|
8565
|
+
catch (error) {
|
|
8566
|
+
console.warn('Failed to detect optimal cache size:', error);
|
|
8567
|
+
return defaultSize;
|
|
8568
|
+
}
|
|
8569
|
+
}
|
|
8570
|
+
// In browser, use navigator.deviceMemory with enhanced allocation
|
|
8571
|
+
if (this.environment === Environment$1.BROWSER && navigator.deviceMemory) {
|
|
8572
|
+
// Base entries per GB
|
|
8573
|
+
let entriesPerGB = 500;
|
|
8574
|
+
// Adjust based on operating mode and dataset size
|
|
8575
|
+
if (isReadOnly) {
|
|
8576
|
+
entriesPerGB = 800; // More aggressive caching in read-only mode
|
|
8577
|
+
if (isLargeDataset) {
|
|
8578
|
+
entriesPerGB = 1000; // Even more aggressive for large datasets
|
|
8579
|
+
}
|
|
8580
|
+
}
|
|
8581
|
+
else if (isLargeDataset) {
|
|
8582
|
+
entriesPerGB = 600; // Slightly more aggressive for large datasets
|
|
8583
|
+
}
|
|
8584
|
+
// Calculate based on device memory
|
|
8585
|
+
const browserCacheSize = Math.max(navigator.deviceMemory * entriesPerGB, 1000);
|
|
8586
|
+
// If we know the total dataset size, cap at a reasonable percentage
|
|
8587
|
+
if (totalItems > 0) {
|
|
8588
|
+
// In read-only mode, we can cache a larger percentage
|
|
8589
|
+
const maxPercentage = isReadOnly ? 0.4 : 0.25;
|
|
8590
|
+
const maxItems = Math.ceil(totalItems * maxPercentage);
|
|
8591
|
+
// Return the smaller of the two to avoid excessive memory usage
|
|
8592
|
+
return Math.min(browserCacheSize, maxItems);
|
|
8593
|
+
}
|
|
8594
|
+
return browserCacheSize;
|
|
8595
|
+
}
|
|
8596
|
+
// For worker environments or when memory detection fails
|
|
8597
|
+
if (this.environment === Environment$1.WORKER) {
|
|
8598
|
+
// Workers typically have limited memory, be conservative
|
|
8599
|
+
return isReadOnly ? 2000 : 1000;
|
|
8600
|
+
}
|
|
8601
|
+
return defaultSize;
|
|
8602
|
+
}
|
|
8603
|
+
catch (error) {
|
|
8604
|
+
console.warn('Error detecting optimal cache size:', error);
|
|
8605
|
+
return 1000; // Conservative default
|
|
8606
|
+
}
|
|
8607
|
+
}
|
|
8608
|
+
/**
|
|
8609
|
+
* Tune cache parameters based on statistics and environment
|
|
8610
|
+
* This method is called periodically if auto-tuning is enabled
|
|
8611
|
+
*
|
|
8612
|
+
* The auto-tuning process:
|
|
8613
|
+
* 1. Retrieves storage statistics if available
|
|
8614
|
+
* 2. Tunes each parameter based on statistics and environment
|
|
8615
|
+
* 3. Logs the tuned parameters if debug is enabled
|
|
8616
|
+
*
|
|
8617
|
+
* Auto-tuning helps optimize cache performance by adapting to:
|
|
8618
|
+
* - The current environment (Node.js, browser, worker)
|
|
8619
|
+
* - Available system resources (memory, CPU)
|
|
8620
|
+
* - Usage patterns (read-heavy vs. write-heavy workloads)
|
|
8621
|
+
* - Cache efficiency (hit/miss ratios)
|
|
8622
|
+
*/
|
|
8623
|
+
async tuneParameters() {
|
|
8624
|
+
// Skip if auto-tuning is disabled
|
|
8625
|
+
if (!this.autoTune)
|
|
8626
|
+
return;
|
|
8627
|
+
// Check if it's time to tune parameters
|
|
8628
|
+
const now = Date.now();
|
|
8629
|
+
if (now - this.lastAutoTuneTime < this.autoTuneInterval)
|
|
8630
|
+
return;
|
|
8631
|
+
// Update last tune time
|
|
8632
|
+
this.lastAutoTuneTime = now;
|
|
8633
|
+
try {
|
|
8634
|
+
// Get storage statistics if available
|
|
8635
|
+
if (this.coldStorage && typeof this.coldStorage.getStatistics === 'function') {
|
|
8636
|
+
this.storageStatistics = await this.coldStorage.getStatistics();
|
|
8637
|
+
}
|
|
8638
|
+
// Tune hot cache size
|
|
8639
|
+
this.tuneHotCacheSize();
|
|
8640
|
+
// Tune eviction threshold
|
|
8641
|
+
this.tuneEvictionThreshold();
|
|
8642
|
+
// Tune warm cache TTL
|
|
8643
|
+
this.tuneWarmCacheTTL();
|
|
8644
|
+
// Tune batch size
|
|
8645
|
+
this.tuneBatchSize();
|
|
8646
|
+
// Log tuned parameters if debug is enabled
|
|
8647
|
+
if (process.env.DEBUG) {
|
|
8648
|
+
console.log('Cache parameters auto-tuned:', {
|
|
8649
|
+
hotCacheMaxSize: this.hotCacheMaxSize,
|
|
8650
|
+
hotCacheEvictionThreshold: this.hotCacheEvictionThreshold,
|
|
8651
|
+
warmCacheTTL: this.warmCacheTTL,
|
|
8652
|
+
batchSize: this.batchSize
|
|
8653
|
+
});
|
|
8654
|
+
}
|
|
8655
|
+
}
|
|
8656
|
+
catch (error) {
|
|
8657
|
+
console.warn('Error during cache parameter auto-tuning:', error);
|
|
8658
|
+
}
|
|
8659
|
+
}
|
|
8660
|
+
/**
|
|
8661
|
+
* Tune hot cache size based on statistics, environment, and operating mode
|
|
8662
|
+
*
|
|
8663
|
+
* The hot cache size is tuned based on:
|
|
8664
|
+
* 1. Available memory in the current environment
|
|
8665
|
+
* 2. Total number of nodes and edges in the system
|
|
8666
|
+
* 3. Cache hit/miss ratio
|
|
8667
|
+
* 4. Operating mode (read-only vs. read-write)
|
|
8668
|
+
+     * 5. Storage type (S3, filesystem, memory)
+     *
+     * Enhanced algorithm:
+     * - Start with a size based on available memory and operating mode
+     * - For large datasets in S3 or other remote storage, use more aggressive caching
+     * - Adjust based on access patterns (read-heavy vs. write-heavy)
+     * - For read-only mode, prioritize cache size over eviction speed
+     * - Dynamically adjust based on hit/miss ratio and query patterns
+     */
+    tuneHotCacheSize() {
+        // Start with the base size from environment detection
+        let optimalSize = this.detectOptimalCacheSize();
+        // Check if we're in read-only mode
+        const isReadOnly = this.options?.readOnly || false;
+        // Check if we're using S3 or other remote storage
+        const isRemoteStorage = this.coldStorageType === StorageType.S3 ||
+            this.coldStorageType === StorageType.REMOTE_API;
+        // If we have storage statistics, adjust based on total nodes/edges
+        if (this.storageStatistics) {
+            const totalItems = (this.storageStatistics.totalNodes || 0) +
+                (this.storageStatistics.totalEdges || 0);
+            // If total items is significant, adjust cache size
+            if (totalItems > 0) {
+                // Base percentage to cache - adjusted based on mode and storage
+                let percentageToCache = 0.2; // Cache 20% of items by default
+                // For read-only mode, increase cache percentage
+                if (isReadOnly) {
+                    percentageToCache = 0.3; // 30% for read-only mode
+                    // For remote storage in read-only mode, be even more aggressive
+                    if (isRemoteStorage) {
+                        percentageToCache = 0.4; // 40% for remote storage in read-only mode
+                    }
+                }
+                // For remote storage in normal mode, increase slightly
+                else if (isRemoteStorage) {
+                    percentageToCache = 0.25; // 25% for remote storage
+                }
+                // For large datasets, cap the percentage to avoid excessive memory usage
+                if (totalItems > 1000000) { // Over 1 million items
+                    percentageToCache = Math.min(percentageToCache, 0.15);
+                }
+                else if (totalItems > 100000) { // Over 100K items
+                    percentageToCache = Math.min(percentageToCache, 0.25);
+                }
+                const statisticsBasedSize = Math.ceil(totalItems * percentageToCache);
+                // Use the smaller of the two to avoid memory issues
+                optimalSize = Math.min(optimalSize, statisticsBasedSize);
+            }
+        }
+        // Adjust based on hit/miss ratio if we have enough data
+        const totalAccesses = this.stats.hits + this.stats.misses;
+        if (totalAccesses > 100) {
+            const hitRatio = this.stats.hits / totalAccesses;
+            // Base adjustment factor
+            let hitRatioFactor = 1.0;
+            // If hit ratio is low, we might need a larger cache
+            if (hitRatio < 0.5) {
+                // Calculate adjustment factor based on hit ratio
+                const baseAdjustment = 0.5 - hitRatio;
+                // For read-only mode or remote storage, be more aggressive
+                if (isReadOnly || isRemoteStorage) {
+                    hitRatioFactor = 1 + (baseAdjustment * 1.5); // Up to 75% increase
+                }
+                else {
+                    hitRatioFactor = 1 + baseAdjustment; // Up to 50% increase
+                }
+                optimalSize = Math.ceil(optimalSize * hitRatioFactor);
+            }
+            // If hit ratio is very high, we might be able to reduce cache size slightly
+            else if (hitRatio > 0.9 && !isReadOnly && !isRemoteStorage) {
+                // Only reduce cache size in normal mode with local storage
+                // and only if hit ratio is very high
+                hitRatioFactor = 0.9; // 10% reduction
+                optimalSize = Math.ceil(optimalSize * hitRatioFactor);
+            }
+        }
+        // Check for operation patterns if available
+        if (this.storageStatistics?.operations) {
+            const ops = this.storageStatistics.operations;
+            const totalOps = ops.total || 1;
+            // Calculate read/write ratio
+            const readOps = (ops.search || 0) + (ops.get || 0);
+            (ops.add || 0) + (ops.update || 0) + (ops.delete || 0);
+            if (totalOps > 100) {
+                const readRatio = readOps / totalOps;
+                // For read-heavy workloads, increase cache size
+                if (readRatio > 0.8) {
+                    // More aggressive for remote storage
+                    const readAdjustment = isRemoteStorage ? 1.3 : 1.2;
+                    optimalSize = Math.ceil(optimalSize * readAdjustment);
+                }
+            }
+        }
+        // Ensure we have a reasonable minimum size based on environment and mode
+        let minSize = 1000; // Default minimum
+        // For read-only mode, use a higher minimum
+        if (isReadOnly) {
+            minSize = 2000;
+        }
+        // For remote storage, use an even higher minimum
+        if (isRemoteStorage) {
+            minSize = isReadOnly ? 3000 : 2000;
+        }
+        optimalSize = Math.max(optimalSize, minSize);
+        // Update the hot cache max size
+        this.hotCacheMaxSize = optimalSize;
+        this.stats.maxSize = optimalSize;
+    }
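To make the tuning above concrete, here is a small standalone sketch (not package code; all inputs are hypothetical) that replays the sizing arithmetic for a read-only deployment against S3 holding two million items, with a base size of 50,000 from environment detection:

// Replay of the sizing logic above with hypothetical inputs.
const baseSize = 50000;            // pretend detectOptimalCacheSize() returned this
const totalItems = 2000000;        // totalNodes + totalEdges
let pct = 0.4;                     // read-only + remote storage branch
pct = Math.min(pct, 0.15);         // capped because totalItems > 1,000,000
const statsBasedSize = Math.ceil(totalItems * pct);  // 300000
let optimalSize = Math.min(baseSize, statsBasedSize); // 50000
optimalSize = Math.max(optimalSize, 3000);           // read-only remote minimum
console.log(optimalSize); // 50000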
+    /**
+     * Tune eviction threshold based on statistics
+     *
+     * The eviction threshold determines when items start being evicted from the hot cache.
+     * It is tuned based on:
+     * 1. Cache hit/miss ratio
+     * 2. Operation patterns (read-heavy vs. write-heavy workloads)
+     *
+     * Algorithm:
+     * - Start with a default threshold of 0.8 (80% of max size)
+     * - For high hit ratios, increase the threshold to keep more items in cache
+     * - For low hit ratios, decrease the threshold to evict items more aggressively
+     * - For read-heavy workloads, use a higher threshold
+     * - For write-heavy workloads, use a lower threshold
+     */
+    tuneEvictionThreshold() {
+        // Default threshold
+        let threshold = 0.8;
+        // Adjust based on hit/miss ratio if we have enough data
+        const totalAccesses = this.stats.hits + this.stats.misses;
+        if (totalAccesses > 100) {
+            const hitRatio = this.stats.hits / totalAccesses;
+            // If hit ratio is high, we can use a higher threshold
+            // If hit ratio is low, we should use a lower threshold to evict more aggressively
+            if (hitRatio > 0.8) {
+                // High hit ratio, increase threshold (up to 0.9)
+                threshold = Math.min(0.9, 0.8 + (hitRatio - 0.8));
+            }
+            else if (hitRatio < 0.5) {
+                // Low hit ratio, decrease threshold (down to 0.6)
+                threshold = Math.max(0.6, 0.8 - (0.5 - hitRatio));
+            }
+        }
+        // If we have storage statistics with operation counts, adjust based on operation patterns
+        if (this.storageStatistics && this.storageStatistics.operations) {
+            const ops = this.storageStatistics.operations;
+            const totalOps = ops.total || 1;
+            // Calculate read/write ratio
+            const readOps = ops.search || 0;
+            const writeOps = (ops.add || 0) + (ops.update || 0) + (ops.delete || 0);
+            if (totalOps > 100) {
+                const readRatio = readOps / totalOps;
+                const writeRatio = writeOps / totalOps;
+                // For read-heavy workloads, use higher threshold
+                // For write-heavy workloads, use lower threshold
+                if (readRatio > 0.8) {
+                    // Read-heavy, increase threshold slightly
+                    threshold = Math.min(0.9, threshold + 0.05);
+                }
+                else if (writeRatio > 0.5) {
+                    // Write-heavy, decrease threshold
+                    threshold = Math.max(0.6, threshold - 0.1);
+                }
+            }
+        }
+        // Update the eviction threshold
+        this.hotCacheEvictionThreshold = threshold;
+    }
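The threshold curve is easy to verify by hand; a minimal sketch of the same piecewise function (hypothetical inputs, not package code):

// hitRatio above 0.8 raises the threshold (capped at 0.9);
// hitRatio below 0.5 lowers it (floored at 0.6); otherwise it stays at 0.8.
const thresholdFor = (hitRatio) =>
    hitRatio > 0.8 ? Math.min(0.9, 0.8 + (hitRatio - 0.8))
        : hitRatio < 0.5 ? Math.max(0.6, 0.8 - (0.5 - hitRatio))
            : 0.8;
console.log(thresholdFor(0.95)); // 0.9
console.log(thresholdFor(0.3));  // 0.6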
+    /**
+     * Tune warm cache TTL based on statistics
+     *
+     * The warm cache TTL determines how long items remain in the warm cache.
+     * It is tuned based on:
+     * 1. Update frequency from operation statistics
+     *
+     * Algorithm:
+     * - Start with a default TTL of 24 hours
+     * - For frequently updated data, use a shorter TTL
+     * - For rarely updated data, use a longer TTL
+     */
+    tuneWarmCacheTTL() {
+        // Default TTL (24 hours)
+        let ttl = 24 * 60 * 60 * 1000;
+        // If we have storage statistics with operation counts, adjust based on update frequency
+        if (this.storageStatistics && this.storageStatistics.operations) {
+            const ops = this.storageStatistics.operations;
+            const totalOps = ops.total || 1;
+            const updateOps = (ops.update || 0);
+            if (totalOps > 100) {
+                const updateRatio = updateOps / totalOps;
+                // For frequently updated data, use shorter TTL
+                // For rarely updated data, use longer TTL
+                if (updateRatio > 0.3) {
+                    // Frequently updated, decrease TTL (down to 6 hours)
+                    ttl = Math.max(6 * 60 * 60 * 1000, ttl * (1 - updateRatio));
+                }
+                else if (updateRatio < 0.1) {
+                    // Rarely updated, increase TTL (up to 48 hours)
+                    ttl = Math.min(48 * 60 * 60 * 1000, ttl * (1.5 - updateRatio));
+                }
+            }
+        }
+        // Update the warm cache TTL
+        this.warmCacheTTL = ttl;
+    }
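Plugging sample update ratios into the same formula shows the TTL range in practice (a sketch with hypothetical inputs, not package code):

const HOUR = 60 * 60 * 1000;
const ttlHoursFor = (updateRatio) => {
    let ttl = 24 * HOUR;
    if (updateRatio > 0.3) {
        ttl = Math.max(6 * HOUR, ttl * (1 - updateRatio)); // shrink for churny data
    }
    else if (updateRatio < 0.1) {
        ttl = Math.min(48 * HOUR, ttl * (1.5 - updateRatio)); // grow for stable data
    }
    return ttl / HOUR;
};
console.log(ttlHoursFor(0.5));  // 12 (hours)
console.log(ttlHoursFor(0.05)); // 34.8 (hours)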
+    /**
+     * Tune batch size based on environment, statistics, and operating mode
+     *
+     * The batch size determines how many items are processed in a single batch
+     * for operations like prefetching. It is tuned based on:
+     * 1. Current environment (Node.js, browser, worker)
+     * 2. Available memory
+     * 3. Operation patterns
+     * 4. Cache hit/miss ratio
+     * 5. Operating mode (read-only vs. read-write)
+     * 6. Storage type (S3, filesystem, memory)
+     * 7. Dataset size
+     *
+     * Enhanced algorithm:
+     * - Start with a default based on the environment
+     * - For large datasets in S3 or other remote storage, use larger batches
+     * - For read-only mode, use larger batches to improve throughput
+     * - Dynamically adjust based on network latency and throughput
+     * - Balance between memory usage and performance
+     */
+    tuneBatchSize() {
+        // Default batch size
+        let batchSize = 10;
+        // Check if we're in read-only mode
+        const isReadOnly = this.options?.readOnly || false;
+        // Check if we're using S3 or other remote storage
+        const isRemoteStorage = this.coldStorageType === StorageType.S3 ||
+            this.coldStorageType === StorageType.REMOTE_API;
+        // Get the total dataset size if available
+        const totalItems = this.storageStatistics ?
+            (this.storageStatistics.totalNodes || 0) + (this.storageStatistics.totalEdges || 0) : 0;
+        // Determine if we're dealing with a large dataset
+        const isLargeDataset = totalItems > 100000;
+        const isVeryLargeDataset = totalItems > 1000000;
+        // Base batch size adjustment based on environment
+        if (this.environment === Environment$1.NODE) {
+            // Node.js can handle larger batches
+            batchSize = isReadOnly ? 30 : 20;
+            // For remote storage, increase batch size
+            if (isRemoteStorage) {
+                batchSize = isReadOnly ? 50 : 30;
+            }
+            // For large datasets, adjust batch size
+            if (isLargeDataset) {
+                batchSize = Math.min(100, batchSize * 1.5);
+            }
+            // For very large datasets, adjust even more
+            if (isVeryLargeDataset) {
+                batchSize = Math.min(200, batchSize * 2);
+            }
+        }
+        else if (this.environment === Environment$1.BROWSER) {
+            // Browsers might need smaller batches
+            batchSize = isReadOnly ? 15 : 10;
+            // If we have memory information, adjust accordingly
+            if (navigator.deviceMemory) {
+                // Scale batch size with available memory
+                const memoryFactor = isReadOnly ? 3 : 2;
+                batchSize = Math.max(5, Math.min(30, Math.floor(navigator.deviceMemory * memoryFactor)));
+                // For large datasets, adjust based on memory
+                if (isLargeDataset && navigator.deviceMemory > 4) {
+                    batchSize = Math.min(50, batchSize * 1.5);
+                }
+            }
+        }
+        else if (this.environment === Environment$1.WORKER) {
+            // Workers can handle moderate batch sizes
+            batchSize = isReadOnly ? 20 : 15;
+        }
+        // If we have storage statistics with operation counts, adjust based on operation patterns
+        if (this.storageStatistics && this.storageStatistics.operations) {
+            const ops = this.storageStatistics.operations;
+            const totalOps = ops.total || 1;
+            const searchOps = (ops.search || 0);
+            const getOps = (ops.get || 0);
+            if (totalOps > 100) {
+                // Calculate search and get ratios
+                const searchRatio = searchOps / totalOps;
+                const getRatio = getOps / totalOps;
+                // For search-heavy workloads, use larger batch size
+                if (searchRatio > 0.6) {
+                    // Search-heavy, increase batch size
+                    const searchFactor = isRemoteStorage ? 1.8 : 1.5;
+                    batchSize = Math.min(isRemoteStorage ? 200 : 100, Math.ceil(batchSize * searchFactor));
+                }
+                // For get-heavy workloads, adjust batch size
+                if (getRatio > 0.6) {
+                    // Get-heavy, adjust batch size based on storage type
+                    if (isRemoteStorage) {
+                        // For remote storage, larger batches reduce network overhead
+                        batchSize = Math.min(150, Math.ceil(batchSize * 1.5));
+                    }
+                    else {
+                        // For local storage, smaller batches might be more efficient
+                        batchSize = Math.max(10, Math.ceil(batchSize * 0.9));
+                    }
+                }
+            }
+        }
+        // Adjust based on hit/miss ratio if we have enough data
+        const totalAccesses = this.stats.hits + this.stats.misses;
+        if (totalAccesses > 100) {
+            const hitRatio = this.stats.hits / totalAccesses;
+            // Base adjustment factors
+            let increaseFactorForLowHitRatio = isRemoteStorage ? 1.5 : 1.2;
+            let decreaseFactorForHighHitRatio = 0.8;
+            // In read-only mode, be more aggressive with batch size adjustments
+            if (isReadOnly) {
+                increaseFactorForLowHitRatio = isRemoteStorage ? 2.0 : 1.5;
+                decreaseFactorForHighHitRatio = 0.9; // Less reduction in read-only mode
+            }
+            // If hit ratio is high, we can use smaller batches
+            if (hitRatio > 0.8 && !isVeryLargeDataset) {
+                // High hit ratio, decrease batch size slightly
+                // But don't decrease too much for large datasets or remote storage
+                if (!(isLargeDataset && isRemoteStorage)) {
+                    batchSize = Math.max(isReadOnly ? 10 : 5, Math.floor(batchSize * decreaseFactorForHighHitRatio));
+                }
+            }
+            // If hit ratio is low, we need larger batches
+            else if (hitRatio < 0.5) {
+                // Low hit ratio, increase batch size
+                const maxBatchSize = isRemoteStorage ?
+                    (isVeryLargeDataset ? 300 : 200) :
+                    (isVeryLargeDataset ? 150 : 100);
+                batchSize = Math.min(maxBatchSize, Math.ceil(batchSize * increaseFactorForLowHitRatio));
+            }
+        }
+        // Set minimum batch sizes based on storage type and mode
+        let minBatchSize = 5;
+        if (isRemoteStorage) {
+            minBatchSize = isReadOnly ? 20 : 10;
+        }
+        else if (isReadOnly) {
+            minBatchSize = 10;
+        }
+        // Ensure batch size is within reasonable limits
+        batchSize = Math.max(minBatchSize, batchSize);
+        // Cap maximum batch size based on environment and storage
+        const maxBatchSize = isRemoteStorage ?
+            (this.environment === Environment$1.NODE ? 300 : 150) :
+            (this.environment === Environment$1.NODE ? 150 : 75);
+        batchSize = Math.min(maxBatchSize, batchSize);
+        // Update the batch size
+        this.batchSize = batchSize;
+    }
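For example, following the Node.js branch above with hypothetical inputs: a read-only process against S3 starts at 50, and a dataset over 100K items bumps it by half, capped at 100 (a trace, not package code):

let batchSize = 50;                         // Node.js + read-only + remote storage
batchSize = Math.min(100, batchSize * 1.5); // large-dataset adjustment -> 75
console.log(batchSize); // 75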
+    /**
+     * Detect the appropriate warm storage type based on environment
+     */
+    detectWarmStorageType() {
+        if (this.environment === Environment$1.BROWSER) {
+            // Use OPFS if available, otherwise use memory
+            if ('storage' in navigator && 'getDirectory' in navigator.storage) {
+                return StorageType.OPFS;
+            }
+            return StorageType.MEMORY;
+        }
+        else if (this.environment === Environment$1.WORKER) {
+            // Use OPFS if available, otherwise use memory
+            if ('storage' in self && 'getDirectory' in self.storage) {
+                return StorageType.OPFS;
+            }
+            return StorageType.MEMORY;
+        }
+        else {
+            // In Node.js, use filesystem
+            return StorageType.FILESYSTEM;
+        }
+    }
+    /**
+     * Detect the appropriate cold storage type based on environment
+     */
+    detectColdStorageType() {
+        if (this.environment === Environment$1.BROWSER) {
+            // Use OPFS if available, otherwise use memory
+            if ('storage' in navigator && 'getDirectory' in navigator.storage) {
+                return StorageType.OPFS;
+            }
+            return StorageType.MEMORY;
+        }
+        else if (this.environment === Environment$1.WORKER) {
+            // Use OPFS if available, otherwise use memory
+            if ('storage' in self && 'getDirectory' in self.storage) {
+                return StorageType.OPFS;
+            }
+            return StorageType.MEMORY;
+        }
+        else {
+            // In Node.js, use S3 if configured, otherwise filesystem
+            return StorageType.S3;
+        }
+    }
+    /**
+     * Initialize warm storage adapter
+     */
+    initializeWarmStorage() {
+        // Implementation depends on the detected storage type
+        // For now, return null as this will be provided by the storage adapter
+        return null;
+    }
+    /**
+     * Initialize cold storage adapter
+     */
+    initializeColdStorage() {
+        // Implementation depends on the detected storage type
+        // For now, return null as this will be provided by the storage adapter
+        return null;
+    }
+    /**
+     * Get an item from cache, trying each level in order
+     * @param id The item ID
+     * @returns The cached item or null if not found
+     */
+    async get(id) {
+        // Check if it's time to tune parameters
+        await this.checkAndTuneParameters();
+        // Try hot cache first (fastest)
+        const hotCacheEntry = this.hotCache.get(id);
+        if (hotCacheEntry) {
+            // Update access metadata
+            hotCacheEntry.lastAccessed = Date.now();
+            hotCacheEntry.accessCount++;
+            // Update stats
+            this.stats.hits++;
+            return hotCacheEntry.data;
+        }
+        // Try warm cache next
+        try {
+            const warmCacheItem = await this.getFromWarmCache(id);
+            if (warmCacheItem) {
+                // Promote to hot cache
+                this.addToHotCache(id, warmCacheItem);
+                // Update stats
+                this.stats.hits++;
+                return warmCacheItem;
+            }
+        }
+        catch (error) {
+            console.warn(`Error accessing warm cache for ${id}:`, error);
+        }
+        // Finally, try cold storage
+        try {
+            const coldStorageItem = await this.getFromColdStorage(id);
+            if (coldStorageItem) {
+                // Promote to hot and warm caches
+                this.addToHotCache(id, coldStorageItem);
+                await this.addToWarmCache(id, coldStorageItem);
+                // Update stats
+                this.stats.misses++;
+                return coldStorageItem;
+            }
+        }
+        catch (error) {
+            console.warn(`Error accessing cold storage for ${id}:`, error);
+        }
+        // Item not found in any cache level
+        this.stats.misses++;
+        return null;
+    }
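A minimal usage sketch of the tiered read path (assumes an async context and an already-wired CacheManager instance named cacheManager; the ID is illustrative):

// First read falls through to cold storage and counts as a miss, but the
// item is promoted on the way back, so the second read is a hot-cache hit.
const first = await cacheManager.get('node-42');  // miss (served from cold storage)
const second = await cacheManager.get('node-42'); // hit (served from hot cache)
console.log(cacheManager.getStats());             // e.g. { hits: 1, misses: 1, ... }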
+    /**
+     * Get an item from warm cache
+     * @param id The item ID
+     * @returns The cached item or null if not found
+     */
+    async getFromWarmCache(id) {
+        if (!this.warmStorage)
+            return null;
+        try {
+            return await this.warmStorage.get(id);
+        }
+        catch (error) {
+            console.warn(`Error getting item ${id} from warm cache:`, error);
+            return null;
+        }
+    }
+    /**
+     * Get an item from cold storage
+     * @param id The item ID
+     * @returns The item or null if not found
+     */
+    async getFromColdStorage(id) {
+        if (!this.coldStorage)
+            return null;
+        try {
+            return await this.coldStorage.get(id);
+        }
+        catch (error) {
+            console.warn(`Error getting item ${id} from cold storage:`, error);
+            return null;
+        }
+    }
+    /**
+     * Add an item to hot cache
+     * @param id The item ID
+     * @param item The item to cache
+     */
+    addToHotCache(id, item) {
+        // Check if we need to evict items
+        if (this.hotCache.size >= this.hotCacheMaxSize * this.hotCacheEvictionThreshold) {
+            this.evictFromHotCache();
+        }
+        // Add to hot cache
+        this.hotCache.set(id, {
+            data: item,
+            lastAccessed: Date.now(),
+            accessCount: 1,
+            expiresAt: null // Hot cache items don't expire
+        });
+        // Update stats
+        this.stats.size = this.hotCache.size;
+    }
+    /**
+     * Add an item to warm cache
+     * @param id The item ID
+     * @param item The item to cache
+     */
+    async addToWarmCache(id, item) {
+        if (!this.warmStorage)
+            return;
+        try {
+            // Add to warm cache with TTL
+            await this.warmStorage.set(id, item, {
+                ttl: this.warmCacheTTL
+            });
+        }
+        catch (error) {
+            console.warn(`Error adding item ${id} to warm cache:`, error);
+        }
+    }
+    /**
+     * Evict items from hot cache based on LRU policy
+     */
+    evictFromHotCache() {
+        // Find the least recently used items
+        const entries = Array.from(this.hotCache.entries());
+        // Sort by last accessed time (oldest first)
+        entries.sort((a, b) => a[1].lastAccessed - b[1].lastAccessed);
+        // Remove the oldest 20% of items
+        const itemsToRemove = Math.ceil(this.hotCache.size * 0.2);
+        for (let i = 0; i < itemsToRemove && i < entries.length; i++) {
+            this.hotCache.delete(entries[i][0]);
+            this.stats.evictions++;
+        }
+        // Update stats
+        this.stats.size = this.hotCache.size;
+        if (process.env.DEBUG) {
+            console.log(`Evicted ${itemsToRemove} items from hot cache, new size: ${this.hotCache.size}`);
+        }
+    }
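Note that each eviction pass sorts every entry (O(n log n)) and drops the oldest 20%, so evictions happen in bursts rather than one item at a time. The same policy in isolation, as a sketch (not package code):

// Evict the least-recently-used fraction of a Map whose values carry lastAccessed.
const evictOldest = (cache, fraction = 0.2) => {
    const entries = Array.from(cache.entries())
        .sort((a, b) => a[1].lastAccessed - b[1].lastAccessed);
    for (const [key] of entries.slice(0, Math.ceil(cache.size * fraction))) {
        cache.delete(key);
    }
};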
+    /**
+     * Set an item in all cache levels
+     * @param id The item ID
+     * @param item The item to cache
+     */
+    async set(id, item) {
+        // Add to hot cache
+        this.addToHotCache(id, item);
+        // Add to warm cache
+        await this.addToWarmCache(id, item);
+        // Add to cold storage
+        if (this.coldStorage) {
+            try {
+                await this.coldStorage.set(id, item);
+            }
+            catch (error) {
+                console.warn(`Error adding item ${id} to cold storage:`, error);
+            }
+        }
+    }
+    /**
+     * Delete an item from all cache levels
+     * @param id The item ID to delete
+     */
+    async delete(id) {
+        // Remove from hot cache
+        this.hotCache.delete(id);
+        // Remove from warm cache
+        if (this.warmStorage) {
+            try {
+                await this.warmStorage.delete(id);
+            }
+            catch (error) {
+                console.warn(`Error deleting item ${id} from warm cache:`, error);
+            }
+        }
+        // Remove from cold storage
+        if (this.coldStorage) {
+            try {
+                await this.coldStorage.delete(id);
+            }
+            catch (error) {
+                console.warn(`Error deleting item ${id} from cold storage:`, error);
+            }
+        }
+        // Update stats
+        this.stats.size = this.hotCache.size;
+    }
+    /**
+     * Clear all cache levels
+     */
+    async clear() {
+        // Clear hot cache
+        this.hotCache.clear();
+        // Clear warm cache
+        if (this.warmStorage) {
+            try {
+                await this.warmStorage.clear();
+            }
+            catch (error) {
+                console.warn('Error clearing warm cache:', error);
+            }
+        }
+        // Clear cold storage
+        if (this.coldStorage) {
+            try {
+                await this.coldStorage.clear();
+            }
+            catch (error) {
+                console.warn('Error clearing cold storage:', error);
+            }
+        }
+        // Reset stats
+        this.stats = {
+            hits: 0,
+            misses: 0,
+            evictions: 0,
+            size: 0,
+            maxSize: this.hotCacheMaxSize
+        };
+    }
+    /**
+     * Get cache statistics
+     * @returns Cache statistics
+     */
+    getStats() {
+        return { ...this.stats };
+    }
+    /**
+     * Prefetch items based on ID patterns or relationships
+     * @param ids Array of IDs to prefetch
+     */
+    async prefetch(ids) {
+        // Check if it's time to tune parameters
+        await this.checkAndTuneParameters();
+        // Prefetch in batches to avoid overwhelming the system
+        const batches = [];
+        // Split into batches using the configurable batch size
+        for (let i = 0; i < ids.length; i += this.batchSize) {
+            const batch = ids.slice(i, i + this.batchSize);
+            batches.push(batch);
+        }
+        // Process each batch
+        for (const batch of batches) {
+            await Promise.all(batch.map(async (id) => {
+                // Skip if already in hot cache
+                if (this.hotCache.has(id))
+                    return;
+                try {
+                    // Try to get from any cache level
+                    await this.get(id);
+                }
+                catch (error) {
+                    // Ignore errors during prefetching
+                    if (process.env.DEBUG) {
+                        console.warn(`Error prefetching ${id}:`, error);
+                    }
+                }
+            }));
+        }
+    }
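Prefetching pays off right before a burst of related reads, e.g. warming a node's neighborhood ahead of a traversal (a sketch; async context assumed, IDs illustrative):

await cacheManager.prefetch(['node-1', 'node-2', 'node-3']);
// Subsequent reads of these IDs are now likely hot-cache hits.
const node = await cacheManager.get('node-2');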
+    /**
+     * Check if it's time to tune parameters and do so if needed
+     * This is called before operations that might benefit from tuned parameters
+     *
+     * This method serves as a checkpoint for auto-tuning, ensuring that:
+     * 1. Parameters are tuned periodically based on the auto-tune interval
+     * 2. Tuning happens before critical operations that would benefit from optimized parameters
+     * 3. Tuning doesn't happen too frequently, which could impact performance
+     *
+     * By calling this method before get(), getMany(), and prefetch() operations,
+     * we ensure that the cache parameters are optimized for the current workload
+     * without adding unnecessary overhead to every operation.
+     */
+    async checkAndTuneParameters() {
+        // Skip if auto-tuning is disabled
+        if (!this.autoTune)
+            return;
+        // Check if it's time to tune parameters
+        const now = Date.now();
+        if (now - this.lastAutoTuneTime >= this.autoTuneInterval) {
+            await this.tuneParameters();
+        }
+    }
+    /**
+     * Get multiple items at once, optimizing for batch retrieval
+     * @param ids Array of IDs to get
+     * @returns Map of ID to item
+     */
+    async getMany(ids) {
+        // Check if it's time to tune parameters
+        await this.checkAndTuneParameters();
+        const result = new Map();
+        // First check hot cache for all IDs
+        const missingIds = [];
+        for (const id of ids) {
+            const hotCacheEntry = this.hotCache.get(id);
+            if (hotCacheEntry) {
+                // Update access metadata
+                hotCacheEntry.lastAccessed = Date.now();
+                hotCacheEntry.accessCount++;
+                // Add to result
+                result.set(id, hotCacheEntry.data);
+                // Update stats
+                this.stats.hits++;
+            }
+            else {
+                missingIds.push(id);
+            }
+        }
+        if (missingIds.length === 0) {
+            return result;
+        }
+        // Try to get missing items from warm cache
+        if (this.warmStorage) {
+            try {
+                const warmCacheItems = await this.warmStorage.getMany(missingIds);
+                for (const [id, item] of warmCacheItems.entries()) {
+                    if (item) {
+                        // Promote to hot cache
+                        this.addToHotCache(id, item);
+                        // Add to result
+                        result.set(id, item);
+                        // Update stats
+                        this.stats.hits++;
+                        // Remove from missing IDs
+                        const index = missingIds.indexOf(id);
+                        if (index !== -1) {
+                            missingIds.splice(index, 1);
+                        }
+                    }
+                }
+            }
+            catch (error) {
+                console.warn('Error accessing warm cache for batch:', error);
+            }
+        }
+        if (missingIds.length === 0) {
+            return result;
+        }
+        // Try to get remaining missing items from cold storage
+        if (this.coldStorage) {
+            try {
+                const coldStorageItems = await this.coldStorage.getMany(missingIds);
+                for (const [id, item] of coldStorageItems.entries()) {
+                    if (item) {
+                        // Promote to hot and warm caches
+                        this.addToHotCache(id, item);
+                        await this.addToWarmCache(id, item);
+                        // Add to result
+                        result.set(id, item);
+                        // Update stats
+                        this.stats.misses++;
+                    }
+                }
+            }
+            catch (error) {
+                console.warn('Error accessing cold storage for batch:', error);
+            }
+        }
+        return result;
+    }
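Batch reads collapse what would otherwise be one tier-by-tier lookup per ID into three tier-level passes; a usage sketch (async context assumed, IDs illustrative):

const items = await cacheManager.getMany(['a', 'b', 'c']);
for (const [id, item] of items) {
    console.log(id, item);
}
// IDs not found in any tier are simply absent from the returned Map.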
+    /**
+     * Set the storage adapters for warm and cold caches
+     * @param warmStorage Warm cache storage adapter
+     * @param coldStorage Cold storage adapter
      */
-
-
+    setStorageAdapters(warmStorage, coldStorage) {
+        this.warmStorage = warmStorage;
+        this.coldStorage = coldStorage;
     }
 }
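The adapter contract this method implies is small: get, set, delete, getMany, and clear, all async. A minimal in-memory pair that satisfies it, purely as an illustration (the package wires real S3-backed adapters in S3CompatibleStorage below):

const makeMemoryAdapter = () => {
    const store = new Map();
    return {
        get: async (id) => store.get(id) ?? null,
        set: async (id, item) => { store.set(id, item); }, // TTL options, if passed, are ignored here
        delete: async (id) => { store.delete(id); },
        getMany: async (ids) => {
            const result = new Map();
            for (const id of ids) {
                if (store.has(id)) {
                    result.set(id, store.get(id));
                }
            }
            return result;
        },
        clear: async () => { store.clear(); }
    };
};
cacheManager.setStorageAdapters(makeMemoryAdapter(), makeMemoryAdapter());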
 
@@ -7973,6 +9492,8 @@ class S3CompatibleStorage extends BaseStorage {
         this.activeLocks = new Set();
         // Change log for efficient synchronization
         this.changeLogPrefix = 'change-log/';
+        // Node cache to avoid redundant API calls
+        this.nodeCache = new Map();
         // Batch update timer ID
         this.statisticsBatchUpdateTimerId = null;
         // Flag to indicate if statistics have been modified since last save
@@ -7998,6 +9519,9 @@ class S3CompatibleStorage extends BaseStorage {
         this.verbPrefix = `${VERBS_DIR}/`;
         this.metadataPrefix = `${METADATA_DIR}/`;
         this.indexPrefix = `${INDEX_DIR}/`;
+        // Initialize cache managers
+        this.nounCacheManager = new CacheManager(options.cacheConfig);
+        this.verbCacheManager = new CacheManager(options.cacheConfig);
     }
     /**
      * Initialize the storage adapter
@@ -8036,6 +9560,78 @@ class S3CompatibleStorage extends BaseStorage {
             await this.s3Client.send(new HeadBucketCommand({
                 Bucket: this.bucketName
             }));
+            // Create storage adapter proxies for the cache managers
+            const nounStorageAdapter = {
+                get: async (id) => this.getNoun_internal(id),
+                set: async (id, node) => this.saveNoun_internal(node),
+                delete: async (id) => this.deleteNoun_internal(id),
+                getMany: async (ids) => {
+                    const result = new Map();
+                    // Process in batches to avoid overwhelming the S3 API
+                    const batchSize = 10;
+                    const batches = [];
+                    // Split into batches
+                    for (let i = 0; i < ids.length; i += batchSize) {
+                        const batch = ids.slice(i, i + batchSize);
+                        batches.push(batch);
+                    }
+                    // Process each batch
+                    for (const batch of batches) {
+                        const batchResults = await Promise.all(batch.map(async (id) => {
+                            const node = await this.getNoun_internal(id);
+                            return { id, node };
+                        }));
+                        // Add results to map
+                        for (const { id, node } of batchResults) {
+                            if (node) {
+                                result.set(id, node);
+                            }
+                        }
+                    }
+                    return result;
+                },
+                clear: async () => {
+                    // No-op for now, as we don't want to clear the entire storage
+                    // This would be implemented if needed
+                }
+            };
+            const verbStorageAdapter = {
+                get: async (id) => this.getVerb_internal(id),
+                set: async (id, edge) => this.saveVerb_internal(edge),
+                delete: async (id) => this.deleteVerb_internal(id),
+                getMany: async (ids) => {
+                    const result = new Map();
+                    // Process in batches to avoid overwhelming the S3 API
+                    const batchSize = 10;
+                    const batches = [];
+                    // Split into batches
+                    for (let i = 0; i < ids.length; i += batchSize) {
+                        const batch = ids.slice(i, i + batchSize);
+                        batches.push(batch);
+                    }
+                    // Process each batch
+                    for (const batch of batches) {
+                        const batchResults = await Promise.all(batch.map(async (id) => {
+                            const edge = await this.getVerb_internal(id);
+                            return { id, edge };
+                        }));
+                        // Add results to map
+                        for (const { id, edge } of batchResults) {
+                            if (edge) {
+                                result.set(id, edge);
+                            }
+                        }
+                    }
+                    return result;
+                },
+                clear: async () => {
+                    // No-op for now, as we don't want to clear the entire storage
+                    // This would be implemented if needed
+                }
+            };
+            // Set storage adapters for cache managers
+            this.nounCacheManager.setStorageAdapters(nounStorageAdapter, nounStorageAdapter);
+            this.verbCacheManager.setStorageAdapters(verbStorageAdapter, verbStorageAdapter);
             this.isInitialized = true;
         }
         catch (error) {
@@ -8144,7 +9740,10 @@ class S3CompatibleStorage extends BaseStorage {
             const parsedNode = JSON.parse(bodyContents);
             console.log(`Parsed node data for ${id}:`, parsedNode);
             // Ensure the parsed node has the expected properties
-            if (!parsedNode || !parsedNode.id || !parsedNode.vector || !parsedNode.connections) {
+            if (!parsedNode ||
+                !parsedNode.id ||
+                !parsedNode.vector ||
+                !parsedNode.connections) {
                 console.error(`Invalid node data for ${id}:`, parsedNode);
                 return null;
             }
@@ -8180,100 +9779,118 @@ class S3CompatibleStorage extends BaseStorage {
     }
     /**
      * Get all nodes from storage
+     * @deprecated This method is deprecated and will be removed in a future version.
+     * It can cause memory issues with large datasets. Use getNodesWithPagination() instead.
      */
     async getAllNodes() {
         await this.ensureInitialized();
+        console.warn('WARNING: getAllNodes() is deprecated and will be removed in a future version. Use getNodesWithPagination() instead.');
+        try {
+            // Use the paginated method with a large limit to maintain backward compatibility
+            // but warn about potential issues
+            const result = await this.getNodesWithPagination({
+                limit: 1000, // Reasonable limit to avoid memory issues
+                useCache: true
+            });
+            if (result.hasMore) {
+                console.warn(`WARNING: Only returning the first 1000 nodes. There are more nodes available. Use getNodesWithPagination() for proper pagination.`);
+            }
+            return result.nodes;
+        }
+        catch (error) {
+            console.error('Failed to get all nodes:', error);
+            return [];
+        }
+    }
+    /**
+     * Get nodes with pagination
+     * @param options Pagination options
+     * @returns Promise that resolves to a paginated result of nodes
+     */
+    async getNodesWithPagination(options = {}) {
+        await this.ensureInitialized();
+        const limit = options.limit || 100;
+        const useCache = options.useCache !== false;
         try {
             // Import the ListObjectsV2Command and GetObjectCommand only when needed
-            const { ListObjectsV2Command
-
-            // List all objects in the nouns directory
+            const { ListObjectsV2Command } = await import('@aws-sdk/client-s3');
+            // List objects with pagination
             const listResponse = await this.s3Client.send(new ListObjectsV2Command({
                 Bucket: this.bucketName,
-                Prefix: this.nounPrefix
+                Prefix: this.nounPrefix,
+                MaxKeys: limit,
+                ContinuationToken: options.cursor
             }));
+            // If listResponse is null/undefined or there are no objects, return an empty result
+            if (!listResponse ||
+                !listResponse.Contents ||
+                listResponse.Contents.length === 0) {
+                return {
+                    nodes: [],
+                    hasMore: false
+                };
+            }
+            // Extract node IDs from the keys
+            const nodeIds = listResponse.Contents
+                .filter((object) => object && object.Key)
+                .map((object) => object.Key.replace(this.nounPrefix, '').replace('.json', ''));
+            // Use the cache manager to get nodes efficiently
             const nodes = [];
-
-
-
-
-
-
-
-
-
-                if (object && object.Key) {
-                    console.log(`- ${object.Key}`);
+            if (useCache) {
+                // Get nodes from cache manager
+                const cachedNodes = await this.nounCacheManager.getMany(nodeIds);
+                // Add nodes to result in the same order as nodeIds
+                for (const id of nodeIds) {
+                    const node = cachedNodes.get(id);
+                    if (node) {
+                        nodes.push(node);
+                    }
                 }
             }
-
-
-
-
-
+            else {
+                // Get nodes directly from S3 without using cache
+                // Process in smaller batches to reduce memory usage
+                const batchSize = 50;
+                const batches = [];
+                // Split into batches
+                for (let i = 0; i < nodeIds.length; i += batchSize) {
+                    const batch = nodeIds.slice(i, i + batchSize);
+                    batches.push(batch);
                 }
-
-
-                const
-
-
-
-
-                    Key: object.Key
-                }));
-                // Check if response is null or undefined
-                if (!response || !response.Body) {
-                    console.log(`No response or response body for node ${nodeId}`);
-                    return null;
-                }
-                // Convert the response body to a string
-                const bodyContents = await response.Body.transformToString();
-                console.log(`Retrieved node body for ${nodeId}: ${bodyContents.substring(0, 100)}${bodyContents.length > 100 ? '...' : ''}`);
-                // Parse the JSON string
-                try {
-                    const parsedNode = JSON.parse(bodyContents);
-                    console.log(`Parsed node data for ${nodeId}:`, parsedNode);
-                    // Ensure the parsed node has the expected properties
-                    if (!parsedNode || !parsedNode.id || !parsedNode.vector || !parsedNode.connections) {
-                        console.error(`Invalid node data for ${nodeId}:`, parsedNode);
+                // Process each batch sequentially
+                for (const batch of batches) {
+                    const batchNodes = await Promise.all(batch.map(async (id) => {
+                        try {
+                            return await this.getNoun_internal(id);
+                        }
+                        catch (error) {
                             return null;
                         }
-
-
-
-
+                    }));
+                    // Add non-null nodes to result
+                    for (const node of batchNodes) {
+                        if (node) {
+                            nodes.push(node);
                         }
-                    const node = {
-                        id: parsedNode.id,
-                        vector: parsedNode.vector,
-                        connections
-                    };
-                    console.log(`Successfully retrieved node ${nodeId}:`, node);
-                    return node;
                     }
-                catch (parseError) {
-                    console.error(`Failed to parse node data for ${nodeId}:`, parseError);
-                    return null;
-                }
-            }
-            catch (error) {
-                console.error(`Error getting node from ${object.Key}:`, error);
-                return null;
                 }
-            });
-            // Wait for all promises to resolve and filter out nulls
-            const resolvedNodes = await Promise.all(nodePromises);
-            const filteredNodes = resolvedNodes.filter((node) => node !== null);
-            console.log(`Returning ${filteredNodes.length} nodes`);
-            // Debug: Log all nodes being returned
-            for (const node of filteredNodes) {
-                console.log(`- Node ${node.id}`);
             }
-
+            // Determine if there are more nodes
+            const hasMore = !!listResponse.IsTruncated;
+            // Set next cursor if there are more nodes
+            const nextCursor = listResponse.NextContinuationToken;
+            return {
+                nodes,
+                hasMore,
+                nextCursor
+            };
         }
         catch (error) {
-            console.error('Failed to get
-            return
+            console.error('Failed to get nodes with pagination:', error);
+            return {
+                nodes: [],
+                hasMore: false
+            };
         }
     }
     /**
@@ -8292,14 +9909,31 @@ class S3CompatibleStorage extends BaseStorage {
     async getNodesByNounType(nounType) {
         await this.ensureInitialized();
         try {
-            // Get all nodes
-            const allNodes = await this.getAllNodes();
-            // Filter nodes by noun type using metadata
             const filteredNodes = [];
-
-
-
-
+            let hasMore = true;
+            let cursor = undefined;
+            // Use pagination to process nodes in batches
+            while (hasMore) {
+                // Get a batch of nodes
+                const result = await this.getNodesWithPagination({
+                    limit: 100,
+                    cursor,
+                    useCache: true
+                });
+                // Filter nodes by noun type using metadata
+                for (const node of result.nodes) {
+                    const metadata = await this.getMetadata(node.id);
+                    if (metadata && metadata.noun === nounType) {
+                        filteredNodes.push(node);
+                    }
+                }
+                // Update pagination state
+                hasMore = result.hasMore;
+                cursor = result.nextCursor;
+                // Safety check to prevent infinite loops
+                if (!cursor && hasMore) {
+                    console.warn('No cursor returned but hasMore is true, breaking loop');
+                    break;
                 }
             }
             return filteredNodes;
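The cursor loop in getNodesByNounType above is also the intended consumption pattern for callers of the new paginated API; a sketch (async context assumed; process() is a hypothetical per-node handler):

// Stream all nodes in pages of 100 without holding the full set in memory.
let cursor = undefined;
do {
    const page = await storage.getNodesWithPagination({ limit: 100, cursor });
    for (const node of page.nodes) {
        process(node);
    }
    cursor = page.hasMore ? page.nextCursor : undefined;
} while (cursor);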
|
|
@@ -8422,7 +10056,10 @@ class S3CompatibleStorage extends BaseStorage {
|
|
|
8422
10056
|
const parsedEdge = JSON.parse(bodyContents);
|
|
8423
10057
|
console.log(`Parsed edge data for ${id}:`, parsedEdge);
|
|
8424
10058
|
// Ensure the parsed edge has the expected properties
|
|
8425
|
-
if (!parsedEdge ||
|
|
10059
|
+
if (!parsedEdge ||
|
|
10060
|
+
!parsedEdge.id ||
|
|
10061
|
+
!parsedEdge.vector ||
|
|
10062
|
+
!parsedEdge.connections ||
|
|
8426
10063
|
!(parsedEdge.sourceId || parsedEdge.source) ||
|
|
8427
10064
|
!(parsedEdge.targetId || parsedEdge.target) ||
|
|
8428
10065
|
!(parsedEdge.type || parsedEdge.verb)) {
|
|
@@ -8476,86 +10113,205 @@ class S3CompatibleStorage extends BaseStorage {
|
|
|
8476
10113
|
}
|
|
8477
10114
|
/**
|
|
8478
10115
|
* Get all verbs from storage (internal implementation)
|
|
10116
|
+
* @deprecated This method is deprecated and will be removed in a future version.
|
|
10117
|
+
* It can cause memory issues with large datasets. Use getVerbsWithPagination() instead.
|
|
8479
10118
|
*/
|
|
8480
10119
|
async getAllVerbs_internal() {
|
|
10120
|
+
console.warn('WARNING: getAllVerbs_internal() is deprecated and will be removed in a future version. Use getVerbsWithPagination() instead.');
|
|
8481
10121
|
return this.getAllEdges();
|
|
8482
10122
|
}
|
|
8483
10123
|
/**
|
|
8484
10124
|
* Get all edges from storage
|
|
10125
|
+
* @deprecated This method is deprecated and will be removed in a future version.
|
|
10126
|
+
* It can cause memory issues with large datasets. Use getEdgesWithPagination() instead.
|
|
8485
10127
|
*/
|
|
8486
10128
|
async getAllEdges() {
|
|
8487
10129
|
await this.ensureInitialized();
|
|
10130
|
+
console.warn('WARNING: getAllEdges() is deprecated and will be removed in a future version. Use getEdgesWithPagination() instead.');
|
|
8488
10131
|
try {
|
|
8489
|
-
//
|
|
8490
|
-
|
|
8491
|
-
|
|
10132
|
+
// Use the paginated method with a large limit to maintain backward compatibility
|
|
10133
|
+
// but warn about potential issues
|
|
10134
|
+
const result = await this.getEdgesWithPagination({
|
|
10135
|
+
limit: 1000, // Reasonable limit to avoid memory issues
|
|
10136
|
+
useCache: true
|
|
10137
|
+
});
|
|
10138
|
+
if (result.hasMore) {
|
|
10139
|
+
console.warn(`WARNING: Only returning the first 1000 edges. There are more edges available. Use getEdgesWithPagination() for proper pagination.`);
|
|
10140
|
+
}
|
|
10141
|
+
return result.edges;
|
|
10142
|
+
}
|
|
10143
|
+
catch (error) {
|
|
10144
|
+
console.error('Failed to get all edges:', error);
|
|
10145
|
+
return [];
|
|
10146
|
+
}
|
|
10147
|
+
}
|
|
10148
|
+
/**
|
|
10149
|
+
* Get edges with pagination
|
|
10150
|
+
* @param options Pagination options
|
|
10151
|
+
* @returns Promise that resolves to a paginated result of edges
|
|
10152
|
+
*/
|
|
10153
|
+
async getEdgesWithPagination(options = {}) {
|
|
10154
|
+
await this.ensureInitialized();
|
|
10155
|
+
const limit = options.limit || 100;
|
|
10156
|
+
const useCache = options.useCache !== false;
|
|
10157
|
+
const filter = options.filter || {};
|
|
10158
|
+
try {
|
|
10159
|
+
// Import the ListObjectsV2Command only when needed
|
|
10160
|
+
const { ListObjectsV2Command } = await import('@aws-sdk/client-s3');
|
|
10161
|
+
// List objects with pagination
|
|
8492
10162
|
const listResponse = await this.s3Client.send(new ListObjectsV2Command({
|
|
8493
10163
|
Bucket: this.bucketName,
|
|
8494
|
-
Prefix: this.verbPrefix
|
|
10164
|
+
Prefix: this.verbPrefix,
|
|
10165
|
+
MaxKeys: limit,
|
|
10166
|
+
ContinuationToken: options.cursor
|
|
8495
10167
|
}));
|
|
10168
|
+
// If listResponse is null/undefined or there are no objects, return an empty result
|
|
10169
|
+
if (!listResponse ||
|
|
10170
|
+
!listResponse.Contents ||
|
|
10171
|
+
listResponse.Contents.length === 0) {
|
|
10172
|
+
return {
|
|
10173
|
+
edges: [],
|
|
10174
|
+
hasMore: false
|
|
10175
|
+
};
|
|
10176
|
+
}
|
|
10177
|
+
// Extract edge IDs from the keys
|
|
10178
|
+
const edgeIds = listResponse.Contents
|
|
10179
|
+
.filter((object) => object && object.Key)
|
|
10180
|
+
.map((object) => object.Key.replace(this.verbPrefix, '').replace('.json', ''));
|
|
10181
|
+
// Use the cache manager to get edges efficiently
|
|
8496
10182
|
const edges = [];
|
|
8497
|
-
|
|
8498
|
-
|
|
8499
|
-
|
|
10183
|
+
if (useCache) {
|
|
10184
|
+
// Get edges from cache manager
|
|
10185
|
+
const cachedEdges = await this.verbCacheManager.getMany(edgeIds);
|
|
10186
|
+
// Add edges to result in the same order as edgeIds
|
|
10187
|
+
for (const id of edgeIds) {
|
|
10188
|
+
const edge = cachedEdges.get(id);
|
|
10189
|
+
if (edge) {
|
|
10190
|
+
// Apply filtering if needed
|
|
10191
|
+
if (this.filterEdge(edge, filter)) {
|
|
10192
|
+
edges.push(edge);
|
|
10193
|
+
}
|
|
10194
|
+
}
|
|
10195
|
+
}
|
|
8500
10196
|
}
|
|
8501
|
-
|
|
8502
|
-
|
|
8503
|
-
|
|
8504
|
-
|
|
8505
|
-
|
|
8506
|
-
|
|
8507
|
-
|
|
8508
|
-
|
|
8509
|
-
|
|
10197
|
+
else {
|
|
10198
|
+
// Get edges directly from S3 without using cache
|
|
10199
|
+
// Process in smaller batches to reduce memory usage
|
|
10200
|
+
const batchSize = 50;
|
|
10201
|
+
const batches = [];
|
|
10202
|
+
// Split into batches
|
|
10203
|
+
for (let i = 0; i < edgeIds.length; i += batchSize) {
|
|
10204
|
+
const batch = edgeIds.slice(i, i + batchSize);
|
|
10205
|
+
batches.push(batch);
|
|
10206
|
+
}
|
|
10207
|
+
// Process each batch sequentially
|
|
10208
|
+
for (const batch of batches) {
|
|
10209
|
+
const batchEdges = await Promise.all(batch.map(async (id) => {
|
|
10210
|
+
try {
|
|
10211
|
+
const edge = await this.getVerb_internal(id);
|
|
10212
|
+
// Apply filtering if needed
|
|
10213
|
+
if (edge && this.filterEdge(edge, filter)) {
|
|
10214
|
+
return edge;
|
|
10215
|
+
}
|
|
10216
|
+
return null;
|
|
10217
|
+
}
|
|
10218
|
+
catch (error) {
|
|
10219
|
+
return null;
|
|
10220
|
+
}
|
|
8510
10221
|
}));
|
|
8511
|
-
//
|
|
8512
|
-
const
|
|
8513
|
-
|
|
8514
|
-
|
|
8515
|
-
|
|
8516
|
-
for (const [level, nodeIds] of Object.entries(parsedEdge.connections)) {
|
|
8517
|
-
connections.set(Number(level), new Set(nodeIds));
|
|
10222
|
+
// Add non-null edges to result
|
|
10223
|
+
for (const edge of batchEdges) {
|
|
10224
|
+
if (edge) {
|
|
10225
|
+
edges.push(edge);
|
|
10226
|
+
}
|
|
8518
10227
|
}
|
|
8519
|
-
// Create default timestamp if not present
|
|
8520
|
-
const defaultTimestamp = {
|
|
8521
|
-
seconds: Math.floor(Date.now() / 1000),
|
|
8522
|
-
nanoseconds: (Date.now() % 1000) * 1000000
|
|
8523
|
-
};
|
|
8524
|
-
// Create default createdBy if not present
|
|
8525
|
-
const defaultCreatedBy = {
|
|
8526
|
-
augmentation: 'unknown',
|
|
8527
|
-
version: '1.0'
|
|
8528
|
-
};
|
|
8529
|
-
return {
|
|
8530
|
-
id: parsedEdge.id,
|
|
8531
|
-
vector: parsedEdge.vector,
|
|
8532
|
-
connections,
|
|
8533
|
-
sourceId: parsedEdge.sourceId || parsedEdge.source,
|
|
8534
|
-
targetId: parsedEdge.targetId || parsedEdge.target,
|
|
8535
|
-
source: parsedEdge.sourceId || parsedEdge.source,
|
|
8536
|
-
target: parsedEdge.targetId || parsedEdge.target,
|
|
8537
|
-
verb: parsedEdge.type || parsedEdge.verb,
|
|
8538
|
-
type: parsedEdge.type || parsedEdge.verb,
|
|
8539
|
-
weight: parsedEdge.weight || 1.0,
|
|
8540
|
-
metadata: parsedEdge.metadata || {},
|
|
8541
|
-
createdAt: parsedEdge.createdAt || defaultTimestamp,
|
|
8542
|
-
updatedAt: parsedEdge.updatedAt || defaultTimestamp,
|
|
8543
|
-
createdBy: parsedEdge.createdBy || defaultCreatedBy
|
|
8544
|
-
};
|
|
8545
|
-
}
|
|
8546
|
-
catch (error) {
|
|
8547
|
-
console.error(`Error getting edge from ${object.Key}:`, error);
|
|
8548
|
-
return null;
|
|
8549
10228
|
}
|
|
8550
|
-
}
|
|
8551
|
-
//
|
|
8552
|
-
const
|
|
8553
|
-
|
|
10229
|
+
}
|
|
10230
|
+
// Determine if there are more edges
|
|
10231
|
+
const hasMore = !!listResponse.IsTruncated;
|
|
10232
|
+
// Set next cursor if there are more edges
|
|
10233
|
+
const nextCursor = listResponse.NextContinuationToken;
|
|
10234
|
+
return {
|
|
10235
|
+
edges,
|
|
10236
|
+
hasMore,
|
|
10237
|
+
nextCursor
|
|
10238
|
+
};
|
|
8554
10239
|
}
|
|
8555
10240
|
catch (error) {
|
|
8556
|
-
console.error('Failed to get
|
|
8557
|
-
return
|
|
10241
|
+
console.error('Failed to get edges with pagination:', error);
|
|
10242
|
+
return {
|
|
10243
|
+
edges: [],
|
|
10244
|
+
hasMore: false
|
|
10245
|
+
};
|
|
10246
|
+
}
|
|
10247
|
+
}
|
|
10248
|
+
/**
|
|
10249
|
+
* Filter an edge based on filter criteria
|
|
10250
|
+
* @param edge The edge to filter
|
|
10251
|
+
* @param filter The filter criteria
|
|
10252
|
+
* @returns True if the edge matches the filter, false otherwise
|
|
10253
|
+
*/
|
|
10254
|
+
filterEdge(edge, filter) {
|
|
10255
|
+
// If no filter, include all edges
|
|
10256
|
+
if (!filter.sourceId && !filter.targetId && !filter.type) {
|
|
10257
|
+
return true;
|
|
10258
|
+
}
|
|
10259
|
+
// Filter by source ID
|
|
10260
|
+
if (filter.sourceId && edge.sourceId !== filter.sourceId) {
|
|
10261
|
+
return false;
|
|
10262
|
+
}
|
|
10263
|
+
// Filter by target ID
|
|
10264
|
+
if (filter.targetId && edge.targetId !== filter.targetId) {
|
|
10265
|
+
return false;
|
|
10266
|
+
}
|
|
10267
|
+
// Filter by type
|
|
10268
|
+
if (filter.type && edge.type !== filter.type) {
|
|
10269
|
+
return false;
|
|
10270
|
+
}
|
|
10271
|
+
return true;
|
|
10272
|
+
}
|
|
10273
|
+
/**
|
|
10274
|
+
* Get verbs with pagination
|
|
10275
|
+
* @param options Pagination options
|
|
10276
|
+
* @returns Promise that resolves to a paginated result of verbs
|
|
10277
|
+
*/
|
|
10278
|
+
async getVerbsWithPagination(options = {}) {
|
|
10279
|
+
await this.ensureInitialized();
|
|
10280
|
+
// Convert filter to edge filter format
|
|
10281
|
+
const edgeFilter = {};
|
|
10282
|
+
if (options.filter) {
|
|
10283
|
+
// Handle sourceId filter
|
|
10284
|
+
if (options.filter.sourceId) {
|
|
10285
|
+
edgeFilter.sourceId = Array.isArray(options.filter.sourceId)
|
|
10286
|
+
? options.filter.sourceId[0]
|
|
10287
|
+
: options.filter.sourceId;
|
|
10288
|
+
}
|
|
10289
|
+
// Handle targetId filter
|
|
10290
|
+
if (options.filter.targetId) {
|
|
10291
|
+
edgeFilter.targetId = Array.isArray(options.filter.targetId)
|
|
10292
|
+
? options.filter.targetId[0]
|
|
10293
|
+
: options.filter.targetId;
|
|
10294
|
+
}
|
|
10295
|
+
// Handle verbType filter
|
|
10296
|
+
if (options.filter.verbType) {
|
|
10297
|
+
edgeFilter.type = Array.isArray(options.filter.verbType)
|
|
10298
|
+
? options.filter.verbType[0]
|
|
10299
|
+
: options.filter.verbType;
|
|
10300
|
+
}
|
|
8558
10301
|
}
|
|
10302
|
+
// Get edges with pagination
|
|
10303
|
+
const result = await this.getEdgesWithPagination({
|
|
10304
|
+
limit: options.limit,
|
|
10305
|
+
cursor: options.cursor,
|
|
10306
|
+
useCache: true,
|
|
10307
|
+
filter: edgeFilter
|
|
10308
|
+
});
|
|
10309
|
+
// Convert edges to verbs (they're the same in this implementation)
|
|
10310
|
+
return {
|
|
10311
|
+
items: result.edges,
|
|
10312
|
+
hasMore: result.hasMore,
|
|
10313
|
+
nextCursor: result.nextCursor
|
|
10314
|
+
};
|
|
8559
10315
|
}
|
|
8560
10316
|
/**
|
|
8561
10317
|
* Get verbs by source (internal implementation)
|
|
@@ -8722,9 +10478,10 @@ class S3CompatibleStorage extends BaseStorage {
|
|
|
8722
10478
|
// In AWS SDK, this would be error.name === 'NoSuchKey'
|
|
8723
10479
|
// In our mock, we might get different error types
|
|
8724
10480
|
if (error.name === 'NoSuchKey' ||
|
|
8725
|
-
(error.message &&
|
|
8726
|
-
error.message.includes('
|
|
8727
|
-
|
|
10481
|
+
(error.message &&
|
|
10482
|
+
(error.message.includes('NoSuchKey') ||
|
|
10483
|
+
error.message.includes('not found') ||
|
|
10484
|
+
error.message.includes('does not exist')))) {
|
|
8728
10485
|
console.log(`Metadata not found for ${id}`);
|
|
8729
10486
|
return null;
|
|
8730
10487
|
}
|
|
@@ -8749,7 +10506,9 @@ class S3CompatibleStorage extends BaseStorage {
|
|
|
8749
10506
|
Prefix: prefix
|
|
8750
10507
|
}));
|
|
8751
10508
|
// If there are no objects or Contents is undefined, return
|
|
8752
|
-
if (!listResponse ||
|
|
10509
|
+
if (!listResponse ||
|
|
10510
|
+
!listResponse.Contents ||
|
|
10511
|
+
listResponse.Contents.length === 0) {
|
|
8753
10512
|
return;
|
|
8754
10513
|
}
|
|
8755
10514
|
// Delete each object
|
|
@@ -8799,15 +10558,20 @@ class S3CompatibleStorage extends BaseStorage {
                 Prefix: prefix
             }));
             // If there are no objects or Contents is undefined, return
-            if (!listResponse ||
+            if (!listResponse ||
+                !listResponse.Contents ||
+                listResponse.Contents.length === 0) {
                 return { size, count };
             }
             // Calculate size and count
             for (const object of listResponse.Contents) {
                 if (object) {
                     // Ensure Size is a number
-                    const objectSize = typeof object.Size === 'number'
-
+                    const objectSize = typeof object.Size === 'number'
+                        ? object.Size
+                        : object.Size
+                            ? parseInt(object.Size.toString(), 10)
+                            : 0;
                     // Add to total size and increment count
                     size += objectSize || 0;
                     count++;
@@ -8826,12 +10590,17 @@ class S3CompatibleStorage extends BaseStorage {
             const verbsResult = await calculateSizeAndCount(this.verbPrefix);
             const metadataResult = await calculateSizeAndCount(this.metadataPrefix);
             const indexResult = await calculateSizeAndCount(this.indexPrefix);
-            totalSize =
+            totalSize =
+                nounsResult.size +
+                verbsResult.size +
+                metadataResult.size +
+                indexResult.size;
             nodeCount = nounsResult.count;
             edgeCount = verbsResult.count;
             metadataCount = metadataResult.count;
             // Ensure we have a minimum size if we have objects
-            if (totalSize === 0 &&
+            if (totalSize === 0 &&
+                (nodeCount > 0 || edgeCount > 0 || metadataCount > 0)) {
                 console.log(`Setting minimum size for ${nodeCount} nodes, ${edgeCount} edges, and ${metadataCount} metadata objects`);
                 totalSize = (nodeCount + edgeCount + metadataCount) * 100; // Arbitrary size per object
             }
@@ -8865,7 +10634,8 @@ class S3CompatibleStorage extends BaseStorage {
                     const metadata = JSON.parse(bodyContents);
                     // Count by noun type
                     if (metadata && metadata.noun) {
-                        nounTypeCounts[metadata.noun] =
+                        nounTypeCounts[metadata.noun] =
+                            (nounTypeCounts[metadata.noun] || 0) + 1;
                     }
                 }
                 catch (parseError) {
@@ -9046,17 +10816,23 @@ class S3CompatibleStorage extends BaseStorage {
      */
     mergeStatistics(storageStats, localStats) {
         // Merge noun counts by taking the maximum of each type
-        const mergedNounCount = {
+        const mergedNounCount = {
+            ...storageStats.nounCount
+        };
         for (const [type, count] of Object.entries(localStats.nounCount)) {
             mergedNounCount[type] = Math.max(mergedNounCount[type] || 0, count);
         }
         // Merge verb counts by taking the maximum of each type
-        const mergedVerbCount = {
+        const mergedVerbCount = {
+            ...storageStats.verbCount
+        };
         for (const [type, count] of Object.entries(localStats.verbCount)) {
             mergedVerbCount[type] = Math.max(mergedVerbCount[type] || 0, count);
         }
         // Merge metadata counts by taking the maximum of each type
-        const mergedMetadataCount = {
+        const mergedMetadataCount = {
+            ...storageStats.metadataCount
+        };
         for (const [type, count] of Object.entries(localStats.metadataCount)) {
             mergedMetadataCount[type] = Math.max(mergedMetadataCount[type] || 0, count);
         }
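The merge semantics are worth spelling out: for every type key, the merged count is the maximum of the storage-side and local values. For example, a storage-side nounCount of `{ Person: 5, Place: 2 }` merged with a local `{ Person: 3, Thing: 4 }` yields `{ Person: 5, Place: 2, Thing: 4 }`. Either side can only raise, never lower, the merged statistics, which keeps the merge safe when one side is stale.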
@@ -9169,9 +10945,10 @@ class S3CompatibleStorage extends BaseStorage {
         catch (error) {
             // Check if this is a "NoSuchKey" error (object doesn't exist)
             if (error.name === 'NoSuchKey' ||
-                (error.message &&
-                    error.message.includes('
-
+                (error.message &&
+                    (error.message.includes('NoSuchKey') ||
+                        error.message.includes('not found') ||
+                        error.message.includes('does not exist')))) {
                 return null;
             }
             // For other errors, propagate them
@@ -9200,8 +10977,8 @@ class S3CompatibleStorage extends BaseStorage {
                 Body: JSON.stringify(entryWithInstance),
                 ContentType: 'application/json',
                 Metadata: {
-
-
+                    timestamp: entry.timestamp.toString(),
+                    operation: entry.operation,
                     'entity-type': entry.entityType,
                     'entity-id': entry.entityId
                 }
@@ -9371,7 +11148,7 @@ class S3CompatibleStorage extends BaseStorage {
         this.activeLocks.add(lockKey);
         // Schedule automatic cleanup when lock expires
         setTimeout(() => {
-            this.releaseLock(lockKey, lockValue).catch(error => {
+            this.releaseLock(lockKey, lockValue).catch((error) => {
                 console.warn(`Failed to auto-release expired lock ${lockKey}:`, error);
             });
         }, ttl);
@@ -10302,7 +12079,8 @@ async function createStorage(options = {}) {
             secretAccessKey: options.s3Storage.secretAccessKey,
             sessionToken: options.s3Storage.sessionToken,
             serviceType: 's3',
-            operationConfig: options.operationConfig
+            operationConfig: options.operationConfig,
+            cacheConfig: options.cacheConfig
         });
     }
     else {
@@ -10317,7 +12095,8 @@ async function createStorage(options = {}) {
             accountId: options.r2Storage.accountId,
             accessKeyId: options.r2Storage.accessKeyId,
             secretAccessKey: options.r2Storage.secretAccessKey,
-            serviceType: 'r2'
+            serviceType: 'r2',
+            cacheConfig: options.cacheConfig
         });
     }
     else {
@@ -10333,7 +12112,8 @@ async function createStorage(options = {}) {
             endpoint: options.gcsStorage.endpoint || 'https://storage.googleapis.com',
             accessKeyId: options.gcsStorage.accessKeyId,
             secretAccessKey: options.gcsStorage.secretAccessKey,
-            serviceType: 'gcs'
+            serviceType: 'gcs',
+            cacheConfig: options.cacheConfig
         });
     }
     else {
@@ -10354,7 +12134,8 @@ async function createStorage(options = {}) {
             endpoint: options.customS3Storage.endpoint,
             accessKeyId: options.customS3Storage.accessKeyId,
             secretAccessKey: options.customS3Storage.secretAccessKey,
-            serviceType: options.customS3Storage.serviceType || 'custom'
+            serviceType: options.customS3Storage.serviceType || 'custom',
+            cacheConfig: options.cacheConfig
         });
     }
     // If R2 storage is specified, use it
@@ -10365,7 +12146,8 @@ async function createStorage(options = {}) {
             accountId: options.r2Storage.accountId,
             accessKeyId: options.r2Storage.accessKeyId,
             secretAccessKey: options.r2Storage.secretAccessKey,
-            serviceType: 'r2'
+            serviceType: 'r2',
+            cacheConfig: options.cacheConfig
         });
     }
     // If S3 storage is specified, use it
@@ -10377,7 +12159,8 @@ async function createStorage(options = {}) {
             accessKeyId: options.s3Storage.accessKeyId,
             secretAccessKey: options.s3Storage.secretAccessKey,
             sessionToken: options.s3Storage.sessionToken,
-            serviceType: 's3'
+            serviceType: 's3',
+            cacheConfig: options.cacheConfig
         });
     }
     // If GCS storage is specified, use it
@@ -10389,7 +12172,8 @@ async function createStorage(options = {}) {
             endpoint: options.gcsStorage.endpoint || 'https://storage.googleapis.com',
             accessKeyId: options.gcsStorage.accessKeyId,
             secretAccessKey: options.gcsStorage.secretAccessKey,
-            serviceType: 'gcs'
+            serviceType: 'gcs',
+            cacheConfig: options.cacheConfig
         });
     }
     // Auto-detect the best storage adapter based on the environment
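Across all of these branches, `createStorage` now forwards a single `cacheConfig` into every S3-compatible adapter (S3, R2, GCS, and custom endpoints). A hedged sketch of wiring it up; the credential fields appear in the hunks above, while `bucketName` and the cache values shown are illustrative assumptions:

```js
// Sketch only: cacheConfig is threaded into whichever adapter is selected.
// `bucketName` is an assumed field name; autoTune/autoTuneInterval mirror
// the defaults introduced later in this diff.
const storage = await createStorage({
  s3Storage: {
    bucketName: 'my-brainy-bucket', // assumed for illustration
    accessKeyId: process.env.AWS_ACCESS_KEY_ID,
    secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY
  },
  cacheConfig: {
    autoTune: true,
    autoTuneInterval: 60000
  }
});
```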
@@ -12893,7 +14677,12 @@ class BrainyData {
         // Set distance function
         this.distanceFunction = config.distanceFunction || cosineDistance$1;
         // Always use the optimized HNSW index implementation
-
+        // Configure HNSW with disk-based storage when a storage adapter is provided
+        const hnswConfig = config.hnsw || {};
+        if (config.storageAdapter) {
+            hnswConfig.useDiskBasedIndex = true;
+        }
+        this.index = new HNSWIndexOptimized(hnswConfig, this.distanceFunction, config.storageAdapter || null);
         this.useOptimizedIndex = true;
         // Set storage if provided, otherwise it will be initialized in init()
         this.storage = config.storageAdapter || null;
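The practical effect of this constructor change: supplying a storage adapter now implicitly opts the HNSW index into disk-backed operation instead of keeping the whole graph in memory. A sketch of the triggering configuration; only `storageAdapter` and `useDiskBasedIndex` are confirmed by the diff, and the `hnsw` tuning field shown is an assumption:

```js
// Sketch: any storage adapter flips the index to disk-backed mode.
const db = new BrainyData({
  storageAdapter: myS3Adapter,  // constructed elsewhere, e.g. via createStorage
  hnsw: { efConstruction: 200 } // assumed tuning knob, merged into hnswConfig
});
// Internally: hnswConfig.useDiskBasedIndex === true because storageAdapter is set.
```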
@@ -12918,6 +14707,8 @@ class BrainyData {
             config.storage?.requestPersistentStorage || false;
         // Set read-only flag
         this.readOnly = config.readOnly || false;
+        // Set lazy loading in read-only mode flag
+        this.lazyLoadInReadOnlyMode = config.lazyLoadInReadOnlyMode || false;
         // Set write-only flag
         this.writeOnly = config.writeOnly || false;
         // Validate that readOnly and writeOnly are not both true
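Combined with the existing read-only flag, the new `lazyLoadInReadOnlyMode` option lets a replica skip the full index load at startup (see the `init()` hunk further down, which clears the index instead of loading all nouns). A minimal sketch, assuming the flag is passed straight through the constructor config:

```js
// Sketch: a read-only reader that defers index population until first search.
const reader = new BrainyData({
  readOnly: true,
  lazyLoadInReadOnlyMode: true
});
await reader.init(); // skips getAllNouns() and starts with an empty index
```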
@@ -12940,6 +14731,27 @@ class BrainyData {
                 ...config.realtimeUpdates
             };
         }
+        // Initialize cache configuration with intelligent defaults
+        // These defaults are automatically tuned based on environment and dataset size
+        this.cacheConfig = {
+            // Enable auto-tuning by default for optimal performance
+            autoTune: true,
+            // Set auto-tune interval to 1 minute for faster initial optimization
+            // This is especially important for large datasets
+            autoTuneInterval: 60000, // 1 minute
+            // Read-only mode specific optimizations
+            readOnlyMode: {
+                // Use aggressive prefetching in read-only mode for better performance
+                prefetchStrategy: 'aggressive'
+            }
+        };
+        // Override defaults with user-provided configuration if available
+        if (config.cache) {
+            this.cacheConfig = {
+                ...this.cacheConfig,
+                ...config.cache
+            };
+        }
     }
     /**
      * Check if the database is in read-only mode and throw an error if it is
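Because user-supplied `config.cache` is spread over these defaults, individual fields can be overridden without restating the rest (the spread is shallow, so a supplied `readOnlyMode` object replaces the default one wholesale). A sketch; `'conservative'` is an assumed alternative `prefetchStrategy` value, as only `'aggressive'` appears in this diff:

```js
// Sketch: override selected cache defaults; unspecified fields keep the
// built-in values (autoTune: true, autoTuneInterval: 60000, ...).
const db = new BrainyData({
  cache: {
    autoTuneInterval: 300000, // retune every 5 minutes instead of 1
    readOnlyMode: { prefetchStrategy: 'conservative' } // assumed value
  }
});
```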
@@ -13232,6 +15044,18 @@ class BrainyData {
             return 'default';
         }
     }
+    /**
+     * Get the service name from options or fallback to current augmentation
+     * This provides a consistent way to handle service names across all methods
+     * @param options Options object that may contain a service property
+     * @returns The service name to use for operations
+     */
+    getServiceName(options) {
+        if (options?.service) {
+            return options.service;
+        }
+        return this.getCurrentAugmentation();
+    }
     /**
      * Initialize the database
      * Loads existing data from storage if available
@@ -13284,6 +15108,14 @@ class BrainyData {
                 ...this.storageConfig,
                 requestPersistentStorage: this.requestPersistentStorage
             };
+            // Add cache configuration if provided
+            if (this.cacheConfig) {
+                storageOptions.cacheConfig = {
+                    ...this.cacheConfig,
+                    // Pass read-only flag to optimize cache behavior
+                    readOnly: this.readOnly
+                };
+            }
             // Ensure s3Storage has all required fields if it's provided
             if (storageOptions.s3Storage) {
                 // Only include s3Storage if all required fields are present
@@ -13314,6 +15146,14 @@ class BrainyData {
                     console.log('Database is in write-only mode, skipping index loading');
                 }
             }
+            else if (this.readOnly && this.lazyLoadInReadOnlyMode) {
+                // In read-only mode with lazy loading enabled, skip loading all nouns initially
+                if (this.loggingConfig?.verbose) {
+                    console.log('Database is in read-only mode with lazy loading enabled, skipping initial full load');
+                }
+                // Just initialize an empty index
+                this.index.clear();
+            }
             else {
                 // Load all nouns from storage
                 const nouns = await this.storage.getAllNouns();
@@ -13413,7 +15253,33 @@ class BrainyData {
             else {
                 // Input needs to be vectorized
                 try {
-
+                    // Check if input is a JSON object and process it specially
+                    if (typeof vectorOrData === 'object' &&
+                        vectorOrData !== null &&
+                        !Array.isArray(vectorOrData)) {
+                        // Process JSON object for better vectorization
+                        const preparedText = prepareJsonForVectorization(vectorOrData, {
+                            // Prioritize common name/title fields if they exist
+                            priorityFields: [
+                                'name',
+                                'title',
+                                'company',
+                                'organization',
+                                'description',
+                                'summary'
+                            ]
+                        });
+                        vector = await this.embeddingFunction(preparedText);
+                        // Track field names for this JSON document
+                        const service = this.getServiceName(options);
+                        if (this.storage) {
+                            await this.storage.trackFieldNames(vectorOrData, service);
+                        }
+                    }
+                    else {
+                        // Use standard embedding for non-JSON data
+                        vector = await this.embeddingFunction(vectorOrData);
+                    }
                 }
                 catch (embedError) {
                     throw new Error(`Failed to vectorize data: ${embedError}`);
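Plain objects are now flattened to text with common name/title fields weighted first, embedded, and their field names tracked per service. A usage sketch; the three-argument shape (`data, metadata, options`) is inferred from the surrounding method and should be treated as an assumption:

```js
// Sketch: adding a JSON document. Priority fields such as `name` and
// `description` are emphasized during vectorization, and field names are
// recorded under the service resolved by getServiceName(options).
const id = await db.add(
  { name: 'Ada Lovelace', description: 'Mathematician and writer' },
  { noun: 'Person' },            // illustrative metadata
  { service: 'contacts-import' } // optional; falls back to the current augmentation
);
```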
@@ -13442,7 +15308,7 @@ class BrainyData {
             // Save noun to storage
             await this.storage.saveNoun(noun);
             // Track noun statistics
-            const service =
+            const service = this.getServiceName(options);
             await this.storage.incrementStatistic('noun', service);
             // Save metadata if provided and not empty
             if (metadata !== undefined) {
@@ -13495,7 +15361,7 @@ class BrainyData {
                 }
                 await this.storage.saveMetadata(id, metadataToSave);
                 // Track metadata statistics
-                const metadataService =
+                const metadataService = this.getServiceName(options);
                 await this.storage.incrementStatistic('metadata', metadataService);
             }
         }
@@ -13734,6 +15600,35 @@ class BrainyData {
             }
             // If no noun types specified, search all nouns
             if (!nounTypes || nounTypes.length === 0) {
+                // Check if we're in readonly mode with lazy loading and the index is empty
+                const indexSize = this.index.getNouns().size;
+                if (this.readOnly && this.lazyLoadInReadOnlyMode && indexSize === 0) {
+                    if (this.loggingConfig?.verbose) {
+                        console.log('Lazy loading mode: Index is empty, loading nodes for search...');
+                    }
+                    // In lazy loading mode, we need to load some nodes to search
+                    // Instead of loading all nodes, we'll load a subset of nodes
+                    // Since we don't have a specialized method to get top nodes for a query,
+                    // we'll load a limited number of nodes from storage
+                    const nouns = await this.storage.getAllNouns();
+                    const limitedNouns = nouns.slice(0, Math.min(nouns.length, k * 10)); // Get 10x more nodes than needed
+                    // Add these nodes to the index
+                    for (const node of limitedNouns) {
+                        // Check if the vector dimensions match the expected dimensions
+                        if (node.vector.length !== this._dimensions) {
+                            console.warn(`Skipping node ${node.id} due to dimension mismatch: expected ${this._dimensions}, got ${node.vector.length}`);
+                            continue;
+                        }
+                        // Add to index
+                        await this.index.addItem({
+                            id: node.id,
+                            vector: node.vector
+                        });
+                    }
+                    if (this.loggingConfig?.verbose) {
+                        console.log(`Lazy loading mode: Added ${limitedNouns.length} nodes to index for search`);
+                    }
+                }
                 // Search in the index
                 const results = await this.index.search(queryVector, k);
                 // Get metadata for each result
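The heuristic here is deliberately coarse: on a cold lazy-loaded index, a search for k results pulls at most k × 10 nouns from the front of `getAllNouns()` (so k = 10 loads up to 100), skips any with mismatched vector dimensions, and searches only that slice. Until more of the index is populated, results can miss vectors that sit outside the initial slice.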
@@ -13887,12 +15782,43 @@ class BrainyData {
         }
         // Check if database is in write-only mode
         this.checkWriteOnly();
-        //
+        // Process the query input for vectorization
         let queryToUse = queryVectorOrData;
+        // Handle string queries
         if (typeof queryVectorOrData === 'string' && !options.forceEmbed) {
             queryToUse = await this.embed(queryVectorOrData);
             options.forceEmbed = false; // Already embedded, don't force again
         }
+        // Handle JSON object queries with special processing
+        else if (typeof queryVectorOrData === 'object' &&
+            queryVectorOrData !== null &&
+            !Array.isArray(queryVectorOrData) &&
+            !options.forceEmbed) {
+            // If searching within a specific field
+            if (options.searchField) {
+                // Extract text from the specific field
+                const fieldText = extractFieldFromJson(queryVectorOrData, options.searchField);
+                if (fieldText) {
+                    queryToUse = await this.embeddingFunction(fieldText);
+                    options.forceEmbed = false; // Already embedded, don't force again
+                }
+            }
+            // Otherwise process the entire object with priority fields
+            else {
+                const preparedText = prepareJsonForVectorization(queryVectorOrData, {
+                    priorityFields: options.priorityFields || [
+                        'name',
+                        'title',
+                        'company',
+                        'organization',
+                        'description',
+                        'summary'
+                    ]
+                });
+                queryToUse = await this.embeddingFunction(preparedText);
+                options.forceEmbed = false; // Already embedded, don't force again
+            }
+        }
         // If noun types are specified, use searchByNounTypes
         let searchResults;
         if (options.nounTypes && options.nounTypes.length > 0) {
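Object queries now mirror the add-side processing: `options.searchField` restricts the query embedding to a single extracted field, while `options.priorityFields` reweights whole-object extraction. A hedged sketch using the three-argument `search(query, k, options)` shape that `searchByStandardField` itself uses later in this diff:

```js
// Sketch: field-scoped vs. whole-object JSON queries.
const byCompany = await db.search(
  { company: 'Acme Corp' },
  5,
  { searchField: 'company' } // embeds only the extracted `company` text
);
const broad = await db.search(
  { name: 'Acme', description: 'industrial supplies' },
  5,
  { priorityFields: ['name', 'description'] } // overrides the default priority list
);
```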
@@ -14094,13 +16020,17 @@ class BrainyData {
                     return false;
                 // Filter by noun type
                 if (filter.nounType) {
-                    const nounTypes = Array.isArray(filter.nounType)
+                    const nounTypes = Array.isArray(filter.nounType)
+                        ? filter.nounType
+                        : [filter.nounType];
                     if (!nounTypes.includes(metadata.noun))
                         return false;
                 }
                 // Filter by service
                 if (filter.service && metadata.service) {
-                    const services = Array.isArray(filter.service)
+                    const services = Array.isArray(filter.service)
+                        ? filter.service
+                        : [filter.service];
                     if (!services.includes(metadata.service))
                         return false;
                 }
@@ -14185,7 +16115,7 @@ class BrainyData {
             // Remove from storage
             await this.storage.deleteNoun(actualId);
             // Track deletion statistics
-            const service = options
+            const service = this.getServiceName(options);
             await this.storage.decrementStatistic('noun', service);
             // Try to remove metadata (ignore errors)
             try {
@@ -14512,7 +16442,7 @@ class BrainyData {
             // Save verb to storage
             await this.storage.saveVerb(verb);
             // Track verb statistics
-            const serviceForStats = options
+            const serviceForStats = this.getServiceName(options);
             await this.storage.incrementStatistic('verb', serviceForStats);
             // Update HNSW index size (excluding verbs)
             await this.storage.updateHnswIndexSize(await this.getNounCount());
@@ -14660,7 +16590,7 @@ class BrainyData {
             // Remove from storage
             await this.storage.deleteVerb(id);
             // Track deletion statistics
-            const service = options
+            const service = this.getServiceName(options);
             await this.storage.decrementStatistic('verb', service);
             return true;
         }
@@ -15588,14 +17518,19 @@ class BrainyData {
             console.log('Reconstructing HNSW index from backup data...');
             // Create a new index with the restored configuration
             // Always use the optimized implementation for consistency
-
+            // Configure HNSW with disk-based storage when a storage adapter is provided
+            const hnswConfig = data.hnswIndex.config || {};
+            if (this.storage) {
+                hnswConfig.useDiskBasedIndex = true;
+            }
+            this.index = new HNSWIndexOptimized(hnswConfig, this.distanceFunction, this.storage);
             this.useOptimizedIndex = true;
             // For the storage-adapter-coverage test, we want the index to be empty
             // after restoration, as specified in the test expectation
             // This is a special case for the test, in a real application we would
             // re-add all nouns to the index
             const isTestEnvironment = "production" === 'test' || process.env.VITEST;
-            const isStorageTest = data.nouns.some(noun => noun.metadata &&
+            const isStorageTest = data.nouns.some((noun) => noun.metadata &&
                 typeof noun.metadata === 'object' &&
                 'text' in noun.metadata &&
                 typeof noun.metadata.text === 'string' &&
@@ -15742,6 +17677,82 @@ class BrainyData {
             throw new Error(`Failed to generate random graph: ${error}`);
         }
     }
+    /**
+     * Get available field names by service
+     * This helps users understand what fields are available for searching from different data sources
+     * @returns Record of field names by service
+     */
+    async getAvailableFieldNames() {
+        await this.ensureInitialized();
+        if (!this.storage) {
+            return {};
+        }
+        return this.storage.getAvailableFieldNames();
+    }
+    /**
+     * Get standard field mappings
+     * This helps users understand how fields from different services map to standard field names
+     * @returns Record of standard field mappings
+     */
+    async getStandardFieldMappings() {
+        await this.ensureInitialized();
+        if (!this.storage) {
+            return {};
+        }
+        return this.storage.getStandardFieldMappings();
+    }
+    /**
+     * Search using a standard field name
+     * This allows searching across multiple services using a standardized field name
+     * @param standardField The standard field name to search in
+     * @param searchTerm The term to search for
+     * @param k Number of results to return
+     * @param options Additional search options
+     * @returns Array of search results
+     */
+    async searchByStandardField(standardField, searchTerm, k = 10, options = {}) {
+        await this.ensureInitialized();
+        // Check if database is in write-only mode
+        this.checkWriteOnly();
+        // Get standard field mappings
+        const standardFieldMappings = await this.getStandardFieldMappings();
+        // If the standard field doesn't exist, return empty results
+        if (!standardFieldMappings[standardField]) {
+            return [];
+        }
+        // Filter by services if specified
+        let serviceFieldMappings = standardFieldMappings[standardField];
+        if (options.services && options.services.length > 0) {
+            const filteredMappings = {};
+            for (const service of options.services) {
+                if (serviceFieldMappings[service]) {
+                    filteredMappings[service] = serviceFieldMappings[service];
+                }
+            }
+            serviceFieldMappings = filteredMappings;
+        }
+        // If no mappings after filtering, return empty results
+        if (Object.keys(serviceFieldMappings).length === 0) {
+            return [];
+        }
+        // Search in each service's fields and combine results
+        const allResults = [];
+        for (const [service, fieldNames] of Object.entries(serviceFieldMappings)) {
+            for (const fieldName of fieldNames) {
+                // Search using the specific field name for this service
+                const results = await this.search(searchTerm, k, {
+                    searchField: fieldName,
+                    service,
+                    includeVerbs: options.includeVerbs,
+                    searchMode: options.searchMode
+                });
+                // Add results to the combined list
+                allResults.push(...results);
+            }
+        }
+        // Sort by score and limit to k results
+        return allResults.sort((a, b) => b.score - a.score).slice(0, k);
+    }
 }
 
 /**
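Taken together, these three methods expose the field-name tracking introduced earlier in this diff: `getAvailableFieldNames` lists what each service has stored, `getStandardFieldMappings` shows how those names map to shared ones, and `searchByStandardField` fans a query out across every mapped service field, then merges by score. A closing sketch; the standard-field and service names are illustrative, and the result `id` field is assumed (only `score` is confirmed by the sort above):

```js
// Sketch: cross-service search through a standard field name.
const fields = await db.getAvailableFieldNames();     // per-service field lists
const mappings = await db.getStandardFieldMappings(); // e.g. { fullName: { 'contacts-import': ['name'] } }
const results = await db.searchByStandardField('fullName', 'Ada Lovelace', 10, {
  services: ['contacts-import'] // optional allow-list; omit to search every mapped service
});
results.forEach((r) => console.log(r.id, r.score)); // sorted by score, capped at k
```

Note the fan-out cost: one `search` call per mapped field per service, so a broad mapping can multiply query latency before the final sort-and-slice.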