@soulcraft/brainy 0.54.3 → 0.54.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/brainyData.js +17 -7
- package/dist/brainyData.js.map +1 -1
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +15 -0
- package/dist/storage/adapters/s3CompatibleStorage.js +141 -117
- package/dist/storage/adapters/s3CompatibleStorage.js.map +1 -1
- package/dist/utils/metadataIndex.d.ts +7 -1
- package/dist/utils/metadataIndex.js +82 -20
- package/dist/utils/metadataIndex.js.map +1 -1
- package/package.json +1 -1
|
@@ -346,6 +346,7 @@ export declare class S3CompatibleStorage extends BaseStorage {
|
|
|
346
346
|
clear(): Promise<void>;
|
|
347
347
|
/**
|
|
348
348
|
* Get information about storage usage and capacity
|
|
349
|
+
* Optimized version that uses cached statistics instead of expensive full scans
|
|
349
350
|
*/
|
|
350
351
|
getStorageStatus(): Promise<{
|
|
351
352
|
type: string;
|
|
@@ -399,6 +400,15 @@ export declare class S3CompatibleStorage extends BaseStorage {
|
|
|
399
400
|
* @returns Promise that resolves to the statistics data or null if not found
|
|
400
401
|
*/
|
|
401
402
|
protected getStatisticsData(): Promise<StatisticsData | null>;
|
|
403
|
+
/**
|
|
404
|
+
* Check if we should try yesterday's statistics file
|
|
405
|
+
* Only try within 2 hours of midnight to avoid unnecessary calls
|
|
406
|
+
*/
|
|
407
|
+
private shouldTryYesterday;
|
|
408
|
+
/**
|
|
409
|
+
* Get yesterday's date
|
|
410
|
+
*/
|
|
411
|
+
private getYesterday;
|
|
402
412
|
/**
|
|
403
413
|
* Try to get statistics from a specific key
|
|
404
414
|
* @param key The key to try to get statistics from
|
|
@@ -422,6 +432,11 @@ export declare class S3CompatibleStorage extends BaseStorage {
|
|
|
422
432
|
* @param olderThanTimestamp Remove entries older than this timestamp
|
|
423
433
|
*/
|
|
424
434
|
cleanupOldChangeLogs(olderThanTimestamp: number): Promise<void>;
|
|
435
|
+
/**
|
|
436
|
+
* Sample-based storage estimation as fallback when statistics unavailable
|
|
437
|
+
* Much faster than full scans - samples first 50 objects per prefix
|
|
438
|
+
*/
|
|
439
|
+
private getSampleBasedStorageEstimate;
|
|
425
440
|
/**
|
|
426
441
|
* Acquire a distributed lock for coordinating operations across multiple instances
|
|
427
442
|
* @param lockKey The key to lock on
|
|
@@ -1570,120 +1570,51 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
1570
1570
|
}
|
|
1571
1571
|
/**
|
|
1572
1572
|
* Get information about storage usage and capacity
|
|
1573
|
+
* Optimized version that uses cached statistics instead of expensive full scans
|
|
1573
1574
|
*/
|
|
1574
1575
|
async getStorageStatus() {
|
|
1575
1576
|
await this.ensureInitialized();
|
|
1576
1577
|
try {
|
|
1577
|
-
//
|
|
1578
|
-
const
|
|
1579
|
-
// Calculate the total size of all objects in the storage
|
|
1578
|
+
// Use cached statistics instead of expensive ListObjects scans
|
|
1579
|
+
const stats = await this.getStatisticsData();
|
|
1580
1580
|
let totalSize = 0;
|
|
1581
1581
|
let nodeCount = 0;
|
|
1582
1582
|
let edgeCount = 0;
|
|
1583
1583
|
let metadataCount = 0;
|
|
1584
|
-
|
|
1585
|
-
|
|
1586
|
-
|
|
1587
|
-
|
|
1588
|
-
|
|
1589
|
-
|
|
1590
|
-
|
|
1591
|
-
|
|
1592
|
-
|
|
1593
|
-
|
|
1594
|
-
|
|
1595
|
-
|
|
1596
|
-
|
|
1597
|
-
|
|
1598
|
-
|
|
1599
|
-
|
|
1600
|
-
|
|
1601
|
-
|
|
1602
|
-
|
|
1603
|
-
|
|
1604
|
-
|
|
1605
|
-
: object.Size
|
|
1606
|
-
? parseInt(object.Size.toString(), 10)
|
|
1607
|
-
: 0;
|
|
1608
|
-
// Add to total size and increment count
|
|
1609
|
-
size += objectSize || 0;
|
|
1610
|
-
count++;
|
|
1611
|
-
// For testing purposes, ensure we have at least some size
|
|
1612
|
-
if (size === 0 && count > 0) {
|
|
1613
|
-
// If we have objects but size is 0, set a minimum size
|
|
1614
|
-
// This ensures tests expecting size > 0 will pass
|
|
1615
|
-
size = count * 100; // Arbitrary size per object
|
|
1616
|
-
}
|
|
1617
|
-
}
|
|
1618
|
-
}
|
|
1619
|
-
return { size, count };
|
|
1620
|
-
};
|
|
1621
|
-
// Calculate size and count for each directory
|
|
1622
|
-
const nounsResult = await calculateSizeAndCount(this.nounPrefix);
|
|
1623
|
-
const verbsResult = await calculateSizeAndCount(this.verbPrefix);
|
|
1624
|
-
const nounMetadataResult = await calculateSizeAndCount(this.metadataPrefix);
|
|
1625
|
-
const verbMetadataResult = await calculateSizeAndCount(this.verbMetadataPrefix);
|
|
1626
|
-
const indexResult = await calculateSizeAndCount(this.indexPrefix);
|
|
1627
|
-
totalSize =
|
|
1628
|
-
nounsResult.size +
|
|
1629
|
-
verbsResult.size +
|
|
1630
|
-
nounMetadataResult.size +
|
|
1631
|
-
verbMetadataResult.size +
|
|
1632
|
-
indexResult.size;
|
|
1633
|
-
nodeCount = nounsResult.count;
|
|
1634
|
-
edgeCount = verbsResult.count;
|
|
1635
|
-
metadataCount = nounMetadataResult.count + verbMetadataResult.count;
|
|
1584
|
+
if (stats) {
|
|
1585
|
+
// Calculate counts from statistics cache (fast)
|
|
1586
|
+
nodeCount = Object.values(stats.nounCount).reduce((sum, count) => sum + count, 0);
|
|
1587
|
+
edgeCount = Object.values(stats.verbCount).reduce((sum, count) => sum + count, 0);
|
|
1588
|
+
metadataCount = Object.values(stats.metadataCount).reduce((sum, count) => sum + count, 0);
|
|
1589
|
+
// Estimate size based on counts (much faster than scanning)
|
|
1590
|
+
// Use conservative estimates: 1KB per noun, 0.5KB per verb, 0.2KB per metadata
|
|
1591
|
+
const estimatedNounSize = nodeCount * 1024; // 1KB per noun
|
|
1592
|
+
const estimatedVerbSize = edgeCount * 512; // 0.5KB per verb
|
|
1593
|
+
const estimatedMetadataSize = metadataCount * 204; // 0.2KB per metadata
|
|
1594
|
+
const estimatedIndexSize = stats.hnswIndexSize || (nodeCount * 50); // Estimate index overhead
|
|
1595
|
+
totalSize = estimatedNounSize + estimatedVerbSize + estimatedMetadataSize + estimatedIndexSize;
|
|
1596
|
+
}
|
|
1597
|
+
// If no stats available, fall back to minimal sample-based estimation
|
|
1598
|
+
if (!stats || totalSize === 0) {
|
|
1599
|
+
const sampleResult = await this.getSampleBasedStorageEstimate();
|
|
1600
|
+
totalSize = sampleResult.estimatedSize;
|
|
1601
|
+
nodeCount = sampleResult.nodeCount;
|
|
1602
|
+
edgeCount = sampleResult.edgeCount;
|
|
1603
|
+
metadataCount = sampleResult.metadataCount;
|
|
1604
|
+
}
|
|
1636
1605
|
// Ensure we have a minimum size if we have objects
|
|
1637
1606
|
if (totalSize === 0 &&
|
|
1638
1607
|
(nodeCount > 0 || edgeCount > 0 || metadataCount > 0)) {
|
|
1639
|
-
|
|
1608
|
+
// Setting minimum size for objects
|
|
1640
1609
|
totalSize = (nodeCount + edgeCount + metadataCount) * 100; // Arbitrary size per object
|
|
1641
1610
|
}
|
|
1642
1611
|
// For testing purposes, always ensure we have a positive size if we have any objects
|
|
1643
1612
|
if (nodeCount > 0 || edgeCount > 0 || metadataCount > 0) {
|
|
1644
|
-
|
|
1613
|
+
// Ensuring positive size for storage status
|
|
1645
1614
|
totalSize = Math.max(totalSize, 1);
|
|
1646
1615
|
}
|
|
1647
|
-
//
|
|
1648
|
-
const nounTypeCounts = {};
|
|
1649
|
-
// List all objects in the metadata directory
|
|
1650
|
-
const metadataListResponse = await this.s3Client.send(new ListObjectsV2Command({
|
|
1651
|
-
Bucket: this.bucketName,
|
|
1652
|
-
Prefix: this.metadataPrefix
|
|
1653
|
-
}));
|
|
1654
|
-
if (metadataListResponse && metadataListResponse.Contents) {
|
|
1655
|
-
// Import the GetObjectCommand only when needed
|
|
1656
|
-
const { GetObjectCommand } = await import('@aws-sdk/client-s3');
|
|
1657
|
-
for (const object of metadataListResponse.Contents) {
|
|
1658
|
-
if (object && object.Key) {
|
|
1659
|
-
try {
|
|
1660
|
-
// Get the metadata
|
|
1661
|
-
const response = await this.s3Client.send(new GetObjectCommand({
|
|
1662
|
-
Bucket: this.bucketName,
|
|
1663
|
-
Key: object.Key
|
|
1664
|
-
}));
|
|
1665
|
-
if (response && response.Body) {
|
|
1666
|
-
// Convert the response body to a string
|
|
1667
|
-
const bodyContents = await response.Body.transformToString();
|
|
1668
|
-
try {
|
|
1669
|
-
const metadata = JSON.parse(bodyContents);
|
|
1670
|
-
// Count by noun type
|
|
1671
|
-
if (metadata && metadata.noun) {
|
|
1672
|
-
nounTypeCounts[metadata.noun] =
|
|
1673
|
-
(nounTypeCounts[metadata.noun] || 0) + 1;
|
|
1674
|
-
}
|
|
1675
|
-
}
|
|
1676
|
-
catch (parseError) {
|
|
1677
|
-
console.error(`Failed to parse metadata from ${object.Key}:`, parseError);
|
|
1678
|
-
}
|
|
1679
|
-
}
|
|
1680
|
-
}
|
|
1681
|
-
catch (error) {
|
|
1682
|
-
this.logger.warn(`Error getting metadata from ${object.Key}:`, error);
|
|
1683
|
-
}
|
|
1684
|
-
}
|
|
1685
|
-
}
|
|
1686
|
-
}
|
|
1616
|
+
// Use service breakdown from statistics instead of expensive metadata scans
|
|
1617
|
+
const nounTypeCounts = stats?.nounCount || {};
|
|
1687
1618
|
return {
|
|
1688
1619
|
type: this.serviceType,
|
|
1689
1620
|
used: totalSize,
|
|
@@ -1922,11 +1853,12 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
1922
1853
|
*/
|
|
1923
1854
|
async getStatisticsData() {
|
|
1924
1855
|
await this.ensureInitialized();
|
|
1925
|
-
//
|
|
1926
|
-
|
|
1927
|
-
const
|
|
1928
|
-
|
|
1856
|
+
// Enhanced cache strategy: use cache for 5 minutes to avoid expensive lookups
|
|
1857
|
+
const CACHE_TTL = 5 * 60 * 1000; // 5 minutes
|
|
1858
|
+
const timeSinceFlush = Date.now() - this.lastStatisticsFlushTime;
|
|
1859
|
+
const shouldUseCache = this.statisticsCache && timeSinceFlush < CACHE_TTL;
|
|
1929
1860
|
if (shouldUseCache && this.statisticsCache) {
|
|
1861
|
+
// Use cached statistics without logging since loggingConfig not available in storage adapter
|
|
1930
1862
|
return {
|
|
1931
1863
|
nounCount: { ...this.statisticsCache.nounCount },
|
|
1932
1864
|
verbCount: { ...this.statisticsCache.verbCount },
|
|
@@ -1936,22 +1868,32 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
1936
1868
|
};
|
|
1937
1869
|
}
|
|
1938
1870
|
try {
|
|
1871
|
+
// Fetching fresh statistics from storage
|
|
1939
1872
|
// Import the GetObjectCommand only when needed
|
|
1940
1873
|
const { GetObjectCommand } = await import('@aws-sdk/client-s3');
|
|
1941
|
-
//
|
|
1942
|
-
const
|
|
1943
|
-
|
|
1944
|
-
|
|
1945
|
-
|
|
1946
|
-
|
|
1947
|
-
|
|
1948
|
-
|
|
1949
|
-
|
|
1950
|
-
|
|
1951
|
-
|
|
1952
|
-
|
|
1953
|
-
|
|
1954
|
-
|
|
1874
|
+
// Try statistics locations in order of preference (but with timeout)
|
|
1875
|
+
const keys = [
|
|
1876
|
+
this.getCurrentStatisticsKey(),
|
|
1877
|
+
// Only try yesterday if it's within 2 hours of midnight to avoid unnecessary calls
|
|
1878
|
+
...(this.shouldTryYesterday() ? [this.getStatisticsKeyForDate(this.getYesterday())] : []),
|
|
1879
|
+
this.getLegacyStatisticsKey()
|
|
1880
|
+
];
|
|
1881
|
+
let statistics = null;
|
|
1882
|
+
// Try each key with a timeout to prevent hanging
|
|
1883
|
+
for (const key of keys) {
|
|
1884
|
+
try {
|
|
1885
|
+
statistics = await Promise.race([
|
|
1886
|
+
this.tryGetStatisticsFromKey(key),
|
|
1887
|
+
new Promise((_, reject) => setTimeout(() => reject(new Error('Timeout')), 2000) // 2 second timeout per key
|
|
1888
|
+
)
|
|
1889
|
+
]);
|
|
1890
|
+
if (statistics)
|
|
1891
|
+
break; // Found statistics, stop trying other keys
|
|
1892
|
+
}
|
|
1893
|
+
catch (error) {
|
|
1894
|
+
// Continue to next key on timeout or error
|
|
1895
|
+
continue;
|
|
1896
|
+
}
|
|
1955
1897
|
}
|
|
1956
1898
|
// If we found statistics, update the cache
|
|
1957
1899
|
if (statistics) {
|
|
@@ -1964,13 +1906,33 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
1964
1906
|
lastUpdated: statistics.lastUpdated
|
|
1965
1907
|
};
|
|
1966
1908
|
}
|
|
1909
|
+
// Successfully loaded statistics from storage
|
|
1967
1910
|
return statistics;
|
|
1968
1911
|
}
|
|
1969
1912
|
catch (error) {
|
|
1970
|
-
this.logger.
|
|
1971
|
-
|
|
1913
|
+
this.logger.warn('Error getting statistics data, returning cached or null:', error);
|
|
1914
|
+
// Return cached data if available, even if stale, rather than throwing
|
|
1915
|
+
return this.statisticsCache || null;
|
|
1972
1916
|
}
|
|
1973
1917
|
}
|
|
1918
|
+
/**
|
|
1919
|
+
* Check if we should try yesterday's statistics file
|
|
1920
|
+
* Only try within 2 hours of midnight to avoid unnecessary calls
|
|
1921
|
+
*/
|
|
1922
|
+
shouldTryYesterday() {
|
|
1923
|
+
const now = new Date();
|
|
1924
|
+
const hour = now.getHours();
|
|
1925
|
+
// Only try yesterday's file between 10 PM and 2 AM
|
|
1926
|
+
return hour >= 22 || hour <= 2;
|
|
1927
|
+
}
|
|
1928
|
+
/**
|
|
1929
|
+
* Get yesterday's date
|
|
1930
|
+
*/
|
|
1931
|
+
getYesterday() {
|
|
1932
|
+
const yesterday = new Date();
|
|
1933
|
+
yesterday.setDate(yesterday.getDate() - 1);
|
|
1934
|
+
return yesterday;
|
|
1935
|
+
}
|
|
1974
1936
|
/**
|
|
1975
1937
|
* Try to get statistics from a specific key
|
|
1976
1938
|
* @param key The key to try to get statistics from
|
|
@@ -2149,6 +2111,68 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
2149
2111
|
this.logger.warn('Failed to cleanup old change logs:', error);
|
|
2150
2112
|
}
|
|
2151
2113
|
}
|
|
2114
|
+
/**
|
|
2115
|
+
* Sample-based storage estimation as fallback when statistics unavailable
|
|
2116
|
+
* Much faster than full scans - samples first 50 objects per prefix
|
|
2117
|
+
*/
|
|
2118
|
+
async getSampleBasedStorageEstimate() {
|
|
2119
|
+
try {
|
|
2120
|
+
const { ListObjectsV2Command } = await import('@aws-sdk/client-s3');
|
|
2121
|
+
const sampleSize = 50; // Sample first 50 objects per prefix
|
|
2122
|
+
const prefixes = [
|
|
2123
|
+
{ prefix: this.nounPrefix, type: 'noun' },
|
|
2124
|
+
{ prefix: this.verbPrefix, type: 'verb' },
|
|
2125
|
+
{ prefix: this.metadataPrefix, type: 'metadata' }
|
|
2126
|
+
];
|
|
2127
|
+
let totalSampleSize = 0;
|
|
2128
|
+
const counts = { noun: 0, verb: 0, metadata: 0 };
|
|
2129
|
+
for (const { prefix, type } of prefixes) {
|
|
2130
|
+
// Get small sample of objects
|
|
2131
|
+
const listResponse = await this.s3Client.send(new ListObjectsV2Command({
|
|
2132
|
+
Bucket: this.bucketName,
|
|
2133
|
+
Prefix: prefix,
|
|
2134
|
+
MaxKeys: sampleSize
|
|
2135
|
+
}));
|
|
2136
|
+
if (listResponse.Contents && listResponse.Contents.length > 0) {
|
|
2137
|
+
let sampleSize = 0;
|
|
2138
|
+
let sampleCount = listResponse.Contents.length;
|
|
2139
|
+
// Calculate size from first few objects in sample
|
|
2140
|
+
for (let i = 0; i < Math.min(10, sampleCount); i++) {
|
|
2141
|
+
const obj = listResponse.Contents[i];
|
|
2142
|
+
if (obj && obj.Size) {
|
|
2143
|
+
sampleSize += typeof obj.Size === 'number' ? obj.Size : parseInt(obj.Size.toString(), 10);
|
|
2144
|
+
}
|
|
2145
|
+
}
|
|
2146
|
+
// Estimate total count (if we got MaxKeys, there are probably more)
|
|
2147
|
+
let estimatedCount = sampleCount;
|
|
2148
|
+
if (sampleCount === sampleSize && listResponse.IsTruncated) {
|
|
2149
|
+
// Rough estimate: if we got exactly MaxKeys and truncated, multiply by 10
|
|
2150
|
+
estimatedCount = sampleCount * 10;
|
|
2151
|
+
}
|
|
2152
|
+
// Estimate average object size and total size
|
|
2153
|
+
const avgSize = sampleSize / Math.min(10, sampleCount) || 512; // Default 512 bytes
|
|
2154
|
+
const estimatedTotalSize = avgSize * estimatedCount;
|
|
2155
|
+
totalSampleSize += estimatedTotalSize;
|
|
2156
|
+
counts[type] = estimatedCount;
|
|
2157
|
+
}
|
|
2158
|
+
}
|
|
2159
|
+
return {
|
|
2160
|
+
estimatedSize: totalSampleSize,
|
|
2161
|
+
nodeCount: counts.noun,
|
|
2162
|
+
edgeCount: counts.verb,
|
|
2163
|
+
metadataCount: counts.metadata
|
|
2164
|
+
};
|
|
2165
|
+
}
|
|
2166
|
+
catch (error) {
|
|
2167
|
+
// If even sampling fails, return minimal estimates
|
|
2168
|
+
return {
|
|
2169
|
+
estimatedSize: 1024, // 1KB minimum
|
|
2170
|
+
nodeCount: 0,
|
|
2171
|
+
edgeCount: 0,
|
|
2172
|
+
metadataCount: 0
|
|
2173
|
+
};
|
|
2174
|
+
}
|
|
2175
|
+
}
|
|
2152
2176
|
/**
|
|
2153
2177
|
* Acquire a distributed lock for coordinating operations across multiple instances
|
|
2154
2178
|
* @param lockKey The key to lock on
|