@soulcraft/brainy 0.54.3 → 0.54.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -346,6 +346,7 @@ export declare class S3CompatibleStorage extends BaseStorage {
346
346
  clear(): Promise<void>;
347
347
  /**
348
348
  * Get information about storage usage and capacity
349
+ * Optimized version that uses cached statistics instead of expensive full scans
349
350
  */
350
351
  getStorageStatus(): Promise<{
351
352
  type: string;
@@ -399,6 +400,15 @@ export declare class S3CompatibleStorage extends BaseStorage {
399
400
  * @returns Promise that resolves to the statistics data or null if not found
400
401
  */
401
402
  protected getStatisticsData(): Promise<StatisticsData | null>;
403
+ /**
404
+ * Check if we should try yesterday's statistics file
405
+ * Only try within 2 hours of midnight to avoid unnecessary calls
406
+ */
407
+ private shouldTryYesterday;
408
+ /**
409
+ * Get yesterday's date
410
+ */
411
+ private getYesterday;
402
412
  /**
403
413
  * Try to get statistics from a specific key
404
414
  * @param key The key to try to get statistics from
@@ -422,6 +432,11 @@ export declare class S3CompatibleStorage extends BaseStorage {
422
432
  * @param olderThanTimestamp Remove entries older than this timestamp
423
433
  */
424
434
  cleanupOldChangeLogs(olderThanTimestamp: number): Promise<void>;
435
+ /**
436
+ * Sample-based storage estimation as fallback when statistics unavailable
437
+ * Much faster than full scans - lists up to 50 objects per prefix and measures the size of the first 10
438
+ */
439
+ private getSampleBasedStorageEstimate;
425
440
  /**
426
441
  * Acquire a distributed lock for coordinating operations across multiple instances
427
442
  * @param lockKey The key to lock on
@@ -1570,120 +1570,51 @@ export class S3CompatibleStorage extends BaseStorage {
1570
1570
  }
1571
1571
  /**
1572
1572
  * Get information about storage usage and capacity
1573
+ * Optimized version that uses cached statistics instead of expensive full scans
1573
1574
  */
1574
1575
  async getStorageStatus() {
1575
1576
  await this.ensureInitialized();
1576
1577
  try {
1577
- // Import the ListObjectsV2Command only when needed
1578
- const { ListObjectsV2Command } = await import('@aws-sdk/client-s3');
1579
- // Calculate the total size of all objects in the storage
1578
+ // Use cached statistics instead of expensive ListObjects scans
1579
+ const stats = await this.getStatisticsData();
1580
1580
  let totalSize = 0;
1581
1581
  let nodeCount = 0;
1582
1582
  let edgeCount = 0;
1583
1583
  let metadataCount = 0;
1584
- // Helper function to calculate size and count for a given prefix
1585
- const calculateSizeAndCount = async (prefix) => {
1586
- let size = 0;
1587
- let count = 0;
1588
- // List all objects with the given prefix
1589
- const listResponse = await this.s3Client.send(new ListObjectsV2Command({
1590
- Bucket: this.bucketName,
1591
- Prefix: prefix
1592
- }));
1593
- // If there are no objects or Contents is undefined, return
1594
- if (!listResponse ||
1595
- !listResponse.Contents ||
1596
- listResponse.Contents.length === 0) {
1597
- return { size, count };
1598
- }
1599
- // Calculate size and count
1600
- for (const object of listResponse.Contents) {
1601
- if (object) {
1602
- // Ensure Size is a number
1603
- const objectSize = typeof object.Size === 'number'
1604
- ? object.Size
1605
- : object.Size
1606
- ? parseInt(object.Size.toString(), 10)
1607
- : 0;
1608
- // Add to total size and increment count
1609
- size += objectSize || 0;
1610
- count++;
1611
- // For testing purposes, ensure we have at least some size
1612
- if (size === 0 && count > 0) {
1613
- // If we have objects but size is 0, set a minimum size
1614
- // This ensures tests expecting size > 0 will pass
1615
- size = count * 100; // Arbitrary size per object
1616
- }
1617
- }
1618
- }
1619
- return { size, count };
1620
- };
1621
- // Calculate size and count for each directory
1622
- const nounsResult = await calculateSizeAndCount(this.nounPrefix);
1623
- const verbsResult = await calculateSizeAndCount(this.verbPrefix);
1624
- const nounMetadataResult = await calculateSizeAndCount(this.metadataPrefix);
1625
- const verbMetadataResult = await calculateSizeAndCount(this.verbMetadataPrefix);
1626
- const indexResult = await calculateSizeAndCount(this.indexPrefix);
1627
- totalSize =
1628
- nounsResult.size +
1629
- verbsResult.size +
1630
- nounMetadataResult.size +
1631
- verbMetadataResult.size +
1632
- indexResult.size;
1633
- nodeCount = nounsResult.count;
1634
- edgeCount = verbsResult.count;
1635
- metadataCount = nounMetadataResult.count + verbMetadataResult.count;
1584
+ if (stats) {
1585
+ // Calculate counts from statistics cache (fast)
1586
+ nodeCount = Object.values(stats.nounCount).reduce((sum, count) => sum + count, 0);
1587
+ edgeCount = Object.values(stats.verbCount).reduce((sum, count) => sum + count, 0);
1588
+ metadataCount = Object.values(stats.metadataCount).reduce((sum, count) => sum + count, 0);
1589
+ // Estimate size based on counts (much faster than scanning)
1590
+ // Use conservative estimates: 1KB per noun, 0.5KB per verb, 0.2KB per metadata
1591
+ const estimatedNounSize = nodeCount * 1024; // 1KB per noun
1592
+ const estimatedVerbSize = edgeCount * 512; // 0.5KB per verb
1593
+ const estimatedMetadataSize = metadataCount * 204; // 0.2KB per metadata
1594
+ const estimatedIndexSize = stats.hnswIndexSize || (nodeCount * 50); // Estimate index overhead
1595
+ totalSize = estimatedNounSize + estimatedVerbSize + estimatedMetadataSize + estimatedIndexSize;
1596
+ }
1597
+ // If no stats available, fall back to minimal sample-based estimation
1598
+ if (!stats || totalSize === 0) {
1599
+ const sampleResult = await this.getSampleBasedStorageEstimate();
1600
+ totalSize = sampleResult.estimatedSize;
1601
+ nodeCount = sampleResult.nodeCount;
1602
+ edgeCount = sampleResult.edgeCount;
1603
+ metadataCount = sampleResult.metadataCount;
1604
+ }
1636
1605
  // Ensure we have a minimum size if we have objects
1637
1606
  if (totalSize === 0 &&
1638
1607
  (nodeCount > 0 || edgeCount > 0 || metadataCount > 0)) {
1639
- console.log(`Setting minimum size for ${nodeCount} nodes, ${edgeCount} edges, and ${metadataCount} metadata objects`);
1608
+ // Setting minimum size for objects
1640
1609
  totalSize = (nodeCount + edgeCount + metadataCount) * 100; // Arbitrary size per object
1641
1610
  }
1642
1611
  // For testing purposes, always ensure we have a positive size if we have any objects
1643
1612
  if (nodeCount > 0 || edgeCount > 0 || metadataCount > 0) {
1644
- console.log(`Ensuring positive size for storage status with ${nodeCount} nodes, ${edgeCount} edges, and ${metadataCount} metadata objects`);
1613
+ // Ensuring positive size for storage status
1645
1614
  totalSize = Math.max(totalSize, 1);
1646
1615
  }
1647
- // Count nouns by type using metadata
1648
- const nounTypeCounts = {};
1649
- // List all objects in the metadata directory
1650
- const metadataListResponse = await this.s3Client.send(new ListObjectsV2Command({
1651
- Bucket: this.bucketName,
1652
- Prefix: this.metadataPrefix
1653
- }));
1654
- if (metadataListResponse && metadataListResponse.Contents) {
1655
- // Import the GetObjectCommand only when needed
1656
- const { GetObjectCommand } = await import('@aws-sdk/client-s3');
1657
- for (const object of metadataListResponse.Contents) {
1658
- if (object && object.Key) {
1659
- try {
1660
- // Get the metadata
1661
- const response = await this.s3Client.send(new GetObjectCommand({
1662
- Bucket: this.bucketName,
1663
- Key: object.Key
1664
- }));
1665
- if (response && response.Body) {
1666
- // Convert the response body to a string
1667
- const bodyContents = await response.Body.transformToString();
1668
- try {
1669
- const metadata = JSON.parse(bodyContents);
1670
- // Count by noun type
1671
- if (metadata && metadata.noun) {
1672
- nounTypeCounts[metadata.noun] =
1673
- (nounTypeCounts[metadata.noun] || 0) + 1;
1674
- }
1675
- }
1676
- catch (parseError) {
1677
- console.error(`Failed to parse metadata from ${object.Key}:`, parseError);
1678
- }
1679
- }
1680
- }
1681
- catch (error) {
1682
- this.logger.warn(`Error getting metadata from ${object.Key}:`, error);
1683
- }
1684
- }
1685
- }
1686
- }
1616
+ // Use service breakdown from statistics instead of expensive metadata scans
1617
+ const nounTypeCounts = stats?.nounCount || {};
1687
1618
  return {
1688
1619
  type: this.serviceType,
1689
1620
  used: totalSize,
@@ -1922,11 +1853,12 @@ export class S3CompatibleStorage extends BaseStorage {
1922
1853
  */
1923
1854
  async getStatisticsData() {
1924
1855
  await this.ensureInitialized();
1925
- // Always fetch fresh statistics from storage to avoid inconsistencies
1926
- // Only use cache if explicitly in read-only mode
1927
- const shouldUseCache = this.readOnly && this.statisticsCache &&
1928
- (Date.now() - this.lastStatisticsFlushTime < this.MIN_FLUSH_INTERVAL_MS);
1856
+ // Enhanced cache strategy: use cache for 5 minutes to avoid expensive lookups
1857
+ const CACHE_TTL = 5 * 60 * 1000; // 5 minutes
1858
+ const timeSinceFlush = Date.now() - this.lastStatisticsFlushTime;
1859
+ const shouldUseCache = this.statisticsCache && timeSinceFlush < CACHE_TTL;
1929
1860
  if (shouldUseCache && this.statisticsCache) {
1861
+ // Use cached statistics without logging since loggingConfig not available in storage adapter
1930
1862
  return {
1931
1863
  nounCount: { ...this.statisticsCache.nounCount },
1932
1864
  verbCount: { ...this.statisticsCache.verbCount },
@@ -1936,22 +1868,32 @@ export class S3CompatibleStorage extends BaseStorage {
1936
1868
  };
1937
1869
  }
1938
1870
  try {
1871
+ // Fetching fresh statistics from storage
1939
1872
  // Import the GetObjectCommand only when needed
1940
1873
  const { GetObjectCommand } = await import('@aws-sdk/client-s3');
1941
- // First try to get statistics from today's file
1942
- const currentKey = this.getCurrentStatisticsKey();
1943
- let statistics = await this.tryGetStatisticsFromKey(currentKey);
1944
- // If not found, try yesterday's file (in case it's just after midnight)
1945
- if (!statistics) {
1946
- const yesterday = new Date();
1947
- yesterday.setDate(yesterday.getDate() - 1);
1948
- const yesterdayKey = this.getStatisticsKeyForDate(yesterday);
1949
- statistics = await this.tryGetStatisticsFromKey(yesterdayKey);
1950
- }
1951
- // If still not found, try the legacy location
1952
- if (!statistics) {
1953
- const legacyKey = this.getLegacyStatisticsKey();
1954
- statistics = await this.tryGetStatisticsFromKey(legacyKey);
1874
+ // Try statistics locations in order of preference (but with timeout)
1875
+ const keys = [
1876
+ this.getCurrentStatisticsKey(),
1877
+ // Only try yesterday if it's within 2 hours of midnight to avoid unnecessary calls
1878
+ ...(this.shouldTryYesterday() ? [this.getStatisticsKeyForDate(this.getYesterday())] : []),
1879
+ this.getLegacyStatisticsKey()
1880
+ ];
1881
+ let statistics = null;
1882
+ // Try each key with a timeout to prevent hanging
1883
+ for (const key of keys) {
1884
+ try {
1885
+ statistics = await Promise.race([
1886
+ this.tryGetStatisticsFromKey(key),
1887
+ new Promise((_, reject) => setTimeout(() => reject(new Error('Timeout')), 2000) // 2 second timeout per key
1888
+ )
1889
+ ]);
1890
+ if (statistics)
1891
+ break; // Found statistics, stop trying other keys
1892
+ }
1893
+ catch (error) {
1894
+ // Continue to next key on timeout or error
1895
+ continue;
1896
+ }
1955
1897
  }
1956
1898
  // If we found statistics, update the cache
1957
1899
  if (statistics) {
@@ -1964,13 +1906,33 @@ export class S3CompatibleStorage extends BaseStorage {
1964
1906
  lastUpdated: statistics.lastUpdated
1965
1907
  };
1966
1908
  }
1909
+ // Successfully loaded statistics from storage
1967
1910
  return statistics;
1968
1911
  }
1969
1912
  catch (error) {
1970
- this.logger.error('Error getting statistics data:', error);
1971
- throw error;
1913
+ this.logger.warn('Error getting statistics data, returning cached or null:', error);
1914
+ // Return cached data if available, even if stale, rather than throwing
1915
+ return this.statisticsCache || null;
1972
1916
  }
1973
1917
  }
1918
+ /**
1919
+ * Check if we should try yesterday's statistics file
1920
+ * Only try within 2 hours of midnight to avoid unnecessary calls
1921
+ */
1922
+ shouldTryYesterday() {
1923
+ const now = new Date();
1924
+ const hour = now.getHours();
1925
+ // Only try yesterday's file between 10 PM and 2 AM
1926
+ return hour >= 22 || hour <= 2;
1927
+ }
1928
+ /**
1929
+ * Get yesterday's date
1930
+ */
1931
+ getYesterday() {
1932
+ const yesterday = new Date();
1933
+ yesterday.setDate(yesterday.getDate() - 1);
1934
+ return yesterday;
1935
+ }
1974
1936
  /**
1975
1937
  * Try to get statistics from a specific key
1976
1938
  * @param key The key to try to get statistics from
@@ -2149,6 +2111,68 @@ export class S3CompatibleStorage extends BaseStorage {
2149
2111
  this.logger.warn('Failed to cleanup old change logs:', error);
2150
2112
  }
2151
2113
  }
2114
+ /**
2115
+ * Sample-based storage estimation as fallback when statistics unavailable
2116
+ * Much faster than full scans - samples first 50 objects per prefix
2117
+ */
2118
+ async getSampleBasedStorageEstimate() {
2119
+ try {
2120
+ const { ListObjectsV2Command } = await import('@aws-sdk/client-s3');
2121
+ const sampleSize = 50; // Sample first 50 objects per prefix
2122
+ const prefixes = [
2123
+ { prefix: this.nounPrefix, type: 'noun' },
2124
+ { prefix: this.verbPrefix, type: 'verb' },
2125
+ { prefix: this.metadataPrefix, type: 'metadata' }
2126
+ ];
2127
+ let totalSampleSize = 0;
2128
+ const counts = { noun: 0, verb: 0, metadata: 0 };
2129
+ for (const { prefix, type } of prefixes) {
2130
+ // Get small sample of objects
2131
+ const listResponse = await this.s3Client.send(new ListObjectsV2Command({
2132
+ Bucket: this.bucketName,
2133
+ Prefix: prefix,
2134
+ MaxKeys: sampleSize
2135
+ }));
2136
+ if (listResponse.Contents && listResponse.Contents.length > 0) {
2137
+ let sampleSize = 0;
2138
+ let sampleCount = listResponse.Contents.length;
2139
+ // Calculate size from first few objects in sample
2140
+ for (let i = 0; i < Math.min(10, sampleCount); i++) {
2141
+ const obj = listResponse.Contents[i];
2142
+ if (obj && obj.Size) {
2143
+ sampleSize += typeof obj.Size === 'number' ? obj.Size : parseInt(obj.Size.toString(), 10);
2144
+ }
2145
+ }
2146
+ // Estimate total count (if we got MaxKeys, there are probably more)
2147
+ let estimatedCount = sampleCount;
2148
+ if (sampleCount === sampleSize && listResponse.IsTruncated) {
2149
+ // Rough estimate: if we got exactly MaxKeys and truncated, multiply by 10
2150
+ estimatedCount = sampleCount * 10;
2151
+ }
2152
+ // Estimate average object size and total size
2153
+ const avgSize = sampleSize / Math.min(10, sampleCount) || 512; // Default 512 bytes
2154
+ const estimatedTotalSize = avgSize * estimatedCount;
2155
+ totalSampleSize += estimatedTotalSize;
2156
+ counts[type] = estimatedCount;
2157
+ }
2158
+ }
2159
+ return {
2160
+ estimatedSize: totalSampleSize,
2161
+ nodeCount: counts.noun,
2162
+ edgeCount: counts.verb,
2163
+ metadataCount: counts.metadata
2164
+ };
2165
+ }
2166
+ catch (error) {
2167
+ // If even sampling fails, return minimal estimates
2168
+ return {
2169
+ estimatedSize: 1024, // 1KB minimum
2170
+ nodeCount: 0,
2171
+ edgeCount: 0,
2172
+ metadataCount: 0
2173
+ };
2174
+ }
2175
+ }
2152
2176
  /**
2153
2177
  * Acquire a distributed lock for coordinating operations across multiple instances
2154
2178
  * @param lockKey The key to lock on