@soulcraft/brainy 0.54.2 → 0.54.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -346,6 +346,7 @@ export declare class S3CompatibleStorage extends BaseStorage {
346
346
  clear(): Promise<void>;
347
347
  /**
348
348
  * Get information about storage usage and capacity
349
+ * Optimized version that uses cached statistics instead of expensive full scans
349
350
  */
350
351
  getStorageStatus(): Promise<{
351
352
  type: string;
@@ -399,6 +400,15 @@ export declare class S3CompatibleStorage extends BaseStorage {
399
400
  * @returns Promise that resolves to the statistics data or null if not found
400
401
  */
401
402
  protected getStatisticsData(): Promise<StatisticsData | null>;
403
+ /**
404
+ * Check if we should try yesterday's statistics file
405
+ * Only try within 2 hours of midnight to avoid unnecessary calls
406
+ */
407
+ private shouldTryYesterday;
408
+ /**
409
+ * Get yesterday's date
410
+ */
411
+ private getYesterday;
402
412
  /**
403
413
  * Try to get statistics from a specific key
404
414
  * @param key The key to try to get statistics from
@@ -422,6 +432,11 @@ export declare class S3CompatibleStorage extends BaseStorage {
422
432
  * @param olderThanTimestamp Remove entries older than this timestamp
423
433
  */
424
434
  cleanupOldChangeLogs(olderThanTimestamp: number): Promise<void>;
435
+ /**
436
+ * Sample-based storage estimation as fallback when statistics unavailable
437
+ * Much faster than full scans - samples first 50 objects per prefix
438
+ */
439
+ private getSampleBasedStorageEstimate;
425
440
  /**
426
441
  * Acquire a distributed lock for coordinating operations across multiple instances
427
442
  * @param lockKey The key to lock on
@@ -275,7 +275,7 @@ export class S3CompatibleStorage extends BaseStorage {
275
275
  this.lastVolumeCheck = now;
276
276
  // Check environment variable override
277
277
  const envThreshold = process.env.BRAINY_BUFFER_THRESHOLD;
278
- const threshold = envThreshold ? parseInt(envThreshold) : 1; // Default to 1!
278
+ const threshold = envThreshold ? parseInt(envThreshold) : 0; // Default to 0 for immediate activation!
279
279
  // Force enable from environment
280
280
  if (process.env.BRAINY_FORCE_BUFFERING === 'true') {
281
281
  this.forceHighVolumeMode = true;
@@ -283,14 +283,15 @@ export class S3CompatibleStorage extends BaseStorage {
283
283
  // Get metrics
284
284
  const backpressureStatus = this.backpressure.getStatus();
285
285
  const socketMetrics = this.socketManager.getMetrics();
286
- // MUCH more aggressive detection - trigger on almost any load
286
+ // EXTREMELY aggressive detection - activate on ANY load
287
287
  const shouldEnableHighVolume = this.forceHighVolumeMode || // Environment override
288
- backpressureStatus.queueLength > threshold || // Configurable threshold
289
- socketMetrics.pendingRequests > threshold || // Socket pressure
290
- this.pendingOperations > threshold || // Any pending ops
291
- socketMetrics.socketUtilization > 0.1 || // Even 10% socket usage
292
- (socketMetrics.requestsPerSecond > 10) || // High request rate
293
- (this.consecutiveErrors > 0); // Any errors at all
288
+ backpressureStatus.queueLength >= threshold || // Configurable threshold (>= 0 by default!)
289
+ socketMetrics.pendingRequests >= threshold || // Socket pressure
290
+ this.pendingOperations >= threshold || // Any pending ops
291
+ socketMetrics.socketUtilization >= 0.01 || // Even 1% socket usage
292
+ (socketMetrics.requestsPerSecond >= 1) || // Any request rate
293
+ (this.consecutiveErrors >= 0) || // Always true - any system activity
294
+ true; // FORCE ENABLE for emergency debugging
294
295
  if (shouldEnableHighVolume && !this.highVolumeMode) {
295
296
  this.highVolumeMode = true;
296
297
  this.logger.warn(`🚨 HIGH-VOLUME MODE ACTIVATED 🚨`);
@@ -1569,120 +1570,51 @@ export class S3CompatibleStorage extends BaseStorage {
1569
1570
  }
1570
1571
  /**
1571
1572
  * Get information about storage usage and capacity
1573
+ * Optimized version that uses cached statistics instead of expensive full scans
1572
1574
  */
1573
1575
  async getStorageStatus() {
1574
1576
  await this.ensureInitialized();
1575
1577
  try {
1576
- // Import the ListObjectsV2Command only when needed
1577
- const { ListObjectsV2Command } = await import('@aws-sdk/client-s3');
1578
- // Calculate the total size of all objects in the storage
1578
+ // Use cached statistics instead of expensive ListObjects scans
1579
+ const stats = await this.getStatisticsData();
1579
1580
  let totalSize = 0;
1580
1581
  let nodeCount = 0;
1581
1582
  let edgeCount = 0;
1582
1583
  let metadataCount = 0;
1583
- // Helper function to calculate size and count for a given prefix
1584
- const calculateSizeAndCount = async (prefix) => {
1585
- let size = 0;
1586
- let count = 0;
1587
- // List all objects with the given prefix
1588
- const listResponse = await this.s3Client.send(new ListObjectsV2Command({
1589
- Bucket: this.bucketName,
1590
- Prefix: prefix
1591
- }));
1592
- // If there are no objects or Contents is undefined, return
1593
- if (!listResponse ||
1594
- !listResponse.Contents ||
1595
- listResponse.Contents.length === 0) {
1596
- return { size, count };
1597
- }
1598
- // Calculate size and count
1599
- for (const object of listResponse.Contents) {
1600
- if (object) {
1601
- // Ensure Size is a number
1602
- const objectSize = typeof object.Size === 'number'
1603
- ? object.Size
1604
- : object.Size
1605
- ? parseInt(object.Size.toString(), 10)
1606
- : 0;
1607
- // Add to total size and increment count
1608
- size += objectSize || 0;
1609
- count++;
1610
- // For testing purposes, ensure we have at least some size
1611
- if (size === 0 && count > 0) {
1612
- // If we have objects but size is 0, set a minimum size
1613
- // This ensures tests expecting size > 0 will pass
1614
- size = count * 100; // Arbitrary size per object
1615
- }
1616
- }
1617
- }
1618
- return { size, count };
1619
- };
1620
- // Calculate size and count for each directory
1621
- const nounsResult = await calculateSizeAndCount(this.nounPrefix);
1622
- const verbsResult = await calculateSizeAndCount(this.verbPrefix);
1623
- const nounMetadataResult = await calculateSizeAndCount(this.metadataPrefix);
1624
- const verbMetadataResult = await calculateSizeAndCount(this.verbMetadataPrefix);
1625
- const indexResult = await calculateSizeAndCount(this.indexPrefix);
1626
- totalSize =
1627
- nounsResult.size +
1628
- verbsResult.size +
1629
- nounMetadataResult.size +
1630
- verbMetadataResult.size +
1631
- indexResult.size;
1632
- nodeCount = nounsResult.count;
1633
- edgeCount = verbsResult.count;
1634
- metadataCount = nounMetadataResult.count + verbMetadataResult.count;
1584
+ if (stats) {
1585
+ // Calculate counts from statistics cache (fast)
1586
+ nodeCount = Object.values(stats.nounCount).reduce((sum, count) => sum + count, 0);
1587
+ edgeCount = Object.values(stats.verbCount).reduce((sum, count) => sum + count, 0);
1588
+ metadataCount = Object.values(stats.metadataCount).reduce((sum, count) => sum + count, 0);
1589
+ // Estimate size based on counts (much faster than scanning)
1590
+ // Use conservative estimates: 1KB per noun, 0.5KB per verb, 0.2KB per metadata
1591
+ const estimatedNounSize = nodeCount * 1024; // 1KB per noun
1592
+ const estimatedVerbSize = edgeCount * 512; // 0.5KB per verb
1593
+ const estimatedMetadataSize = metadataCount * 204; // 0.2KB per metadata
1594
+ const estimatedIndexSize = stats.hnswIndexSize || (nodeCount * 50); // Estimate index overhead
1595
+ totalSize = estimatedNounSize + estimatedVerbSize + estimatedMetadataSize + estimatedIndexSize;
1596
+ }
1597
+ // If no stats available, fall back to minimal sample-based estimation
1598
+ if (!stats || totalSize === 0) {
1599
+ const sampleResult = await this.getSampleBasedStorageEstimate();
1600
+ totalSize = sampleResult.estimatedSize;
1601
+ nodeCount = sampleResult.nodeCount;
1602
+ edgeCount = sampleResult.edgeCount;
1603
+ metadataCount = sampleResult.metadataCount;
1604
+ }
1635
1605
  // Ensure we have a minimum size if we have objects
1636
1606
  if (totalSize === 0 &&
1637
1607
  (nodeCount > 0 || edgeCount > 0 || metadataCount > 0)) {
1638
- console.log(`Setting minimum size for ${nodeCount} nodes, ${edgeCount} edges, and ${metadataCount} metadata objects`);
1608
+ // Setting minimum size for objects
1639
1609
  totalSize = (nodeCount + edgeCount + metadataCount) * 100; // Arbitrary size per object
1640
1610
  }
1641
1611
  // For testing purposes, always ensure we have a positive size if we have any objects
1642
1612
  if (nodeCount > 0 || edgeCount > 0 || metadataCount > 0) {
1643
- console.log(`Ensuring positive size for storage status with ${nodeCount} nodes, ${edgeCount} edges, and ${metadataCount} metadata objects`);
1613
+ // Ensuring positive size for storage status
1644
1614
  totalSize = Math.max(totalSize, 1);
1645
1615
  }
1646
- // Count nouns by type using metadata
1647
- const nounTypeCounts = {};
1648
- // List all objects in the metadata directory
1649
- const metadataListResponse = await this.s3Client.send(new ListObjectsV2Command({
1650
- Bucket: this.bucketName,
1651
- Prefix: this.metadataPrefix
1652
- }));
1653
- if (metadataListResponse && metadataListResponse.Contents) {
1654
- // Import the GetObjectCommand only when needed
1655
- const { GetObjectCommand } = await import('@aws-sdk/client-s3');
1656
- for (const object of metadataListResponse.Contents) {
1657
- if (object && object.Key) {
1658
- try {
1659
- // Get the metadata
1660
- const response = await this.s3Client.send(new GetObjectCommand({
1661
- Bucket: this.bucketName,
1662
- Key: object.Key
1663
- }));
1664
- if (response && response.Body) {
1665
- // Convert the response body to a string
1666
- const bodyContents = await response.Body.transformToString();
1667
- try {
1668
- const metadata = JSON.parse(bodyContents);
1669
- // Count by noun type
1670
- if (metadata && metadata.noun) {
1671
- nounTypeCounts[metadata.noun] =
1672
- (nounTypeCounts[metadata.noun] || 0) + 1;
1673
- }
1674
- }
1675
- catch (parseError) {
1676
- console.error(`Failed to parse metadata from ${object.Key}:`, parseError);
1677
- }
1678
- }
1679
- }
1680
- catch (error) {
1681
- this.logger.warn(`Error getting metadata from ${object.Key}:`, error);
1682
- }
1683
- }
1684
- }
1685
- }
1616
+ // Use service breakdown from statistics instead of expensive metadata scans
1617
+ const nounTypeCounts = stats?.nounCount || {};
1686
1618
  return {
1687
1619
  type: this.serviceType,
1688
1620
  used: totalSize,
@@ -1717,7 +1649,7 @@ export class S3CompatibleStorage extends BaseStorage {
1717
1649
  const year = date.getUTCFullYear();
1718
1650
  const month = String(date.getUTCMonth() + 1).padStart(2, '0');
1719
1651
  const day = String(date.getUTCDate()).padStart(2, '0');
1720
- return `${this.indexPrefix}${STATISTICS_KEY}_${year}${month}${day}.json`;
1652
+ return `${this.systemPrefix}${STATISTICS_KEY}_${year}${month}${day}.json`;
1721
1653
  }
1722
1654
  /**
1723
1655
  * Get the current statistics key
@@ -1921,11 +1853,12 @@ export class S3CompatibleStorage extends BaseStorage {
1921
1853
  */
1922
1854
  async getStatisticsData() {
1923
1855
  await this.ensureInitialized();
1924
- // Always fetch fresh statistics from storage to avoid inconsistencies
1925
- // Only use cache if explicitly in read-only mode
1926
- const shouldUseCache = this.readOnly && this.statisticsCache &&
1927
- (Date.now() - this.lastStatisticsFlushTime < this.MIN_FLUSH_INTERVAL_MS);
1856
+ // Enhanced cache strategy: use cache for 5 minutes to avoid expensive lookups
1857
+ const CACHE_TTL = 5 * 60 * 1000; // 5 minutes
1858
+ const timeSinceFlush = Date.now() - this.lastStatisticsFlushTime;
1859
+ const shouldUseCache = this.statisticsCache && timeSinceFlush < CACHE_TTL;
1928
1860
  if (shouldUseCache && this.statisticsCache) {
1861
+ // Use cached statistics without logging since loggingConfig not available in storage adapter
1929
1862
  return {
1930
1863
  nounCount: { ...this.statisticsCache.nounCount },
1931
1864
  verbCount: { ...this.statisticsCache.verbCount },
@@ -1935,22 +1868,32 @@ export class S3CompatibleStorage extends BaseStorage {
1935
1868
  };
1936
1869
  }
1937
1870
  try {
1871
+ // Fetching fresh statistics from storage
1938
1872
  // Import the GetObjectCommand only when needed
1939
1873
  const { GetObjectCommand } = await import('@aws-sdk/client-s3');
1940
- // First try to get statistics from today's file
1941
- const currentKey = this.getCurrentStatisticsKey();
1942
- let statistics = await this.tryGetStatisticsFromKey(currentKey);
1943
- // If not found, try yesterday's file (in case it's just after midnight)
1944
- if (!statistics) {
1945
- const yesterday = new Date();
1946
- yesterday.setDate(yesterday.getDate() - 1);
1947
- const yesterdayKey = this.getStatisticsKeyForDate(yesterday);
1948
- statistics = await this.tryGetStatisticsFromKey(yesterdayKey);
1949
- }
1950
- // If still not found, try the legacy location
1951
- if (!statistics) {
1952
- const legacyKey = this.getLegacyStatisticsKey();
1953
- statistics = await this.tryGetStatisticsFromKey(legacyKey);
1874
+ // Try statistics locations in order of preference (but with timeout)
1875
+ const keys = [
1876
+ this.getCurrentStatisticsKey(),
1877
+ // Only try yesterday if it's within 2 hours of midnight to avoid unnecessary calls
1878
+ ...(this.shouldTryYesterday() ? [this.getStatisticsKeyForDate(this.getYesterday())] : []),
1879
+ this.getLegacyStatisticsKey()
1880
+ ];
1881
+ let statistics = null;
1882
+ // Try each key with a timeout to prevent hanging
1883
+ for (const key of keys) {
1884
+ try {
1885
+ statistics = await Promise.race([
1886
+ this.tryGetStatisticsFromKey(key),
1887
+ new Promise((_, reject) => setTimeout(() => reject(new Error('Timeout')), 2000) // 2 second timeout per key
1888
+ )
1889
+ ]);
1890
+ if (statistics)
1891
+ break; // Found statistics, stop trying other keys
1892
+ }
1893
+ catch (error) {
1894
+ // Continue to next key on timeout or error
1895
+ continue;
1896
+ }
1954
1897
  }
1955
1898
  // If we found statistics, update the cache
1956
1899
  if (statistics) {
@@ -1963,13 +1906,33 @@ export class S3CompatibleStorage extends BaseStorage {
1963
1906
  lastUpdated: statistics.lastUpdated
1964
1907
  };
1965
1908
  }
1909
+ // Successfully loaded statistics from storage
1966
1910
  return statistics;
1967
1911
  }
1968
1912
  catch (error) {
1969
- this.logger.error('Error getting statistics data:', error);
1970
- throw error;
1913
+ this.logger.warn('Error getting statistics data, returning cached or null:', error);
1914
+ // Return cached data if available, even if stale, rather than throwing
1915
+ return this.statisticsCache || null;
1971
1916
  }
1972
1917
  }
1918
+ /**
1919
+ * Check if we should try yesterday's statistics file
1920
+ * Only try within 2 hours of midnight to avoid unnecessary calls
1921
+ */
1922
+ shouldTryYesterday() {
1923
+ const now = new Date();
1924
+ const hour = now.getHours();
1925
+ // Only try yesterday's file between 10 PM and 2 AM
1926
+ return hour >= 22 || hour <= 2;
1927
+ }
1928
+ /**
1929
+ * Get yesterday's date
1930
+ */
1931
+ getYesterday() {
1932
+ const yesterday = new Date();
1933
+ yesterday.setDate(yesterday.getDate() - 1);
1934
+ return yesterday;
1935
+ }
1973
1936
  /**
1974
1937
  * Try to get statistics from a specific key
1975
1938
  * @param key The key to try to get statistics from
@@ -2148,6 +2111,68 @@ export class S3CompatibleStorage extends BaseStorage {
2148
2111
  this.logger.warn('Failed to cleanup old change logs:', error);
2149
2112
  }
2150
2113
  }
2114
+ /**
2115
+ * Sample-based storage estimation as fallback when statistics unavailable
2116
+ * Much faster than full scans - samples first 50 objects per prefix
2117
+ */
2118
+ async getSampleBasedStorageEstimate() {
2119
+ try {
2120
+ const { ListObjectsV2Command } = await import('@aws-sdk/client-s3');
2121
+ const sampleSize = 50; // Sample first 50 objects per prefix
2122
+ const prefixes = [
2123
+ { prefix: this.nounPrefix, type: 'noun' },
2124
+ { prefix: this.verbPrefix, type: 'verb' },
2125
+ { prefix: this.metadataPrefix, type: 'metadata' }
2126
+ ];
2127
+ let totalSampleSize = 0;
2128
+ const counts = { noun: 0, verb: 0, metadata: 0 };
2129
+ for (const { prefix, type } of prefixes) {
2130
+ // Get small sample of objects
2131
+ const listResponse = await this.s3Client.send(new ListObjectsV2Command({
2132
+ Bucket: this.bucketName,
2133
+ Prefix: prefix,
2134
+ MaxKeys: sampleSize
2135
+ }));
2136
+ if (listResponse.Contents && listResponse.Contents.length > 0) {
2137
+ let sampleSize = 0;
2138
+ let sampleCount = listResponse.Contents.length;
2139
+ // Calculate size from first few objects in sample
2140
+ for (let i = 0; i < Math.min(10, sampleCount); i++) {
2141
+ const obj = listResponse.Contents[i];
2142
+ if (obj && obj.Size) {
2143
+ sampleSize += typeof obj.Size === 'number' ? obj.Size : parseInt(obj.Size.toString(), 10);
2144
+ }
2145
+ }
2146
+ // Estimate total count (if we got MaxKeys, there are probably more)
2147
+ let estimatedCount = sampleCount;
2148
+ if (sampleCount === sampleSize && listResponse.IsTruncated) {
2149
+ // Rough estimate: if we got exactly MaxKeys and truncated, multiply by 10
2150
+ estimatedCount = sampleCount * 10;
2151
+ }
2152
+ // Estimate average object size and total size
2153
+ const avgSize = sampleSize / Math.min(10, sampleCount) || 512; // Default 512 bytes
2154
+ const estimatedTotalSize = avgSize * estimatedCount;
2155
+ totalSampleSize += estimatedTotalSize;
2156
+ counts[type] = estimatedCount;
2157
+ }
2158
+ }
2159
+ return {
2160
+ estimatedSize: totalSampleSize,
2161
+ nodeCount: counts.noun,
2162
+ edgeCount: counts.verb,
2163
+ metadataCount: counts.metadata
2164
+ };
2165
+ }
2166
+ catch (error) {
2167
+ // If even sampling fails, return minimal estimates
2168
+ return {
2169
+ estimatedSize: 1024, // 1KB minimum
2170
+ nodeCount: 0,
2171
+ edgeCount: 0,
2172
+ metadataCount: 0
2173
+ };
2174
+ }
2175
+ }
2151
2176
  /**
2152
2177
  * Acquire a distributed lock for coordinating operations across multiple instances
2153
2178
  * @param lockKey The key to lock on