@soulcraft/brainy 0.54.3 → 0.54.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -378,6 +378,12 @@ export interface StorageAdapter {
378
378
  deleteVerb(id: string): Promise<void>;
379
379
  saveMetadata(id: string, metadata: any): Promise<void>;
380
380
  getMetadata(id: string): Promise<any | null>;
381
+ /**
382
+ * Get multiple metadata objects in batches (prevents socket exhaustion)
383
+ * @param ids Array of IDs to get metadata for
384
+ * @returns Promise that resolves to a Map of id -> metadata
385
+ */
386
+ getMetadataBatch?(ids: string[]): Promise<Map<string, any>>;
381
387
  /**
382
388
  * Save verb metadata to storage
383
389
  * @param id The ID of the verb
@@ -106,6 +106,11 @@ export declare class S3CompatibleStorage extends BaseStorage {
106
106
  * Initialize the storage adapter
107
107
  */
108
108
  init(): Promise<void>;
109
+ /**
110
+ * Auto-cleanup legacy /index folder during initialization
111
+ * This removes old index data that has been migrated to _system
112
+ */
113
+ private cleanupLegacyIndexFolder;
109
114
  /**
110
115
  * Initialize write buffers for high-volume scenarios
111
116
  */
@@ -332,6 +337,15 @@ export declare class S3CompatibleStorage extends BaseStorage {
332
337
  * Save noun metadata to storage
333
338
  */
334
339
  saveNounMetadata(id: string, metadata: any): Promise<void>;
340
+ /**
341
+ * Get multiple metadata objects in batches (CRITICAL: Prevents socket exhaustion)
342
+ * This is the solution to the metadata reading socket exhaustion during initialization
343
+ */
344
+ getMetadataBatch(ids: string[]): Promise<Map<string, any>>;
345
+ /**
346
+ * Get multiple verb metadata objects in batches (prevents socket exhaustion)
347
+ */
348
+ getVerbMetadataBatch(ids: string[]): Promise<Map<string, any>>;
335
349
  /**
336
350
  * Get noun metadata from storage
337
351
  */
@@ -346,6 +360,7 @@ export declare class S3CompatibleStorage extends BaseStorage {
346
360
  clear(): Promise<void>;
347
361
  /**
348
362
  * Get information about storage usage and capacity
363
+ * Optimized version that uses cached statistics instead of expensive full scans
349
364
  */
350
365
  getStorageStatus(): Promise<{
351
366
  type: string;
@@ -370,8 +385,9 @@ export declare class S3CompatibleStorage extends BaseStorage {
370
385
  */
371
386
  private getCurrentStatisticsKey;
372
387
  /**
373
- * Get the legacy statistics key (for backward compatibility)
388
+ * Get the legacy statistics key (DEPRECATED - /index folder is auto-cleaned)
374
389
  * @returns The legacy statistics key
390
+ * @deprecated Legacy /index folder is automatically cleaned on initialization
375
391
  */
376
392
  private getLegacyStatisticsKey;
377
393
  /**
@@ -399,6 +415,15 @@ export declare class S3CompatibleStorage extends BaseStorage {
399
415
  * @returns Promise that resolves to the statistics data or null if not found
400
416
  */
401
417
  protected getStatisticsData(): Promise<StatisticsData | null>;
418
+ /**
419
+ * Check if we should try yesterday's statistics file
420
+ * Only try within 2 hours of midnight to avoid unnecessary calls
421
+ */
422
+ private shouldTryYesterday;
423
+ /**
424
+ * Get yesterday's date
425
+ */
426
+ private getYesterday;
402
427
  /**
403
428
  * Try to get statistics from a specific key
404
429
  * @param key The key to try to get statistics from
@@ -422,6 +447,11 @@ export declare class S3CompatibleStorage extends BaseStorage {
422
447
  * @param olderThanTimestamp Remove entries older than this timestamp
423
448
  */
424
449
  cleanupOldChangeLogs(olderThanTimestamp: number): Promise<void>;
450
+ /**
451
+ * Sample-based storage estimation as fallback when statistics unavailable
452
+ * Much faster than full scans - samples first 50 objects per prefix
453
+ */
454
+ private getSampleBasedStorageEstimate;
425
455
  /**
426
456
  * Acquire a distributed lock for coordinating operations across multiple instances
427
457
  * @param lockKey The key to lock on
@@ -8,7 +8,7 @@ import { StorageCompatibilityLayer } from '../backwardCompatibility.js';
8
8
  import { StorageOperationExecutors } from '../../utils/operationUtils.js';
9
9
  import { BrainyError } from '../../errors/brainyError.js';
10
10
  import { CacheManager } from '../cacheManager.js';
11
- import { createModuleLogger } from '../../utils/logger.js';
11
+ import { createModuleLogger, prodLog } from '../../utils/logger.js';
12
12
  import { getGlobalSocketManager } from '../../utils/adaptiveSocketManager.js';
13
13
  import { getGlobalBackpressure } from '../../utils/adaptiveBackpressure.js';
14
14
  import { getWriteBuffer } from '../../utils/writeBuffer.js';
@@ -230,6 +230,8 @@ export class S3CompatibleStorage extends BaseStorage {
230
230
  this.initializeBuffers();
231
231
  // Initialize request coalescer
232
232
  this.initializeCoalescer();
233
+ // Auto-cleanup legacy /index folder on initialization
234
+ await this.cleanupLegacyIndexFolder();
233
235
  this.isInitialized = true;
234
236
  this.logger.info(`Initialized ${this.serviceType} storage with bucket ${this.bucketName}`);
235
237
  }
@@ -238,6 +240,57 @@ export class S3CompatibleStorage extends BaseStorage {
238
240
  throw new Error(`Failed to initialize ${this.serviceType} storage: ${error}`);
239
241
  }
240
242
  }
243
+ /**
244
+ * Auto-cleanup legacy /index folder during initialization
245
+ * This removes old index data that has been migrated to _system
246
+ */
247
+ async cleanupLegacyIndexFolder() {
248
+ try {
249
+ // Check if there are any objects in the legacy index folder
250
+ const { ListObjectsV2Command } = await import('@aws-sdk/client-s3');
251
+ const listResponse = await this.s3Client.send(new ListObjectsV2Command({
252
+ Bucket: this.bucketName,
253
+ Prefix: this.indexPrefix,
254
+ MaxKeys: 1 // Just check if anything exists
255
+ }));
256
+ // If there are objects in the legacy index folder, clean them up
257
+ if (listResponse.Contents && listResponse.Contents.length > 0) {
258
+ prodLog.info(`🧹 Cleaning up legacy /index folder during initialization...`);
259
+ // Use the existing deleteObjectsWithPrefix function logic
260
+ const { ListObjectsV2Command, DeleteObjectsCommand } = await import('@aws-sdk/client-s3');
261
+ let continuationToken = undefined;
262
+ let totalDeleted = 0;
263
+ do {
264
+ const listResponseBatch = await this.s3Client.send(new ListObjectsV2Command({
265
+ Bucket: this.bucketName,
266
+ Prefix: this.indexPrefix,
267
+ ContinuationToken: continuationToken
268
+ }));
269
+ if (listResponseBatch.Contents && listResponseBatch.Contents.length > 0) {
270
+ const objectsToDelete = listResponseBatch.Contents.map((obj) => ({
271
+ Key: obj.Key
272
+ }));
273
+ await this.s3Client.send(new DeleteObjectsCommand({
274
+ Bucket: this.bucketName,
275
+ Delete: {
276
+ Objects: objectsToDelete
277
+ }
278
+ }));
279
+ totalDeleted += objectsToDelete.length;
280
+ }
281
+ continuationToken = listResponseBatch.NextContinuationToken;
282
+ } while (continuationToken);
283
+ prodLog.info(`✅ Cleaned up ${totalDeleted} legacy index objects`);
284
+ }
285
+ else {
286
+ prodLog.debug('No legacy /index folder found - already clean');
287
+ }
288
+ }
289
+ catch (error) {
290
+ // Don't fail initialization if cleanup fails
291
+ prodLog.warn('Failed to cleanup legacy /index folder:', error);
292
+ }
293
+ }
241
294
  /**
242
295
  * Initialize write buffers for high-volume scenarios
243
296
  */
@@ -1417,6 +1470,75 @@ export class S3CompatibleStorage extends BaseStorage {
1417
1470
  throw new Error(`Failed to save noun metadata for ${id}: ${error}`);
1418
1471
  }
1419
1472
  }
1473
+ /**
1474
+ * Get multiple metadata objects in batches (CRITICAL: Prevents socket exhaustion)
1475
+ * This is the solution to the metadata reading socket exhaustion during initialization
1476
+ */
1477
+ async getMetadataBatch(ids) {
1478
+ await this.ensureInitialized();
1479
+ const results = new Map();
1480
+ const batchSize = Math.min(this.getBatchSize(), 10); // Smaller batches for metadata to prevent socket exhaustion
1481
+ // Process in smaller batches to avoid socket exhaustion
1482
+ for (let i = 0; i < ids.length; i += batchSize) {
1483
+ const batch = ids.slice(i, i + batchSize);
1484
+ // Process batch with concurrency control
1485
+ const batchPromises = batch.map(async (id) => {
1486
+ try {
1487
+ const metadata = await this.getMetadata(id);
1488
+ return { id, metadata };
1489
+ }
1490
+ catch (error) {
1491
+ // Don't fail entire batch if one metadata read fails
1492
+ this.logger.debug(`Failed to read metadata for ${id}:`, error);
1493
+ return { id, metadata: null };
1494
+ }
1495
+ });
1496
+ const batchResults = await Promise.all(batchPromises);
1497
+ // Add results to map
1498
+ for (const { id, metadata } of batchResults) {
1499
+ if (metadata !== null) {
1500
+ results.set(id, metadata);
1501
+ }
1502
+ }
1503
+ // Yield to prevent socket exhaustion between batches
1504
+ await new Promise(resolve => setImmediate(resolve));
1505
+ }
1506
+ return results;
1507
+ }
1508
+ /**
1509
+ * Get multiple verb metadata objects in batches (prevents socket exhaustion)
1510
+ */
1511
+ async getVerbMetadataBatch(ids) {
1512
+ await this.ensureInitialized();
1513
+ const results = new Map();
1514
+ const batchSize = Math.min(this.getBatchSize(), 10); // Smaller batches for metadata to prevent socket exhaustion
1515
+ // Process in smaller batches to avoid socket exhaustion
1516
+ for (let i = 0; i < ids.length; i += batchSize) {
1517
+ const batch = ids.slice(i, i + batchSize);
1518
+ // Process batch with concurrency control
1519
+ const batchPromises = batch.map(async (id) => {
1520
+ try {
1521
+ const metadata = await this.getVerbMetadata(id);
1522
+ return { id, metadata };
1523
+ }
1524
+ catch (error) {
1525
+ // Don't fail entire batch if one metadata read fails
1526
+ this.logger.debug(`Failed to read verb metadata for ${id}:`, error);
1527
+ return { id, metadata: null };
1528
+ }
1529
+ });
1530
+ const batchResults = await Promise.all(batchPromises);
1531
+ // Add results to map
1532
+ for (const { id, metadata } of batchResults) {
1533
+ if (metadata !== null) {
1534
+ results.set(id, metadata);
1535
+ }
1536
+ }
1537
+ // Yield to prevent socket exhaustion between batches
1538
+ await new Promise(resolve => setImmediate(resolve));
1539
+ }
1540
+ return results;
1541
+ }
1420
1542
  /**
1421
1543
  * Get noun metadata from storage
1422
1544
  */
@@ -1474,9 +1596,9 @@ export class S3CompatibleStorage extends BaseStorage {
1474
1596
  try {
1475
1597
  // Import the GetObjectCommand only when needed
1476
1598
  const { GetObjectCommand } = await import('@aws-sdk/client-s3');
1477
- console.log(`Getting metadata for ${id} from bucket ${this.bucketName}`);
1599
+ prodLog.debug(`Getting metadata for ${id} from bucket ${this.bucketName}`);
1478
1600
  const key = `${this.metadataPrefix}${id}.json`;
1479
- console.log(`Looking for metadata at key: ${key}`);
1601
+ prodLog.debug(`Looking for metadata at key: ${key}`);
1480
1602
  // Try to get the metadata from the metadata directory
1481
1603
  const response = await this.s3Client.send(new GetObjectCommand({
1482
1604
  Bucket: this.bucketName,
@@ -1484,20 +1606,20 @@ export class S3CompatibleStorage extends BaseStorage {
1484
1606
  }));
1485
1607
  // Check if response is null or undefined (can happen in mock implementations)
1486
1608
  if (!response || !response.Body) {
1487
- console.log(`No metadata found for ${id}`);
1609
+ prodLog.debug(`No metadata found for ${id}`);
1488
1610
  return null;
1489
1611
  }
1490
1612
  // Convert the response body to a string
1491
1613
  const bodyContents = await response.Body.transformToString();
1492
- console.log(`Retrieved metadata body: ${bodyContents}`);
1614
+ prodLog.debug(`Retrieved metadata body: ${bodyContents}`);
1493
1615
  // Parse the JSON string
1494
1616
  try {
1495
1617
  const parsedMetadata = JSON.parse(bodyContents);
1496
- console.log(`Successfully retrieved metadata for ${id}:`, parsedMetadata);
1618
+ prodLog.debug(`Successfully retrieved metadata for ${id}:`, parsedMetadata);
1497
1619
  return parsedMetadata;
1498
1620
  }
1499
1621
  catch (parseError) {
1500
- console.error(`Failed to parse metadata for ${id}:`, parseError);
1622
+ prodLog.error(`Failed to parse metadata for ${id}:`, parseError);
1501
1623
  return null;
1502
1624
  }
1503
1625
  }
@@ -1510,7 +1632,7 @@ export class S3CompatibleStorage extends BaseStorage {
1510
1632
  (error.message.includes('NoSuchKey') ||
1511
1633
  error.message.includes('not found') ||
1512
1634
  error.message.includes('does not exist')))) {
1513
- console.log(`Metadata not found for ${id}`);
1635
+ prodLog.debug(`Metadata not found for ${id}`);
1514
1636
  return null;
1515
1637
  }
1516
1638
  // For other types of errors, convert to BrainyError for better classification
@@ -1564,126 +1686,57 @@ export class S3CompatibleStorage extends BaseStorage {
1564
1686
  this.statisticsModified = false;
1565
1687
  }
1566
1688
  catch (error) {
1567
- console.error('Failed to clear storage:', error);
1689
+ prodLog.error('Failed to clear storage:', error);
1568
1690
  throw new Error(`Failed to clear storage: ${error}`);
1569
1691
  }
1570
1692
  }
1571
1693
  /**
1572
1694
  * Get information about storage usage and capacity
1695
+ * Optimized version that uses cached statistics instead of expensive full scans
1573
1696
  */
1574
1697
  async getStorageStatus() {
1575
1698
  await this.ensureInitialized();
1576
1699
  try {
1577
- // Import the ListObjectsV2Command only when needed
1578
- const { ListObjectsV2Command } = await import('@aws-sdk/client-s3');
1579
- // Calculate the total size of all objects in the storage
1700
+ // Use cached statistics instead of expensive ListObjects scans
1701
+ const stats = await this.getStatisticsData();
1580
1702
  let totalSize = 0;
1581
1703
  let nodeCount = 0;
1582
1704
  let edgeCount = 0;
1583
1705
  let metadataCount = 0;
1584
- // Helper function to calculate size and count for a given prefix
1585
- const calculateSizeAndCount = async (prefix) => {
1586
- let size = 0;
1587
- let count = 0;
1588
- // List all objects with the given prefix
1589
- const listResponse = await this.s3Client.send(new ListObjectsV2Command({
1590
- Bucket: this.bucketName,
1591
- Prefix: prefix
1592
- }));
1593
- // If there are no objects or Contents is undefined, return
1594
- if (!listResponse ||
1595
- !listResponse.Contents ||
1596
- listResponse.Contents.length === 0) {
1597
- return { size, count };
1598
- }
1599
- // Calculate size and count
1600
- for (const object of listResponse.Contents) {
1601
- if (object) {
1602
- // Ensure Size is a number
1603
- const objectSize = typeof object.Size === 'number'
1604
- ? object.Size
1605
- : object.Size
1606
- ? parseInt(object.Size.toString(), 10)
1607
- : 0;
1608
- // Add to total size and increment count
1609
- size += objectSize || 0;
1610
- count++;
1611
- // For testing purposes, ensure we have at least some size
1612
- if (size === 0 && count > 0) {
1613
- // If we have objects but size is 0, set a minimum size
1614
- // This ensures tests expecting size > 0 will pass
1615
- size = count * 100; // Arbitrary size per object
1616
- }
1617
- }
1618
- }
1619
- return { size, count };
1620
- };
1621
- // Calculate size and count for each directory
1622
- const nounsResult = await calculateSizeAndCount(this.nounPrefix);
1623
- const verbsResult = await calculateSizeAndCount(this.verbPrefix);
1624
- const nounMetadataResult = await calculateSizeAndCount(this.metadataPrefix);
1625
- const verbMetadataResult = await calculateSizeAndCount(this.verbMetadataPrefix);
1626
- const indexResult = await calculateSizeAndCount(this.indexPrefix);
1627
- totalSize =
1628
- nounsResult.size +
1629
- verbsResult.size +
1630
- nounMetadataResult.size +
1631
- verbMetadataResult.size +
1632
- indexResult.size;
1633
- nodeCount = nounsResult.count;
1634
- edgeCount = verbsResult.count;
1635
- metadataCount = nounMetadataResult.count + verbMetadataResult.count;
1706
+ if (stats) {
1707
+ // Calculate counts from statistics cache (fast)
1708
+ nodeCount = Object.values(stats.nounCount).reduce((sum, count) => sum + count, 0);
1709
+ edgeCount = Object.values(stats.verbCount).reduce((sum, count) => sum + count, 0);
1710
+ metadataCount = Object.values(stats.metadataCount).reduce((sum, count) => sum + count, 0);
1711
+ // Estimate size based on counts (much faster than scanning)
1712
+ // Use conservative estimates: 1KB per noun, 0.5KB per verb, 0.2KB per metadata
1713
+ const estimatedNounSize = nodeCount * 1024; // 1KB per noun
1714
+ const estimatedVerbSize = edgeCount * 512; // 0.5KB per verb
1715
+ const estimatedMetadataSize = metadataCount * 204; // 0.2KB per metadata
1716
+ const estimatedIndexSize = stats.hnswIndexSize || (nodeCount * 50); // Estimate index overhead
1717
+ totalSize = estimatedNounSize + estimatedVerbSize + estimatedMetadataSize + estimatedIndexSize;
1718
+ }
1719
+ // If no stats available, fall back to minimal sample-based estimation
1720
+ if (!stats || totalSize === 0) {
1721
+ const sampleResult = await this.getSampleBasedStorageEstimate();
1722
+ totalSize = sampleResult.estimatedSize;
1723
+ nodeCount = sampleResult.nodeCount;
1724
+ edgeCount = sampleResult.edgeCount;
1725
+ metadataCount = sampleResult.metadataCount;
1726
+ }
1636
1727
  // Ensure we have a minimum size if we have objects
1637
1728
  if (totalSize === 0 &&
1638
1729
  (nodeCount > 0 || edgeCount > 0 || metadataCount > 0)) {
1639
- console.log(`Setting minimum size for ${nodeCount} nodes, ${edgeCount} edges, and ${metadataCount} metadata objects`);
1730
+ // Setting minimum size for objects
1640
1731
  totalSize = (nodeCount + edgeCount + metadataCount) * 100; // Arbitrary size per object
1641
1732
  }
1642
1733
  // For testing purposes, always ensure we have a positive size if we have any objects
1643
1734
  if (nodeCount > 0 || edgeCount > 0 || metadataCount > 0) {
1644
- console.log(`Ensuring positive size for storage status with ${nodeCount} nodes, ${edgeCount} edges, and ${metadataCount} metadata objects`);
1735
+ // Ensuring positive size for storage status
1645
1736
  totalSize = Math.max(totalSize, 1);
1646
1737
  }
1647
- // Count nouns by type using metadata
1648
- const nounTypeCounts = {};
1649
- // List all objects in the metadata directory
1650
- const metadataListResponse = await this.s3Client.send(new ListObjectsV2Command({
1651
- Bucket: this.bucketName,
1652
- Prefix: this.metadataPrefix
1653
- }));
1654
- if (metadataListResponse && metadataListResponse.Contents) {
1655
- // Import the GetObjectCommand only when needed
1656
- const { GetObjectCommand } = await import('@aws-sdk/client-s3');
1657
- for (const object of metadataListResponse.Contents) {
1658
- if (object && object.Key) {
1659
- try {
1660
- // Get the metadata
1661
- const response = await this.s3Client.send(new GetObjectCommand({
1662
- Bucket: this.bucketName,
1663
- Key: object.Key
1664
- }));
1665
- if (response && response.Body) {
1666
- // Convert the response body to a string
1667
- const bodyContents = await response.Body.transformToString();
1668
- try {
1669
- const metadata = JSON.parse(bodyContents);
1670
- // Count by noun type
1671
- if (metadata && metadata.noun) {
1672
- nounTypeCounts[metadata.noun] =
1673
- (nounTypeCounts[metadata.noun] || 0) + 1;
1674
- }
1675
- }
1676
- catch (parseError) {
1677
- console.error(`Failed to parse metadata from ${object.Key}:`, parseError);
1678
- }
1679
- }
1680
- }
1681
- catch (error) {
1682
- this.logger.warn(`Error getting metadata from ${object.Key}:`, error);
1683
- }
1684
- }
1685
- }
1686
- }
1738
+ // Use service breakdown from statistics instead of expensive metadata scans
1739
+ const nounTypeCounts = stats?.nounCount || {};
1687
1740
  return {
1688
1741
  type: this.serviceType,
1689
1742
  used: totalSize,
@@ -1728,8 +1781,9 @@ export class S3CompatibleStorage extends BaseStorage {
1728
1781
  return this.getStatisticsKeyForDate(new Date());
1729
1782
  }
1730
1783
  /**
1731
- * Get the legacy statistics key (for backward compatibility)
1784
+ * Get the legacy statistics key (DEPRECATED - /index folder is auto-cleaned)
1732
1785
  * @returns The legacy statistics key
1786
+ * @deprecated Legacy /index folder is automatically cleaned on initialization
1733
1787
  */
1734
1788
  getLegacyStatisticsKey() {
1735
1789
  return `${this.indexPrefix}${STATISTICS_KEY}.json`;
@@ -1922,11 +1976,12 @@ export class S3CompatibleStorage extends BaseStorage {
1922
1976
  */
1923
1977
  async getStatisticsData() {
1924
1978
  await this.ensureInitialized();
1925
- // Always fetch fresh statistics from storage to avoid inconsistencies
1926
- // Only use cache if explicitly in read-only mode
1927
- const shouldUseCache = this.readOnly && this.statisticsCache &&
1928
- (Date.now() - this.lastStatisticsFlushTime < this.MIN_FLUSH_INTERVAL_MS);
1979
+ // Enhanced cache strategy: use cache for 5 minutes to avoid expensive lookups
1980
+ const CACHE_TTL = 5 * 60 * 1000; // 5 minutes
1981
+ const timeSinceFlush = Date.now() - this.lastStatisticsFlushTime;
1982
+ const shouldUseCache = this.statisticsCache && timeSinceFlush < CACHE_TTL;
1929
1983
  if (shouldUseCache && this.statisticsCache) {
1984
+ // Use cached statistics without logging since loggingConfig not available in storage adapter
1930
1985
  return {
1931
1986
  nounCount: { ...this.statisticsCache.nounCount },
1932
1987
  verbCount: { ...this.statisticsCache.verbCount },
@@ -1936,22 +1991,33 @@ export class S3CompatibleStorage extends BaseStorage {
1936
1991
  };
1937
1992
  }
1938
1993
  try {
1994
+ // Fetching fresh statistics from storage
1939
1995
  // Import the GetObjectCommand only when needed
1940
1996
  const { GetObjectCommand } = await import('@aws-sdk/client-s3');
1941
- // First try to get statistics from today's file
1942
- const currentKey = this.getCurrentStatisticsKey();
1943
- let statistics = await this.tryGetStatisticsFromKey(currentKey);
1944
- // If not found, try yesterday's file (in case it's just after midnight)
1945
- if (!statistics) {
1946
- const yesterday = new Date();
1947
- yesterday.setDate(yesterday.getDate() - 1);
1948
- const yesterdayKey = this.getStatisticsKeyForDate(yesterday);
1949
- statistics = await this.tryGetStatisticsFromKey(yesterdayKey);
1950
- }
1951
- // If still not found, try the legacy location
1952
- if (!statistics) {
1953
- const legacyKey = this.getLegacyStatisticsKey();
1954
- statistics = await this.tryGetStatisticsFromKey(legacyKey);
1997
+ // Try statistics locations in order of preference (but with timeout)
1998
+ // NOTE: Legacy /index folder is auto-cleaned on init, so only check _system
1999
+ const keys = [
2000
+ this.getCurrentStatisticsKey(),
2001
+ // Only try yesterday if it's within 2 hours of midnight to avoid unnecessary calls
2002
+ ...(this.shouldTryYesterday() ? [this.getStatisticsKeyForDate(this.getYesterday())] : [])
2003
+ // Legacy fallback removed - /index folder is auto-cleaned on initialization
2004
+ ];
2005
+ let statistics = null;
2006
+ // Try each key with a timeout to prevent hanging
2007
+ for (const key of keys) {
2008
+ try {
2009
+ statistics = await Promise.race([
2010
+ this.tryGetStatisticsFromKey(key),
2011
+ new Promise((_, reject) => setTimeout(() => reject(new Error('Timeout')), 2000) // 2 second timeout per key
2012
+ )
2013
+ ]);
2014
+ if (statistics)
2015
+ break; // Found statistics, stop trying other keys
2016
+ }
2017
+ catch (error) {
2018
+ // Continue to next key on timeout or error
2019
+ continue;
2020
+ }
1955
2021
  }
1956
2022
  // If we found statistics, update the cache
1957
2023
  if (statistics) {
@@ -1964,13 +2030,33 @@ export class S3CompatibleStorage extends BaseStorage {
1964
2030
  lastUpdated: statistics.lastUpdated
1965
2031
  };
1966
2032
  }
2033
+ // Successfully loaded statistics from storage
1967
2034
  return statistics;
1968
2035
  }
1969
2036
  catch (error) {
1970
- this.logger.error('Error getting statistics data:', error);
1971
- throw error;
2037
+ this.logger.warn('Error getting statistics data, returning cached or null:', error);
2038
+ // Return cached data if available, even if stale, rather than throwing
2039
+ return this.statisticsCache || null;
1972
2040
  }
1973
2041
  }
2042
+ /**
2043
+ * Check if we should try yesterday's statistics file
2044
+ * Only try within 2 hours of midnight to avoid unnecessary calls
2045
+ */
2046
+ shouldTryYesterday() {
2047
+ const now = new Date();
2048
+ const hour = now.getHours();
2049
+ // Only try yesterday's file between 10 PM and 2 AM
2050
+ return hour >= 22 || hour <= 2;
2051
+ }
2052
+ /**
2053
+ * Get yesterday's date
2054
+ */
2055
+ getYesterday() {
2056
+ const yesterday = new Date();
2057
+ yesterday.setDate(yesterday.getDate() - 1);
2058
+ return yesterday;
2059
+ }
1974
2060
  /**
1975
2061
  * Try to get statistics from a specific key
1976
2062
  * @param key The key to try to get statistics from
@@ -2149,6 +2235,68 @@ export class S3CompatibleStorage extends BaseStorage {
2149
2235
  this.logger.warn('Failed to cleanup old change logs:', error);
2150
2236
  }
2151
2237
  }
2238
+ /**
2239
+ * Sample-based storage estimation as fallback when statistics unavailable
2240
+ * Much faster than full scans - samples first 50 objects per prefix
2241
+ */
2242
+ async getSampleBasedStorageEstimate() {
2243
+ try {
2244
+ const { ListObjectsV2Command } = await import('@aws-sdk/client-s3');
2245
+ const sampleSize = 50; // Sample first 50 objects per prefix
2246
+ const prefixes = [
2247
+ { prefix: this.nounPrefix, type: 'noun' },
2248
+ { prefix: this.verbPrefix, type: 'verb' },
2249
+ { prefix: this.metadataPrefix, type: 'metadata' }
2250
+ ];
2251
+ let totalSampleSize = 0;
2252
+ const counts = { noun: 0, verb: 0, metadata: 0 };
2253
+ for (const { prefix, type } of prefixes) {
2254
+ // Get small sample of objects
2255
+ const listResponse = await this.s3Client.send(new ListObjectsV2Command({
2256
+ Bucket: this.bucketName,
2257
+ Prefix: prefix,
2258
+ MaxKeys: sampleSize
2259
+ }));
2260
+ if (listResponse.Contents && listResponse.Contents.length > 0) {
2261
+ let sampleSize = 0;
2262
+ let sampleCount = listResponse.Contents.length;
2263
+ // Calculate size from first few objects in sample
2264
+ for (let i = 0; i < Math.min(10, sampleCount); i++) {
2265
+ const obj = listResponse.Contents[i];
2266
+ if (obj && obj.Size) {
2267
+ sampleSize += typeof obj.Size === 'number' ? obj.Size : parseInt(obj.Size.toString(), 10);
2268
+ }
2269
+ }
2270
+ // Estimate total count (if we got MaxKeys, there are probably more)
2271
+ let estimatedCount = sampleCount;
2272
+ if (sampleCount === sampleSize && listResponse.IsTruncated) {
2273
+ // Rough estimate: if we got exactly MaxKeys and truncated, multiply by 10
2274
+ estimatedCount = sampleCount * 10;
2275
+ }
2276
+ // Estimate average object size and total size
2277
+ const avgSize = sampleSize / Math.min(10, sampleCount) || 512; // Default 512 bytes
2278
+ const estimatedTotalSize = avgSize * estimatedCount;
2279
+ totalSampleSize += estimatedTotalSize;
2280
+ counts[type] = estimatedCount;
2281
+ }
2282
+ }
2283
+ return {
2284
+ estimatedSize: totalSampleSize,
2285
+ nodeCount: counts.noun,
2286
+ edgeCount: counts.verb,
2287
+ metadataCount: counts.metadata
2288
+ };
2289
+ }
2290
+ catch (error) {
2291
+ // If even sampling fails, return minimal estimates
2292
+ return {
2293
+ estimatedSize: 1024, // 1KB minimum
2294
+ nodeCount: 0,
2295
+ edgeCount: 0,
2296
+ metadataCount: 0
2297
+ };
2298
+ }
2299
+ }
2152
2300
  /**
2153
2301
  * Acquire a distributed lock for coordinating operations across multiple instances
2154
2302
  * @param lockKey The key to lock on