@soulcraft/brainy 0.54.4 → 0.54.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -378,6 +378,12 @@ export interface StorageAdapter {
378
378
  deleteVerb(id: string): Promise<void>;
379
379
  saveMetadata(id: string, metadata: any): Promise<void>;
380
380
  getMetadata(id: string): Promise<any | null>;
381
+ /**
382
+ * Get multiple metadata objects in batches (prevents socket exhaustion)
383
+ * @param ids Array of IDs to get metadata for
384
+ * @returns Promise that resolves to a Map of id -> metadata
385
+ */
386
+ getMetadataBatch?(ids: string[]): Promise<Map<string, any>>;
381
387
  /**
382
388
  * Save verb metadata to storage
383
389
  * @param id The ID of the verb
@@ -106,6 +106,11 @@ export declare class S3CompatibleStorage extends BaseStorage {
106
106
  * Initialize the storage adapter
107
107
  */
108
108
  init(): Promise<void>;
109
+ /**
110
+ * Auto-cleanup legacy /index folder during initialization
111
+ * This removes old index data that has been migrated to _system
112
+ */
113
+ private cleanupLegacyIndexFolder;
109
114
  /**
110
115
  * Initialize write buffers for high-volume scenarios
111
116
  */
@@ -332,6 +337,15 @@ export declare class S3CompatibleStorage extends BaseStorage {
332
337
  * Save noun metadata to storage
333
338
  */
334
339
  saveNounMetadata(id: string, metadata: any): Promise<void>;
340
+ /**
341
+ * Get multiple metadata objects in batches (CRITICAL: Prevents socket exhaustion)
342
+ * This is the solution to the metadata reading socket exhaustion during initialization
343
+ */
344
+ getMetadataBatch(ids: string[]): Promise<Map<string, any>>;
345
+ /**
346
+ * Get multiple verb metadata objects in batches (prevents socket exhaustion)
347
+ */
348
+ getVerbMetadataBatch(ids: string[]): Promise<Map<string, any>>;
335
349
  /**
336
350
  * Get noun metadata from storage
337
351
  */
@@ -371,8 +385,9 @@ export declare class S3CompatibleStorage extends BaseStorage {
371
385
  */
372
386
  private getCurrentStatisticsKey;
373
387
  /**
374
- * Get the legacy statistics key (for backward compatibility)
388
+ * Get the legacy statistics key (DEPRECATED - /index folder is auto-cleaned)
375
389
  * @returns The legacy statistics key
390
+ * @deprecated Legacy /index folder is automatically cleaned on initialization
376
391
  */
377
392
  private getLegacyStatisticsKey;
378
393
  /**
@@ -8,7 +8,7 @@ import { StorageCompatibilityLayer } from '../backwardCompatibility.js';
8
8
  import { StorageOperationExecutors } from '../../utils/operationUtils.js';
9
9
  import { BrainyError } from '../../errors/brainyError.js';
10
10
  import { CacheManager } from '../cacheManager.js';
11
- import { createModuleLogger } from '../../utils/logger.js';
11
+ import { createModuleLogger, prodLog } from '../../utils/logger.js';
12
12
  import { getGlobalSocketManager } from '../../utils/adaptiveSocketManager.js';
13
13
  import { getGlobalBackpressure } from '../../utils/adaptiveBackpressure.js';
14
14
  import { getWriteBuffer } from '../../utils/writeBuffer.js';
@@ -230,6 +230,8 @@ export class S3CompatibleStorage extends BaseStorage {
230
230
  this.initializeBuffers();
231
231
  // Initialize request coalescer
232
232
  this.initializeCoalescer();
233
+ // Auto-cleanup legacy /index folder on initialization
234
+ await this.cleanupLegacyIndexFolder();
233
235
  this.isInitialized = true;
234
236
  this.logger.info(`Initialized ${this.serviceType} storage with bucket ${this.bucketName}`);
235
237
  }
@@ -238,6 +240,57 @@ export class S3CompatibleStorage extends BaseStorage {
238
240
  throw new Error(`Failed to initialize ${this.serviceType} storage: ${error}`);
239
241
  }
240
242
  }
243
+ /**
244
+ * Auto-cleanup legacy /index folder during initialization
245
+ * This removes old index data that has been migrated to _system
246
+ */
247
+ async cleanupLegacyIndexFolder() {
248
+ try {
249
+ // Check if there are any objects in the legacy index folder
250
+ const { ListObjectsV2Command } = await import('@aws-sdk/client-s3');
251
+ const listResponse = await this.s3Client.send(new ListObjectsV2Command({
252
+ Bucket: this.bucketName,
253
+ Prefix: this.indexPrefix,
254
+ MaxKeys: 1 // Just check if anything exists
255
+ }));
256
+ // If there are objects in the legacy index folder, clean them up
257
+ if (listResponse.Contents && listResponse.Contents.length > 0) {
258
+ prodLog.info(`🧹 Cleaning up legacy /index folder during initialization...`);
259
+ // Use the existing deleteObjectsWithPrefix function logic
260
+ const { ListObjectsV2Command, DeleteObjectsCommand } = await import('@aws-sdk/client-s3');
261
+ let continuationToken = undefined;
262
+ let totalDeleted = 0;
263
+ do {
264
+ const listResponseBatch = await this.s3Client.send(new ListObjectsV2Command({
265
+ Bucket: this.bucketName,
266
+ Prefix: this.indexPrefix,
267
+ ContinuationToken: continuationToken
268
+ }));
269
+ if (listResponseBatch.Contents && listResponseBatch.Contents.length > 0) {
270
+ const objectsToDelete = listResponseBatch.Contents.map((obj) => ({
271
+ Key: obj.Key
272
+ }));
273
+ await this.s3Client.send(new DeleteObjectsCommand({
274
+ Bucket: this.bucketName,
275
+ Delete: {
276
+ Objects: objectsToDelete
277
+ }
278
+ }));
279
+ totalDeleted += objectsToDelete.length;
280
+ }
281
+ continuationToken = listResponseBatch.NextContinuationToken;
282
+ } while (continuationToken);
283
+ prodLog.info(`✅ Cleaned up ${totalDeleted} legacy index objects`);
284
+ }
285
+ else {
286
+ prodLog.debug('No legacy /index folder found - already clean');
287
+ }
288
+ }
289
+ catch (error) {
290
+ // Don't fail initialization if cleanup fails
291
+ prodLog.warn('Failed to cleanup legacy /index folder:', error);
292
+ }
293
+ }
241
294
  /**
242
295
  * Initialize write buffers for high-volume scenarios
243
296
  */
@@ -1417,6 +1470,75 @@ export class S3CompatibleStorage extends BaseStorage {
1417
1470
  throw new Error(`Failed to save noun metadata for ${id}: ${error}`);
1418
1471
  }
1419
1472
  }
1473
+ /**
1474
+ * Get multiple metadata objects in batches (CRITICAL: Prevents socket exhaustion)
1475
+ * This is the solution to the metadata reading socket exhaustion during initialization
1476
+ */
1477
+ async getMetadataBatch(ids) {
1478
+ await this.ensureInitialized();
1479
+ const results = new Map();
1480
+ const batchSize = Math.min(this.getBatchSize(), 10); // Smaller batches for metadata to prevent socket exhaustion
1481
+ // Process in smaller batches to avoid socket exhaustion
1482
+ for (let i = 0; i < ids.length; i += batchSize) {
1483
+ const batch = ids.slice(i, i + batchSize);
1484
+ // Process batch with concurrency control
1485
+ const batchPromises = batch.map(async (id) => {
1486
+ try {
1487
+ const metadata = await this.getMetadata(id);
1488
+ return { id, metadata };
1489
+ }
1490
+ catch (error) {
1491
+ // Don't fail entire batch if one metadata read fails
1492
+ this.logger.debug(`Failed to read metadata for ${id}:`, error);
1493
+ return { id, metadata: null };
1494
+ }
1495
+ });
1496
+ const batchResults = await Promise.all(batchPromises);
1497
+ // Add results to map
1498
+ for (const { id, metadata } of batchResults) {
1499
+ if (metadata !== null) {
1500
+ results.set(id, metadata);
1501
+ }
1502
+ }
1503
+ // Yield to prevent socket exhaustion between batches
1504
+ await new Promise(resolve => setImmediate(resolve));
1505
+ }
1506
+ return results;
1507
+ }
1508
+ /**
1509
+ * Get multiple verb metadata objects in batches (prevents socket exhaustion)
1510
+ */
1511
+ async getVerbMetadataBatch(ids) {
1512
+ await this.ensureInitialized();
1513
+ const results = new Map();
1514
+ const batchSize = Math.min(this.getBatchSize(), 10); // Smaller batches for metadata to prevent socket exhaustion
1515
+ // Process in smaller batches to avoid socket exhaustion
1516
+ for (let i = 0; i < ids.length; i += batchSize) {
1517
+ const batch = ids.slice(i, i + batchSize);
1518
+ // Process batch with concurrency control
1519
+ const batchPromises = batch.map(async (id) => {
1520
+ try {
1521
+ const metadata = await this.getVerbMetadata(id);
1522
+ return { id, metadata };
1523
+ }
1524
+ catch (error) {
1525
+ // Don't fail entire batch if one metadata read fails
1526
+ this.logger.debug(`Failed to read verb metadata for ${id}:`, error);
1527
+ return { id, metadata: null };
1528
+ }
1529
+ });
1530
+ const batchResults = await Promise.all(batchPromises);
1531
+ // Add results to map
1532
+ for (const { id, metadata } of batchResults) {
1533
+ if (metadata !== null) {
1534
+ results.set(id, metadata);
1535
+ }
1536
+ }
1537
+ // Yield to prevent socket exhaustion between batches
1538
+ await new Promise(resolve => setImmediate(resolve));
1539
+ }
1540
+ return results;
1541
+ }
1420
1542
  /**
1421
1543
  * Get noun metadata from storage
1422
1544
  */
@@ -1474,9 +1596,9 @@ export class S3CompatibleStorage extends BaseStorage {
1474
1596
  try {
1475
1597
  // Import the GetObjectCommand only when needed
1476
1598
  const { GetObjectCommand } = await import('@aws-sdk/client-s3');
1477
- console.log(`Getting metadata for ${id} from bucket ${this.bucketName}`);
1599
+ prodLog.debug(`Getting metadata for ${id} from bucket ${this.bucketName}`);
1478
1600
  const key = `${this.metadataPrefix}${id}.json`;
1479
- console.log(`Looking for metadata at key: ${key}`);
1601
+ prodLog.debug(`Looking for metadata at key: ${key}`);
1480
1602
  // Try to get the metadata from the metadata directory
1481
1603
  const response = await this.s3Client.send(new GetObjectCommand({
1482
1604
  Bucket: this.bucketName,
@@ -1484,20 +1606,20 @@ export class S3CompatibleStorage extends BaseStorage {
1484
1606
  }));
1485
1607
  // Check if response is null or undefined (can happen in mock implementations)
1486
1608
  if (!response || !response.Body) {
1487
- console.log(`No metadata found for ${id}`);
1609
+ prodLog.debug(`No metadata found for ${id}`);
1488
1610
  return null;
1489
1611
  }
1490
1612
  // Convert the response body to a string
1491
1613
  const bodyContents = await response.Body.transformToString();
1492
- console.log(`Retrieved metadata body: ${bodyContents}`);
1614
+ prodLog.debug(`Retrieved metadata body: ${bodyContents}`);
1493
1615
  // Parse the JSON string
1494
1616
  try {
1495
1617
  const parsedMetadata = JSON.parse(bodyContents);
1496
- console.log(`Successfully retrieved metadata for ${id}:`, parsedMetadata);
1618
+ prodLog.debug(`Successfully retrieved metadata for ${id}:`, parsedMetadata);
1497
1619
  return parsedMetadata;
1498
1620
  }
1499
1621
  catch (parseError) {
1500
- console.error(`Failed to parse metadata for ${id}:`, parseError);
1622
+ prodLog.error(`Failed to parse metadata for ${id}:`, parseError);
1501
1623
  return null;
1502
1624
  }
1503
1625
  }
@@ -1510,7 +1632,7 @@ export class S3CompatibleStorage extends BaseStorage {
1510
1632
  (error.message.includes('NoSuchKey') ||
1511
1633
  error.message.includes('not found') ||
1512
1634
  error.message.includes('does not exist')))) {
1513
- console.log(`Metadata not found for ${id}`);
1635
+ prodLog.debug(`Metadata not found for ${id}`);
1514
1636
  return null;
1515
1637
  }
1516
1638
  // For other types of errors, convert to BrainyError for better classification
@@ -1564,7 +1686,7 @@ export class S3CompatibleStorage extends BaseStorage {
1564
1686
  this.statisticsModified = false;
1565
1687
  }
1566
1688
  catch (error) {
1567
- console.error('Failed to clear storage:', error);
1689
+ prodLog.error('Failed to clear storage:', error);
1568
1690
  throw new Error(`Failed to clear storage: ${error}`);
1569
1691
  }
1570
1692
  }
@@ -1659,8 +1781,9 @@ export class S3CompatibleStorage extends BaseStorage {
1659
1781
  return this.getStatisticsKeyForDate(new Date());
1660
1782
  }
1661
1783
  /**
1662
- * Get the legacy statistics key (for backward compatibility)
1784
+ * Get the legacy statistics key (DEPRECATED - /index folder is auto-cleaned)
1663
1785
  * @returns The legacy statistics key
1786
+ * @deprecated Legacy /index folder is automatically cleaned on initialization
1664
1787
  */
1665
1788
  getLegacyStatisticsKey() {
1666
1789
  return `${this.indexPrefix}${STATISTICS_KEY}.json`;
@@ -1872,11 +1995,12 @@ export class S3CompatibleStorage extends BaseStorage {
1872
1995
  // Import the GetObjectCommand only when needed
1873
1996
  const { GetObjectCommand } = await import('@aws-sdk/client-s3');
1874
1997
  // Try statistics locations in order of preference (but with timeout)
1998
+ // NOTE: Legacy /index folder is auto-cleaned on init, so only check _system
1875
1999
  const keys = [
1876
2000
  this.getCurrentStatisticsKey(),
1877
2001
  // Only try yesterday if it's within 2 hours of midnight to avoid unnecessary calls
1878
- ...(this.shouldTryYesterday() ? [this.getStatisticsKeyForDate(this.getYesterday())] : []),
1879
- this.getLegacyStatisticsKey()
2002
+ ...(this.shouldTryYesterday() ? [this.getStatisticsKeyForDate(this.getYesterday())] : [])
2003
+ // Legacy fallback removed - /index folder is auto-cleaned on initialization
1880
2004
  ];
1881
2005
  let statistics = null;
1882
2006
  // Try each key with a timeout to prevent hanging