@soulcraft/brainy 0.54.4 → 0.54.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/brainyData.js +13 -12
- package/dist/brainyData.js.map +1 -1
- package/dist/coreTypes.d.ts +6 -0
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +16 -1
- package/dist/storage/adapters/s3CompatibleStorage.js +136 -12
- package/dist/storage/adapters/s3CompatibleStorage.js.map +1 -1
- package/dist/utils/environment.d.ts +12 -0
- package/dist/utils/environment.js +90 -0
- package/dist/utils/environment.js.map +1 -1
- package/dist/utils/logger.d.ts +27 -0
- package/dist/utils/logger.js +91 -3
- package/dist/utils/logger.js.map +1 -1
- package/dist/utils/metadataIndex.js +86 -23
- package/dist/utils/metadataIndex.js.map +1 -1
- package/dist/utils/workerUtils.js.map +1 -1
- package/package.json +1 -1
package/dist/coreTypes.d.ts
CHANGED
|
@@ -378,6 +378,12 @@ export interface StorageAdapter {
|
|
|
378
378
|
deleteVerb(id: string): Promise<void>;
|
|
379
379
|
saveMetadata(id: string, metadata: any): Promise<void>;
|
|
380
380
|
getMetadata(id: string): Promise<any | null>;
|
|
381
|
+
/**
|
|
382
|
+
* Get multiple metadata objects in batches (prevents socket exhaustion)
|
|
383
|
+
* @param ids Array of IDs to get metadata for
|
|
384
|
+
* @returns Promise that resolves to a Map of id -> metadata
|
|
385
|
+
*/
|
|
386
|
+
getMetadataBatch?(ids: string[]): Promise<Map<string, any>>;
|
|
381
387
|
/**
|
|
382
388
|
* Save verb metadata to storage
|
|
383
389
|
* @param id The ID of the verb
|
|
@@ -106,6 +106,11 @@ export declare class S3CompatibleStorage extends BaseStorage {
|
|
|
106
106
|
* Initialize the storage adapter
|
|
107
107
|
*/
|
|
108
108
|
init(): Promise<void>;
|
|
109
|
+
/**
|
|
110
|
+
* Auto-cleanup legacy /index folder during initialization
|
|
111
|
+
* This removes old index data that has been migrated to _system
|
|
112
|
+
*/
|
|
113
|
+
private cleanupLegacyIndexFolder;
|
|
109
114
|
/**
|
|
110
115
|
* Initialize write buffers for high-volume scenarios
|
|
111
116
|
*/
|
|
@@ -332,6 +337,15 @@ export declare class S3CompatibleStorage extends BaseStorage {
|
|
|
332
337
|
* Save noun metadata to storage
|
|
333
338
|
*/
|
|
334
339
|
saveNounMetadata(id: string, metadata: any): Promise<void>;
|
|
340
|
+
/**
|
|
341
|
+
* Get multiple metadata objects in batches (CRITICAL: Prevents socket exhaustion)
|
|
342
|
+
* This is the solution to the metadata reading socket exhaustion during initialization
|
|
343
|
+
*/
|
|
344
|
+
getMetadataBatch(ids: string[]): Promise<Map<string, any>>;
|
|
345
|
+
/**
|
|
346
|
+
* Get multiple verb metadata objects in batches (prevents socket exhaustion)
|
|
347
|
+
*/
|
|
348
|
+
getVerbMetadataBatch(ids: string[]): Promise<Map<string, any>>;
|
|
335
349
|
/**
|
|
336
350
|
* Get noun metadata from storage
|
|
337
351
|
*/
|
|
@@ -371,8 +385,9 @@ export declare class S3CompatibleStorage extends BaseStorage {
|
|
|
371
385
|
*/
|
|
372
386
|
private getCurrentStatisticsKey;
|
|
373
387
|
/**
|
|
374
|
-
* Get the legacy statistics key (
|
|
388
|
+
* Get the legacy statistics key (DEPRECATED - /index folder is auto-cleaned)
|
|
375
389
|
* @returns The legacy statistics key
|
|
390
|
+
* @deprecated Legacy /index folder is automatically cleaned on initialization
|
|
376
391
|
*/
|
|
377
392
|
private getLegacyStatisticsKey;
|
|
378
393
|
/**
|
|
@@ -8,7 +8,7 @@ import { StorageCompatibilityLayer } from '../backwardCompatibility.js';
|
|
|
8
8
|
import { StorageOperationExecutors } from '../../utils/operationUtils.js';
|
|
9
9
|
import { BrainyError } from '../../errors/brainyError.js';
|
|
10
10
|
import { CacheManager } from '../cacheManager.js';
|
|
11
|
-
import { createModuleLogger } from '../../utils/logger.js';
|
|
11
|
+
import { createModuleLogger, prodLog } from '../../utils/logger.js';
|
|
12
12
|
import { getGlobalSocketManager } from '../../utils/adaptiveSocketManager.js';
|
|
13
13
|
import { getGlobalBackpressure } from '../../utils/adaptiveBackpressure.js';
|
|
14
14
|
import { getWriteBuffer } from '../../utils/writeBuffer.js';
|
|
@@ -230,6 +230,8 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
230
230
|
this.initializeBuffers();
|
|
231
231
|
// Initialize request coalescer
|
|
232
232
|
this.initializeCoalescer();
|
|
233
|
+
// Auto-cleanup legacy /index folder on initialization
|
|
234
|
+
await this.cleanupLegacyIndexFolder();
|
|
233
235
|
this.isInitialized = true;
|
|
234
236
|
this.logger.info(`Initialized ${this.serviceType} storage with bucket ${this.bucketName}`);
|
|
235
237
|
}
|
|
@@ -238,6 +240,57 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
238
240
|
throw new Error(`Failed to initialize ${this.serviceType} storage: ${error}`);
|
|
239
241
|
}
|
|
240
242
|
}
|
|
243
|
+
/**
|
|
244
|
+
* Auto-cleanup legacy /index folder during initialization
|
|
245
|
+
* This removes old index data that has been migrated to _system
|
|
246
|
+
*/
|
|
247
|
+
async cleanupLegacyIndexFolder() {
|
|
248
|
+
try {
|
|
249
|
+
// Check if there are any objects in the legacy index folder
|
|
250
|
+
const { ListObjectsV2Command } = await import('@aws-sdk/client-s3');
|
|
251
|
+
const listResponse = await this.s3Client.send(new ListObjectsV2Command({
|
|
252
|
+
Bucket: this.bucketName,
|
|
253
|
+
Prefix: this.indexPrefix,
|
|
254
|
+
MaxKeys: 1 // Just check if anything exists
|
|
255
|
+
}));
|
|
256
|
+
// If there are objects in the legacy index folder, clean them up
|
|
257
|
+
if (listResponse.Contents && listResponse.Contents.length > 0) {
|
|
258
|
+
prodLog.info(`🧹 Cleaning up legacy /index folder during initialization...`);
|
|
259
|
+
// Use the existing deleteObjectsWithPrefix function logic
|
|
260
|
+
const { ListObjectsV2Command, DeleteObjectsCommand } = await import('@aws-sdk/client-s3');
|
|
261
|
+
let continuationToken = undefined;
|
|
262
|
+
let totalDeleted = 0;
|
|
263
|
+
do {
|
|
264
|
+
const listResponseBatch = await this.s3Client.send(new ListObjectsV2Command({
|
|
265
|
+
Bucket: this.bucketName,
|
|
266
|
+
Prefix: this.indexPrefix,
|
|
267
|
+
ContinuationToken: continuationToken
|
|
268
|
+
}));
|
|
269
|
+
if (listResponseBatch.Contents && listResponseBatch.Contents.length > 0) {
|
|
270
|
+
const objectsToDelete = listResponseBatch.Contents.map((obj) => ({
|
|
271
|
+
Key: obj.Key
|
|
272
|
+
}));
|
|
273
|
+
await this.s3Client.send(new DeleteObjectsCommand({
|
|
274
|
+
Bucket: this.bucketName,
|
|
275
|
+
Delete: {
|
|
276
|
+
Objects: objectsToDelete
|
|
277
|
+
}
|
|
278
|
+
}));
|
|
279
|
+
totalDeleted += objectsToDelete.length;
|
|
280
|
+
}
|
|
281
|
+
continuationToken = listResponseBatch.NextContinuationToken;
|
|
282
|
+
} while (continuationToken);
|
|
283
|
+
prodLog.info(`✅ Cleaned up ${totalDeleted} legacy index objects`);
|
|
284
|
+
}
|
|
285
|
+
else {
|
|
286
|
+
prodLog.debug('No legacy /index folder found - already clean');
|
|
287
|
+
}
|
|
288
|
+
}
|
|
289
|
+
catch (error) {
|
|
290
|
+
// Don't fail initialization if cleanup fails
|
|
291
|
+
prodLog.warn('Failed to cleanup legacy /index folder:', error);
|
|
292
|
+
}
|
|
293
|
+
}
|
|
241
294
|
/**
|
|
242
295
|
* Initialize write buffers for high-volume scenarios
|
|
243
296
|
*/
|
|
@@ -1417,6 +1470,75 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
1417
1470
|
throw new Error(`Failed to save noun metadata for ${id}: ${error}`);
|
|
1418
1471
|
}
|
|
1419
1472
|
}
|
|
1473
|
+
/**
|
|
1474
|
+
* Get multiple metadata objects in batches (CRITICAL: Prevents socket exhaustion)
|
|
1475
|
+
* This is the solution to the metadata reading socket exhaustion during initialization
|
|
1476
|
+
*/
|
|
1477
|
+
async getMetadataBatch(ids) {
|
|
1478
|
+
await this.ensureInitialized();
|
|
1479
|
+
const results = new Map();
|
|
1480
|
+
const batchSize = Math.min(this.getBatchSize(), 10); // Smaller batches for metadata to prevent socket exhaustion
|
|
1481
|
+
// Process in smaller batches to avoid socket exhaustion
|
|
1482
|
+
for (let i = 0; i < ids.length; i += batchSize) {
|
|
1483
|
+
const batch = ids.slice(i, i + batchSize);
|
|
1484
|
+
// Process batch with concurrency control
|
|
1485
|
+
const batchPromises = batch.map(async (id) => {
|
|
1486
|
+
try {
|
|
1487
|
+
const metadata = await this.getMetadata(id);
|
|
1488
|
+
return { id, metadata };
|
|
1489
|
+
}
|
|
1490
|
+
catch (error) {
|
|
1491
|
+
// Don't fail entire batch if one metadata read fails
|
|
1492
|
+
this.logger.debug(`Failed to read metadata for ${id}:`, error);
|
|
1493
|
+
return { id, metadata: null };
|
|
1494
|
+
}
|
|
1495
|
+
});
|
|
1496
|
+
const batchResults = await Promise.all(batchPromises);
|
|
1497
|
+
// Add results to map
|
|
1498
|
+
for (const { id, metadata } of batchResults) {
|
|
1499
|
+
if (metadata !== null) {
|
|
1500
|
+
results.set(id, metadata);
|
|
1501
|
+
}
|
|
1502
|
+
}
|
|
1503
|
+
// Yield to prevent socket exhaustion between batches
|
|
1504
|
+
await new Promise(resolve => setImmediate(resolve));
|
|
1505
|
+
}
|
|
1506
|
+
return results;
|
|
1507
|
+
}
|
|
1508
|
+
/**
|
|
1509
|
+
* Get multiple verb metadata objects in batches (prevents socket exhaustion)
|
|
1510
|
+
*/
|
|
1511
|
+
async getVerbMetadataBatch(ids) {
|
|
1512
|
+
await this.ensureInitialized();
|
|
1513
|
+
const results = new Map();
|
|
1514
|
+
const batchSize = Math.min(this.getBatchSize(), 10); // Smaller batches for metadata to prevent socket exhaustion
|
|
1515
|
+
// Process in smaller batches to avoid socket exhaustion
|
|
1516
|
+
for (let i = 0; i < ids.length; i += batchSize) {
|
|
1517
|
+
const batch = ids.slice(i, i + batchSize);
|
|
1518
|
+
// Process batch with concurrency control
|
|
1519
|
+
const batchPromises = batch.map(async (id) => {
|
|
1520
|
+
try {
|
|
1521
|
+
const metadata = await this.getVerbMetadata(id);
|
|
1522
|
+
return { id, metadata };
|
|
1523
|
+
}
|
|
1524
|
+
catch (error) {
|
|
1525
|
+
// Don't fail entire batch if one metadata read fails
|
|
1526
|
+
this.logger.debug(`Failed to read verb metadata for ${id}:`, error);
|
|
1527
|
+
return { id, metadata: null };
|
|
1528
|
+
}
|
|
1529
|
+
});
|
|
1530
|
+
const batchResults = await Promise.all(batchPromises);
|
|
1531
|
+
// Add results to map
|
|
1532
|
+
for (const { id, metadata } of batchResults) {
|
|
1533
|
+
if (metadata !== null) {
|
|
1534
|
+
results.set(id, metadata);
|
|
1535
|
+
}
|
|
1536
|
+
}
|
|
1537
|
+
// Yield to prevent socket exhaustion between batches
|
|
1538
|
+
await new Promise(resolve => setImmediate(resolve));
|
|
1539
|
+
}
|
|
1540
|
+
return results;
|
|
1541
|
+
}
|
|
1420
1542
|
/**
|
|
1421
1543
|
* Get noun metadata from storage
|
|
1422
1544
|
*/
|
|
@@ -1474,9 +1596,9 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
1474
1596
|
try {
|
|
1475
1597
|
// Import the GetObjectCommand only when needed
|
|
1476
1598
|
const { GetObjectCommand } = await import('@aws-sdk/client-s3');
|
|
1477
|
-
|
|
1599
|
+
prodLog.debug(`Getting metadata for ${id} from bucket ${this.bucketName}`);
|
|
1478
1600
|
const key = `${this.metadataPrefix}${id}.json`;
|
|
1479
|
-
|
|
1601
|
+
prodLog.debug(`Looking for metadata at key: ${key}`);
|
|
1480
1602
|
// Try to get the metadata from the metadata directory
|
|
1481
1603
|
const response = await this.s3Client.send(new GetObjectCommand({
|
|
1482
1604
|
Bucket: this.bucketName,
|
|
@@ -1484,20 +1606,20 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
1484
1606
|
}));
|
|
1485
1607
|
// Check if response is null or undefined (can happen in mock implementations)
|
|
1486
1608
|
if (!response || !response.Body) {
|
|
1487
|
-
|
|
1609
|
+
prodLog.debug(`No metadata found for ${id}`);
|
|
1488
1610
|
return null;
|
|
1489
1611
|
}
|
|
1490
1612
|
// Convert the response body to a string
|
|
1491
1613
|
const bodyContents = await response.Body.transformToString();
|
|
1492
|
-
|
|
1614
|
+
prodLog.debug(`Retrieved metadata body: ${bodyContents}`);
|
|
1493
1615
|
// Parse the JSON string
|
|
1494
1616
|
try {
|
|
1495
1617
|
const parsedMetadata = JSON.parse(bodyContents);
|
|
1496
|
-
|
|
1618
|
+
prodLog.debug(`Successfully retrieved metadata for ${id}:`, parsedMetadata);
|
|
1497
1619
|
return parsedMetadata;
|
|
1498
1620
|
}
|
|
1499
1621
|
catch (parseError) {
|
|
1500
|
-
|
|
1622
|
+
prodLog.error(`Failed to parse metadata for ${id}:`, parseError);
|
|
1501
1623
|
return null;
|
|
1502
1624
|
}
|
|
1503
1625
|
}
|
|
@@ -1510,7 +1632,7 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
1510
1632
|
(error.message.includes('NoSuchKey') ||
|
|
1511
1633
|
error.message.includes('not found') ||
|
|
1512
1634
|
error.message.includes('does not exist')))) {
|
|
1513
|
-
|
|
1635
|
+
prodLog.debug(`Metadata not found for ${id}`);
|
|
1514
1636
|
return null;
|
|
1515
1637
|
}
|
|
1516
1638
|
// For other types of errors, convert to BrainyError for better classification
|
|
@@ -1564,7 +1686,7 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
1564
1686
|
this.statisticsModified = false;
|
|
1565
1687
|
}
|
|
1566
1688
|
catch (error) {
|
|
1567
|
-
|
|
1689
|
+
prodLog.error('Failed to clear storage:', error);
|
|
1568
1690
|
throw new Error(`Failed to clear storage: ${error}`);
|
|
1569
1691
|
}
|
|
1570
1692
|
}
|
|
@@ -1659,8 +1781,9 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
1659
1781
|
return this.getStatisticsKeyForDate(new Date());
|
|
1660
1782
|
}
|
|
1661
1783
|
/**
|
|
1662
|
-
* Get the legacy statistics key (
|
|
1784
|
+
* Get the legacy statistics key (DEPRECATED - /index folder is auto-cleaned)
|
|
1663
1785
|
* @returns The legacy statistics key
|
|
1786
|
+
* @deprecated Legacy /index folder is automatically cleaned on initialization
|
|
1664
1787
|
*/
|
|
1665
1788
|
getLegacyStatisticsKey() {
|
|
1666
1789
|
return `${this.indexPrefix}${STATISTICS_KEY}.json`;
|
|
@@ -1872,11 +1995,12 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
1872
1995
|
// Import the GetObjectCommand only when needed
|
|
1873
1996
|
const { GetObjectCommand } = await import('@aws-sdk/client-s3');
|
|
1874
1997
|
// Try statistics locations in order of preference (but with timeout)
|
|
1998
|
+
// NOTE: Legacy /index folder is auto-cleaned on init, so only check _system
|
|
1875
1999
|
const keys = [
|
|
1876
2000
|
this.getCurrentStatisticsKey(),
|
|
1877
2001
|
// Only try yesterday if it's within 2 hours of midnight to avoid unnecessary calls
|
|
1878
|
-
...(this.shouldTryYesterday() ? [this.getStatisticsKeyForDate(this.getYesterday())] : [])
|
|
1879
|
-
|
|
2002
|
+
...(this.shouldTryYesterday() ? [this.getStatisticsKeyForDate(this.getYesterday())] : [])
|
|
2003
|
+
// Legacy fallback removed - /index folder is auto-cleaned on initialization
|
|
1880
2004
|
];
|
|
1881
2005
|
let statistics = null;
|
|
1882
2006
|
// Try each key with a timeout to prevent hanging
|