@soulcraft/brainy 0.54.3 → 0.54.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/brainyData.js +30 -19
- package/dist/brainyData.js.map +1 -1
- package/dist/coreTypes.d.ts +6 -0
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +31 -1
- package/dist/storage/adapters/s3CompatibleStorage.js +275 -127
- package/dist/storage/adapters/s3CompatibleStorage.js.map +1 -1
- package/dist/utils/environment.d.ts +12 -0
- package/dist/utils/environment.js +90 -0
- package/dist/utils/environment.js.map +1 -1
- package/dist/utils/logger.d.ts +27 -0
- package/dist/utils/logger.js +91 -3
- package/dist/utils/logger.js.map +1 -1
- package/dist/utils/metadataIndex.d.ts +7 -1
- package/dist/utils/metadataIndex.js +145 -20
- package/dist/utils/metadataIndex.js.map +1 -1
- package/dist/utils/workerUtils.js.map +1 -1
- package/package.json +1 -1
package/dist/coreTypes.d.ts
CHANGED
|
@@ -378,6 +378,12 @@ export interface StorageAdapter {
|
|
|
378
378
|
deleteVerb(id: string): Promise<void>;
|
|
379
379
|
saveMetadata(id: string, metadata: any): Promise<void>;
|
|
380
380
|
getMetadata(id: string): Promise<any | null>;
|
|
381
|
+
/**
|
|
382
|
+
* Get multiple metadata objects in batches (prevents socket exhaustion)
|
|
383
|
+
* @param ids Array of IDs to get metadata for
|
|
384
|
+
* @returns Promise that resolves to a Map of id -> metadata; ids with no retrievable metadata may be absent from the Map
|
|
385
|
+
*/
|
|
386
|
+
getMetadataBatch?(ids: string[]): Promise<Map<string, any>>;
|
|
381
387
|
/**
|
|
382
388
|
* Save verb metadata to storage
|
|
383
389
|
* @param id The ID of the verb
|
|
@@ -106,6 +106,11 @@ export declare class S3CompatibleStorage extends BaseStorage {
|
|
|
106
106
|
* Initialize the storage adapter
|
|
107
107
|
*/
|
|
108
108
|
init(): Promise<void>;
|
|
109
|
+
/**
|
|
110
|
+
* Auto-cleanup legacy /index folder during initialization
|
|
111
|
+
* This removes old index data that has been migrated to _system
|
|
112
|
+
*/
|
|
113
|
+
private cleanupLegacyIndexFolder;
|
|
109
114
|
/**
|
|
110
115
|
* Initialize write buffers for high-volume scenarios
|
|
111
116
|
*/
|
|
@@ -332,6 +337,15 @@ export declare class S3CompatibleStorage extends BaseStorage {
|
|
|
332
337
|
* Save noun metadata to storage
|
|
333
338
|
*/
|
|
334
339
|
saveNounMetadata(id: string, metadata: any): Promise<void>;
|
|
340
|
+
/**
|
|
341
|
+
* Get multiple metadata objects in batches (CRITICAL: Prevents socket exhaustion)
|
|
342
|
+
* This is the solution to the metadata reading socket exhaustion during initialization
|
|
343
|
+
*/
|
|
344
|
+
getMetadataBatch(ids: string[]): Promise<Map<string, any>>;
|
|
345
|
+
/**
|
|
346
|
+
* Get multiple verb metadata objects in batches (prevents socket exhaustion)
|
|
347
|
+
*/
|
|
348
|
+
getVerbMetadataBatch(ids: string[]): Promise<Map<string, any>>;
|
|
335
349
|
/**
|
|
336
350
|
* Get noun metadata from storage
|
|
337
351
|
*/
|
|
@@ -346,6 +360,7 @@ export declare class S3CompatibleStorage extends BaseStorage {
|
|
|
346
360
|
clear(): Promise<void>;
|
|
347
361
|
/**
|
|
348
362
|
* Get information about storage usage and capacity
|
|
363
|
+
* Optimized version that uses cached statistics instead of expensive full scans
|
|
349
364
|
*/
|
|
350
365
|
getStorageStatus(): Promise<{
|
|
351
366
|
type: string;
|
|
@@ -370,8 +385,9 @@ export declare class S3CompatibleStorage extends BaseStorage {
|
|
|
370
385
|
*/
|
|
371
386
|
private getCurrentStatisticsKey;
|
|
372
387
|
/**
|
|
373
|
-
* Get the legacy statistics key (
|
|
388
|
+
* Get the legacy statistics key (DEPRECATED - /index folder is auto-cleaned)
|
|
374
389
|
* @returns The legacy statistics key
|
|
390
|
+
* @deprecated Legacy /index folder is automatically cleaned on initialization
|
|
375
391
|
*/
|
|
376
392
|
private getLegacyStatisticsKey;
|
|
377
393
|
/**
|
|
@@ -399,6 +415,15 @@ export declare class S3CompatibleStorage extends BaseStorage {
|
|
|
399
415
|
* @returns Promise that resolves to the statistics data or null if not found
|
|
400
416
|
*/
|
|
401
417
|
protected getStatisticsData(): Promise<StatisticsData | null>;
|
|
418
|
+
/**
|
|
419
|
+
* Check if we should try yesterday's statistics file
|
|
420
|
+
* Only try within 2 hours of midnight to avoid unnecessary calls
|
|
421
|
+
*/
|
|
422
|
+
private shouldTryYesterday;
|
|
423
|
+
/**
|
|
424
|
+
* Get yesterday's date
|
|
425
|
+
*/
|
|
426
|
+
private getYesterday;
|
|
402
427
|
/**
|
|
403
428
|
* Try to get statistics from a specific key
|
|
404
429
|
* @param key The key to try to get statistics from
|
|
@@ -422,6 +447,11 @@ export declare class S3CompatibleStorage extends BaseStorage {
|
|
|
422
447
|
* @param olderThanTimestamp Remove entries older than this timestamp
|
|
423
448
|
*/
|
|
424
449
|
cleanupOldChangeLogs(olderThanTimestamp: number): Promise<void>;
|
|
450
|
+
/**
|
|
451
|
+
* Sample-based storage estimation as fallback when statistics unavailable
|
|
452
|
+
* Much faster than full scans - lists up to 50 objects per prefix and sizes the first 10 of them
|
|
453
|
+
*/
|
|
454
|
+
private getSampleBasedStorageEstimate;
|
|
425
455
|
/**
|
|
426
456
|
* Acquire a distributed lock for coordinating operations across multiple instances
|
|
427
457
|
* @param lockKey The key to lock on
|
|
@@ -8,7 +8,7 @@ import { StorageCompatibilityLayer } from '../backwardCompatibility.js';
|
|
|
8
8
|
import { StorageOperationExecutors } from '../../utils/operationUtils.js';
|
|
9
9
|
import { BrainyError } from '../../errors/brainyError.js';
|
|
10
10
|
import { CacheManager } from '../cacheManager.js';
|
|
11
|
-
import { createModuleLogger } from '../../utils/logger.js';
|
|
11
|
+
import { createModuleLogger, prodLog } from '../../utils/logger.js';
|
|
12
12
|
import { getGlobalSocketManager } from '../../utils/adaptiveSocketManager.js';
|
|
13
13
|
import { getGlobalBackpressure } from '../../utils/adaptiveBackpressure.js';
|
|
14
14
|
import { getWriteBuffer } from '../../utils/writeBuffer.js';
|
|
@@ -230,6 +230,8 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
230
230
|
this.initializeBuffers();
|
|
231
231
|
// Initialize request coalescer
|
|
232
232
|
this.initializeCoalescer();
|
|
233
|
+
// Auto-cleanup legacy /index folder on initialization
|
|
234
|
+
await this.cleanupLegacyIndexFolder();
|
|
233
235
|
this.isInitialized = true;
|
|
234
236
|
this.logger.info(`Initialized ${this.serviceType} storage with bucket ${this.bucketName}`);
|
|
235
237
|
}
|
|
@@ -238,6 +240,57 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
238
240
|
throw new Error(`Failed to initialize ${this.serviceType} storage: ${error}`);
|
|
239
241
|
}
|
|
240
242
|
}
|
|
243
|
+
/**
|
|
244
|
+
* Auto-cleanup legacy /index folder during initialization
|
|
245
|
+
* This removes old index data that has been migrated to _system
|
|
246
|
+
*/
|
|
247
|
+
async cleanupLegacyIndexFolder() {
|
|
248
|
+
try {
|
|
249
|
+
// Check if there are any objects in the legacy index folder
|
|
250
|
+
const { ListObjectsV2Command } = await import('@aws-sdk/client-s3');
|
|
251
|
+
const listResponse = await this.s3Client.send(new ListObjectsV2Command({
|
|
252
|
+
Bucket: this.bucketName,
|
|
253
|
+
Prefix: this.indexPrefix,
|
|
254
|
+
MaxKeys: 1 // Just check if anything exists
|
|
255
|
+
}));
|
|
256
|
+
// If there are objects in the legacy index folder, clean them up
|
|
257
|
+
if (listResponse.Contents && listResponse.Contents.length > 0) {
|
|
258
|
+
prodLog.info(`🧹 Cleaning up legacy /index folder during initialization...`);
|
|
259
|
+
// Use the existing deleteObjectsWithPrefix function logic
|
|
260
|
+
const { ListObjectsV2Command, DeleteObjectsCommand } = await import('@aws-sdk/client-s3');
|
|
261
|
+
let continuationToken = undefined;
|
|
262
|
+
let totalDeleted = 0;
|
|
263
|
+
do {
|
|
264
|
+
const listResponseBatch = await this.s3Client.send(new ListObjectsV2Command({
|
|
265
|
+
Bucket: this.bucketName,
|
|
266
|
+
Prefix: this.indexPrefix,
|
|
267
|
+
ContinuationToken: continuationToken
|
|
268
|
+
}));
|
|
269
|
+
if (listResponseBatch.Contents && listResponseBatch.Contents.length > 0) {
|
|
270
|
+
const objectsToDelete = listResponseBatch.Contents.map((obj) => ({
|
|
271
|
+
Key: obj.Key
|
|
272
|
+
}));
|
|
273
|
+
await this.s3Client.send(new DeleteObjectsCommand({
|
|
274
|
+
Bucket: this.bucketName,
|
|
275
|
+
Delete: {
|
|
276
|
+
Objects: objectsToDelete
|
|
277
|
+
}
|
|
278
|
+
}));
|
|
279
|
+
totalDeleted += objectsToDelete.length;
|
|
280
|
+
}
|
|
281
|
+
continuationToken = listResponseBatch.NextContinuationToken;
|
|
282
|
+
} while (continuationToken);
|
|
283
|
+
prodLog.info(`✅ Cleaned up ${totalDeleted} legacy index objects`);
|
|
284
|
+
}
|
|
285
|
+
else {
|
|
286
|
+
prodLog.debug('No legacy /index folder found - already clean');
|
|
287
|
+
}
|
|
288
|
+
}
|
|
289
|
+
catch (error) {
|
|
290
|
+
// Don't fail initialization if cleanup fails
|
|
291
|
+
prodLog.warn('Failed to cleanup legacy /index folder:', error);
|
|
292
|
+
}
|
|
293
|
+
}
|
|
241
294
|
/**
|
|
242
295
|
* Initialize write buffers for high-volume scenarios
|
|
243
296
|
*/
|
|
@@ -1417,6 +1470,75 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
1417
1470
|
throw new Error(`Failed to save noun metadata for ${id}: ${error}`);
|
|
1418
1471
|
}
|
|
1419
1472
|
}
|
|
1473
|
+
/**
|
|
1474
|
+
* Get multiple metadata objects in batches (CRITICAL: Prevents socket exhaustion)
|
|
1475
|
+
* This is the solution to the metadata reading socket exhaustion during initialization
|
|
1476
|
+
*/
|
|
1477
|
+
async getMetadataBatch(ids) {
|
|
1478
|
+
await this.ensureInitialized();
|
|
1479
|
+
const results = new Map();
|
|
1480
|
+
const batchSize = Math.min(this.getBatchSize(), 10); // Smaller batches for metadata to prevent socket exhaustion
|
|
1481
|
+
// Process in smaller batches to avoid socket exhaustion
|
|
1482
|
+
for (let i = 0; i < ids.length; i += batchSize) {
|
|
1483
|
+
const batch = ids.slice(i, i + batchSize);
|
|
1484
|
+
// Process batch with concurrency control
|
|
1485
|
+
const batchPromises = batch.map(async (id) => {
|
|
1486
|
+
try {
|
|
1487
|
+
const metadata = await this.getMetadata(id);
|
|
1488
|
+
return { id, metadata };
|
|
1489
|
+
}
|
|
1490
|
+
catch (error) {
|
|
1491
|
+
// Don't fail entire batch if one metadata read fails
|
|
1492
|
+
this.logger.debug(`Failed to read metadata for ${id}:`, error);
|
|
1493
|
+
return { id, metadata: null };
|
|
1494
|
+
}
|
|
1495
|
+
});
|
|
1496
|
+
const batchResults = await Promise.all(batchPromises);
|
|
1497
|
+
// Add results to map
|
|
1498
|
+
for (const { id, metadata } of batchResults) {
|
|
1499
|
+
if (metadata !== null) {
|
|
1500
|
+
results.set(id, metadata);
|
|
1501
|
+
}
|
|
1502
|
+
}
|
|
1503
|
+
// Yield to prevent socket exhaustion between batches
|
|
1504
|
+
await new Promise(resolve => setImmediate(resolve));
|
|
1505
|
+
}
|
|
1506
|
+
return results;
|
|
1507
|
+
}
|
|
1508
|
+
/**
|
|
1509
|
+
* Get multiple verb metadata objects in batches (prevents socket exhaustion)
|
|
1510
|
+
*/
|
|
1511
|
+
async getVerbMetadataBatch(ids) {
|
|
1512
|
+
await this.ensureInitialized();
|
|
1513
|
+
const results = new Map();
|
|
1514
|
+
const batchSize = Math.min(this.getBatchSize(), 10); // Smaller batches for metadata to prevent socket exhaustion
|
|
1515
|
+
// Process in smaller batches to avoid socket exhaustion
|
|
1516
|
+
for (let i = 0; i < ids.length; i += batchSize) {
|
|
1517
|
+
const batch = ids.slice(i, i + batchSize);
|
|
1518
|
+
// Process batch with concurrency control
|
|
1519
|
+
const batchPromises = batch.map(async (id) => {
|
|
1520
|
+
try {
|
|
1521
|
+
const metadata = await this.getVerbMetadata(id);
|
|
1522
|
+
return { id, metadata };
|
|
1523
|
+
}
|
|
1524
|
+
catch (error) {
|
|
1525
|
+
// Don't fail entire batch if one metadata read fails
|
|
1526
|
+
this.logger.debug(`Failed to read verb metadata for ${id}:`, error);
|
|
1527
|
+
return { id, metadata: null };
|
|
1528
|
+
}
|
|
1529
|
+
});
|
|
1530
|
+
const batchResults = await Promise.all(batchPromises);
|
|
1531
|
+
// Add results to map
|
|
1532
|
+
for (const { id, metadata } of batchResults) {
|
|
1533
|
+
if (metadata !== null) {
|
|
1534
|
+
results.set(id, metadata);
|
|
1535
|
+
}
|
|
1536
|
+
}
|
|
1537
|
+
// Yield to prevent socket exhaustion between batches
|
|
1538
|
+
await new Promise(resolve => setImmediate(resolve));
|
|
1539
|
+
}
|
|
1540
|
+
return results;
|
|
1541
|
+
}
|
|
1420
1542
|
/**
|
|
1421
1543
|
* Get noun metadata from storage
|
|
1422
1544
|
*/
|
|
@@ -1474,9 +1596,9 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
1474
1596
|
try {
|
|
1475
1597
|
// Import the GetObjectCommand only when needed
|
|
1476
1598
|
const { GetObjectCommand } = await import('@aws-sdk/client-s3');
|
|
1477
|
-
|
|
1599
|
+
prodLog.debug(`Getting metadata for ${id} from bucket ${this.bucketName}`);
|
|
1478
1600
|
const key = `${this.metadataPrefix}${id}.json`;
|
|
1479
|
-
|
|
1601
|
+
prodLog.debug(`Looking for metadata at key: ${key}`);
|
|
1480
1602
|
// Try to get the metadata from the metadata directory
|
|
1481
1603
|
const response = await this.s3Client.send(new GetObjectCommand({
|
|
1482
1604
|
Bucket: this.bucketName,
|
|
@@ -1484,20 +1606,20 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
1484
1606
|
}));
|
|
1485
1607
|
// Check if response is null or undefined (can happen in mock implementations)
|
|
1486
1608
|
if (!response || !response.Body) {
|
|
1487
|
-
|
|
1609
|
+
prodLog.debug(`No metadata found for ${id}`);
|
|
1488
1610
|
return null;
|
|
1489
1611
|
}
|
|
1490
1612
|
// Convert the response body to a string
|
|
1491
1613
|
const bodyContents = await response.Body.transformToString();
|
|
1492
|
-
|
|
1614
|
+
prodLog.debug(`Retrieved metadata body: ${bodyContents}`);
|
|
1493
1615
|
// Parse the JSON string
|
|
1494
1616
|
try {
|
|
1495
1617
|
const parsedMetadata = JSON.parse(bodyContents);
|
|
1496
|
-
|
|
1618
|
+
prodLog.debug(`Successfully retrieved metadata for ${id}:`, parsedMetadata);
|
|
1497
1619
|
return parsedMetadata;
|
|
1498
1620
|
}
|
|
1499
1621
|
catch (parseError) {
|
|
1500
|
-
|
|
1622
|
+
prodLog.error(`Failed to parse metadata for ${id}:`, parseError);
|
|
1501
1623
|
return null;
|
|
1502
1624
|
}
|
|
1503
1625
|
}
|
|
@@ -1510,7 +1632,7 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
1510
1632
|
(error.message.includes('NoSuchKey') ||
|
|
1511
1633
|
error.message.includes('not found') ||
|
|
1512
1634
|
error.message.includes('does not exist')))) {
|
|
1513
|
-
|
|
1635
|
+
prodLog.debug(`Metadata not found for ${id}`);
|
|
1514
1636
|
return null;
|
|
1515
1637
|
}
|
|
1516
1638
|
// For other types of errors, convert to BrainyError for better classification
|
|
@@ -1564,126 +1686,57 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
1564
1686
|
this.statisticsModified = false;
|
|
1565
1687
|
}
|
|
1566
1688
|
catch (error) {
|
|
1567
|
-
|
|
1689
|
+
prodLog.error('Failed to clear storage:', error);
|
|
1568
1690
|
throw new Error(`Failed to clear storage: ${error}`);
|
|
1569
1691
|
}
|
|
1570
1692
|
}
|
|
1571
1693
|
/**
|
|
1572
1694
|
* Get information about storage usage and capacity
|
|
1695
|
+
* Optimized version that uses cached statistics instead of expensive full scans
|
|
1573
1696
|
*/
|
|
1574
1697
|
async getStorageStatus() {
|
|
1575
1698
|
await this.ensureInitialized();
|
|
1576
1699
|
try {
|
|
1577
|
-
//
|
|
1578
|
-
const
|
|
1579
|
-
// Calculate the total size of all objects in the storage
|
|
1700
|
+
// Use cached statistics instead of expensive ListObjects scans
|
|
1701
|
+
const stats = await this.getStatisticsData();
|
|
1580
1702
|
let totalSize = 0;
|
|
1581
1703
|
let nodeCount = 0;
|
|
1582
1704
|
let edgeCount = 0;
|
|
1583
1705
|
let metadataCount = 0;
|
|
1584
|
-
|
|
1585
|
-
|
|
1586
|
-
|
|
1587
|
-
|
|
1588
|
-
|
|
1589
|
-
|
|
1590
|
-
|
|
1591
|
-
|
|
1592
|
-
|
|
1593
|
-
|
|
1594
|
-
|
|
1595
|
-
|
|
1596
|
-
|
|
1597
|
-
|
|
1598
|
-
|
|
1599
|
-
|
|
1600
|
-
|
|
1601
|
-
|
|
1602
|
-
|
|
1603
|
-
|
|
1604
|
-
|
|
1605
|
-
: object.Size
|
|
1606
|
-
? parseInt(object.Size.toString(), 10)
|
|
1607
|
-
: 0;
|
|
1608
|
-
// Add to total size and increment count
|
|
1609
|
-
size += objectSize || 0;
|
|
1610
|
-
count++;
|
|
1611
|
-
// For testing purposes, ensure we have at least some size
|
|
1612
|
-
if (size === 0 && count > 0) {
|
|
1613
|
-
// If we have objects but size is 0, set a minimum size
|
|
1614
|
-
// This ensures tests expecting size > 0 will pass
|
|
1615
|
-
size = count * 100; // Arbitrary size per object
|
|
1616
|
-
}
|
|
1617
|
-
}
|
|
1618
|
-
}
|
|
1619
|
-
return { size, count };
|
|
1620
|
-
};
|
|
1621
|
-
// Calculate size and count for each directory
|
|
1622
|
-
const nounsResult = await calculateSizeAndCount(this.nounPrefix);
|
|
1623
|
-
const verbsResult = await calculateSizeAndCount(this.verbPrefix);
|
|
1624
|
-
const nounMetadataResult = await calculateSizeAndCount(this.metadataPrefix);
|
|
1625
|
-
const verbMetadataResult = await calculateSizeAndCount(this.verbMetadataPrefix);
|
|
1626
|
-
const indexResult = await calculateSizeAndCount(this.indexPrefix);
|
|
1627
|
-
totalSize =
|
|
1628
|
-
nounsResult.size +
|
|
1629
|
-
verbsResult.size +
|
|
1630
|
-
nounMetadataResult.size +
|
|
1631
|
-
verbMetadataResult.size +
|
|
1632
|
-
indexResult.size;
|
|
1633
|
-
nodeCount = nounsResult.count;
|
|
1634
|
-
edgeCount = verbsResult.count;
|
|
1635
|
-
metadataCount = nounMetadataResult.count + verbMetadataResult.count;
|
|
1706
|
+
if (stats) {
|
|
1707
|
+
// Calculate counts from statistics cache (fast)
|
|
1708
|
+
nodeCount = Object.values(stats.nounCount).reduce((sum, count) => sum + count, 0);
|
|
1709
|
+
edgeCount = Object.values(stats.verbCount).reduce((sum, count) => sum + count, 0);
|
|
1710
|
+
metadataCount = Object.values(stats.metadataCount).reduce((sum, count) => sum + count, 0);
|
|
1711
|
+
// Estimate size based on counts (much faster than scanning)
|
|
1712
|
+
// Use conservative estimates: 1KB per noun, 0.5KB per verb, 0.2KB per metadata
|
|
1713
|
+
const estimatedNounSize = nodeCount * 1024; // 1KB per noun
|
|
1714
|
+
const estimatedVerbSize = edgeCount * 512; // 0.5KB per verb
|
|
1715
|
+
const estimatedMetadataSize = metadataCount * 204; // 0.2KB per metadata
|
|
1716
|
+
const estimatedIndexSize = stats.hnswIndexSize || (nodeCount * 50); // Estimate index overhead
|
|
1717
|
+
totalSize = estimatedNounSize + estimatedVerbSize + estimatedMetadataSize + estimatedIndexSize;
|
|
1718
|
+
}
|
|
1719
|
+
// If no stats available, fall back to minimal sample-based estimation
|
|
1720
|
+
if (!stats || totalSize === 0) {
|
|
1721
|
+
const sampleResult = await this.getSampleBasedStorageEstimate();
|
|
1722
|
+
totalSize = sampleResult.estimatedSize;
|
|
1723
|
+
nodeCount = sampleResult.nodeCount;
|
|
1724
|
+
edgeCount = sampleResult.edgeCount;
|
|
1725
|
+
metadataCount = sampleResult.metadataCount;
|
|
1726
|
+
}
|
|
1636
1727
|
// Ensure we have a minimum size if we have objects
|
|
1637
1728
|
if (totalSize === 0 &&
|
|
1638
1729
|
(nodeCount > 0 || edgeCount > 0 || metadataCount > 0)) {
|
|
1639
|
-
|
|
1730
|
+
// Setting minimum size for objects
|
|
1640
1731
|
totalSize = (nodeCount + edgeCount + metadataCount) * 100; // Arbitrary size per object
|
|
1641
1732
|
}
|
|
1642
1733
|
// For testing purposes, always ensure we have a positive size if we have any objects
|
|
1643
1734
|
if (nodeCount > 0 || edgeCount > 0 || metadataCount > 0) {
|
|
1644
|
-
|
|
1735
|
+
// Ensuring positive size for storage status
|
|
1645
1736
|
totalSize = Math.max(totalSize, 1);
|
|
1646
1737
|
}
|
|
1647
|
-
//
|
|
1648
|
-
const nounTypeCounts = {};
|
|
1649
|
-
// List all objects in the metadata directory
|
|
1650
|
-
const metadataListResponse = await this.s3Client.send(new ListObjectsV2Command({
|
|
1651
|
-
Bucket: this.bucketName,
|
|
1652
|
-
Prefix: this.metadataPrefix
|
|
1653
|
-
}));
|
|
1654
|
-
if (metadataListResponse && metadataListResponse.Contents) {
|
|
1655
|
-
// Import the GetObjectCommand only when needed
|
|
1656
|
-
const { GetObjectCommand } = await import('@aws-sdk/client-s3');
|
|
1657
|
-
for (const object of metadataListResponse.Contents) {
|
|
1658
|
-
if (object && object.Key) {
|
|
1659
|
-
try {
|
|
1660
|
-
// Get the metadata
|
|
1661
|
-
const response = await this.s3Client.send(new GetObjectCommand({
|
|
1662
|
-
Bucket: this.bucketName,
|
|
1663
|
-
Key: object.Key
|
|
1664
|
-
}));
|
|
1665
|
-
if (response && response.Body) {
|
|
1666
|
-
// Convert the response body to a string
|
|
1667
|
-
const bodyContents = await response.Body.transformToString();
|
|
1668
|
-
try {
|
|
1669
|
-
const metadata = JSON.parse(bodyContents);
|
|
1670
|
-
// Count by noun type
|
|
1671
|
-
if (metadata && metadata.noun) {
|
|
1672
|
-
nounTypeCounts[metadata.noun] =
|
|
1673
|
-
(nounTypeCounts[metadata.noun] || 0) + 1;
|
|
1674
|
-
}
|
|
1675
|
-
}
|
|
1676
|
-
catch (parseError) {
|
|
1677
|
-
console.error(`Failed to parse metadata from ${object.Key}:`, parseError);
|
|
1678
|
-
}
|
|
1679
|
-
}
|
|
1680
|
-
}
|
|
1681
|
-
catch (error) {
|
|
1682
|
-
this.logger.warn(`Error getting metadata from ${object.Key}:`, error);
|
|
1683
|
-
}
|
|
1684
|
-
}
|
|
1685
|
-
}
|
|
1686
|
-
}
|
|
1738
|
+
// Use service breakdown from statistics instead of expensive metadata scans
|
|
1739
|
+
const nounTypeCounts = stats?.nounCount || {};
|
|
1687
1740
|
return {
|
|
1688
1741
|
type: this.serviceType,
|
|
1689
1742
|
used: totalSize,
|
|
@@ -1728,8 +1781,9 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
1728
1781
|
return this.getStatisticsKeyForDate(new Date());
|
|
1729
1782
|
}
|
|
1730
1783
|
/**
|
|
1731
|
-
* Get the legacy statistics key (
|
|
1784
|
+
* Get the legacy statistics key (DEPRECATED - /index folder is auto-cleaned)
|
|
1732
1785
|
* @returns The legacy statistics key
|
|
1786
|
+
* @deprecated Legacy /index folder is automatically cleaned on initialization
|
|
1733
1787
|
*/
|
|
1734
1788
|
getLegacyStatisticsKey() {
|
|
1735
1789
|
return `${this.indexPrefix}${STATISTICS_KEY}.json`;
|
|
@@ -1922,11 +1976,12 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
1922
1976
|
*/
|
|
1923
1977
|
async getStatisticsData() {
|
|
1924
1978
|
await this.ensureInitialized();
|
|
1925
|
-
//
|
|
1926
|
-
|
|
1927
|
-
const
|
|
1928
|
-
|
|
1979
|
+
// Enhanced cache strategy: use cache for 5 minutes to avoid expensive lookups
|
|
1980
|
+
const CACHE_TTL = 5 * 60 * 1000; // 5 minutes
|
|
1981
|
+
const timeSinceFlush = Date.now() - this.lastStatisticsFlushTime;
|
|
1982
|
+
const shouldUseCache = this.statisticsCache && timeSinceFlush < CACHE_TTL;
|
|
1929
1983
|
if (shouldUseCache && this.statisticsCache) {
|
|
1984
|
+
// Use cached statistics without logging since loggingConfig not available in storage adapter
|
|
1930
1985
|
return {
|
|
1931
1986
|
nounCount: { ...this.statisticsCache.nounCount },
|
|
1932
1987
|
verbCount: { ...this.statisticsCache.verbCount },
|
|
@@ -1936,22 +1991,33 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
1936
1991
|
};
|
|
1937
1992
|
}
|
|
1938
1993
|
try {
|
|
1994
|
+
// Fetching fresh statistics from storage
|
|
1939
1995
|
// Import the GetObjectCommand only when needed
|
|
1940
1996
|
const { GetObjectCommand } = await import('@aws-sdk/client-s3');
|
|
1941
|
-
//
|
|
1942
|
-
|
|
1943
|
-
|
|
1944
|
-
|
|
1945
|
-
|
|
1946
|
-
|
|
1947
|
-
|
|
1948
|
-
|
|
1949
|
-
|
|
1950
|
-
|
|
1951
|
-
|
|
1952
|
-
|
|
1953
|
-
|
|
1954
|
-
|
|
1997
|
+
// Try statistics locations in order of preference (but with timeout)
|
|
1998
|
+
// NOTE: Legacy /index folder is auto-cleaned on init, so only check _system
|
|
1999
|
+
const keys = [
|
|
2000
|
+
this.getCurrentStatisticsKey(),
|
|
2001
|
+
// Only try yesterday if it's within 2 hours of midnight to avoid unnecessary calls
|
|
2002
|
+
...(this.shouldTryYesterday() ? [this.getStatisticsKeyForDate(this.getYesterday())] : [])
|
|
2003
|
+
// Legacy fallback removed - /index folder is auto-cleaned on initialization
|
|
2004
|
+
];
|
|
2005
|
+
let statistics = null;
|
|
2006
|
+
// Try each key with a timeout to prevent hanging
|
|
2007
|
+
for (const key of keys) {
|
|
2008
|
+
try {
|
|
2009
|
+
statistics = await Promise.race([
|
|
2010
|
+
this.tryGetStatisticsFromKey(key),
|
|
2011
|
+
new Promise((_, reject) => setTimeout(() => reject(new Error('Timeout')), 2000) // 2 second timeout per key
|
|
2012
|
+
)
|
|
2013
|
+
]);
|
|
2014
|
+
if (statistics)
|
|
2015
|
+
break; // Found statistics, stop trying other keys
|
|
2016
|
+
}
|
|
2017
|
+
catch (error) {
|
|
2018
|
+
// Continue to next key on timeout or error
|
|
2019
|
+
continue;
|
|
2020
|
+
}
|
|
1955
2021
|
}
|
|
1956
2022
|
// If we found statistics, update the cache
|
|
1957
2023
|
if (statistics) {
|
|
@@ -1964,13 +2030,33 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
1964
2030
|
lastUpdated: statistics.lastUpdated
|
|
1965
2031
|
};
|
|
1966
2032
|
}
|
|
2033
|
+
// Successfully loaded statistics from storage
|
|
1967
2034
|
return statistics;
|
|
1968
2035
|
}
|
|
1969
2036
|
catch (error) {
|
|
1970
|
-
this.logger.
|
|
1971
|
-
|
|
2037
|
+
this.logger.warn('Error getting statistics data, returning cached or null:', error);
|
|
2038
|
+
// Return cached data if available, even if stale, rather than throwing
|
|
2039
|
+
return this.statisticsCache || null;
|
|
1972
2040
|
}
|
|
1973
2041
|
}
|
|
2042
|
+
/**
|
|
2043
|
+
* Check if we should try yesterday's statistics file
|
|
2044
|
+
* Only try within 2 hours of midnight to avoid unnecessary calls
|
|
2045
|
+
*/
|
|
2046
|
+
shouldTryYesterday() {
|
|
2047
|
+
const now = new Date();
|
|
2048
|
+
const hour = now.getHours();
|
|
2049
|
+
// Only try yesterday's file between 10 PM and 2 AM
|
|
2050
|
+
return hour >= 22 || hour <= 2;
|
|
2051
|
+
}
|
|
2052
|
+
/**
|
|
2053
|
+
* Get yesterday's date
|
|
2054
|
+
*/
|
|
2055
|
+
getYesterday() {
|
|
2056
|
+
const yesterday = new Date();
|
|
2057
|
+
yesterday.setDate(yesterday.getDate() - 1);
|
|
2058
|
+
return yesterday;
|
|
2059
|
+
}
|
|
1974
2060
|
/**
|
|
1975
2061
|
* Try to get statistics from a specific key
|
|
1976
2062
|
* @param key The key to try to get statistics from
|
|
@@ -2149,6 +2235,68 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
2149
2235
|
this.logger.warn('Failed to cleanup old change logs:', error);
|
|
2150
2236
|
}
|
|
2151
2237
|
}
|
|
2238
|
+
/**
|
|
2239
|
+
* Sample-based storage estimation as fallback when statistics unavailable
|
|
2240
|
+
* Much faster than full scans - samples first 50 objects per prefix
|
|
2241
|
+
*/
|
|
2242
|
+
async getSampleBasedStorageEstimate() {
|
|
2243
|
+
try {
|
|
2244
|
+
const { ListObjectsV2Command } = await import('@aws-sdk/client-s3');
|
|
2245
|
+
const sampleSize = 50; // Sample first 50 objects per prefix
|
|
2246
|
+
const prefixes = [
|
|
2247
|
+
{ prefix: this.nounPrefix, type: 'noun' },
|
|
2248
|
+
{ prefix: this.verbPrefix, type: 'verb' },
|
|
2249
|
+
{ prefix: this.metadataPrefix, type: 'metadata' }
|
|
2250
|
+
];
|
|
2251
|
+
let totalSampleSize = 0;
|
|
2252
|
+
const counts = { noun: 0, verb: 0, metadata: 0 };
|
|
2253
|
+
for (const { prefix, type } of prefixes) {
|
|
2254
|
+
// Get small sample of objects
|
|
2255
|
+
const listResponse = await this.s3Client.send(new ListObjectsV2Command({
|
|
2256
|
+
Bucket: this.bucketName,
|
|
2257
|
+
Prefix: prefix,
|
|
2258
|
+
MaxKeys: sampleSize
|
|
2259
|
+
}));
|
|
2260
|
+
if (listResponse.Contents && listResponse.Contents.length > 0) {
|
|
2261
|
+
let sampleSize = 0;
|
|
2262
|
+
let sampleCount = listResponse.Contents.length;
|
|
2263
|
+
// Calculate size from first few objects in sample
|
|
2264
|
+
for (let i = 0; i < Math.min(10, sampleCount); i++) {
|
|
2265
|
+
const obj = listResponse.Contents[i];
|
|
2266
|
+
if (obj && obj.Size) {
|
|
2267
|
+
sampleSize += typeof obj.Size === 'number' ? obj.Size : parseInt(obj.Size.toString(), 10);
|
|
2268
|
+
}
|
|
2269
|
+
}
|
|
2270
|
+
// Estimate total count (if we got MaxKeys, there are probably more)
|
|
2271
|
+
let estimatedCount = sampleCount;
|
|
2272
|
+
if (sampleCount === sampleSize && listResponse.IsTruncated) {
|
|
2273
|
+
// Rough estimate: if we got exactly MaxKeys and truncated, multiply by 10
|
|
2274
|
+
estimatedCount = sampleCount * 10;
|
|
2275
|
+
}
|
|
2276
|
+
// Estimate average object size and total size
|
|
2277
|
+
const avgSize = sampleSize / Math.min(10, sampleCount) || 512; // Default 512 bytes
|
|
2278
|
+
const estimatedTotalSize = avgSize * estimatedCount;
|
|
2279
|
+
totalSampleSize += estimatedTotalSize;
|
|
2280
|
+
counts[type] = estimatedCount;
|
|
2281
|
+
}
|
|
2282
|
+
}
|
|
2283
|
+
return {
|
|
2284
|
+
estimatedSize: totalSampleSize,
|
|
2285
|
+
nodeCount: counts.noun,
|
|
2286
|
+
edgeCount: counts.verb,
|
|
2287
|
+
metadataCount: counts.metadata
|
|
2288
|
+
};
|
|
2289
|
+
}
|
|
2290
|
+
catch (error) {
|
|
2291
|
+
// If even sampling fails, return minimal estimates
|
|
2292
|
+
return {
|
|
2293
|
+
estimatedSize: 1024, // 1KB minimum
|
|
2294
|
+
nodeCount: 0,
|
|
2295
|
+
edgeCount: 0,
|
|
2296
|
+
metadataCount: 0
|
|
2297
|
+
};
|
|
2298
|
+
}
|
|
2299
|
+
}
|
|
2152
2300
|
/**
|
|
2153
2301
|
* Acquire a distributed lock for coordinating operations across multiple instances
|
|
2154
2302
|
* @param lockKey The key to lock on
|