@soulcraft/brainy 3.50.1 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/CHANGELOG.md +242 -0
  2. package/README.md +358 -658
  3. package/dist/api/ConfigAPI.js +56 -19
  4. package/dist/api/DataAPI.js +24 -18
  5. package/dist/augmentations/storageAugmentations.d.ts +24 -0
  6. package/dist/augmentations/storageAugmentations.js +22 -0
  7. package/dist/brainy.js +32 -9
  8. package/dist/cli/commands/core.d.ts +20 -10
  9. package/dist/cli/commands/core.js +384 -82
  10. package/dist/cli/commands/import.d.ts +41 -0
  11. package/dist/cli/commands/import.js +456 -0
  12. package/dist/cli/commands/insights.d.ts +34 -0
  13. package/dist/cli/commands/insights.js +300 -0
  14. package/dist/cli/commands/neural.d.ts +6 -12
  15. package/dist/cli/commands/neural.js +113 -10
  16. package/dist/cli/commands/nlp.d.ts +28 -0
  17. package/dist/cli/commands/nlp.js +246 -0
  18. package/dist/cli/commands/storage.d.ts +64 -0
  19. package/dist/cli/commands/storage.js +730 -0
  20. package/dist/cli/index.js +210 -24
  21. package/dist/coreTypes.d.ts +206 -34
  22. package/dist/distributed/configManager.js +8 -6
  23. package/dist/distributed/shardMigration.js +2 -0
  24. package/dist/distributed/storageDiscovery.js +6 -4
  25. package/dist/embeddings/EmbeddingManager.d.ts +2 -2
  26. package/dist/embeddings/EmbeddingManager.js +5 -1
  27. package/dist/graph/lsm/LSMTree.js +32 -20
  28. package/dist/hnsw/typeAwareHNSWIndex.js +6 -2
  29. package/dist/storage/adapters/azureBlobStorage.d.ts +545 -0
  30. package/dist/storage/adapters/azureBlobStorage.js +1809 -0
  31. package/dist/storage/adapters/baseStorageAdapter.d.ts +16 -13
  32. package/dist/storage/adapters/fileSystemStorage.d.ts +21 -9
  33. package/dist/storage/adapters/fileSystemStorage.js +204 -127
  34. package/dist/storage/adapters/gcsStorage.d.ts +119 -9
  35. package/dist/storage/adapters/gcsStorage.js +317 -62
  36. package/dist/storage/adapters/memoryStorage.d.ts +30 -18
  37. package/dist/storage/adapters/memoryStorage.js +99 -94
  38. package/dist/storage/adapters/opfsStorage.d.ts +48 -10
  39. package/dist/storage/adapters/opfsStorage.js +201 -80
  40. package/dist/storage/adapters/r2Storage.d.ts +12 -5
  41. package/dist/storage/adapters/r2Storage.js +63 -15
  42. package/dist/storage/adapters/s3CompatibleStorage.d.ts +164 -17
  43. package/dist/storage/adapters/s3CompatibleStorage.js +472 -80
  44. package/dist/storage/adapters/typeAwareStorageAdapter.d.ts +38 -6
  45. package/dist/storage/adapters/typeAwareStorageAdapter.js +218 -39
  46. package/dist/storage/baseStorage.d.ts +41 -38
  47. package/dist/storage/baseStorage.js +110 -134
  48. package/dist/storage/storageFactory.d.ts +29 -2
  49. package/dist/storage/storageFactory.js +30 -1
  50. package/dist/utils/entityIdMapper.js +5 -2
  51. package/dist/utils/fieldTypeInference.js +8 -1
  52. package/dist/utils/metadataFilter.d.ts +3 -2
  53. package/dist/utils/metadataFilter.js +1 -0
  54. package/dist/utils/metadataIndex.d.ts +2 -1
  55. package/dist/utils/metadataIndex.js +9 -1
  56. package/dist/utils/metadataIndexChunking.js +9 -4
  57. package/dist/utils/periodicCleanup.js +1 -0
  58. package/package.json +3 -1
@@ -0,0 +1,1809 @@
1
+ /**
2
+ * Azure Blob Storage Adapter (Native)
3
+ * Uses the native @azure/storage-blob library for optimal performance and authentication
4
+ *
5
+ * Supports multiple authentication methods:
6
+ * 1. DefaultAzureCredential (Managed Identity) - Automatic in Azure environments
7
+ * 2. Connection String
8
+ * 3. Storage Account Key
9
+ * 4. SAS Token
10
+ * 5. Azure AD (OAuth2) via DefaultAzureCredential
11
+ *
12
+ * v4.0.0: Fully compatible with metadata/vector separation architecture
13
+ */
14
+ import { BaseStorage, SYSTEM_DIR, STATISTICS_KEY, getDirectoryPath } from '../baseStorage.js';
15
+ import { BrainyError } from '../../errors/brainyError.js';
16
+ import { CacheManager } from '../cacheManager.js';
17
+ import { createModuleLogger, prodLog } from '../../utils/logger.js';
18
+ import { getGlobalBackpressure } from '../../utils/adaptiveBackpressure.js';
19
+ import { getWriteBuffer } from '../../utils/writeBuffer.js';
20
+ import { getCoalescer } from '../../utils/requestCoalescer.js';
21
+ import { getShardIdFromUuid } from '../sharding.js';
22
+ // Azure Blob Storage API limits
23
+ const MAX_AZURE_PAGE_SIZE = 5000;
24
+ /**
25
+ * Native Azure Blob Storage adapter for server environments
26
+ * Uses the @azure/storage-blob library with DefaultAzureCredential
27
+ *
28
+ * Authentication priority:
29
+ * 1. DefaultAzureCredential (Managed Identity) - if no credentials provided
30
+ * 2. Connection String - if connectionString provided
31
+ * 3. Storage Account Key - if accountName + accountKey provided
32
+ * 4. SAS Token - if accountName + sasToken provided
33
+ */
34
+ export class AzureBlobStorage extends BaseStorage {
35
+ /**
36
+ * Initialize the storage adapter
37
+ * @param options Configuration options for Azure Blob Storage
38
+ */
39
+ constructor(options) {
40
+ super();
41
+ this.blobServiceClient = null;
42
+ this.containerClient = null;
43
+ // Statistics caching for better performance
44
+ this.statisticsCache = null;
45
+ // Backpressure and performance management
46
+ this.pendingOperations = 0;
47
+ this.consecutiveErrors = 0;
48
+ this.lastErrorReset = Date.now();
49
+ // Adaptive backpressure for automatic flow control
50
+ this.backpressure = getGlobalBackpressure();
51
+ // Write buffers for bulk operations
52
+ this.nounWriteBuffer = null;
53
+ this.verbWriteBuffer = null;
54
+ // Request coalescer for deduplication
55
+ this.requestCoalescer = null;
56
+ // High-volume mode detection
57
+ this.highVolumeMode = false;
58
+ this.lastVolumeCheck = 0;
59
+ this.volumeCheckInterval = 1000; // Check every second
60
+ this.forceHighVolumeMode = false; // Environment variable override
61
+ // Module logger
62
+ this.logger = createModuleLogger('AzureBlobStorage');
63
+ this.containerName = options.containerName;
64
+ this.connectionString = options.connectionString;
65
+ this.accountName = options.accountName;
66
+ this.accountKey = options.accountKey;
67
+ this.sasToken = options.sasToken;
68
+ this.readOnly = options.readOnly || false;
69
+ // Set up prefixes for different types of data using entity-based structure
70
+ this.nounPrefix = `${getDirectoryPath('noun', 'vector')}/`;
71
+ this.verbPrefix = `${getDirectoryPath('verb', 'vector')}/`;
72
+ this.metadataPrefix = `${getDirectoryPath('noun', 'metadata')}/`; // Noun metadata
73
+ this.verbMetadataPrefix = `${getDirectoryPath('verb', 'metadata')}/`; // Verb metadata
74
+ this.systemPrefix = `${SYSTEM_DIR}/`; // System data
75
+ // Initialize cache managers
76
+ this.nounCacheManager = new CacheManager(options.cacheConfig);
77
+ this.verbCacheManager = new CacheManager(options.cacheConfig);
78
+ // Check for high-volume mode override
79
+ if (typeof process !== 'undefined' && process.env?.BRAINY_FORCE_HIGH_VOLUME === 'true') {
80
+ this.forceHighVolumeMode = true;
81
+ this.highVolumeMode = true;
82
+ prodLog.info('🚀 High-volume mode FORCED via BRAINY_FORCE_HIGH_VOLUME environment variable');
83
+ }
84
+ }
85
+ /**
86
+ * Initialize the storage adapter
87
+ */
88
+ async init() {
89
+ if (this.isInitialized) {
90
+ return;
91
+ }
92
+ try {
93
+ // Import Azure Storage SDK only when needed
94
+ const { BlobServiceClient } = await import('@azure/storage-blob');
95
+ // Configure the Azure Blob Storage client based on available credentials
96
+ // Priority 1: Connection String
97
+ if (this.connectionString) {
98
+ this.blobServiceClient = BlobServiceClient.fromConnectionString(this.connectionString);
99
+ prodLog.info('🔐 Azure: Using Connection String');
100
+ }
101
+ // Priority 2: Account Name + Key
102
+ else if (this.accountName && this.accountKey) {
103
+ const { StorageSharedKeyCredential } = await import('@azure/storage-blob');
104
+ const sharedKeyCredential = new StorageSharedKeyCredential(this.accountName, this.accountKey);
105
+ this.blobServiceClient = new BlobServiceClient(`https://${this.accountName}.blob.core.windows.net`, sharedKeyCredential);
106
+ prodLog.info('🔐 Azure: Using Account Key');
107
+ }
108
+ // Priority 3: SAS Token
109
+ else if (this.accountName && this.sasToken) {
110
+ this.blobServiceClient = new BlobServiceClient(`https://${this.accountName}.blob.core.windows.net${this.sasToken}`);
111
+ prodLog.info('🔐 Azure: Using SAS Token');
112
+ }
113
+ // Priority 4: DefaultAzureCredential (Managed Identity)
114
+ else if (this.accountName) {
115
+ const { DefaultAzureCredential } = await import('@azure/identity');
116
+ const credential = new DefaultAzureCredential();
117
+ this.blobServiceClient = new BlobServiceClient(`https://${this.accountName}.blob.core.windows.net`, credential);
118
+ prodLog.info('🔐 Azure: Using DefaultAzureCredential (Managed Identity)');
119
+ }
120
+ else {
121
+ throw new Error('Azure Blob Storage requires either connectionString, accountName+accountKey, accountName+sasToken, or accountName (for Managed Identity)');
122
+ }
123
+ // Get reference to the container
124
+ this.containerClient = this.blobServiceClient.getContainerClient(this.containerName);
125
+ // Create container if it doesn't exist
126
+ const exists = await this.containerClient.exists();
127
+ if (!exists) {
128
+ await this.containerClient.create();
129
+ prodLog.info(`✅ Created Azure container: ${this.containerName}`);
130
+ }
131
+ else {
132
+ prodLog.info(`✅ Connected to Azure container: ${this.containerName}`);
133
+ }
134
+ // Initialize write buffers for high-volume mode
135
+ const storageId = `azure-${this.containerName}`;
136
+ this.nounWriteBuffer = getWriteBuffer(`${storageId}-nouns`, 'noun', async (items) => {
137
+ await this.flushNounBuffer(items);
138
+ });
139
+ this.verbWriteBuffer = getWriteBuffer(`${storageId}-verbs`, 'verb', async (items) => {
140
+ await this.flushVerbBuffer(items);
141
+ });
142
+ // Initialize request coalescer for deduplication
143
+ this.requestCoalescer = getCoalescer(storageId, async (batch) => {
144
+ // Process coalesced operations (placeholder for future optimization)
145
+ this.logger.trace(`Processing coalesced batch: ${batch.length} items`);
146
+ });
147
+ // Initialize counts from storage
148
+ await this.initializeCounts();
149
+ // Clear any stale cache entries from previous runs
150
+ prodLog.info('🧹 Clearing cache from previous run to prevent cache poisoning');
151
+ this.nounCacheManager.clear();
152
+ this.verbCacheManager.clear();
153
+ prodLog.info('✅ Cache cleared - starting fresh');
154
+ this.isInitialized = true;
155
+ }
156
+ catch (error) {
157
+ this.logger.error('Failed to initialize Azure Blob Storage:', error);
158
+ throw new Error(`Failed to initialize Azure Blob Storage: ${error}`);
159
+ }
160
+ }
161
+ /**
162
+ * Get the Azure blob name for a noun using UUID-based sharding
163
+ *
164
+ * Uses first 2 hex characters of UUID for consistent sharding.
165
+ * Path format: entities/nouns/vectors/{shardId}/{uuid}.json
166
+ *
167
+ * @example
168
+ * getNounKey('ab123456-1234-5678-9abc-def012345678')
169
+ * // returns 'entities/nouns/vectors/ab/ab123456-1234-5678-9abc-def012345678.json'
170
+ */
171
+ getNounKey(id) {
172
+ const shardId = getShardIdFromUuid(id);
173
+ return `${this.nounPrefix}${shardId}/${id}.json`;
174
+ }
175
+ /**
176
+ * Get the Azure blob name for a verb using UUID-based sharding
177
+ *
178
+ * Uses first 2 hex characters of UUID for consistent sharding.
179
+ * Path format: entities/verbs/vectors/{shardId}/{uuid}.json
180
+ *
181
+ * @example
182
+ * getVerbKey('cd987654-4321-8765-cba9-fed543210987')
183
+ * // returns 'entities/verbs/vectors/cd/cd987654-4321-8765-cba9-fed543210987.json'
184
+ */
185
+ getVerbKey(id) {
186
+ const shardId = getShardIdFromUuid(id);
187
+ return `${this.verbPrefix}${shardId}/${id}.json`;
188
+ }
189
+ /**
190
+ * Override base class method to detect Azure-specific throttling errors
191
+ */
192
+ isThrottlingError(error) {
193
+ // First check base class detection
194
+ if (super.isThrottlingError(error)) {
195
+ return true;
196
+ }
197
+ // Azure-specific throttling detection
198
+ const statusCode = error.statusCode || error.code;
199
+ const message = error.message?.toLowerCase() || '';
200
+ return (statusCode === 429 || // Too Many Requests
201
+ statusCode === 503 || // Service Unavailable
202
+ statusCode === 'ServerBusy' ||
203
+ statusCode === 'IngressOverLimit' ||
204
+ statusCode === 'EgressOverLimit' ||
205
+ message.includes('throttl') ||
206
+ message.includes('rate limit') ||
207
+ message.includes('too many requests'));
208
+ }
209
+ /**
210
+ * Override base class to enable smart batching for cloud storage
211
+ *
212
+ * Azure Blob Storage is cloud storage with network latency (~50ms per write).
213
+ * Smart batching reduces writes from 1000 ops → 100 batches.
214
+ *
215
+ * @returns true (Azure is cloud storage)
216
+ */
217
+ isCloudStorage() {
218
+ return true; // Azure benefits from batching
219
+ }
220
+ /**
221
+ * Apply backpressure before starting an operation
222
+ * @returns Request ID for tracking
223
+ */
224
+ async applyBackpressure() {
225
+ const requestId = `${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
226
+ await this.backpressure.requestPermission(requestId, 1);
227
+ this.pendingOperations++;
228
+ return requestId;
229
+ }
230
+ /**
231
+ * Release backpressure after completing an operation
232
+ * @param success Whether the operation succeeded
233
+ * @param requestId Request ID from applyBackpressure()
234
+ */
235
+ releaseBackpressure(success = true, requestId) {
236
+ this.pendingOperations = Math.max(0, this.pendingOperations - 1);
237
+ if (requestId) {
238
+ this.backpressure.releasePermission(requestId, success);
239
+ }
240
+ }
241
+ /**
242
+ * Check if high-volume mode should be enabled
243
+ */
244
+ checkVolumeMode() {
245
+ if (this.forceHighVolumeMode) {
246
+ return; // Already forced on
247
+ }
248
+ const now = Date.now();
249
+ if (now - this.lastVolumeCheck < this.volumeCheckInterval) {
250
+ return;
251
+ }
252
+ this.lastVolumeCheck = now;
253
+ // Enable high-volume mode if we have many pending operations
254
+ const shouldEnable = this.pendingOperations > 20;
255
+ if (shouldEnable && !this.highVolumeMode) {
256
+ this.highVolumeMode = true;
257
+ prodLog.info('🚀 High-volume mode ENABLED (pending operations:', this.pendingOperations, ')');
258
+ }
259
+ else if (!shouldEnable && this.highVolumeMode && !this.forceHighVolumeMode) {
260
+ this.highVolumeMode = false;
261
+ prodLog.info('🐌 High-volume mode DISABLED (pending operations:', this.pendingOperations, ')');
262
+ }
263
+ }
264
+ /**
265
+ * Flush noun buffer to Azure
266
+ */
267
+ async flushNounBuffer(items) {
268
+ const writes = Array.from(items.values()).map(async (noun) => {
269
+ try {
270
+ await this.saveNodeDirect(noun);
271
+ }
272
+ catch (error) {
273
+ this.logger.error(`Failed to flush noun ${noun.id}:`, error);
274
+ }
275
+ });
276
+ await Promise.all(writes);
277
+ }
278
+ /**
279
+ * Flush verb buffer to Azure
280
+ */
281
+ async flushVerbBuffer(items) {
282
+ const writes = Array.from(items.values()).map(async (verb) => {
283
+ try {
284
+ await this.saveEdgeDirect(verb);
285
+ }
286
+ catch (error) {
287
+ this.logger.error(`Failed to flush verb ${verb.id}:`, error);
288
+ }
289
+ });
290
+ await Promise.all(writes);
291
+ }
292
+ /**
293
+ * Save a noun to storage (internal implementation)
294
+ */
295
+ async saveNoun_internal(noun) {
296
+ return this.saveNode(noun);
297
+ }
298
+ /**
299
+ * Save a node to storage
300
+ */
301
+ async saveNode(node) {
302
+ await this.ensureInitialized();
303
+ // ALWAYS check if we should use high-volume mode (critical for detection)
304
+ this.checkVolumeMode();
305
+ // Use write buffer in high-volume mode
306
+ if (this.highVolumeMode && this.nounWriteBuffer) {
307
+ this.logger.trace(`📝 BUFFERING: Adding noun ${node.id} to write buffer (high-volume mode active)`);
308
+ await this.nounWriteBuffer.add(node.id, node);
309
+ return;
310
+ }
311
+ else if (!this.highVolumeMode) {
312
+ this.logger.trace(`📝 DIRECT WRITE: Saving noun ${node.id} directly (high-volume mode inactive)`);
313
+ }
314
+ // Direct write in normal mode
315
+ await this.saveNodeDirect(node);
316
+ }
317
+ /**
318
+ * Save a node directly to Azure (bypass buffer)
319
+ */
320
+ async saveNodeDirect(node) {
321
+ // Apply backpressure before starting operation
322
+ const requestId = await this.applyBackpressure();
323
+ try {
324
+ this.logger.trace(`Saving node ${node.id}`);
325
+ // Convert connections Map to a serializable format
326
+ // CRITICAL: Only save lightweight vector data (no metadata)
327
+ // Metadata is saved separately via saveNounMetadata() (2-file system)
328
+ const serializableNode = {
329
+ id: node.id,
330
+ vector: node.vector,
331
+ connections: Object.fromEntries(Array.from(node.connections.entries()).map(([level, nounIds]) => [
332
+ level,
333
+ Array.from(nounIds)
334
+ ])),
335
+ level: node.level || 0
336
+ // NO metadata field - saved separately for scalability
337
+ };
338
+ // Get the Azure blob name with UUID-based sharding
339
+ const blobName = this.getNounKey(node.id);
340
+ // Save to Azure Blob Storage
341
+ const blockBlobClient = this.containerClient.getBlockBlobClient(blobName);
342
+ await blockBlobClient.upload(JSON.stringify(serializableNode, null, 2), JSON.stringify(serializableNode).length, {
343
+ blobHTTPHeaders: { blobContentType: 'application/json' }
344
+ });
345
+ // CRITICAL FIX: Only cache nodes with non-empty vectors
346
+ // This prevents cache pollution from HNSW's lazy-loading nodes (vector: [])
347
+ if (node.vector && Array.isArray(node.vector) && node.vector.length > 0) {
348
+ this.nounCacheManager.set(node.id, node);
349
+ }
350
+ // Note: Empty vectors are intentional during HNSW lazy mode - not logged
351
+ // Increment noun count
352
+ const metadata = await this.getNounMetadata(node.id);
353
+ if (metadata && metadata.type) {
354
+ await this.incrementEntityCountSafe(metadata.type);
355
+ }
356
+ this.logger.trace(`Node ${node.id} saved successfully`);
357
+ this.releaseBackpressure(true, requestId);
358
+ }
359
+ catch (error) {
360
+ this.releaseBackpressure(false, requestId);
361
+ // Handle throttling
362
+ if (this.isThrottlingError(error)) {
363
+ await this.handleThrottling(error);
364
+ throw error; // Re-throw for retry at higher level
365
+ }
366
+ this.logger.error(`Failed to save node ${node.id}:`, error);
367
+ throw new Error(`Failed to save node ${node.id}: ${error}`);
368
+ }
369
+ }
370
+ /**
371
+ * Get a noun from storage (internal implementation)
372
+ * v4.0.0: Returns ONLY vector data (no metadata field)
373
+ * Base class combines with metadata via getNoun() -> HNSWNounWithMetadata
374
+ */
375
+ async getNoun_internal(id) {
376
+ // v4.0.0: Return ONLY vector data (no metadata field)
377
+ const node = await this.getNode(id);
378
+ if (!node) {
379
+ return null;
380
+ }
381
+ // Return pure vector structure
382
+ return node;
383
+ }
384
+ /**
385
+ * Get a node from storage
386
+ */
387
+ async getNode(id) {
388
+ await this.ensureInitialized();
389
+ // Check cache first
390
+ const cached = await this.nounCacheManager.get(id);
391
+ // Validate cached object before returning
392
+ if (cached !== undefined && cached !== null) {
393
+ // Validate cached object has required fields (including non-empty vector!)
394
+ if (!cached.id || !cached.vector || !Array.isArray(cached.vector) || cached.vector.length === 0) {
395
+ // Invalid cache detected - log and auto-recover
396
+ prodLog.warn(`[Azure] Invalid cached object for ${id.substring(0, 8)} (${!cached.id ? 'missing id' :
397
+ !cached.vector ? 'missing vector' :
398
+ !Array.isArray(cached.vector) ? 'vector not array' :
399
+ 'empty vector'}) - removing from cache and reloading`);
400
+ this.nounCacheManager.delete(id);
401
+ // Fall through to load from Azure
402
+ }
403
+ else {
404
+ // Valid cache hit
405
+ this.logger.trace(`Cache hit for noun ${id}`);
406
+ return cached;
407
+ }
408
+ }
409
+ else if (cached === null) {
410
+ prodLog.warn(`[Azure] Cache contains null for ${id.substring(0, 8)} - reloading from storage`);
411
+ }
412
+ // Apply backpressure
413
+ const requestId = await this.applyBackpressure();
414
+ try {
415
+ this.logger.trace(`Getting node ${id}`);
416
+ // Get the Azure blob name with UUID-based sharding
417
+ const blobName = this.getNounKey(id);
418
+ // Download from Azure Blob Storage
419
+ const blockBlobClient = this.containerClient.getBlockBlobClient(blobName);
420
+ const downloadResponse = await blockBlobClient.download(0);
421
+ const downloaded = await this.streamToBuffer(downloadResponse.readableStreamBody);
422
+ // Parse JSON
423
+ const data = JSON.parse(downloaded.toString());
424
+ // Convert serialized connections back to Map<number, Set<string>>
425
+ const connections = new Map();
426
+ for (const [level, nounIds] of Object.entries(data.connections || {})) {
427
+ connections.set(Number(level), new Set(nounIds));
428
+ }
429
+ // CRITICAL: Only return lightweight vector data (no metadata)
430
+ // Metadata is retrieved separately via getNounMetadata() (2-file system)
431
+ const node = {
432
+ id: data.id,
433
+ vector: data.vector,
434
+ connections,
435
+ level: data.level || 0
436
+ // NO metadata field - retrieved separately for scalability
437
+ };
438
+ // CRITICAL FIX: Only cache valid nodes with non-empty vectors (never cache null or empty)
439
+ if (node && node.id && node.vector && Array.isArray(node.vector) && node.vector.length > 0) {
440
+ this.nounCacheManager.set(id, node);
441
+ }
442
+ else {
443
+ prodLog.warn(`[Azure] Not caching invalid node ${id.substring(0, 8)} (missing id/vector or empty vector)`);
444
+ }
445
+ this.logger.trace(`Successfully retrieved node ${id}`);
446
+ this.releaseBackpressure(true, requestId);
447
+ return node;
448
+ }
449
+ catch (error) {
450
+ this.releaseBackpressure(false, requestId);
451
+ // Check if this is a "not found" error
452
+ if (error.statusCode === 404 || error.code === 'BlobNotFound') {
453
+ this.logger.trace(`Node not found: ${id}`);
454
+ // CRITICAL FIX: Do NOT cache null values
455
+ return null;
456
+ }
457
+ // Handle throttling
458
+ if (this.isThrottlingError(error)) {
459
+ await this.handleThrottling(error);
460
+ throw error;
461
+ }
462
+ // All other errors should throw, not return null
463
+ this.logger.error(`Failed to get node ${id}:`, error);
464
+ throw BrainyError.fromError(error, `getNoun(${id})`);
465
+ }
466
+ }
467
+ /**
468
+ * Delete a noun from storage (internal implementation)
469
+ */
470
+ async deleteNoun_internal(id) {
471
+ await this.ensureInitialized();
472
+ const requestId = await this.applyBackpressure();
473
+ try {
474
+ this.logger.trace(`Deleting noun ${id}`);
475
+ // Get the Azure blob name
476
+ const blobName = this.getNounKey(id);
477
+ // Delete from Azure
478
+ const blockBlobClient = this.containerClient.getBlockBlobClient(blobName);
479
+ await blockBlobClient.delete();
480
+ // Remove from cache
481
+ this.nounCacheManager.delete(id);
482
+ // Decrement noun count
483
+ const metadata = await this.getNounMetadata(id);
484
+ if (metadata && metadata.type) {
485
+ await this.decrementEntityCountSafe(metadata.type);
486
+ }
487
+ this.logger.trace(`Noun ${id} deleted successfully`);
488
+ this.releaseBackpressure(true, requestId);
489
+ }
490
+ catch (error) {
491
+ this.releaseBackpressure(false, requestId);
492
+ if (error.statusCode === 404 || error.code === 'BlobNotFound') {
493
+ // Already deleted
494
+ this.logger.trace(`Noun ${id} not found (already deleted)`);
495
+ return;
496
+ }
497
+ // Handle throttling
498
+ if (this.isThrottlingError(error)) {
499
+ await this.handleThrottling(error);
500
+ throw error;
501
+ }
502
+ this.logger.error(`Failed to delete noun ${id}:`, error);
503
+ throw new Error(`Failed to delete noun ${id}: ${error}`);
504
+ }
505
+ }
506
+ /**
507
+ * Write an object to a specific path in Azure
508
+ * Primitive operation required by base class
509
+ * @protected
510
+ */
511
+ async writeObjectToPath(path, data) {
512
+ await this.ensureInitialized();
513
+ try {
514
+ this.logger.trace(`Writing object to path: ${path}`);
515
+ const blockBlobClient = this.containerClient.getBlockBlobClient(path);
516
+ const content = JSON.stringify(data, null, 2);
517
+ await blockBlobClient.upload(content, content.length, {
518
+ blobHTTPHeaders: { blobContentType: 'application/json' }
519
+ });
520
+ this.logger.trace(`Object written successfully to ${path}`);
521
+ }
522
+ catch (error) {
523
+ this.logger.error(`Failed to write object to ${path}:`, error);
524
+ throw new Error(`Failed to write object to ${path}: ${error}`);
525
+ }
526
+ }
527
+ /**
528
+ * Read an object from a specific path in Azure
529
+ * Primitive operation required by base class
530
+ * @protected
531
+ */
532
+ async readObjectFromPath(path) {
533
+ await this.ensureInitialized();
534
+ try {
535
+ this.logger.trace(`Reading object from path: ${path}`);
536
+ const blockBlobClient = this.containerClient.getBlockBlobClient(path);
537
+ const downloadResponse = await blockBlobClient.download(0);
538
+ const downloaded = await this.streamToBuffer(downloadResponse.readableStreamBody);
539
+ const data = JSON.parse(downloaded.toString());
540
+ this.logger.trace(`Object read successfully from ${path}`);
541
+ return data;
542
+ }
543
+ catch (error) {
544
+ // Check if this is a "not found" error
545
+ if (error.statusCode === 404 || error.code === 'BlobNotFound') {
546
+ this.logger.trace(`Object not found at ${path}`);
547
+ return null;
548
+ }
549
+ this.logger.error(`Failed to read object from ${path}:`, error);
550
+ throw BrainyError.fromError(error, `readObjectFromPath(${path})`);
551
+ }
552
+ }
553
+ /**
554
+ * Delete an object from a specific path in Azure
555
+ * Primitive operation required by base class
556
+ * @protected
557
+ */
558
+ async deleteObjectFromPath(path) {
559
+ await this.ensureInitialized();
560
+ try {
561
+ this.logger.trace(`Deleting object at path: ${path}`);
562
+ const blockBlobClient = this.containerClient.getBlockBlobClient(path);
563
+ await blockBlobClient.delete();
564
+ this.logger.trace(`Object deleted successfully from ${path}`);
565
+ }
566
+ catch (error) {
567
+ // If already deleted (404), treat as success
568
+ if (error.statusCode === 404 || error.code === 'BlobNotFound') {
569
+ this.logger.trace(`Object at ${path} not found (already deleted)`);
570
+ return;
571
+ }
572
+ this.logger.error(`Failed to delete object from ${path}:`, error);
573
+ throw new Error(`Failed to delete object from ${path}: ${error}`);
574
+ }
575
+ }
576
+ /**
577
+ * Batch delete multiple blobs from Azure Blob Storage
578
+ * Deletes up to 256 blobs per batch (Azure limit)
579
+ * Handles throttling, retries, and partial failures
580
+ *
581
+ * @param keys - Array of blob names (paths) to delete
582
+ * @param options - Configuration options for batch deletion
583
+ * @returns Statistics about successful and failed deletions
584
+ */
585
+ async batchDelete(keys, options = {}) {
586
+ await this.ensureInitialized();
587
+ const { maxRetries = 3, retryDelayMs = 1000, continueOnError = true } = options;
588
+ if (!keys || keys.length === 0) {
589
+ return {
590
+ totalRequested: 0,
591
+ successfulDeletes: 0,
592
+ failedDeletes: 0,
593
+ errors: []
594
+ };
595
+ }
596
+ this.logger.info(`Starting batch delete of ${keys.length} blobs`);
597
+ const stats = {
598
+ totalRequested: keys.length,
599
+ successfulDeletes: 0,
600
+ failedDeletes: 0,
601
+ errors: []
602
+ };
603
+ // Chunk keys into batches of max 256 (Azure limit)
604
+ const MAX_BATCH_SIZE = 256;
605
+ const batches = [];
606
+ for (let i = 0; i < keys.length; i += MAX_BATCH_SIZE) {
607
+ batches.push(keys.slice(i, i + MAX_BATCH_SIZE));
608
+ }
609
+ this.logger.debug(`Split ${keys.length} keys into ${batches.length} batches`);
610
+ // Process each batch
611
+ for (let batchIndex = 0; batchIndex < batches.length; batchIndex++) {
612
+ const batch = batches[batchIndex];
613
+ let retryCount = 0;
614
+ let batchSuccess = false;
615
+ while (retryCount <= maxRetries && !batchSuccess) {
616
+ const requestId = await this.applyBackpressure();
617
+ try {
618
+ const { BlobBatchClient } = await import('@azure/storage-blob');
619
+ this.logger.debug(`Processing batch ${batchIndex + 1}/${batches.length} with ${batch.length} blobs (attempt ${retryCount + 1}/${maxRetries + 1})`);
620
+ // Create batch client
621
+ const batchClient = this.containerClient.getBlobBatchClient();
622
+ // Execute batch delete
623
+ const deletePromises = batch.map((key) => {
624
+ const blobClient = this.containerClient.getBlockBlobClient(key);
625
+ return blobClient.url;
626
+ });
627
+ // Use batch delete
628
+ const batchDeleteResponse = await batchClient.deleteBlobs(batch.map(key => this.containerClient.getBlockBlobClient(key).url), {
629
+ // Additional options can be added here
630
+ });
631
+ this.logger.debug(`Batch ${batchIndex + 1} completed`);
632
+ // Process results
633
+ for (let i = 0; i < batch.length; i++) {
634
+ const key = batch[i];
635
+ const subResponse = batchDeleteResponse.subResponses[i];
636
+ if (subResponse.status === 202 || subResponse.status === 404) {
637
+ // 202 Accepted = successful delete
638
+ // 404 Not Found = already deleted (treat as success)
639
+ stats.successfulDeletes++;
640
+ if (subResponse.status === 404) {
641
+ this.logger.trace(`Blob ${key} already deleted (404)`);
642
+ }
643
+ }
644
+ else {
645
+ // Deletion failed
646
+ stats.failedDeletes++;
647
+ stats.errors.push({
648
+ key,
649
+ error: `HTTP ${subResponse.status}: ${subResponse.errorCode || 'Unknown error'}`
650
+ });
651
+ this.logger.error(`Failed to delete ${key}: ${subResponse.status} - ${subResponse.errorCode}`);
652
+ }
653
+ }
654
+ this.releaseBackpressure(true, requestId);
655
+ batchSuccess = true;
656
+ }
657
+ catch (error) {
658
+ this.releaseBackpressure(false, requestId);
659
+ // Handle throttling
660
+ if (this.isThrottlingError(error)) {
661
+ this.logger.warn(`Batch ${batchIndex + 1} throttled, waiting before retry...`);
662
+ await this.handleThrottling(error);
663
+ retryCount++;
664
+ if (retryCount <= maxRetries) {
665
+ const delay = retryDelayMs * Math.pow(2, retryCount - 1); // Exponential backoff
666
+ await new Promise((resolve) => setTimeout(resolve, delay));
667
+ }
668
+ continue;
669
+ }
670
+ // Handle other errors
671
+ this.logger.error(`Batch ${batchIndex + 1} failed (attempt ${retryCount + 1}/${maxRetries + 1}):`, error);
672
+ if (retryCount < maxRetries) {
673
+ retryCount++;
674
+ const delay = retryDelayMs * Math.pow(2, retryCount - 1);
675
+ await new Promise((resolve) => setTimeout(resolve, delay));
676
+ continue;
677
+ }
678
+ // Max retries exceeded
679
+ if (continueOnError) {
680
+ // Mark all keys in this batch as failed and continue to next batch
681
+ for (const key of batch) {
682
+ stats.failedDeletes++;
683
+ stats.errors.push({
684
+ key,
685
+ error: error.message || String(error)
686
+ });
687
+ }
688
+ this.logger.error(`Batch ${batchIndex + 1} failed after ${maxRetries} retries, continuing to next batch`);
689
+ batchSuccess = true; // Mark as "handled" to move to next batch
690
+ }
691
+ else {
692
+ // Stop processing and throw error
693
+ throw BrainyError.storage(`Batch delete failed at batch ${batchIndex + 1}/${batches.length} after ${maxRetries} retries. Total: ${stats.successfulDeletes} deleted, ${stats.failedDeletes} failed`, error instanceof Error ? error : undefined);
694
+ }
695
+ }
696
+ }
697
+ }
698
+ this.logger.info(`Batch delete completed: ${stats.successfulDeletes}/${stats.totalRequested} successful, ${stats.failedDeletes} failed`);
699
+ return stats;
700
+ }
701
+ /**
702
+ * List all objects under a specific prefix in Azure
703
+ * Primitive operation required by base class
704
+ * @protected
705
+ */
706
+ async listObjectsUnderPath(prefix) {
707
+ await this.ensureInitialized();
708
+ try {
709
+ this.logger.trace(`Listing objects under prefix: ${prefix}`);
710
+ const paths = [];
711
+ for await (const blob of this.containerClient.listBlobsFlat({ prefix })) {
712
+ if (blob.name) {
713
+ paths.push(blob.name);
714
+ }
715
+ }
716
+ this.logger.trace(`Found ${paths.length} objects under ${prefix}`);
717
+ return paths;
718
+ }
719
+ catch (error) {
720
+ this.logger.error(`Failed to list objects under ${prefix}:`, error);
721
+ throw new Error(`Failed to list objects under ${prefix}: ${error}`);
722
+ }
723
+ }
724
+ /**
725
+ * Helper: Convert Azure stream to buffer
726
+ */
727
+ async streamToBuffer(readableStream) {
728
+ return new Promise((resolve, reject) => {
729
+ const chunks = [];
730
+ readableStream.on('data', (data) => {
731
+ chunks.push(data instanceof Buffer ? data : Buffer.from(data));
732
+ });
733
+ readableStream.on('end', () => {
734
+ resolve(Buffer.concat(chunks));
735
+ });
736
+ readableStream.on('error', reject);
737
+ });
738
+ }
739
+ /**
740
+ * Save a verb to storage (internal implementation)
741
+ */
742
+ async saveVerb_internal(verb) {
743
+ return this.saveEdge(verb);
744
+ }
745
+ /**
746
+ * Save an edge to storage
747
+ */
748
+ async saveEdge(edge) {
749
+ await this.ensureInitialized();
750
+ // Check volume mode
751
+ this.checkVolumeMode();
752
+ // Use write buffer in high-volume mode
753
+ if (this.highVolumeMode && this.verbWriteBuffer) {
754
+ this.logger.trace(`📝 BUFFERING: Adding verb ${edge.id} to write buffer`);
755
+ await this.verbWriteBuffer.add(edge.id, edge);
756
+ return;
757
+ }
758
+ // Direct write in normal mode
759
+ await this.saveEdgeDirect(edge);
760
+ }
761
+ /**
762
+ * Save an edge directly to Azure (bypass buffer)
763
+ */
764
+ async saveEdgeDirect(edge) {
765
+ const requestId = await this.applyBackpressure();
766
+ try {
767
+ this.logger.trace(`Saving edge ${edge.id}`);
768
+ // Convert connections Map to serializable format
769
+ // ARCHITECTURAL FIX: Include core relational fields in verb vector file
770
+ // These fields are essential for 90% of operations - no metadata lookup needed
771
+ const serializableEdge = {
772
+ id: edge.id,
773
+ vector: edge.vector,
774
+ connections: Object.fromEntries(Array.from(edge.connections.entries()).map(([level, verbIds]) => [
775
+ level,
776
+ Array.from(verbIds)
777
+ ])),
778
+ // CORE RELATIONAL DATA (v4.0.0)
779
+ verb: edge.verb,
780
+ sourceId: edge.sourceId,
781
+ targetId: edge.targetId,
782
+ // User metadata (if any) - saved separately for scalability
783
+ // metadata field is saved separately via saveVerbMetadata()
784
+ };
785
+ // Get the Azure blob name with UUID-based sharding
786
+ const blobName = this.getVerbKey(edge.id);
787
+ // Save to Azure
788
+ const blockBlobClient = this.containerClient.getBlockBlobClient(blobName);
789
+ await blockBlobClient.upload(JSON.stringify(serializableEdge, null, 2), JSON.stringify(serializableEdge).length, {
790
+ blobHTTPHeaders: { blobContentType: 'application/json' }
791
+ });
792
+ // Update cache
793
+ this.verbCacheManager.set(edge.id, edge);
794
+ // Increment verb count
795
+ const metadata = await this.getVerbMetadata(edge.id);
796
+ if (metadata && metadata.type) {
797
+ await this.incrementVerbCount(metadata.type);
798
+ }
799
+ this.logger.trace(`Edge ${edge.id} saved successfully`);
800
+ this.releaseBackpressure(true, requestId);
801
+ }
802
+ catch (error) {
803
+ this.releaseBackpressure(false, requestId);
804
+ if (this.isThrottlingError(error)) {
805
+ await this.handleThrottling(error);
806
+ throw error;
807
+ }
808
+ this.logger.error(`Failed to save edge ${edge.id}:`, error);
809
+ throw new Error(`Failed to save edge ${edge.id}: ${error}`);
810
+ }
811
+ }
812
+ /**
813
+ * Get a verb from storage (internal implementation)
814
+ * v4.0.0: Returns ONLY vector + core relational fields (no metadata field)
815
+ * Base class combines with metadata via getVerb() -> HNSWVerbWithMetadata
816
+ */
817
+ async getVerb_internal(id) {
818
+ // v4.0.0: Return ONLY vector + core relational data (no metadata field)
819
+ const edge = await this.getEdge(id);
820
+ if (!edge) {
821
+ return null;
822
+ }
823
+ // Return pure vector + core fields structure
824
+ return edge;
825
+ }
826
    /**
     * Get an edge from storage.
     *
     * Lookup order: in-memory verb cache first, then the sharded Azure blob.
     * The stored JSON keeps `connections` as a plain object mapping
     * level -> array of ids; this method rebuilds it as Map<number, Set>.
     * A missing blob (404/BlobNotFound) is an expected outcome and yields
     * null; throttling errors are re-thrown after handleThrottling().
     * @param id - Edge id
     * @returns The edge (vector + connections + core relational fields,
     *          NO user metadata), or null when the blob does not exist
     */
    async getEdge(id) {
        await this.ensureInitialized();
        // Check cache first
        const cached = this.verbCacheManager.get(id);
        if (cached) {
            this.logger.trace(`Cache hit for verb ${id}`);
            return cached;
        }
        const requestId = await this.applyBackpressure();
        try {
            this.logger.trace(`Getting edge ${id}`);
            // Get the Azure blob name with UUID-based sharding
            const blobName = this.getVerbKey(id);
            // Download from Azure
            const blockBlobClient = this.containerClient.getBlockBlobClient(blobName);
            const downloadResponse = await blockBlobClient.download(0);
            const downloaded = await this.streamToBuffer(downloadResponse.readableStreamBody);
            // Parse JSON
            const data = JSON.parse(downloaded.toString());
            // Convert serialized connections back to Map<number, Set<string>>
            const connections = new Map();
            for (const [level, verbIds] of Object.entries(data.connections || {})) {
                connections.set(Number(level), new Set(verbIds));
            }
            // v4.0.0: Return HNSWVerb with core relational fields (NO metadata field)
            const edge = {
                id: data.id,
                vector: data.vector,
                connections,
                // CORE RELATIONAL DATA (read from vector file)
                verb: data.verb,
                sourceId: data.sourceId,
                targetId: data.targetId
                // ✅ NO metadata field in v4.0.0
                // User metadata retrieved separately via getVerbMetadata()
            };
            // Update cache so subsequent reads skip the blob download
            this.verbCacheManager.set(id, edge);
            this.logger.trace(`Successfully retrieved edge ${id}`);
            this.releaseBackpressure(true, requestId);
            return edge;
        }
        catch (error) {
            this.releaseBackpressure(false, requestId);
            // Check if this is a "not found" error — expected, not a failure
            if (error.statusCode === 404 || error.code === 'BlobNotFound') {
                this.logger.trace(`Edge not found: ${id}`);
                return null;
            }
            if (this.isThrottlingError(error)) {
                await this.handleThrottling(error);
                throw error;
            }
            this.logger.error(`Failed to get edge ${id}:`, error);
            throw BrainyError.fromError(error, `getVerb(${id})`);
        }
    }
886
    /**
     * Delete a verb from storage (internal implementation).
     * Removes the sharded vector blob, evicts the cache entry, then
     * decrements the per-type verb count. The type is read via
     * getVerbMetadata() AFTER the blob delete — metadata is stored
     * separately (see saveEdgeDirect), so it is presumably still readable
     * at that point.
     * Deleting a verb that no longer exists is treated as success.
     * @param id - Verb id to delete
     * @throws Error for non-404 Azure failures; throttling errors are
     *         re-thrown after handleThrottling()
     */
    async deleteVerb_internal(id) {
        await this.ensureInitialized();
        const requestId = await this.applyBackpressure();
        try {
            this.logger.trace(`Deleting verb ${id}`);
            // Get the Azure blob name
            const blobName = this.getVerbKey(id);
            // Delete from Azure
            const blockBlobClient = this.containerClient.getBlockBlobClient(blobName);
            await blockBlobClient.delete();
            // Remove from cache
            this.verbCacheManager.delete(id);
            // Decrement verb count (type comes from the separate metadata blob)
            const metadata = await this.getVerbMetadata(id);
            if (metadata && metadata.type) {
                await this.decrementVerbCount(metadata.type);
            }
            this.logger.trace(`Verb ${id} deleted successfully`);
            this.releaseBackpressure(true, requestId);
        }
        catch (error) {
            this.releaseBackpressure(false, requestId);
            if (error.statusCode === 404 || error.code === 'BlobNotFound') {
                // Already deleted — idempotent success
                this.logger.trace(`Verb ${id} not found (already deleted)`);
                return;
            }
            if (this.isThrottlingError(error)) {
                await this.handleThrottling(error);
                throw error;
            }
            this.logger.error(`Failed to delete verb ${id}:`, error);
            throw new Error(`Failed to delete verb ${id}: ${error}`);
        }
    }
924
    /**
     * Get nouns with pagination.
     * v4.0.0: Returns HNSWNounWithMetadata[] (each item carries metadata).
     * Iterates the flat blob listing under the noun prefix, loading each
     * node plus its metadata and applying the optional nounType filter.
     * Nouns with no readable node or metadata are silently skipped.
     *
     * NOTE(review): hasMore is always reported as false and nextCursor is
     * never set, even when the scan stopped at `limit` — confirm callers
     * do not rely on these for real cursor-based paging.
     * @param options - { limit? (default 100), filter?: { nounType? } }
     * @returns { items, totalCount, hasMore, nextCursor }
     */
    async getNounsWithPagination(options = {}) {
        await this.ensureInitialized();
        const limit = options.limit || 100;
        // Simplified implementation for Azure (can be optimized similar to GCS)
        const items = [];
        const iterator = this.containerClient.listBlobsFlat({ prefix: this.nounPrefix });
        let count = 0;
        for await (const blob of iterator) {
            if (count >= limit)
                break;
            if (!blob.name || !blob.name.endsWith('.json'))
                continue;
            // Extract UUID from blob name (last path segment, minus extension)
            const parts = blob.name.split('/');
            const fileName = parts[parts.length - 1];
            const id = fileName.replace('.json', '');
            const node = await this.getNode(id);
            if (!node)
                continue;
            const metadata = await this.getNounMetadata(id);
            if (!metadata)
                continue;
            // Apply filters if provided
            if (options.filter) {
                if (options.filter.nounType) {
                    // Accept either a single type or an array of types
                    const nounTypes = Array.isArray(options.filter.nounType)
                        ? options.filter.nounType
                        : [options.filter.nounType];
                    const nounType = metadata.type || metadata.noun;
                    if (!nounType || !nounTypes.includes(nounType)) {
                        continue;
                    }
                }
            }
            // Combine node with metadata
            items.push({
                ...node,
                metadata
            });
            count++;
        }
        return {
            items,
            totalCount: this.totalNounCount,
            hasMore: false,
            nextCursor: undefined
        };
    }
977
+ /**
978
+ * Get nouns by noun type (internal implementation)
979
+ */
980
+ async getNounsByNounType_internal(nounType) {
981
+ const result = await this.getNounsWithPagination({
982
+ limit: 10000, // Large limit for backward compatibility
983
+ filter: { nounType }
984
+ });
985
+ return result.items;
986
+ }
987
+ /**
988
+ * Get verbs by source ID (internal implementation)
989
+ */
990
+ async getVerbsBySource_internal(sourceId) {
991
+ // Simplified: scan all verbs and filter
992
+ const items = [];
993
+ const iterator = this.containerClient.listBlobsFlat({ prefix: this.verbPrefix });
994
+ for await (const blob of iterator) {
995
+ if (!blob.name || !blob.name.endsWith('.json'))
996
+ continue;
997
+ const parts = blob.name.split('/');
998
+ const fileName = parts[parts.length - 1];
999
+ const id = fileName.replace('.json', '');
1000
+ const verb = await this.getEdge(id);
1001
+ if (!verb || verb.sourceId !== sourceId)
1002
+ continue;
1003
+ const metadata = await this.getVerbMetadata(id);
1004
+ items.push({
1005
+ ...verb,
1006
+ metadata: metadata || {}
1007
+ });
1008
+ }
1009
+ return items;
1010
+ }
1011
+ /**
1012
+ * Get verbs by target ID (internal implementation)
1013
+ */
1014
+ async getVerbsByTarget_internal(targetId) {
1015
+ // Simplified: scan all verbs and filter
1016
+ const items = [];
1017
+ const iterator = this.containerClient.listBlobsFlat({ prefix: this.verbPrefix });
1018
+ for await (const blob of iterator) {
1019
+ if (!blob.name || !blob.name.endsWith('.json'))
1020
+ continue;
1021
+ const parts = blob.name.split('/');
1022
+ const fileName = parts[parts.length - 1];
1023
+ const id = fileName.replace('.json', '');
1024
+ const verb = await this.getEdge(id);
1025
+ if (!verb || verb.targetId !== targetId)
1026
+ continue;
1027
+ const metadata = await this.getVerbMetadata(id);
1028
+ items.push({
1029
+ ...verb,
1030
+ metadata: metadata || {}
1031
+ });
1032
+ }
1033
+ return items;
1034
+ }
1035
+ /**
1036
+ * Get verbs by type (internal implementation)
1037
+ */
1038
+ async getVerbsByType_internal(type) {
1039
+ // Simplified: scan all verbs and filter
1040
+ const items = [];
1041
+ const iterator = this.containerClient.listBlobsFlat({ prefix: this.verbPrefix });
1042
+ for await (const blob of iterator) {
1043
+ if (!blob.name || !blob.name.endsWith('.json'))
1044
+ continue;
1045
+ const parts = blob.name.split('/');
1046
+ const fileName = parts[parts.length - 1];
1047
+ const id = fileName.replace('.json', '');
1048
+ const verb = await this.getEdge(id);
1049
+ if (!verb || verb.verb !== type)
1050
+ continue;
1051
+ const metadata = await this.getVerbMetadata(id);
1052
+ items.push({
1053
+ ...verb,
1054
+ metadata: metadata || {}
1055
+ });
1056
+ }
1057
+ return items;
1058
+ }
1059
    /**
     * Clear all data from storage.
     * Deletes EVERY blob in the container one at a time (sequential
     * round-trips — O(n) requests), then resets the in-memory caches and
     * counters so the adapter reflects an empty store.
     * @throws Error when the listing or any individual delete fails
     */
    async clear() {
        await this.ensureInitialized();
        try {
            this.logger.info('🧹 Clearing all data from Azure container...');
            // Delete all blobs in container (sequential; fails fast on error)
            for await (const blob of this.containerClient.listBlobsFlat()) {
                if (blob.name) {
                    const blockBlobClient = this.containerClient.getBlockBlobClient(blob.name);
                    await blockBlobClient.delete();
                }
            }
            // Clear caches
            this.nounCacheManager.clear();
            this.verbCacheManager.clear();
            // Reset counts
            this.totalNounCount = 0;
            this.totalVerbCount = 0;
            this.entityCounts.clear();
            this.verbCounts.clear();
            this.logger.info('✅ All data cleared from Azure');
        }
        catch (error) {
            this.logger.error('Failed to clear Azure storage:', error);
            throw new Error(`Failed to clear Azure storage: ${error}`);
        }
    }
1088
+ /**
1089
+ * Get storage status
1090
+ */
1091
+ async getStorageStatus() {
1092
+ await this.ensureInitialized();
1093
+ try {
1094
+ const properties = await this.containerClient.getProperties();
1095
+ return {
1096
+ type: 'azure',
1097
+ used: 0, // Azure doesn't provide usage info easily
1098
+ quota: null, // No quota in Azure Blob Storage
1099
+ details: {
1100
+ container: this.containerName,
1101
+ lastModified: properties.lastModified,
1102
+ etag: properties.etag
1103
+ }
1104
+ };
1105
+ }
1106
+ catch (error) {
1107
+ this.logger.error('Failed to get storage status:', error);
1108
+ return {
1109
+ type: 'azure',
1110
+ used: 0,
1111
+ quota: null
1112
+ };
1113
+ }
1114
+ }
1115
+ /**
1116
+ * Save statistics data to storage
1117
+ */
1118
+ async saveStatisticsData(statistics) {
1119
+ await this.ensureInitialized();
1120
+ try {
1121
+ const key = `${this.systemPrefix}${STATISTICS_KEY}.json`;
1122
+ this.logger.trace(`Saving statistics to ${key}`);
1123
+ const blockBlobClient = this.containerClient.getBlockBlobClient(key);
1124
+ const content = JSON.stringify(statistics, null, 2);
1125
+ await blockBlobClient.upload(content, content.length, {
1126
+ blobHTTPHeaders: { blobContentType: 'application/json' }
1127
+ });
1128
+ this.logger.trace('Statistics saved successfully');
1129
+ }
1130
+ catch (error) {
1131
+ this.logger.error('Failed to save statistics:', error);
1132
+ throw new Error(`Failed to save statistics: ${error}`);
1133
+ }
1134
+ }
1135
    /**
     * Get statistics data from storage.
     * Reads the persisted statistics blob and overlays the authoritative
     * in-memory totals (totalNodes/totalEdges) plus a fresh timestamp.
     * When the blob does not exist yet (first restart) a minimal
     * statistics object is synthesized from the in-memory counts.
     * @returns Statistics object, or null when reading fails for any
     *          reason other than the blob being missing
     */
    async getStatisticsData() {
        await this.ensureInitialized();
        try {
            const key = `${this.systemPrefix}${STATISTICS_KEY}.json`;
            this.logger.trace(`Getting statistics from ${key}`);
            const blockBlobClient = this.containerClient.getBlockBlobClient(key);
            const downloadResponse = await blockBlobClient.download(0);
            const downloaded = await this.streamToBuffer(downloadResponse.readableStreamBody);
            const statistics = JSON.parse(downloaded.toString());
            this.logger.trace('Statistics retrieved successfully');
            // CRITICAL FIX: Populate totalNodes and totalEdges from in-memory counts
            return {
                ...statistics,
                totalNodes: this.totalNounCount,
                totalEdges: this.totalVerbCount,
                lastUpdated: new Date().toISOString()
            };
        }
        catch (error) {
            if (error.statusCode === 404 || error.code === 'BlobNotFound') {
                // Statistics file doesn't exist yet (first restart)
                this.logger.trace('Statistics file not found - returning minimal stats with counts');
                return {
                    nounCount: {},
                    verbCount: {},
                    metadataCount: {},
                    hnswIndexSize: 0,
                    totalNodes: this.totalNounCount,
                    totalEdges: this.totalVerbCount,
                    totalMetadata: 0,
                    lastUpdated: new Date().toISOString()
                };
            }
            // Non-404 failures degrade to null rather than throwing
            this.logger.error('Failed to get statistics:', error);
            return null;
        }
    }
1175
    /**
     * Initialize entity/verb counts from the persisted counts blob.
     * Fallback behavior:
     *  - 404/BlobNotFound: first-time setup — rebuild counts by scanning
     *    the container.
     *  - any other error (network/permissions): logged loudly, then a
     *    recovery scan is attempted rather than silently starting at 0.
     */
    async initializeCounts() {
        const key = `${this.systemPrefix}counts.json`;
        try {
            const blockBlobClient = this.containerClient.getBlockBlobClient(key);
            const downloadResponse = await blockBlobClient.download(0);
            const downloaded = await this.streamToBuffer(downloadResponse.readableStreamBody);
            const counts = JSON.parse(downloaded.toString());
            // Missing fields default to zero / empty maps
            this.totalNounCount = counts.totalNounCount || 0;
            this.totalVerbCount = counts.totalVerbCount || 0;
            this.entityCounts = new Map(Object.entries(counts.entityCounts || {}));
            this.verbCounts = new Map(Object.entries(counts.verbCounts || {}));
            prodLog.info(`📊 Loaded counts from storage: ${this.totalNounCount} nouns, ${this.totalVerbCount} verbs`);
        }
        catch (error) {
            if (error.statusCode === 404 || error.code === 'BlobNotFound') {
                // No counts file yet - initialize from scan (first-time setup)
                prodLog.info('📊 No counts file found - this is normal for first init');
                await this.initializeCountsFromScan();
            }
            else {
                // CRITICAL FIX: Don't silently fail on network/permission errors
                this.logger.error('❌ CRITICAL: Failed to load counts from Azure:', error);
                prodLog.error(`❌ Error loading ${key}: ${error.message}`);
                // Try to recover by scanning the container
                prodLog.warn('⚠️ Attempting recovery by scanning Azure container...');
                await this.initializeCountsFromScan();
            }
        }
    }
1207
+ /**
1208
+ * Initialize counts from storage scan (expensive - only for first-time init)
1209
+ */
1210
+ async initializeCountsFromScan() {
1211
+ try {
1212
+ prodLog.info('📊 Scanning Azure container to initialize counts...');
1213
+ // Count nouns
1214
+ let nounCount = 0;
1215
+ for await (const blob of this.containerClient.listBlobsFlat({ prefix: this.nounPrefix })) {
1216
+ if (blob.name && blob.name.endsWith('.json')) {
1217
+ nounCount++;
1218
+ }
1219
+ }
1220
+ this.totalNounCount = nounCount;
1221
+ // Count verbs
1222
+ let verbCount = 0;
1223
+ for await (const blob of this.containerClient.listBlobsFlat({ prefix: this.verbPrefix })) {
1224
+ if (blob.name && blob.name.endsWith('.json')) {
1225
+ verbCount++;
1226
+ }
1227
+ }
1228
+ this.totalVerbCount = verbCount;
1229
+ // Save initial counts
1230
+ if (this.totalNounCount > 0 || this.totalVerbCount > 0) {
1231
+ await this.persistCounts();
1232
+ prodLog.info(`✅ Initialized counts from scan: ${this.totalNounCount} nouns, ${this.totalVerbCount} verbs`);
1233
+ }
1234
+ else {
1235
+ prodLog.warn(`⚠️ No entities found during container scan. Check that entities exist and prefixes are correct.`);
1236
+ }
1237
+ }
1238
+ catch (error) {
1239
+ // CRITICAL FIX: Don't silently fail - this prevents data loss scenarios
1240
+ this.logger.error('❌ CRITICAL: Failed to initialize counts from Azure container scan:', error);
1241
+ throw new Error(`Failed to initialize Azure storage counts: ${error}. This prevents container restarts from working correctly.`);
1242
+ }
1243
+ }
1244
+ /**
1245
+ * Persist counts to storage
1246
+ */
1247
+ async persistCounts() {
1248
+ try {
1249
+ const key = `${this.systemPrefix}counts.json`;
1250
+ const counts = {
1251
+ totalNounCount: this.totalNounCount,
1252
+ totalVerbCount: this.totalVerbCount,
1253
+ entityCounts: Object.fromEntries(this.entityCounts),
1254
+ verbCounts: Object.fromEntries(this.verbCounts),
1255
+ lastUpdated: new Date().toISOString()
1256
+ };
1257
+ const blockBlobClient = this.containerClient.getBlockBlobClient(key);
1258
+ const content = JSON.stringify(counts, null, 2);
1259
+ await blockBlobClient.upload(content, content.length, {
1260
+ blobHTTPHeaders: { blobContentType: 'application/json' }
1261
+ });
1262
+ }
1263
+ catch (error) {
1264
+ this.logger.error('Error persisting counts:', error);
1265
+ }
1266
+ }
1267
+ /**
1268
+ * Get a noun's vector for HNSW rebuild
1269
+ */
1270
+ async getNounVector(id) {
1271
+ await this.ensureInitialized();
1272
+ const noun = await this.getNode(id);
1273
+ return noun ? noun.vector : null;
1274
+ }
1275
+ /**
1276
+ * Save HNSW graph data for a noun
1277
+ */
1278
+ async saveHNSWData(nounId, hnswData) {
1279
+ await this.ensureInitialized();
1280
+ try {
1281
+ const shard = getShardIdFromUuid(nounId);
1282
+ const key = `entities/nouns/hnsw/${shard}/${nounId}.json`;
1283
+ const blockBlobClient = this.containerClient.getBlockBlobClient(key);
1284
+ const content = JSON.stringify(hnswData, null, 2);
1285
+ await blockBlobClient.upload(content, content.length, {
1286
+ blobHTTPHeaders: { blobContentType: 'application/json' }
1287
+ });
1288
+ }
1289
+ catch (error) {
1290
+ this.logger.error(`Failed to save HNSW data for ${nounId}:`, error);
1291
+ throw new Error(`Failed to save HNSW data for ${nounId}: ${error}`);
1292
+ }
1293
+ }
1294
+ /**
1295
+ * Get HNSW graph data for a noun
1296
+ */
1297
+ async getHNSWData(nounId) {
1298
+ await this.ensureInitialized();
1299
+ try {
1300
+ const shard = getShardIdFromUuid(nounId);
1301
+ const key = `entities/nouns/hnsw/${shard}/${nounId}.json`;
1302
+ const blockBlobClient = this.containerClient.getBlockBlobClient(key);
1303
+ const downloadResponse = await blockBlobClient.download(0);
1304
+ const downloaded = await this.streamToBuffer(downloadResponse.readableStreamBody);
1305
+ return JSON.parse(downloaded.toString());
1306
+ }
1307
+ catch (error) {
1308
+ if (error.statusCode === 404 || error.code === 'BlobNotFound') {
1309
+ return null;
1310
+ }
1311
+ this.logger.error(`Failed to get HNSW data for ${nounId}:`, error);
1312
+ throw new Error(`Failed to get HNSW data for ${nounId}: ${error}`);
1313
+ }
1314
+ }
1315
+ /**
1316
+ * Save HNSW system data (entry point, max level)
1317
+ */
1318
+ async saveHNSWSystem(systemData) {
1319
+ await this.ensureInitialized();
1320
+ try {
1321
+ const key = `${this.systemPrefix}hnsw-system.json`;
1322
+ const blockBlobClient = this.containerClient.getBlockBlobClient(key);
1323
+ const content = JSON.stringify(systemData, null, 2);
1324
+ await blockBlobClient.upload(content, content.length, {
1325
+ blobHTTPHeaders: { blobContentType: 'application/json' }
1326
+ });
1327
+ }
1328
+ catch (error) {
1329
+ this.logger.error('Failed to save HNSW system data:', error);
1330
+ throw new Error(`Failed to save HNSW system data: ${error}`);
1331
+ }
1332
+ }
1333
+ /**
1334
+ * Get HNSW system data (entry point, max level)
1335
+ */
1336
+ async getHNSWSystem() {
1337
+ await this.ensureInitialized();
1338
+ try {
1339
+ const key = `${this.systemPrefix}hnsw-system.json`;
1340
+ const blockBlobClient = this.containerClient.getBlockBlobClient(key);
1341
+ const downloadResponse = await blockBlobClient.download(0);
1342
+ const downloaded = await this.streamToBuffer(downloadResponse.readableStreamBody);
1343
+ return JSON.parse(downloaded.toString());
1344
+ }
1345
+ catch (error) {
1346
+ if (error.statusCode === 404 || error.code === 'BlobNotFound') {
1347
+ return null;
1348
+ }
1349
+ this.logger.error('Failed to get HNSW system data:', error);
1350
+ throw new Error(`Failed to get HNSW system data: ${error}`);
1351
+ }
1352
+ }
1353
+ /**
1354
+ * Set the access tier for a specific blob (v4.0.0 cost optimization)
1355
+ * Azure Blob Storage tiers:
1356
+ * - Hot: $0.0184/GB/month - Frequently accessed data
1357
+ * - Cool: $0.01/GB/month - Infrequently accessed data (45% cheaper)
1358
+ * - Archive: $0.00099/GB/month - Rarely accessed data (99% cheaper!)
1359
+ *
1360
+ * @param blobName - Name of the blob to change tier
1361
+ * @param tier - Target access tier ('Hot', 'Cool', or 'Archive')
1362
+ * @returns Promise that resolves when tier is set
1363
+ *
1364
+ * @example
1365
+ * // Move old vectors to Archive tier (99% cost savings)
1366
+ * await storage.setBlobTier('entities/nouns/vectors/ab/old-id.json', 'Archive')
1367
+ */
1368
+ async setBlobTier(blobName, tier) {
1369
+ await this.ensureInitialized();
1370
+ try {
1371
+ this.logger.info(`Setting blob tier for ${blobName} to ${tier}`);
1372
+ const blockBlobClient = this.containerClient.getBlockBlobClient(blobName);
1373
+ await blockBlobClient.setAccessTier(tier);
1374
+ this.logger.info(`Successfully set ${blobName} to ${tier} tier`);
1375
+ }
1376
+ catch (error) {
1377
+ if (error.statusCode === 404 || error.code === 'BlobNotFound') {
1378
+ throw new Error(`Blob not found: ${blobName}`);
1379
+ }
1380
+ this.logger.error(`Failed to set tier for ${blobName}:`, error);
1381
+ throw new Error(`Failed to set blob tier: ${error}`);
1382
+ }
1383
+ }
1384
+ /**
1385
+ * Get the current access tier for a blob
1386
+ *
1387
+ * @param blobName - Name of the blob
1388
+ * @returns Promise that resolves to the current tier or null if not found
1389
+ *
1390
+ * @example
1391
+ * const tier = await storage.getBlobTier('entities/nouns/vectors/ab/id.json')
1392
+ * console.log(`Current tier: ${tier}`) // 'Hot', 'Cool', or 'Archive'
1393
+ */
1394
+ async getBlobTier(blobName) {
1395
+ await this.ensureInitialized();
1396
+ try {
1397
+ const blockBlobClient = this.containerClient.getBlockBlobClient(blobName);
1398
+ const properties = await blockBlobClient.getProperties();
1399
+ return properties.accessTier || null;
1400
+ }
1401
+ catch (error) {
1402
+ if (error.statusCode === 404 || error.code === 'BlobNotFound') {
1403
+ return null;
1404
+ }
1405
+ this.logger.error(`Failed to get tier for ${blobName}:`, error);
1406
+ throw new Error(`Failed to get blob tier: ${error}`);
1407
+ }
1408
+ }
1409
    /**
     * Set access tier for multiple blobs in batch (v4.0.0 cost optimization).
     * Azure has no batch tier API, so blobs are processed in windows of
     * CONCURRENT_LIMIT parallel setBlobTier calls, each with its own
     * exponential-backoff retry loop (throttling errors additionally go
     * through handleThrottling before the backoff delay).
     *
     * NOTE(review): `continueOnError` is accepted but currently unused —
     * per-blob failures are always recorded in stats and processing
     * continues regardless of its value. Confirm intended semantics.
     * @param blobs - Array of { blobName, tier } pairs
     * @param options - { maxRetries?, retryDelayMs?, continueOnError? }
     * @returns Stats: { totalRequested, successfulChanges, failedChanges, errors }
     */
    async setBlobTierBatch(blobs, options = {}) {
        await this.ensureInitialized();
        const { maxRetries = 3, retryDelayMs = 1000, continueOnError = true } = options;
        if (!blobs || blobs.length === 0) {
            // Nothing to do — return empty stats
            return {
                totalRequested: 0,
                successfulChanges: 0,
                failedChanges: 0,
                errors: []
            };
        }
        this.logger.info(`Starting batch tier change for ${blobs.length} blobs`);
        const stats = {
            totalRequested: blobs.length,
            successfulChanges: 0,
            failedChanges: 0,
            errors: []
        };
        // Process each blob (Azure doesn't have batch tier API, so we parallelize)
        const CONCURRENT_LIMIT = 10; // Limit concurrent operations to avoid throttling
        for (let i = 0; i < blobs.length; i += CONCURRENT_LIMIT) {
            const batch = blobs.slice(i, i + CONCURRENT_LIMIT);
            const promises = batch.map(async ({ blobName, tier }) => {
                let retryCount = 0;
                while (retryCount <= maxRetries) {
                    try {
                        await this.setBlobTier(blobName, tier);
                        return { blobName, success: true, error: null };
                    }
                    catch (error) {
                        // Handle throttling
                        if (this.isThrottlingError(error)) {
                            this.logger.warn(`Tier change throttled for ${blobName}, retrying...`);
                            await this.handleThrottling(error);
                            retryCount++;
                            if (retryCount <= maxRetries) {
                                // Exponential backoff: delay doubles each retry
                                const delay = retryDelayMs * Math.pow(2, retryCount - 1);
                                await new Promise((resolve) => setTimeout(resolve, delay));
                            }
                            continue;
                        }
                        // Other errors
                        if (retryCount < maxRetries) {
                            retryCount++;
                            const delay = retryDelayMs * Math.pow(2, retryCount - 1);
                            await new Promise((resolve) => setTimeout(resolve, delay));
                            continue;
                        }
                        // Max retries exceeded
                        return {
                            blobName,
                            success: false,
                            error: error.message || String(error)
                        };
                    }
                }
                // Should never reach here, but TypeScript needs a return
                // (reached only when the throttling branch exhausts retries)
                return {
                    blobName,
                    success: false,
                    error: 'Max retries exceeded'
                };
            });
            const results = await Promise.all(promises);
            // Fold this window's per-blob outcomes into the aggregate stats
            for (const result of results) {
                if (result.success) {
                    stats.successfulChanges++;
                }
                else {
                    stats.failedChanges++;
                    if (result.error) {
                        stats.errors.push({
                            blobName: result.blobName,
                            error: result.error
                        });
                    }
                }
            }
        }
        this.logger.info(`Batch tier change completed: ${stats.successfulChanges}/${stats.totalRequested} successful, ${stats.failedChanges} failed`);
        return stats;
    }
1506
+ /**
1507
+ * Check if a blob in Archive tier has been rehydrated and is ready to read
1508
+ * Archive tier blobs must be rehydrated before they can be read
1509
+ *
1510
+ * @param blobName - Name of the blob to check
1511
+ * @returns Promise that resolves to rehydration status
1512
+ *
1513
+ * @example
1514
+ * const status = await storage.checkRehydrationStatus('entities/nouns/vectors/ab/id.json')
1515
+ * if (status.isRehydrated) {
1516
+ * // Blob is ready to read
1517
+ * const data = await storage.readObjectFromPath('entities/nouns/vectors/ab/id.json')
1518
+ * }
1519
+ */
1520
+ async checkRehydrationStatus(blobName) {
1521
+ await this.ensureInitialized();
1522
+ try {
1523
+ const blockBlobClient = this.containerClient.getBlockBlobClient(blobName);
1524
+ const properties = await blockBlobClient.getProperties();
1525
+ const tier = properties.accessTier;
1526
+ const archiveStatus = properties.archiveStatus;
1527
+ return {
1528
+ isArchived: tier === 'Archive',
1529
+ isRehydrating: archiveStatus === 'rehydrate-pending-to-hot' || archiveStatus === 'rehydrate-pending-to-cool',
1530
+ isRehydrated: tier === 'Hot' || tier === 'Cool',
1531
+ rehydratePriority: properties.rehydratePriority
1532
+ };
1533
+ }
1534
+ catch (error) {
1535
+ if (error.statusCode === 404 || error.code === 'BlobNotFound') {
1536
+ throw new Error(`Blob not found: ${blobName}`);
1537
+ }
1538
+ this.logger.error(`Failed to check rehydration status for ${blobName}:`, error);
1539
+ throw new Error(`Failed to check rehydration status: ${error}`);
1540
+ }
1541
+ }
1542
+ /**
1543
+ * Rehydrate an archived blob (move from Archive to Hot or Cool tier)
1544
+ * Note: Rehydration can take several hours depending on priority
1545
+ *
1546
+ * @param blobName - Name of the blob to rehydrate
1547
+ * @param targetTier - Target tier after rehydration ('Hot' or 'Cool')
1548
+ * @param priority - Rehydration priority ('Standard' or 'High')
1549
+ * Standard: Up to 15 hours, cheaper
1550
+ * High: Up to 1 hour, more expensive
1551
+ * @returns Promise that resolves when rehydration is initiated
1552
+ *
1553
+ * @example
1554
+ * // Rehydrate with standard priority (cheaper, slower)
1555
+ * await storage.rehydrateBlob('entities/nouns/vectors/ab/id.json', 'Cool', 'Standard')
1556
+ *
1557
+ * // Check status
1558
+ * const status = await storage.checkRehydrationStatus('entities/nouns/vectors/ab/id.json')
1559
+ * console.log(`Rehydrating: ${status.isRehydrating}`)
1560
+ */
1561
+ async rehydrateBlob(blobName, targetTier, priority = 'Standard') {
1562
+ await this.ensureInitialized();
1563
+ try {
1564
+ this.logger.info(`Rehydrating blob ${blobName} to ${targetTier} tier with ${priority} priority`);
1565
+ const blockBlobClient = this.containerClient.getBlockBlobClient(blobName);
1566
+ // Set tier with rehydration priority
1567
+ await blockBlobClient.setAccessTier(targetTier, {
1568
+ rehydratePriority: priority
1569
+ });
1570
+ this.logger.info(`Successfully initiated rehydration for ${blobName}`);
1571
+ }
1572
+ catch (error) {
1573
+ if (error.statusCode === 404 || error.code === 'BlobNotFound') {
1574
+ throw new Error(`Blob not found: ${blobName}`);
1575
+ }
1576
+ this.logger.error(`Failed to rehydrate blob ${blobName}:`, error);
1577
+ throw new Error(`Failed to rehydrate blob: ${error}`);
1578
+ }
1579
+ }
1580
+ /**
1581
+ * Set lifecycle management policy for automatic tier transitions and deletions (v4.0.0)
1582
+ * Automates cost optimization by moving old data to cheaper tiers or deleting it
1583
+ *
1584
+ * Azure Lifecycle Management rules run once per day and apply to the entire container.
1585
+ * Rules are evaluated against blob properties like lastModifiedTime and lastAccessTime.
1586
+ *
1587
+ * @param options - Lifecycle policy configuration
1588
+ * @returns Promise that resolves when policy is set
1589
+ *
1590
+ * @example
1591
+ * // Auto-archive old vectors for 99% cost savings
1592
+ * await storage.setLifecyclePolicy({
1593
+ * rules: [
1594
+ * {
1595
+ * name: 'archiveOldVectors',
1596
+ * enabled: true,
1597
+ * type: 'Lifecycle',
1598
+ * definition: {
1599
+ * filters: {
1600
+ * blobTypes: ['blockBlob'],
1601
+ * prefixMatch: ['entities/nouns/vectors/']
1602
+ * },
1603
+ * actions: {
1604
+ * baseBlob: {
1605
+ * tierToCool: { daysAfterModificationGreaterThan: 30 },
1606
+ * tierToArchive: { daysAfterModificationGreaterThan: 90 },
1607
+ * delete: { daysAfterModificationGreaterThan: 365 }
1608
+ * }
1609
+ * }
1610
+ * }
1611
+ * }
1612
+ * ]
1613
+ * })
1614
+ */
1615
+ async setLifecyclePolicy(options) {
1616
+ await this.ensureInitialized();
1617
+ if (!this.accountName) {
1618
+ throw new Error('Lifecycle policies require accountName to be configured');
1619
+ }
1620
+ try {
1621
+ this.logger.info(`Setting lifecycle policy with ${options.rules.length} rules`);
1622
+ const { BlobServiceClient } = await import('@azure/storage-blob');
1623
+ // Get blob service client
1624
+ let blobServiceClient;
1625
+ if (this.connectionString) {
1626
+ blobServiceClient = BlobServiceClient.fromConnectionString(this.connectionString);
1627
+ }
1628
+ else if (this.accountName && this.accountKey) {
1629
+ const { StorageSharedKeyCredential } = await import('@azure/storage-blob');
1630
+ const credential = new StorageSharedKeyCredential(this.accountName, this.accountKey);
1631
+ blobServiceClient = new BlobServiceClient(`https://${this.accountName}.blob.core.windows.net`, credential);
1632
+ }
1633
+ else if (this.accountName && this.sasToken) {
1634
+ blobServiceClient = new BlobServiceClient(`https://${this.accountName}.blob.core.windows.net${this.sasToken}`);
1635
+ }
1636
+ else if (this.accountName) {
1637
+ const { DefaultAzureCredential } = await import('@azure/identity');
1638
+ const credential = new DefaultAzureCredential();
1639
+ blobServiceClient = new BlobServiceClient(`https://${this.accountName}.blob.core.windows.net`, credential);
1640
+ }
1641
+ else {
1642
+ throw new Error('Cannot set lifecycle policy without valid authentication');
1643
+ }
1644
+ // Get service properties to modify lifecycle policy
1645
+ const serviceProperties = await blobServiceClient.getProperties();
1646
+ // Format rules according to Azure's expected structure
1647
+ const lifecyclePolicy = {
1648
+ rules: options.rules.map(rule => ({
1649
+ enabled: rule.enabled,
1650
+ name: rule.name,
1651
+ type: rule.type,
1652
+ definition: {
1653
+ filters: {
1654
+ blobTypes: rule.definition.filters.blobTypes,
1655
+ ...(rule.definition.filters.prefixMatch && {
1656
+ prefixMatch: rule.definition.filters.prefixMatch
1657
+ })
1658
+ },
1659
+ actions: {
1660
+ baseBlob: {
1661
+ ...(rule.definition.actions.baseBlob.tierToCool && {
1662
+ tierToCool: rule.definition.actions.baseBlob.tierToCool
1663
+ }),
1664
+ ...(rule.definition.actions.baseBlob.tierToArchive && {
1665
+ tierToArchive: rule.definition.actions.baseBlob.tierToArchive
1666
+ }),
1667
+ ...(rule.definition.actions.baseBlob.delete && {
1668
+ delete: rule.definition.actions.baseBlob.delete
1669
+ })
1670
+ }
1671
+ }
1672
+ }
1673
+ }))
1674
+ };
1675
+ // Set the lifecycle management policy
1676
+ await blobServiceClient.setProperties({
1677
+ ...serviceProperties,
1678
+ blobAnalyticsLogging: serviceProperties.blobAnalyticsLogging,
1679
+ hourMetrics: serviceProperties.hourMetrics,
1680
+ minuteMetrics: serviceProperties.minuteMetrics,
1681
+ cors: serviceProperties.cors,
1682
+ deleteRetentionPolicy: serviceProperties.deleteRetentionPolicy,
1683
+ staticWebsite: serviceProperties.staticWebsite,
1684
+ // Set lifecycle policy
1685
+ lifecyclePolicy
1686
+ });
1687
+ this.logger.info(`Successfully set lifecycle policy with ${options.rules.length} rules`);
1688
+ }
1689
+ catch (error) {
1690
+ this.logger.error('Failed to set lifecycle policy:', error);
1691
+ throw new Error(`Failed to set lifecycle policy: ${error.message || error}`);
1692
+ }
1693
+ }
1694
+ /**
1695
+ * Get the current lifecycle management policy
1696
+ *
1697
+ * @returns Promise that resolves to the current policy or null if not set
1698
+ *
1699
+ * @example
1700
+ * const policy = await storage.getLifecyclePolicy()
1701
+ * if (policy) {
1702
+ * console.log(`Found ${policy.rules.length} lifecycle rules`)
1703
+ * }
1704
+ */
1705
+ async getLifecyclePolicy() {
1706
+ await this.ensureInitialized();
1707
+ if (!this.accountName) {
1708
+ throw new Error('Lifecycle policies require accountName to be configured');
1709
+ }
1710
+ try {
1711
+ this.logger.info('Getting lifecycle policy');
1712
+ const { BlobServiceClient } = await import('@azure/storage-blob');
1713
+ // Get blob service client
1714
+ let blobServiceClient;
1715
+ if (this.connectionString) {
1716
+ blobServiceClient = BlobServiceClient.fromConnectionString(this.connectionString);
1717
+ }
1718
+ else if (this.accountName && this.accountKey) {
1719
+ const { StorageSharedKeyCredential } = await import('@azure/storage-blob');
1720
+ const credential = new StorageSharedKeyCredential(this.accountName, this.accountKey);
1721
+ blobServiceClient = new BlobServiceClient(`https://${this.accountName}.blob.core.windows.net`, credential);
1722
+ }
1723
+ else if (this.accountName && this.sasToken) {
1724
+ blobServiceClient = new BlobServiceClient(`https://${this.accountName}.blob.core.windows.net${this.sasToken}`);
1725
+ }
1726
+ else if (this.accountName) {
1727
+ const { DefaultAzureCredential } = await import('@azure/identity');
1728
+ const credential = new DefaultAzureCredential();
1729
+ blobServiceClient = new BlobServiceClient(`https://${this.accountName}.blob.core.windows.net`, credential);
1730
+ }
1731
+ else {
1732
+ throw new Error('Cannot get lifecycle policy without valid authentication');
1733
+ }
1734
+ // Get service properties
1735
+ const serviceProperties = await blobServiceClient.getProperties();
1736
+ if (!serviceProperties.lifecyclePolicy || !serviceProperties.lifecyclePolicy.rules) {
1737
+ this.logger.info('No lifecycle policy configured');
1738
+ return null;
1739
+ }
1740
+ this.logger.info(`Found lifecycle policy with ${serviceProperties.lifecyclePolicy.rules.length} rules`);
1741
+ return serviceProperties.lifecyclePolicy;
1742
+ }
1743
+ catch (error) {
1744
+ this.logger.error('Failed to get lifecycle policy:', error);
1745
+ throw new Error(`Failed to get lifecycle policy: ${error.message || error}`);
1746
+ }
1747
+ }
1748
+ /**
1749
+ * Remove the lifecycle management policy
1750
+ * All automatic tier transitions and deletions will stop
1751
+ *
1752
+ * @returns Promise that resolves when policy is removed
1753
+ *
1754
+ * @example
1755
+ * await storage.removeLifecyclePolicy()
1756
+ * console.log('Lifecycle policy removed - auto-archival disabled')
1757
+ */
1758
+ async removeLifecyclePolicy() {
1759
+ await this.ensureInitialized();
1760
+ if (!this.accountName) {
1761
+ throw new Error('Lifecycle policies require accountName to be configured');
1762
+ }
1763
+ try {
1764
+ this.logger.info('Removing lifecycle policy');
1765
+ const { BlobServiceClient } = await import('@azure/storage-blob');
1766
+ // Get blob service client
1767
+ let blobServiceClient;
1768
+ if (this.connectionString) {
1769
+ blobServiceClient = BlobServiceClient.fromConnectionString(this.connectionString);
1770
+ }
1771
+ else if (this.accountName && this.accountKey) {
1772
+ const { StorageSharedKeyCredential } = await import('@azure/storage-blob');
1773
+ const credential = new StorageSharedKeyCredential(this.accountName, this.accountKey);
1774
+ blobServiceClient = new BlobServiceClient(`https://${this.accountName}.blob.core.windows.net`, credential);
1775
+ }
1776
+ else if (this.accountName && this.sasToken) {
1777
+ blobServiceClient = new BlobServiceClient(`https://${this.accountName}.blob.core.windows.net${this.sasToken}`);
1778
+ }
1779
+ else if (this.accountName) {
1780
+ const { DefaultAzureCredential } = await import('@azure/identity');
1781
+ const credential = new DefaultAzureCredential();
1782
+ blobServiceClient = new BlobServiceClient(`https://${this.accountName}.blob.core.windows.net`, credential);
1783
+ }
1784
+ else {
1785
+ throw new Error('Cannot remove lifecycle policy without valid authentication');
1786
+ }
1787
+ // Get service properties
1788
+ const serviceProperties = await blobServiceClient.getProperties();
1789
+ // Set properties without lifecycle policy (removes it)
1790
+ await blobServiceClient.setProperties({
1791
+ ...serviceProperties,
1792
+ blobAnalyticsLogging: serviceProperties.blobAnalyticsLogging,
1793
+ hourMetrics: serviceProperties.hourMetrics,
1794
+ minuteMetrics: serviceProperties.minuteMetrics,
1795
+ cors: serviceProperties.cors,
1796
+ deleteRetentionPolicy: serviceProperties.deleteRetentionPolicy,
1797
+ staticWebsite: serviceProperties.staticWebsite,
1798
+ // Remove lifecycle policy by not including it
1799
+ lifecyclePolicy: undefined
1800
+ });
1801
+ this.logger.info('Successfully removed lifecycle policy');
1802
+ }
1803
+ catch (error) {
1804
+ this.logger.error('Failed to remove lifecycle policy:', error);
1805
+ throw new Error(`Failed to remove lifecycle policy: ${error.message || error}`);
1806
+ }
1807
+ }
1808
+ }
1809
+ //# sourceMappingURL=azureBlobStorage.js.map