@soulcraft/brainy 3.50.1 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +242 -0
- package/README.md +358 -658
- package/dist/api/ConfigAPI.js +56 -19
- package/dist/api/DataAPI.js +24 -18
- package/dist/augmentations/storageAugmentations.d.ts +24 -0
- package/dist/augmentations/storageAugmentations.js +22 -0
- package/dist/brainy.js +32 -9
- package/dist/cli/commands/core.d.ts +20 -10
- package/dist/cli/commands/core.js +384 -82
- package/dist/cli/commands/import.d.ts +41 -0
- package/dist/cli/commands/import.js +456 -0
- package/dist/cli/commands/insights.d.ts +34 -0
- package/dist/cli/commands/insights.js +300 -0
- package/dist/cli/commands/neural.d.ts +6 -12
- package/dist/cli/commands/neural.js +113 -10
- package/dist/cli/commands/nlp.d.ts +28 -0
- package/dist/cli/commands/nlp.js +246 -0
- package/dist/cli/commands/storage.d.ts +64 -0
- package/dist/cli/commands/storage.js +730 -0
- package/dist/cli/index.js +210 -24
- package/dist/coreTypes.d.ts +206 -34
- package/dist/distributed/configManager.js +8 -6
- package/dist/distributed/shardMigration.js +2 -0
- package/dist/distributed/storageDiscovery.js +6 -4
- package/dist/embeddings/EmbeddingManager.d.ts +2 -2
- package/dist/embeddings/EmbeddingManager.js +5 -1
- package/dist/graph/lsm/LSMTree.js +32 -20
- package/dist/hnsw/typeAwareHNSWIndex.js +6 -2
- package/dist/storage/adapters/azureBlobStorage.d.ts +545 -0
- package/dist/storage/adapters/azureBlobStorage.js +1809 -0
- package/dist/storage/adapters/baseStorageAdapter.d.ts +16 -13
- package/dist/storage/adapters/fileSystemStorage.d.ts +21 -9
- package/dist/storage/adapters/fileSystemStorage.js +204 -127
- package/dist/storage/adapters/gcsStorage.d.ts +119 -9
- package/dist/storage/adapters/gcsStorage.js +317 -62
- package/dist/storage/adapters/memoryStorage.d.ts +30 -18
- package/dist/storage/adapters/memoryStorage.js +99 -94
- package/dist/storage/adapters/opfsStorage.d.ts +48 -10
- package/dist/storage/adapters/opfsStorage.js +201 -80
- package/dist/storage/adapters/r2Storage.d.ts +12 -5
- package/dist/storage/adapters/r2Storage.js +63 -15
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +164 -17
- package/dist/storage/adapters/s3CompatibleStorage.js +472 -80
- package/dist/storage/adapters/typeAwareStorageAdapter.d.ts +38 -6
- package/dist/storage/adapters/typeAwareStorageAdapter.js +218 -39
- package/dist/storage/baseStorage.d.ts +41 -38
- package/dist/storage/baseStorage.js +110 -134
- package/dist/storage/storageFactory.d.ts +29 -2
- package/dist/storage/storageFactory.js +30 -1
- package/dist/utils/entityIdMapper.js +5 -2
- package/dist/utils/fieldTypeInference.js +8 -1
- package/dist/utils/metadataFilter.d.ts +3 -2
- package/dist/utils/metadataFilter.js +1 -0
- package/dist/utils/metadataIndex.d.ts +2 -1
- package/dist/utils/metadataIndex.js +9 -1
- package/dist/utils/metadataIndexChunking.js +9 -4
- package/dist/utils/periodicCleanup.js +1 -0
- package/package.json +3 -1
|
@@ -0,0 +1,1809 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Azure Blob Storage Adapter (Native)
|
|
3
|
+
* Uses the native @azure/storage-blob library for optimal performance and authentication
|
|
4
|
+
*
|
|
5
|
+
* Supports multiple authentication methods:
|
|
6
|
+
* 1. DefaultAzureCredential (Managed Identity) - Automatic in Azure environments
|
|
7
|
+
* 2. Connection String
|
|
8
|
+
* 3. Storage Account Key
|
|
9
|
+
* 4. SAS Token
|
|
10
|
+
* 5. Azure AD (OAuth2) via DefaultAzureCredential
|
|
11
|
+
*
|
|
12
|
+
* v4.0.0: Fully compatible with metadata/vector separation architecture
|
|
13
|
+
*/
|
|
14
|
+
import { BaseStorage, SYSTEM_DIR, STATISTICS_KEY, getDirectoryPath } from '../baseStorage.js';
|
|
15
|
+
import { BrainyError } from '../../errors/brainyError.js';
|
|
16
|
+
import { CacheManager } from '../cacheManager.js';
|
|
17
|
+
import { createModuleLogger, prodLog } from '../../utils/logger.js';
|
|
18
|
+
import { getGlobalBackpressure } from '../../utils/adaptiveBackpressure.js';
|
|
19
|
+
import { getWriteBuffer } from '../../utils/writeBuffer.js';
|
|
20
|
+
import { getCoalescer } from '../../utils/requestCoalescer.js';
|
|
21
|
+
import { getShardIdFromUuid } from '../sharding.js';
|
|
22
|
+
// Azure Blob Storage API limits
|
|
23
|
+
const MAX_AZURE_PAGE_SIZE = 5000;
|
|
24
|
+
/**
|
|
25
|
+
* Native Azure Blob Storage adapter for server environments
|
|
26
|
+
* Uses the @azure/storage-blob library with DefaultAzureCredential
|
|
27
|
+
*
|
|
28
|
+
* Authentication priority:
|
|
29
|
+
* 1. DefaultAzureCredential (Managed Identity) - if no credentials provided
|
|
30
|
+
* 2. Connection String - if connectionString provided
|
|
31
|
+
* 3. Storage Account Key - if accountName + accountKey provided
|
|
32
|
+
* 4. SAS Token - if accountName + sasToken provided
|
|
33
|
+
*/
|
|
34
|
+
export class AzureBlobStorage extends BaseStorage {
|
|
35
|
+
/**
 * Create an Azure Blob Storage adapter.
 *
 * Stores credentials and derives the blob-name prefixes for the four data
 * families (noun/verb vectors and noun/verb metadata) plus system data.
 * No network calls happen here — the Azure client is created lazily in init().
 *
 * @param options Configuration options for Azure Blob Storage:
 *   containerName (required), and one credential set among
 *   connectionString | accountName+accountKey | accountName+sasToken |
 *   accountName alone (Managed Identity). Optional: readOnly, cacheConfig.
 */
constructor(options) {
    super();
    // Azure SDK clients — created in init() once credentials are resolved
    this.blobServiceClient = null;
    this.containerClient = null;
    // Statistics caching for better performance
    this.statisticsCache = null;
    // Backpressure and performance management counters
    this.pendingOperations = 0;
    this.consecutiveErrors = 0;
    this.lastErrorReset = Date.now();
    // Adaptive backpressure for automatic flow control (shared global instance)
    this.backpressure = getGlobalBackpressure();
    // Write buffers for bulk operations — wired up in init()
    this.nounWriteBuffer = null;
    this.verbWriteBuffer = null;
    // Request coalescer for deduplication — wired up in init()
    this.requestCoalescer = null;
    // High-volume mode detection (toggled by checkVolumeMode())
    this.highVolumeMode = false;
    this.lastVolumeCheck = 0;
    this.volumeCheckInterval = 1000; // Check every second
    this.forceHighVolumeMode = false; // Environment variable override
    // Module logger
    this.logger = createModuleLogger('AzureBlobStorage');
    // Credentials/config — which set is present decides the auth path in init()
    this.containerName = options.containerName;
    this.connectionString = options.connectionString;
    this.accountName = options.accountName;
    this.accountKey = options.accountKey;
    this.sasToken = options.sasToken;
    this.readOnly = options.readOnly || false;
    // Set up prefixes for different types of data using entity-based structure
    this.nounPrefix = `${getDirectoryPath('noun', 'vector')}/`;
    this.verbPrefix = `${getDirectoryPath('verb', 'vector')}/`;
    this.metadataPrefix = `${getDirectoryPath('noun', 'metadata')}/`; // Noun metadata
    this.verbMetadataPrefix = `${getDirectoryPath('verb', 'metadata')}/`; // Verb metadata
    this.systemPrefix = `${SYSTEM_DIR}/`; // System data
    // Initialize cache managers (separate caches for nouns and verbs)
    this.nounCacheManager = new CacheManager(options.cacheConfig);
    this.verbCacheManager = new CacheManager(options.cacheConfig);
    // Check for high-volume mode override (guard `process` for non-Node runtimes)
    if (typeof process !== 'undefined' && process.env?.BRAINY_FORCE_HIGH_VOLUME === 'true') {
        this.forceHighVolumeMode = true;
        this.highVolumeMode = true;
        prodLog.info('🚀 High-volume mode FORCED via BRAINY_FORCE_HIGH_VOLUME environment variable');
    }
}
|
|
85
|
+
/**
 * Initialize the storage adapter.
 *
 * Resolves credentials in priority order (connection string → account key →
 * SAS token → DefaultAzureCredential/Managed Identity), connects to (or
 * creates) the container, wires up write buffers and the request coalescer,
 * loads entity counts, and clears stale caches. Idempotent — subsequent
 * calls are no-ops once isInitialized is set.
 *
 * @throws Error if no usable credential combination was provided, or if the
 *         Azure SDK import / container access fails.
 */
async init() {
    if (this.isInitialized) {
        return;
    }
    try {
        // Import Azure Storage SDK only when needed (keeps it an optional dep)
        const { BlobServiceClient } = await import('@azure/storage-blob');
        // Configure the Azure Blob Storage client based on available credentials
        // Priority 1: Connection String
        if (this.connectionString) {
            this.blobServiceClient = BlobServiceClient.fromConnectionString(this.connectionString);
            prodLog.info('🔐 Azure: Using Connection String');
        }
        // Priority 2: Account Name + Key
        else if (this.accountName && this.accountKey) {
            const { StorageSharedKeyCredential } = await import('@azure/storage-blob');
            const sharedKeyCredential = new StorageSharedKeyCredential(this.accountName, this.accountKey);
            this.blobServiceClient = new BlobServiceClient(`https://${this.accountName}.blob.core.windows.net`, sharedKeyCredential);
            prodLog.info('🔐 Azure: Using Account Key');
        }
        // Priority 3: SAS Token (appended to the account URL; expected to start with '?')
        else if (this.accountName && this.sasToken) {
            this.blobServiceClient = new BlobServiceClient(`https://${this.accountName}.blob.core.windows.net${this.sasToken}`);
            prodLog.info('🔐 Azure: Using SAS Token');
        }
        // Priority 4: DefaultAzureCredential (Managed Identity) — account name alone
        else if (this.accountName) {
            const { DefaultAzureCredential } = await import('@azure/identity');
            const credential = new DefaultAzureCredential();
            this.blobServiceClient = new BlobServiceClient(`https://${this.accountName}.blob.core.windows.net`, credential);
            prodLog.info('🔐 Azure: Using DefaultAzureCredential (Managed Identity)');
        }
        else {
            throw new Error('Azure Blob Storage requires either connectionString, accountName+accountKey, accountName+sasToken, or accountName (for Managed Identity)');
        }
        // Get reference to the container
        this.containerClient = this.blobServiceClient.getContainerClient(this.containerName);
        // Create container if it doesn't exist
        const exists = await this.containerClient.exists();
        if (!exists) {
            await this.containerClient.create();
            prodLog.info(`✅ Created Azure container: ${this.containerName}`);
        }
        else {
            prodLog.info(`✅ Connected to Azure container: ${this.containerName}`);
        }
        // Initialize write buffers for high-volume mode (flushed via the
        // flushNounBuffer/flushVerbBuffer callbacks below)
        const storageId = `azure-${this.containerName}`;
        this.nounWriteBuffer = getWriteBuffer(`${storageId}-nouns`, 'noun', async (items) => {
            await this.flushNounBuffer(items);
        });
        this.verbWriteBuffer = getWriteBuffer(`${storageId}-verbs`, 'verb', async (items) => {
            await this.flushVerbBuffer(items);
        });
        // Initialize request coalescer for deduplication
        this.requestCoalescer = getCoalescer(storageId, async (batch) => {
            // Process coalesced operations (placeholder for future optimization)
            this.logger.trace(`Processing coalesced batch: ${batch.length} items`);
        });
        // Initialize counts from storage
        await this.initializeCounts();
        // Clear any stale cache entries from previous runs
        prodLog.info('🧹 Clearing cache from previous run to prevent cache poisoning');
        this.nounCacheManager.clear();
        this.verbCacheManager.clear();
        prodLog.info('✅ Cache cleared - starting fresh');
        this.isInitialized = true;
    }
    catch (error) {
        this.logger.error('Failed to initialize Azure Blob Storage:', error);
        throw new Error(`Failed to initialize Azure Blob Storage: ${error}`);
    }
}
|
|
161
|
+
/**
|
|
162
|
+
* Get the Azure blob name for a noun using UUID-based sharding
|
|
163
|
+
*
|
|
164
|
+
* Uses first 2 hex characters of UUID for consistent sharding.
|
|
165
|
+
* Path format: entities/nouns/vectors/{shardId}/{uuid}.json
|
|
166
|
+
*
|
|
167
|
+
* @example
|
|
168
|
+
* getNounKey('ab123456-1234-5678-9abc-def012345678')
|
|
169
|
+
* // returns 'entities/nouns/vectors/ab/ab123456-1234-5678-9abc-def012345678.json'
|
|
170
|
+
*/
|
|
171
|
+
getNounKey(id) {
|
|
172
|
+
const shardId = getShardIdFromUuid(id);
|
|
173
|
+
return `${this.nounPrefix}${shardId}/${id}.json`;
|
|
174
|
+
}
|
|
175
|
+
/**
|
|
176
|
+
* Get the Azure blob name for a verb using UUID-based sharding
|
|
177
|
+
*
|
|
178
|
+
* Uses first 2 hex characters of UUID for consistent sharding.
|
|
179
|
+
* Path format: entities/verbs/vectors/{shardId}/{uuid}.json
|
|
180
|
+
*
|
|
181
|
+
* @example
|
|
182
|
+
* getVerbKey('cd987654-4321-8765-cba9-fed543210987')
|
|
183
|
+
* // returns 'entities/verbs/vectors/cd/cd987654-4321-8765-cba9-fed543210987.json'
|
|
184
|
+
*/
|
|
185
|
+
getVerbKey(id) {
|
|
186
|
+
const shardId = getShardIdFromUuid(id);
|
|
187
|
+
return `${this.verbPrefix}${shardId}/${id}.json`;
|
|
188
|
+
}
|
|
189
|
+
/**
|
|
190
|
+
* Override base class method to detect Azure-specific throttling errors
|
|
191
|
+
*/
|
|
192
|
+
isThrottlingError(error) {
|
|
193
|
+
// First check base class detection
|
|
194
|
+
if (super.isThrottlingError(error)) {
|
|
195
|
+
return true;
|
|
196
|
+
}
|
|
197
|
+
// Azure-specific throttling detection
|
|
198
|
+
const statusCode = error.statusCode || error.code;
|
|
199
|
+
const message = error.message?.toLowerCase() || '';
|
|
200
|
+
return (statusCode === 429 || // Too Many Requests
|
|
201
|
+
statusCode === 503 || // Service Unavailable
|
|
202
|
+
statusCode === 'ServerBusy' ||
|
|
203
|
+
statusCode === 'IngressOverLimit' ||
|
|
204
|
+
statusCode === 'EgressOverLimit' ||
|
|
205
|
+
message.includes('throttl') ||
|
|
206
|
+
message.includes('rate limit') ||
|
|
207
|
+
message.includes('too many requests'));
|
|
208
|
+
}
|
|
209
|
+
/**
 * Override base class to enable smart batching for cloud storage.
 *
 * Azure Blob Storage is cloud storage with network latency (~50ms per write).
 * Smart batching reduces writes from 1000 ops → 100 batches.
 *
 * @returns {boolean} true — Azure is cloud storage and benefits from batching
 */
isCloudStorage() {
    return true; // Azure benefits from batching
}
|
|
220
|
+
/**
|
|
221
|
+
* Apply backpressure before starting an operation
|
|
222
|
+
* @returns Request ID for tracking
|
|
223
|
+
*/
|
|
224
|
+
async applyBackpressure() {
|
|
225
|
+
const requestId = `${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
|
|
226
|
+
await this.backpressure.requestPermission(requestId, 1);
|
|
227
|
+
this.pendingOperations++;
|
|
228
|
+
return requestId;
|
|
229
|
+
}
|
|
230
|
+
/**
|
|
231
|
+
* Release backpressure after completing an operation
|
|
232
|
+
* @param success Whether the operation succeeded
|
|
233
|
+
* @param requestId Request ID from applyBackpressure()
|
|
234
|
+
*/
|
|
235
|
+
releaseBackpressure(success = true, requestId) {
|
|
236
|
+
this.pendingOperations = Math.max(0, this.pendingOperations - 1);
|
|
237
|
+
if (requestId) {
|
|
238
|
+
this.backpressure.releasePermission(requestId, success);
|
|
239
|
+
}
|
|
240
|
+
}
|
|
241
|
+
/**
|
|
242
|
+
* Check if high-volume mode should be enabled
|
|
243
|
+
*/
|
|
244
|
+
checkVolumeMode() {
|
|
245
|
+
if (this.forceHighVolumeMode) {
|
|
246
|
+
return; // Already forced on
|
|
247
|
+
}
|
|
248
|
+
const now = Date.now();
|
|
249
|
+
if (now - this.lastVolumeCheck < this.volumeCheckInterval) {
|
|
250
|
+
return;
|
|
251
|
+
}
|
|
252
|
+
this.lastVolumeCheck = now;
|
|
253
|
+
// Enable high-volume mode if we have many pending operations
|
|
254
|
+
const shouldEnable = this.pendingOperations > 20;
|
|
255
|
+
if (shouldEnable && !this.highVolumeMode) {
|
|
256
|
+
this.highVolumeMode = true;
|
|
257
|
+
prodLog.info('🚀 High-volume mode ENABLED (pending operations:', this.pendingOperations, ')');
|
|
258
|
+
}
|
|
259
|
+
else if (!shouldEnable && this.highVolumeMode && !this.forceHighVolumeMode) {
|
|
260
|
+
this.highVolumeMode = false;
|
|
261
|
+
prodLog.info('🐌 High-volume mode DISABLED (pending operations:', this.pendingOperations, ')');
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
/**
|
|
265
|
+
* Flush noun buffer to Azure
|
|
266
|
+
*/
|
|
267
|
+
async flushNounBuffer(items) {
|
|
268
|
+
const writes = Array.from(items.values()).map(async (noun) => {
|
|
269
|
+
try {
|
|
270
|
+
await this.saveNodeDirect(noun);
|
|
271
|
+
}
|
|
272
|
+
catch (error) {
|
|
273
|
+
this.logger.error(`Failed to flush noun ${noun.id}:`, error);
|
|
274
|
+
}
|
|
275
|
+
});
|
|
276
|
+
await Promise.all(writes);
|
|
277
|
+
}
|
|
278
|
+
/**
|
|
279
|
+
* Flush verb buffer to Azure
|
|
280
|
+
*/
|
|
281
|
+
async flushVerbBuffer(items) {
|
|
282
|
+
const writes = Array.from(items.values()).map(async (verb) => {
|
|
283
|
+
try {
|
|
284
|
+
await this.saveEdgeDirect(verb);
|
|
285
|
+
}
|
|
286
|
+
catch (error) {
|
|
287
|
+
this.logger.error(`Failed to flush verb ${verb.id}:`, error);
|
|
288
|
+
}
|
|
289
|
+
});
|
|
290
|
+
await Promise.all(writes);
|
|
291
|
+
}
|
|
292
|
+
/**
 * Save a noun to storage (internal implementation).
 *
 * Thin delegate required by the base-class contract; all buffering and
 * upload logic lives in saveNode().
 *
 * @param noun The HNSW noun (vector node) to persist
 */
async saveNoun_internal(noun) {
    return this.saveNode(noun);
}
|
|
298
|
+
/**
|
|
299
|
+
* Save a node to storage
|
|
300
|
+
*/
|
|
301
|
+
async saveNode(node) {
|
|
302
|
+
await this.ensureInitialized();
|
|
303
|
+
// ALWAYS check if we should use high-volume mode (critical for detection)
|
|
304
|
+
this.checkVolumeMode();
|
|
305
|
+
// Use write buffer in high-volume mode
|
|
306
|
+
if (this.highVolumeMode && this.nounWriteBuffer) {
|
|
307
|
+
this.logger.trace(`📝 BUFFERING: Adding noun ${node.id} to write buffer (high-volume mode active)`);
|
|
308
|
+
await this.nounWriteBuffer.add(node.id, node);
|
|
309
|
+
return;
|
|
310
|
+
}
|
|
311
|
+
else if (!this.highVolumeMode) {
|
|
312
|
+
this.logger.trace(`📝 DIRECT WRITE: Saving noun ${node.id} directly (high-volume mode inactive)`);
|
|
313
|
+
}
|
|
314
|
+
// Direct write in normal mode
|
|
315
|
+
await this.saveNodeDirect(node);
|
|
316
|
+
}
|
|
317
|
+
/**
|
|
318
|
+
* Save a node directly to Azure (bypass buffer)
|
|
319
|
+
*/
|
|
320
|
+
async saveNodeDirect(node) {
|
|
321
|
+
// Apply backpressure before starting operation
|
|
322
|
+
const requestId = await this.applyBackpressure();
|
|
323
|
+
try {
|
|
324
|
+
this.logger.trace(`Saving node ${node.id}`);
|
|
325
|
+
// Convert connections Map to a serializable format
|
|
326
|
+
// CRITICAL: Only save lightweight vector data (no metadata)
|
|
327
|
+
// Metadata is saved separately via saveNounMetadata() (2-file system)
|
|
328
|
+
const serializableNode = {
|
|
329
|
+
id: node.id,
|
|
330
|
+
vector: node.vector,
|
|
331
|
+
connections: Object.fromEntries(Array.from(node.connections.entries()).map(([level, nounIds]) => [
|
|
332
|
+
level,
|
|
333
|
+
Array.from(nounIds)
|
|
334
|
+
])),
|
|
335
|
+
level: node.level || 0
|
|
336
|
+
// NO metadata field - saved separately for scalability
|
|
337
|
+
};
|
|
338
|
+
// Get the Azure blob name with UUID-based sharding
|
|
339
|
+
const blobName = this.getNounKey(node.id);
|
|
340
|
+
// Save to Azure Blob Storage
|
|
341
|
+
const blockBlobClient = this.containerClient.getBlockBlobClient(blobName);
|
|
342
|
+
await blockBlobClient.upload(JSON.stringify(serializableNode, null, 2), JSON.stringify(serializableNode).length, {
|
|
343
|
+
blobHTTPHeaders: { blobContentType: 'application/json' }
|
|
344
|
+
});
|
|
345
|
+
// CRITICAL FIX: Only cache nodes with non-empty vectors
|
|
346
|
+
// This prevents cache pollution from HNSW's lazy-loading nodes (vector: [])
|
|
347
|
+
if (node.vector && Array.isArray(node.vector) && node.vector.length > 0) {
|
|
348
|
+
this.nounCacheManager.set(node.id, node);
|
|
349
|
+
}
|
|
350
|
+
// Note: Empty vectors are intentional during HNSW lazy mode - not logged
|
|
351
|
+
// Increment noun count
|
|
352
|
+
const metadata = await this.getNounMetadata(node.id);
|
|
353
|
+
if (metadata && metadata.type) {
|
|
354
|
+
await this.incrementEntityCountSafe(metadata.type);
|
|
355
|
+
}
|
|
356
|
+
this.logger.trace(`Node ${node.id} saved successfully`);
|
|
357
|
+
this.releaseBackpressure(true, requestId);
|
|
358
|
+
}
|
|
359
|
+
catch (error) {
|
|
360
|
+
this.releaseBackpressure(false, requestId);
|
|
361
|
+
// Handle throttling
|
|
362
|
+
if (this.isThrottlingError(error)) {
|
|
363
|
+
await this.handleThrottling(error);
|
|
364
|
+
throw error; // Re-throw for retry at higher level
|
|
365
|
+
}
|
|
366
|
+
this.logger.error(`Failed to save node ${node.id}:`, error);
|
|
367
|
+
throw new Error(`Failed to save node ${node.id}: ${error}`);
|
|
368
|
+
}
|
|
369
|
+
}
|
|
370
|
+
/**
|
|
371
|
+
* Get a noun from storage (internal implementation)
|
|
372
|
+
* v4.0.0: Returns ONLY vector data (no metadata field)
|
|
373
|
+
* Base class combines with metadata via getNoun() -> HNSWNounWithMetadata
|
|
374
|
+
*/
|
|
375
|
+
async getNoun_internal(id) {
|
|
376
|
+
// v4.0.0: Return ONLY vector data (no metadata field)
|
|
377
|
+
const node = await this.getNode(id);
|
|
378
|
+
if (!node) {
|
|
379
|
+
return null;
|
|
380
|
+
}
|
|
381
|
+
// Return pure vector structure
|
|
382
|
+
return node;
|
|
383
|
+
}
|
|
384
|
+
/**
 * Get a node from storage.
 *
 * Checks the noun cache first (with integrity validation — invalid or null
 * entries are evicted and the node is reloaded from Azure), then downloads
 * and deserializes the vector blob. Returns only vector data; metadata is
 * retrieved separately via getNounMetadata() (2-file system).
 *
 * @param id Noun UUID
 * @returns The node ({id, vector, connections: Map, level}) or null when the
 *          blob does not exist
 * @throws BrainyError for non-404 failures; throttling errors are re-thrown
 *         after handleThrottling() so callers can retry
 */
async getNode(id) {
    await this.ensureInitialized();
    // Check cache first
    const cached = await this.nounCacheManager.get(id);
    // Validate cached object before returning
    if (cached !== undefined && cached !== null) {
        // A valid cache entry must have an id and a non-empty vector array;
        // anything else is stale/poisoned and must be evicted and reloaded
        if (!cached.id || !cached.vector || !Array.isArray(cached.vector) || cached.vector.length === 0) {
            // Invalid cache detected - log the precise reason and auto-recover
            prodLog.warn(`[Azure] Invalid cached object for ${id.substring(0, 8)} (${!cached.id ? 'missing id' :
                !cached.vector ? 'missing vector' :
                    !Array.isArray(cached.vector) ? 'vector not array' :
                        'empty vector'}) - removing from cache and reloading`);
            this.nounCacheManager.delete(id);
            // Fall through to load from Azure
        }
        else {
            // Valid cache hit
            this.logger.trace(`Cache hit for noun ${id}`);
            return cached;
        }
    }
    else if (cached === null) {
        // null should never have been cached — warn and reload from storage
        prodLog.warn(`[Azure] Cache contains null for ${id.substring(0, 8)} - reloading from storage`);
    }
    // Apply backpressure before the network round-trip
    const requestId = await this.applyBackpressure();
    try {
        this.logger.trace(`Getting node ${id}`);
        // Get the Azure blob name with UUID-based sharding
        const blobName = this.getNounKey(id);
        // Download from Azure Blob Storage (offset 0 = whole blob)
        const blockBlobClient = this.containerClient.getBlockBlobClient(blobName);
        const downloadResponse = await blockBlobClient.download(0);
        const downloaded = await this.streamToBuffer(downloadResponse.readableStreamBody);
        // Parse JSON
        const data = JSON.parse(downloaded.toString());
        // Convert serialized connections back to Map<number, Set<string>>
        const connections = new Map();
        for (const [level, nounIds] of Object.entries(data.connections || {})) {
            connections.set(Number(level), new Set(nounIds));
        }
        // Only lightweight vector data is returned (no metadata) —
        // metadata is retrieved separately via getNounMetadata() (2-file system)
        const node = {
            id: data.id,
            vector: data.vector,
            connections,
            level: data.level || 0
            // NO metadata field - retrieved separately for scalability
        };
        // Only cache valid nodes with non-empty vectors (never cache null or empty)
        if (node && node.id && node.vector && Array.isArray(node.vector) && node.vector.length > 0) {
            this.nounCacheManager.set(id, node);
        }
        else {
            prodLog.warn(`[Azure] Not caching invalid node ${id.substring(0, 8)} (missing id/vector or empty vector)`);
        }
        this.logger.trace(`Successfully retrieved node ${id}`);
        this.releaseBackpressure(true, requestId);
        return node;
    }
    catch (error) {
        this.releaseBackpressure(false, requestId);
        // "Not found" is an expected outcome, not a failure
        if (error.statusCode === 404 || error.code === 'BlobNotFound') {
            this.logger.trace(`Node not found: ${id}`);
            // Deliberately NOT cached: caching null would poison the cache
            return null;
        }
        // Throttling: record it and re-throw so a higher level can retry
        if (this.isThrottlingError(error)) {
            await this.handleThrottling(error);
            throw error;
        }
        // All other errors should throw, not return null
        this.logger.error(`Failed to get node ${id}:`, error);
        throw BrainyError.fromError(error, `getNoun(${id})`);
    }
}
|
|
467
|
+
/**
|
|
468
|
+
* Delete a noun from storage (internal implementation)
|
|
469
|
+
*/
|
|
470
|
+
async deleteNoun_internal(id) {
|
|
471
|
+
await this.ensureInitialized();
|
|
472
|
+
const requestId = await this.applyBackpressure();
|
|
473
|
+
try {
|
|
474
|
+
this.logger.trace(`Deleting noun ${id}`);
|
|
475
|
+
// Get the Azure blob name
|
|
476
|
+
const blobName = this.getNounKey(id);
|
|
477
|
+
// Delete from Azure
|
|
478
|
+
const blockBlobClient = this.containerClient.getBlockBlobClient(blobName);
|
|
479
|
+
await blockBlobClient.delete();
|
|
480
|
+
// Remove from cache
|
|
481
|
+
this.nounCacheManager.delete(id);
|
|
482
|
+
// Decrement noun count
|
|
483
|
+
const metadata = await this.getNounMetadata(id);
|
|
484
|
+
if (metadata && metadata.type) {
|
|
485
|
+
await this.decrementEntityCountSafe(metadata.type);
|
|
486
|
+
}
|
|
487
|
+
this.logger.trace(`Noun ${id} deleted successfully`);
|
|
488
|
+
this.releaseBackpressure(true, requestId);
|
|
489
|
+
}
|
|
490
|
+
catch (error) {
|
|
491
|
+
this.releaseBackpressure(false, requestId);
|
|
492
|
+
if (error.statusCode === 404 || error.code === 'BlobNotFound') {
|
|
493
|
+
// Already deleted
|
|
494
|
+
this.logger.trace(`Noun ${id} not found (already deleted)`);
|
|
495
|
+
return;
|
|
496
|
+
}
|
|
497
|
+
// Handle throttling
|
|
498
|
+
if (this.isThrottlingError(error)) {
|
|
499
|
+
await this.handleThrottling(error);
|
|
500
|
+
throw error;
|
|
501
|
+
}
|
|
502
|
+
this.logger.error(`Failed to delete noun ${id}:`, error);
|
|
503
|
+
throw new Error(`Failed to delete noun ${id}: ${error}`);
|
|
504
|
+
}
|
|
505
|
+
}
|
|
506
|
+
/**
|
|
507
|
+
* Write an object to a specific path in Azure
|
|
508
|
+
* Primitive operation required by base class
|
|
509
|
+
* @protected
|
|
510
|
+
*/
|
|
511
|
+
async writeObjectToPath(path, data) {
|
|
512
|
+
await this.ensureInitialized();
|
|
513
|
+
try {
|
|
514
|
+
this.logger.trace(`Writing object to path: ${path}`);
|
|
515
|
+
const blockBlobClient = this.containerClient.getBlockBlobClient(path);
|
|
516
|
+
const content = JSON.stringify(data, null, 2);
|
|
517
|
+
await blockBlobClient.upload(content, content.length, {
|
|
518
|
+
blobHTTPHeaders: { blobContentType: 'application/json' }
|
|
519
|
+
});
|
|
520
|
+
this.logger.trace(`Object written successfully to ${path}`);
|
|
521
|
+
}
|
|
522
|
+
catch (error) {
|
|
523
|
+
this.logger.error(`Failed to write object to ${path}:`, error);
|
|
524
|
+
throw new Error(`Failed to write object to ${path}: ${error}`);
|
|
525
|
+
}
|
|
526
|
+
}
|
|
527
|
+
/**
|
|
528
|
+
* Read an object from a specific path in Azure
|
|
529
|
+
* Primitive operation required by base class
|
|
530
|
+
* @protected
|
|
531
|
+
*/
|
|
532
|
+
async readObjectFromPath(path) {
|
|
533
|
+
await this.ensureInitialized();
|
|
534
|
+
try {
|
|
535
|
+
this.logger.trace(`Reading object from path: ${path}`);
|
|
536
|
+
const blockBlobClient = this.containerClient.getBlockBlobClient(path);
|
|
537
|
+
const downloadResponse = await blockBlobClient.download(0);
|
|
538
|
+
const downloaded = await this.streamToBuffer(downloadResponse.readableStreamBody);
|
|
539
|
+
const data = JSON.parse(downloaded.toString());
|
|
540
|
+
this.logger.trace(`Object read successfully from ${path}`);
|
|
541
|
+
return data;
|
|
542
|
+
}
|
|
543
|
+
catch (error) {
|
|
544
|
+
// Check if this is a "not found" error
|
|
545
|
+
if (error.statusCode === 404 || error.code === 'BlobNotFound') {
|
|
546
|
+
this.logger.trace(`Object not found at ${path}`);
|
|
547
|
+
return null;
|
|
548
|
+
}
|
|
549
|
+
this.logger.error(`Failed to read object from ${path}:`, error);
|
|
550
|
+
throw BrainyError.fromError(error, `readObjectFromPath(${path})`);
|
|
551
|
+
}
|
|
552
|
+
}
|
|
553
|
+
/**
|
|
554
|
+
* Delete an object from a specific path in Azure
|
|
555
|
+
* Primitive operation required by base class
|
|
556
|
+
* @protected
|
|
557
|
+
*/
|
|
558
|
+
async deleteObjectFromPath(path) {
|
|
559
|
+
await this.ensureInitialized();
|
|
560
|
+
try {
|
|
561
|
+
this.logger.trace(`Deleting object at path: ${path}`);
|
|
562
|
+
const blockBlobClient = this.containerClient.getBlockBlobClient(path);
|
|
563
|
+
await blockBlobClient.delete();
|
|
564
|
+
this.logger.trace(`Object deleted successfully from ${path}`);
|
|
565
|
+
}
|
|
566
|
+
catch (error) {
|
|
567
|
+
// If already deleted (404), treat as success
|
|
568
|
+
if (error.statusCode === 404 || error.code === 'BlobNotFound') {
|
|
569
|
+
this.logger.trace(`Object at ${path} not found (already deleted)`);
|
|
570
|
+
return;
|
|
571
|
+
}
|
|
572
|
+
this.logger.error(`Failed to delete object from ${path}:`, error);
|
|
573
|
+
throw new Error(`Failed to delete object from ${path}: ${error}`);
|
|
574
|
+
}
|
|
575
|
+
}
|
|
576
|
+
/**
|
|
577
|
+
* Batch delete multiple blobs from Azure Blob Storage
|
|
578
|
+
* Deletes up to 256 blobs per batch (Azure limit)
|
|
579
|
+
* Handles throttling, retries, and partial failures
|
|
580
|
+
*
|
|
581
|
+
* @param keys - Array of blob names (paths) to delete
|
|
582
|
+
* @param options - Configuration options for batch deletion
|
|
583
|
+
* @returns Statistics about successful and failed deletions
|
|
584
|
+
*/
|
|
585
|
+
async batchDelete(keys, options = {}) {
|
|
586
|
+
await this.ensureInitialized();
|
|
587
|
+
const { maxRetries = 3, retryDelayMs = 1000, continueOnError = true } = options;
|
|
588
|
+
if (!keys || keys.length === 0) {
|
|
589
|
+
return {
|
|
590
|
+
totalRequested: 0,
|
|
591
|
+
successfulDeletes: 0,
|
|
592
|
+
failedDeletes: 0,
|
|
593
|
+
errors: []
|
|
594
|
+
};
|
|
595
|
+
}
|
|
596
|
+
this.logger.info(`Starting batch delete of ${keys.length} blobs`);
|
|
597
|
+
const stats = {
|
|
598
|
+
totalRequested: keys.length,
|
|
599
|
+
successfulDeletes: 0,
|
|
600
|
+
failedDeletes: 0,
|
|
601
|
+
errors: []
|
|
602
|
+
};
|
|
603
|
+
// Chunk keys into batches of max 256 (Azure limit)
|
|
604
|
+
const MAX_BATCH_SIZE = 256;
|
|
605
|
+
const batches = [];
|
|
606
|
+
for (let i = 0; i < keys.length; i += MAX_BATCH_SIZE) {
|
|
607
|
+
batches.push(keys.slice(i, i + MAX_BATCH_SIZE));
|
|
608
|
+
}
|
|
609
|
+
this.logger.debug(`Split ${keys.length} keys into ${batches.length} batches`);
|
|
610
|
+
// Process each batch
|
|
611
|
+
for (let batchIndex = 0; batchIndex < batches.length; batchIndex++) {
|
|
612
|
+
const batch = batches[batchIndex];
|
|
613
|
+
let retryCount = 0;
|
|
614
|
+
let batchSuccess = false;
|
|
615
|
+
while (retryCount <= maxRetries && !batchSuccess) {
|
|
616
|
+
const requestId = await this.applyBackpressure();
|
|
617
|
+
try {
|
|
618
|
+
const { BlobBatchClient } = await import('@azure/storage-blob');
|
|
619
|
+
this.logger.debug(`Processing batch ${batchIndex + 1}/${batches.length} with ${batch.length} blobs (attempt ${retryCount + 1}/${maxRetries + 1})`);
|
|
620
|
+
// Create batch client
|
|
621
|
+
const batchClient = this.containerClient.getBlobBatchClient();
|
|
622
|
+
// Execute batch delete
|
|
623
|
+
const deletePromises = batch.map((key) => {
|
|
624
|
+
const blobClient = this.containerClient.getBlockBlobClient(key);
|
|
625
|
+
return blobClient.url;
|
|
626
|
+
});
|
|
627
|
+
// Use batch delete
|
|
628
|
+
const batchDeleteResponse = await batchClient.deleteBlobs(batch.map(key => this.containerClient.getBlockBlobClient(key).url), {
|
|
629
|
+
// Additional options can be added here
|
|
630
|
+
});
|
|
631
|
+
this.logger.debug(`Batch ${batchIndex + 1} completed`);
|
|
632
|
+
// Process results
|
|
633
|
+
for (let i = 0; i < batch.length; i++) {
|
|
634
|
+
const key = batch[i];
|
|
635
|
+
const subResponse = batchDeleteResponse.subResponses[i];
|
|
636
|
+
if (subResponse.status === 202 || subResponse.status === 404) {
|
|
637
|
+
// 202 Accepted = successful delete
|
|
638
|
+
// 404 Not Found = already deleted (treat as success)
|
|
639
|
+
stats.successfulDeletes++;
|
|
640
|
+
if (subResponse.status === 404) {
|
|
641
|
+
this.logger.trace(`Blob ${key} already deleted (404)`);
|
|
642
|
+
}
|
|
643
|
+
}
|
|
644
|
+
else {
|
|
645
|
+
// Deletion failed
|
|
646
|
+
stats.failedDeletes++;
|
|
647
|
+
stats.errors.push({
|
|
648
|
+
key,
|
|
649
|
+
error: `HTTP ${subResponse.status}: ${subResponse.errorCode || 'Unknown error'}`
|
|
650
|
+
});
|
|
651
|
+
this.logger.error(`Failed to delete ${key}: ${subResponse.status} - ${subResponse.errorCode}`);
|
|
652
|
+
}
|
|
653
|
+
}
|
|
654
|
+
this.releaseBackpressure(true, requestId);
|
|
655
|
+
batchSuccess = true;
|
|
656
|
+
}
|
|
657
|
+
catch (error) {
|
|
658
|
+
this.releaseBackpressure(false, requestId);
|
|
659
|
+
// Handle throttling
|
|
660
|
+
if (this.isThrottlingError(error)) {
|
|
661
|
+
this.logger.warn(`Batch ${batchIndex + 1} throttled, waiting before retry...`);
|
|
662
|
+
await this.handleThrottling(error);
|
|
663
|
+
retryCount++;
|
|
664
|
+
if (retryCount <= maxRetries) {
|
|
665
|
+
const delay = retryDelayMs * Math.pow(2, retryCount - 1); // Exponential backoff
|
|
666
|
+
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
667
|
+
}
|
|
668
|
+
continue;
|
|
669
|
+
}
|
|
670
|
+
// Handle other errors
|
|
671
|
+
this.logger.error(`Batch ${batchIndex + 1} failed (attempt ${retryCount + 1}/${maxRetries + 1}):`, error);
|
|
672
|
+
if (retryCount < maxRetries) {
|
|
673
|
+
retryCount++;
|
|
674
|
+
const delay = retryDelayMs * Math.pow(2, retryCount - 1);
|
|
675
|
+
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
676
|
+
continue;
|
|
677
|
+
}
|
|
678
|
+
// Max retries exceeded
|
|
679
|
+
if (continueOnError) {
|
|
680
|
+
// Mark all keys in this batch as failed and continue to next batch
|
|
681
|
+
for (const key of batch) {
|
|
682
|
+
stats.failedDeletes++;
|
|
683
|
+
stats.errors.push({
|
|
684
|
+
key,
|
|
685
|
+
error: error.message || String(error)
|
|
686
|
+
});
|
|
687
|
+
}
|
|
688
|
+
this.logger.error(`Batch ${batchIndex + 1} failed after ${maxRetries} retries, continuing to next batch`);
|
|
689
|
+
batchSuccess = true; // Mark as "handled" to move to next batch
|
|
690
|
+
}
|
|
691
|
+
else {
|
|
692
|
+
// Stop processing and throw error
|
|
693
|
+
throw BrainyError.storage(`Batch delete failed at batch ${batchIndex + 1}/${batches.length} after ${maxRetries} retries. Total: ${stats.successfulDeletes} deleted, ${stats.failedDeletes} failed`, error instanceof Error ? error : undefined);
|
|
694
|
+
}
|
|
695
|
+
}
|
|
696
|
+
}
|
|
697
|
+
}
|
|
698
|
+
this.logger.info(`Batch delete completed: ${stats.successfulDeletes}/${stats.totalRequested} successful, ${stats.failedDeletes} failed`);
|
|
699
|
+
return stats;
|
|
700
|
+
}
|
|
701
|
+
/**
|
|
702
|
+
* List all objects under a specific prefix in Azure
|
|
703
|
+
* Primitive operation required by base class
|
|
704
|
+
* @protected
|
|
705
|
+
*/
|
|
706
|
+
async listObjectsUnderPath(prefix) {
|
|
707
|
+
await this.ensureInitialized();
|
|
708
|
+
try {
|
|
709
|
+
this.logger.trace(`Listing objects under prefix: ${prefix}`);
|
|
710
|
+
const paths = [];
|
|
711
|
+
for await (const blob of this.containerClient.listBlobsFlat({ prefix })) {
|
|
712
|
+
if (blob.name) {
|
|
713
|
+
paths.push(blob.name);
|
|
714
|
+
}
|
|
715
|
+
}
|
|
716
|
+
this.logger.trace(`Found ${paths.length} objects under ${prefix}`);
|
|
717
|
+
return paths;
|
|
718
|
+
}
|
|
719
|
+
catch (error) {
|
|
720
|
+
this.logger.error(`Failed to list objects under ${prefix}:`, error);
|
|
721
|
+
throw new Error(`Failed to list objects under ${prefix}: ${error}`);
|
|
722
|
+
}
|
|
723
|
+
}
|
|
724
|
+
/**
|
|
725
|
+
* Helper: Convert Azure stream to buffer
|
|
726
|
+
*/
|
|
727
|
+
async streamToBuffer(readableStream) {
|
|
728
|
+
return new Promise((resolve, reject) => {
|
|
729
|
+
const chunks = [];
|
|
730
|
+
readableStream.on('data', (data) => {
|
|
731
|
+
chunks.push(data instanceof Buffer ? data : Buffer.from(data));
|
|
732
|
+
});
|
|
733
|
+
readableStream.on('end', () => {
|
|
734
|
+
resolve(Buffer.concat(chunks));
|
|
735
|
+
});
|
|
736
|
+
readableStream.on('error', reject);
|
|
737
|
+
});
|
|
738
|
+
}
|
|
739
|
+
/**
|
|
740
|
+
* Save a verb to storage (internal implementation)
|
|
741
|
+
*/
|
|
742
|
+
async saveVerb_internal(verb) {
|
|
743
|
+
return this.saveEdge(verb);
|
|
744
|
+
}
|
|
745
|
+
/**
|
|
746
|
+
* Save an edge to storage
|
|
747
|
+
*/
|
|
748
|
+
async saveEdge(edge) {
|
|
749
|
+
await this.ensureInitialized();
|
|
750
|
+
// Check volume mode
|
|
751
|
+
this.checkVolumeMode();
|
|
752
|
+
// Use write buffer in high-volume mode
|
|
753
|
+
if (this.highVolumeMode && this.verbWriteBuffer) {
|
|
754
|
+
this.logger.trace(`📝 BUFFERING: Adding verb ${edge.id} to write buffer`);
|
|
755
|
+
await this.verbWriteBuffer.add(edge.id, edge);
|
|
756
|
+
return;
|
|
757
|
+
}
|
|
758
|
+
// Direct write in normal mode
|
|
759
|
+
await this.saveEdgeDirect(edge);
|
|
760
|
+
}
|
|
761
|
+
/**
|
|
762
|
+
* Save an edge directly to Azure (bypass buffer)
|
|
763
|
+
*/
|
|
764
|
+
async saveEdgeDirect(edge) {
|
|
765
|
+
const requestId = await this.applyBackpressure();
|
|
766
|
+
try {
|
|
767
|
+
this.logger.trace(`Saving edge ${edge.id}`);
|
|
768
|
+
// Convert connections Map to serializable format
|
|
769
|
+
// ARCHITECTURAL FIX: Include core relational fields in verb vector file
|
|
770
|
+
// These fields are essential for 90% of operations - no metadata lookup needed
|
|
771
|
+
const serializableEdge = {
|
|
772
|
+
id: edge.id,
|
|
773
|
+
vector: edge.vector,
|
|
774
|
+
connections: Object.fromEntries(Array.from(edge.connections.entries()).map(([level, verbIds]) => [
|
|
775
|
+
level,
|
|
776
|
+
Array.from(verbIds)
|
|
777
|
+
])),
|
|
778
|
+
// CORE RELATIONAL DATA (v4.0.0)
|
|
779
|
+
verb: edge.verb,
|
|
780
|
+
sourceId: edge.sourceId,
|
|
781
|
+
targetId: edge.targetId,
|
|
782
|
+
// User metadata (if any) - saved separately for scalability
|
|
783
|
+
// metadata field is saved separately via saveVerbMetadata()
|
|
784
|
+
};
|
|
785
|
+
// Get the Azure blob name with UUID-based sharding
|
|
786
|
+
const blobName = this.getVerbKey(edge.id);
|
|
787
|
+
// Save to Azure
|
|
788
|
+
const blockBlobClient = this.containerClient.getBlockBlobClient(blobName);
|
|
789
|
+
await blockBlobClient.upload(JSON.stringify(serializableEdge, null, 2), JSON.stringify(serializableEdge).length, {
|
|
790
|
+
blobHTTPHeaders: { blobContentType: 'application/json' }
|
|
791
|
+
});
|
|
792
|
+
// Update cache
|
|
793
|
+
this.verbCacheManager.set(edge.id, edge);
|
|
794
|
+
// Increment verb count
|
|
795
|
+
const metadata = await this.getVerbMetadata(edge.id);
|
|
796
|
+
if (metadata && metadata.type) {
|
|
797
|
+
await this.incrementVerbCount(metadata.type);
|
|
798
|
+
}
|
|
799
|
+
this.logger.trace(`Edge ${edge.id} saved successfully`);
|
|
800
|
+
this.releaseBackpressure(true, requestId);
|
|
801
|
+
}
|
|
802
|
+
catch (error) {
|
|
803
|
+
this.releaseBackpressure(false, requestId);
|
|
804
|
+
if (this.isThrottlingError(error)) {
|
|
805
|
+
await this.handleThrottling(error);
|
|
806
|
+
throw error;
|
|
807
|
+
}
|
|
808
|
+
this.logger.error(`Failed to save edge ${edge.id}:`, error);
|
|
809
|
+
throw new Error(`Failed to save edge ${edge.id}: ${error}`);
|
|
810
|
+
}
|
|
811
|
+
}
|
|
812
|
+
/**
|
|
813
|
+
* Get a verb from storage (internal implementation)
|
|
814
|
+
* v4.0.0: Returns ONLY vector + core relational fields (no metadata field)
|
|
815
|
+
* Base class combines with metadata via getVerb() -> HNSWVerbWithMetadata
|
|
816
|
+
*/
|
|
817
|
+
async getVerb_internal(id) {
|
|
818
|
+
// v4.0.0: Return ONLY vector + core relational data (no metadata field)
|
|
819
|
+
const edge = await this.getEdge(id);
|
|
820
|
+
if (!edge) {
|
|
821
|
+
return null;
|
|
822
|
+
}
|
|
823
|
+
// Return pure vector + core fields structure
|
|
824
|
+
return edge;
|
|
825
|
+
}
|
|
826
|
+
/**
|
|
827
|
+
* Get an edge from storage
|
|
828
|
+
*/
|
|
829
|
+
async getEdge(id) {
|
|
830
|
+
await this.ensureInitialized();
|
|
831
|
+
// Check cache first
|
|
832
|
+
const cached = this.verbCacheManager.get(id);
|
|
833
|
+
if (cached) {
|
|
834
|
+
this.logger.trace(`Cache hit for verb ${id}`);
|
|
835
|
+
return cached;
|
|
836
|
+
}
|
|
837
|
+
const requestId = await this.applyBackpressure();
|
|
838
|
+
try {
|
|
839
|
+
this.logger.trace(`Getting edge ${id}`);
|
|
840
|
+
// Get the Azure blob name with UUID-based sharding
|
|
841
|
+
const blobName = this.getVerbKey(id);
|
|
842
|
+
// Download from Azure
|
|
843
|
+
const blockBlobClient = this.containerClient.getBlockBlobClient(blobName);
|
|
844
|
+
const downloadResponse = await blockBlobClient.download(0);
|
|
845
|
+
const downloaded = await this.streamToBuffer(downloadResponse.readableStreamBody);
|
|
846
|
+
// Parse JSON
|
|
847
|
+
const data = JSON.parse(downloaded.toString());
|
|
848
|
+
// Convert serialized connections back to Map
|
|
849
|
+
const connections = new Map();
|
|
850
|
+
for (const [level, verbIds] of Object.entries(data.connections || {})) {
|
|
851
|
+
connections.set(Number(level), new Set(verbIds));
|
|
852
|
+
}
|
|
853
|
+
// v4.0.0: Return HNSWVerb with core relational fields (NO metadata field)
|
|
854
|
+
const edge = {
|
|
855
|
+
id: data.id,
|
|
856
|
+
vector: data.vector,
|
|
857
|
+
connections,
|
|
858
|
+
// CORE RELATIONAL DATA (read from vector file)
|
|
859
|
+
verb: data.verb,
|
|
860
|
+
sourceId: data.sourceId,
|
|
861
|
+
targetId: data.targetId
|
|
862
|
+
// ✅ NO metadata field in v4.0.0
|
|
863
|
+
// User metadata retrieved separately via getVerbMetadata()
|
|
864
|
+
};
|
|
865
|
+
// Update cache
|
|
866
|
+
this.verbCacheManager.set(id, edge);
|
|
867
|
+
this.logger.trace(`Successfully retrieved edge ${id}`);
|
|
868
|
+
this.releaseBackpressure(true, requestId);
|
|
869
|
+
return edge;
|
|
870
|
+
}
|
|
871
|
+
catch (error) {
|
|
872
|
+
this.releaseBackpressure(false, requestId);
|
|
873
|
+
// Check if this is a "not found" error
|
|
874
|
+
if (error.statusCode === 404 || error.code === 'BlobNotFound') {
|
|
875
|
+
this.logger.trace(`Edge not found: ${id}`);
|
|
876
|
+
return null;
|
|
877
|
+
}
|
|
878
|
+
if (this.isThrottlingError(error)) {
|
|
879
|
+
await this.handleThrottling(error);
|
|
880
|
+
throw error;
|
|
881
|
+
}
|
|
882
|
+
this.logger.error(`Failed to get edge ${id}:`, error);
|
|
883
|
+
throw BrainyError.fromError(error, `getVerb(${id})`);
|
|
884
|
+
}
|
|
885
|
+
}
|
|
886
|
+
/**
|
|
887
|
+
* Delete a verb from storage (internal implementation)
|
|
888
|
+
*/
|
|
889
|
+
async deleteVerb_internal(id) {
|
|
890
|
+
await this.ensureInitialized();
|
|
891
|
+
const requestId = await this.applyBackpressure();
|
|
892
|
+
try {
|
|
893
|
+
this.logger.trace(`Deleting verb ${id}`);
|
|
894
|
+
// Get the Azure blob name
|
|
895
|
+
const blobName = this.getVerbKey(id);
|
|
896
|
+
// Delete from Azure
|
|
897
|
+
const blockBlobClient = this.containerClient.getBlockBlobClient(blobName);
|
|
898
|
+
await blockBlobClient.delete();
|
|
899
|
+
// Remove from cache
|
|
900
|
+
this.verbCacheManager.delete(id);
|
|
901
|
+
// Decrement verb count
|
|
902
|
+
const metadata = await this.getVerbMetadata(id);
|
|
903
|
+
if (metadata && metadata.type) {
|
|
904
|
+
await this.decrementVerbCount(metadata.type);
|
|
905
|
+
}
|
|
906
|
+
this.logger.trace(`Verb ${id} deleted successfully`);
|
|
907
|
+
this.releaseBackpressure(true, requestId);
|
|
908
|
+
}
|
|
909
|
+
catch (error) {
|
|
910
|
+
this.releaseBackpressure(false, requestId);
|
|
911
|
+
if (error.statusCode === 404 || error.code === 'BlobNotFound') {
|
|
912
|
+
// Already deleted
|
|
913
|
+
this.logger.trace(`Verb ${id} not found (already deleted)`);
|
|
914
|
+
return;
|
|
915
|
+
}
|
|
916
|
+
if (this.isThrottlingError(error)) {
|
|
917
|
+
await this.handleThrottling(error);
|
|
918
|
+
throw error;
|
|
919
|
+
}
|
|
920
|
+
this.logger.error(`Failed to delete verb ${id}:`, error);
|
|
921
|
+
throw new Error(`Failed to delete verb ${id}: ${error}`);
|
|
922
|
+
}
|
|
923
|
+
}
|
|
924
|
+
/**
|
|
925
|
+
* Get nouns with pagination
|
|
926
|
+
* v4.0.0: Returns HNSWNounWithMetadata[] (includes metadata field)
|
|
927
|
+
* Iterates through all UUID-based shards (00-ff) for consistent pagination
|
|
928
|
+
*/
|
|
929
|
+
async getNounsWithPagination(options = {}) {
|
|
930
|
+
await this.ensureInitialized();
|
|
931
|
+
const limit = options.limit || 100;
|
|
932
|
+
// Simplified implementation for Azure (can be optimized similar to GCS)
|
|
933
|
+
const items = [];
|
|
934
|
+
const iterator = this.containerClient.listBlobsFlat({ prefix: this.nounPrefix });
|
|
935
|
+
let count = 0;
|
|
936
|
+
for await (const blob of iterator) {
|
|
937
|
+
if (count >= limit)
|
|
938
|
+
break;
|
|
939
|
+
if (!blob.name || !blob.name.endsWith('.json'))
|
|
940
|
+
continue;
|
|
941
|
+
// Extract UUID from blob name
|
|
942
|
+
const parts = blob.name.split('/');
|
|
943
|
+
const fileName = parts[parts.length - 1];
|
|
944
|
+
const id = fileName.replace('.json', '');
|
|
945
|
+
const node = await this.getNode(id);
|
|
946
|
+
if (!node)
|
|
947
|
+
continue;
|
|
948
|
+
const metadata = await this.getNounMetadata(id);
|
|
949
|
+
if (!metadata)
|
|
950
|
+
continue;
|
|
951
|
+
// Apply filters if provided
|
|
952
|
+
if (options.filter) {
|
|
953
|
+
if (options.filter.nounType) {
|
|
954
|
+
const nounTypes = Array.isArray(options.filter.nounType)
|
|
955
|
+
? options.filter.nounType
|
|
956
|
+
: [options.filter.nounType];
|
|
957
|
+
const nounType = metadata.type || metadata.noun;
|
|
958
|
+
if (!nounType || !nounTypes.includes(nounType)) {
|
|
959
|
+
continue;
|
|
960
|
+
}
|
|
961
|
+
}
|
|
962
|
+
}
|
|
963
|
+
// Combine node with metadata
|
|
964
|
+
items.push({
|
|
965
|
+
...node,
|
|
966
|
+
metadata
|
|
967
|
+
});
|
|
968
|
+
count++;
|
|
969
|
+
}
|
|
970
|
+
return {
|
|
971
|
+
items,
|
|
972
|
+
totalCount: this.totalNounCount,
|
|
973
|
+
hasMore: false,
|
|
974
|
+
nextCursor: undefined
|
|
975
|
+
};
|
|
976
|
+
}
|
|
977
|
+
/**
|
|
978
|
+
* Get nouns by noun type (internal implementation)
|
|
979
|
+
*/
|
|
980
|
+
async getNounsByNounType_internal(nounType) {
|
|
981
|
+
const result = await this.getNounsWithPagination({
|
|
982
|
+
limit: 10000, // Large limit for backward compatibility
|
|
983
|
+
filter: { nounType }
|
|
984
|
+
});
|
|
985
|
+
return result.items;
|
|
986
|
+
}
|
|
987
|
+
/**
|
|
988
|
+
* Get verbs by source ID (internal implementation)
|
|
989
|
+
*/
|
|
990
|
+
async getVerbsBySource_internal(sourceId) {
|
|
991
|
+
// Simplified: scan all verbs and filter
|
|
992
|
+
const items = [];
|
|
993
|
+
const iterator = this.containerClient.listBlobsFlat({ prefix: this.verbPrefix });
|
|
994
|
+
for await (const blob of iterator) {
|
|
995
|
+
if (!blob.name || !blob.name.endsWith('.json'))
|
|
996
|
+
continue;
|
|
997
|
+
const parts = blob.name.split('/');
|
|
998
|
+
const fileName = parts[parts.length - 1];
|
|
999
|
+
const id = fileName.replace('.json', '');
|
|
1000
|
+
const verb = await this.getEdge(id);
|
|
1001
|
+
if (!verb || verb.sourceId !== sourceId)
|
|
1002
|
+
continue;
|
|
1003
|
+
const metadata = await this.getVerbMetadata(id);
|
|
1004
|
+
items.push({
|
|
1005
|
+
...verb,
|
|
1006
|
+
metadata: metadata || {}
|
|
1007
|
+
});
|
|
1008
|
+
}
|
|
1009
|
+
return items;
|
|
1010
|
+
}
|
|
1011
|
+
/**
|
|
1012
|
+
* Get verbs by target ID (internal implementation)
|
|
1013
|
+
*/
|
|
1014
|
+
async getVerbsByTarget_internal(targetId) {
|
|
1015
|
+
// Simplified: scan all verbs and filter
|
|
1016
|
+
const items = [];
|
|
1017
|
+
const iterator = this.containerClient.listBlobsFlat({ prefix: this.verbPrefix });
|
|
1018
|
+
for await (const blob of iterator) {
|
|
1019
|
+
if (!blob.name || !blob.name.endsWith('.json'))
|
|
1020
|
+
continue;
|
|
1021
|
+
const parts = blob.name.split('/');
|
|
1022
|
+
const fileName = parts[parts.length - 1];
|
|
1023
|
+
const id = fileName.replace('.json', '');
|
|
1024
|
+
const verb = await this.getEdge(id);
|
|
1025
|
+
if (!verb || verb.targetId !== targetId)
|
|
1026
|
+
continue;
|
|
1027
|
+
const metadata = await this.getVerbMetadata(id);
|
|
1028
|
+
items.push({
|
|
1029
|
+
...verb,
|
|
1030
|
+
metadata: metadata || {}
|
|
1031
|
+
});
|
|
1032
|
+
}
|
|
1033
|
+
return items;
|
|
1034
|
+
}
|
|
1035
|
+
/**
|
|
1036
|
+
* Get verbs by type (internal implementation)
|
|
1037
|
+
*/
|
|
1038
|
+
async getVerbsByType_internal(type) {
|
|
1039
|
+
// Simplified: scan all verbs and filter
|
|
1040
|
+
const items = [];
|
|
1041
|
+
const iterator = this.containerClient.listBlobsFlat({ prefix: this.verbPrefix });
|
|
1042
|
+
for await (const blob of iterator) {
|
|
1043
|
+
if (!blob.name || !blob.name.endsWith('.json'))
|
|
1044
|
+
continue;
|
|
1045
|
+
const parts = blob.name.split('/');
|
|
1046
|
+
const fileName = parts[parts.length - 1];
|
|
1047
|
+
const id = fileName.replace('.json', '');
|
|
1048
|
+
const verb = await this.getEdge(id);
|
|
1049
|
+
if (!verb || verb.verb !== type)
|
|
1050
|
+
continue;
|
|
1051
|
+
const metadata = await this.getVerbMetadata(id);
|
|
1052
|
+
items.push({
|
|
1053
|
+
...verb,
|
|
1054
|
+
metadata: metadata || {}
|
|
1055
|
+
});
|
|
1056
|
+
}
|
|
1057
|
+
return items;
|
|
1058
|
+
}
|
|
1059
|
+
/**
|
|
1060
|
+
* Clear all data from storage
|
|
1061
|
+
*/
|
|
1062
|
+
async clear() {
|
|
1063
|
+
await this.ensureInitialized();
|
|
1064
|
+
try {
|
|
1065
|
+
this.logger.info('🧹 Clearing all data from Azure container...');
|
|
1066
|
+
// Delete all blobs in container
|
|
1067
|
+
for await (const blob of this.containerClient.listBlobsFlat()) {
|
|
1068
|
+
if (blob.name) {
|
|
1069
|
+
const blockBlobClient = this.containerClient.getBlockBlobClient(blob.name);
|
|
1070
|
+
await blockBlobClient.delete();
|
|
1071
|
+
}
|
|
1072
|
+
}
|
|
1073
|
+
// Clear caches
|
|
1074
|
+
this.nounCacheManager.clear();
|
|
1075
|
+
this.verbCacheManager.clear();
|
|
1076
|
+
// Reset counts
|
|
1077
|
+
this.totalNounCount = 0;
|
|
1078
|
+
this.totalVerbCount = 0;
|
|
1079
|
+
this.entityCounts.clear();
|
|
1080
|
+
this.verbCounts.clear();
|
|
1081
|
+
this.logger.info('✅ All data cleared from Azure');
|
|
1082
|
+
}
|
|
1083
|
+
catch (error) {
|
|
1084
|
+
this.logger.error('Failed to clear Azure storage:', error);
|
|
1085
|
+
throw new Error(`Failed to clear Azure storage: ${error}`);
|
|
1086
|
+
}
|
|
1087
|
+
}
|
|
1088
|
+
/**
|
|
1089
|
+
* Get storage status
|
|
1090
|
+
*/
|
|
1091
|
+
async getStorageStatus() {
|
|
1092
|
+
await this.ensureInitialized();
|
|
1093
|
+
try {
|
|
1094
|
+
const properties = await this.containerClient.getProperties();
|
|
1095
|
+
return {
|
|
1096
|
+
type: 'azure',
|
|
1097
|
+
used: 0, // Azure doesn't provide usage info easily
|
|
1098
|
+
quota: null, // No quota in Azure Blob Storage
|
|
1099
|
+
details: {
|
|
1100
|
+
container: this.containerName,
|
|
1101
|
+
lastModified: properties.lastModified,
|
|
1102
|
+
etag: properties.etag
|
|
1103
|
+
}
|
|
1104
|
+
};
|
|
1105
|
+
}
|
|
1106
|
+
catch (error) {
|
|
1107
|
+
this.logger.error('Failed to get storage status:', error);
|
|
1108
|
+
return {
|
|
1109
|
+
type: 'azure',
|
|
1110
|
+
used: 0,
|
|
1111
|
+
quota: null
|
|
1112
|
+
};
|
|
1113
|
+
}
|
|
1114
|
+
}
|
|
1115
|
+
/**
|
|
1116
|
+
* Save statistics data to storage
|
|
1117
|
+
*/
|
|
1118
|
+
async saveStatisticsData(statistics) {
|
|
1119
|
+
await this.ensureInitialized();
|
|
1120
|
+
try {
|
|
1121
|
+
const key = `${this.systemPrefix}${STATISTICS_KEY}.json`;
|
|
1122
|
+
this.logger.trace(`Saving statistics to ${key}`);
|
|
1123
|
+
const blockBlobClient = this.containerClient.getBlockBlobClient(key);
|
|
1124
|
+
const content = JSON.stringify(statistics, null, 2);
|
|
1125
|
+
await blockBlobClient.upload(content, content.length, {
|
|
1126
|
+
blobHTTPHeaders: { blobContentType: 'application/json' }
|
|
1127
|
+
});
|
|
1128
|
+
this.logger.trace('Statistics saved successfully');
|
|
1129
|
+
}
|
|
1130
|
+
catch (error) {
|
|
1131
|
+
this.logger.error('Failed to save statistics:', error);
|
|
1132
|
+
throw new Error(`Failed to save statistics: ${error}`);
|
|
1133
|
+
}
|
|
1134
|
+
}
|
|
1135
|
+
/**
|
|
1136
|
+
* Get statistics data from storage
|
|
1137
|
+
*/
|
|
1138
|
+
async getStatisticsData() {
|
|
1139
|
+
await this.ensureInitialized();
|
|
1140
|
+
try {
|
|
1141
|
+
const key = `${this.systemPrefix}${STATISTICS_KEY}.json`;
|
|
1142
|
+
this.logger.trace(`Getting statistics from ${key}`);
|
|
1143
|
+
const blockBlobClient = this.containerClient.getBlockBlobClient(key);
|
|
1144
|
+
const downloadResponse = await blockBlobClient.download(0);
|
|
1145
|
+
const downloaded = await this.streamToBuffer(downloadResponse.readableStreamBody);
|
|
1146
|
+
const statistics = JSON.parse(downloaded.toString());
|
|
1147
|
+
this.logger.trace('Statistics retrieved successfully');
|
|
1148
|
+
// CRITICAL FIX: Populate totalNodes and totalEdges from in-memory counts
|
|
1149
|
+
return {
|
|
1150
|
+
...statistics,
|
|
1151
|
+
totalNodes: this.totalNounCount,
|
|
1152
|
+
totalEdges: this.totalVerbCount,
|
|
1153
|
+
lastUpdated: new Date().toISOString()
|
|
1154
|
+
};
|
|
1155
|
+
}
|
|
1156
|
+
catch (error) {
|
|
1157
|
+
if (error.statusCode === 404 || error.code === 'BlobNotFound') {
|
|
1158
|
+
// Statistics file doesn't exist yet (first restart)
|
|
1159
|
+
this.logger.trace('Statistics file not found - returning minimal stats with counts');
|
|
1160
|
+
return {
|
|
1161
|
+
nounCount: {},
|
|
1162
|
+
verbCount: {},
|
|
1163
|
+
metadataCount: {},
|
|
1164
|
+
hnswIndexSize: 0,
|
|
1165
|
+
totalNodes: this.totalNounCount,
|
|
1166
|
+
totalEdges: this.totalVerbCount,
|
|
1167
|
+
totalMetadata: 0,
|
|
1168
|
+
lastUpdated: new Date().toISOString()
|
|
1169
|
+
};
|
|
1170
|
+
}
|
|
1171
|
+
this.logger.error('Failed to get statistics:', error);
|
|
1172
|
+
return null;
|
|
1173
|
+
}
|
|
1174
|
+
}
|
|
1175
|
+
/**
|
|
1176
|
+
* Initialize counts from storage
|
|
1177
|
+
*/
|
|
1178
|
+
async initializeCounts() {
|
|
1179
|
+
const key = `${this.systemPrefix}counts.json`;
|
|
1180
|
+
try {
|
|
1181
|
+
const blockBlobClient = this.containerClient.getBlockBlobClient(key);
|
|
1182
|
+
const downloadResponse = await blockBlobClient.download(0);
|
|
1183
|
+
const downloaded = await this.streamToBuffer(downloadResponse.readableStreamBody);
|
|
1184
|
+
const counts = JSON.parse(downloaded.toString());
|
|
1185
|
+
this.totalNounCount = counts.totalNounCount || 0;
|
|
1186
|
+
this.totalVerbCount = counts.totalVerbCount || 0;
|
|
1187
|
+
this.entityCounts = new Map(Object.entries(counts.entityCounts || {}));
|
|
1188
|
+
this.verbCounts = new Map(Object.entries(counts.verbCounts || {}));
|
|
1189
|
+
prodLog.info(`📊 Loaded counts from storage: ${this.totalNounCount} nouns, ${this.totalVerbCount} verbs`);
|
|
1190
|
+
}
|
|
1191
|
+
catch (error) {
|
|
1192
|
+
if (error.statusCode === 404 || error.code === 'BlobNotFound') {
|
|
1193
|
+
// No counts file yet - initialize from scan (first-time setup)
|
|
1194
|
+
prodLog.info('📊 No counts file found - this is normal for first init');
|
|
1195
|
+
await this.initializeCountsFromScan();
|
|
1196
|
+
}
|
|
1197
|
+
else {
|
|
1198
|
+
// CRITICAL FIX: Don't silently fail on network/permission errors
|
|
1199
|
+
this.logger.error('❌ CRITICAL: Failed to load counts from Azure:', error);
|
|
1200
|
+
prodLog.error(`❌ Error loading ${key}: ${error.message}`);
|
|
1201
|
+
// Try to recover by scanning the container
|
|
1202
|
+
prodLog.warn('⚠️ Attempting recovery by scanning Azure container...');
|
|
1203
|
+
await this.initializeCountsFromScan();
|
|
1204
|
+
}
|
|
1205
|
+
}
|
|
1206
|
+
}
|
|
1207
|
+
/**
|
|
1208
|
+
* Initialize counts from storage scan (expensive - only for first-time init)
|
|
1209
|
+
*/
|
|
1210
|
+
async initializeCountsFromScan() {
|
|
1211
|
+
try {
|
|
1212
|
+
prodLog.info('📊 Scanning Azure container to initialize counts...');
|
|
1213
|
+
// Count nouns
|
|
1214
|
+
let nounCount = 0;
|
|
1215
|
+
for await (const blob of this.containerClient.listBlobsFlat({ prefix: this.nounPrefix })) {
|
|
1216
|
+
if (blob.name && blob.name.endsWith('.json')) {
|
|
1217
|
+
nounCount++;
|
|
1218
|
+
}
|
|
1219
|
+
}
|
|
1220
|
+
this.totalNounCount = nounCount;
|
|
1221
|
+
// Count verbs
|
|
1222
|
+
let verbCount = 0;
|
|
1223
|
+
for await (const blob of this.containerClient.listBlobsFlat({ prefix: this.verbPrefix })) {
|
|
1224
|
+
if (blob.name && blob.name.endsWith('.json')) {
|
|
1225
|
+
verbCount++;
|
|
1226
|
+
}
|
|
1227
|
+
}
|
|
1228
|
+
this.totalVerbCount = verbCount;
|
|
1229
|
+
// Save initial counts
|
|
1230
|
+
if (this.totalNounCount > 0 || this.totalVerbCount > 0) {
|
|
1231
|
+
await this.persistCounts();
|
|
1232
|
+
prodLog.info(`✅ Initialized counts from scan: ${this.totalNounCount} nouns, ${this.totalVerbCount} verbs`);
|
|
1233
|
+
}
|
|
1234
|
+
else {
|
|
1235
|
+
prodLog.warn(`⚠️ No entities found during container scan. Check that entities exist and prefixes are correct.`);
|
|
1236
|
+
}
|
|
1237
|
+
}
|
|
1238
|
+
catch (error) {
|
|
1239
|
+
// CRITICAL FIX: Don't silently fail - this prevents data loss scenarios
|
|
1240
|
+
this.logger.error('❌ CRITICAL: Failed to initialize counts from Azure container scan:', error);
|
|
1241
|
+
throw new Error(`Failed to initialize Azure storage counts: ${error}. This prevents container restarts from working correctly.`);
|
|
1242
|
+
}
|
|
1243
|
+
}
|
|
1244
|
+
/**
|
|
1245
|
+
* Persist counts to storage
|
|
1246
|
+
*/
|
|
1247
|
+
async persistCounts() {
|
|
1248
|
+
try {
|
|
1249
|
+
const key = `${this.systemPrefix}counts.json`;
|
|
1250
|
+
const counts = {
|
|
1251
|
+
totalNounCount: this.totalNounCount,
|
|
1252
|
+
totalVerbCount: this.totalVerbCount,
|
|
1253
|
+
entityCounts: Object.fromEntries(this.entityCounts),
|
|
1254
|
+
verbCounts: Object.fromEntries(this.verbCounts),
|
|
1255
|
+
lastUpdated: new Date().toISOString()
|
|
1256
|
+
};
|
|
1257
|
+
const blockBlobClient = this.containerClient.getBlockBlobClient(key);
|
|
1258
|
+
const content = JSON.stringify(counts, null, 2);
|
|
1259
|
+
await blockBlobClient.upload(content, content.length, {
|
|
1260
|
+
blobHTTPHeaders: { blobContentType: 'application/json' }
|
|
1261
|
+
});
|
|
1262
|
+
}
|
|
1263
|
+
catch (error) {
|
|
1264
|
+
this.logger.error('Error persisting counts:', error);
|
|
1265
|
+
}
|
|
1266
|
+
}
|
|
1267
|
+
/**
|
|
1268
|
+
* Get a noun's vector for HNSW rebuild
|
|
1269
|
+
*/
|
|
1270
|
+
async getNounVector(id) {
|
|
1271
|
+
await this.ensureInitialized();
|
|
1272
|
+
const noun = await this.getNode(id);
|
|
1273
|
+
return noun ? noun.vector : null;
|
|
1274
|
+
}
|
|
1275
|
+
/**
|
|
1276
|
+
* Save HNSW graph data for a noun
|
|
1277
|
+
*/
|
|
1278
|
+
async saveHNSWData(nounId, hnswData) {
|
|
1279
|
+
await this.ensureInitialized();
|
|
1280
|
+
try {
|
|
1281
|
+
const shard = getShardIdFromUuid(nounId);
|
|
1282
|
+
const key = `entities/nouns/hnsw/${shard}/${nounId}.json`;
|
|
1283
|
+
const blockBlobClient = this.containerClient.getBlockBlobClient(key);
|
|
1284
|
+
const content = JSON.stringify(hnswData, null, 2);
|
|
1285
|
+
await blockBlobClient.upload(content, content.length, {
|
|
1286
|
+
blobHTTPHeaders: { blobContentType: 'application/json' }
|
|
1287
|
+
});
|
|
1288
|
+
}
|
|
1289
|
+
catch (error) {
|
|
1290
|
+
this.logger.error(`Failed to save HNSW data for ${nounId}:`, error);
|
|
1291
|
+
throw new Error(`Failed to save HNSW data for ${nounId}: ${error}`);
|
|
1292
|
+
}
|
|
1293
|
+
}
|
|
1294
|
+
/**
|
|
1295
|
+
* Get HNSW graph data for a noun
|
|
1296
|
+
*/
|
|
1297
|
+
async getHNSWData(nounId) {
|
|
1298
|
+
await this.ensureInitialized();
|
|
1299
|
+
try {
|
|
1300
|
+
const shard = getShardIdFromUuid(nounId);
|
|
1301
|
+
const key = `entities/nouns/hnsw/${shard}/${nounId}.json`;
|
|
1302
|
+
const blockBlobClient = this.containerClient.getBlockBlobClient(key);
|
|
1303
|
+
const downloadResponse = await blockBlobClient.download(0);
|
|
1304
|
+
const downloaded = await this.streamToBuffer(downloadResponse.readableStreamBody);
|
|
1305
|
+
return JSON.parse(downloaded.toString());
|
|
1306
|
+
}
|
|
1307
|
+
catch (error) {
|
|
1308
|
+
if (error.statusCode === 404 || error.code === 'BlobNotFound') {
|
|
1309
|
+
return null;
|
|
1310
|
+
}
|
|
1311
|
+
this.logger.error(`Failed to get HNSW data for ${nounId}:`, error);
|
|
1312
|
+
throw new Error(`Failed to get HNSW data for ${nounId}: ${error}`);
|
|
1313
|
+
}
|
|
1314
|
+
}
|
|
1315
|
+
/**
|
|
1316
|
+
* Save HNSW system data (entry point, max level)
|
|
1317
|
+
*/
|
|
1318
|
+
async saveHNSWSystem(systemData) {
|
|
1319
|
+
await this.ensureInitialized();
|
|
1320
|
+
try {
|
|
1321
|
+
const key = `${this.systemPrefix}hnsw-system.json`;
|
|
1322
|
+
const blockBlobClient = this.containerClient.getBlockBlobClient(key);
|
|
1323
|
+
const content = JSON.stringify(systemData, null, 2);
|
|
1324
|
+
await blockBlobClient.upload(content, content.length, {
|
|
1325
|
+
blobHTTPHeaders: { blobContentType: 'application/json' }
|
|
1326
|
+
});
|
|
1327
|
+
}
|
|
1328
|
+
catch (error) {
|
|
1329
|
+
this.logger.error('Failed to save HNSW system data:', error);
|
|
1330
|
+
throw new Error(`Failed to save HNSW system data: ${error}`);
|
|
1331
|
+
}
|
|
1332
|
+
}
|
|
1333
|
+
/**
|
|
1334
|
+
* Get HNSW system data (entry point, max level)
|
|
1335
|
+
*/
|
|
1336
|
+
async getHNSWSystem() {
|
|
1337
|
+
await this.ensureInitialized();
|
|
1338
|
+
try {
|
|
1339
|
+
const key = `${this.systemPrefix}hnsw-system.json`;
|
|
1340
|
+
const blockBlobClient = this.containerClient.getBlockBlobClient(key);
|
|
1341
|
+
const downloadResponse = await blockBlobClient.download(0);
|
|
1342
|
+
const downloaded = await this.streamToBuffer(downloadResponse.readableStreamBody);
|
|
1343
|
+
return JSON.parse(downloaded.toString());
|
|
1344
|
+
}
|
|
1345
|
+
catch (error) {
|
|
1346
|
+
if (error.statusCode === 404 || error.code === 'BlobNotFound') {
|
|
1347
|
+
return null;
|
|
1348
|
+
}
|
|
1349
|
+
this.logger.error('Failed to get HNSW system data:', error);
|
|
1350
|
+
throw new Error(`Failed to get HNSW system data: ${error}`);
|
|
1351
|
+
}
|
|
1352
|
+
}
|
|
1353
|
+
/**
|
|
1354
|
+
* Set the access tier for a specific blob (v4.0.0 cost optimization)
|
|
1355
|
+
* Azure Blob Storage tiers:
|
|
1356
|
+
* - Hot: $0.0184/GB/month - Frequently accessed data
|
|
1357
|
+
* - Cool: $0.01/GB/month - Infrequently accessed data (45% cheaper)
|
|
1358
|
+
* - Archive: $0.00099/GB/month - Rarely accessed data (99% cheaper!)
|
|
1359
|
+
*
|
|
1360
|
+
* @param blobName - Name of the blob to change tier
|
|
1361
|
+
* @param tier - Target access tier ('Hot', 'Cool', or 'Archive')
|
|
1362
|
+
* @returns Promise that resolves when tier is set
|
|
1363
|
+
*
|
|
1364
|
+
* @example
|
|
1365
|
+
* // Move old vectors to Archive tier (99% cost savings)
|
|
1366
|
+
* await storage.setBlobTier('entities/nouns/vectors/ab/old-id.json', 'Archive')
|
|
1367
|
+
*/
|
|
1368
|
+
async setBlobTier(blobName, tier) {
|
|
1369
|
+
await this.ensureInitialized();
|
|
1370
|
+
try {
|
|
1371
|
+
this.logger.info(`Setting blob tier for ${blobName} to ${tier}`);
|
|
1372
|
+
const blockBlobClient = this.containerClient.getBlockBlobClient(blobName);
|
|
1373
|
+
await blockBlobClient.setAccessTier(tier);
|
|
1374
|
+
this.logger.info(`Successfully set ${blobName} to ${tier} tier`);
|
|
1375
|
+
}
|
|
1376
|
+
catch (error) {
|
|
1377
|
+
if (error.statusCode === 404 || error.code === 'BlobNotFound') {
|
|
1378
|
+
throw new Error(`Blob not found: ${blobName}`);
|
|
1379
|
+
}
|
|
1380
|
+
this.logger.error(`Failed to set tier for ${blobName}:`, error);
|
|
1381
|
+
throw new Error(`Failed to set blob tier: ${error}`);
|
|
1382
|
+
}
|
|
1383
|
+
}
|
|
1384
|
+
/**
|
|
1385
|
+
* Get the current access tier for a blob
|
|
1386
|
+
*
|
|
1387
|
+
* @param blobName - Name of the blob
|
|
1388
|
+
* @returns Promise that resolves to the current tier or null if not found
|
|
1389
|
+
*
|
|
1390
|
+
* @example
|
|
1391
|
+
* const tier = await storage.getBlobTier('entities/nouns/vectors/ab/id.json')
|
|
1392
|
+
* console.log(`Current tier: ${tier}`) // 'Hot', 'Cool', or 'Archive'
|
|
1393
|
+
*/
|
|
1394
|
+
async getBlobTier(blobName) {
|
|
1395
|
+
await this.ensureInitialized();
|
|
1396
|
+
try {
|
|
1397
|
+
const blockBlobClient = this.containerClient.getBlockBlobClient(blobName);
|
|
1398
|
+
const properties = await blockBlobClient.getProperties();
|
|
1399
|
+
return properties.accessTier || null;
|
|
1400
|
+
}
|
|
1401
|
+
catch (error) {
|
|
1402
|
+
if (error.statusCode === 404 || error.code === 'BlobNotFound') {
|
|
1403
|
+
return null;
|
|
1404
|
+
}
|
|
1405
|
+
this.logger.error(`Failed to get tier for ${blobName}:`, error);
|
|
1406
|
+
throw new Error(`Failed to get blob tier: ${error}`);
|
|
1407
|
+
}
|
|
1408
|
+
}
|
|
1409
|
+
/**
|
|
1410
|
+
* Set access tier for multiple blobs in batch (v4.0.0 cost optimization)
|
|
1411
|
+
* Efficiently move large numbers of blobs between tiers for cost optimization
|
|
1412
|
+
*
|
|
1413
|
+
* @param blobs - Array of blob names and their target tiers
|
|
1414
|
+
* @param options - Configuration options
|
|
1415
|
+
* @returns Promise with statistics about tier changes
|
|
1416
|
+
*
|
|
1417
|
+
* @example
|
|
1418
|
+
* // Move old data to Archive tier for 99% cost savings
|
|
1419
|
+
* const oldBlobs = await storage.listObjectsUnderPath('entities/nouns/vectors/')
|
|
1420
|
+
* await storage.setBlobTierBatch(
|
|
1421
|
+
* oldBlobs.map(name => ({ blobName: name, tier: 'Archive' }))
|
|
1422
|
+
* )
|
|
1423
|
+
*/
|
|
1424
|
+
async setBlobTierBatch(blobs, options = {}) {
|
|
1425
|
+
await this.ensureInitialized();
|
|
1426
|
+
const { maxRetries = 3, retryDelayMs = 1000, continueOnError = true } = options;
|
|
1427
|
+
if (!blobs || blobs.length === 0) {
|
|
1428
|
+
return {
|
|
1429
|
+
totalRequested: 0,
|
|
1430
|
+
successfulChanges: 0,
|
|
1431
|
+
failedChanges: 0,
|
|
1432
|
+
errors: []
|
|
1433
|
+
};
|
|
1434
|
+
}
|
|
1435
|
+
this.logger.info(`Starting batch tier change for ${blobs.length} blobs`);
|
|
1436
|
+
const stats = {
|
|
1437
|
+
totalRequested: blobs.length,
|
|
1438
|
+
successfulChanges: 0,
|
|
1439
|
+
failedChanges: 0,
|
|
1440
|
+
errors: []
|
|
1441
|
+
};
|
|
1442
|
+
// Process each blob (Azure doesn't have batch tier API, so we parallelize)
|
|
1443
|
+
const CONCURRENT_LIMIT = 10; // Limit concurrent operations to avoid throttling
|
|
1444
|
+
for (let i = 0; i < blobs.length; i += CONCURRENT_LIMIT) {
|
|
1445
|
+
const batch = blobs.slice(i, i + CONCURRENT_LIMIT);
|
|
1446
|
+
const promises = batch.map(async ({ blobName, tier }) => {
|
|
1447
|
+
let retryCount = 0;
|
|
1448
|
+
while (retryCount <= maxRetries) {
|
|
1449
|
+
try {
|
|
1450
|
+
await this.setBlobTier(blobName, tier);
|
|
1451
|
+
return { blobName, success: true, error: null };
|
|
1452
|
+
}
|
|
1453
|
+
catch (error) {
|
|
1454
|
+
// Handle throttling
|
|
1455
|
+
if (this.isThrottlingError(error)) {
|
|
1456
|
+
this.logger.warn(`Tier change throttled for ${blobName}, retrying...`);
|
|
1457
|
+
await this.handleThrottling(error);
|
|
1458
|
+
retryCount++;
|
|
1459
|
+
if (retryCount <= maxRetries) {
|
|
1460
|
+
const delay = retryDelayMs * Math.pow(2, retryCount - 1);
|
|
1461
|
+
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
1462
|
+
}
|
|
1463
|
+
continue;
|
|
1464
|
+
}
|
|
1465
|
+
// Other errors
|
|
1466
|
+
if (retryCount < maxRetries) {
|
|
1467
|
+
retryCount++;
|
|
1468
|
+
const delay = retryDelayMs * Math.pow(2, retryCount - 1);
|
|
1469
|
+
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
1470
|
+
continue;
|
|
1471
|
+
}
|
|
1472
|
+
// Max retries exceeded
|
|
1473
|
+
return {
|
|
1474
|
+
blobName,
|
|
1475
|
+
success: false,
|
|
1476
|
+
error: error.message || String(error)
|
|
1477
|
+
};
|
|
1478
|
+
}
|
|
1479
|
+
}
|
|
1480
|
+
// Should never reach here, but TypeScript needs a return
|
|
1481
|
+
return {
|
|
1482
|
+
blobName,
|
|
1483
|
+
success: false,
|
|
1484
|
+
error: 'Max retries exceeded'
|
|
1485
|
+
};
|
|
1486
|
+
});
|
|
1487
|
+
const results = await Promise.all(promises);
|
|
1488
|
+
for (const result of results) {
|
|
1489
|
+
if (result.success) {
|
|
1490
|
+
stats.successfulChanges++;
|
|
1491
|
+
}
|
|
1492
|
+
else {
|
|
1493
|
+
stats.failedChanges++;
|
|
1494
|
+
if (result.error) {
|
|
1495
|
+
stats.errors.push({
|
|
1496
|
+
blobName: result.blobName,
|
|
1497
|
+
error: result.error
|
|
1498
|
+
});
|
|
1499
|
+
}
|
|
1500
|
+
}
|
|
1501
|
+
}
|
|
1502
|
+
}
|
|
1503
|
+
this.logger.info(`Batch tier change completed: ${stats.successfulChanges}/${stats.totalRequested} successful, ${stats.failedChanges} failed`);
|
|
1504
|
+
return stats;
|
|
1505
|
+
}
|
|
1506
|
+
/**
|
|
1507
|
+
* Check if a blob in Archive tier has been rehydrated and is ready to read
|
|
1508
|
+
* Archive tier blobs must be rehydrated before they can be read
|
|
1509
|
+
*
|
|
1510
|
+
* @param blobName - Name of the blob to check
|
|
1511
|
+
* @returns Promise that resolves to rehydration status
|
|
1512
|
+
*
|
|
1513
|
+
* @example
|
|
1514
|
+
* const status = await storage.checkRehydrationStatus('entities/nouns/vectors/ab/id.json')
|
|
1515
|
+
* if (status.isRehydrated) {
|
|
1516
|
+
* // Blob is ready to read
|
|
1517
|
+
* const data = await storage.readObjectFromPath('entities/nouns/vectors/ab/id.json')
|
|
1518
|
+
* }
|
|
1519
|
+
*/
|
|
1520
|
+
async checkRehydrationStatus(blobName) {
|
|
1521
|
+
await this.ensureInitialized();
|
|
1522
|
+
try {
|
|
1523
|
+
const blockBlobClient = this.containerClient.getBlockBlobClient(blobName);
|
|
1524
|
+
const properties = await blockBlobClient.getProperties();
|
|
1525
|
+
const tier = properties.accessTier;
|
|
1526
|
+
const archiveStatus = properties.archiveStatus;
|
|
1527
|
+
return {
|
|
1528
|
+
isArchived: tier === 'Archive',
|
|
1529
|
+
isRehydrating: archiveStatus === 'rehydrate-pending-to-hot' || archiveStatus === 'rehydrate-pending-to-cool',
|
|
1530
|
+
isRehydrated: tier === 'Hot' || tier === 'Cool',
|
|
1531
|
+
rehydratePriority: properties.rehydratePriority
|
|
1532
|
+
};
|
|
1533
|
+
}
|
|
1534
|
+
catch (error) {
|
|
1535
|
+
if (error.statusCode === 404 || error.code === 'BlobNotFound') {
|
|
1536
|
+
throw new Error(`Blob not found: ${blobName}`);
|
|
1537
|
+
}
|
|
1538
|
+
this.logger.error(`Failed to check rehydration status for ${blobName}:`, error);
|
|
1539
|
+
throw new Error(`Failed to check rehydration status: ${error}`);
|
|
1540
|
+
}
|
|
1541
|
+
}
|
|
1542
|
+
/**
|
|
1543
|
+
* Rehydrate an archived blob (move from Archive to Hot or Cool tier)
|
|
1544
|
+
* Note: Rehydration can take several hours depending on priority
|
|
1545
|
+
*
|
|
1546
|
+
* @param blobName - Name of the blob to rehydrate
|
|
1547
|
+
* @param targetTier - Target tier after rehydration ('Hot' or 'Cool')
|
|
1548
|
+
* @param priority - Rehydration priority ('Standard' or 'High')
|
|
1549
|
+
* Standard: Up to 15 hours, cheaper
|
|
1550
|
+
* High: Up to 1 hour, more expensive
|
|
1551
|
+
* @returns Promise that resolves when rehydration is initiated
|
|
1552
|
+
*
|
|
1553
|
+
* @example
|
|
1554
|
+
* // Rehydrate with standard priority (cheaper, slower)
|
|
1555
|
+
* await storage.rehydrateBlob('entities/nouns/vectors/ab/id.json', 'Cool', 'Standard')
|
|
1556
|
+
*
|
|
1557
|
+
* // Check status
|
|
1558
|
+
* const status = await storage.checkRehydrationStatus('entities/nouns/vectors/ab/id.json')
|
|
1559
|
+
* console.log(`Rehydrating: ${status.isRehydrating}`)
|
|
1560
|
+
*/
|
|
1561
|
+
async rehydrateBlob(blobName, targetTier, priority = 'Standard') {
|
|
1562
|
+
await this.ensureInitialized();
|
|
1563
|
+
try {
|
|
1564
|
+
this.logger.info(`Rehydrating blob ${blobName} to ${targetTier} tier with ${priority} priority`);
|
|
1565
|
+
const blockBlobClient = this.containerClient.getBlockBlobClient(blobName);
|
|
1566
|
+
// Set tier with rehydration priority
|
|
1567
|
+
await blockBlobClient.setAccessTier(targetTier, {
|
|
1568
|
+
rehydratePriority: priority
|
|
1569
|
+
});
|
|
1570
|
+
this.logger.info(`Successfully initiated rehydration for ${blobName}`);
|
|
1571
|
+
}
|
|
1572
|
+
catch (error) {
|
|
1573
|
+
if (error.statusCode === 404 || error.code === 'BlobNotFound') {
|
|
1574
|
+
throw new Error(`Blob not found: ${blobName}`);
|
|
1575
|
+
}
|
|
1576
|
+
this.logger.error(`Failed to rehydrate blob ${blobName}:`, error);
|
|
1577
|
+
throw new Error(`Failed to rehydrate blob: ${error}`);
|
|
1578
|
+
}
|
|
1579
|
+
}
|
|
1580
|
+
/**
|
|
1581
|
+
* Set lifecycle management policy for automatic tier transitions and deletions (v4.0.0)
|
|
1582
|
+
* Automates cost optimization by moving old data to cheaper tiers or deleting it
|
|
1583
|
+
*
|
|
1584
|
+
* Azure Lifecycle Management rules run once per day and apply to the entire container.
|
|
1585
|
+
* Rules are evaluated against blob properties like lastModifiedTime and lastAccessTime.
|
|
1586
|
+
*
|
|
1587
|
+
* @param options - Lifecycle policy configuration
|
|
1588
|
+
* @returns Promise that resolves when policy is set
|
|
1589
|
+
*
|
|
1590
|
+
* @example
|
|
1591
|
+
* // Auto-archive old vectors for 99% cost savings
|
|
1592
|
+
* await storage.setLifecyclePolicy({
|
|
1593
|
+
* rules: [
|
|
1594
|
+
* {
|
|
1595
|
+
* name: 'archiveOldVectors',
|
|
1596
|
+
* enabled: true,
|
|
1597
|
+
* type: 'Lifecycle',
|
|
1598
|
+
* definition: {
|
|
1599
|
+
* filters: {
|
|
1600
|
+
* blobTypes: ['blockBlob'],
|
|
1601
|
+
* prefixMatch: ['entities/nouns/vectors/']
|
|
1602
|
+
* },
|
|
1603
|
+
* actions: {
|
|
1604
|
+
* baseBlob: {
|
|
1605
|
+
* tierToCool: { daysAfterModificationGreaterThan: 30 },
|
|
1606
|
+
* tierToArchive: { daysAfterModificationGreaterThan: 90 },
|
|
1607
|
+
* delete: { daysAfterModificationGreaterThan: 365 }
|
|
1608
|
+
* }
|
|
1609
|
+
* }
|
|
1610
|
+
* }
|
|
1611
|
+
* }
|
|
1612
|
+
* ]
|
|
1613
|
+
* })
|
|
1614
|
+
*/
|
|
1615
|
+
async setLifecyclePolicy(options) {
|
|
1616
|
+
await this.ensureInitialized();
|
|
1617
|
+
if (!this.accountName) {
|
|
1618
|
+
throw new Error('Lifecycle policies require accountName to be configured');
|
|
1619
|
+
}
|
|
1620
|
+
try {
|
|
1621
|
+
this.logger.info(`Setting lifecycle policy with ${options.rules.length} rules`);
|
|
1622
|
+
const { BlobServiceClient } = await import('@azure/storage-blob');
|
|
1623
|
+
// Get blob service client
|
|
1624
|
+
let blobServiceClient;
|
|
1625
|
+
if (this.connectionString) {
|
|
1626
|
+
blobServiceClient = BlobServiceClient.fromConnectionString(this.connectionString);
|
|
1627
|
+
}
|
|
1628
|
+
else if (this.accountName && this.accountKey) {
|
|
1629
|
+
const { StorageSharedKeyCredential } = await import('@azure/storage-blob');
|
|
1630
|
+
const credential = new StorageSharedKeyCredential(this.accountName, this.accountKey);
|
|
1631
|
+
blobServiceClient = new BlobServiceClient(`https://${this.accountName}.blob.core.windows.net`, credential);
|
|
1632
|
+
}
|
|
1633
|
+
else if (this.accountName && this.sasToken) {
|
|
1634
|
+
blobServiceClient = new BlobServiceClient(`https://${this.accountName}.blob.core.windows.net${this.sasToken}`);
|
|
1635
|
+
}
|
|
1636
|
+
else if (this.accountName) {
|
|
1637
|
+
const { DefaultAzureCredential } = await import('@azure/identity');
|
|
1638
|
+
const credential = new DefaultAzureCredential();
|
|
1639
|
+
blobServiceClient = new BlobServiceClient(`https://${this.accountName}.blob.core.windows.net`, credential);
|
|
1640
|
+
}
|
|
1641
|
+
else {
|
|
1642
|
+
throw new Error('Cannot set lifecycle policy without valid authentication');
|
|
1643
|
+
}
|
|
1644
|
+
// Get service properties to modify lifecycle policy
|
|
1645
|
+
const serviceProperties = await blobServiceClient.getProperties();
|
|
1646
|
+
// Format rules according to Azure's expected structure
|
|
1647
|
+
const lifecyclePolicy = {
|
|
1648
|
+
rules: options.rules.map(rule => ({
|
|
1649
|
+
enabled: rule.enabled,
|
|
1650
|
+
name: rule.name,
|
|
1651
|
+
type: rule.type,
|
|
1652
|
+
definition: {
|
|
1653
|
+
filters: {
|
|
1654
|
+
blobTypes: rule.definition.filters.blobTypes,
|
|
1655
|
+
...(rule.definition.filters.prefixMatch && {
|
|
1656
|
+
prefixMatch: rule.definition.filters.prefixMatch
|
|
1657
|
+
})
|
|
1658
|
+
},
|
|
1659
|
+
actions: {
|
|
1660
|
+
baseBlob: {
|
|
1661
|
+
...(rule.definition.actions.baseBlob.tierToCool && {
|
|
1662
|
+
tierToCool: rule.definition.actions.baseBlob.tierToCool
|
|
1663
|
+
}),
|
|
1664
|
+
...(rule.definition.actions.baseBlob.tierToArchive && {
|
|
1665
|
+
tierToArchive: rule.definition.actions.baseBlob.tierToArchive
|
|
1666
|
+
}),
|
|
1667
|
+
...(rule.definition.actions.baseBlob.delete && {
|
|
1668
|
+
delete: rule.definition.actions.baseBlob.delete
|
|
1669
|
+
})
|
|
1670
|
+
}
|
|
1671
|
+
}
|
|
1672
|
+
}
|
|
1673
|
+
}))
|
|
1674
|
+
};
|
|
1675
|
+
// Set the lifecycle management policy
|
|
1676
|
+
await blobServiceClient.setProperties({
|
|
1677
|
+
...serviceProperties,
|
|
1678
|
+
blobAnalyticsLogging: serviceProperties.blobAnalyticsLogging,
|
|
1679
|
+
hourMetrics: serviceProperties.hourMetrics,
|
|
1680
|
+
minuteMetrics: serviceProperties.minuteMetrics,
|
|
1681
|
+
cors: serviceProperties.cors,
|
|
1682
|
+
deleteRetentionPolicy: serviceProperties.deleteRetentionPolicy,
|
|
1683
|
+
staticWebsite: serviceProperties.staticWebsite,
|
|
1684
|
+
// Set lifecycle policy
|
|
1685
|
+
lifecyclePolicy
|
|
1686
|
+
});
|
|
1687
|
+
this.logger.info(`Successfully set lifecycle policy with ${options.rules.length} rules`);
|
|
1688
|
+
}
|
|
1689
|
+
catch (error) {
|
|
1690
|
+
this.logger.error('Failed to set lifecycle policy:', error);
|
|
1691
|
+
throw new Error(`Failed to set lifecycle policy: ${error.message || error}`);
|
|
1692
|
+
}
|
|
1693
|
+
}
|
|
1694
|
+
/**
|
|
1695
|
+
* Get the current lifecycle management policy
|
|
1696
|
+
*
|
|
1697
|
+
* @returns Promise that resolves to the current policy or null if not set
|
|
1698
|
+
*
|
|
1699
|
+
* @example
|
|
1700
|
+
* const policy = await storage.getLifecyclePolicy()
|
|
1701
|
+
* if (policy) {
|
|
1702
|
+
* console.log(`Found ${policy.rules.length} lifecycle rules`)
|
|
1703
|
+
* }
|
|
1704
|
+
*/
|
|
1705
|
+
async getLifecyclePolicy() {
|
|
1706
|
+
await this.ensureInitialized();
|
|
1707
|
+
if (!this.accountName) {
|
|
1708
|
+
throw new Error('Lifecycle policies require accountName to be configured');
|
|
1709
|
+
}
|
|
1710
|
+
try {
|
|
1711
|
+
this.logger.info('Getting lifecycle policy');
|
|
1712
|
+
const { BlobServiceClient } = await import('@azure/storage-blob');
|
|
1713
|
+
// Get blob service client
|
|
1714
|
+
let blobServiceClient;
|
|
1715
|
+
if (this.connectionString) {
|
|
1716
|
+
blobServiceClient = BlobServiceClient.fromConnectionString(this.connectionString);
|
|
1717
|
+
}
|
|
1718
|
+
else if (this.accountName && this.accountKey) {
|
|
1719
|
+
const { StorageSharedKeyCredential } = await import('@azure/storage-blob');
|
|
1720
|
+
const credential = new StorageSharedKeyCredential(this.accountName, this.accountKey);
|
|
1721
|
+
blobServiceClient = new BlobServiceClient(`https://${this.accountName}.blob.core.windows.net`, credential);
|
|
1722
|
+
}
|
|
1723
|
+
else if (this.accountName && this.sasToken) {
|
|
1724
|
+
blobServiceClient = new BlobServiceClient(`https://${this.accountName}.blob.core.windows.net${this.sasToken}`);
|
|
1725
|
+
}
|
|
1726
|
+
else if (this.accountName) {
|
|
1727
|
+
const { DefaultAzureCredential } = await import('@azure/identity');
|
|
1728
|
+
const credential = new DefaultAzureCredential();
|
|
1729
|
+
blobServiceClient = new BlobServiceClient(`https://${this.accountName}.blob.core.windows.net`, credential);
|
|
1730
|
+
}
|
|
1731
|
+
else {
|
|
1732
|
+
throw new Error('Cannot get lifecycle policy without valid authentication');
|
|
1733
|
+
}
|
|
1734
|
+
// Get service properties
|
|
1735
|
+
const serviceProperties = await blobServiceClient.getProperties();
|
|
1736
|
+
if (!serviceProperties.lifecyclePolicy || !serviceProperties.lifecyclePolicy.rules) {
|
|
1737
|
+
this.logger.info('No lifecycle policy configured');
|
|
1738
|
+
return null;
|
|
1739
|
+
}
|
|
1740
|
+
this.logger.info(`Found lifecycle policy with ${serviceProperties.lifecyclePolicy.rules.length} rules`);
|
|
1741
|
+
return serviceProperties.lifecyclePolicy;
|
|
1742
|
+
}
|
|
1743
|
+
catch (error) {
|
|
1744
|
+
this.logger.error('Failed to get lifecycle policy:', error);
|
|
1745
|
+
throw new Error(`Failed to get lifecycle policy: ${error.message || error}`);
|
|
1746
|
+
}
|
|
1747
|
+
}
|
|
1748
|
+
/**
|
|
1749
|
+
* Remove the lifecycle management policy
|
|
1750
|
+
* All automatic tier transitions and deletions will stop
|
|
1751
|
+
*
|
|
1752
|
+
* @returns Promise that resolves when policy is removed
|
|
1753
|
+
*
|
|
1754
|
+
* @example
|
|
1755
|
+
* await storage.removeLifecyclePolicy()
|
|
1756
|
+
* console.log('Lifecycle policy removed - auto-archival disabled')
|
|
1757
|
+
*/
|
|
1758
|
+
async removeLifecyclePolicy() {
|
|
1759
|
+
await this.ensureInitialized();
|
|
1760
|
+
if (!this.accountName) {
|
|
1761
|
+
throw new Error('Lifecycle policies require accountName to be configured');
|
|
1762
|
+
}
|
|
1763
|
+
try {
|
|
1764
|
+
this.logger.info('Removing lifecycle policy');
|
|
1765
|
+
const { BlobServiceClient } = await import('@azure/storage-blob');
|
|
1766
|
+
// Get blob service client
|
|
1767
|
+
let blobServiceClient;
|
|
1768
|
+
if (this.connectionString) {
|
|
1769
|
+
blobServiceClient = BlobServiceClient.fromConnectionString(this.connectionString);
|
|
1770
|
+
}
|
|
1771
|
+
else if (this.accountName && this.accountKey) {
|
|
1772
|
+
const { StorageSharedKeyCredential } = await import('@azure/storage-blob');
|
|
1773
|
+
const credential = new StorageSharedKeyCredential(this.accountName, this.accountKey);
|
|
1774
|
+
blobServiceClient = new BlobServiceClient(`https://${this.accountName}.blob.core.windows.net`, credential);
|
|
1775
|
+
}
|
|
1776
|
+
else if (this.accountName && this.sasToken) {
|
|
1777
|
+
blobServiceClient = new BlobServiceClient(`https://${this.accountName}.blob.core.windows.net${this.sasToken}`);
|
|
1778
|
+
}
|
|
1779
|
+
else if (this.accountName) {
|
|
1780
|
+
const { DefaultAzureCredential } = await import('@azure/identity');
|
|
1781
|
+
const credential = new DefaultAzureCredential();
|
|
1782
|
+
blobServiceClient = new BlobServiceClient(`https://${this.accountName}.blob.core.windows.net`, credential);
|
|
1783
|
+
}
|
|
1784
|
+
else {
|
|
1785
|
+
throw new Error('Cannot remove lifecycle policy without valid authentication');
|
|
1786
|
+
}
|
|
1787
|
+
// Get service properties
|
|
1788
|
+
const serviceProperties = await blobServiceClient.getProperties();
|
|
1789
|
+
// Set properties without lifecycle policy (removes it)
|
|
1790
|
+
await blobServiceClient.setProperties({
|
|
1791
|
+
...serviceProperties,
|
|
1792
|
+
blobAnalyticsLogging: serviceProperties.blobAnalyticsLogging,
|
|
1793
|
+
hourMetrics: serviceProperties.hourMetrics,
|
|
1794
|
+
minuteMetrics: serviceProperties.minuteMetrics,
|
|
1795
|
+
cors: serviceProperties.cors,
|
|
1796
|
+
deleteRetentionPolicy: serviceProperties.deleteRetentionPolicy,
|
|
1797
|
+
staticWebsite: serviceProperties.staticWebsite,
|
|
1798
|
+
// Remove lifecycle policy by not including it
|
|
1799
|
+
lifecyclePolicy: undefined
|
|
1800
|
+
});
|
|
1801
|
+
this.logger.info('Successfully removed lifecycle policy');
|
|
1802
|
+
}
|
|
1803
|
+
catch (error) {
|
|
1804
|
+
this.logger.error('Failed to remove lifecycle policy:', error);
|
|
1805
|
+
throw new Error(`Failed to remove lifecycle policy: ${error.message || error}`);
|
|
1806
|
+
}
|
|
1807
|
+
}
|
|
1808
|
+
}
|
|
1809
|
+
//# sourceMappingURL=azureBlobStorage.js.map
|