@uploadista/data-store-azure 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +5 -0
- package/.turbo/turbo-check.log +5 -0
- package/LICENSE +21 -0
- package/README.md +570 -0
- package/dist/azure-store.d.ts +67 -0
- package/dist/azure-store.d.ts.map +1 -0
- package/dist/azure-store.js +725 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +1 -0
- package/package.json +33 -0
- package/src/azure-store.ts +1083 -0
- package/src/index.ts +1 -0
- package/tsconfig.json +20 -0
- package/tsconfig.tsbuildinfo +1 -0
@@ -0,0 +1,725 @@
import { BlobServiceClient as BlobService, StorageSharedKeyCredential } from "@azure/storage-blob";
import { UploadistaError } from "@uploadista/core/errors";
import {
  azureActiveUploadsGauge as activeUploadsGauge,
  azureFileSizeHistogram as fileSizeHistogram,
  logAzureUploadCompletion,
  azurePartSizeHistogram as partSizeHistogram,
  azurePartUploadDurationHistogram as partUploadDurationHistogram,
  trackAzureError,
  azureUploadDurationHistogram as uploadDurationHistogram,
  azureUploadErrorsTotal as uploadErrorsTotal,
  azureUploadPartsTotal as uploadPartsTotal,
  azureUploadRequestsTotal as uploadRequestsTotal,
  azureUploadSuccessTotal as uploadSuccessTotal,
  withAzureTimingMetrics as withTimingMetrics,
  withAzureUploadMetrics as withUploadMetrics,
} from "@uploadista/observability";
import { Effect, Ref, Stream } from "effect";
// Using base64 encoding that works in both Node.js and browser
const bufferFrom = (str) => {
  // Use global Buffer if available, otherwise fall back to btoa
  if (typeof globalThis !== "undefined" && "Buffer" in globalThis) {
    return globalThis.Buffer.from(str);
  }
  // Fallback for browser environments: btoa already yields the base64 string,
  // so the caller's .toString("base64") is a no-op on it
  if (typeof btoa === "function") {
    return btoa(str);
  }
  // Last resort: raw bytes (note: not base64-encoded)
  return new Uint8Array(Array.from(str, (c) => c.charCodeAt(0)));
};
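// Editor's note (illustrative, not part of the published file): in the Node.js
// path, the block IDs generated further down resolve to plain base64 strings,
// e.g. bufferFrom("block-000001").toString("base64") === "YmxvY2stMDAwMDAx".
// The zero-padded counter keeps every encoded block ID the same length, which
// Azure requires for the block IDs of a given blob.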
function calcOffsetFromBlocks(blocks) {
  return blocks && blocks.length > 0
    ? blocks.reduce((a, b) => a + (b?.size ?? 0), 0)
    : 0;
}
export function azureStore({
  deliveryUrl,
  blockSize,
  minBlockSize = 1024, // 1KB minimum
  maxBlocks = 50_000,
  kvStore,
  maxConcurrentBlockUploads = 60,
  expirationPeriodInMilliseconds = 1000 * 60 * 60 * 24 * 7, // 1 week
  connectionString,
  sasUrl,
  credential,
  accountName,
  accountKey,
  containerName,
}) {
  const preferredBlockSize = blockSize || 8 * 1024 * 1024; // 8MB default
  const maxUploadSize = 5_497_558_138_880; // 5TiB (Azure Block Blob limit)
  // Initialize Azure Blob Service Client with cross-platform authentication
  let blobServiceClient;
  if (connectionString) {
    // Connection string (works in all environments)
    blobServiceClient = BlobService.fromConnectionString(connectionString);
  }
  else if (sasUrl) {
    // SAS URL (works in all environments including browsers)
    blobServiceClient = new BlobService(sasUrl);
  }
  else if (credential) {
    // OAuth token credential (works in all environments, recommended for production)
    const accountUrl = accountName
      ? `https://${accountName}.blob.core.windows.net`
      : sasUrl?.split("?")[0] || "";
    if (!accountUrl) {
      throw new Error("When using credential authentication, either accountName or a valid sasUrl must be provided to determine the account URL");
    }
    blobServiceClient = new BlobService(accountUrl, credential);
  }
  else if (accountName && accountKey) {
    // Legacy shared key authentication (Node.js only)
    // This will fail in browser/edge environments
    try {
      const sharedKeyCredential = new StorageSharedKeyCredential(accountName, accountKey);
      blobServiceClient = new BlobService(`https://${accountName}.blob.core.windows.net`, sharedKeyCredential);
    }
    catch (error) {
      throw new Error("StorageSharedKeyCredential is only available in Node.js environments. " +
        "Use sasUrl or credential options for cross-platform compatibility. " +
        `Original error: ${error}`);
    }
  }
  else {
    throw new Error("Azure authentication required. Provide one of: " +
      "connectionString, sasUrl, credential, or accountName + accountKey (Node.js only)");
  }
  const containerClient = blobServiceClient.getContainerClient(containerName);
  const incompletePartKey = (id) => {
    return `${id}.incomplete`;
  };
  const uploadBlock = (uploadFile, readStream, blockId) => {
    return withTimingMetrics(partUploadDurationHistogram, Effect.gen(function* () {
      yield* Effect.logInfo("Uploading block").pipe(Effect.annotateLogs({
        upload_id: uploadFile.id,
        block_id: blockId,
        block_size: readStream.length,
      }));
      yield* uploadPartsTotal(Effect.succeed(1));
      yield* partSizeHistogram(Effect.succeed(readStream.length));
      try {
        const blobClient = containerClient.getBlockBlobClient(uploadFile.id);
        yield* Effect.tryPromise({
          try: async () => {
            await blobClient.stageBlock(blockId, readStream, readStream.length);
          },
          catch: (error) => {
            Effect.runSync(trackAzureError("uploadBlock", error, {
              upload_id: uploadFile.id,
              block_id: blockId,
              block_size: readStream.length,
            }));
            return UploadistaError.fromCode("FILE_WRITE_ERROR", {
              cause: error,
            });
          },
        });
        yield* Effect.logInfo("Finished uploading block").pipe(Effect.annotateLogs({
          upload_id: uploadFile.id,
          block_id: blockId,
          block_size: readStream.length,
        }));
      }
      catch (error) {
        Effect.runSync(trackAzureError("uploadBlock", error, {
          upload_id: uploadFile.id,
          block_id: blockId,
          block_size: readStream.length,
        }));
        throw error;
      }
    }));
  };
  const uploadIncompleteBlock = (id, readStream) => {
    return Effect.tryPromise({
      try: async () => {
        const blobClient = containerClient.getBlockBlobClient(incompletePartKey(id));
        await blobClient.upload(readStream, readStream.length);
      },
      catch: (error) => UploadistaError.fromCode("FILE_WRITE_ERROR", { cause: error }),
    }).pipe(Effect.tap(() => Effect.logInfo("Finished uploading incomplete block").pipe(Effect.annotateLogs({
      upload_id: id,
    }))));
  };
  const getIncompleteBlock = (id) => {
    return Effect.tryPromise({
      try: async () => {
        try {
          const blobClient = containerClient.getBlockBlobClient(incompletePartKey(id));
          const response = await blobClient.download();
          return response.readableStreamBody;
        }
        catch (error) {
          if (error &&
            typeof error === "object" &&
            "statusCode" in error &&
            error.statusCode === 404) {
            return undefined;
          }
          throw error;
        }
      },
      catch: (error) => UploadistaError.fromCode("FILE_WRITE_ERROR", { cause: error }),
    });
  };
  const getIncompleteBlockSize = (id) => {
    return Effect.tryPromise({
      try: async () => {
        try {
          const blobClient = containerClient.getBlockBlobClient(incompletePartKey(id));
          const properties = await blobClient.getProperties();
          return properties.contentLength;
        }
        catch (error) {
          if (error &&
            typeof error === "object" &&
            "statusCode" in error &&
            error.statusCode === 404) {
            return undefined;
          }
          throw error;
        }
      },
      catch: (error) => UploadistaError.fromCode("FILE_WRITE_ERROR", { cause: error }),
    });
  };
  const deleteIncompleteBlock = (id) => {
    return Effect.tryPromise({
      try: async () => {
        const blobClient = containerClient.getBlockBlobClient(incompletePartKey(id));
        await blobClient.deleteIfExists();
      },
      catch: (error) => UploadistaError.fromCode("FILE_WRITE_ERROR", { cause: error }),
    });
  };
  const downloadIncompleteBlock = (id) => {
    return Effect.gen(function* () {
      const incompleteBlock = yield* getIncompleteBlock(id);
      if (!incompleteBlock) {
        return;
      }
      // Read the stream and collect all chunks to calculate size
      const reader = incompleteBlock.getReader();
      const chunks = [];
      let incompleteBlockSize = 0;
      try {
        while (true) {
          const result = yield* Effect.promise(() => reader.read());
          if (result.done)
            break;
          chunks.push(result.value);
          incompleteBlockSize += result.value.length;
        }
      }
      finally {
        reader.releaseLock();
      }
      // Create a new readable stream from the chunks
      const stream = Stream.fromIterable(chunks);
      return {
        size: incompleteBlockSize,
        stream,
      };
    });
  };
  const calcOptimalBlockSize = (initSize) => {
    const size = initSize ?? maxUploadSize;
    let optimalBlockSize;
    if (size <= preferredBlockSize) {
      optimalBlockSize = size;
    }
    else if (size <= preferredBlockSize * maxBlocks) {
      optimalBlockSize = preferredBlockSize;
    }
    else {
      // Calculate the minimum block size needed to fit within the max blocks limit
      optimalBlockSize = Math.ceil(size / maxBlocks);
    }
    // Ensure the block size respects the minimum and is aligned properly
    const finalBlockSize = Math.max(optimalBlockSize, minBlockSize);
    // Round up to ensure consistent block sizes
    return Math.ceil(finalBlockSize / 1024) * 1024; // Align to 1KB boundaries
  };
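  // Editor's worked example (illustrative, not part of the published file),
  // using the defaults above (8 MiB preferred block size, 50,000 max blocks,
  // 1 KiB minimum):
  //   - size = 5 MiB            -> fits in one block, block size = 5 MiB
  //   - size = 10 GiB           -> 10 GiB <= 8 MiB * 50,000, block size = 8 MiB
  //   - size = 1 TiB            -> ceil(1_099_511_627_776 / 50_000) = 21_990_233,
  //                                aligned up to 1 KiB => 21_990_400 bytes (~21 MiB)
  //   - size unknown (deferred) -> sized against the 5 TiB ceiling (~105 MiB blocks)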
  // Proper single-pass chunking using Effect's async stream constructor
  // Ensures all blocks except the final block are exactly the same size
  const createChunkedStream = (chunkSize) => (stream) => {
    return Stream.async((emit) => {
      let buffer = new Uint8Array(0);
      let blockNumber = 1;
      let totalBytesProcessed = 0;
      const emitChunk = (data, isFinalChunk = false) => {
        // Log chunk information for debugging - use INFO level to see in logs
        Effect.runSync(Effect.logInfo("Creating chunk").pipe(Effect.annotateLogs({
          block_number: blockNumber,
          chunk_size: data.length,
          expected_size: chunkSize,
          is_final_chunk: isFinalChunk,
          total_bytes_processed: totalBytesProcessed + data.length,
        })));
        emit.single({
          blockNumber: blockNumber++,
          data,
          size: data.length,
        });
      };
      const processChunk = (newData) => {
        // Combine buffer with new data
        const combined = new Uint8Array(buffer.length + newData.length);
        combined.set(buffer);
        combined.set(newData, buffer.length);
        buffer = combined;
        totalBytesProcessed += newData.length;
        // Emit full chunks of exactly chunkSize bytes so that every block
        // except the last one has the same size
        while (buffer.length >= chunkSize) {
          const chunk = buffer.slice(0, chunkSize);
          buffer = buffer.slice(chunkSize);
          emitChunk(chunk, false);
        }
      };
      // Process the stream
      Effect.runFork(stream.pipe(Stream.runForEach((chunk) => Effect.sync(() => processChunk(chunk))), Effect.andThen(() => Effect.sync(() => {
        // Emit final chunk if there's remaining data
        // The final chunk can be any size < chunkSize
        if (buffer.length > 0) {
          emitChunk(buffer, true);
        }
        emit.end();
      })), Effect.catchAll((error) => Effect.sync(() => emit.fail(error)))));
    });
  };
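  // Editor's illustration (not part of the published file): with an 8 MiB
  // chunkSize, a 20 MiB input stream arriving in arbitrarily sized pieces is
  // re-emitted as three chunks of 8 MiB, 8 MiB and 4 MiB, numbered 1..3 in
  // order, regardless of how the source stream was originally split.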
  // Byte-level progress tracking during streaming
  // This provides smooth, immediate progress feedback by tracking bytes as they
  // flow through the stream, before they reach Azure. This solves the issue where
  // small files (a single block) would jump from 0% to 100% instantly.
  const withByteProgressTracking = (onProgress, initialOffset = 0) => (stream) => {
    if (!onProgress)
      return stream;
    return Effect.gen(function* () {
      const totalBytesProcessedRef = yield* Ref.make(initialOffset);
      return stream.pipe(Stream.tap((chunk) => Effect.gen(function* () {
        const newTotal = yield* Ref.updateAndGet(totalBytesProcessedRef, (total) => total + chunk.length);
        onProgress(newTotal);
      })));
    }).pipe(Stream.unwrap);
  };
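  // Editor's illustration (not part of the published file): onProgress receives
  // the cumulative byte count, including any previously uploaded offset, e.g.
  //   write({ ... }, { onProgress: (bytes) => console.log(`${bytes} bytes seen`) })
  // logs a monotonically increasing total as chunks flow through the stream.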
  /**
   * Uploads a stream to Azure using multiple blocks
   */
  const uploadBlocks = (uploadFile, readStream, initCurrentBlockNumber, initOffset, onProgress) => {
    return Effect.gen(function* () {
      yield* Effect.logInfo("Uploading blocks").pipe(Effect.annotateLogs({
        upload_id: uploadFile.id,
        init_offset: initOffset,
        file_size: uploadFile.size,
      }));
      const size = uploadFile.size;
      const uploadBlockSize = calcOptimalBlockSize(size);
      yield* Effect.logInfo("Block size").pipe(Effect.annotateLogs({
        upload_id: uploadFile.id,
        block_size: uploadBlockSize,
      }));
      // Enhanced progress tracking strategy:
      // 1. Byte-level progress during streaming - provides immediate, smooth feedback
      //    as data flows through the pipeline (even for small files)
      // 2. This tracks progress BEFORE the Azure upload, giving users immediate feedback
      // 3. For large files with multiple blocks, this provides granular updates
      // 4. For small files (single block), this prevents 0% -> 100% jumps
      const chunkStream = readStream.pipe(
        // Add byte-level progress tracking during streaming (immediate feedback)
        withByteProgressTracking(onProgress, initOffset),
        // Create uniformly sized chunks for the Azure block upload
        createChunkedStream(uploadBlockSize));
      // Track cumulative offset and total bytes with Effect Refs
      const cumulativeOffsetRef = yield* Ref.make(initOffset);
      const totalBytesUploadedRef = yield* Ref.make(0);
      const blockIdsRef = yield* Ref.make([]);
      // Create a chunk upload function for the sink
      const uploadChunk = (chunkInfo) => Effect.gen(function* () {
        // Calculate cumulative bytes to determine if this is the final block
        const cumulativeOffset = yield* Ref.updateAndGet(cumulativeOffsetRef, (offset) => offset + chunkInfo.size);
        const isFinalBlock = cumulativeOffset >= (uploadFile.size || 0);
        yield* Effect.logDebug("Processing chunk").pipe(Effect.annotateLogs({
          upload_id: uploadFile.id,
          cumulative_offset: cumulativeOffset,
          file_size: uploadFile.size,
          chunk_size: chunkInfo.size,
          is_final_block: isFinalBlock,
        }));
        const actualBlockNumber = initCurrentBlockNumber + chunkInfo.blockNumber - 1;
        if (chunkInfo.size > uploadBlockSize) {
          yield* Effect.fail(UploadistaError.fromCode("FILE_WRITE_ERROR", {
            cause: new Error(`Block size ${chunkInfo.size} exceeds upload block size ${uploadBlockSize}`),
          }));
        }
        // For chunks that meet the minimum block size or are the final block,
        // stage them as regular blocks
        if (chunkInfo.size >= minBlockSize || isFinalBlock) {
          yield* Effect.logDebug("Uploading multipart chunk").pipe(Effect.annotateLogs({
            upload_id: uploadFile.id,
            block_number: actualBlockNumber,
            chunk_size: chunkInfo.size,
            min_block_size: minBlockSize,
            is_final_block: isFinalBlock,
          }));
          // Generate block ID (base64 encoded, must be consistent)
          const blockId = bufferFrom(`block-${actualBlockNumber.toString().padStart(6, "0")}`).toString("base64");
          yield* uploadBlock(uploadFile, chunkInfo.data, blockId);
          yield* Ref.update(blockIdsRef, (ids) => [...ids, blockId]);
          yield* partSizeHistogram(Effect.succeed(chunkInfo.size));
        }
        else {
          // Only upload as an incomplete block if it's smaller than the minimum and not final
          yield* uploadIncompleteBlock(uploadFile.id, chunkInfo.data);
        }
        yield* Ref.update(totalBytesUploadedRef, (total) => total + chunkInfo.size);
        // Note: Byte-level progress is now tracked during the streaming phase
        // This ensures smooth progress updates regardless of block size
        // Azure upload completion is tracked via totalBytesUploadedRef for accuracy
      });
      // Process chunks concurrently with controlled concurrency
      yield* chunkStream.pipe(Stream.runForEach((chunkInfo) => uploadChunk(chunkInfo)), Effect.withConcurrency(maxConcurrentBlockUploads));
      return {
        bytesUploaded: yield* Ref.get(totalBytesUploadedRef),
        blockIds: yield* Ref.get(blockIdsRef),
      };
    });
  };
  /**
   * Commits all staged blocks to create the final blob
   */
  const commitBlocks = (uploadFile, blockIds) => {
    return Effect.tryPromise({
      try: async () => {
        const blobClient = containerClient.getBlockBlobClient(uploadFile.id);
        await blobClient.commitBlockList(blockIds, {
          blobHTTPHeaders: {
            blobContentType: uploadFile.metadata?.contentType?.toString(),
            blobCacheControl: uploadFile.metadata?.cacheControl?.toString(),
          },
        });
      },
      catch: (error) => UploadistaError.fromCode("FILE_WRITE_ERROR", { cause: error }),
    });
  };
  /**
   * Gets the committed blocks for a blob
   */
  const retrieveBlocks = (id) => {
    return Effect.tryPromise({
      try: async () => {
        try {
          const blobClient = containerClient.getBlockBlobClient(id);
          const blockList = await blobClient.getBlockList("committed");
          const blocks = blockList.committedBlocks?.map((block) => ({
            size: block.size,
          })) ?? [];
          return blocks;
        }
        catch (error) {
          if (error &&
            typeof error === "object" &&
            "statusCode" in error &&
            error.statusCode === 404) {
            return [];
          }
          throw error;
        }
      },
      catch: (error) => UploadistaError.fromCode("UPLOAD_ID_NOT_FOUND", {
        cause: error,
      }),
    });
  };
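  // Editor's illustration (not part of the published file): the resumable
  // offset is derived from the committed block sizes, e.g.
  //   calcOffsetFromBlocks([{ size: 8_388_608 }, { size: 8_388_608 }]) === 16_777_216
  // so getUpload below reports offset 16_777_216 (plus any pending incomplete block).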
  /**
   * Removes cached data for a given file
   */
  const clearCache = (id) => {
    return Effect.gen(function* () {
      yield* Effect.logInfo("Removing cached data").pipe(Effect.annotateLogs({
        upload_id: id,
      }));
      yield* kvStore.delete(id);
    });
  };
  /**
   * Creates a blob placeholder in Azure and stores metadata
   */
  const create = (upload) => {
    return Effect.gen(function* () {
      yield* uploadRequestsTotal(Effect.succeed(1));
      yield* activeUploadsGauge(Effect.succeed(1));
      yield* fileSizeHistogram(Effect.succeed(upload.size || 0));
      yield* Effect.logInfo("Initializing Azure blob upload").pipe(Effect.annotateLogs({
        upload_id: upload.id,
      }));
      upload.creationDate = new Date().toISOString();
      upload.storage = {
        id: upload.storage.id,
        type: upload.storage.type,
        path: upload.id,
        bucket: containerName,
      };
      upload.url = `${deliveryUrl}/${upload.id}`;
      yield* kvStore.set(upload.id, upload);
      yield* Effect.logInfo("Azure blob upload initialized").pipe(Effect.annotateLogs({
        upload_id: upload.id,
      }));
      return upload;
    });
  };
  const readStream = (id) => {
    return Effect.tryPromise({
      try: async () => {
        const blobClient = containerClient.getBlockBlobClient(id);
        const response = await blobClient.download();
        if (response.blobBody) {
          return response.blobBody;
        }
        if (response.readableStreamBody) {
          return response.readableStreamBody;
        }
        throw new Error("No blob body or readable stream body");
      },
      catch: (error) => UploadistaError.fromCode("FILE_WRITE_ERROR", {
        cause: error,
      }),
    });
  };
  const read = (id) => {
    return Effect.gen(function* () {
      const stream = yield* readStream(id);
      // Convert stream/blob to Uint8Array
      if (stream instanceof Blob) {
        const arrayBuffer = yield* Effect.promise(() => stream.arrayBuffer());
        return new Uint8Array(arrayBuffer);
      }
      // Read from ReadableStream
      const reader = stream.getReader();
      const chunks = [];
      try {
        while (true) {
          const result = yield* Effect.promise(() => reader.read());
          if (result.done)
            break;
          chunks.push(result.value);
        }
      }
      finally {
        reader.releaseLock();
      }
      // Concatenate all chunks
      const totalLength = chunks.reduce((acc, chunk) => acc + chunk.length, 0);
      const result = new Uint8Array(totalLength);
      let offset = 0;
      for (const chunk of chunks) {
        result.set(chunk, offset);
        offset += chunk.length;
      }
      return result;
    });
  };
  const prepareUpload = (file_id, initialOffset, initialData) => {
    return Effect.gen(function* () {
      const uploadFile = yield* kvStore.get(file_id);
      const blocks = yield* retrieveBlocks(file_id);
      const blockNumber = blocks.length;
      const nextBlockNumber = blockNumber + 1;
      const incompleteBlock = yield* downloadIncompleteBlock(file_id);
      if (incompleteBlock) {
        yield* deleteIncompleteBlock(file_id);
        const offset = initialOffset - incompleteBlock.size;
        const data = incompleteBlock.stream.pipe(Stream.concat(initialData));
        return {
          uploadFile,
          nextBlockNumber: nextBlockNumber - 1,
          offset,
          incompleteBlockSize: incompleteBlock.size,
          data,
        };
      }
      else {
        return {
          uploadFile,
          nextBlockNumber,
          offset: initialOffset,
          incompleteBlockSize: 0,
          data: initialData,
        };
      }
    });
  };
  /**
   * Write to the file, starting at the provided offset
   */
  const write = (options, dependencies) => {
    return withUploadMetrics(options.file_id, withTimingMetrics(uploadDurationHistogram, Effect.gen(function* () {
      const startTime = Date.now();
      const { stream: initialData, file_id, offset: initialOffset } = options;
      const { onProgress } = dependencies;
      const prepareResult = yield* prepareUpload(file_id, initialOffset, initialData);
      const { uploadFile, nextBlockNumber, offset, data } = prepareResult;
      const { bytesUploaded, blockIds } = yield* uploadBlocks(uploadFile, data, nextBlockNumber, offset, onProgress);
      const newOffset = offset + bytesUploaded;
      if (uploadFile.size === newOffset) {
        try {
          // Commit all blocks to finalize the blob
          yield* commitBlocks(uploadFile, blockIds);
          yield* clearCache(file_id);
          // Log completion with observability
          yield* logAzureUploadCompletion(file_id, {
            fileSize: uploadFile.size || 0,
            totalDurationMs: Date.now() - startTime,
            partsCount: blockIds.length,
            averagePartSize: uploadFile.size,
            throughputBps: uploadFile.size / (Date.now() - startTime),
            retryCount: 0,
          });
          yield* uploadSuccessTotal(Effect.succeed(1));
          yield* activeUploadsGauge(Effect.succeed(-1));
        }
        catch (error) {
          yield* Effect.logError("Failed to finish upload").pipe(Effect.annotateLogs({
            upload_id: file_id,
            error: JSON.stringify(error),
          }));
          yield* uploadErrorsTotal(Effect.succeed(1));
          Effect.runSync(trackAzureError("write", error, {
            upload_id: file_id,
            operation: "commit",
            blocks: blockIds.length,
          }));
          throw error;
        }
      }
      return newOffset;
    })));
  };
  const getUpload = (id) => {
    return Effect.gen(function* () {
      const uploadFile = yield* kvStore.get(id);
      let offset = 0;
      try {
        const blocks = yield* retrieveBlocks(id);
        offset = calcOffsetFromBlocks(blocks);
      }
      catch (error) {
        // Check if the error is caused by the blob not being found
        if (typeof error === "object" &&
          error !== null &&
          "statusCode" in error &&
          error.statusCode === 404) {
          return {
            ...uploadFile,
            offset: uploadFile.size,
            size: uploadFile.size,
            metadata: uploadFile.metadata,
            storage: uploadFile.storage,
          };
        }
        yield* Effect.logError("Error on get upload").pipe(Effect.annotateLogs({
          upload_id: id,
          error: JSON.stringify(error),
        }));
        throw error;
      }
      const incompleteBlockSize = yield* getIncompleteBlockSize(id);
      return {
        ...uploadFile,
        offset: offset + (incompleteBlockSize ?? 0),
        size: uploadFile.size,
        storage: uploadFile.storage,
      };
    });
  };
  const remove = (id) => {
    return Effect.gen(function* () {
      try {
        const blobClient = containerClient.getBlockBlobClient(id);
        yield* Effect.promise(() => blobClient.deleteIfExists());
        // Also delete incomplete block if it exists
        yield* deleteIncompleteBlock(id);
      }
      catch (error) {
        if (typeof error === "object" &&
          error !== null &&
          "statusCode" in error &&
          error.statusCode === 404) {
          yield* Effect.logError("No file found").pipe(Effect.annotateLogs({
            upload_id: id,
          }));
          return yield* Effect.fail(UploadistaError.fromCode("FILE_NOT_FOUND"));
        }
        Effect.runSync(trackAzureError("remove", error, {
          upload_id: id,
        }));
        throw error;
      }
      yield* clearCache(id);
      yield* activeUploadsGauge(Effect.succeed(-1));
    });
  };
  const getExpiration = () => {
    return expirationPeriodInMilliseconds;
  };
  const getExpirationDate = (created_at) => {
    const date = new Date(created_at);
    return new Date(date.getTime() + getExpiration());
  };
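  // Editor's illustration (not part of the published file): with the default
  // one-week expiration period, a blob whose creationDate metadata is
  // "2024-01-01T00:00:00.000Z" is considered expired once the current time
  // passes getExpirationDate("2024-01-01T00:00:00.000Z"), i.e. 2024-01-08T00:00:00.000Z.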
  const deleteExpired = () => {
    return Effect.tryPromise({
      try: async () => {
        if (getExpiration() === 0) {
          return 0;
        }
        let deleted = 0;
        const response = containerClient.listBlobsFlat({
          includeMetadata: true,
        });
        const expiredBlobs = [];
        for await (const blob of response) {
          if (blob.metadata?.creationDate) {
            const creationDate = new Date(blob.metadata.creationDate);
            if (Date.now() >
              getExpirationDate(creationDate.toISOString()).getTime()) {
              expiredBlobs.push(blob.name);
            }
          }
        }
        // Delete expired blobs
        for (const blobName of expiredBlobs) {
          await containerClient.deleteBlob(blobName);
          deleted++;
        }
        return deleted;
      },
      catch: (error) => UploadistaError.fromCode("FILE_WRITE_ERROR", { cause: error }),
    });
  };
  const getCapabilities = () => {
    return {
      supportsParallelUploads: true,
      supportsConcatenation: false, // Azure doesn't have native concatenation like GCS
      supportsDeferredLength: true,
      supportsResumableUploads: true,
      supportsTransactionalUploads: true,
      maxConcurrentUploads: maxConcurrentBlockUploads,
      minChunkSize: minBlockSize,
      maxChunkSize: 4000 * 1024 * 1024, // 4000MB Azure limit
      maxParts: maxBlocks,
      optimalChunkSize: preferredBlockSize,
      requiresOrderedChunks: false,
      requiresMimeTypeValidation: true,
      maxValidationSize: undefined, // no size limit
    };
  };
  const getChunkerConstraints = () => {
    return {
      minChunkSize: minBlockSize,
      maxChunkSize: 4000 * 1024 * 1024, // 4000MB Azure limit
      optimalChunkSize: preferredBlockSize,
      requiresOrderedChunks: false,
    };
  };
  const validateUploadStrategy = (strategy) => {
    const capabilities = getCapabilities();
    const result = (() => {
      switch (strategy) {
        case "parallel":
          return capabilities.supportsParallelUploads;
        case "single":
          return true;
        default:
          return false;
      }
    })();
    return Effect.succeed(result);
  };
  return {
    bucket: containerName,
    create,
    remove,
    write,
    getUpload,
    read,
    readStream,
    deleteExpired: deleteExpired(),
    getCapabilities,
    getChunkerConstraints,
    validateUploadStrategy,
  };
}
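
For orientation, a minimal sketch of how the exported factory might be wired up, based only on the parameters visible in the diff above. The kvStore object shown here is an assumption inferred from the get/set/delete calls in azure-store.js (they are yielded inside Effect.gen, so Effect values are returned); a real deployment would supply a persistent implementation, and any of the other authentication options (sasUrl, credential, accountName + accountKey) could replace the connection string.

    import { Effect } from "effect";
    import { azureStore } from "@uploadista/data-store-azure";

    // Hypothetical in-memory key-value store (illustration only).
    const kvStore = (() => {
      const map = new Map();
      return {
        get: (id) => Effect.sync(() => map.get(id)),
        set: (id, value) => Effect.sync(() => { map.set(id, value); }),
        delete: (id) => Effect.sync(() => { map.delete(id); }),
      };
    })();

    const store = azureStore({
      deliveryUrl: "https://cdn.example.com/files", // example value
      containerName: "uploads",                      // example value
      connectionString: process.env.AZURE_STORAGE_CONNECTION_STRING,
      kvStore,
    });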