@uploadista/data-store-azure 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1083 @@
1
+ import type { TokenCredential } from "@azure/core-auth";
2
+ import {
3
+ BlobServiceClient as BlobService,
4
+ type BlobServiceClient,
5
+ type ContainerClient,
6
+ StorageSharedKeyCredential,
7
+ } from "@azure/storage-blob";
8
+ import { UploadistaError } from "@uploadista/core/errors";
9
+
10
+ import type {
11
+ DataStore,
12
+ DataStoreCapabilities,
13
+ DataStoreWriteOptions,
14
+ KvStore,
15
+ UploadFile,
16
+ UploadStrategy,
17
+ } from "@uploadista/core/types";
18
+ import {
19
+ azureActiveUploadsGauge as activeUploadsGauge,
20
+ azureFileSizeHistogram as fileSizeHistogram,
21
+ logAzureUploadCompletion,
22
+ azurePartSizeHistogram as partSizeHistogram,
23
+ azurePartUploadDurationHistogram as partUploadDurationHistogram,
24
+ trackAzureError,
25
+ azureUploadDurationHistogram as uploadDurationHistogram,
26
+ azureUploadErrorsTotal as uploadErrorsTotal,
27
+ azureUploadPartsTotal as uploadPartsTotal,
28
+ azureUploadRequestsTotal as uploadRequestsTotal,
29
+ azureUploadSuccessTotal as uploadSuccessTotal,
30
+ withAzureTimingMetrics as withTimingMetrics,
31
+ withAzureUploadMetrics as withUploadMetrics,
32
+ } from "@uploadista/observability";
33
+ import { Effect, Ref, Stream } from "effect";
34
+
35
+ // Base64 helper that works in both Node.js and browser/edge runtimes
36
+ const toBase64 = (str: string) => {
37
+ // Use Node's Buffer for base64 encoding when it is available
38
+ if (typeof globalThis !== "undefined" && "Buffer" in globalThis) {
39
+ return (globalThis as any).Buffer.from(str).toString("base64");
40
+ }
41
+ // Fallback for browser and edge environments
42
+ return btoa(str);
43
+ };
44
+
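+ // For illustration: the helper above turns "block-000001" into "YmxvY2stMDAwMDAx".
+ // Block IDs generated this way all share the same pre-encoding length, which Azure
+ // requires for the block IDs of a single blob.
+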
45
+ export type ChunkInfo = {
46
+ blockNumber: number;
47
+ data: Uint8Array;
48
+ size: number;
49
+ isFinalPart?: boolean;
50
+ };
51
+
52
+ export type AzureStoreOptions = {
53
+ deliveryUrl: string;
54
+ /**
55
+ * The preferred size for blocks sent to Azure. Cannot be lower than 1 byte or larger than 4000 MiB.
56
+ * The store calculates the optimal block size, which takes this value into account,
57
+ * but may increase it so that the upload stays within Azure's 50,000-block limit.
58
+ */
59
+ blockSize?: number;
60
+ /**
61
+ * The minimum size for staged blocks.
62
+ * Can be used to ensure that all non-trailing blocks are exactly the same size.
63
+ * Cannot be lower than 1 byte or larger than 4000 MiB.
64
+ */
65
+ minBlockSize?: number;
66
+ /**
67
+ * The maximum number of blocks allowed in a block blob upload. Defaults to 50,000.
68
+ */
69
+ maxBlocks?: number;
70
+ maxConcurrentBlockUploads?: number;
71
+ kvStore: KvStore<UploadFile>;
72
+ expirationPeriodInMilliseconds?: number;
73
+ // Azure authentication options (choose one)
74
+ connectionString?: string;
75
+ /**
76
+ * SAS URL for the storage account (works in all environments including browsers)
77
+ * Format: https://<account>.blob.core.windows.net?<sas-token>
78
+ */
79
+ sasUrl?: string;
80
+ /**
81
+ * TokenCredential for OAuth authentication (e.g., DefaultAzureCredential)
82
+ * Works in all environments and is the recommended approach for production
83
+ */
84
+ credential?: TokenCredential;
85
+ /**
86
+ * Account name and key for shared key authentication (Node.js only)
87
+ * @deprecated Use sasUrl or credential instead for cross-platform compatibility
88
+ */
89
+ accountName?: string;
90
+ /**
91
+ * @deprecated Use sasUrl or credential instead for cross-platform compatibility
92
+ */
93
+ accountKey?: string;
94
+ containerName: string;
95
+ };
96
+
97
+ function calcOffsetFromBlocks(blocks?: Array<{ size: number }>) {
98
+ return blocks && blocks.length > 0
99
+ ? blocks.reduce((a, b) => a + (b?.size ?? 0), 0)
100
+ : 0;
101
+ }
102
+
103
+ export type AzureStore = DataStore<UploadFile> & {
104
+ getUpload: (id: string) => Effect.Effect<UploadFile, UploadistaError>;
105
+ readStream: (
106
+ id: string,
107
+ ) => Effect.Effect<ReadableStream | Blob, UploadistaError>;
108
+ getChunkerConstraints: () => {
109
+ minChunkSize: number;
110
+ maxChunkSize: number;
111
+ optimalChunkSize: number;
112
+ requiresOrderedChunks: boolean;
113
+ };
114
+ };
115
+
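+ // Illustrative only, not part of the published module: a minimal sketch of how the
+ // factory below might be configured with a TokenCredential. DefaultAzureCredential
+ // comes from "@azure/identity"; "cdn.example.com", "uploads", "myaccount" and
+ // myKvStore are placeholder values.
+ //
+ //   import { DefaultAzureCredential } from "@azure/identity";
+ //
+ //   const store = azureStore({
+ //     deliveryUrl: "https://cdn.example.com",
+ //     containerName: "uploads",
+ //     accountName: "myaccount",
+ //     credential: new DefaultAzureCredential(),
+ //     kvStore: myKvStore, // any KvStore<UploadFile> implementation
+ //   });
+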
116
+ export function azureStore({
117
+ deliveryUrl,
118
+ blockSize,
119
+ minBlockSize = 1024, // 1KB minimum
120
+ maxBlocks = 50_000,
121
+ kvStore,
122
+ maxConcurrentBlockUploads = 60,
123
+ expirationPeriodInMilliseconds = 1000 * 60 * 60 * 24 * 7, // 1 week
124
+ connectionString,
125
+ sasUrl,
126
+ credential,
127
+ accountName,
128
+ accountKey,
129
+ containerName,
130
+ }: AzureStoreOptions): AzureStore {
131
+ const preferredBlockSize = blockSize || 8 * 1024 * 1024; // 8 MiB default
132
+ const maxUploadSize = 5_497_558_138_880 as const; // 5 TiB upper bound used when the upload size is unknown
133
+
134
+ // Initialize Azure Blob Service Client with cross-platform authentication
135
+ let blobServiceClient: BlobServiceClient;
136
+
137
+ if (connectionString) {
138
+ // Connection string (works in all environments)
139
+ blobServiceClient = BlobService.fromConnectionString(connectionString);
140
+ } else if (sasUrl) {
141
+ // SAS URL (works in all environments including browsers)
142
+ blobServiceClient = new BlobService(sasUrl);
143
+ } else if (credential) {
144
+ // OAuth token credential (works in all environments, recommended for production)
145
+ const accountUrl = accountName
146
+ ? `https://${accountName}.blob.core.windows.net`
147
+ : sasUrl?.split("?")[0] || "";
148
+ if (!accountUrl) {
149
+ throw new Error(
150
+ "When using credential authentication, either accountName or a valid sasUrl must be provided to determine the account URL",
151
+ );
152
+ }
153
+ blobServiceClient = new BlobService(accountUrl, credential);
154
+ } else if (accountName && accountKey) {
155
+ // Legacy shared key authentication (Node.js only)
156
+ // This will fail in browser/edge environments
157
+ try {
158
+ const sharedKeyCredential = new StorageSharedKeyCredential(
159
+ accountName,
160
+ accountKey,
161
+ );
162
+ blobServiceClient = new BlobService(
163
+ `https://${accountName}.blob.core.windows.net`,
164
+ sharedKeyCredential,
165
+ );
166
+ } catch (error) {
167
+ throw new Error(
168
+ "StorageSharedKeyCredential is only available in Node.js environments. " +
169
+ "Use sasUrl or credential options for cross-platform compatibility. " +
170
+ `Original error: ${error}`,
171
+ );
172
+ }
173
+ } else {
174
+ throw new Error(
175
+ "Azure authentication required. Provide one of: " +
176
+ "connectionString, sasUrl, credential, or accountName + accountKey (Node.js only)",
177
+ );
178
+ }
179
+
180
+ const containerClient: ContainerClient =
181
+ blobServiceClient.getContainerClient(containerName);
182
+
183
+ const incompletePartKey = (id: string) => {
184
+ return `${id}.incomplete`;
185
+ };
186
+
187
+ const uploadBlock = (
188
+ uploadFile: UploadFile,
189
+ readStream: Uint8Array,
190
+ blockId: string,
191
+ ) => {
192
+ return withTimingMetrics(
193
+ partUploadDurationHistogram,
194
+ Effect.gen(function* () {
195
+ yield* Effect.logInfo("Uploading block").pipe(
196
+ Effect.annotateLogs({
197
+ upload_id: uploadFile.id,
198
+ block_id: blockId,
199
+ block_size: readStream.length,
200
+ }),
201
+ );
202
+
203
+ yield* uploadPartsTotal(Effect.succeed(1));
204
+ yield* partSizeHistogram(Effect.succeed(readStream.length));
205
+
206
+ try {
207
+ const blobClient = containerClient.getBlockBlobClient(uploadFile.id);
208
+ yield* Effect.tryPromise({
209
+ try: async () => {
210
+ await blobClient.stageBlock(
211
+ blockId,
212
+ readStream,
213
+ readStream.length,
214
+ );
215
+ },
216
+ catch: (error) => {
217
+ Effect.runSync(
218
+ trackAzureError("uploadBlock", error, {
219
+ upload_id: uploadFile.id,
220
+ block_id: blockId,
221
+ block_size: readStream.length,
222
+ }),
223
+ );
224
+ return UploadistaError.fromCode("FILE_WRITE_ERROR", {
225
+ cause: error as Error,
226
+ });
227
+ },
228
+ });
229
+
230
+ yield* Effect.logInfo("Finished uploading block").pipe(
231
+ Effect.annotateLogs({
232
+ upload_id: uploadFile.id,
233
+ block_id: blockId,
234
+ block_size: readStream.length,
235
+ }),
236
+ );
237
+ } catch (error) {
238
+ Effect.runSync(
239
+ trackAzureError("uploadBlock", error, {
240
+ upload_id: uploadFile.id,
241
+ block_id: blockId,
242
+ block_size: readStream.length,
243
+ }),
244
+ );
245
+ throw error;
246
+ }
247
+ }),
248
+ );
249
+ };
250
+
251
+ const uploadIncompleteBlock = (id: string, readStream: Uint8Array) => {
252
+ return Effect.tryPromise({
253
+ try: async () => {
254
+ const blobClient = containerClient.getBlockBlobClient(
255
+ incompletePartKey(id),
256
+ );
257
+ await blobClient.upload(readStream, readStream.length);
258
+ },
259
+ catch: (error) =>
260
+ UploadistaError.fromCode("FILE_WRITE_ERROR", { cause: error as Error }),
261
+ }).pipe(
262
+ Effect.tap(() =>
263
+ Effect.logInfo("Finished uploading incomplete block").pipe(
264
+ Effect.annotateLogs({
265
+ upload_id: id,
266
+ }),
267
+ ),
268
+ ),
269
+ );
270
+ };
271
+
272
+ const getIncompleteBlock = (id: string) => {
273
+ return Effect.tryPromise({
274
+ try: async () => {
275
+ try {
276
+ const blobClient = containerClient.getBlockBlobClient(
277
+ incompletePartKey(id),
278
+ );
279
+ const response = await blobClient.download();
280
+ return response.readableStreamBody as unknown as ReadableStream;
281
+ } catch (error) {
282
+ if (
283
+ error &&
284
+ typeof error === "object" &&
285
+ "statusCode" in error &&
286
+ error.statusCode === 404
287
+ ) {
288
+ return undefined;
289
+ }
290
+ throw error;
291
+ }
292
+ },
293
+ catch: (error) =>
294
+ UploadistaError.fromCode("FILE_WRITE_ERROR", { cause: error as Error }),
295
+ });
296
+ };
297
+
298
+ const getIncompleteBlockSize = (id: string) => {
299
+ return Effect.tryPromise({
300
+ try: async () => {
301
+ try {
302
+ const blobClient = containerClient.getBlockBlobClient(
303
+ incompletePartKey(id),
304
+ );
305
+ const properties = await blobClient.getProperties();
306
+ return properties.contentLength;
307
+ } catch (error) {
308
+ if (
309
+ error &&
310
+ typeof error === "object" &&
311
+ "statusCode" in error &&
312
+ error.statusCode === 404
313
+ ) {
314
+ return undefined;
315
+ }
316
+ throw error;
317
+ }
318
+ },
319
+ catch: (error) =>
320
+ UploadistaError.fromCode("FILE_WRITE_ERROR", { cause: error as Error }),
321
+ });
322
+ };
323
+
324
+ const deleteIncompleteBlock = (id: string) => {
325
+ return Effect.tryPromise({
326
+ try: async () => {
327
+ const blobClient = containerClient.getBlockBlobClient(
328
+ incompletePartKey(id),
329
+ );
330
+ await blobClient.deleteIfExists();
331
+ },
332
+ catch: (error) =>
333
+ UploadistaError.fromCode("FILE_WRITE_ERROR", { cause: error as Error }),
334
+ });
335
+ };
336
+
337
+ const downloadIncompleteBlock = (id: string) => {
338
+ return Effect.gen(function* () {
339
+ const incompleteBlock = yield* getIncompleteBlock(id);
340
+
341
+ if (!incompleteBlock) {
342
+ return;
343
+ }
344
+
345
+ // Read the stream and collect all chunks to calculate size
346
+ const reader = incompleteBlock.getReader();
347
+ const chunks: Uint8Array[] = [];
348
+ let incompleteBlockSize = 0;
349
+
350
+ try {
351
+ while (true) {
352
+ const result = yield* Effect.promise(() => reader.read());
353
+ if (result.done) break;
354
+ chunks.push(result.value);
355
+ incompleteBlockSize += result.value.length;
356
+ }
357
+ } finally {
358
+ reader.releaseLock();
359
+ }
360
+
361
+ // Re-expose the collected chunks as an Effect Stream
362
+ const stream = Stream.fromIterable(chunks);
363
+
364
+ return {
365
+ size: incompleteBlockSize,
366
+ stream,
367
+ };
368
+ });
369
+ };
370
+
371
+ const calcOptimalBlockSize = (initSize?: number): number => {
372
+ const size = initSize ?? maxUploadSize;
373
+ let optimalBlockSize: number;
374
+
375
+ if (size <= preferredBlockSize) {
376
+ optimalBlockSize = size;
377
+ } else if (size <= preferredBlockSize * maxBlocks) {
378
+ optimalBlockSize = preferredBlockSize;
379
+ } else {
380
+ // Calculate the minimum block size needed to fit within the max blocks limit
381
+ optimalBlockSize = Math.ceil(size / maxBlocks);
382
+ }
383
+
384
+ // Ensure the block size respects the minimum and is aligned properly
385
+ const finalBlockSize = Math.max(optimalBlockSize, minBlockSize);
386
+
387
+ // Round up to ensure consistent block sizes
388
+ return Math.ceil(finalBlockSize / 1024) * 1024; // Align to 1KB boundaries
389
+ };
390
+
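+ // Worked examples with the defaults (8 MiB preferred block size, 50,000 max blocks,
+ // 1 KiB minimum), for illustration only:
+ //   - 4 MiB upload:   fits in one preferred block, so the block size is 4 MiB.
+ //   - 100 GiB upload: 8 MiB * 50,000 covers it, so 8 MiB blocks (12,800 blocks).
+ //   - 1 TiB upload:   exceeds 8 MiB * 50,000, so ceil(1 TiB / 50,000) rounded up to a
+ //     1 KiB boundary gives 21,990,400-byte blocks (~21 MiB), which stays within the
+ //     50,000-block limit.
+ //   - unknown size:   falls back to the 5 TiB cap, giving ~105 MiB blocks.
+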
391
+ // Proper single-pass chunking using Effect's async stream constructor
392
+ // Ensures all blocks except the final block are exactly the same size, so offsets can be derived from the committed block list
393
+ const createChunkedStream =
394
+ (chunkSize: number) =>
395
+ <E>(stream: Stream.Stream<Uint8Array, E>): Stream.Stream<ChunkInfo, E> => {
396
+ return Stream.async<ChunkInfo, E>((emit) => {
397
+ let buffer = new Uint8Array(0);
398
+ let blockNumber = 1;
399
+ let totalBytesProcessed = 0;
400
+
401
+ const emitChunk = (data: Uint8Array, isFinalChunk = false) => {
402
+ // Log chunk information for debugging - use INFO level to see in logs
403
+ Effect.runSync(
404
+ Effect.logInfo("Creating chunk").pipe(
405
+ Effect.annotateLogs({
406
+ block_number: blockNumber,
407
+ chunk_size: data.length,
408
+ expected_size: chunkSize,
409
+ is_final_chunk: isFinalChunk,
410
+ total_bytes_processed: totalBytesProcessed + data.length,
411
+ }),
412
+ ),
413
+ );
414
+ emit.single({
415
+ blockNumber: blockNumber++,
416
+ data,
417
+ size: data.length,
418
+ });
419
+ };
420
+
421
+ const processChunk = (newData: Uint8Array) => {
422
+ // Combine buffer with new data
423
+ const combined = new Uint8Array(buffer.length + newData.length);
424
+ combined.set(buffer);
425
+ combined.set(newData, buffer.length);
426
+ buffer = combined;
427
+ totalBytesProcessed += newData.length;
428
+
429
+ // Emit full chunks of exactly chunkSize bytes
430
+ // This keeps every block except the last one at a uniform size
431
+ while (buffer.length >= chunkSize) {
432
+ const chunk = buffer.slice(0, chunkSize);
433
+ buffer = buffer.slice(chunkSize);
434
+ emitChunk(chunk, false);
435
+ }
436
+ };
437
+
438
+ // Process the stream
439
+ Effect.runFork(
440
+ stream.pipe(
441
+ Stream.runForEach((chunk) =>
442
+ Effect.sync(() => processChunk(chunk)),
443
+ ),
444
+ Effect.andThen(() =>
445
+ Effect.sync(() => {
446
+ // Emit final chunk if there's remaining data
447
+ // The final chunk can be any size < chunkSize (Azure allows a smaller trailing block)
448
+ if (buffer.length > 0) {
449
+ emitChunk(buffer, true);
450
+ }
451
+ emit.end();
452
+ }),
453
+ ),
454
+ Effect.catchAll((error) => Effect.sync(() => emit.fail(error))),
455
+ ),
456
+ );
457
+ });
458
+ };
459
+
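+ // Illustrative behaviour: with chunkSize = 8 MiB, a 20 MiB input stream is re-chunked
+ // into ChunkInfo values of 8 MiB, 8 MiB and 4 MiB, regardless of how the incoming
+ // Uint8Array chunks were sized. Hypothetical sketch (sourceStream is a placeholder):
+ //
+ //   const chunked = createChunkedStream(8 * 1024 * 1024)(sourceStream);
+ //   // emits { blockNumber: 1, size: 8388608 }, { blockNumber: 2, size: 8388608 },
+ //   //       { blockNumber: 3, size: 4194304 }
+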
460
+ // Byte-level progress tracking during streaming
461
+ // This provides smooth, immediate progress feedback by tracking bytes as they
462
+ // flow through the stream, before they reach Azure. This solves the issue where
463
+ // files smaller than a single block would jump from 0% to 100% instantly.
464
+ const withByteProgressTracking =
465
+ (onProgress?: (totalBytes: number) => void, initialOffset = 0) =>
466
+ <E, R>(stream: Stream.Stream<Uint8Array, E, R>) => {
467
+ if (!onProgress) return stream;
468
+
469
+ return Effect.gen(function* () {
470
+ const totalBytesProcessedRef = yield* Ref.make(initialOffset);
471
+
472
+ return stream.pipe(
473
+ Stream.tap((chunk) =>
474
+ Effect.gen(function* () {
475
+ const newTotal = yield* Ref.updateAndGet(
476
+ totalBytesProcessedRef,
477
+ (total) => total + chunk.length,
478
+ );
479
+ onProgress(newTotal);
480
+ }),
481
+ ),
482
+ );
483
+ }).pipe(Stream.unwrap);
484
+ };
485
+
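+ // Hypothetical usage sketch (onProgress and sourceStream are placeholders): the
+ // combinator taps each chunk and reports the running byte total, so three 1 MiB
+ // chunks starting at offset 0 report 1 MiB, 2 MiB and 3 MiB in turn.
+ //
+ //   const tracked = sourceStream.pipe(
+ //     withByteProgressTracking((total) => onProgress(total), 0),
+ //   );
+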
486
+ /**
487
+ * Uploads a stream to Azure using multiple blocks
488
+ */
489
+ const uploadBlocks = (
490
+ uploadFile: UploadFile,
491
+ readStream: Stream.Stream<Uint8Array, UploadistaError>,
492
+ initCurrentBlockNumber: number,
493
+ initOffset: number,
494
+ onProgress?: (newOffset: number) => void,
495
+ ) => {
496
+ return Effect.gen(function* () {
497
+ yield* Effect.logInfo("Uploading blocks").pipe(
498
+ Effect.annotateLogs({
499
+ upload_id: uploadFile.id,
500
+ init_offset: initOffset,
501
+ file_size: uploadFile.size,
502
+ }),
503
+ );
504
+
505
+ const size = uploadFile.size;
506
+
507
+ const uploadBlockSize = calcOptimalBlockSize(size);
508
+ yield* Effect.logInfo("Block size").pipe(
509
+ Effect.annotateLogs({
510
+ upload_id: uploadFile.id,
511
+ block_size: uploadBlockSize,
512
+ }),
513
+ );
514
+ // Enhanced Progress Tracking Strategy:
515
+ // 1. Byte-level progress during streaming - provides immediate, smooth feedback
516
+ // as data flows through the pipeline (even for small files)
517
+ // 2. This tracks progress BEFORE the Azure upload, giving users immediate feedback
518
+ // 3. For large files with multiple blocks, this provides granular updates
519
+ // 4. For small files (single block), this prevents 0%->100% jumps
520
+ const chunkStream = readStream.pipe(
521
+ // Add byte-level progress tracking during streaming (immediate feedback)
522
+ withByteProgressTracking(onProgress, initOffset),
523
+ // Create chunks for the Azure block upload with uniform block sizes
524
+ createChunkedStream(uploadBlockSize),
525
+ );
526
+
527
+ // Track cumulative offset and total bytes with Effect Refs
528
+ const cumulativeOffsetRef = yield* Ref.make(initOffset);
529
+ const totalBytesUploadedRef = yield* Ref.make(0);
530
+ const blockIdsRef = yield* Ref.make<string[]>([]);
531
+ // Create a chunk upload function for the sink
532
+ const uploadChunk = (chunkInfo: ChunkInfo) =>
533
+ Effect.gen(function* () {
534
+ // Calculate cumulative bytes to determine if this is the final block
535
+ const cumulativeOffset = yield* Ref.updateAndGet(
536
+ cumulativeOffsetRef,
537
+ (offset) => offset + chunkInfo.size,
538
+ );
539
+ const isFinalBlock = cumulativeOffset >= (uploadFile.size || 0);
540
+
541
+ yield* Effect.logDebug("Processing chunk").pipe(
542
+ Effect.annotateLogs({
543
+ upload_id: uploadFile.id,
544
+ cumulative_offset: cumulativeOffset,
545
+ file_size: uploadFile.size,
546
+ chunk_size: chunkInfo.size,
547
+ is_final_block: isFinalBlock,
548
+ }),
549
+ );
550
+
551
+ const actualBlockNumber =
552
+ initCurrentBlockNumber + chunkInfo.blockNumber - 1;
553
+
554
+ if (chunkInfo.size > uploadBlockSize) {
555
+ yield* Effect.fail(
556
+ UploadistaError.fromCode("FILE_WRITE_ERROR", {
557
+ cause: new Error(
558
+ `Block size ${chunkInfo.size} exceeds upload block size ${uploadBlockSize}`,
559
+ ),
560
+ }),
561
+ );
562
+ }
563
+
564
+ // For chunks that meet the minimum block size or are the final chunk,
565
+ // stage them as regular blocks
566
+ if (chunkInfo.size >= minBlockSize || isFinalBlock) {
567
+ yield* Effect.logDebug("Uploading multipart chunk").pipe(
568
+ Effect.annotateLogs({
569
+ upload_id: uploadFile.id,
570
+ block_number: actualBlockNumber,
571
+ chunk_size: chunkInfo.size,
572
+ min_block_size: minBlockSize,
573
+ is_final_block: isFinalBlock,
574
+ }),
575
+ );
576
+ // Generate the block ID (base64 encoded; all IDs share the same length)
577
+ const blockId = toBase64(
578
+ `block-${actualBlockNumber.toString().padStart(6, "0")}`,
579
+ );
580
+ yield* uploadBlock(uploadFile, chunkInfo.data, blockId);
581
+ yield* Ref.update(blockIdsRef, (ids) => [...ids, blockId]);
582
+ yield* partSizeHistogram(Effect.succeed(chunkInfo.size));
583
+ } else {
584
+ // Only upload as an incomplete block if it's smaller than the minimum and not final
585
+ yield* uploadIncompleteBlock(uploadFile.id, chunkInfo.data);
586
+ }
587
+
588
+ yield* Ref.update(
589
+ totalBytesUploadedRef,
590
+ (total) => total + chunkInfo.size,
591
+ );
592
+
593
+ // Note: Byte-level progress is now tracked during streaming phase
594
+ // This ensures smooth progress updates regardless of block size
595
+ // Azure upload completion is tracked via totalBytesUploadedRef for accuracy
596
+ });
597
+
598
+ // Process chunks concurrently with controlled concurrency
599
+ yield* chunkStream.pipe(
600
+ Stream.runForEach((chunkInfo) => uploadChunk(chunkInfo)),
601
+ Effect.withConcurrency(maxConcurrentBlockUploads),
602
+ );
603
+
604
+ return {
605
+ bytesUploaded: yield* Ref.get(totalBytesUploadedRef),
606
+ blockIds: yield* Ref.get(blockIdsRef),
607
+ };
608
+ });
609
+ };
610
+
611
+ /**
612
+ * Commits all staged blocks to create the final blob
613
+ */
614
+ const commitBlocks = (uploadFile: UploadFile, blockIds: string[]) => {
615
+ return Effect.tryPromise({
616
+ try: async () => {
617
+ const blobClient = containerClient.getBlockBlobClient(uploadFile.id);
618
+ await blobClient.commitBlockList(blockIds, {
619
+ blobHTTPHeaders: {
620
+ blobContentType: uploadFile.metadata?.contentType?.toString(),
621
+ blobCacheControl: uploadFile.metadata?.cacheControl?.toString(),
622
+ },
623
+ });
624
+ },
625
+ catch: (error) =>
626
+ UploadistaError.fromCode("FILE_WRITE_ERROR", { cause: error as Error }),
627
+ });
628
+ };
629
+
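+ // Illustrative two-phase flow with placeholder IDs and data: blocks are first staged
+ // with stageBlock (uploadBlock above), then made visible by committing the ordered
+ // block list (commitBlocks below):
+ //
+ //   await blobClient.stageBlock("YmxvY2stMDAwMDAx", dataA, dataA.length);
+ //   await blobClient.stageBlock("YmxvY2stMDAwMDAy", dataB, dataB.length);
+ //   await blobClient.commitBlockList(["YmxvY2stMDAwMDAx", "YmxvY2stMDAwMDAy"]);
+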
630
+ /**
631
+ * Gets the committed blocks for a blob
632
+ */
633
+ const retrieveBlocks = (id: string) => {
634
+ return Effect.tryPromise({
635
+ try: async () => {
636
+ try {
637
+ const blobClient = containerClient.getBlockBlobClient(id);
638
+ const blockList = await blobClient.getBlockList("committed");
639
+
640
+ const blocks =
641
+ blockList.committedBlocks?.map((block) => ({
642
+ size: block.size,
643
+ })) ?? [];
644
+
645
+ return blocks;
646
+ } catch (error) {
647
+ if (
648
+ error &&
649
+ typeof error === "object" &&
650
+ "statusCode" in error &&
651
+ error.statusCode === 404
652
+ ) {
653
+ return [];
654
+ }
655
+ throw error;
656
+ }
657
+ },
658
+ catch: (error) =>
659
+ UploadistaError.fromCode("UPLOAD_ID_NOT_FOUND", {
660
+ cause: error as Error,
661
+ }),
662
+ });
663
+ };
664
+
665
+ /**
666
+ * Removes cached data for a given file
667
+ */
668
+ const clearCache = (id: string) => {
669
+ return Effect.gen(function* () {
670
+ yield* Effect.logInfo("Removing cached data").pipe(
671
+ Effect.annotateLogs({
672
+ upload_id: id,
673
+ }),
674
+ );
675
+ yield* kvStore.delete(id);
676
+ });
677
+ };
678
+
679
+ /**
680
+ * Creates a blob placeholder in Azure and stores metadata
681
+ */
682
+ const create = (upload: UploadFile) => {
683
+ return Effect.gen(function* () {
684
+ yield* uploadRequestsTotal(Effect.succeed(1));
685
+ yield* activeUploadsGauge(Effect.succeed(1));
686
+ yield* fileSizeHistogram(Effect.succeed(upload.size || 0));
687
+
688
+ yield* Effect.logInfo("Initializing Azure blob upload").pipe(
689
+ Effect.annotateLogs({
690
+ upload_id: upload.id,
691
+ }),
692
+ );
693
+
694
+ upload.creationDate = new Date().toISOString();
695
+ upload.storage = {
696
+ id: upload.storage.id,
697
+ type: upload.storage.type,
698
+ path: upload.id,
699
+ bucket: containerName,
700
+ };
701
+ upload.url = `${deliveryUrl}/${upload.id}`;
702
+
703
+ yield* kvStore.set(upload.id, upload);
704
+ yield* Effect.logInfo("Azure blob upload initialized").pipe(
705
+ Effect.annotateLogs({
706
+ upload_id: upload.id,
707
+ }),
708
+ );
709
+
710
+ return upload;
711
+ });
712
+ };
713
+
714
+ const readStream = (
715
+ id: string,
716
+ ): Effect.Effect<ReadableStream | Blob, UploadistaError> => {
717
+ return Effect.tryPromise({
718
+ try: async () => {
719
+ const blobClient = containerClient.getBlockBlobClient(id);
720
+ const response = await blobClient.download();
721
+ if (response.blobBody) {
722
+ return response.blobBody;
723
+ }
724
+ if (response.readableStreamBody) {
725
+ return response.readableStreamBody as unknown as ReadableStream;
726
+ }
727
+ throw new Error("No blob body or readable stream body");
728
+ },
729
+ catch: (error) =>
730
+ UploadistaError.fromCode("FILE_WRITE_ERROR", {
731
+ cause: error as Error,
732
+ }),
733
+ });
734
+ };
735
+
736
+ const read = (id: string): Effect.Effect<Uint8Array, UploadistaError> => {
737
+ return Effect.gen(function* () {
738
+ const stream = yield* readStream(id);
739
+
740
+ // Convert stream/blob to Uint8Array
741
+ if (stream instanceof Blob) {
742
+ const arrayBuffer = yield* Effect.promise(() => stream.arrayBuffer());
743
+ return new Uint8Array(arrayBuffer as ArrayBuffer);
744
+ }
745
+
746
+ // Read from ReadableStream
747
+ const reader = stream.getReader();
748
+ const chunks: Uint8Array[] = [];
749
+
750
+ try {
751
+ while (true) {
752
+ const result = yield* Effect.promise(() => reader.read());
753
+ if (result.done) break;
754
+ chunks.push(result.value);
755
+ }
756
+ } finally {
757
+ reader.releaseLock();
758
+ }
759
+
760
+ // Concatenate all chunks
761
+ const totalLength = chunks.reduce((acc, chunk) => acc + chunk.length, 0);
762
+ const result = new Uint8Array(totalLength);
763
+ let offset = 0;
764
+ for (const chunk of chunks) {
765
+ result.set(chunk, offset);
766
+ offset += chunk.length;
767
+ }
768
+
769
+ return result;
770
+ });
771
+ };
772
+
773
+ const prepareUpload = (
774
+ file_id: string,
775
+ initialOffset: number,
776
+ initialData: Stream.Stream<Uint8Array, UploadistaError>,
777
+ ) => {
778
+ return Effect.gen(function* () {
779
+ const uploadFile = yield* kvStore.get(file_id);
780
+
781
+ const blocks = yield* retrieveBlocks(file_id);
782
+
783
+ const blockNumber = blocks.length;
784
+ const nextBlockNumber = blockNumber + 1;
785
+
786
+ const incompleteBlock = yield* downloadIncompleteBlock(file_id);
787
+
788
+ if (incompleteBlock) {
789
+ yield* deleteIncompleteBlock(file_id);
790
+ const offset = initialOffset - incompleteBlock.size;
791
+ const data = incompleteBlock.stream.pipe(Stream.concat(initialData));
792
+ return {
793
+ uploadFile,
794
+ nextBlockNumber: nextBlockNumber - 1,
795
+ offset,
796
+ incompleteBlockSize: incompleteBlock.size,
797
+ data,
798
+ };
799
+ } else {
800
+ return {
801
+ uploadFile,
802
+ nextBlockNumber,
803
+ offset: initialOffset,
804
+ incompleteBlockSize: 0,
805
+ data: initialData,
806
+ };
807
+ }
808
+ });
809
+ };
810
+
811
+ /**
812
+ * Write to the file, starting at the provided offset
813
+ */
814
+ const write = (
815
+ options: DataStoreWriteOptions,
816
+ dependencies: {
817
+ onProgress?: (chunkSize: number) => void;
818
+ },
819
+ ) => {
820
+ return withUploadMetrics(
821
+ options.file_id,
822
+ withTimingMetrics(
823
+ uploadDurationHistogram,
824
+ Effect.gen(function* () {
825
+ const startTime = Date.now();
826
+ const {
827
+ stream: initialData,
828
+ file_id,
829
+ offset: initialOffset,
830
+ } = options;
831
+ const { onProgress } = dependencies;
832
+
833
+ const prepareResult = yield* prepareUpload(
834
+ file_id,
835
+ initialOffset,
836
+ initialData,
837
+ );
838
+
839
+ const { uploadFile, nextBlockNumber, offset, data } = prepareResult;
840
+
841
+ const { bytesUploaded, blockIds } = yield* uploadBlocks(
842
+ uploadFile,
843
+ data,
844
+ nextBlockNumber,
845
+ offset,
846
+ onProgress,
847
+ );
848
+
849
+ const newOffset = offset + bytesUploaded;
850
+
851
+ if (uploadFile.size === newOffset) {
852
+ try {
853
+ // Commit all blocks to finalize the blob
854
+ yield* commitBlocks(uploadFile, blockIds);
855
+ yield* clearCache(file_id);
856
+
857
+ // Log completion with observability
858
+ yield* logAzureUploadCompletion(file_id, {
859
+ fileSize: uploadFile.size || 0,
860
+ totalDurationMs: Date.now() - startTime,
861
+ partsCount: blockIds.length,
862
+ averagePartSize: blockIds.length ? (uploadFile.size || 0) / blockIds.length : 0,
863
+ throughputBps: ((uploadFile.size || 0) * 1000) / Math.max(1, Date.now() - startTime),
864
+ retryCount: 0,
865
+ });
866
+
867
+ yield* uploadSuccessTotal(Effect.succeed(1));
868
+ yield* activeUploadsGauge(Effect.succeed(-1));
869
+ } catch (error) {
870
+ yield* Effect.logError("Failed to finish upload").pipe(
871
+ Effect.annotateLogs({
872
+ upload_id: file_id,
873
+ error: JSON.stringify(error),
874
+ }),
875
+ );
876
+ yield* uploadErrorsTotal(Effect.succeed(1));
877
+ Effect.runSync(
878
+ trackAzureError("write", error, {
879
+ upload_id: file_id,
880
+ operation: "commit",
881
+ blocks: blockIds.length,
882
+ }),
883
+ );
884
+ throw error;
885
+ }
886
+ }
887
+
888
+ return newOffset;
889
+ }),
890
+ ),
891
+ );
892
+ };
893
+
894
+ const getUpload = (id: string) => {
895
+ return Effect.gen(function* () {
896
+ const uploadFile = yield* kvStore.get(id);
897
+
898
+ let offset = 0;
899
+
900
+ try {
901
+ const blocks = yield* retrieveBlocks(id);
902
+ offset = calcOffsetFromBlocks(blocks);
903
+ } catch (error) {
904
+ // Check if the error is caused by the blob not being found
905
+ if (
906
+ typeof error === "object" &&
907
+ error !== null &&
908
+ "statusCode" in error &&
909
+ error.statusCode === 404
910
+ ) {
911
+ return {
912
+ ...uploadFile,
913
+ offset: uploadFile.size as number,
914
+ size: uploadFile.size,
915
+ metadata: uploadFile.metadata,
916
+ storage: uploadFile.storage,
917
+ };
918
+ }
919
+
920
+ yield* Effect.logError("Error on get upload").pipe(
921
+ Effect.annotateLogs({
922
+ upload_id: id,
923
+ error: JSON.stringify(error),
924
+ }),
925
+ );
926
+ throw error;
927
+ }
928
+
929
+ const incompleteBlockSize = yield* getIncompleteBlockSize(id);
930
+
931
+ return {
932
+ ...uploadFile,
933
+ offset: offset + (incompleteBlockSize ?? 0),
934
+ size: uploadFile.size,
935
+ storage: uploadFile.storage,
936
+ };
937
+ });
938
+ };
939
+
940
+ const remove = (id: string) => {
941
+ return Effect.gen(function* () {
942
+ try {
943
+ const blobClient = containerClient.getBlockBlobClient(id);
944
+ yield* Effect.promise(() => blobClient.deleteIfExists());
945
+
946
+ // Also delete incomplete block if it exists
947
+ yield* deleteIncompleteBlock(id);
948
+ } catch (error) {
949
+ if (
950
+ typeof error === "object" &&
951
+ error !== null &&
952
+ "statusCode" in error &&
953
+ error.statusCode === 404
954
+ ) {
955
+ yield* Effect.logError("No file found").pipe(
956
+ Effect.annotateLogs({
957
+ upload_id: id,
958
+ }),
959
+ );
960
+ return yield* Effect.fail(UploadistaError.fromCode("FILE_NOT_FOUND"));
961
+ }
962
+ Effect.runSync(
963
+ trackAzureError("remove", error, {
964
+ upload_id: id,
965
+ }),
966
+ );
967
+ throw error;
968
+ }
969
+
970
+ yield* clearCache(id);
971
+ yield* activeUploadsGauge(Effect.succeed(-1));
972
+ });
973
+ };
974
+
975
+ const getExpiration = () => {
976
+ return expirationPeriodInMilliseconds;
977
+ };
978
+
979
+ const getExpirationDate = (created_at: string) => {
980
+ const date = new Date(created_at);
981
+ return new Date(date.getTime() + getExpiration());
982
+ };
983
+
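+ // Example with the default 7-day expiration period: an upload created at
+ // 2024-01-01T00:00:00.000Z expires at 2024-01-08T00:00:00.000Z.
+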
984
+ const deleteExpired = (): Effect.Effect<number, UploadistaError> => {
985
+ return Effect.tryPromise({
986
+ try: async (): Promise<number> => {
987
+ if (getExpiration() === 0) {
988
+ return 0;
989
+ }
990
+
991
+ let deleted = 0;
992
+
993
+ const response = containerClient.listBlobsFlat({
994
+ includeMetadata: true,
995
+ });
996
+
997
+ const expiredBlobs: string[] = [];
998
+
999
+ for await (const blob of response) {
1000
+ if (blob.metadata?.creationDate) {
1001
+ const creationDate = new Date(blob.metadata.creationDate);
1002
+ if (
1003
+ Date.now() >
1004
+ getExpirationDate(creationDate.toISOString()).getTime()
1005
+ ) {
1006
+ expiredBlobs.push(blob.name);
1007
+ }
1008
+ }
1009
+ }
1010
+
1011
+ // Delete expired blobs
1012
+ for (const blobName of expiredBlobs) {
1013
+ await containerClient.deleteBlob(blobName);
1014
+ deleted++;
1015
+ }
1016
+
1017
+ return deleted;
1018
+ },
1019
+ catch: (error) =>
1020
+ UploadistaError.fromCode("FILE_WRITE_ERROR", { cause: error as Error }),
1021
+ });
1022
+ };
1023
+
1024
+ const getCapabilities = (): DataStoreCapabilities => {
1025
+ return {
1026
+ supportsParallelUploads: true,
1027
+ supportsConcatenation: false, // Azure doesn't have native concatenation like GCS
1028
+ supportsDeferredLength: true,
1029
+ supportsResumableUploads: true,
1030
+ supportsTransactionalUploads: true,
1031
+ maxConcurrentUploads: maxConcurrentBlockUploads,
1032
+ minChunkSize: minBlockSize,
1033
+ maxChunkSize: 4000 * 1024 * 1024, // 4000 MiB Azure block limit
1034
+ maxParts: maxBlocks,
1035
+ optimalChunkSize: preferredBlockSize,
1036
+ requiresOrderedChunks: false,
1037
+ requiresMimeTypeValidation: true,
1038
+ maxValidationSize: undefined, // no size limit
1039
+ };
1040
+ };
1041
+
1042
+ const getChunkerConstraints = () => {
1043
+ return {
1044
+ minChunkSize: minBlockSize,
1045
+ maxChunkSize: 4000 * 1024 * 1024, // 4000 MiB Azure block limit
1046
+ optimalChunkSize: preferredBlockSize,
1047
+ requiresOrderedChunks: false,
1048
+ };
1049
+ };
1050
+
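+ // With the defaults above, a client-side chunker would see minChunkSize = 1,024 bytes,
+ // maxChunkSize = 4,194,304,000 bytes (4000 MiB) and optimalChunkSize = 8,388,608 bytes
+ // (8 MiB), with no ordering requirement.
+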
1051
+ const validateUploadStrategy = (
1052
+ strategy: UploadStrategy,
1053
+ ): Effect.Effect<boolean, never> => {
1054
+ const capabilities = getCapabilities();
1055
+
1056
+ const result = (() => {
1057
+ switch (strategy) {
1058
+ case "parallel":
1059
+ return capabilities.supportsParallelUploads;
1060
+ case "single":
1061
+ return true;
1062
+ default:
1063
+ return false;
1064
+ }
1065
+ })();
1066
+
1067
+ return Effect.succeed(result);
1068
+ };
1069
+
1070
+ return {
1071
+ bucket: containerName,
1072
+ create,
1073
+ remove,
1074
+ write,
1075
+ getUpload,
1076
+ read,
1077
+ readStream,
1078
+ deleteExpired: deleteExpired(),
1079
+ getCapabilities,
1080
+ getChunkerConstraints,
1081
+ validateUploadStrategy,
1082
+ };
1083
+ }