@uploadista/data-store-azure 0.0.6 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,13 +7,13 @@ import {
7
7
  } from "@azure/storage-blob";
8
8
  import { UploadistaError } from "@uploadista/core/errors";
9
9
 
10
- import type {
11
- DataStore,
12
- DataStoreCapabilities,
13
- DataStoreWriteOptions,
14
- KvStore,
15
- UploadFile,
16
- UploadStrategy,
10
+ import {
11
+ type DataStore,
12
+ type DataStoreCapabilities,
13
+ type DataStoreWriteOptions,
14
+ type UploadFile,
15
+ UploadFileKVStore,
16
+ type UploadStrategy,
17
17
  } from "@uploadista/core/types";
18
18
  import {
19
19
  azureActiveUploadsGauge as activeUploadsGauge,
@@ -68,7 +68,6 @@ export type AzureStoreOptions = {
68
68
  */
69
69
  maxBlocks?: number;
70
70
  maxConcurrentBlockUploads?: number;
71
- kvStore: KvStore<UploadFile>;
72
71
  expirationPeriodInMilliseconds?: number;
73
72
  // Azure authentication options (choose one)
74
73
  connectionString?: string;
@@ -118,7 +117,6 @@ export function azureStore({
118
117
  blockSize,
119
118
  minBlockSize = 1024, // 1KB minimum
120
119
  maxBlocks = 50_000,
121
- kvStore,
122
120
  maxConcurrentBlockUploads = 60,
123
121
  expirationPeriodInMilliseconds = 1000 * 60 * 60 * 24 * 7, // 1 week
124
122
  connectionString,
@@ -127,957 +125,981 @@ export function azureStore({
127
125
  accountName,
128
126
  accountKey,
129
127
  containerName,
130
- }: AzureStoreOptions): AzureStore {
131
- const preferredBlockSize = blockSize || 8 * 1024 * 1024; // 8MB default
132
- const maxUploadSize = 5_497_558_138_880 as const; // 5TiB (Azure Block Blob limit)
133
-
134
- // Initialize Azure Blob Service Client with cross-platform authentication
135
- let blobServiceClient: BlobServiceClient;
136
-
137
- if (connectionString) {
138
- // Connection string (works in all environments)
139
- blobServiceClient = BlobService.fromConnectionString(connectionString);
140
- } else if (sasUrl) {
141
- // SAS URL (works in all environments including browsers)
142
- blobServiceClient = new BlobService(sasUrl);
143
- } else if (credential) {
144
- // OAuth token credential (works in all environments, recommended for production)
145
- const accountUrl = accountName
146
- ? `https://${accountName}.blob.core.windows.net`
147
- : sasUrl?.split("?")[0] || "";
148
- if (!accountUrl) {
149
- throw new Error(
150
- "When using credential authentication, either accountName or a valid sasUrl must be provided to determine the account URL",
151
- );
152
- }
153
- blobServiceClient = new BlobService(accountUrl, credential);
154
- } else if (accountName && accountKey) {
155
- // Legacy shared key authentication (Node.js only)
156
- // This will fail in browser/edge environments
157
- try {
158
- const sharedKeyCredential = new StorageSharedKeyCredential(
159
- accountName,
160
- accountKey,
161
- );
162
- blobServiceClient = new BlobService(
163
- `https://${accountName}.blob.core.windows.net`,
164
- sharedKeyCredential,
165
- );
166
- } catch (error) {
128
+ }: AzureStoreOptions) {
129
+ return Effect.gen(function* () {
130
+ const kvStore = yield* UploadFileKVStore;
131
+ const preferredBlockSize = blockSize || 8 * 1024 * 1024; // 8MB default
132
+ const maxUploadSize = 5_497_558_138_880 as const; // 5TiB (Azure Block Blob limit)
133
+
134
+ // Initialize Azure Blob Service Client with cross-platform authentication
135
+ let blobServiceClient: BlobServiceClient;
136
+
137
+ if (connectionString) {
138
+ // Connection string (works in all environments)
139
+ blobServiceClient = BlobService.fromConnectionString(connectionString);
140
+ } else if (sasUrl) {
141
+ // SAS URL (works in all environments including browsers)
142
+ blobServiceClient = new BlobService(sasUrl);
143
+ } else if (credential) {
144
+ // OAuth token credential (works in all environments, recommended for production)
145
+ const accountUrl = accountName
146
+ ? `https://${accountName}.blob.core.windows.net`
147
+ : sasUrl?.split("?")[0] || "";
148
+ if (!accountUrl) {
149
+ throw new Error(
150
+ "When using credential authentication, either accountName or a valid sasUrl must be provided to determine the account URL",
151
+ );
152
+ }
153
+ blobServiceClient = new BlobService(accountUrl, credential);
154
+ } else if (accountName && accountKey) {
155
+ // Legacy shared key authentication (Node.js only)
156
+ // This will fail in browser/edge environments
157
+ try {
158
+ const sharedKeyCredential = new StorageSharedKeyCredential(
159
+ accountName,
160
+ accountKey,
161
+ );
162
+ blobServiceClient = new BlobService(
163
+ `https://${accountName}.blob.core.windows.net`,
164
+ sharedKeyCredential,
165
+ );
166
+ } catch (error) {
167
+ throw new Error(
168
+ "StorageSharedKeyCredential is only available in Node.js environments. " +
169
+ "Use sasUrl or credential options for cross-platform compatibility. " +
170
+ `Original error: ${error}`,
171
+ );
172
+ }
173
+ } else {
167
174
  throw new Error(
168
- "StorageSharedKeyCredential is only available in Node.js environments. " +
169
- "Use sasUrl or credential options for cross-platform compatibility. " +
170
- `Original error: ${error}`,
175
+ "Azure authentication required. Provide one of: " +
176
+ "connectionString, sasUrl, credential, or accountName + accountKey (Node.js only)",
171
177
  );
172
178
  }
173
- } else {
174
- throw new Error(
175
- "Azure authentication required. Provide one of: " +
176
- "connectionString, sasUrl, credential, or accountName + accountKey (Node.js only)",
177
- );
178
- }
179
-
180
- const containerClient: ContainerClient =
181
- blobServiceClient.getContainerClient(containerName);
182
-
183
- const incompletePartKey = (id: string) => {
184
- return `${id}.incomplete`;
185
- };
186
-
187
- const uploadBlock = (
188
- uploadFile: UploadFile,
189
- readStream: Uint8Array,
190
- blockId: string,
191
- ) => {
192
- return withTimingMetrics(
193
- partUploadDurationHistogram,
194
- Effect.gen(function* () {
195
- yield* Effect.logInfo("Uploading block").pipe(
196
- Effect.annotateLogs({
197
- upload_id: uploadFile.id,
198
- block_id: blockId,
199
- block_size: readStream.length,
200
- }),
201
- );
202
179
 
203
- yield* uploadPartsTotal(Effect.succeed(1));
204
- yield* partSizeHistogram(Effect.succeed(readStream.length));
180
+ const containerClient: ContainerClient =
181
+ blobServiceClient.getContainerClient(containerName);
205
182
 
206
- try {
207
- const blobClient = containerClient.getBlockBlobClient(uploadFile.id);
208
- yield* Effect.tryPromise({
209
- try: async () => {
210
- await blobClient.stageBlock(
211
- blockId,
212
- readStream,
213
- readStream.length,
214
- );
215
- },
216
- catch: (error) => {
217
- Effect.runSync(
218
- trackAzureError("uploadBlock", error, {
219
- upload_id: uploadFile.id,
220
- block_id: blockId,
221
- block_size: readStream.length,
222
- }),
223
- );
224
- return UploadistaError.fromCode("FILE_WRITE_ERROR", {
225
- cause: error as Error,
226
- });
227
- },
228
- });
183
+ const incompletePartKey = (id: string) => {
184
+ return `${id}.incomplete`;
185
+ };
229
186
 
230
- yield* Effect.logInfo("Finished uploading block").pipe(
187
+ const uploadBlock = (
188
+ uploadFile: UploadFile,
189
+ readStream: Uint8Array,
190
+ blockId: string,
191
+ ) => {
192
+ return withTimingMetrics(
193
+ partUploadDurationHistogram,
194
+ Effect.gen(function* () {
195
+ yield* Effect.logInfo("Uploading block").pipe(
231
196
  Effect.annotateLogs({
232
197
  upload_id: uploadFile.id,
233
198
  block_id: blockId,
234
199
  block_size: readStream.length,
235
200
  }),
236
201
  );
237
- } catch (error) {
238
- Effect.runSync(
239
- trackAzureError("uploadBlock", error, {
240
- upload_id: uploadFile.id,
241
- block_id: blockId,
242
- block_size: readStream.length,
243
- }),
244
- );
245
- throw error;
246
- }
247
- }),
248
- );
249
- };
250
202
 
251
- const uploadIncompleteBlock = (id: string, readStream: Uint8Array) => {
252
- return Effect.tryPromise({
253
- try: async () => {
254
- const blobClient = containerClient.getBlockBlobClient(
255
- incompletePartKey(id),
256
- );
257
- await blobClient.upload(readStream, readStream.length);
258
- },
259
- catch: (error) =>
260
- UploadistaError.fromCode("FILE_WRITE_ERROR", { cause: error as Error }),
261
- }).pipe(
262
- Effect.tap(() =>
263
- Effect.logInfo("Finished uploading incomplete block").pipe(
264
- Effect.annotateLogs({
265
- upload_id: id,
266
- }),
267
- ),
268
- ),
269
- );
270
- };
203
+ yield* uploadPartsTotal(Effect.succeed(1));
204
+ yield* partSizeHistogram(Effect.succeed(readStream.length));
271
205
 
272
- const getIncompleteBlock = (id: string) => {
273
- return Effect.tryPromise({
274
- try: async () => {
275
- try {
206
+ try {
207
+ const blobClient = containerClient.getBlockBlobClient(
208
+ uploadFile.id,
209
+ );
210
+ yield* Effect.tryPromise({
211
+ try: async () => {
212
+ await blobClient.stageBlock(
213
+ blockId,
214
+ readStream,
215
+ readStream.length,
216
+ );
217
+ },
218
+ catch: (error) => {
219
+ Effect.runSync(
220
+ trackAzureError("uploadBlock", error, {
221
+ upload_id: uploadFile.id,
222
+ block_id: blockId,
223
+ block_size: readStream.length,
224
+ }),
225
+ );
226
+ return UploadistaError.fromCode("FILE_WRITE_ERROR", {
227
+ cause: error as Error,
228
+ });
229
+ },
230
+ });
231
+
232
+ yield* Effect.logInfo("Finished uploading block").pipe(
233
+ Effect.annotateLogs({
234
+ upload_id: uploadFile.id,
235
+ block_id: blockId,
236
+ block_size: readStream.length,
237
+ }),
238
+ );
239
+ } catch (error) {
240
+ Effect.runSync(
241
+ trackAzureError("uploadBlock", error, {
242
+ upload_id: uploadFile.id,
243
+ block_id: blockId,
244
+ block_size: readStream.length,
245
+ }),
246
+ );
247
+ throw error;
248
+ }
249
+ }),
250
+ );
251
+ };
252
+
253
+ const uploadIncompleteBlock = (id: string, readStream: Uint8Array) => {
254
+ return Effect.tryPromise({
255
+ try: async () => {
276
256
  const blobClient = containerClient.getBlockBlobClient(
277
257
  incompletePartKey(id),
278
258
  );
279
- const response = await blobClient.download();
280
- return response.readableStreamBody as unknown as ReadableStream;
281
- } catch (error) {
282
- if (
283
- error &&
284
- typeof error === "object" &&
285
- "statusCode" in error &&
286
- error.statusCode === 404
287
- ) {
288
- return undefined;
259
+ await blobClient.upload(readStream, readStream.length);
260
+ },
261
+ catch: (error) =>
262
+ UploadistaError.fromCode("FILE_WRITE_ERROR", {
263
+ cause: error as Error,
264
+ }),
265
+ }).pipe(
266
+ Effect.tap(() =>
267
+ Effect.logInfo("Finished uploading incomplete block").pipe(
268
+ Effect.annotateLogs({
269
+ upload_id: id,
270
+ }),
271
+ ),
272
+ ),
273
+ );
274
+ };
275
+
276
+ const getIncompleteBlock = (id: string) => {
277
+ return Effect.tryPromise({
278
+ try: async () => {
279
+ try {
280
+ const blobClient = containerClient.getBlockBlobClient(
281
+ incompletePartKey(id),
282
+ );
283
+ const response = await blobClient.download();
284
+ return response.readableStreamBody as unknown as ReadableStream;
285
+ } catch (error) {
286
+ if (
287
+ error &&
288
+ typeof error === "object" &&
289
+ "statusCode" in error &&
290
+ error.statusCode === 404
291
+ ) {
292
+ return undefined;
293
+ }
294
+ throw error;
289
295
  }
290
- throw error;
291
- }
292
- },
293
- catch: (error) =>
294
- UploadistaError.fromCode("FILE_WRITE_ERROR", { cause: error as Error }),
295
- });
296
- };
296
+ },
297
+ catch: (error) =>
298
+ UploadistaError.fromCode("FILE_WRITE_ERROR", {
299
+ cause: error as Error,
300
+ }),
301
+ });
302
+ };
297
303
 
298
- const getIncompleteBlockSize = (id: string) => {
299
- return Effect.tryPromise({
300
- try: async () => {
301
- try {
304
+ const getIncompleteBlockSize = (id: string) => {
305
+ return Effect.tryPromise({
306
+ try: async () => {
307
+ try {
308
+ const blobClient = containerClient.getBlockBlobClient(
309
+ incompletePartKey(id),
310
+ );
311
+ const properties = await blobClient.getProperties();
312
+ return properties.contentLength;
313
+ } catch (error) {
314
+ if (
315
+ error &&
316
+ typeof error === "object" &&
317
+ "statusCode" in error &&
318
+ error.statusCode === 404
319
+ ) {
320
+ return undefined;
321
+ }
322
+ throw error;
323
+ }
324
+ },
325
+ catch: (error) =>
326
+ UploadistaError.fromCode("FILE_WRITE_ERROR", {
327
+ cause: error as Error,
328
+ }),
329
+ });
330
+ };
331
+
332
+ const deleteIncompleteBlock = (id: string) => {
333
+ return Effect.tryPromise({
334
+ try: async () => {
302
335
  const blobClient = containerClient.getBlockBlobClient(
303
336
  incompletePartKey(id),
304
337
  );
305
- const properties = await blobClient.getProperties();
306
- return properties.contentLength;
307
- } catch (error) {
308
- if (
309
- error &&
310
- typeof error === "object" &&
311
- "statusCode" in error &&
312
- error.statusCode === 404
313
- ) {
314
- return undefined;
315
- }
316
- throw error;
317
- }
318
- },
319
- catch: (error) =>
320
- UploadistaError.fromCode("FILE_WRITE_ERROR", { cause: error as Error }),
321
- });
322
- };
323
-
324
- const deleteIncompleteBlock = (id: string) => {
325
- return Effect.tryPromise({
326
- try: async () => {
327
- const blobClient = containerClient.getBlockBlobClient(
328
- incompletePartKey(id),
329
- );
330
- await blobClient.deleteIfExists();
331
- },
332
- catch: (error) =>
333
- UploadistaError.fromCode("FILE_WRITE_ERROR", { cause: error as Error }),
334
- });
335
- };
338
+ await blobClient.deleteIfExists();
339
+ },
340
+ catch: (error) =>
341
+ UploadistaError.fromCode("FILE_WRITE_ERROR", {
342
+ cause: error as Error,
343
+ }),
344
+ });
345
+ };
336
346
 
337
- const downloadIncompleteBlock = (id: string) => {
338
- return Effect.gen(function* () {
339
- const incompleteBlock = yield* getIncompleteBlock(id);
347
+ const downloadIncompleteBlock = (id: string) => {
348
+ return Effect.gen(function* () {
349
+ const incompleteBlock = yield* getIncompleteBlock(id);
340
350
 
341
- if (!incompleteBlock) {
342
- return;
343
- }
351
+ if (!incompleteBlock) {
352
+ return;
353
+ }
344
354
 
345
- // Read the stream and collect all chunks to calculate size
346
- const reader = incompleteBlock.getReader();
347
- const chunks: Uint8Array[] = [];
348
- let incompleteBlockSize = 0;
355
+ // Read the stream and collect all chunks to calculate size
356
+ const reader = incompleteBlock.getReader();
357
+ const chunks: Uint8Array[] = [];
358
+ let incompleteBlockSize = 0;
349
359
 
350
- try {
351
- while (true) {
352
- const result = yield* Effect.promise(() => reader.read());
353
- if (result.done) break;
354
- chunks.push(result.value);
355
- incompleteBlockSize += result.value.length;
360
+ try {
361
+ while (true) {
362
+ const result = yield* Effect.promise(() => reader.read());
363
+ if (result.done) break;
364
+ chunks.push(result.value);
365
+ incompleteBlockSize += result.value.length;
366
+ }
367
+ } finally {
368
+ reader.releaseLock();
356
369
  }
357
- } finally {
358
- reader.releaseLock();
359
- }
360
370
 
361
- // Create a new readable stream from the chunks
362
- const stream = Stream.fromIterable(chunks);
371
+ // Create a new readable stream from the chunks
372
+ const stream = Stream.fromIterable(chunks);
363
373
 
364
- return {
365
- size: incompleteBlockSize,
366
- stream,
367
- };
368
- });
369
- };
374
+ return {
375
+ size: incompleteBlockSize,
376
+ stream,
377
+ };
378
+ });
379
+ };
370
380
 
371
- const calcOptimalBlockSize = (initSize?: number): number => {
372
- const size = initSize ?? maxUploadSize;
373
- let optimalBlockSize: number;
381
+ const calcOptimalBlockSize = (initSize?: number): number => {
382
+ const size = initSize ?? maxUploadSize;
383
+ let optimalBlockSize: number;
374
384
 
375
- if (size <= preferredBlockSize) {
376
- optimalBlockSize = size;
377
- } else if (size <= preferredBlockSize * maxBlocks) {
378
- optimalBlockSize = preferredBlockSize;
379
- } else {
380
- // Calculate the minimum block size needed to fit within the max blocks limit
381
- optimalBlockSize = Math.ceil(size / maxBlocks);
382
- }
385
+ if (size <= preferredBlockSize) {
386
+ optimalBlockSize = size;
387
+ } else if (size <= preferredBlockSize * maxBlocks) {
388
+ optimalBlockSize = preferredBlockSize;
389
+ } else {
390
+ // Calculate the minimum block size needed to fit within the max blocks limit
391
+ optimalBlockSize = Math.ceil(size / maxBlocks);
392
+ }
383
393
 
384
- // Ensure the block size respects the minimum and is aligned properly
385
- const finalBlockSize = Math.max(optimalBlockSize, minBlockSize);
394
+ // Ensure the block size respects the minimum and is aligned properly
395
+ const finalBlockSize = Math.max(optimalBlockSize, minBlockSize);
386
396
 
387
- // Round up to ensure consistent block sizes
388
- return Math.ceil(finalBlockSize / 1024) * 1024; // Align to 1KB boundaries
389
- };
397
+ // Round up to ensure consistent block sizes
398
+ return Math.ceil(finalBlockSize / 1024) * 1024; // Align to 1KB boundaries
399
+ };
390
400
 
391
- // Proper single-pass chunking using Effect's async stream constructor
392
- // Ensures all parts except the final part are exactly the same size (S3 requirement)
393
- const createChunkedStream =
394
- (chunkSize: number) =>
395
- <E>(stream: Stream.Stream<Uint8Array, E>): Stream.Stream<ChunkInfo, E> => {
396
- return Stream.async<ChunkInfo, E>((emit) => {
397
- let buffer = new Uint8Array(0);
398
- let blockNumber = 1;
399
- let totalBytesProcessed = 0;
400
-
401
- const emitChunk = (data: Uint8Array, isFinalChunk = false) => {
402
- // Log chunk information for debugging - use INFO level to see in logs
403
- Effect.runSync(
404
- Effect.logInfo("Creating chunk").pipe(
405
- Effect.annotateLogs({
406
- block_number: blockNumber,
407
- chunk_size: data.length,
408
- expected_size: chunkSize,
409
- is_final_chunk: isFinalChunk,
410
- total_bytes_processed: totalBytesProcessed + data.length,
411
- }),
412
- ),
413
- );
414
- emit.single({
415
- blockNumber: blockNumber++,
416
- data,
417
- size: data.length,
418
- });
419
- };
401
+ // Proper single-pass chunking using Effect's async stream constructor
402
+ // Ensures all parts except the final part are exactly the same size (S3 requirement)
403
+ const createChunkedStream =
404
+ (chunkSize: number) =>
405
+ <E>(
406
+ stream: Stream.Stream<Uint8Array, E>,
407
+ ): Stream.Stream<ChunkInfo, E> => {
408
+ return Stream.async<ChunkInfo, E>((emit) => {
409
+ let buffer = new Uint8Array(0);
410
+ let blockNumber = 1;
411
+ let totalBytesProcessed = 0;
412
+
413
+ const emitChunk = (data: Uint8Array, isFinalChunk = false) => {
414
+ // Log chunk information for debugging - use INFO level to see in logs
415
+ Effect.runSync(
416
+ Effect.logInfo("Creating chunk").pipe(
417
+ Effect.annotateLogs({
418
+ block_number: blockNumber,
419
+ chunk_size: data.length,
420
+ expected_size: chunkSize,
421
+ is_final_chunk: isFinalChunk,
422
+ total_bytes_processed: totalBytesProcessed + data.length,
423
+ }),
424
+ ),
425
+ );
426
+ emit.single({
427
+ blockNumber: blockNumber++,
428
+ data,
429
+ size: data.length,
430
+ });
431
+ };
420
432
 
421
- const processChunk = (newData: Uint8Array) => {
422
- // Combine buffer with new data
423
- const combined = new Uint8Array(buffer.length + newData.length);
424
- combined.set(buffer);
425
- combined.set(newData, buffer.length);
426
- buffer = combined;
427
- totalBytesProcessed += newData.length;
428
-
429
- // Emit full chunks of exactly chunkSize bytes
430
- // This ensures S3 multipart upload rule: all parts except last must be same size
431
- while (buffer.length >= chunkSize) {
432
- const chunk = buffer.slice(0, chunkSize);
433
- buffer = buffer.slice(chunkSize);
434
- emitChunk(chunk, false);
435
- }
436
- };
433
+ const processChunk = (newData: Uint8Array) => {
434
+ // Combine buffer with new data
435
+ const combined = new Uint8Array(buffer.length + newData.length);
436
+ combined.set(buffer);
437
+ combined.set(newData, buffer.length);
438
+ buffer = combined;
439
+ totalBytesProcessed += newData.length;
440
+
441
+ // Emit full chunks of exactly chunkSize bytes
442
+ // This ensures S3 multipart upload rule: all parts except last must be same size
443
+ while (buffer.length >= chunkSize) {
444
+ const chunk = buffer.slice(0, chunkSize);
445
+ buffer = buffer.slice(chunkSize);
446
+ emitChunk(chunk, false);
447
+ }
448
+ };
437
449
 
438
- // Process the stream
439
- Effect.runFork(
440
- stream.pipe(
441
- Stream.runForEach((chunk) =>
442
- Effect.sync(() => processChunk(chunk)),
450
+ // Process the stream
451
+ Effect.runFork(
452
+ stream.pipe(
453
+ Stream.runForEach((chunk) =>
454
+ Effect.sync(() => processChunk(chunk)),
455
+ ),
456
+ Effect.andThen(() =>
457
+ Effect.sync(() => {
458
+ // Emit final chunk if there's remaining data
459
+ // The final chunk can be any size < chunkSize (S3 allows this)
460
+ if (buffer.length > 0) {
461
+ emitChunk(buffer, true);
462
+ }
463
+ emit.end();
464
+ }),
465
+ ),
466
+ Effect.catchAll((error) => Effect.sync(() => emit.fail(error))),
443
467
  ),
444
- Effect.andThen(() =>
445
- Effect.sync(() => {
446
- // Emit final chunk if there's remaining data
447
- // The final chunk can be any size < chunkSize (S3 allows this)
448
- if (buffer.length > 0) {
449
- emitChunk(buffer, true);
450
- }
451
- emit.end();
468
+ );
469
+ });
470
+ };
471
+
472
+ // Byte-level progress tracking during streaming
473
+ // This provides smooth, immediate progress feedback by tracking bytes as they
474
+ // flow through the stream, before they reach S3. This solves the issue where
475
+ // small files (< 5MB) would jump from 0% to 100% instantly.
476
+ const withByteProgressTracking =
477
+ (onProgress?: (totalBytes: number) => void, initialOffset = 0) =>
478
+ <E, R>(stream: Stream.Stream<Uint8Array, E, R>) => {
479
+ if (!onProgress) return stream;
480
+
481
+ return Effect.gen(function* () {
482
+ const totalBytesProcessedRef = yield* Ref.make(initialOffset);
483
+
484
+ return stream.pipe(
485
+ Stream.tap((chunk) =>
486
+ Effect.gen(function* () {
487
+ const newTotal = yield* Ref.updateAndGet(
488
+ totalBytesProcessedRef,
489
+ (total) => total + chunk.length,
490
+ );
491
+ onProgress(newTotal);
452
492
  }),
453
493
  ),
454
- Effect.catchAll((error) => Effect.sync(() => emit.fail(error))),
455
- ),
456
- );
457
- });
458
- };
459
-
460
- // Byte-level progress tracking during streaming
461
- // This provides smooth, immediate progress feedback by tracking bytes as they
462
- // flow through the stream, before they reach S3. This solves the issue where
463
- // small files (< 5MB) would jump from 0% to 100% instantly.
464
- const withByteProgressTracking =
465
- (onProgress?: (totalBytes: number) => void, initialOffset = 0) =>
466
- <E, R>(stream: Stream.Stream<Uint8Array, E, R>) => {
467
- if (!onProgress) return stream;
494
+ );
495
+ }).pipe(Stream.unwrap);
496
+ };
468
497
 
498
+ /**
499
+ * Uploads a stream to Azure using multiple blocks
500
+ */
501
+ const uploadBlocks = (
502
+ uploadFile: UploadFile,
503
+ readStream: Stream.Stream<Uint8Array, UploadistaError>,
504
+ initCurrentBlockNumber: number,
505
+ initOffset: number,
506
+ onProgress?: (newOffset: number) => void,
507
+ ) => {
469
508
  return Effect.gen(function* () {
470
- const totalBytesProcessedRef = yield* Ref.make(initialOffset);
471
-
472
- return stream.pipe(
473
- Stream.tap((chunk) =>
474
- Effect.gen(function* () {
475
- const newTotal = yield* Ref.updateAndGet(
476
- totalBytesProcessedRef,
477
- (total) => total + chunk.length,
478
- );
479
- onProgress(newTotal);
480
- }),
481
- ),
509
+ yield* Effect.logInfo("Uploading blocks").pipe(
510
+ Effect.annotateLogs({
511
+ upload_id: uploadFile.id,
512
+ init_offset: initOffset,
513
+ file_size: uploadFile.size,
514
+ }),
482
515
  );
483
- }).pipe(Stream.unwrap);
484
- };
485
-
486
- /**
487
- * Uploads a stream to Azure using multiple blocks
488
- */
489
- const uploadBlocks = (
490
- uploadFile: UploadFile,
491
- readStream: Stream.Stream<Uint8Array, UploadistaError>,
492
- initCurrentBlockNumber: number,
493
- initOffset: number,
494
- onProgress?: (newOffset: number) => void,
495
- ) => {
496
- return Effect.gen(function* () {
497
- yield* Effect.logInfo("Uploading blocks").pipe(
498
- Effect.annotateLogs({
499
- upload_id: uploadFile.id,
500
- init_offset: initOffset,
501
- file_size: uploadFile.size,
502
- }),
503
- );
504
516
 
505
- const size = uploadFile.size;
517
+ const size = uploadFile.size;
506
518
 
507
- const uploadBlockSize = calcOptimalBlockSize(size);
508
- yield* Effect.logInfo("Block size").pipe(
509
- Effect.annotateLogs({
510
- upload_id: uploadFile.id,
511
- block_size: uploadBlockSize,
512
- }),
513
- );
514
- // Enhanced Progress Tracking Strategy:
515
- // 1. Byte-level progress during streaming - provides immediate, smooth feedback
516
- // as data flows through the pipeline (even for small files)
517
- // 2. This tracks progress BEFORE S3 upload, giving users immediate feedback
518
- // 3. For large files with multiple parts, this provides granular updates
519
- // 4. For small files (single part), this prevents 0%->100% jumps
520
- const chunkStream = readStream.pipe(
521
- // Add byte-level progress tracking during streaming (immediate feedback)
522
- withByteProgressTracking(onProgress, initOffset),
523
- // Create chunks for S3 multipart upload with uniform part sizes
524
- createChunkedStream(uploadBlockSize),
525
- );
526
-
527
- // Track cumulative offset and total bytes with Effect Refs
528
- const cumulativeOffsetRef = yield* Ref.make(initOffset);
529
- const totalBytesUploadedRef = yield* Ref.make(0);
530
- const blockIdsRef = yield* Ref.make<string[]>([]);
531
- // Create a chunk upload function for the sink
532
- const uploadChunk = (chunkInfo: ChunkInfo) =>
533
- Effect.gen(function* () {
534
- // Calculate cumulative bytes to determine if this is the final block
535
- const cumulativeOffset = yield* Ref.updateAndGet(
536
- cumulativeOffsetRef,
537
- (offset) => offset + chunkInfo.size,
538
- );
539
- const isFinalBlock = cumulativeOffset >= (uploadFile.size || 0);
540
-
541
- yield* Effect.logDebug("Processing chunk").pipe(
542
- Effect.annotateLogs({
543
- upload_id: uploadFile.id,
544
- cumulative_offset: cumulativeOffset,
545
- file_size: uploadFile.size,
546
- chunk_size: chunkInfo.size,
547
- is_final_block: isFinalBlock,
548
- }),
549
- );
550
-
551
- const actualBlockNumber =
552
- initCurrentBlockNumber + chunkInfo.blockNumber - 1;
519
+ const uploadBlockSize = calcOptimalBlockSize(size);
520
+ yield* Effect.logInfo("Block size").pipe(
521
+ Effect.annotateLogs({
522
+ upload_id: uploadFile.id,
523
+ block_size: uploadBlockSize,
524
+ }),
525
+ );
526
+ // Enhanced Progress Tracking Strategy:
527
+ // 1. Byte-level progress during streaming - provides immediate, smooth feedback
528
+ // as data flows through the pipeline (even for small files)
529
+ // 2. This tracks progress BEFORE S3 upload, giving users immediate feedback
530
+ // 3. For large files with multiple parts, this provides granular updates
531
+ // 4. For small files (single part), this prevents 0%->100% jumps
532
+ const chunkStream = readStream.pipe(
533
+ // Add byte-level progress tracking during streaming (immediate feedback)
534
+ withByteProgressTracking(onProgress, initOffset),
535
+ // Create chunks for S3 multipart upload with uniform part sizes
536
+ createChunkedStream(uploadBlockSize),
537
+ );
553
538
 
554
- if (chunkInfo.size > uploadBlockSize) {
555
- yield* Effect.fail(
556
- UploadistaError.fromCode("FILE_WRITE_ERROR", {
557
- cause: new Error(
558
- `Block size ${chunkInfo.size} exceeds upload block size ${uploadBlockSize}`,
559
- ),
560
- }),
539
+ // Track cumulative offset and total bytes with Effect Refs
540
+ const cumulativeOffsetRef = yield* Ref.make(initOffset);
541
+ const totalBytesUploadedRef = yield* Ref.make(0);
542
+ const blockIdsRef = yield* Ref.make<string[]>([]);
543
+ // Create a chunk upload function for the sink
544
+ const uploadChunk = (chunkInfo: ChunkInfo) =>
545
+ Effect.gen(function* () {
546
+ // Calculate cumulative bytes to determine if this is the final block
547
+ const cumulativeOffset = yield* Ref.updateAndGet(
548
+ cumulativeOffsetRef,
549
+ (offset) => offset + chunkInfo.size,
561
550
  );
562
- }
551
+ const isFinalBlock = cumulativeOffset >= (uploadFile.size || 0);
563
552
 
564
- // For parts that meet the minimum part size (5MB) or are the final part,
565
- // upload them as regular multipart parts
566
- if (chunkInfo.size >= minBlockSize || isFinalBlock) {
567
- yield* Effect.logDebug("Uploading multipart chunk").pipe(
553
+ yield* Effect.logDebug("Processing chunk").pipe(
568
554
  Effect.annotateLogs({
569
555
  upload_id: uploadFile.id,
570
- block_number: actualBlockNumber,
556
+ cumulative_offset: cumulativeOffset,
557
+ file_size: uploadFile.size,
571
558
  chunk_size: chunkInfo.size,
572
- min_block_size: minBlockSize,
573
559
  is_final_block: isFinalBlock,
574
560
  }),
575
561
  );
576
- // Generate block ID (base64 encoded, must be consistent)
577
- const blockId = bufferFrom(
578
- `block-${actualBlockNumber.toString().padStart(6, "0")}`,
579
- ).toString("base64");
580
- yield* uploadBlock(uploadFile, chunkInfo.data, blockId);
581
- yield* Ref.update(blockIdsRef, (ids) => [...ids, blockId]);
582
- yield* partSizeHistogram(Effect.succeed(chunkInfo.size));
583
- } else {
584
- // Only upload as incomplete part if it's smaller than minimum and not final
585
- yield* uploadIncompleteBlock(uploadFile.id, chunkInfo.data);
586
- }
587
562
 
588
- yield* Ref.update(
589
- totalBytesUploadedRef,
590
- (total) => total + chunkInfo.size,
591
- );
563
+ const actualBlockNumber =
564
+ initCurrentBlockNumber + chunkInfo.blockNumber - 1;
592
565
 
593
- // Note: Byte-level progress is now tracked during streaming phase
594
- // This ensures smooth progress updates regardless of part size
595
- // Azure upload completion is tracked via totalBytesUploadedRef for accuracy
596
- });
566
+ if (chunkInfo.size > uploadBlockSize) {
567
+ yield* Effect.fail(
568
+ UploadistaError.fromCode("FILE_WRITE_ERROR", {
569
+ cause: new Error(
570
+ `Block size ${chunkInfo.size} exceeds upload block size ${uploadBlockSize}`,
571
+ ),
572
+ }),
573
+ );
574
+ }
597
575
 
598
- // Process chunks concurrently with controlled concurrency
599
- yield* chunkStream.pipe(
600
- Stream.runForEach((chunkInfo) => uploadChunk(chunkInfo)),
601
- Effect.withConcurrency(maxConcurrentBlockUploads),
602
- );
576
+ // For parts that meet the minimum part size (5MB) or are the final part,
577
+ // upload them as regular multipart parts
578
+ if (chunkInfo.size >= minBlockSize || isFinalBlock) {
579
+ yield* Effect.logDebug("Uploading multipart chunk").pipe(
580
+ Effect.annotateLogs({
581
+ upload_id: uploadFile.id,
582
+ block_number: actualBlockNumber,
583
+ chunk_size: chunkInfo.size,
584
+ min_block_size: minBlockSize,
585
+ is_final_block: isFinalBlock,
586
+ }),
587
+ );
588
+ // Generate block ID (base64 encoded, must be consistent)
589
+ const blockId = bufferFrom(
590
+ `block-${actualBlockNumber.toString().padStart(6, "0")}`,
591
+ ).toString("base64");
592
+ yield* uploadBlock(uploadFile, chunkInfo.data, blockId);
593
+ yield* Ref.update(blockIdsRef, (ids) => [...ids, blockId]);
594
+ yield* partSizeHistogram(Effect.succeed(chunkInfo.size));
595
+ } else {
596
+ // Only upload as incomplete part if it's smaller than minimum and not final
597
+ yield* uploadIncompleteBlock(uploadFile.id, chunkInfo.data);
598
+ }
603
599
 
604
- return {
605
- bytesUploaded: yield* Ref.get(totalBytesUploadedRef),
606
- blockIds: yield* Ref.get(blockIdsRef),
607
- };
608
- });
609
- };
600
+ yield* Ref.update(
601
+ totalBytesUploadedRef,
602
+ (total) => total + chunkInfo.size,
603
+ );
610
604
 
611
- /**
612
- * Commits all staged blocks to create the final blob
613
- */
614
- const commitBlocks = (uploadFile: UploadFile, blockIds: string[]) => {
615
- return Effect.tryPromise({
616
- try: async () => {
617
- const blobClient = containerClient.getBlockBlobClient(uploadFile.id);
618
- await blobClient.commitBlockList(blockIds, {
619
- blobHTTPHeaders: {
620
- blobContentType: uploadFile.metadata?.contentType?.toString(),
621
- blobCacheControl: uploadFile.metadata?.cacheControl?.toString(),
622
- },
623
- });
624
- },
625
- catch: (error) =>
626
- UploadistaError.fromCode("FILE_WRITE_ERROR", { cause: error as Error }),
627
- });
628
- };
605
+ // Note: Byte-level progress is now tracked during streaming phase
606
+ // This ensures smooth progress updates regardless of part size
607
+ // Azure upload completion is tracked via totalBytesUploadedRef for accuracy
608
+ });
629
609
 
630
- /**
631
- * Gets the committed blocks for a blob
632
- */
633
- const retrieveBlocks = (id: string) => {
634
- return Effect.tryPromise({
635
- try: async () => {
636
- try {
637
- const blobClient = containerClient.getBlockBlobClient(id);
638
- const blockList = await blobClient.getBlockList("committed");
610
+ // Process chunks concurrently with controlled concurrency
611
+ yield* chunkStream.pipe(
612
+ Stream.runForEach((chunkInfo) => uploadChunk(chunkInfo)),
613
+ Effect.withConcurrency(maxConcurrentBlockUploads),
614
+ );
639
615
 
640
- const blocks =
641
- blockList.committedBlocks?.map((block) => ({
642
- size: block.size,
643
- })) ?? [];
616
+ return {
617
+ bytesUploaded: yield* Ref.get(totalBytesUploadedRef),
618
+ blockIds: yield* Ref.get(blockIdsRef),
619
+ };
620
+ });
621
+ };
644
622
 
645
- return blocks;
646
- } catch (error) {
647
- if (
648
- error &&
649
- typeof error === "object" &&
650
- "statusCode" in error &&
651
- error.statusCode === 404
652
- ) {
653
- return [];
654
- }
655
- throw error;
656
- }
657
- },
658
- catch: (error) =>
659
- UploadistaError.fromCode("UPLOAD_ID_NOT_FOUND", {
660
- cause: error as Error,
661
- }),
662
- });
663
- };
623
+ /**
624
+ * Commits all staged blocks to create the final blob
625
+ */
626
+ const commitBlocks = (uploadFile: UploadFile, blockIds: string[]) => {
627
+ return Effect.tryPromise({
628
+ try: async () => {
629
+ const blobClient = containerClient.getBlockBlobClient(uploadFile.id);
630
+ await blobClient.commitBlockList(blockIds, {
631
+ blobHTTPHeaders: {
632
+ blobContentType: uploadFile.metadata?.contentType?.toString(),
633
+ blobCacheControl: uploadFile.metadata?.cacheControl?.toString(),
634
+ },
635
+ });
636
+ },
637
+ catch: (error) =>
638
+ UploadistaError.fromCode("FILE_WRITE_ERROR", {
639
+ cause: error as Error,
640
+ }),
641
+ });
642
+ };
664
643
 
665
- /**
666
- * Removes cached data for a given file
667
- */
668
- const clearCache = (id: string) => {
669
- return Effect.gen(function* () {
670
- yield* Effect.logInfo("Removing cached data").pipe(
671
- Effect.annotateLogs({
672
- upload_id: id,
673
- }),
674
- );
675
- yield* kvStore.delete(id);
676
- });
677
- };
644
+ /**
645
+ * Gets the committed blocks for a blob
646
+ */
647
+ const retrieveBlocks = (id: string) => {
648
+ return Effect.tryPromise({
649
+ try: async () => {
650
+ try {
651
+ const blobClient = containerClient.getBlockBlobClient(id);
652
+ const blockList = await blobClient.getBlockList("committed");
653
+
654
+ const blocks =
655
+ blockList.committedBlocks?.map((block) => ({
656
+ size: block.size,
657
+ })) ?? [];
658
+
659
+ return blocks;
660
+ } catch (error) {
661
+ if (
662
+ error &&
663
+ typeof error === "object" &&
664
+ "statusCode" in error &&
665
+ error.statusCode === 404
666
+ ) {
667
+ return [];
668
+ }
669
+ throw error;
670
+ }
671
+ },
672
+ catch: (error) =>
673
+ UploadistaError.fromCode("UPLOAD_ID_NOT_FOUND", {
674
+ cause: error as Error,
675
+ }),
676
+ });
677
+ };
678
678
 
679
- /**
680
- * Creates a blob placeholder in Azure and stores metadata
681
- */
682
- const create = (upload: UploadFile) => {
683
- return Effect.gen(function* () {
684
- yield* uploadRequestsTotal(Effect.succeed(1));
685
- yield* activeUploadsGauge(Effect.succeed(1));
686
- yield* fileSizeHistogram(Effect.succeed(upload.size || 0));
687
-
688
- yield* Effect.logInfo("Initializing Azure blob upload").pipe(
689
- Effect.annotateLogs({
690
- upload_id: upload.id,
691
- }),
692
- );
679
+ /**
680
+ * Removes cached data for a given file
681
+ */
682
+ const clearCache = (id: string) => {
683
+ return Effect.gen(function* () {
684
+ yield* Effect.logInfo("Removing cached data").pipe(
685
+ Effect.annotateLogs({
686
+ upload_id: id,
687
+ }),
688
+ );
689
+ yield* kvStore.delete(id);
690
+ });
691
+ };
693
692
 
694
- upload.creationDate = new Date().toISOString();
695
- upload.storage = {
696
- id: upload.storage.id,
697
- type: upload.storage.type,
698
- path: upload.id,
699
- bucket: containerName,
700
- };
701
- upload.url = `${deliveryUrl}/${upload.id}`;
693
+ /**
694
+ * Creates a blob placeholder in Azure and stores metadata
695
+ */
696
+ const create = (upload: UploadFile) => {
697
+ return Effect.gen(function* () {
698
+ yield* uploadRequestsTotal(Effect.succeed(1));
699
+ yield* activeUploadsGauge(Effect.succeed(1));
700
+ yield* fileSizeHistogram(Effect.succeed(upload.size || 0));
702
701
 
703
- yield* kvStore.set(upload.id, upload);
704
- yield* Effect.logInfo("Azure blob upload initialized").pipe(
705
- Effect.annotateLogs({
706
- upload_id: upload.id,
707
- }),
708
- );
702
+ yield* Effect.logInfo("Initializing Azure blob upload").pipe(
703
+ Effect.annotateLogs({
704
+ upload_id: upload.id,
705
+ }),
706
+ );
709
707
 
710
- return upload;
711
- });
712
- };
708
+ upload.creationDate = new Date().toISOString();
709
+ upload.storage = {
710
+ id: upload.storage.id,
711
+ type: upload.storage.type,
712
+ path: upload.id,
713
+ bucket: containerName,
714
+ };
715
+ upload.url = `${deliveryUrl}/${upload.id}`;
713
716
 
714
- const readStream = (
715
- id: string,
716
- ): Effect.Effect<ReadableStream | Blob, UploadistaError> => {
717
- return Effect.tryPromise({
718
- try: async () => {
719
- const blobClient = containerClient.getBlockBlobClient(id);
720
- const response = await blobClient.download();
721
- if (response.blobBody) {
722
- return response.blobBody;
723
- }
724
- if (response.readableStreamBody) {
725
- return response.readableStreamBody as unknown as ReadableStream;
726
- }
727
- throw new Error("No blob body or readable stream body");
728
- },
729
- catch: (error) =>
730
- UploadistaError.fromCode("FILE_WRITE_ERROR", {
731
- cause: error as Error,
732
- }),
733
- });
734
- };
717
+ yield* kvStore.set(upload.id, upload);
718
+ yield* Effect.logInfo("Azure blob upload initialized").pipe(
719
+ Effect.annotateLogs({
720
+ upload_id: upload.id,
721
+ }),
722
+ );
735
723
 
736
- const read = (id: string): Effect.Effect<Uint8Array, UploadistaError> => {
737
- return Effect.gen(function* () {
738
- const stream = yield* readStream(id);
724
+ return upload;
725
+ });
726
+ };
739
727
 
740
- // Convert stream/blob to Uint8Array
741
- if (stream instanceof Blob) {
742
- const arrayBuffer = yield* Effect.promise(() => stream.arrayBuffer());
743
- return new Uint8Array(arrayBuffer as ArrayBuffer);
744
- }
728
+ const readStream = (
729
+ id: string,
730
+ ): Effect.Effect<ReadableStream | Blob, UploadistaError> => {
731
+ return Effect.tryPromise({
732
+ try: async () => {
733
+ const blobClient = containerClient.getBlockBlobClient(id);
734
+ const response = await blobClient.download();
735
+ if (response.blobBody) {
736
+ return response.blobBody;
737
+ }
738
+ if (response.readableStreamBody) {
739
+ return response.readableStreamBody as unknown as ReadableStream;
740
+ }
741
+ throw new Error("No blob body or readable stream body");
742
+ },
743
+ catch: (error) =>
744
+ UploadistaError.fromCode("FILE_WRITE_ERROR", {
745
+ cause: error as Error,
746
+ }),
747
+ });
748
+ };
745
749
 
746
- // Read from ReadableStream
747
- const reader = stream.getReader();
748
- const chunks: Uint8Array[] = [];
750
+ const read = (id: string): Effect.Effect<Uint8Array, UploadistaError> => {
751
+ return Effect.gen(function* () {
752
+ const stream = yield* readStream(id);
749
753
 
750
- try {
751
- while (true) {
752
- const result = yield* Effect.promise(() => reader.read());
753
- if (result.done) break;
754
- chunks.push(result.value);
754
+ // Convert stream/blob to Uint8Array
755
+ if (stream instanceof Blob) {
756
+ const arrayBuffer = yield* Effect.promise(() => stream.arrayBuffer());
757
+ return new Uint8Array(arrayBuffer as ArrayBuffer);
755
758
  }
756
- } finally {
757
- reader.releaseLock();
758
- }
759
759
 
760
- // Concatenate all chunks
761
- const totalLength = chunks.reduce((acc, chunk) => acc + chunk.length, 0);
762
- const result = new Uint8Array(totalLength);
763
- let offset = 0;
764
- for (const chunk of chunks) {
765
- result.set(chunk, offset);
766
- offset += chunk.length;
767
- }
768
-
769
- return result;
770
- });
771
- };
760
+ // Read from ReadableStream
761
+ const reader = stream.getReader();
762
+ const chunks: Uint8Array[] = [];
772
763
 
773
- const prepareUpload = (
774
- file_id: string,
775
- initialOffset: number,
776
- initialData: Stream.Stream<Uint8Array, UploadistaError>,
777
- ) => {
778
- return Effect.gen(function* () {
779
- const uploadFile = yield* kvStore.get(file_id);
780
-
781
- const blocks = yield* retrieveBlocks(file_id);
764
+ try {
765
+ while (true) {
766
+ const result = yield* Effect.promise(() => reader.read());
767
+ if (result.done) break;
768
+ chunks.push(result.value);
769
+ }
770
+ } finally {
771
+ reader.releaseLock();
772
+ }
782
773
 
783
- const blockNumber = blocks.length;
784
- const nextBlockNumber = blockNumber + 1;
774
+ // Concatenate all chunks
775
+ const totalLength = chunks.reduce(
776
+ (acc, chunk) => acc + chunk.length,
777
+ 0,
778
+ );
779
+ const result = new Uint8Array(totalLength);
780
+ let offset = 0;
781
+ for (const chunk of chunks) {
782
+ result.set(chunk, offset);
783
+ offset += chunk.length;
784
+ }
785
785
 
786
- const incompleteBlock = yield* downloadIncompleteBlock(file_id);
786
+ return result;
787
+ });
788
+ };
787
789
 
788
- if (incompleteBlock) {
789
- yield* deleteIncompleteBlock(file_id);
790
- const offset = initialOffset - incompleteBlock.size;
791
- const data = incompleteBlock.stream.pipe(Stream.concat(initialData));
792
- return {
793
- uploadFile,
794
- nextBlockNumber: nextBlockNumber - 1,
795
- offset,
796
- incompleteBlockSize: incompleteBlock.size,
797
- data,
798
- };
799
- } else {
800
- return {
801
- uploadFile,
802
- nextBlockNumber,
803
- offset: initialOffset,
804
- incompleteBlockSize: 0,
805
- data: initialData,
806
- };
807
- }
808
- });
809
- };
790
+ const prepareUpload = (
791
+ file_id: string,
792
+ initialOffset: number,
793
+ initialData: Stream.Stream<Uint8Array, UploadistaError>,
794
+ ) => {
795
+ return Effect.gen(function* () {
796
+ const uploadFile = yield* kvStore.get(file_id);
810
797
 
811
- /**
812
- * Write to the file, starting at the provided offset
813
- */
814
- const write = (
815
- options: DataStoreWriteOptions,
816
- dependencies: {
817
- onProgress?: (chunkSize: number) => void;
818
- },
819
- ) => {
820
- return withUploadMetrics(
821
- options.file_id,
822
- withTimingMetrics(
823
- uploadDurationHistogram,
824
- Effect.gen(function* () {
825
- const startTime = Date.now();
826
- const {
827
- stream: initialData,
828
- file_id,
829
- offset: initialOffset,
830
- } = options;
831
- const { onProgress } = dependencies;
798
+ const blocks = yield* retrieveBlocks(file_id);
832
799
 
833
- const prepareResult = yield* prepareUpload(
834
- file_id,
835
- initialOffset,
836
- initialData,
837
- );
800
+ const blockNumber = blocks.length;
801
+ const nextBlockNumber = blockNumber + 1;
838
802
 
839
- const { uploadFile, nextBlockNumber, offset, data } = prepareResult;
803
+ const incompleteBlock = yield* downloadIncompleteBlock(file_id);
840
804
 
841
- const { bytesUploaded, blockIds } = yield* uploadBlocks(
805
+ if (incompleteBlock) {
806
+ yield* deleteIncompleteBlock(file_id);
807
+ const offset = initialOffset - incompleteBlock.size;
808
+ const data = incompleteBlock.stream.pipe(Stream.concat(initialData));
809
+ return {
842
810
  uploadFile,
811
+ nextBlockNumber: nextBlockNumber - 1,
812
+ offset,
813
+ incompleteBlockSize: incompleteBlock.size,
843
814
  data,
815
+ };
816
+ } else {
817
+ return {
818
+ uploadFile,
844
819
  nextBlockNumber,
845
- offset,
846
- onProgress,
847
- );
848
-
849
- const newOffset = offset + bytesUploaded;
850
-
851
- if (uploadFile.size === newOffset) {
852
- try {
853
- // Commit all blocks to finalize the blob
854
- yield* commitBlocks(uploadFile, blockIds);
855
- yield* clearCache(file_id);
856
-
857
- // Log completion with observability
858
- yield* logAzureUploadCompletion(file_id, {
859
- fileSize: uploadFile.size || 0,
860
- totalDurationMs: Date.now() - startTime,
861
- partsCount: blockIds.length,
862
- averagePartSize: uploadFile.size,
863
- throughputBps: uploadFile.size / (Date.now() - startTime),
864
- retryCount: 0,
865
- });
866
-
867
- yield* uploadSuccessTotal(Effect.succeed(1));
868
- yield* activeUploadsGauge(Effect.succeed(-1));
869
- } catch (error) {
870
- yield* Effect.logError("Failed to finish upload").pipe(
871
- Effect.annotateLogs({
872
- upload_id: file_id,
873
- error: JSON.stringify(error),
874
- }),
875
- );
876
- yield* uploadErrorsTotal(Effect.succeed(1));
877
- Effect.runSync(
878
- trackAzureError("write", error, {
879
- upload_id: file_id,
880
- operation: "commit",
881
- blocks: blockIds.length,
882
- }),
883
- );
884
- throw error;
885
- }
886
- }
820
+ offset: initialOffset,
821
+ incompleteBlockSize: 0,
822
+ data: initialData,
823
+ };
824
+ }
825
+ });
826
+ };
887
827
 
888
- return newOffset;
889
- }),
890
- ),
891
- );
892
- };
828
+ /**
829
+ * Write to the file, starting at the provided offset
830
+ */
831
+ const write = (
832
+ options: DataStoreWriteOptions,
833
+ dependencies: {
834
+ onProgress?: (chunkSize: number) => void;
835
+ },
836
+ ) => {
837
+ return withUploadMetrics(
838
+ options.file_id,
839
+ withTimingMetrics(
840
+ uploadDurationHistogram,
841
+ Effect.gen(function* () {
842
+ const startTime = Date.now();
843
+ const {
844
+ stream: initialData,
845
+ file_id,
846
+ offset: initialOffset,
847
+ } = options;
848
+ const { onProgress } = dependencies;
849
+
850
+ const prepareResult = yield* prepareUpload(
851
+ file_id,
852
+ initialOffset,
853
+ initialData,
854
+ );
893
855
 
894
- const getUpload = (id: string) => {
895
- return Effect.gen(function* () {
896
- const uploadFile = yield* kvStore.get(id);
856
+ const { uploadFile, nextBlockNumber, offset, data } = prepareResult;
897
857
 
898
- let offset = 0;
858
+ const { bytesUploaded, blockIds } = yield* uploadBlocks(
859
+ uploadFile,
860
+ data,
861
+ nextBlockNumber,
862
+ offset,
863
+ onProgress,
864
+ );
899
865
 
900
- try {
901
- const blocks = yield* retrieveBlocks(id);
902
- offset = calcOffsetFromBlocks(blocks);
903
- } catch (error) {
904
- // Check if the error is caused by the blob not being found
905
- if (
906
- typeof error === "object" &&
907
- error !== null &&
908
- "statusCode" in error &&
909
- error.statusCode === 404
910
- ) {
911
- return {
912
- ...uploadFile,
913
- offset: uploadFile.size as number,
914
- size: uploadFile.size,
915
- metadata: uploadFile.metadata,
916
- storage: uploadFile.storage,
917
- };
918
- }
866
+ const newOffset = offset + bytesUploaded;
867
+
868
+ if (uploadFile.size === newOffset) {
869
+ try {
870
+ // Commit all blocks to finalize the blob
871
+ yield* commitBlocks(uploadFile, blockIds);
872
+ yield* clearCache(file_id);
873
+
874
+ // Log completion with observability
875
+ yield* logAzureUploadCompletion(file_id, {
876
+ fileSize: uploadFile.size || 0,
877
+ totalDurationMs: Date.now() - startTime,
878
+ partsCount: blockIds.length,
879
+ averagePartSize: uploadFile.size,
880
+ throughputBps: uploadFile.size / (Date.now() - startTime),
881
+ retryCount: 0,
882
+ });
883
+
884
+ yield* uploadSuccessTotal(Effect.succeed(1));
885
+ yield* activeUploadsGauge(Effect.succeed(-1));
886
+ } catch (error) {
887
+ yield* Effect.logError("Failed to finish upload").pipe(
888
+ Effect.annotateLogs({
889
+ upload_id: file_id,
890
+ error: JSON.stringify(error),
891
+ }),
892
+ );
893
+ yield* uploadErrorsTotal(Effect.succeed(1));
894
+ Effect.runSync(
895
+ trackAzureError("write", error, {
896
+ upload_id: file_id,
897
+ operation: "commit",
898
+ blocks: blockIds.length,
899
+ }),
900
+ );
901
+ throw error;
902
+ }
903
+ }
919
904
 
920
- yield* Effect.logError("Error on get upload").pipe(
921
- Effect.annotateLogs({
922
- upload_id: id,
923
- error: JSON.stringify(error),
905
+ return newOffset;
924
906
  }),
925
- );
926
- throw error;
927
- }
907
+ ),
908
+ );
909
+ };
928
910
 
929
- const incompleteBlockSize = yield* getIncompleteBlockSize(id);
911
+ const getUpload = (id: string) => {
912
+ return Effect.gen(function* () {
913
+ const uploadFile = yield* kvStore.get(id);
930
914
 
931
- return {
932
- ...uploadFile,
933
- offset: offset + (incompleteBlockSize ?? 0),
934
- size: uploadFile.size,
935
- storage: uploadFile.storage,
936
- };
937
- });
938
- };
915
+ let offset = 0;
939
916
 
940
- const remove = (id: string) => {
941
- return Effect.gen(function* () {
942
- try {
943
- const blobClient = containerClient.getBlockBlobClient(id);
944
- yield* Effect.promise(() => blobClient.deleteIfExists());
917
+ try {
918
+ const blocks = yield* retrieveBlocks(id);
919
+ offset = calcOffsetFromBlocks(blocks);
920
+ } catch (error) {
921
+ // Check if the error is caused by the blob not being found
922
+ if (
923
+ typeof error === "object" &&
924
+ error !== null &&
925
+ "statusCode" in error &&
926
+ error.statusCode === 404
927
+ ) {
928
+ return {
929
+ ...uploadFile,
930
+ offset: uploadFile.size as number,
931
+ size: uploadFile.size,
932
+ metadata: uploadFile.metadata,
933
+ storage: uploadFile.storage,
934
+ };
935
+ }
945
936
 
946
- // Also delete incomplete block if it exists
947
- yield* deleteIncompleteBlock(id);
948
- } catch (error) {
949
- if (
950
- typeof error === "object" &&
951
- error !== null &&
952
- "statusCode" in error &&
953
- error.statusCode === 404
954
- ) {
955
- yield* Effect.logError("No file found").pipe(
937
+ yield* Effect.logError("Error on get upload").pipe(
956
938
  Effect.annotateLogs({
957
939
  upload_id: id,
940
+ error: JSON.stringify(error),
958
941
  }),
959
942
  );
960
- return yield* Effect.fail(UploadistaError.fromCode("FILE_NOT_FOUND"));
943
+ throw error;
961
944
  }
962
- Effect.runSync(
963
- trackAzureError("remove", error, {
964
- upload_id: id,
965
- }),
966
- );
967
- throw error;
968
- }
969
945
 
970
- yield* clearCache(id);
971
- yield* activeUploadsGauge(Effect.succeed(-1));
972
- });
973
- };
946
+ const incompleteBlockSize = yield* getIncompleteBlockSize(id);
974
947
 
975
- const getExpiration = () => {
976
- return expirationPeriodInMilliseconds;
977
- };
948
+ return {
949
+ ...uploadFile,
950
+ offset: offset + (incompleteBlockSize ?? 0),
951
+ size: uploadFile.size,
952
+ storage: uploadFile.storage,
953
+ };
954
+ });
955
+ };
978
956
 
979
- const getExpirationDate = (created_at: string) => {
980
- const date = new Date(created_at);
981
- return new Date(date.getTime() + getExpiration());
982
- };
957
+ const remove = (id: string) => {
958
+ return Effect.gen(function* () {
959
+ try {
960
+ const blobClient = containerClient.getBlockBlobClient(id);
961
+ yield* Effect.promise(() => blobClient.deleteIfExists());
983
962
 
984
- const deleteExpired = (): Effect.Effect<number, UploadistaError> => {
985
- return Effect.tryPromise({
986
- try: async (): Promise<number> => {
987
- if (getExpiration() === 0) {
988
- return 0;
963
+ // Also delete incomplete block if it exists
964
+ yield* deleteIncompleteBlock(id);
965
+ } catch (error) {
966
+ if (
967
+ typeof error === "object" &&
968
+ error !== null &&
969
+ "statusCode" in error &&
970
+ error.statusCode === 404
971
+ ) {
972
+ yield* Effect.logError("No file found").pipe(
973
+ Effect.annotateLogs({
974
+ upload_id: id,
975
+ }),
976
+ );
977
+ return yield* Effect.fail(
978
+ UploadistaError.fromCode("FILE_NOT_FOUND"),
979
+ );
980
+ }
981
+ Effect.runSync(
982
+ trackAzureError("remove", error, {
983
+ upload_id: id,
984
+ }),
985
+ );
986
+ throw error;
989
987
  }
990
988
 
991
- let deleted = 0;
989
+ yield* clearCache(id);
990
+ yield* activeUploadsGauge(Effect.succeed(-1));
991
+ });
992
+ };
992
993
 
993
- const response = containerClient.listBlobsFlat({
994
- includeMetadata: true,
995
- });
994
+ const getExpiration = () => {
995
+ return expirationPeriodInMilliseconds;
996
+ };
996
997
 
997
- const expiredBlobs: string[] = [];
998
+ const getExpirationDate = (created_at: string) => {
999
+ const date = new Date(created_at);
1000
+ return new Date(date.getTime() + getExpiration());
1001
+ };
998
1002
 
999
- for await (const blob of response) {
1000
- if (blob.metadata?.creationDate) {
1001
- const creationDate = new Date(blob.metadata.creationDate);
1002
- if (
1003
- Date.now() >
1004
- getExpirationDate(creationDate.toISOString()).getTime()
1005
- ) {
1006
- expiredBlobs.push(blob.name);
1003
+ const deleteExpired = (): Effect.Effect<number, UploadistaError> => {
1004
+ return Effect.tryPromise({
1005
+ try: async (): Promise<number> => {
1006
+ if (getExpiration() === 0) {
1007
+ return 0;
1008
+ }
1009
+
1010
+ let deleted = 0;
1011
+
1012
+ const response = containerClient.listBlobsFlat({
1013
+ includeMetadata: true,
1014
+ });
1015
+
1016
+ const expiredBlobs: string[] = [];
1017
+
1018
+ for await (const blob of response) {
1019
+ if (blob.metadata?.creationDate) {
1020
+ const creationDate = new Date(blob.metadata.creationDate);
1021
+ if (
1022
+ Date.now() >
1023
+ getExpirationDate(creationDate.toISOString()).getTime()
1024
+ ) {
1025
+ expiredBlobs.push(blob.name);
1026
+ }
1007
1027
  }
1008
1028
  }
1009
- }
1010
1029
 
1011
- // Delete expired blobs
1012
- for (const blobName of expiredBlobs) {
1013
- await containerClient.deleteBlob(blobName);
1014
- deleted++;
1015
- }
1030
+ // Delete expired blobs
1031
+ for (const blobName of expiredBlobs) {
1032
+ await containerClient.deleteBlob(blobName);
1033
+ deleted++;
1034
+ }
1016
1035
 
1017
- return deleted;
1018
- },
1019
- catch: (error) =>
1020
- UploadistaError.fromCode("FILE_WRITE_ERROR", { cause: error as Error }),
1021
- });
1022
- };
1036
+ return deleted;
1037
+ },
1038
+ catch: (error) =>
1039
+ UploadistaError.fromCode("FILE_WRITE_ERROR", {
1040
+ cause: error as Error,
1041
+ }),
1042
+ });
1043
+ };
1023
1044
 
1024
- const getCapabilities = (): DataStoreCapabilities => {
1025
- return {
1026
- supportsParallelUploads: true,
1027
- supportsConcatenation: false, // Azure doesn't have native concatenation like GCS
1028
- supportsDeferredLength: true,
1029
- supportsResumableUploads: true,
1030
- supportsTransactionalUploads: true,
1031
- maxConcurrentUploads: maxConcurrentBlockUploads,
1032
- minChunkSize: minBlockSize,
1033
- maxChunkSize: 4000 * 1024 * 1024, // 4000MB Azure limit
1034
- maxParts: maxBlocks,
1035
- optimalChunkSize: preferredBlockSize,
1036
- requiresOrderedChunks: false,
1037
- requiresMimeTypeValidation: true,
1038
- maxValidationSize: undefined, // no size limit
1045
+ const getCapabilities = (): DataStoreCapabilities => {
1046
+ return {
1047
+ supportsParallelUploads: true,
1048
+ supportsConcatenation: false, // Azure doesn't have native concatenation like GCS
1049
+ supportsDeferredLength: true,
1050
+ supportsResumableUploads: true,
1051
+ supportsTransactionalUploads: true,
1052
+ maxConcurrentUploads: maxConcurrentBlockUploads,
1053
+ minChunkSize: minBlockSize,
1054
+ maxChunkSize: 4000 * 1024 * 1024, // 4000MB Azure limit
1055
+ maxParts: maxBlocks,
1056
+ optimalChunkSize: preferredBlockSize,
1057
+ requiresOrderedChunks: false,
1058
+ requiresMimeTypeValidation: true,
1059
+ maxValidationSize: undefined, // no size limit
1060
+ };
1039
1061
  };
1040
- };
1041
1062
 
1042
- const getChunkerConstraints = () => {
1043
- return {
1044
- minChunkSize: minBlockSize,
1045
- maxChunkSize: 4000 * 1024 * 1024, // 4000MB Azure limit
1046
- optimalChunkSize: preferredBlockSize,
1047
- requiresOrderedChunks: false,
1063
+ const getChunkerConstraints = () => {
1064
+ return {
1065
+ minChunkSize: minBlockSize,
1066
+ maxChunkSize: 4000 * 1024 * 1024, // 4000MB Azure limit
1067
+ optimalChunkSize: preferredBlockSize,
1068
+ requiresOrderedChunks: false,
1069
+ };
1048
1070
  };
1049
- };
1050
1071
 
1051
- const validateUploadStrategy = (
1052
- strategy: UploadStrategy,
1053
- ): Effect.Effect<boolean, never> => {
1054
- const capabilities = getCapabilities();
1055
-
1056
- const result = (() => {
1057
- switch (strategy) {
1058
- case "parallel":
1059
- return capabilities.supportsParallelUploads;
1060
- case "single":
1061
- return true;
1062
- default:
1063
- return false;
1064
- }
1065
- })();
1072
+ const validateUploadStrategy = (
1073
+ strategy: UploadStrategy,
1074
+ ): Effect.Effect<boolean, never> => {
1075
+ const capabilities = getCapabilities();
1076
+
1077
+ const result = (() => {
1078
+ switch (strategy) {
1079
+ case "parallel":
1080
+ return capabilities.supportsParallelUploads;
1081
+ case "single":
1082
+ return true;
1083
+ default:
1084
+ return false;
1085
+ }
1086
+ })();
1066
1087
 
1067
- return Effect.succeed(result);
1068
- };
1088
+ return Effect.succeed(result);
1089
+ };
1069
1090
 
1070
- return {
1071
- bucket: containerName,
1072
- create,
1073
- remove,
1074
- write,
1075
- getUpload,
1076
- read,
1077
- readStream,
1078
- deleteExpired: deleteExpired(),
1079
- getCapabilities,
1080
- getChunkerConstraints,
1081
- validateUploadStrategy,
1082
- };
1091
+ return {
1092
+ bucket: containerName,
1093
+ create,
1094
+ remove,
1095
+ write,
1096
+ getUpload,
1097
+ read,
1098
+ readStream,
1099
+ deleteExpired: deleteExpired(),
1100
+ getCapabilities,
1101
+ getChunkerConstraints,
1102
+ validateUploadStrategy,
1103
+ } as DataStore<UploadFile>;
1104
+ });
1083
1105
  }