@uploadista/data-store-r2 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,955 @@
1
+ import type { ReadableStream } from "@cloudflare/workers-types";
2
+ import { UploadistaError } from "@uploadista/core/errors";
3
+ import type {
4
+ DataStore,
5
+ DataStoreCapabilities,
6
+ DataStoreWriteOptions,
7
+ UploadFile,
8
+ UploadStrategy,
9
+ } from "@uploadista/core/types";
10
+ import { UploadFileKVStore } from "@uploadista/core/types";
11
+ import {
12
+ s3ActiveUploadsGauge as activeUploadsGauge,
13
+ s3FileSizeHistogram as fileSizeHistogram,
14
+ logS3UploadCompletion,
15
+ s3PartSizeHistogram as partSizeHistogram,
16
+ s3PartUploadDurationHistogram as partUploadDurationHistogram,
17
+ s3UploadDurationHistogram as uploadDurationHistogram,
18
+ s3UploadErrorsTotal as uploadErrorsTotal,
19
+ s3UploadPartsTotal as uploadPartsTotal,
20
+ s3UploadRequestsTotal as uploadRequestsTotal,
21
+ s3UploadSuccessTotal as uploadSuccessTotal,
22
+ withS3TimingMetrics as withTimingMetrics,
23
+ withS3UploadMetrics as withUploadMetrics,
24
+ } from "@uploadista/observability";
25
+ import { Effect, Ref, Schedule, Stream } from "effect";
26
+ import { R2ClientLayer, R2ClientService } from "./services/r2-client.service";
27
+ import type { ChunkInfo, R2StoreConfig, R2UploadedPart } from "./types";
28
+ import { calcOffsetFromParts, calcOptimalPartSize } from "./utils";
29
+
30
/**
 * Derives the R2/S3 object key for an upload.
 *
 * The key is the upload ID, optionally followed by the extension of the
 * original filename. The filename is looked up in metadata under `filename`,
 * `fileName`, or `name` (first truthy value wins).
 *
 * Only the last path segment of the filename is inspected and a trailing dot
 * is not treated as an extension, so a client-supplied name such as
 * `"my.dir/readme"` or `"report."` cannot inject a path separator or a
 * dangling `.` into the object key.
 *
 * @param uploadFile - Upload whose `id` and `metadata` are used.
 * @returns The upload ID, or the upload ID followed by `.ext`.
 */
const getS3Key = (uploadFile: UploadFile): string => {
  const { id, metadata } = uploadFile;
  if (!metadata) {
    return id;
  }

  // Try common metadata keys for filename
  const filename = metadata.filename || metadata.fileName || metadata.name;
  if (typeof filename !== "string") {
    return id;
  }

  // Ignore directory components: only the base name can carry an extension.
  const lastSeparator = Math.max(
    filename.lastIndexOf("/"),
    filename.lastIndexOf("\\"),
  );
  const baseName = filename.substring(lastSeparator + 1);
  const dotIndex = baseName.lastIndexOf(".");

  // No dot, or a dot with nothing after it, means there is no usable extension.
  if (dotIndex === -1 || dotIndex === baseName.length - 1) {
    return id;
  }

  return `${id}${baseName.substring(dotIndex)}`;
};
52
+
53
+ // Clean implementation using composed services
54
+ export function createR2Store(config: R2StoreConfig) {
55
+ const {
56
+ deliveryUrl,
57
+ partSize,
58
+ minPartSize = 5_242_880,
59
+ useTags = true,
60
+ maxMultipartParts = 10_000,
61
+ maxConcurrentPartUploads = 60,
62
+ bucket,
63
+ } = config;
64
+
65
+ return Effect.gen(function* () {
66
+ const r2Client = yield* R2ClientService;
67
+ const kvStore = yield* UploadFileKVStore;
68
+ const preferredPartSize = partSize || 8 * 1024 * 1024;
69
+
70
// Reads the multipart upload ID stored on `uploadFile.storage`.
// Fails with a FILE_WRITE_ERROR when the ID is missing (falsy).
const getUploadId = (
  uploadFile: UploadFile,
): Effect.Effect<string, UploadistaError> => {
  const { uploadId } = uploadFile.storage;

  return uploadId
    ? Effect.succeed(uploadId)
    : Effect.fail(
        UploadistaError.fromCode(
          "FILE_WRITE_ERROR",
          new Error("Upload ID is undefined"),
        ),
      );
};
84
+
85
/**
 * Uploads a single multipart part to R2 and records it in the KV store.
 *
 * The upload is retried with exponential backoff (1 second base, factor 2,
 * at most 3 retries). Every failed attempt is logged as a warning before the
 * retry policy decides whether to try again.
 *
 * Part bookkeeping lives in KV because R2 offers no listParts API. The record
 * is re-read from KV before the new part is appended: `uploadFile` is a
 * snapshot loaded once per `write` call, so appending to
 * `uploadFile.storage.parts` would make each part of the same write overwrite
 * the parts recorded just before it.
 *
 * @param uploadFile - Upload the part belongs to (provides id, key and upload ID).
 * @param data - Bytes of the part.
 * @param partNumber - Multipart part number to upload under.
 * @returns An effect yielding the etag returned by the R2 client.
 */
const uploadPart = (
  uploadFile: UploadFile,
  data: Uint8Array,
  partNumber: number,
) => {
  const s3Key = getS3Key(uploadFile);

  return withTimingMetrics(
    partUploadDurationHistogram,
    Effect.gen(function* () {
      const uploadId = yield* getUploadId(uploadFile);

      const etag = yield* r2Client
        .uploadPart({
          bucket: r2Client.bucket,
          key: s3Key,
          uploadId,
          partNumber,
          data,
        })
        .pipe(
          // tapError sits in front of retry so it fires for every failed
          // attempt, not just once after all retries have been used up.
          Effect.tapError((error) =>
            Effect.logWarning("Retrying part upload").pipe(
              Effect.annotateLogs({
                upload_id: uploadFile.id,
                part_number: partNumber,
                error_message: error.message,
                retry_attempt: "unknown", // Will be overridden by the retry schedule
                part_size: data.length,
                s3_bucket: r2Client.bucket,
              }),
            ),
          ),
          Effect.retry(
            Schedule.exponential("1 second", 2.0).pipe(
              Schedule.intersect(Schedule.recurs(3)),
            ),
          ),
        );

      // Store part metadata in KV (R2 doesn't provide listParts API).
      // Start from the latest stored record so parts uploaded earlier in the
      // same write are kept; a re-upload of the same part number replaces the
      // previous entry instead of duplicating it.
      const latest = yield* kvStore.get(uploadFile.id);
      const otherParts = (latest.storage.parts || []).filter(
        (part) => part.partNumber !== partNumber,
      );

      yield* kvStore.set(uploadFile.id, {
        ...latest,
        storage: {
          ...latest.storage,
          parts: [...otherParts, { partNumber, etag, size: data.length }],
        },
      });

      yield* uploadPartsTotal(Effect.succeed(1));
      yield* Effect.logInfo("Part uploaded successfully").pipe(
        Effect.annotateLogs({
          upload_id: uploadFile.id,
          part_number: partNumber,
          part_size: data.length,
          etag: etag,
        }),
      );

      return etag;
    }),
  ).pipe(
    Effect.withSpan(`s3-upload-part-${partNumber}`, {
      attributes: {
        "upload.id": uploadFile.id,
        "upload.part_number": partNumber,
        "upload.part_size": data.length,
        "s3.bucket": r2Client.bucket,
        "s3.key": s3Key,
      },
    }),
  );
};
165
+
166
// Hands the bytes to the R2 client's `putIncompletePart` under the upload id.
const uploadIncompletePart = (id: string, data: Uint8Array) => {
  return r2Client.putIncompletePart(id, data);
};
168
+
169
// Fetches the incomplete part for `id` (if any) and buffers it fully in memory.
// Yields `undefined` when the client returns nothing, otherwise the total byte
// count together with a Stream that replays the buffered chunks.
const downloadIncompletePart = (id: string) =>
  Effect.gen(function* () {
    const body = yield* r2Client.getIncompletePart(id);
    if (!body) {
      return undefined;
    }

    // Drain the readable stream, keeping every chunk so the size is known.
    const reader = body.getReader();
    const pieces: Uint8Array[] = [];
    let byteCount = 0;

    try {
      for (;;) {
        const result = yield* Effect.promise(() => reader.read());
        if (result.done) break;
        pieces.push(result.value);
        byteCount += result.value.length;
      }
    } finally {
      reader.releaseLock();
    }

    return {
      size: byteCount,
      stream: Stream.fromIterable(pieces),
    };
  });
200
+
201
// Delegates to the R2 client's `deleteIncompletePart` for this upload id.
const deleteIncompletePart = (id: string) => {
  return r2Client.deleteIncompletePart(id);
};
203
+
204
// Delegates to the R2 client's `getIncompletePartSize` for this upload id.
const getIncompletePartSize = (id: string) => {
  return r2Client.getIncompletePartSize(id);
};
206
+
207
// Completes the multipart upload for `uploadFile` with the given parts.
// On success the upload-success counter is incremented; the whole operation
// runs inside the "s3-complete-multipart-upload" span.
const complete = (uploadFile: UploadFile, parts: Array<R2UploadedPart>) => {
  const s3Key = getS3Key(uploadFile);

  const finalize = getUploadId(uploadFile).pipe(
    Effect.flatMap((uploadId) =>
      r2Client.completeMultipartUpload(
        {
          bucket: r2Client.bucket,
          key: s3Key,
          uploadId,
        },
        parts,
      ),
    ),
  );

  return finalize.pipe(
    Effect.tap(() => uploadSuccessTotal(Effect.succeed(1))),
    Effect.withSpan("s3-complete-multipart-upload", {
      attributes: {
        "upload.id": uploadFile.id,
        "upload.parts_count": parts.length,
        "s3.bucket": r2Client.bucket,
        "s3.key": s3Key,
      },
    }),
  );
};
233
+
234
// Aborts the multipart upload for `uploadFile`, then deletes the object
// stored under the same key. Fails early when no upload ID is recorded.
const abort = (uploadFile: UploadFile) => {
  const s3Key = getS3Key(uploadFile);

  return getUploadId(uploadFile).pipe(
    Effect.flatMap((uploadId) =>
      r2Client.abortMultipartUpload({
        bucket: r2Client.bucket,
        key: s3Key,
        uploadId,
      }),
    ),
    Effect.flatMap(() => r2Client.deleteObjects([s3Key])),
    Effect.asVoid,
  );
};
249
+
250
// Loads the recorded parts for an upload from the KV store (R2 has no
// listParts API) and returns them sorted by ascending part number.
// `uploadFound` is always `true` here.
const retrieveParts = (id: string) =>
  Effect.gen(function* () {
    const record = yield* kvStore.get(id);

    // Copy into the R2UploadedPart shape for compatibility.
    const r2Parts: R2UploadedPart[] = (record.storage.parts || []).map(
      ({ partNumber, etag, size }) => ({ partNumber, etag, size }),
    );

    // Parts without a number sort as if they were part 0.
    r2Parts.sort(
      (left, right) => (left.partNumber ?? 0) - (right.partNumber ?? 0),
    );

    return { uploadFound: true, parts: r2Parts };
  });
269
+
270
// When `useTags` is set and the stored record has an upload ID, writes the
// record back to the KV store with that same upload ID. Always yields 0.
const completeMetadata = (upload: UploadFile, useTags: boolean) =>
  Effect.gen(function* () {
    if (useTags) {
      const stored = yield* kvStore.get(upload.id);
      const { uploadId } = stored.storage;

      if (uploadId) {
        yield* kvStore.set(upload.id, {
          ...stored,
          storage: { ...stored.storage, uploadId },
        });
      }
    }

    return 0;
  });
289
+
290
// Logs the operation and then deletes the upload's record from the KV store.
const clearCache = (id: string) =>
  Effect.gen(function* () {
    const announce = Effect.logInfo("Clearing cache").pipe(
      Effect.annotateLogs({ upload_id: id }),
    );

    yield* announce;
    yield* kvStore.delete(id);
  });
297
+
298
// Starts a multipart upload through the R2 client, persists the resulting
// record (path, upload ID, bucket, delivery URL) in the KV store, updates the
// request counter and file-size histogram, and yields the stored record.
const createMultipartUpload = (upload: UploadFile) => {
  const s3Key = getS3Key(upload);

  const startUpload = Effect.gen(function* () {
    yield* Effect.logInfo("Initializing multipart upload").pipe(
      Effect.annotateLogs({ upload_id: upload.id }),
    );

    const created = yield* r2Client.createMultipartUpload({
      bucket: r2Client.bucket,
      key: s3Key,
      uploadId: "", // Not needed for create
      contentType: upload.metadata?.contentType?.toString(),
      cacheControl: upload.metadata?.cacheControl?.toString(),
    });

    const storage = {
      ...upload.storage,
      path: created.key,
      uploadId: created.uploadId,
      bucket: created.bucket,
    };
    const uploadCreated = {
      ...upload,
      storage,
      url: `${deliveryUrl}/${s3Key}`,
    };

    yield* kvStore.set(upload.id, uploadCreated);

    yield* Effect.logInfo("Multipart upload created").pipe(
      Effect.annotateLogs({
        upload_id: upload.id,
        s3_upload_id: uploadCreated.storage.uploadId,
        s3_key: s3Key,
      }),
    );

    yield* uploadRequestsTotal(Effect.succeed(1));
    yield* fileSizeHistogram(Effect.succeed(upload.size || 0));

    return uploadCreated;
  });

  return startUpload.pipe(
    Effect.withSpan("s3-create-upload", {
      attributes: {
        "upload.id": upload.id,
        "upload.size": upload.size || 0,
        "s3.bucket": r2Client.bucket,
        "s3.key": s3Key,
      },
    }),
  );
};
350
+
351
/**
 * Creates an upload by starting an R2 multipart upload for it.
 *
 * All bookkeeping is done by `createMultipartUpload`: it writes the upload
 * record (path, upload ID, bucket, URL) to the KV store, logs the
 * "Initializing"/"created" messages and increments the upload-request
 * counter. This wrapper must not repeat those steps; doing so stored the same
 * record twice and counted every upload twice in the request metric.
 *
 * @param upload - Upload to create.
 * @returns An effect yielding the stored upload record.
 */
const create = (upload: UploadFile) =>
  createMultipartUpload(upload).pipe(
    Effect.withSpan("s3-create-upload", {
      attributes: {
        "upload.id": upload.id,
        "upload.size": upload.size || 0,
        "s3.bucket": bucket,
      },
    }),
  );
382
+
383
// Removes an upload: loads its record, aborts the multipart upload (which
// also deletes the object), then drops the KV record.
const remove = (id: string) =>
  kvStore.get(id).pipe(
    Effect.flatMap((uploadFile) => abort(uploadFile)),
    Effect.flatMap(() => clearCache(id)),
  );
389
+
390
/**
 * Writes a stream of bytes to an existing upload and returns the new offset.
 *
 * Steps: mark the upload active, load the upload state (`prepareUpload`),
 * pick the part size, upload the chunks (`uploadParts`), and complete the
 * multipart upload once the new offset equals the declared file size.
 * The active-uploads gauge is reset when the effect ends, whatever the outcome.
 *
 * @param options - `file_id`, data `stream` and starting `offset`.
 * @param dependencies - Optional `onProgress` callback receiving byte offsets.
 * @returns An effect yielding the offset after this write.
 */
const write = (
  options: DataStoreWriteOptions,
  dependencies: { onProgress?: (currentOffset: number) => void },
) =>
  withUploadMetrics(
    options.file_id,
    withTimingMetrics(
      uploadDurationHistogram,
      Effect.gen(function* () {
        const {
          stream: initialData,
          file_id,
          offset: initialOffset,
        } = options;
        const { onProgress } = dependencies;

        // Capture start time for upload completion metrics
        const startTime = Date.now();

        // Track active upload
        yield* activeUploadsGauge(Effect.succeed(1));

        const {
          uploadFile,
          nextPartNumber,
          offset,
          data,
          existingPartSize,
        } = yield* prepareUpload(file_id, initialOffset, initialData);

        // Computed once and reused for both the decision and the log entry.
        const calculatedPartSize = calcOptimalPartSize(
          uploadFile.size,
          preferredPartSize,
          minPartSize,
          maxMultipartParts,
        );

        // Use existing part size if parts already exist, otherwise the calculated one
        const uploadPartSize = existingPartSize || calculatedPartSize;

        // Log part size decision for debugging
        yield* Effect.logInfo("Part size decision").pipe(
          Effect.annotateLogs({
            upload_id: file_id,
            existing_part_size: existingPartSize,
            calculated_part_size: calculatedPartSize,
            final_part_size: uploadPartSize,
            next_part_number: nextPartNumber,
          }),
        );

        const bytesUploaded = yield* uploadParts(
          uploadFile,
          data,
          nextPartNumber,
          offset,
          uploadPartSize,
          minPartSize,
          maxConcurrentPartUploads,
          onProgress,
        );

        const newOffset = offset + bytesUploaded;

        // Finish as soon as every byte has arrived. The previous guard
        // compared the byte offset against `maxConcurrentPartUploads`, a
        // concurrency limit, so uploads no larger than that number of bytes
        // were never completed. Empty uploads (offset 0) are still left
        // untouched, as before, since no part has been uploaded for them.
        if (newOffset > 0 && uploadFile.size === newOffset) {
          yield* finishUpload(file_id, uploadFile, startTime);
        }

        return newOffset;
      }).pipe(Effect.ensuring(activeUploadsGauge(Effect.succeed(0)))),
    ),
  );
474
+
475
// Returns the stored upload record augmented with its current offset: the
// bytes covered by recorded parts plus the size of any incomplete part.
// If the parts lookup reports the upload as not found, the offset is the
// full file size.
const getUpload = (id: string) =>
  Effect.gen(function* () {
    const uploadFile = yield* kvStore.get(id);
    const lookup = yield* retrieveParts(id);

    if (!lookup.uploadFound) {
      return {
        ...uploadFile,
        offset: uploadFile.size as number,
        size: uploadFile.size,
      };
    }

    const committedBytes = calcOffsetFromParts(lookup.parts);
    const pendingBytes = yield* getIncompletePartSize(id);

    return {
      ...uploadFile,
      offset: committedBytes + (pendingBytes ?? 0),
      size: uploadFile.size,
      storage: uploadFile.storage,
    };
  });
498
+
499
+ // const read = (id: string) =>
500
+ // Effect.gen(function* () {
501
+ // return yield* r2Client.getObject(id);
502
+ // });
503
+
504
+ // Helper functions
505
// Gathers everything `write` needs before uploading:
//  - the stored upload record,
//  - the next part number (one past the last recorded part),
//  - the size of the first recorded part, reused to keep part sizes uniform,
//  - the data stream, prefixed with a previously stored incomplete part when
//    one exists (the offset is moved back by that part's size and the stored
//    incomplete part is deleted).
const prepareUpload = (
  fileId: string,
  initialOffset: number,
  initialData: Stream.Stream<Uint8Array, UploadistaError>,
) =>
  Effect.gen(function* () {
    const uploadFile = yield* kvStore.get(fileId);

    // Parts are tracked in the stored record's storage metadata.
    const parts = uploadFile.storage.parts || [];

    const lastPart = parts.length > 0 ? parts[parts.length - 1] : undefined;
    const partNumber: number =
      lastPart && lastPart.partNumber ? lastPart.partNumber : 0;
    const nextPartNumber = partNumber + 1;

    // The first part's size is the reference size for all later parts.
    const existingPartSize =
      parts.length > 0 && parts[0].size ? parts[0].size : null;

    // Every recorded part except possibly the last should match that size;
    // a mismatch is only reported, not treated as an error.
    if (existingPartSize && parts.length > 1) {
      const mismatch = parts
        .slice(0, -1)
        .find((part) => part.size !== existingPartSize);

      if (mismatch) {
        yield* Effect.logWarning(
          "Inconsistent part sizes detected in existing upload",
        ).pipe(
          Effect.annotateLogs({
            upload_id: fileId,
            expected_size: existingPartSize,
            inconsistent_part: mismatch.partNumber,
            inconsistent_size: mismatch.size,
          }),
        );
      }
    }

    const incompletePart = yield* downloadIncompletePart(fileId);

    if (!incompletePart) {
      return {
        uploadFile,
        nextPartNumber,
        offset: initialOffset,
        incompletePartSize: 0,
        data: initialData,
        existingPartSize,
      };
    }

    yield* deleteIncompletePart(fileId);

    return {
      uploadFile,
      nextPartNumber,
      offset: initialOffset - incompletePart.size,
      incompletePartSize: incompletePart.size,
      data: incompletePart.stream.pipe(Stream.concat(initialData)),
      existingPartSize,
    };
  });
571
+
572
// Completes a fully received upload: loads the recorded parts, completes the
// multipart upload, runs `completeMetadata`, and logs completion metrics
// (duration, throughput, average part size). Any failure increments the
// upload error counter and is logged before being propagated.
const finishUpload = (
  fileId: string,
  uploadFile: UploadFile,
  startTime: number,
) => {
  const finish = Effect.gen(function* () {
    const { parts } = yield* retrieveParts(fileId);
    const lastIndex = parts.length - 1;

    // Log all parts for debugging S3 multipart upload requirements
    yield* Effect.logInfo("Attempting to complete multipart upload").pipe(
      Effect.annotateLogs({
        upload_id: fileId,
        parts_count: parts.length,
        parts_info: parts.map((part, index) => ({
          part_number: part.partNumber,
          size: part.size,
          etag: part.etag,
          is_final_part: index === lastIndex,
        })),
      }),
    );

    yield* complete(uploadFile, parts);
    yield* completeMetadata(uploadFile, useTags);
    // yield* clearCache(fileId);

    // Upload completion metrics
    const totalDurationMs = Date.now() - startTime;
    const fileSize = uploadFile.size || 0;
    const throughputBps =
      totalDurationMs > 0 ? (fileSize * 1000) / totalDurationMs : 0;

    // Average part size is only defined when at least one part exists.
    let averagePartSize: number | undefined;
    if (parts.length > 0) {
      const totalPartBytes = parts.reduce(
        (sum, part) => sum + (part.size || 0),
        0,
      );
      averagePartSize = totalPartBytes / parts.length;
    }

    yield* logS3UploadCompletion(fileId, {
      fileSize,
      totalDurationMs,
      partsCount: parts.length,
      averagePartSize,
      throughputBps,
    });
  });

  return finish.pipe(
    Effect.tapError((error) =>
      Effect.gen(function* () {
        yield* uploadErrorsTotal(Effect.succeed(1));
        yield* Effect.logError("Failed to finish upload").pipe(
          Effect.annotateLogs({
            upload_id: fileId,
            error: String(error),
          }),
        );
      }),
    ),
  );
};
632
+
633
// R2 has no listMultipartUploads API, so expired uploads cannot be found and
// removed from here. This effect only logs a warning and reports 0 deletions.
// Cleanup should be done with R2 lifecycle rules or a Cloudflare Workers Cron:
// https://developers.cloudflare.com/r2/buckets/object-lifecycles/
const deleteExpired = Effect.gen(function* () {
  const warning = Effect.logWarning(
    "R2 does not support automatic expired upload deletion via API. Please use R2 lifecycle rules instead.",
  );

  yield* warning.pipe(
    Effect.annotateLogs({
      bucket: r2Client.bucket,
    }),
  );

  return 0;
});
646
+
647
// Re-chunks a byte stream in a single pass into parts of exactly `chunkSize`
// bytes; only the last emitted part may be shorter. Incoming data is
// accumulated in a buffer and full-size parts are emitted as soon as enough
// bytes are available. Part numbers start at 1.
const createChunkedStream =
  (chunkSize: number) =>
  <E>(stream: Stream.Stream<Uint8Array, E>): Stream.Stream<ChunkInfo, E> =>
    Stream.async<ChunkInfo, E>((emit) => {
      let pending = new Uint8Array(0);
      let nextPartNumber = 1;
      let bytesSeen = 0;

      // Logs the part (INFO level so it shows up in logs) and pushes it downstream.
      const emitChunk = (data: Uint8Array, isFinalChunk = false) => {
        Effect.runSync(
          Effect.logInfo("Creating chunk").pipe(
            Effect.annotateLogs({
              part_number: nextPartNumber,
              chunk_size: data.length,
              expected_size: chunkSize,
              is_final_chunk: isFinalChunk,
              total_bytes_processed: bytesSeen + data.length,
            }),
          ),
        );
        emit.single({
          partNumber: nextPartNumber,
          data,
          size: data.length,
        });
        nextPartNumber += 1;
      };

      // Appends incoming bytes to the buffer and emits every full-size part.
      const absorb = (incoming: Uint8Array) => {
        const merged = new Uint8Array(pending.length + incoming.length);
        merged.set(pending);
        merged.set(incoming, pending.length);
        pending = merged;
        bytesSeen += incoming.length;

        // All parts except the last must have the same size.
        while (pending.length >= chunkSize) {
          const fullPart = pending.slice(0, chunkSize);
          pending = pending.slice(chunkSize);
          emitChunk(fullPart, false);
        }
      };

      // Flushes whatever is left (shorter than chunkSize) and ends the stream.
      const flush = Effect.sync(() => {
        if (pending.length > 0) {
          emitChunk(pending, true);
        }
        emit.end();
      });

      // Drive the source stream in a forked fiber.
      const drain = stream.pipe(
        Stream.runForEach((piece) => Effect.sync(() => absorb(piece))),
        Effect.andThen(() => flush),
        Effect.catchAll((error) => Effect.sync(() => emit.fail(error))),
      );

      Effect.runFork(drain);
    });
717
+
718
// Byte-level progress tracking during streaming.
// Wraps a stream so that `onProgress` is called with the running byte total
// (starting from `initialOffset`) each time a chunk flows through, i.e. before
// the bytes are uploaded. This keeps progress smooth even for small files
// (< 5MB) that would otherwise jump from 0% to 100%.
// Without an `onProgress` callback the stream is returned unchanged.
const withByteProgressTracking =
  (onProgress?: (totalBytes: number) => void, initialOffset = 0) =>
  <E, R>(stream: Stream.Stream<Uint8Array, E, R>) => {
    if (!onProgress) return stream;

    const tracked = Ref.make(initialOffset).pipe(
      Effect.map((byteCounter) =>
        stream.pipe(
          Stream.tap((chunk) =>
            Effect.gen(function* () {
              const runningTotal = yield* Ref.updateAndGet(
                byteCounter,
                (total) => total + chunk.length,
              );
              onProgress(runningTotal);
            }),
          ),
        ),
      ),
    );

    return Stream.unwrap(tracked);
  };
743
+
744
// Splits `readStream` into parts of `uploadPartSize` bytes and stores each one.
// A chunk is uploaded as a regular multipart part when it reaches
// `minPartSize` or is judged to be the final part (cumulative offset has
// reached the file size); otherwise it is stored as an incomplete part.
// Yields the number of bytes stored during this call.
const uploadParts = (
  uploadFile: UploadFile,
  readStream: Stream.Stream<Uint8Array, UploadistaError>,
  initCurrentPartNumber: number,
  initOffset: number,
  uploadPartSize: number,
  minPartSize: number,
  maxConcurrentPartUploads: number,
  onProgress?: (newOffset: number) => void,
) =>
  Effect.gen(function* () {
    yield* Effect.logInfo("Starting part uploads").pipe(
      Effect.annotateLogs({
        upload_id: uploadFile.id,
        init_offset: initOffset,
        file_size: uploadFile.size,
        part_size: uploadPartSize,
        min_part_size: minPartSize,
      }),
    );

    // Progress is reported per byte while data streams through the pipeline,
    // before it is uploaded, so even single-part files get gradual feedback.
    // After that the bytes are regrouped into uniform parts.
    const chunkStream = readStream.pipe(
      withByteProgressTracking(onProgress, initOffset),
      createChunkedStream(uploadPartSize),
    );

    // Running offset (to detect the final part) and bytes stored by this call.
    const cumulativeOffsetRef = yield* Ref.make(initOffset);
    const totalBytesUploadedRef = yield* Ref.make(0);

    const uploadChunk = (chunkInfo: ChunkInfo) =>
      Effect.gen(function* () {
        const chunkSize = chunkInfo.size;

        const cumulativeOffset = yield* Ref.updateAndGet(
          cumulativeOffsetRef,
          (offset) => offset + chunkSize,
        );
        const isFinalPart = cumulativeOffset >= (uploadFile.size || 0);

        yield* Effect.logDebug("Processing chunk").pipe(
          Effect.annotateLogs({
            upload_id: uploadFile.id,
            cumulative_offset: cumulativeOffset,
            file_size: uploadFile.size,
            chunk_size: chunkSize,
            is_final_part: isFinalPart,
          }),
        );

        // Chunk numbering restarts at 1 for every call; shift it so it
        // continues after the parts that already exist.
        const actualPartNumber =
          initCurrentPartNumber + chunkInfo.partNumber - 1;

        if (chunkSize > uploadPartSize) {
          yield* Effect.fail(
            UploadistaError.fromCode(
              "FILE_WRITE_ERROR",
              new Error(
                `Part size ${chunkSize} exceeds upload part size ${uploadPartSize}`,
              ),
            ),
          );
        }

        const isRegularPart = chunkSize >= minPartSize || isFinalPart;

        if (!isRegularPart) {
          // Too small and not final: keep it as an incomplete part.
          yield* uploadIncompletePart(uploadFile.id, chunkInfo.data);
        } else {
          yield* Effect.logDebug("Uploading multipart chunk").pipe(
            Effect.annotateLogs({
              upload_id: uploadFile.id,
              part_number: actualPartNumber,
              chunk_size: chunkSize,
              min_part_size: minPartSize,
              is_final_part: isFinalPart,
            }),
          );
          yield* uploadPart(uploadFile, chunkInfo.data, actualPartNumber);
          yield* partSizeHistogram(Effect.succeed(chunkSize));
        }

        // Progress callbacks fire in the streaming phase; this counter only
        // tracks how many bytes were actually stored.
        yield* Ref.update(
          totalBytesUploadedRef,
          (total) => total + chunkSize,
        );
      });

    yield* chunkStream.pipe(
      Stream.runForEach((chunkInfo) => uploadChunk(chunkInfo)),
      Effect.withConcurrency(maxConcurrentPartUploads),
    );

    return yield* Ref.get(totalBytesUploadedRef);
  });
853
+
854
// Describes what this store supports, combining fixed flags with the
// configured limits (concurrency, part sizes, part count).
const getCapabilities = (): DataStoreCapabilities => {
  const capabilities: DataStoreCapabilities = {
    supportsParallelUploads: true,
    supportsConcatenation: true,
    supportsDeferredLength: true,
    supportsResumableUploads: true,
    supportsTransactionalUploads: true,
    maxConcurrentUploads: maxConcurrentPartUploads,
    minChunkSize: minPartSize,
    maxChunkSize: 5_368_709_120, // 5GiB S3 limit
    maxParts: maxMultipartParts,
    optimalChunkSize: preferredPartSize,
    requiresOrderedChunks: false,
    requiresMimeTypeValidation: true,
    maxValidationSize: undefined, // no size limit
  };

  return capabilities;
};
869
+
870
// Chunk-size limits for callers that split data before writing.
const getChunkerConstraints = () => {
  return {
    minChunkSize: minPartSize,
    maxChunkSize: 5_368_709_120, // 5GiB S3 limit
    optimalChunkSize: preferredPartSize,
    requiresOrderedChunks: false,
  };
};
876
+
877
// Reports whether the store can handle the given upload strategy:
// "parallel" follows the parallel-uploads capability, "single" is always
// accepted, and anything else is rejected.
const validateUploadStrategy = (
  strategy: UploadStrategy,
): Effect.Effect<boolean, never> => {
  const capabilities = getCapabilities();

  let supported: boolean;
  if (strategy === "parallel") {
    supported = capabilities.supportsParallelUploads;
  } else if (strategy === "single") {
    supported = true;
  } else {
    supported = false;
  }

  return Effect.succeed(supported);
};
893
+
894
/**
 * Concatenates byte chunks, in order, into one newly allocated Uint8Array.
 *
 * The input is only read, so it is accepted as a readonly array; mutable
 * arrays remain valid arguments.
 *
 * @param chunks - Byte chunks to join; may be empty.
 * @returns A new array whose length is the sum of all chunk lengths.
 */
const concatArrayBuffers = (chunks: readonly Uint8Array[]): Uint8Array => {
  const totalLength = chunks.reduce((sum, chunk) => sum + chunk.length, 0);
  const result = new Uint8Array(totalLength);

  let offset = 0;
  for (const chunk of chunks) {
    result.set(chunk, offset);
    offset += chunk.length;
  }

  return result;
};
903
+
904
/**
 * Reads a ReadableStream to the end and returns all of its bytes.
 *
 * The reader lock is released in a `finally` block so the stream is not left
 * locked when a read rejects or after the stream has been fully consumed.
 *
 * @param stream - Stream of byte chunks to drain.
 * @returns A promise for the concatenated bytes.
 */
const streamToArray = async (
  stream: ReadableStream<Uint8Array>,
): Promise<Uint8Array> => {
  const reader = stream.getReader();
  const chunks: Uint8Array[] = [];

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      chunks.push(value);
    }
  } finally {
    reader.releaseLock();
  }

  return concatArrayBuffers(chunks);
};
916
+
917
// Loads the upload record, fetches the object stored under its key and
// returns the object's bytes. Fails with FILE_READ_ERROR when the record has
// no id.
const read = (id: string) =>
  Effect.gen(function* () {
    const upload = yield* kvStore.get(id);

    if (!upload.id) {
      const missingKey = UploadistaError.fromCode(
        "FILE_READ_ERROR",
        new Error("Upload Key is undefined"),
      );
      return yield* Effect.fail(missingKey);
    }

    const objectStream = yield* r2Client.getObject(getS3Key(upload));
    return yield* Effect.promise(() => streamToArray(objectStream));
  });
933
+
934
+ return {
935
+ bucket,
936
+ create,
937
+ remove,
938
+ write,
939
+ getUpload,
940
+ read,
941
+ deleteExpired,
942
+ getCapabilities,
943
+ getChunkerConstraints,
944
+ validateUploadStrategy,
945
+ } as DataStore<UploadFile>;
946
+ });
947
+ }
948
+
949
// Effect-based factory: builds the store with `createR2Store` and supplies
// the R2 client layer constructed from the configured R2 bucket binding and
// bucket name.
export const r2Store = (options: R2StoreConfig) =>
  createR2Store(options).pipe(
    Effect.provide(R2ClientLayer(options.r2Bucket, options.bucket)),
  );