@uploadista/data-store-s3 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. package/.turbo/turbo-build.log +5 -0
  2. package/.turbo/turbo-check.log +5 -0
  3. package/LICENSE +21 -0
  4. package/README.md +588 -0
  5. package/dist/index.d.ts +2 -0
  6. package/dist/index.d.ts.map +1 -0
  7. package/dist/index.js +1 -0
  8. package/dist/observability.d.ts +45 -0
  9. package/dist/observability.d.ts.map +1 -0
  10. package/dist/observability.js +155 -0
  11. package/dist/s3-store-old.d.ts +51 -0
  12. package/dist/s3-store-old.d.ts.map +1 -0
  13. package/dist/s3-store-old.js +765 -0
  14. package/dist/s3-store.d.ts +9 -0
  15. package/dist/s3-store.d.ts.map +1 -0
  16. package/dist/s3-store.js +666 -0
  17. package/dist/services/__mocks__/s3-client-mock.service.d.ts +44 -0
  18. package/dist/services/__mocks__/s3-client-mock.service.d.ts.map +1 -0
  19. package/dist/services/__mocks__/s3-client-mock.service.js +379 -0
  20. package/dist/services/index.d.ts +2 -0
  21. package/dist/services/index.d.ts.map +1 -0
  22. package/dist/services/index.js +1 -0
  23. package/dist/services/s3-client.service.d.ts +68 -0
  24. package/dist/services/s3-client.service.d.ts.map +1 -0
  25. package/dist/services/s3-client.service.js +209 -0
  26. package/dist/test-observability.d.ts +6 -0
  27. package/dist/test-observability.d.ts.map +1 -0
  28. package/dist/test-observability.js +62 -0
  29. package/dist/types.d.ts +81 -0
  30. package/dist/types.d.ts.map +1 -0
  31. package/dist/types.js +1 -0
  32. package/dist/utils/calculations.d.ts +7 -0
  33. package/dist/utils/calculations.d.ts.map +1 -0
  34. package/dist/utils/calculations.js +41 -0
  35. package/dist/utils/error-handling.d.ts +7 -0
  36. package/dist/utils/error-handling.d.ts.map +1 -0
  37. package/dist/utils/error-handling.js +29 -0
  38. package/dist/utils/index.d.ts +4 -0
  39. package/dist/utils/index.d.ts.map +1 -0
  40. package/dist/utils/index.js +3 -0
  41. package/dist/utils/stream-adapter.d.ts +14 -0
  42. package/dist/utils/stream-adapter.d.ts.map +1 -0
  43. package/dist/utils/stream-adapter.js +41 -0
  44. package/package.json +36 -0
  45. package/src/__tests__/integration/s3-store.integration.test.ts +548 -0
  46. package/src/__tests__/multipart-logic.test.ts +395 -0
  47. package/src/__tests__/s3-store.edge-cases.test.ts +681 -0
  48. package/src/__tests__/s3-store.performance.test.ts +622 -0
  49. package/src/__tests__/s3-store.test.ts +662 -0
  50. package/src/__tests__/utils/performance-helpers.ts +459 -0
  51. package/src/__tests__/utils/test-data-generator.ts +331 -0
  52. package/src/__tests__/utils/test-setup.ts +256 -0
  53. package/src/index.ts +1 -0
  54. package/src/s3-store.ts +1059 -0
  55. package/src/services/__mocks__/s3-client-mock.service.ts +604 -0
  56. package/src/services/index.ts +1 -0
  57. package/src/services/s3-client.service.ts +359 -0
  58. package/src/types.ts +96 -0
  59. package/src/utils/calculations.ts +61 -0
  60. package/src/utils/error-handling.ts +52 -0
  61. package/src/utils/index.ts +3 -0
  62. package/src/utils/stream-adapter.ts +50 -0
  63. package/tsconfig.json +19 -0
  64. package/tsconfig.tsbuildinfo +1 -0
  65. package/vitest.config.ts +15 -0
package/src/s3-store.ts
@@ -0,0 +1,1059 @@

import type AWS from "@aws-sdk/client-s3";
import { UploadistaError } from "@uploadista/core/errors";
import type {
  DataStore,
  DataStoreCapabilities,
  DataStoreWriteOptions,
  UploadFile,
  UploadStrategy,
} from "@uploadista/core/types";
import { UploadFileKVStore } from "@uploadista/core/types";
import {
  s3ActiveUploadsGauge as activeUploadsGauge,
  s3FileSizeHistogram as fileSizeHistogram,
  logS3UploadCompletion,
  s3PartSizeHistogram as partSizeHistogram,
  s3PartUploadDurationHistogram as partUploadDurationHistogram,
  s3UploadDurationHistogram as uploadDurationHistogram,
  s3UploadErrorsTotal as uploadErrorsTotal,
  s3UploadPartsTotal as uploadPartsTotal,
  s3UploadRequestsTotal as uploadRequestsTotal,
  s3UploadSuccessTotal as uploadSuccessTotal,
  withS3TimingMetrics as withTimingMetrics,
  withS3UploadMetrics as withUploadMetrics,
} from "@uploadista/observability";
import { Effect, Ref, Schedule, Stream } from "effect";
import { S3ClientLayer, S3ClientService } from "./services/s3-client.service";
import type { ChunkInfo, S3StoreConfig } from "./types";
import {
  calcOffsetFromParts,
  calcOptimalPartSize,
  getExpirationDate,
  isUploadNotFoundError,
} from "./utils";

/**
 * Generates an S3 key from an upload file, preserving the file extension if available.
 * Looks for filename in metadata under common keys: 'filename', 'fileName', or 'name'.
 * Falls back to just the upload ID if no filename is found.
 */
const getS3Key = (uploadFile: UploadFile): string => {
  const { id, metadata } = uploadFile;

  if (!metadata) {
    return id;
  }

  // Try common metadata keys for filename
  const filename = metadata.filename || metadata.fileName || metadata.name;

  if (typeof filename === "string" && filename.includes(".")) {
    const extension = filename.substring(filename.lastIndexOf("."));
    return `${id}${extension}`;
  }

  return id;
};

// Clean implementation using composed services
export function createS3StoreImplementation(config: S3StoreConfig) {
  const {
    deliveryUrl,
    partSize,
    minPartSize = 5_242_880,
    useTags = true,
    maxMultipartParts = 10_000,
    kvStore,
    maxConcurrentPartUploads = 60,
    expirationPeriodInMilliseconds = 1000 * 60 * 60 * 24 * 7, // 1 week
    s3ClientConfig: { bucket },
  } = config;

  return Effect.gen(function* () {
    const s3Client = yield* S3ClientService;

    const preferredPartSize = partSize || 8 * 1024 * 1024;

    const getUploadId = (
      uploadFile: UploadFile,
    ): Effect.Effect<string, UploadistaError> => {
      const uploadId = uploadFile.storage.uploadId;
      if (!uploadId) {
        return Effect.fail(
          UploadistaError.fromCode(
            "FILE_WRITE_ERROR",
            new Error("Upload ID is undefined"),
          ),
        );
      }
      return Effect.succeed(uploadId);
    };

    const uploadPart = (
      uploadFile: UploadFile,
      data: Uint8Array,
      partNumber: number,
    ) => {
      const s3Key = getS3Key(uploadFile);

      return withTimingMetrics(
        partUploadDurationHistogram,
        Effect.gen(function* () {
          const uploadId = yield* getUploadId(uploadFile);

          const etag = yield* s3Client
            .uploadPart({
              bucket: s3Client.bucket,
              key: s3Key,
              uploadId,
              partNumber,
              data,
            })
            .pipe(
              Effect.retry(
                Schedule.exponential("1 second", 2.0).pipe(
                  Schedule.intersect(Schedule.recurs(3)),
                ),
              ),
              Effect.tapError((error) =>
                Effect.logWarning("Retrying part upload").pipe(
                  Effect.annotateLogs({
                    upload_id: uploadFile.id,
                    part_number: partNumber,
                    error_message: error.message,
                    retry_attempt: "unknown", // Will be overridden by the retry schedule
                    part_size: data.length,
                    s3_bucket: s3Client.bucket,
                  }),
                ),
              ),
            );

          yield* uploadPartsTotal(Effect.succeed(1));
          yield* Effect.logInfo("Part uploaded successfully").pipe(
            Effect.annotateLogs({
              upload_id: uploadFile.id,
              part_number: partNumber,
              part_size: data.length,
              etag: etag,
            }),
          );

          return etag;
        }),
      ).pipe(
        Effect.withSpan(`s3-upload-part-${partNumber}`, {
          attributes: {
            "upload.id": uploadFile.id,
            "upload.part_number": partNumber,
            "upload.part_size": data.length,
            "s3.bucket": s3Client.bucket,
            "s3.key": s3Key,
          },
        }),
      );
    };

    const uploadIncompletePart = (id: string, data: Uint8Array) =>
      s3Client.putIncompletePart(id, data);

    const downloadIncompletePart = (id: string) =>
      Effect.gen(function* () {
        const incompletePart = yield* s3Client.getIncompletePart(id);

        if (!incompletePart) {
          return undefined;
        }

        // Read the stream and collect all chunks to calculate size
        const reader = incompletePart.getReader();
        const chunks: Uint8Array[] = [];
        let incompletePartSize = 0;

        try {
          while (true) {
            const { done, value } = yield* Effect.promise(() => reader.read());
            if (done) break;
            chunks.push(value);
            incompletePartSize += value.length;
          }
        } finally {
          reader.releaseLock();
        }

        const stream = Stream.fromIterable(chunks);

        return {
          size: incompletePartSize,
          stream,
        };
      });

    const deleteIncompletePart = (id: string) =>
      s3Client.deleteIncompletePart(id);

    const getIncompletePartSize = (id: string) =>
      s3Client.getIncompletePartSize(id);

    const complete = (uploadFile: UploadFile, parts: Array<AWS.Part>) => {
      const s3Key = getS3Key(uploadFile);

      return Effect.gen(function* () {
        const uploadId = yield* getUploadId(uploadFile);

        return yield* s3Client.completeMultipartUpload(
          {
            bucket: s3Client.bucket,
            key: s3Key,
            uploadId,
          },
          parts,
        );
      }).pipe(
        Effect.tap(() => uploadSuccessTotal(Effect.succeed(1))),
        Effect.withSpan("s3-complete-multipart-upload", {
          attributes: {
            "upload.id": uploadFile.id,
            "upload.parts_count": parts.length,
            "s3.bucket": s3Client.bucket,
            "s3.key": s3Key,
          },
        }),
      );
    };

    const abort = (uploadFile: UploadFile) => {
      const s3Key = getS3Key(uploadFile);

      return Effect.gen(function* () {
        const uploadId = yield* getUploadId(uploadFile);

        yield* s3Client.abortMultipartUpload({
          bucket: s3Client.bucket,
          key: s3Key,
          uploadId,
        });

        yield* s3Client.deleteObjects([s3Key]);
      });
    };

    const retrievePartsRecursive = (
      s3Key: string,
      uploadId: string,
      uploadFileId: string,
      partNumberMarker?: string,
    ): Effect.Effect<
      { uploadFound: boolean; parts: AWS.Part[] },
      UploadistaError
    > =>
      Effect.gen(function* () {
        const result = yield* s3Client.listParts({
          bucket: s3Client.bucket,
          key: s3Key,
          uploadId,
          partNumberMarker,
        });

        let parts = result.parts;

        if (result.isTruncated) {
          const rest = yield* retrievePartsRecursive(
            s3Key,
            uploadId,
            uploadFileId,
            result.nextPartNumberMarker,
          );
          parts = [...parts, ...rest.parts];
        }

        if (!partNumberMarker) {
          parts.sort((a, b) => (a.PartNumber ?? 0) - (b.PartNumber ?? 0));
        }

        return { uploadFound: true, parts };
      }).pipe(
        // A try/catch inside Effect.gen never sees Effect failures, so the
        // "upload not found" case is handled here as an Effect error instead.
        Effect.catchAll((error) =>
          isUploadNotFoundError(error)
            ? Effect.logWarning("S3 upload not found during listParts").pipe(
                Effect.annotateLogs({
                  upload_id: uploadFileId,
                  error_code: error.code,
                }),
                Effect.as({ uploadFound: false, parts: [] as AWS.Part[] }),
              )
            : Effect.fail(error),
        ),
      );

    const retrieveParts = (id: string, partNumberMarker?: string) =>
      Effect.gen(function* () {
        const metadata = yield* kvStore.get(id);
        const uploadId = yield* getUploadId(metadata);
        const s3Key = getS3Key(metadata);

        return yield* retrievePartsRecursive(
          s3Key,
          uploadId,
          id,
          partNumberMarker,
        );
      });

    const completeMetadata = (upload: UploadFile, useTags: boolean) =>
      Effect.gen(function* () {
        if (!useTags) {
          return 0;
        }

        const uploadFile = yield* kvStore.get(upload.id);
        const uploadId = uploadFile.storage.uploadId;
        if (!uploadId) {
          return 0;
        }

        yield* kvStore.set(upload.id, {
          ...uploadFile,
          storage: { ...uploadFile.storage, uploadId },
        });

        return 0;
      });

    const clearCache = (id: string) =>
      Effect.gen(function* () {
        yield* Effect.logInfo("Clearing cache").pipe(
          Effect.annotateLogs({ upload_id: id }),
        );
        yield* kvStore.delete(id);
      });

    const createMultipartUpload = (upload: UploadFile) => {
      const s3Key = getS3Key(upload);

      return Effect.gen(function* () {
        yield* Effect.logInfo("Initializing multipart upload").pipe(
          Effect.annotateLogs({ upload_id: upload.id }),
        );

        const multipartInfo = yield* s3Client.createMultipartUpload({
          bucket: s3Client.bucket,
          key: s3Key,
          uploadId: "", // Not needed for create
          contentType: upload.metadata?.contentType?.toString(),
          cacheControl: upload.metadata?.cacheControl?.toString(),
        });

        const uploadCreated = {
          ...upload,
          storage: {
            ...upload.storage,
            path: multipartInfo.key,
            uploadId: multipartInfo.uploadId,
            bucket: multipartInfo.bucket,
          },
          url: `${deliveryUrl}/${s3Key}`,
        };

        yield* kvStore.set(upload.id, uploadCreated);

        yield* Effect.logInfo("Multipart upload created").pipe(
          Effect.annotateLogs({
            upload_id: upload.id,
            s3_upload_id: uploadCreated.storage.uploadId,
            s3_key: s3Key,
          }),
        );

        yield* uploadRequestsTotal(Effect.succeed(1));
        yield* fileSizeHistogram(Effect.succeed(upload.size || 0));

        return uploadCreated;
      }).pipe(
        Effect.withSpan("s3-create-upload", {
          attributes: {
            "upload.id": upload.id,
            "upload.size": upload.size || 0,
            "s3.bucket": s3Client.bucket,
            "s3.key": s3Key,
          },
        }),
      );
    };

    /**
     * Creates a multipart upload on S3 attaching any metadata to it.
     * Also, a `${file_id}.info` file is created which holds some information
     * about the upload itself like: `upload-id`, `upload-length`, etc.
     */
    const create = (upload: UploadFile) => {
      return Effect.gen(function* () {
        yield* Effect.logInfo("Initializing multipart upload").pipe(
          Effect.annotateLogs({ upload_id: upload.id }),
        );
        const uploadCreated = yield* createMultipartUpload(upload);
        yield* kvStore.set(upload.id, uploadCreated);
        yield* Effect.logInfo("Multipart upload created").pipe(
          Effect.annotateLogs({
            upload_id: upload.id,
            s3_upload_id: uploadCreated.storage.uploadId,
          }),
        );
        yield* uploadRequestsTotal(Effect.succeed(1));

        return uploadCreated;
      }).pipe(
        Effect.withSpan("s3-create-upload", {
          attributes: {
            "upload.id": upload.id,
            "upload.size": upload.size || 0,
            "s3.bucket": bucket,
          },
        }),
      );
    };

    const remove = (id: string) =>
      Effect.gen(function* () {
        const uploadFile = yield* kvStore.get(id);
        yield* abort(uploadFile);
        yield* clearCache(id);
      });

    const write = (
      options: DataStoreWriteOptions,
      dependencies: { onProgress?: (currentOffset: number) => void },
    ) =>
      withUploadMetrics(
        options.file_id,
        withTimingMetrics(
          uploadDurationHistogram,
          Effect.gen(function* () {
            const {
              stream: initialData,
              file_id,
              offset: initialOffset,
            } = options;
            const { onProgress } = dependencies;

            // Capture start time for upload completion metrics
            const startTime = Date.now();

            // Track active upload
            yield* activeUploadsGauge(Effect.succeed(1));

            const prepareResult = yield* prepareUpload(
              file_id,
              initialOffset,
              initialData,
            );

            const {
              uploadFile,
              nextPartNumber,
              offset,
              data,
              existingPartSize,
            } = prepareResult;

            // Use existing part size if parts already exist, otherwise calculate optimal size
            const uploadPartSize =
              existingPartSize ||
              calcOptimalPartSize(
                uploadFile.size,
                preferredPartSize,
                minPartSize,
                maxMultipartParts,
              );

            // Log part size decision for debugging
            yield* Effect.logInfo("Part size decision").pipe(
              Effect.annotateLogs({
                upload_id: file_id,
                existing_part_size: existingPartSize,
                calculated_part_size: calcOptimalPartSize(
                  uploadFile.size,
                  preferredPartSize,
                  minPartSize,
                  maxMultipartParts,
                ),
                final_part_size: uploadPartSize,
                next_part_number: nextPartNumber,
              }),
            );

            const bytesUploaded = yield* uploadParts(
              uploadFile,
              data,
              nextPartNumber,
              offset,
              uploadPartSize,
              minPartSize,
              maxConcurrentPartUploads,
              onProgress,
            );

            const newOffset = offset + bytesUploaded;

            if (uploadFile.size === newOffset) {
              yield* finishUpload(file_id, uploadFile, startTime);
            }

            return newOffset;
          }).pipe(Effect.ensuring(activeUploadsGauge(Effect.succeed(0)))),
        ),
      );

    const getUpload = (id: string) =>
      Effect.gen(function* () {
        const uploadFile = yield* kvStore.get(id);

        const { parts, uploadFound } = yield* retrieveParts(id);
        if (!uploadFound) {
          return {
            ...uploadFile,
            offset: uploadFile.size as number,
            size: uploadFile.size,
          };
        }

        const offset = calcOffsetFromParts(parts);
        const incompletePartSize = yield* getIncompletePartSize(id);

        return {
          ...uploadFile,
          offset: offset + (incompletePartSize ?? 0),
          size: uploadFile.size,
          storage: uploadFile.storage,
        };
      });

    // const read = (id: string) =>
    //   Effect.gen(function* () {
    //     return yield* s3Client.getObject(id);
    //   });

    // Helper functions
    const prepareUpload = (
      fileId: string,
      initialOffset: number,
      initialData: Stream.Stream<Uint8Array, UploadistaError>,
    ) =>
      Effect.gen(function* () {
        const uploadFile = yield* kvStore.get(fileId);
        const { parts } = yield* retrieveParts(fileId);

        const partNumber: number =
          parts.length > 0 && parts[parts.length - 1].PartNumber
            ? (parts[parts.length - 1].PartNumber ?? 0)
            : 0;
        const nextPartNumber = partNumber + 1;

        // Detect existing part size to maintain consistency
        // We check the first part's size to ensure all subsequent parts match
        const existingPartSize =
          parts.length > 0 && parts[0].Size ? parts[0].Size : null;

        // Validate that all existing parts (except potentially the last one) have the same size
        if (existingPartSize && parts.length > 1) {
          const inconsistentPart = parts
            .slice(0, -1)
            .find((part) => part.Size !== existingPartSize);
          if (inconsistentPart) {
            yield* Effect.logWarning(
              "Inconsistent part sizes detected in existing upload",
            ).pipe(
              Effect.annotateLogs({
                upload_id: fileId,
                expected_size: existingPartSize,
                inconsistent_part: inconsistentPart.PartNumber,
                inconsistent_size: inconsistentPart.Size,
              }),
            );
          }
        }

        const incompletePart = yield* downloadIncompletePart(fileId);

        if (incompletePart) {
          yield* deleteIncompletePart(fileId);
          const offset = initialOffset - incompletePart.size;
          const data = incompletePart.stream.pipe(Stream.concat(initialData));
          return {
            uploadFile,
            nextPartNumber,
            offset,
            incompletePartSize: incompletePart.size,
            data,
            existingPartSize,
          };
        } else {
          return {
            uploadFile,
            nextPartNumber,
            offset: initialOffset,
            incompletePartSize: 0,
            data: initialData,
            existingPartSize,
          };
        }
      });

    const finishUpload = (
      fileId: string,
      uploadFile: UploadFile,
      startTime: number,
    ) =>
      Effect.gen(function* () {
        const { parts } = yield* retrieveParts(fileId);

        // Log all parts for debugging S3 multipart upload requirements
        yield* Effect.logInfo("Attempting to complete multipart upload").pipe(
          Effect.annotateLogs({
            upload_id: fileId,
            parts_count: parts.length,
            parts_info: parts.map((part, index) => ({
              part_number: part.PartNumber,
              size: part.Size,
              etag: part.ETag,
              is_final_part: index === parts.length - 1,
            })),
          }),
        );

        yield* complete(uploadFile, parts);
        yield* completeMetadata(uploadFile, useTags);
        // yield* clearCache(fileId);

        // Log upload completion metrics
        const endTime = Date.now();
        const totalDurationMs = endTime - startTime;
        const fileSize = uploadFile.size || 0;
        const throughputBps =
          totalDurationMs > 0 ? (fileSize * 1000) / totalDurationMs : 0;

        // Calculate average part size if we have parts
        const averagePartSize =
          parts.length > 0
            ? parts.reduce((sum, part) => sum + (part.Size || 0), 0) /
              parts.length
            : undefined;

        yield* logS3UploadCompletion(fileId, {
          fileSize,
          totalDurationMs,
          partsCount: parts.length,
          averagePartSize,
          throughputBps,
        });
      }).pipe(
        Effect.tapError((error) =>
          Effect.gen(function* () {
            yield* uploadErrorsTotal(Effect.succeed(1));
            yield* Effect.logError("Failed to finish upload").pipe(
              Effect.annotateLogs({
                upload_id: fileId,
                error: String(error),
              }),
            );
          }),
        ),
      );

    const deleteExpired = Effect.gen(function* () {
      if (expirationPeriodInMilliseconds === 0) {
        return 0;
      }

      let keyMarker: string | undefined;
      let uploadIdMarker: string | undefined;
      let isTruncated = true;
      let deleted = 0;

      while (isTruncated) {
        const listResponse = yield* s3Client.listMultipartUploads(
          keyMarker,
          uploadIdMarker,
        );

        const expiredUploads =
          listResponse.Uploads?.filter((multiPartUpload) => {
            const initiatedDate = multiPartUpload.Initiated;
            return (
              initiatedDate &&
              Date.now() >
                getExpirationDate(
                  initiatedDate.toISOString(),
                  expirationPeriodInMilliseconds,
                ).getTime()
            );
          }) || [];

        const objectsToDelete = expiredUploads
          .filter((upload): upload is { Key: string } => {
            return !!upload.Key;
          })
          .map((upload) => upload.Key);

        if (objectsToDelete.length > 0) {
          yield* s3Client.deleteObjects(objectsToDelete);

          // Abort multipart uploads
          yield* Effect.forEach(expiredUploads, (upload) => {
            return Effect.gen(function* () {
              if (!upload.Key || !upload.UploadId) {
                return;
              }
              yield* s3Client.abortMultipartUpload({
                bucket,
                key: upload.Key,
                uploadId: upload.UploadId,
              });
              return;
            });
          });

          deleted += objectsToDelete.length;
        }

        isTruncated = listResponse.IsTruncated ?? false;

        if (isTruncated) {
          keyMarker = listResponse.NextKeyMarker;
          uploadIdMarker = listResponse.NextUploadIdMarker;
        }
      }

      return deleted;
    });

    // Proper single-pass chunking using Effect's async stream constructor
    // Ensures all parts except the final part are exactly the same size (S3 requirement)
    const createChunkedStream =
      (chunkSize: number) =>
      <E>(
        stream: Stream.Stream<Uint8Array, E>,
      ): Stream.Stream<ChunkInfo, E> => {
        return Stream.async<ChunkInfo, E>((emit) => {
          let buffer = new Uint8Array(0);
          let partNumber = 1;
          let totalBytesProcessed = 0;

          const emitChunk = (data: Uint8Array, isFinalChunk = false) => {
            // Log chunk information for debugging - use INFO level to see in logs
            Effect.runSync(
              Effect.logInfo("Creating chunk").pipe(
                Effect.annotateLogs({
                  part_number: partNumber,
                  chunk_size: data.length,
                  expected_size: chunkSize,
                  is_final_chunk: isFinalChunk,
                  total_bytes_processed: totalBytesProcessed + data.length,
                }),
              ),
            );
            emit.single({
              partNumber: partNumber++,
              data,
              size: data.length,
            });
          };

          const processChunk = (newData: Uint8Array) => {
            // Combine buffer with new data
            const combined = new Uint8Array(buffer.length + newData.length);
            combined.set(buffer);
            combined.set(newData, buffer.length);
            buffer = combined;
            totalBytesProcessed += newData.length;

            // Emit full chunks of exactly chunkSize bytes
            // This ensures S3 multipart upload rule: all parts except last must be same size
            while (buffer.length >= chunkSize) {
              const chunk = buffer.slice(0, chunkSize);
              buffer = buffer.slice(chunkSize);
              emitChunk(chunk, false);
            }
          };

          // Process the stream
          Effect.runFork(
            stream.pipe(
              Stream.runForEach((chunk) =>
                Effect.sync(() => processChunk(chunk)),
              ),
              Effect.andThen(() =>
                Effect.sync(() => {
                  // Emit final chunk if there's remaining data
                  // The final chunk can be any size < chunkSize (S3 allows this)
                  if (buffer.length > 0) {
                    emitChunk(buffer, true);
                  }
                  emit.end();
                }),
              ),
              Effect.catchAll((error) => Effect.sync(() => emit.fail(error))),
            ),
          );
        });
      };

    // Byte-level progress tracking during streaming
    // This provides smooth, immediate progress feedback by tracking bytes as they
    // flow through the stream, before they reach S3. This solves the issue where
    // small files (< 5MB) would jump from 0% to 100% instantly.
    const withByteProgressTracking =
      (onProgress?: (totalBytes: number) => void, initialOffset = 0) =>
      <E, R>(stream: Stream.Stream<Uint8Array, E, R>) => {
        if (!onProgress) return stream;

        return Effect.gen(function* () {
          const totalBytesProcessedRef = yield* Ref.make(initialOffset);

          return stream.pipe(
            Stream.tap((chunk) =>
              Effect.gen(function* () {
                const newTotal = yield* Ref.updateAndGet(
                  totalBytesProcessedRef,
                  (total) => total + chunk.length,
                );
                onProgress(newTotal);
              }),
            ),
          );
        }).pipe(Stream.unwrap);
      };

    const uploadParts = (
      uploadFile: UploadFile,
      readStream: Stream.Stream<Uint8Array, UploadistaError>,
      initCurrentPartNumber: number,
      initOffset: number,
      uploadPartSize: number,
      minPartSize: number,
      maxConcurrentPartUploads: number,
      onProgress?: (newOffset: number) => void,
    ) =>
      Effect.gen(function* () {
        yield* Effect.logInfo("Starting part uploads").pipe(
          Effect.annotateLogs({
            upload_id: uploadFile.id,
            init_offset: initOffset,
            file_size: uploadFile.size,
            part_size: uploadPartSize,
            min_part_size: minPartSize,
          }),
        );

        // Enhanced Progress Tracking Strategy:
        // 1. Byte-level progress during streaming - provides immediate, smooth feedback
        //    as data flows through the pipeline (even for small files)
        // 2. This tracks progress BEFORE S3 upload, giving users immediate feedback
        // 3. For large files with multiple parts, this provides granular updates
        // 4. For small files (single part), this prevents 0%->100% jumps
        const chunkStream = readStream.pipe(
          // Add byte-level progress tracking during streaming (immediate feedback)
          withByteProgressTracking(onProgress, initOffset),
          // Create chunks for S3 multipart upload with uniform part sizes
          createChunkedStream(uploadPartSize),
        );

        // Track cumulative offset and total bytes with Effect Refs
        const cumulativeOffsetRef = yield* Ref.make(initOffset);
        const totalBytesUploadedRef = yield* Ref.make(0);

        // Create a chunk upload function for the sink
        const uploadChunk = (chunkInfo: ChunkInfo) =>
          Effect.gen(function* () {
            // Calculate cumulative bytes to determine if this is the final part
            const cumulativeOffset = yield* Ref.updateAndGet(
              cumulativeOffsetRef,
              (offset) => offset + chunkInfo.size,
            );
            const isFinalPart = cumulativeOffset >= (uploadFile.size || 0);

            yield* Effect.logDebug("Processing chunk").pipe(
              Effect.annotateLogs({
                upload_id: uploadFile.id,
                cumulative_offset: cumulativeOffset,
                file_size: uploadFile.size,
                chunk_size: chunkInfo.size,
                is_final_part: isFinalPart,
              }),
            );

            const actualPartNumber =
              initCurrentPartNumber + chunkInfo.partNumber - 1;

            if (chunkInfo.size > uploadPartSize) {
              yield* Effect.fail(
                UploadistaError.fromCode(
                  "FILE_WRITE_ERROR",
                  new Error(
                    `Part size ${chunkInfo.size} exceeds upload part size ${uploadPartSize}`,
                  ),
                ),
              );
            }

            // For parts that meet the minimum part size (5MB) or are the final part,
            // upload them as regular multipart parts
            if (chunkInfo.size >= minPartSize || isFinalPart) {
              yield* Effect.logDebug("Uploading multipart chunk").pipe(
                Effect.annotateLogs({
                  upload_id: uploadFile.id,
                  part_number: actualPartNumber,
                  chunk_size: chunkInfo.size,
                  min_part_size: minPartSize,
                  is_final_part: isFinalPart,
                }),
              );
              yield* uploadPart(uploadFile, chunkInfo.data, actualPartNumber);
              yield* partSizeHistogram(Effect.succeed(chunkInfo.size));
            } else {
              // Only upload as incomplete part if it's smaller than minimum and not final
              yield* uploadIncompletePart(uploadFile.id, chunkInfo.data);
            }

            yield* Ref.update(
              totalBytesUploadedRef,
              (total) => total + chunkInfo.size,
            );

            // Note: Byte-level progress is now tracked during streaming phase
            // This ensures smooth progress updates regardless of part size
            // S3 upload completion is tracked via totalBytesUploadedRef for accuracy
          });

        // Process chunks concurrently with controlled concurrency
        yield* chunkStream.pipe(
          Stream.runForEach((chunkInfo) => uploadChunk(chunkInfo)),
          Effect.withConcurrency(maxConcurrentPartUploads),
        );

        return yield* Ref.get(totalBytesUploadedRef);
      });

    const getCapabilities = (): DataStoreCapabilities => ({
      supportsParallelUploads: true,
      supportsConcatenation: true,
      supportsDeferredLength: true,
      supportsResumableUploads: true,
      supportsTransactionalUploads: true,
      maxConcurrentUploads: maxConcurrentPartUploads,
      minChunkSize: minPartSize,
      maxChunkSize: 5_368_709_120, // 5GiB S3 limit
      maxParts: maxMultipartParts,
      optimalChunkSize: preferredPartSize,
      requiresOrderedChunks: false,
      requiresMimeTypeValidation: true,
      maxValidationSize: undefined, // no size limit
    });

    const getChunkerConstraints = () => ({
      minChunkSize: minPartSize,
      maxChunkSize: 5_368_709_120, // 5GiB S3 limit
      optimalChunkSize: preferredPartSize,
      requiresOrderedChunks: false,
    });

    const validateUploadStrategy = (
      strategy: UploadStrategy,
    ): Effect.Effect<boolean, never> => {
      const capabilities = getCapabilities();
      const result = (() => {
        switch (strategy) {
          case "parallel":
            return capabilities.supportsParallelUploads;
          case "single":
            return true;
          default:
            return false;
        }
      })();
      return Effect.succeed(result);
    };

    const concatArrayBuffers = (chunks: Uint8Array[]): Uint8Array => {
      const result = new Uint8Array(chunks.reduce((a, c) => a + c.length, 0));
      let offset = 0;
      for (const chunk of chunks) {
        result.set(chunk, offset);
        offset += chunk.length;
      }
      return result;
    };

    const streamToArray = async (
      stream: ReadableStream<Uint8Array>,
    ): Promise<Uint8Array> => {
      const reader = stream.getReader();
      const chunks: Uint8Array[] = [];
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        chunks.push(value);
      }
      return concatArrayBuffers(chunks);
    };

    const read = (id: string) =>
      Effect.gen(function* () {
        const upload = yield* kvStore.get(id);
        if (!upload.id) {
          return yield* Effect.fail(
            UploadistaError.fromCode(
              "FILE_READ_ERROR",
              new Error("Upload Key is undefined"),
            ),
          );
        }
        const s3Key = getS3Key(upload);
        const stream = yield* s3Client.getObject(s3Key);
        return yield* Effect.promise(() => streamToArray(stream));
      });

    return {
      bucket,
      create,
      remove,
      write,
      getUpload,
      read,
      deleteExpired,
      getCapabilities,
      getChunkerConstraints,
      validateUploadStrategy,
    } as DataStore<UploadFile>;
  });
}

// Effect-based factory that uses services
export const createS3Store = (options: S3StoreConfig) =>
  Effect.gen(function* () {
    const kvStore = yield* UploadFileKVStore;
    const {
      s3ClientConfig: { bucket, ...restS3ClientConfig },
    } = options;
    return yield* createS3StoreImplementation({
      ...options,
      kvStore,
    }).pipe(Effect.provide(S3ClientLayer(restS3ClientConfig, bucket)));
  });

// Backward compatibility: keep the original function for existing code
export const s3Store = (config: S3StoreConfig) => {
  const {
    s3ClientConfig: { bucket, ...restS3ClientConfig },
  } = config;
  return Effect.runPromise(
    createS3StoreImplementation(config).pipe(
      Effect.provide(S3ClientLayer(restS3ClientConfig, bucket)),
    ),
  );
};
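
For orientation, here is a minimal, hypothetical usage sketch of the `s3Store` factory exported at the end of this file. It assumes the package root re-exports `s3Store` and the `S3StoreConfig` type, and the `kvStore` value and the `s3ClientConfig` fields beyond `bucket` are placeholders; their real shapes live in `./types` and the services directory, which are not shown in this excerpt.

// Hypothetical usage sketch; names marked "assumed" are not confirmed by this diff.
import type { S3StoreConfig } from "@uploadista/data-store-s3"; // assumed re-export of ./types
import { s3Store } from "@uploadista/data-store-s3"; // assumed re-export of ./s3-store

// Some UploadFile KV store instance; its concrete type comes from @uploadista/core.
declare const myUploadFileKvStore: S3StoreConfig["kvStore"];

const store = await s3Store({
  deliveryUrl: "https://cdn.example.com",
  partSize: 8 * 1024 * 1024, // preferred part size; defaults to 8 MiB when omitted
  minPartSize: 5_242_880, // S3 minimum for every part except the last
  maxConcurrentPartUploads: 60,
  expirationPeriodInMilliseconds: 1000 * 60 * 60 * 24 * 7, // 1 week
  kvStore: myUploadFileKvStore,
  s3ClientConfig: {
    bucket: "my-upload-bucket",
    region: "us-east-1", // assumed field; the actual client config shape lives in ./types
  },
});

// The resolved store exposes the DataStore API assembled above:
// create, write, getUpload, read, remove, deleteExpired, getCapabilities, ...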