@forzalabs/remora 0.0.61-nasco.3 → 0.0.62-nasco.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those versions as they appear in their respective public registries.
@@ -29,6 +29,7 @@ const DriverHelper = {
29
29
  appendToUnifiedFile: (options) => __awaiter(void 0, void 0, void 0, function* () {
30
30
  (0, Affirm_1.default)(options, 'Invalid options');
31
31
  const { append, destinationPath, fileKey, headerLine, stream, fileType, hasHeaderRow, delimiter } = options;
32
+ (0, Affirm_1.default)(headerLine, `Invalid header line`);
32
33
  const keys = (fileType === 'JSON' || fileType === 'JSONL')
33
34
  ? Object.keys(JSON.parse(headerLine))
34
35
  : [];
@@ -78,24 +78,38 @@ class S3DestinationDriver {
78
78
  (0, Affirm_1.default)(uploadId, 'Failed to initiate multipart upload');
79
79
  const uploadedParts = [];
80
80
  let partNumber = 1;
81
+ const MIN_PART_SIZE = 5 * 1024 * 1024; // 5MB
82
+ let accumulatedBuffer = Buffer.alloc(0);
83
+ const uploadPart = (buffer) => __awaiter(this, void 0, void 0, function* () {
84
+ const uploadPartRes = yield this._client.send(new client_s3_1.UploadPartCommand({
85
+ Bucket: this._bucketName,
86
+ Key: name,
87
+ UploadId: uploadId,
88
+ PartNumber: partNumber,
89
+ Body: buffer
90
+ }));
91
+ uploadedParts.push({
92
+ PartNumber: partNumber,
93
+ ETag: uploadPartRes.ETag
94
+ });
95
+ partNumber++;
96
+ });
81
97
  yield dataset.streamBatches((batch) => __awaiter(this, void 0, void 0, function* () {
82
98
  const chunks = FileExporter_1.default.prepareBatch(batch, options);
83
99
  for (const chunk of chunks) {
84
- const body = Buffer.from(chunk);
85
- const uploadPartRes = yield this._client.send(new client_s3_1.UploadPartCommand({
86
- Bucket: this._bucketName,
87
- Key: name,
88
- UploadId: uploadId,
89
- PartNumber: partNumber,
90
- Body: body
91
- }));
92
- uploadedParts.push({
93
- PartNumber: partNumber,
94
- ETag: uploadPartRes.ETag
95
- });
96
- partNumber++;
100
+ const chunkBuffer = Buffer.from(chunk);
101
+ accumulatedBuffer = Buffer.concat([accumulatedBuffer, chunkBuffer]);
102
+ // If accumulated buffer is at least 5MB, upload it as a part
103
+ if (accumulatedBuffer.length >= MIN_PART_SIZE) {
104
+ yield uploadPart(accumulatedBuffer);
105
+ accumulatedBuffer = Buffer.alloc(0);
106
+ }
97
107
  }
98
108
  }));
109
+ // Upload any remaining data as the final part (even if smaller than 5MB)
110
+ if (accumulatedBuffer.length > 0) {
111
+ yield uploadPart(accumulatedBuffer);
112
+ }
99
113
  // Complete the multipart upload
100
114
  const completeRes = yield this._client.send(new client_s3_1.CompleteMultipartUploadCommand({
101
115
  Bucket: this._bucketName,
@@ -63,7 +63,7 @@ class FileExporterClass {
63
63
  const chunks = [];
64
64
  for (let i = 0; i < records.length; i += chunkSize) {
65
65
  const chunk = records.slice(i, i + chunkSize);
66
- chunks.push(chunk.join(separator));
66
+ chunks.push(chunk.join(separator) + separator);
67
67
  }
68
68
  return chunks;
69
69
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forzalabs/remora",
3
- "version": "0.0.61-nasco.3",
3
+ "version": "0.0.62-nasco.3",
4
4
  "description": "A powerful CLI tool for seamless data translation.",
5
5
  "main": "index.js",
6
6
  "private": false,