@forzalabs/remora 0.0.61-nasco.3 → 0.0.62-nasco.3
This diff reflects the changes between publicly available package versions as they appear in their public registry, and is provided for informational purposes only.
- package/drivers/DriverHelper.js +1 -0
- package/drivers/S3Driver.js +27 -13
- package/engines/file/FileExporter.js +1 -1
- package/package.json +1 -1
package/drivers/DriverHelper.js
CHANGED
@@ -29,6 +29,7 @@ const DriverHelper = {
     appendToUnifiedFile: (options) => __awaiter(void 0, void 0, void 0, function* () {
         (0, Affirm_1.default)(options, 'Invalid options');
         const { append, destinationPath, fileKey, headerLine, stream, fileType, hasHeaderRow, delimiter } = options;
+        (0, Affirm_1.default)(headerLine, `Invalid header line`);
         const keys = (fileType === 'JSON' || fileType === 'JSONL')
             ? Object.keys(JSON.parse(headerLine))
             : [];
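Note: the assertion added above fails fast before JSON.parse(headerLine) runs for JSON/JSONL files. Without it, a missing headerLine surfaces as a SyntaxError from the parser rather than a clear validation error. A minimal sketch of the difference, assuming Affirm_1.default throws on a falsy first argument (its exact behavior is not shown in this diff); the affirm function below is a hypothetical stand-in:

// Hypothetical stand-in for Affirm_1.default, assumed to throw on falsy input.
const affirm = (value, message) => {
    if (!value) throw new Error(message);
};

const headerLine = undefined;

// Before the patch, the first hint of trouble was the parser itself:
// JSON.parse(headerLine) throws a SyntaxError ("undefined" is not valid JSON),
// which is hard to trace back to a missing header line.

// After the patch, the failure names the actual problem:
affirm(headerLine, 'Invalid header line'); // throws Error: Invalid header line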
package/drivers/S3Driver.js
CHANGED
@@ -78,24 +78,38 @@ class S3DestinationDriver {
     (0, Affirm_1.default)(uploadId, 'Failed to initiate multipart upload');
     const uploadedParts = [];
     let partNumber = 1;
+    const MIN_PART_SIZE = 5 * 1024 * 1024; // 5MB
+    let accumulatedBuffer = Buffer.alloc(0);
+    const uploadPart = (buffer) => __awaiter(this, void 0, void 0, function* () {
+        const uploadPartRes = yield this._client.send(new client_s3_1.UploadPartCommand({
+            Bucket: this._bucketName,
+            Key: name,
+            UploadId: uploadId,
+            PartNumber: partNumber,
+            Body: buffer
+        }));
+        uploadedParts.push({
+            PartNumber: partNumber,
+            ETag: uploadPartRes.ETag
+        });
+        partNumber++;
+    });
     yield dataset.streamBatches((batch) => __awaiter(this, void 0, void 0, function* () {
         const chunks = FileExporter_1.default.prepareBatch(batch, options);
         for (const chunk of chunks) {
-            const uploadPartRes = yield this._client.send(new client_s3_1.UploadPartCommand({
-                Bucket: this._bucketName,
-                Key: name,
-                UploadId: uploadId,
-                PartNumber: partNumber,
-                Body: chunk
-            }));
-            uploadedParts.push({
-                PartNumber: partNumber,
-                ETag: uploadPartRes.ETag
-            });
-            partNumber++;
+            const chunkBuffer = Buffer.from(chunk);
+            accumulatedBuffer = Buffer.concat([accumulatedBuffer, chunkBuffer]);
+            // If accumulated buffer is at least 5MB, upload it as a part
+            if (accumulatedBuffer.length >= MIN_PART_SIZE) {
+                yield uploadPart(accumulatedBuffer);
+                accumulatedBuffer = Buffer.alloc(0);
+            }
         }
     }));
+    // Upload any remaining data as the final part (even if smaller than 5MB)
+    if (accumulatedBuffer.length > 0) {
+        yield uploadPart(accumulatedBuffer);
+    }
     // Complete the multipart upload
     const completeRes = yield this._client.send(new client_s3_1.CompleteMultipartUploadCommand({
         Bucket: this._bucketName,
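The restructuring above addresses a constraint of S3 multipart uploads: every part except the last must be at least 5 MiB, or completing the upload fails with EntityTooSmall. Previously each prepared chunk was sent as its own part, so small batches could produce undersized parts. The patch coalesces chunks into accumulatedBuffer and flushes it through uploadPart only once it reaches MIN_PART_SIZE, with one final flush after streaming ends. A standalone sketch of the same pattern, with a hypothetical uploadPart callback standing in for the driver's UploadPartCommand plumbing:

const MIN_PART_SIZE = 5 * 1024 * 1024; // S3 minimum for all but the last part

async function uploadInParts(chunks, uploadPart) {
    let pending = Buffer.alloc(0);
    for (const chunk of chunks) {
        // Coalesce small chunks until there is enough data for a valid part.
        pending = Buffer.concat([pending, Buffer.from(chunk)]);
        if (pending.length >= MIN_PART_SIZE) {
            await uploadPart(pending); // every non-final part is >= 5 MiB
            pending = Buffer.alloc(0); // start accumulating the next part
        }
    }
    if (pending.length > 0) {
        await uploadPart(pending); // the last part may be any size
    }
}

One trade-off worth noting: Buffer.concat copies the accumulated bytes on every chunk, so collecting chunks in an array and concatenating once per flush would avoid repeated copies; at a 5 MB flush threshold the difference is usually minor.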
package/engines/file/FileExporter.js
CHANGED
@@ -63,7 +63,7 @@ class FileExporterClass {
     const chunks = [];
     for (let i = 0; i < records.length; i += chunkSize) {
         const chunk = records.slice(i, i + chunkSize);
-        chunks.push(chunk.join(separator));
+        chunks.push(chunk.join(separator) + separator);
     }
     return chunks;
 };
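The FileExporter change likely exists to support the part-based upload above: prepareBatch's chunks are now concatenated into larger buffers, and a plain join leaves no separator between the last record of one chunk and the first record of the next. Appending the separator to each chunk keeps record boundaries intact across concatenation, at the cost of a trailing separator at the end of the file, which most line-based formats tolerate. A small demonstration:

// Why the trailing separator matters: chunks are later concatenated
// (see the S3Driver change above), so a purely in-between join glues
// the last record of one chunk to the first record of the next.
const records = ['r1', 'r2', 'r3', 'r4'];
const chunkSize = 2;
const separator = '\n';

const joinOnly = [];
const joinPlusSeparator = [];
for (let i = 0; i < records.length; i += chunkSize) {
    const chunk = records.slice(i, i + chunkSize);
    joinOnly.push(chunk.join(separator));
    joinPlusSeparator.push(chunk.join(separator) + separator);
}

console.log(JSON.stringify(joinOnly.join('')));          // "r1\nr2r3\nr4" -- records merged
console.log(JSON.stringify(joinPlusSeparator.join(''))); // "r1\nr2\nr3\nr4\n" -- intact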