@forzalabs/remora 0.0.60-nasco.3 → 0.0.61-nasco.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Constants.js +1 -1
- package/drivers/DriverHelper.js +11 -3
- package/drivers/LocalDriver.js +1 -37
- package/drivers/S3Driver.js +1 -53
- package/package.json +1 -1
package/Constants.js
CHANGED
package/drivers/DriverHelper.js
CHANGED
|
@@ -29,7 +29,9 @@ const DriverHelper = {
|
|
|
29
29
|
appendToUnifiedFile: (options) => __awaiter(void 0, void 0, void 0, function* () {
|
|
30
30
|
(0, Affirm_1.default)(options, 'Invalid options');
|
|
31
31
|
const { append, destinationPath, fileKey, headerLine, stream, fileType, hasHeaderRow, delimiter } = options;
|
|
32
|
-
const keys = (fileType === 'JSON' || fileType === 'JSONL')
|
|
32
|
+
const keys = (fileType === 'JSON' || fileType === 'JSONL')
|
|
33
|
+
? Object.keys(JSON.parse(headerLine))
|
|
34
|
+
: [];
|
|
33
35
|
const shouldValidateHeader = fileType === 'CSV' || (fileType === 'TXT' && hasHeaderRow === true);
|
|
34
36
|
let isFirstLine = true;
|
|
35
37
|
let hasValidatedHeader = shouldValidateHeader ? false : true;
|
|
@@ -99,8 +101,14 @@ const DriverHelper = {
|
|
|
99
101
|
switch (fileType) {
|
|
100
102
|
case 'JSON':
|
|
101
103
|
case 'JSONL': {
|
|
102
|
-
|
|
103
|
-
|
|
104
|
+
try {
|
|
105
|
+
const parsed = JSON.parse(line);
|
|
106
|
+
return keys.map(k => parsed[k]).join(delimiter);
|
|
107
|
+
}
|
|
108
|
+
catch (error) {
|
|
109
|
+
Logger_1.default.log(`Failed parsing line in JSON - index: ${globalIndex}; line: ${line}; err: ${error === null || error === void 0 ? void 0 : error.name}`);
|
|
110
|
+
throw error;
|
|
111
|
+
}
|
|
104
112
|
}
|
|
105
113
|
default:
|
|
106
114
|
return line;
|
package/drivers/LocalDriver.js
CHANGED
|
@@ -156,7 +156,7 @@ class LocalSourceDriver {
|
|
|
156
156
|
// For single file, no header validation needed
|
|
157
157
|
const headerLine = (yield DriverHelper_1.default.quickReadFile(path_1.default.join(this._path, fileKey), 1))[0];
|
|
158
158
|
dataset.setFirstLine(headerLine);
|
|
159
|
-
yield copyLocally(fileKey,
|
|
159
|
+
yield copyLocally(fileKey, headerLine, false);
|
|
160
160
|
return dataset;
|
|
161
161
|
}
|
|
162
162
|
});
|
|
@@ -347,42 +347,6 @@ class LocalDestinationDriver {
|
|
|
347
347
|
throw new Error(`Failed to upload local file "${options.name}": ${error.message}`);
|
|
348
348
|
}
|
|
349
349
|
});
|
|
350
|
-
this.multipartUpload = (options) => __awaiter(this, void 0, void 0, function* () {
|
|
351
|
-
(0, Affirm_1.default)(this._path, 'Path not initialized');
|
|
352
|
-
(0, Affirm_1.default)(options, 'Invalid upload options');
|
|
353
|
-
(0, Affirm_1.default)(options.name, 'File name is required');
|
|
354
|
-
(0, Affirm_1.default)(options.contents && Array.isArray(options.contents), 'Contents must be an array');
|
|
355
|
-
(0, Affirm_1.default)(options.contents.length > 0, 'Contents array cannot be empty');
|
|
356
|
-
const folder = this._path;
|
|
357
|
-
try {
|
|
358
|
-
if (!fs.existsSync(folder)) {
|
|
359
|
-
fs.mkdirSync(folder, { recursive: true });
|
|
360
|
-
}
|
|
361
|
-
const filePath = path_1.default.join(folder, options.name);
|
|
362
|
-
// Create or truncate the file first
|
|
363
|
-
fs.writeFileSync(filePath, '');
|
|
364
|
-
// Append each chunk
|
|
365
|
-
for (const chunk of options.contents) {
|
|
366
|
-
(0, Affirm_1.default)(typeof chunk === 'string', 'Each chunk must be a string');
|
|
367
|
-
fs.appendFileSync(filePath, chunk);
|
|
368
|
-
}
|
|
369
|
-
return { bucket: folder, key: filePath, res: true };
|
|
370
|
-
}
|
|
371
|
-
catch (error) {
|
|
372
|
-
// Clean up the partial file if it exists
|
|
373
|
-
const filePath = path_1.default.join(folder, options.name);
|
|
374
|
-
if (fs.existsSync(filePath)) {
|
|
375
|
-
try {
|
|
376
|
-
fs.unlinkSync(filePath);
|
|
377
|
-
}
|
|
378
|
-
catch (cleanupError) {
|
|
379
|
-
console.error(`Failed to clean up partial file after error: ${cleanupError.message}`);
|
|
380
|
-
throw cleanupError;
|
|
381
|
-
}
|
|
382
|
-
}
|
|
383
|
-
throw new Error(`Failed to complete local multipart upload for "${options.name}": ${error.message}`);
|
|
384
|
-
}
|
|
385
|
-
});
|
|
386
350
|
this.uploadStream = (options) => __awaiter(this, void 0, void 0, function* () {
|
|
387
351
|
(0, Affirm_1.default)(options, `Invalid upload options`);
|
|
388
352
|
const { dataset, name, recordProjection } = options;
|
package/drivers/S3Driver.js
CHANGED
|
@@ -62,58 +62,6 @@ class S3DestinationDriver {
|
|
|
62
62
|
(0, Affirm_1.default)(res.$metadata.httpStatusCode === 200, `Failed to upload the file "${name}" to the bucket "${this._bucketName}": status code ${res.$metadata.httpStatusCode}`);
|
|
63
63
|
return { res: true, key: name, bucket: this._bucketName };
|
|
64
64
|
});
|
|
65
|
-
this.multipartUpload = (options) => __awaiter(this, void 0, void 0, function* () {
|
|
66
|
-
(0, Affirm_1.default)(options, `Invalid upload options`);
|
|
67
|
-
(0, Affirm_1.default)(options.contents && options.contents.length > 0, 'No contents provided for multipart upload');
|
|
68
|
-
(0, Affirm_1.default)(options.name, 'No filename provided for multipart upload');
|
|
69
|
-
try {
|
|
70
|
-
// Create the multipart upload
|
|
71
|
-
const createMultipartUploadRes = yield this._client.send(new client_s3_1.CreateMultipartUploadCommand({
|
|
72
|
-
Bucket: this._bucketName,
|
|
73
|
-
Key: options.name
|
|
74
|
-
}));
|
|
75
|
-
const uploadId = createMultipartUploadRes.UploadId;
|
|
76
|
-
(0, Affirm_1.default)(uploadId, 'Failed to initiate multipart upload');
|
|
77
|
-
// Upload each part
|
|
78
|
-
const uploadPromises = options.contents.map((content, index) => __awaiter(this, void 0, void 0, function* () {
|
|
79
|
-
const partNumber = index + 1;
|
|
80
|
-
const uploadPartRes = yield this._client.send(new client_s3_1.UploadPartCommand({
|
|
81
|
-
Bucket: this._bucketName,
|
|
82
|
-
Key: options.name,
|
|
83
|
-
UploadId: uploadId,
|
|
84
|
-
PartNumber: partNumber,
|
|
85
|
-
Body: Buffer.from(content)
|
|
86
|
-
}));
|
|
87
|
-
return {
|
|
88
|
-
PartNumber: partNumber,
|
|
89
|
-
ETag: uploadPartRes.ETag
|
|
90
|
-
};
|
|
91
|
-
}));
|
|
92
|
-
const uploadedParts = yield Promise.all(uploadPromises);
|
|
93
|
-
// Complete the multipart upload
|
|
94
|
-
const completeRes = yield this._client.send(new client_s3_1.CompleteMultipartUploadCommand({
|
|
95
|
-
Bucket: this._bucketName,
|
|
96
|
-
Key: options.name,
|
|
97
|
-
UploadId: uploadId,
|
|
98
|
-
MultipartUpload: {
|
|
99
|
-
Parts: uploadedParts
|
|
100
|
-
}
|
|
101
|
-
}));
|
|
102
|
-
(0, Affirm_1.default)(completeRes.$metadata.httpStatusCode === 200, `Failed to complete multipart upload for "${options.name}": status code ${completeRes.$metadata.httpStatusCode}`);
|
|
103
|
-
return { res: true, key: options.name, bucket: this._bucketName };
|
|
104
|
-
}
|
|
105
|
-
catch (error) {
|
|
106
|
-
// If anything fails, make sure to abort the multipart upload
|
|
107
|
-
if (error.UploadId) {
|
|
108
|
-
yield this._client.send(new client_s3_1.AbortMultipartUploadCommand({
|
|
109
|
-
Bucket: this._bucketName,
|
|
110
|
-
Key: options.name,
|
|
111
|
-
UploadId: error.UploadId
|
|
112
|
-
}));
|
|
113
|
-
}
|
|
114
|
-
throw error;
|
|
115
|
-
}
|
|
116
|
-
});
|
|
117
65
|
this.uploadStream = (options) => __awaiter(this, void 0, void 0, function* () {
|
|
118
66
|
(0, Affirm_1.default)(options, `Invalid upload options`);
|
|
119
67
|
const { dataset, name, recordProjection } = options;
|
|
@@ -282,7 +230,7 @@ class S3SourceDriver {
|
|
|
282
230
|
const headerLine = yield this.getFirstLineFromStream(firstFileStream);
|
|
283
231
|
dataset.setFirstLine(headerLine);
|
|
284
232
|
// For single file, no header validation needed
|
|
285
|
-
yield downloadLocally(fileKey,
|
|
233
|
+
yield downloadLocally(fileKey, headerLine);
|
|
286
234
|
return dataset;
|
|
287
235
|
}
|
|
288
236
|
});
|