@forzalabs/remora 0.0.48-nasco.3 → 0.0.49-nasco.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Constants.js +1 -1
- package/drivers/DriverHelper.js +5 -3
- package/drivers/LocalDriver.js +27 -7
- package/drivers/S3Driver.js +11 -1
- package/package.json +1 -1
package/Constants.js
CHANGED
package/drivers/DriverHelper.js
CHANGED
|
@@ -24,9 +24,12 @@ const readline_1 = require("readline");
|
|
|
24
24
|
const promises_1 = require("stream/promises");
|
|
25
25
|
const fs_1 = require("fs");
|
|
26
26
|
const Logger_1 = __importDefault(require("../helper/Logger"));
|
|
27
|
+
const Affirm_1 = __importDefault(require("../core/Affirm"));
|
|
27
28
|
const DriverHelper = {
|
|
28
|
-
appendToUnifiedFile: (
|
|
29
|
-
|
|
29
|
+
appendToUnifiedFile: (options) => __awaiter(void 0, void 0, void 0, function* () {
|
|
30
|
+
(0, Affirm_1.default)(options, 'Invalid options');
|
|
31
|
+
const { append, destinationPath, fileKey, headerLine, stream, fileType, hasHeaderRow } = options;
|
|
32
|
+
const shouldValidateHeader = fileType === 'CSV' || (fileType === 'TXT' && hasHeaderRow === true);
|
|
30
33
|
let isFirstLine = true;
|
|
31
34
|
let hasValidatedHeader = shouldValidateHeader ? false : true;
|
|
32
35
|
const headerValidationTransform = new stream_1.Transform({
|
|
@@ -36,7 +39,6 @@ const DriverHelper = {
|
|
|
36
39
|
const lines = chunkStr.split('\n');
|
|
37
40
|
if (isFirstLine && lines.length > 0) {
|
|
38
41
|
const firstLine = lines[0];
|
|
39
|
-
// Validate header only for CSV and TXT files
|
|
40
42
|
if (shouldValidateHeader && headerLine && headerLine.trim() !== '' && firstLine.trim() !== headerLine.trim()) {
|
|
41
43
|
const msg = `Error creating unified dataset: file "${fileKey}" has a different header line than the other files in this dataset\n\t-${fileKey}: ${firstLine}\n\t-main: ${headerLine}`;
|
|
42
44
|
Logger_1.default.log(msg);
|
package/drivers/LocalDriver.js
CHANGED
|
@@ -83,12 +83,15 @@ class LocalSourceDriver {
|
|
|
83
83
|
const { fileKey } = request;
|
|
84
84
|
if (fileKey.includes('%')) {
|
|
85
85
|
const allFileKeys = this.listFiles(fileKey);
|
|
86
|
-
|
|
86
|
+
Logger_1.default.log(`Matched ${allFileKeys.length} files, copying to locally and creating unified dataset.`);
|
|
87
|
+
const firstPath = path_1.default.join(this._path, allFileKeys[0]);
|
|
88
|
+
const headerLine = (yield DriverHelper_1.default.quickReadFile(firstPath, 1))[0];
|
|
89
|
+
const promises = allFileKeys.map((x, i) => this._get(Object.assign(Object.assign({}, request), { fileKey: x }), headerLine, i));
|
|
87
90
|
const results = yield Promise.all(promises);
|
|
88
91
|
return results.flat();
|
|
89
92
|
}
|
|
90
93
|
else {
|
|
91
|
-
return yield this._get(request);
|
|
94
|
+
return yield this._get(request, '');
|
|
92
95
|
}
|
|
93
96
|
});
|
|
94
97
|
this.readLinesInRange = (request) => __awaiter(this, void 0, void 0, function* () {
|
|
@@ -102,12 +105,15 @@ class LocalSourceDriver {
|
|
|
102
105
|
const { fileKey } = request;
|
|
103
106
|
if (fileKey.includes('%')) {
|
|
104
107
|
const allFileKeys = this.listFiles(fileKey);
|
|
105
|
-
|
|
108
|
+
Logger_1.default.log(`Matched ${allFileKeys.length} files, copying to locally and creating unified dataset.`);
|
|
109
|
+
const firstPath = path_1.default.join(this._path, allFileKeys[0]);
|
|
110
|
+
const headerLine = (yield DriverHelper_1.default.quickReadFile(firstPath, 1))[0];
|
|
111
|
+
const promises = allFileKeys.map((x, i) => this._get(Object.assign(Object.assign({}, request), { fileKey: x }), headerLine, i));
|
|
106
112
|
const results = yield Promise.all(promises);
|
|
107
113
|
return results.flat();
|
|
108
114
|
}
|
|
109
115
|
else {
|
|
110
|
-
return yield this._get(request);
|
|
116
|
+
return yield this._get(request, '');
|
|
111
117
|
}
|
|
112
118
|
});
|
|
113
119
|
this.download = (dataset) => __awaiter(this, void 0, void 0, function* () {
|
|
@@ -122,12 +128,20 @@ class LocalSourceDriver {
|
|
|
122
128
|
(0, Affirm_1.default)(fs.existsSync(sourceFilePath), `Source file does not exist: ${sourceFilePath}`);
|
|
123
129
|
// Copy and validate header in a single stream pass
|
|
124
130
|
const readStream = fs.createReadStream(sourceFilePath);
|
|
125
|
-
return DriverHelper_1.default.appendToUnifiedFile(
|
|
131
|
+
return DriverHelper_1.default.appendToUnifiedFile({
|
|
132
|
+
stream: readStream,
|
|
133
|
+
fileKey,
|
|
134
|
+
destinationPath: dataset.getPath(),
|
|
135
|
+
append: appendMode,
|
|
136
|
+
headerLine,
|
|
137
|
+
fileType: file.fileType,
|
|
138
|
+
hasHeaderRow: file.hasHeaderRow
|
|
139
|
+
});
|
|
126
140
|
});
|
|
127
141
|
const { fileKey } = file;
|
|
128
142
|
if (fileKey.includes('%')) {
|
|
129
143
|
const allFileKeys = this.listFiles(fileKey);
|
|
130
|
-
Logger_1.default.log(`Matched ${allFileKeys.length} files, copying
|
|
144
|
+
Logger_1.default.log(`Matched ${allFileKeys.length} files, copying locally and creating unified dataset.`);
|
|
131
145
|
// Get header line from the first file
|
|
132
146
|
const headerLine = (yield DriverHelper_1.default.quickReadFile(path_1.default.join(this._path, allFileKeys[0]), 1))[0];
|
|
133
147
|
// Copy files sequentially to avoid file conflicts
|
|
@@ -222,7 +236,7 @@ class LocalSourceDriver {
|
|
|
222
236
|
}
|
|
223
237
|
return lines;
|
|
224
238
|
});
|
|
225
|
-
this._get = (request, index) => __awaiter(this, void 0, void 0, function* () {
|
|
239
|
+
this._get = (request, headerLine, index) => __awaiter(this, void 0, void 0, function* () {
|
|
226
240
|
const { fileKey, fileType, options } = request;
|
|
227
241
|
let lineFrom, lineTo, sheetName, hasHeaderRow;
|
|
228
242
|
if (options) {
|
|
@@ -257,6 +271,12 @@ class LocalSourceDriver {
|
|
|
257
271
|
lines = yield this._readXmlLines(fileUrl);
|
|
258
272
|
break;
|
|
259
273
|
}
|
|
274
|
+
const firstLine = lines[0];
|
|
275
|
+
if (headerLine && headerLine.trim() !== '' && firstLine.trim() !== headerLine.trim()) {
|
|
276
|
+
const msg = `Error creating unified dataset: file "${fileKey}" has a different header line than the other files in this dataset\n\t-${fileKey}: ${firstLine}\n\t-main: ${headerLine}`;
|
|
277
|
+
Logger_1.default.log(msg);
|
|
278
|
+
throw new Error(msg);
|
|
279
|
+
}
|
|
260
280
|
// If this is not the first file read in a pattern match AND the file type has a header,
|
|
261
281
|
// then I need to remove the header from the resulting lines or the header will be duplicated
|
|
262
282
|
if (index > 0 && ParseHelper_1.default.shouldHaveHeader(fileType, hasHeaderRow)) {
|
package/drivers/S3Driver.js
CHANGED
|
@@ -31,6 +31,7 @@ const Helper_1 = __importDefault(require("../helper/Helper"));
|
|
|
31
31
|
const ParseHelper_1 = __importDefault(require("../engines/parsing/ParseHelper"));
|
|
32
32
|
const FileExporter_1 = __importDefault(require("../engines/file/FileExporter"));
|
|
33
33
|
const DriverHelper_1 = __importDefault(require("./DriverHelper"));
|
|
34
|
+
const Logger_1 = __importDefault(require("../helper/Logger"));
|
|
34
35
|
class S3DestinationDriver {
|
|
35
36
|
constructor() {
|
|
36
37
|
this.init = (source) => __awaiter(this, void 0, void 0, function* () {
|
|
@@ -235,11 +236,20 @@ class S3SourceDriver {
|
|
|
235
236
|
const response = yield this._client.send(command);
|
|
236
237
|
(0, Affirm_1.default)(response.Body, 'Failed to fetch object from S3');
|
|
237
238
|
const stream = response.Body;
|
|
238
|
-
return DriverHelper_1.default.appendToUnifiedFile(
|
|
239
|
+
return DriverHelper_1.default.appendToUnifiedFile({
|
|
240
|
+
stream,
|
|
241
|
+
fileKey: fileUrl,
|
|
242
|
+
destinationPath: dataset.getPath(),
|
|
243
|
+
append: appendMode,
|
|
244
|
+
headerLine,
|
|
245
|
+
fileType: file.fileType,
|
|
246
|
+
hasHeaderRow: file.hasHeaderRow
|
|
247
|
+
});
|
|
239
248
|
});
|
|
240
249
|
const { fileKey } = file;
|
|
241
250
|
if (fileKey.includes('%')) {
|
|
242
251
|
const allFileKeys = yield this.listFiles(fileKey);
|
|
252
|
+
Logger_1.default.log(`Matched ${allFileKeys.length} files, copying locally and creating unified dataset.`);
|
|
243
253
|
// Get header line from the first file
|
|
244
254
|
const firstFileCommand = new client_s3_1.GetObjectCommand({
|
|
245
255
|
Bucket: this._bucketName,
|