@forzalabs/remora 0.0.48-nasco.3 → 0.0.49-nasco.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Constants.js CHANGED
@@ -1,7 +1,7 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  const CONSTANTS = {
4
- cliVersion: '0.0.48-nasco',
4
+ cliVersion: '0.0.49-nasco',
5
5
  lambdaVersion: 1,
6
6
  port: 5069,
7
7
  defaults: {
@@ -24,9 +24,12 @@ const readline_1 = require("readline");
24
24
  const promises_1 = require("stream/promises");
25
25
  const fs_1 = require("fs");
26
26
  const Logger_1 = __importDefault(require("../helper/Logger"));
27
+ const Affirm_1 = __importDefault(require("../core/Affirm"));
27
28
  const DriverHelper = {
28
- appendToUnifiedFile: (stream, fileKey, destinationPath, append, headerLine, fileType) => __awaiter(void 0, void 0, void 0, function* () {
29
- const shouldValidateHeader = fileType === 'CSV' || fileType === 'TXT';
29
+ appendToUnifiedFile: (options) => __awaiter(void 0, void 0, void 0, function* () {
30
+ (0, Affirm_1.default)(options, 'Invalid options');
31
+ const { append, destinationPath, fileKey, headerLine, stream, fileType, hasHeaderRow } = options;
32
+ const shouldValidateHeader = fileType === 'CSV' || (fileType === 'TXT' && hasHeaderRow === true);
30
33
  let isFirstLine = true;
31
34
  let hasValidatedHeader = shouldValidateHeader ? false : true;
32
35
  const headerValidationTransform = new stream_1.Transform({
@@ -36,7 +39,6 @@ const DriverHelper = {
36
39
  const lines = chunkStr.split('\n');
37
40
  if (isFirstLine && lines.length > 0) {
38
41
  const firstLine = lines[0];
39
- // Validate header only for CSV and TXT files
40
42
  if (shouldValidateHeader && headerLine && headerLine.trim() !== '' && firstLine.trim() !== headerLine.trim()) {
41
43
  const msg = `Error creating unified dataset: file "${fileKey}" has a different header line than the other files in this dataset\n\t-${fileKey}: ${firstLine}\n\t-main: ${headerLine}`;
42
44
  Logger_1.default.log(msg);
@@ -83,12 +83,15 @@ class LocalSourceDriver {
83
83
  const { fileKey } = request;
84
84
  if (fileKey.includes('%')) {
85
85
  const allFileKeys = this.listFiles(fileKey);
86
- const promises = allFileKeys.map((x, i) => this._get(Object.assign(Object.assign({}, request), { fileKey: x }), i));
86
+ Logger_1.default.log(`Matched ${allFileKeys.length} files, copying to locally and creating unified dataset.`);
87
+ const firstPath = path_1.default.join(this._path, allFileKeys[0]);
88
+ const headerLine = (yield DriverHelper_1.default.quickReadFile(firstPath, 1))[0];
89
+ const promises = allFileKeys.map((x, i) => this._get(Object.assign(Object.assign({}, request), { fileKey: x }), headerLine, i));
87
90
  const results = yield Promise.all(promises);
88
91
  return results.flat();
89
92
  }
90
93
  else {
91
- return yield this._get(request);
94
+ return yield this._get(request, '');
92
95
  }
93
96
  });
94
97
  this.readLinesInRange = (request) => __awaiter(this, void 0, void 0, function* () {
@@ -102,12 +105,15 @@ class LocalSourceDriver {
102
105
  const { fileKey } = request;
103
106
  if (fileKey.includes('%')) {
104
107
  const allFileKeys = this.listFiles(fileKey);
105
- const promises = allFileKeys.map((x, i) => this._get(Object.assign(Object.assign({}, request), { fileKey: x }), i));
108
+ Logger_1.default.log(`Matched ${allFileKeys.length} files, copying to locally and creating unified dataset.`);
109
+ const firstPath = path_1.default.join(this._path, allFileKeys[0]);
110
+ const headerLine = (yield DriverHelper_1.default.quickReadFile(firstPath, 1))[0];
111
+ const promises = allFileKeys.map((x, i) => this._get(Object.assign(Object.assign({}, request), { fileKey: x }), headerLine, i));
106
112
  const results = yield Promise.all(promises);
107
113
  return results.flat();
108
114
  }
109
115
  else {
110
- return yield this._get(request);
116
+ return yield this._get(request, '');
111
117
  }
112
118
  });
113
119
  this.download = (dataset) => __awaiter(this, void 0, void 0, function* () {
@@ -122,12 +128,20 @@ class LocalSourceDriver {
122
128
  (0, Affirm_1.default)(fs.existsSync(sourceFilePath), `Source file does not exist: ${sourceFilePath}`);
123
129
  // Copy and validate header in a single stream pass
124
130
  const readStream = fs.createReadStream(sourceFilePath);
125
- return DriverHelper_1.default.appendToUnifiedFile(readStream, fileKey, dataset.getPath(), appendMode, headerLine, file.fileType);
131
+ return DriverHelper_1.default.appendToUnifiedFile({
132
+ stream: readStream,
133
+ fileKey,
134
+ destinationPath: dataset.getPath(),
135
+ append: appendMode,
136
+ headerLine,
137
+ fileType: file.fileType,
138
+ hasHeaderRow: file.hasHeaderRow
139
+ });
126
140
  });
127
141
  const { fileKey } = file;
128
142
  if (fileKey.includes('%')) {
129
143
  const allFileKeys = this.listFiles(fileKey);
130
- Logger_1.default.log(`Matched ${allFileKeys.length} files, copying to locally and creating unified dataset.`);
144
+ Logger_1.default.log(`Matched ${allFileKeys.length} files, copying locally and creating unified dataset.`);
131
145
  // Get header line from the first file
132
146
  const headerLine = (yield DriverHelper_1.default.quickReadFile(path_1.default.join(this._path, allFileKeys[0]), 1))[0];
133
147
  // Copy files sequentially to avoid file conflicts
@@ -222,7 +236,7 @@ class LocalSourceDriver {
222
236
  }
223
237
  return lines;
224
238
  });
225
- this._get = (request, index) => __awaiter(this, void 0, void 0, function* () {
239
+ this._get = (request, headerLine, index) => __awaiter(this, void 0, void 0, function* () {
226
240
  const { fileKey, fileType, options } = request;
227
241
  let lineFrom, lineTo, sheetName, hasHeaderRow;
228
242
  if (options) {
@@ -257,6 +271,12 @@ class LocalSourceDriver {
257
271
  lines = yield this._readXmlLines(fileUrl);
258
272
  break;
259
273
  }
274
+ const firstLine = lines[0];
275
+ if (headerLine && headerLine.trim() !== '' && firstLine.trim() !== headerLine.trim()) {
276
+ const msg = `Error creating unified dataset: file "${fileKey}" has a different header line than the other files in this dataset\n\t-${fileKey}: ${firstLine}\n\t-main: ${headerLine}`;
277
+ Logger_1.default.log(msg);
278
+ throw new Error(msg);
279
+ }
260
280
  // If this is not the first file read in a pattern match AND the file type has a header,
261
281
  // then I need to remove the header from the resulting lines or the header will be duplicated
262
282
  if (index > 0 && ParseHelper_1.default.shouldHaveHeader(fileType, hasHeaderRow)) {
@@ -31,6 +31,7 @@ const Helper_1 = __importDefault(require("../helper/Helper"));
31
31
  const ParseHelper_1 = __importDefault(require("../engines/parsing/ParseHelper"));
32
32
  const FileExporter_1 = __importDefault(require("../engines/file/FileExporter"));
33
33
  const DriverHelper_1 = __importDefault(require("./DriverHelper"));
34
+ const Logger_1 = __importDefault(require("../helper/Logger"));
34
35
  class S3DestinationDriver {
35
36
  constructor() {
36
37
  this.init = (source) => __awaiter(this, void 0, void 0, function* () {
@@ -235,11 +236,20 @@ class S3SourceDriver {
235
236
  const response = yield this._client.send(command);
236
237
  (0, Affirm_1.default)(response.Body, 'Failed to fetch object from S3');
237
238
  const stream = response.Body;
238
- return DriverHelper_1.default.appendToUnifiedFile(stream, fileUrl, dataset.getPath(), appendMode, headerLine, file.fileType);
239
+ return DriverHelper_1.default.appendToUnifiedFile({
240
+ stream,
241
+ fileKey: fileUrl,
242
+ destinationPath: dataset.getPath(),
243
+ append: appendMode,
244
+ headerLine,
245
+ fileType: file.fileType,
246
+ hasHeaderRow: file.hasHeaderRow
247
+ });
239
248
  });
240
249
  const { fileKey } = file;
241
250
  if (fileKey.includes('%')) {
242
251
  const allFileKeys = yield this.listFiles(fileKey);
252
+ Logger_1.default.log(`Matched ${allFileKeys.length} files, copying locally and creating unified dataset.`);
243
253
  // Get header line from the first file
244
254
  const firstFileCommand = new client_s3_1.GetObjectCommand({
245
255
  Bucket: this._bucketName,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forzalabs/remora",
3
- "version": "0.0.48-nasco.3",
3
+ "version": "0.0.49-nasco.3",
4
4
  "description": "A powerful CLI tool for seamless data translation.",
5
5
  "main": "index.js",
6
6
  "private": false,