@forzalabs/remora 0.0.46-nasco.3 → 0.0.48-nasco.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Constants.js CHANGED
@@ -1,7 +1,7 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  const CONSTANTS = {
4
- cliVersion: '0.0.46-nasco',
4
+ cliVersion: '0.0.48-nasco',
5
5
  lambdaVersion: 1,
6
6
  port: 5069,
7
7
  defaults: {
@@ -177,6 +177,10 @@
177
177
  "hidden": {
178
178
  "type": "boolean",
179
179
  "description": "If set, the field is kept and used during processing, but omitted when exporting the data"
180
+ },
181
+ "fixed": {
182
+ "type": "boolean",
183
+ "description": "If set, \"default\" must have a value. This field is not searched in the underlying dataset, but is a fixed value set by the \"default\" prop."
180
184
  }
181
185
  },
182
186
  "required": [
@@ -436,6 +440,10 @@
436
440
  "hidden": {
437
441
  "type": "boolean",
438
442
  "description": "If set, the field is kept and used during processing, but omitted when exporting the data"
443
+ },
444
+ "fixed": {
445
+ "type": "boolean",
446
+ "description": "If set, \"default\" must have a value. This field is not searched in the underlying dataset, but is a fixed value set by the \"default\" prop."
439
447
  }
440
448
  },
441
449
  "required": [
@@ -0,0 +1,86 @@
1
+ "use strict";
2
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
+ return new (P || (P = Promise))(function (resolve, reject) {
5
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
9
+ });
10
+ };
11
+ var __asyncValues = (this && this.__asyncValues) || function (o) {
12
+ if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
13
+ var m = o[Symbol.asyncIterator], i;
14
+ return m ? m.call(o) : (o = typeof __values === "function" ? __values(o) : o[Symbol.iterator](), i = {}, verb("next"), verb("throw"), verb("return"), i[Symbol.asyncIterator] = function () { return this; }, i);
15
+ function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }
16
+ function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }
17
+ };
18
+ var __importDefault = (this && this.__importDefault) || function (mod) {
19
+ return (mod && mod.__esModule) ? mod : { "default": mod };
20
+ };
21
+ Object.defineProperty(exports, "__esModule", { value: true });
22
+ const stream_1 = require("stream");
23
+ const readline_1 = require("readline");
24
+ const promises_1 = require("stream/promises");
25
+ const fs_1 = require("fs");
26
+ const Logger_1 = __importDefault(require("../helper/Logger"));
27
+ const DriverHelper = {
28
+ appendToUnifiedFile: (stream, fileKey, destinationPath, append, headerLine, fileType) => __awaiter(void 0, void 0, void 0, function* () {
29
+ const shouldValidateHeader = fileType === 'CSV' || fileType === 'TXT';
30
+ let isFirstLine = true;
31
+ let hasValidatedHeader = shouldValidateHeader ? false : true;
32
+ const headerValidationTransform = new stream_1.Transform({
33
+ transform(chunk, encoding, callback) {
34
+ if (!hasValidatedHeader) {
35
+ const chunkStr = chunk.toString();
36
+ const lines = chunkStr.split('\n');
37
+ if (isFirstLine && lines.length > 0) {
38
+ const firstLine = lines[0];
39
+ // Validate header only for CSV and TXT files
40
+ if (shouldValidateHeader && headerLine && headerLine.trim() !== '' && firstLine.trim() !== headerLine.trim()) {
41
+ const msg = `Error creating unified dataset: file "${fileKey}" has a different header line than the other files in this dataset\n\t-${fileKey}: ${firstLine}\n\t-main: ${headerLine}`;
42
+ Logger_1.default.log(msg);
43
+ return callback(new Error(msg));
44
+ }
45
+ hasValidatedHeader = true;
46
+ isFirstLine = false;
47
+ }
48
+ }
49
+ callback(null, chunk);
50
+ }
51
+ });
52
+ const writeOptions = append ? { flags: 'a' } : {};
53
+ const writeStream = (0, fs_1.createWriteStream)(destinationPath, writeOptions);
54
+ yield (0, promises_1.pipeline)(stream, headerValidationTransform, writeStream);
55
+ }),
56
+ quickReadFile: (filePath, lineCount) => __awaiter(void 0, void 0, void 0, function* () {
57
+ var _a, e_1, _b, _c;
58
+ const fileStream = (0, fs_1.createReadStream)(filePath);
59
+ const rl = (0, readline_1.createInterface)({ input: fileStream, crlfDelay: Infinity });
60
+ const lines = [];
61
+ let counter = 0;
62
+ try {
63
+ for (var _d = true, rl_1 = __asyncValues(rl), rl_1_1; rl_1_1 = yield rl_1.next(), _a = rl_1_1.done, !_a; _d = true) {
64
+ _c = rl_1_1.value;
65
+ _d = false;
66
+ const line = _c;
67
+ lines.push(line);
68
+ counter++;
69
+ if (counter >= lineCount) {
70
+ break;
71
+ }
72
+ }
73
+ }
74
+ catch (e_1_1) { e_1 = { error: e_1_1 }; }
75
+ finally {
76
+ try {
77
+ if (!_d && !_a && (_b = rl_1.return)) yield _b.call(rl_1);
78
+ }
79
+ finally { if (e_1) throw e_1.error; }
80
+ }
81
+ rl.close();
82
+ fileStream.close();
83
+ return lines;
84
+ })
85
+ };
86
+ exports.default = DriverHelper;
@@ -63,6 +63,8 @@ const XMLParser_1 = __importDefault(require("../engines/parsing/XMLParser")); //
63
63
  const Helper_1 = __importDefault(require("../helper/Helper"));
64
64
  const ParseHelper_1 = __importDefault(require("../engines/parsing/ParseHelper"));
65
65
  const FileExporter_1 = __importDefault(require("../engines/file/FileExporter"));
66
+ const Logger_1 = __importDefault(require("../helper/Logger"));
67
+ const DriverHelper_1 = __importDefault(require("./DriverHelper"));
66
68
  class LocalSourceDriver {
67
69
  constructor() {
68
70
  this.init = (source) => __awaiter(this, void 0, void 0, function* () {
@@ -115,30 +117,28 @@ class LocalSourceDriver {
115
117
  (0, Affirm_1.default)(file, 'Invalid dataset file');
116
118
  (0, Affirm_1.default)(file.fileKey, 'Invalid file key');
117
119
  (0, Affirm_1.default)(file.fileType, `Invalid file type`);
118
- const copyLocally = (fileKey_1, ...args_1) => __awaiter(this, [fileKey_1, ...args_1], void 0, function* (fileKey, appendMode = false) {
120
+ const copyLocally = (fileKey_1, headerLine_1, ...args_1) => __awaiter(this, [fileKey_1, headerLine_1, ...args_1], void 0, function* (fileKey, headerLine, appendMode = false) {
119
121
  const sourceFilePath = path_1.default.join(this._path, fileKey);
120
122
  (0, Affirm_1.default)(fs.existsSync(sourceFilePath), `Source file does not exist: ${sourceFilePath}`);
121
- const writeOptions = appendMode ? { flags: 'a' } : {};
123
+ // Copy and validate header in a single stream pass
122
124
  const readStream = fs.createReadStream(sourceFilePath);
123
- const writeStream = fs.createWriteStream(dataset.getPath(), writeOptions);
124
- return new Promise((resolve, reject) => {
125
- readStream.pipe(writeStream);
126
- writeStream.on('finish', resolve);
127
- writeStream.on('error', reject);
128
- readStream.on('error', reject);
129
- });
125
+ return DriverHelper_1.default.appendToUnifiedFile(readStream, fileKey, dataset.getPath(), appendMode, headerLine, file.fileType);
130
126
  });
131
127
  const { fileKey } = file;
132
128
  if (fileKey.includes('%')) {
133
129
  const allFileKeys = this.listFiles(fileKey);
130
+ Logger_1.default.log(`Matched ${allFileKeys.length} files, copying to locally and creating unified dataset.`);
131
+ // Get header line from the first file
132
+ const headerLine = (yield DriverHelper_1.default.quickReadFile(path_1.default.join(this._path, allFileKeys[0]), 1))[0];
134
133
  // Copy files sequentially to avoid file conflicts
135
134
  for (let i = 0; i < allFileKeys.length; i++) {
136
- yield copyLocally(allFileKeys[i], i > 0); // Append mode for subsequent files
135
+ yield copyLocally(allFileKeys[i], headerLine, i > 0); // Append mode for subsequent files
137
136
  }
138
137
  return dataset;
139
138
  }
140
139
  else {
141
- yield copyLocally(fileKey);
140
+ // For single file, no header validation needed
141
+ yield copyLocally(fileKey, '', false);
142
142
  return dataset;
143
143
  }
144
144
  });
@@ -29,9 +29,8 @@ const xlsx_1 = __importDefault(require("xlsx"));
29
29
  const XMLParser_1 = __importDefault(require("../engines/parsing/XMLParser")); // Added XMLParser import
30
30
  const Helper_1 = __importDefault(require("../helper/Helper"));
31
31
  const ParseHelper_1 = __importDefault(require("../engines/parsing/ParseHelper"));
32
- const promises_1 = require("stream/promises");
33
- const fs_1 = require("fs");
34
32
  const FileExporter_1 = __importDefault(require("../engines/file/FileExporter"));
33
+ const DriverHelper_1 = __importDefault(require("./DriverHelper"));
35
34
  class S3DestinationDriver {
36
35
  constructor() {
37
36
  this.init = (source) => __awaiter(this, void 0, void 0, function* () {
@@ -227,7 +226,8 @@ class S3SourceDriver {
227
226
  (0, Affirm_1.default)(file, 'Invalid dataset file');
228
227
  (0, Affirm_1.default)(file.fileKey, 'Invalid file key');
229
228
  (0, Affirm_1.default)(file.fileType, `Invalid file type`);
230
- const downloadLocally = (fileUrl_1, ...args_1) => __awaiter(this, [fileUrl_1, ...args_1], void 0, function* (fileUrl, appendMode = false) {
229
+ const downloadLocally = (fileUrl_1, headerLine_1, ...args_1) => __awaiter(this, [fileUrl_1, headerLine_1, ...args_1], void 0, function* (fileUrl, headerLine, appendMode = false) {
230
+ // Download and validate header in a single stream pass
231
231
  const command = new client_s3_1.GetObjectCommand({
232
232
  Bucket: this._bucketName,
233
233
  Key: fileUrl
@@ -235,23 +235,55 @@ class S3SourceDriver {
235
235
  const response = yield this._client.send(command);
236
236
  (0, Affirm_1.default)(response.Body, 'Failed to fetch object from S3');
237
237
  const stream = response.Body;
238
- const writeOptions = appendMode ? { flags: 'a' } : {};
239
- yield (0, promises_1.pipeline)(stream, (0, fs_1.createWriteStream)(dataset.getPath(), writeOptions));
238
+ return DriverHelper_1.default.appendToUnifiedFile(stream, fileUrl, dataset.getPath(), appendMode, headerLine, file.fileType);
240
239
  });
241
240
  const { fileKey } = file;
242
241
  if (fileKey.includes('%')) {
243
242
  const allFileKeys = yield this.listFiles(fileKey);
243
+ // Get header line from the first file
244
+ const firstFileCommand = new client_s3_1.GetObjectCommand({
245
+ Bucket: this._bucketName,
246
+ Key: allFileKeys[0]
247
+ });
248
+ const firstFileResponse = yield this._client.send(firstFileCommand);
249
+ (0, Affirm_1.default)(firstFileResponse.Body, 'Failed to fetch first file from S3');
250
+ const firstFileStream = firstFileResponse.Body;
251
+ const headerLine = yield this.getFirstLineFromStream(firstFileStream);
244
252
  // Download files sequentially to avoid file conflicts
245
253
  for (let i = 0; i < allFileKeys.length; i++) {
246
- yield downloadLocally(allFileKeys[i], i > 0); // Append mode for subsequent files
254
+ yield downloadLocally(allFileKeys[i], headerLine, i > 0); // Append mode for subsequent files
247
255
  }
248
256
  return dataset;
249
257
  }
250
258
  else {
251
- yield downloadLocally(fileKey);
259
+ // For single file, no header validation needed
260
+ yield downloadLocally(fileKey, '');
252
261
  return dataset;
253
262
  }
254
263
  });
264
+ this.getFirstLineFromStream = (stream) => __awaiter(this, void 0, void 0, function* () {
265
+ var _a, e_1, _b, _c;
266
+ const rl = readline_1.default.createInterface({ input: stream, crlfDelay: Infinity });
267
+ let firstLine = '';
268
+ try {
269
+ for (var _d = true, rl_1 = __asyncValues(rl), rl_1_1; rl_1_1 = yield rl_1.next(), _a = rl_1_1.done, !_a; _d = true) {
270
+ _c = rl_1_1.value;
271
+ _d = false;
272
+ const line = _c;
273
+ firstLine = line;
274
+ break;
275
+ }
276
+ }
277
+ catch (e_1_1) { e_1 = { error: e_1_1 }; }
278
+ finally {
279
+ try {
280
+ if (!_d && !_a && (_b = rl_1.return)) yield _b.call(rl_1);
281
+ }
282
+ finally { if (e_1) throw e_1.error; }
283
+ }
284
+ rl.close();
285
+ return firstLine;
286
+ });
255
287
  this.exist = (producer) => __awaiter(this, void 0, void 0, function* () {
256
288
  var _a;
257
289
  (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "connect()" first');
@@ -276,7 +308,7 @@ class S3SourceDriver {
276
308
  }
277
309
  });
278
310
  this._readLines = (stream, lineFrom, lineTo) => __awaiter(this, void 0, void 0, function* () {
279
- var _a, e_1, _b, _c;
311
+ var _a, e_2, _b, _c;
280
312
  const reader = readline_1.default.createInterface({ input: stream, crlfDelay: Infinity });
281
313
  const lines = [];
282
314
  let lineCounter = 0;
@@ -300,19 +332,19 @@ class S3SourceDriver {
300
332
  }
301
333
  }
302
334
  }
303
- catch (e_1_1) { e_1 = { error: e_1_1 }; }
335
+ catch (e_2_1) { e_2 = { error: e_2_1 }; }
304
336
  finally {
305
337
  try {
306
338
  if (!_d && !_a && (_b = reader_1.return)) yield _b.call(reader_1);
307
339
  }
308
- finally { if (e_1) throw e_1.error; }
340
+ finally { if (e_2) throw e_2.error; }
309
341
  }
310
342
  reader.close();
311
343
  return lines;
312
344
  });
313
345
  this._readExcelLines = (stream, sheetName, lineFrom, lineTo) => __awaiter(this, void 0, void 0, function* () {
314
346
  var _a, stream_1, stream_1_1;
315
- var _b, e_2, _c, _d;
347
+ var _b, e_3, _c, _d;
316
348
  (0, Affirm_1.default)(sheetName, `Invalid sheetname`);
317
349
  const chunks = [];
318
350
  try {
@@ -323,12 +355,12 @@ class S3SourceDriver {
323
355
  chunks.push(chunk);
324
356
  }
325
357
  }
326
- catch (e_2_1) { e_2 = { error: e_2_1 }; }
358
+ catch (e_3_1) { e_3 = { error: e_3_1 }; }
327
359
  finally {
328
360
  try {
329
361
  if (!_a && !_b && (_c = stream_1.return)) yield _c.call(stream_1);
330
362
  }
331
- finally { if (e_2) throw e_2.error; }
363
+ finally { if (e_3) throw e_3.error; }
332
364
  }
333
365
  const buffer = Buffer.concat(chunks);
334
366
  const excel = xlsx_1.default.read(buffer, { type: 'buffer' });
@@ -343,7 +375,7 @@ class S3SourceDriver {
343
375
  });
344
376
  this._readXmlLines = (stream, lineFrom, lineTo) => __awaiter(this, void 0, void 0, function* () {
345
377
  var _a, stream_2, stream_2_1;
346
- var _b, e_3, _c, _d;
378
+ var _b, e_4, _c, _d;
347
379
  const chunks = [];
348
380
  try {
349
381
  for (_a = true, stream_2 = __asyncValues(stream); stream_2_1 = yield stream_2.next(), _b = stream_2_1.done, !_b; _a = true) {
@@ -353,12 +385,12 @@ class S3SourceDriver {
353
385
  chunks.push(chunk);
354
386
  }
355
387
  }
356
- catch (e_3_1) { e_3 = { error: e_3_1 }; }
388
+ catch (e_4_1) { e_4 = { error: e_4_1 }; }
357
389
  finally {
358
390
  try {
359
391
  if (!_a && !_b && (_c = stream_2.return)) yield _c.call(stream_2);
360
392
  }
361
- finally { if (e_3) throw e_3.error; }
393
+ finally { if (e_4) throw e_4.error; }
362
394
  }
363
395
  const buffer = Buffer.concat(chunks);
364
396
  const jsonData = XMLParser_1.default.xmlToJson(buffer);
@@ -138,7 +138,7 @@ class ConsumerManagerClass {
138
138
  if (!column) {
139
139
  // If the consumer doesn't find the field in the producer but has a default value AND set_default onError
140
140
  // then instead of failing, create a placeholder column for the producer
141
- if (field.onError === 'set_default' && Algo_1.default.hasVal(field.default)) {
141
+ if (field.fixed === true && Algo_1.default.hasVal(field.default)) {
142
142
  column = {
143
143
  aliasInProducer: field.key,
144
144
  nameInProducer: (_a = field.alias) !== null && _a !== void 0 ? _a : field.key,
@@ -54,16 +54,8 @@ class PostProcessorClass {
54
54
  return newDataset;
55
55
  });
56
56
  this.updateDimensions = (dataset, consumer) => {
57
- const dimensions = dataset.getDimensions();
58
57
  const fields = ConsumerManager_1.default.getExpandedFields(consumer);
59
- for (const dim of dimensions) {
60
- // This dimension is wanted by the consumer, check if it needs renaming
61
- const consumerField = fields.find(x => x.cField.key === dim.name);
62
- if (consumerField) {
63
- const { cField: { key, alias, hidden } } = consumerField;
64
- dataset.updateDimension(key, { name: alias, hidden: hidden });
65
- }
66
- }
58
+ dataset.updateDimensions(fields);
67
59
  return dataset;
68
60
  };
69
61
  this.dropDimensions = (dataset, consumer) => __awaiter(this, void 0, void 0, function* () {
@@ -237,7 +229,9 @@ class PostProcessorClass {
237
229
  });
238
230
  this._getFieldValue = (record, field) => {
239
231
  var _a, _b, _c;
240
- const { key, alias } = field.cField;
232
+ const { key, alias, fixed, default: defaultValue } = field.cField;
233
+ if (fixed && Algo_1.default.hasVal(defaultValue))
234
+ return defaultValue;
241
235
  const fieldKey = alias !== null && alias !== void 0 ? alias : key;
242
236
  const fieldValue = record.getValue(fieldKey);
243
237
  if (Algo_1.default.hasVal(fieldValue) && !isNaN(fieldValue)) {
@@ -31,6 +31,7 @@ const Affirm_1 = __importDefault(require("../../core/Affirm"));
31
31
  const XMLParser_1 = __importDefault(require("../parsing/XMLParser"));
32
32
  const DriverFactory_1 = __importDefault(require("../../drivers/DriverFactory"));
33
33
  const Helper_1 = __importDefault(require("../../helper/Helper"));
34
+ const Algo_1 = __importDefault(require("../../core/Algo"));
34
35
  class Dataset {
35
36
  constructor(name, file, batchSize = Constants_1.default.defaults.MAX_ITEMS_IN_MEMORY) {
36
37
  this.getPath = () => this._path;
@@ -678,16 +679,28 @@ class Dataset {
678
679
  return this;
679
680
  });
680
681
  this.getDimensions = () => this._dimensions;
681
- this.updateDimension = (dimensionName, newValues) => {
682
- const dimension = this._dimensions.find(x => x.name === dimensionName);
683
- (0, Affirm_1.default)(dimension, `Trying to update the dataset dimension "${dimensionName}", but none was found (${this._dimensions.map(x => x.name).join(', ')})`);
684
- this._startOperation('update-dimension');
685
- const { hidden, name } = newValues;
686
- if (name && name.length > 0)
687
- dimension.name = name;
688
- if (hidden)
689
- dimension.hidden = hidden;
690
- this._finishOperation('update-dimension');
682
+ this.updateDimensions = (fields) => {
683
+ this._startOperation('update-dimensions');
684
+ for (const field of fields) {
685
+ const { cField: { key, alias, hidden, fixed, default: defaultValue } } = field;
686
+ const currentDim = this._dimensions.find(x => x.name === key);
687
+ if (currentDim) {
688
+ currentDim.name = alias !== null && alias !== void 0 ? alias : key;
689
+ currentDim.hidden = hidden;
690
+ }
691
+ else if (fixed && Algo_1.default.hasVal(defaultValue)) {
692
+ this._dimensions.push({
693
+ hidden: hidden,
694
+ index: this._dimensions.length,
695
+ key: key,
696
+ name: alias !== null && alias !== void 0 ? alias : key
697
+ });
698
+ }
699
+ else {
700
+ throw new Error(`Trying to update the dataset dimension "${(alias !== null && alias !== void 0 ? alias : key)}", but none was found (${this._dimensions.map(x => x.name).join(', ')})`);
701
+ }
702
+ }
703
+ this._finishOperation('update-dimensions');
691
704
  return this;
692
705
  };
693
706
  this.dropDimensions = (dimensionNames) => __awaiter(this, void 0, void 0, function* () {
@@ -85,6 +85,8 @@ class ExecutionPlannerClas {
85
85
  plan.push(...this._planProducer(producers[0], options));
86
86
  else
87
87
  plan.push(...(producers.flatMap(x => this._planProducer(x, options))));
88
+ // I technically don't need this, but I keep it to merge all the datasets to a single one
89
+ // so the other steps of the plan can work with a single dataset variable
88
90
  plan.push({ type: 'join-producers-data' });
89
91
  if (consumer.filters && consumer.filters.length > 0)
90
92
  plan.push({ type: 'apply-consumer-filters-on-JSON' });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forzalabs/remora",
3
- "version": "0.0.46-nasco.3",
3
+ "version": "0.0.48-nasco.3",
4
4
  "description": "A powerful CLI tool for seamless data translation.",
5
5
  "main": "index.js",
6
6
  "private": false,