@forzalabs/remora 0.2.6 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/Constants.js +10 -2
  2. package/actions/debug.js +1 -0
  3. package/actions/deploy.js +1 -0
  4. package/actions/run.js +17 -13
  5. package/actions/sample.js +1 -1
  6. package/core/Algo.js +8 -4
  7. package/definitions/ExecutorDefinitions.js +2 -0
  8. package/definitions/json_schemas/consumer-schema.json +1 -1
  9. package/definitions/json_schemas/producer-schema.json +1 -1
  10. package/definitions/temp.js +2 -0
  11. package/drivers/DeltaShareDriver.js +4 -0
  12. package/drivers/DriverFactory.js +10 -10
  13. package/drivers/DriverHelper.js +33 -10
  14. package/drivers/HttpApiDriver.js +4 -0
  15. package/drivers/LocalDriver.js +73 -6
  16. package/drivers/RedshiftDriver.js +4 -0
  17. package/drivers/S3Driver.js +36 -52
  18. package/drivers/files/LocalDestinationDriver.js +200 -0
  19. package/drivers/files/LocalSourceDriver.js +394 -0
  20. package/drivers/s3/S3DestinationDriver.js +159 -0
  21. package/drivers/s3/S3SourceDriver.js +455 -0
  22. package/engines/ai/LLM.js +0 -11
  23. package/engines/consumer/ConsumerEngine.js +0 -77
  24. package/engines/consumer/ConsumerManager.js +61 -36
  25. package/engines/consumer/ConsumerOnFinishManager.js +14 -0
  26. package/engines/consumer/PostProcessor.js +1 -7
  27. package/engines/dataset/Dataset.js +0 -61
  28. package/engines/dataset/DatasetManager.js +16 -76
  29. package/engines/dataset/DatasetRecord.js +4 -3
  30. package/engines/deployment/DeploymentPlanner.js +0 -7
  31. package/engines/execution/ExecutionPlanner.js +2 -2
  32. package/engines/execution/RequestExecutor.js +4 -45
  33. package/engines/file/FileExporter.js +7 -32
  34. package/engines/parsing/CSVParser.js +27 -26
  35. package/engines/parsing/LineParser.js +52 -0
  36. package/engines/parsing/XMLParser.js +1 -1
  37. package/engines/producer/ProducerEngine.js +0 -45
  38. package/engines/scheduler/CronScheduler.js +12 -4
  39. package/engines/scheduler/QueueManager.js +11 -4
  40. package/engines/sql/SQLCompiler.js +4 -4
  41. package/engines/transform/JoinEngine.js +3 -3
  42. package/engines/transform/TransformationEngine.js +3 -89
  43. package/engines/usage/UsageManager.js +8 -6
  44. package/engines/validation/Validator.js +12 -18
  45. package/executors/ConsumerExecutor.js +152 -0
  46. package/executors/Executor.js +168 -0
  47. package/executors/ExecutorOrchestrator.js +315 -0
  48. package/executors/ExecutorPerformance.js +17 -0
  49. package/executors/ExecutorProgress.js +52 -0
  50. package/executors/OutputExecutor.js +118 -0
  51. package/executors/ProducerExecutor.js +108 -0
  52. package/package.json +3 -3
  53. package/workers/ExecutorWorker.js +48 -0
package/Constants.js CHANGED
@@ -1,7 +1,7 @@
  "use strict";
  Object.defineProperty(exports, "__esModule", { value: true });
  const CONSTANTS = {
- cliVersion: '0.2.6',
+ cliVersion: '1.0.0',
  backendVersion: 1,
  backendPort: 5088,
  workerVersion: 2,
@@ -19,7 +19,15 @@ const CONSTANTS = {
  MIN_RUNTIME_HEAP_MB: 4000,
  RECOMMENDED_RUNTIME_HEAP_MB: 8000,
  INDICATIVE_THREAD_LINE_COUNT: 750000,
- MAX_THREAD_COUNT: 8
+ MAX_THREAD_COUNT: 8,
+ /**
+ * Minimum file size to consider parallel processing (10 MB)
+ */
+ MIN_FILE_SIZE_FOR_PARALLEL: 10 * 1024 * 1024,
+ /**
+ * Minimum chunk size per worker to justify overhead (2 MB)
+ */
+ MIN_CHUNK_SIZE: 2 * 1024 * 1024
  }
  };
  exports.default = CONSTANTS;
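
Editorial note: the two new constants gate the parallel file processing added with the executor/worker modules listed above. A minimal sketch of how such thresholds are typically combined (the helper name and the use of MAX_THREAD_COUNT here are assumptions, not code from the package):

    const Constants = require('./Constants').default; // path as required inside the package

    // Hypothetical helper: pick a worker count for a file of `fileSizeBytes`.
    const pickWorkerCount = (fileSizeBytes) => {
        if (fileSizeBytes < Constants.MIN_FILE_SIZE_FOR_PARALLEL)
            return 1; // below 10 MB, a single thread avoids worker start-up overhead
        const byChunkSize = Math.floor(fileSizeBytes / Constants.MIN_CHUNK_SIZE); // give each worker at least 2 MB
        return Math.max(1, Math.min(Constants.MAX_THREAD_COUNT, byChunkSize));
    };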
package/actions/debug.js CHANGED
@@ -19,6 +19,7 @@ const DriverFactory_1 = __importDefault(require("../drivers/DriverFactory"));
  const Environment_1 = __importDefault(require("../engines/Environment"));
  const compile_1 = require("./compile");
  const debug = (options) => __awaiter(void 0, void 0, void 0, function* () {
+ void options;
  try {
  (0, compile_1.compile)();
  console.log('\n');
package/actions/deploy.js CHANGED
@@ -54,6 +54,7 @@ const deploy = (options) => __awaiter(void 0, void 0, void 0, function* () {
  const version = Constants_1.default.workerVersion;
  const workerAPI = `${host}/cli/v${version}/uploaddeployment`;
  const formData = new FormData();
+ // @ts-ignore
  const blob = new Blob([zipBuffer], { type: 'application/zip' });
  formData.append('remora_config', blob, 'temp_deployment.zip'); // Updated to match the actual file name
  const apiKey = process.env.REMORA_LICENCE_KEY;
package/actions/run.js CHANGED
@@ -14,17 +14,15 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
  Object.defineProperty(exports, "__esModule", { value: true });
  exports.run = void 0;
  const chalk_1 = __importDefault(require("chalk"));
- const ora_1 = __importDefault(require("ora"));
  const Environment_1 = __importDefault(require("../engines/Environment"));
- const ConsumerEngine_1 = __importDefault(require("../engines/consumer/ConsumerEngine"));
  const compile_1 = require("./compile");
  const Helper_1 = __importDefault(require("../helper/Helper"));
  const LicenceManager_1 = __importDefault(require("../licencing/LicenceManager"));
+ const ExecutorOrchestrator_1 = __importDefault(require("../executors/ExecutorOrchestrator"));
  const run = (consumerName, options) => __awaiter(void 0, void 0, void 0, function* () {
  try {
  (0, compile_1.compile)();
- console.log(); // needed for newline
- const spinner = (0, ora_1.default)(chalk_1.default.blue('Running consumer(s)...\n')).start();
+ console.log(chalk_1.default.blue('Running consumer(s)...\n')); // needed for newline
  const consumersToExecute = [];
  if (consumerName && consumerName.length > 0) {
  const cons = Environment_1.default.getConsumer(consumerName);
@@ -52,7 +50,14 @@ const run = (consumerName, options) => __awaiter(void 0, void 0, void 0, functio
  console.error(`Invalid Remora licence key, the product is not active: remember to set "REMORA_LICENCE_KEY" environment variable.`);
  process.exit(1);
  }
- const response = yield ConsumerEngine_1.default.execute(consumer, {}, { _id: check.customer, name: check.customer, type: 'licence' }, { invokedBy: 'CLI' });
+ const response = yield ExecutorOrchestrator_1.default.launch({
+ consumer,
+ details: {
+ invokedBy: 'CLI',
+ user: { _id: check.customer, name: check.customer, type: 'licence' }
+ },
+ logProgress: true
+ });
  results.push({ success: true, consumer, response });
  }
  catch (error) {
@@ -62,15 +67,14 @@ const run = (consumerName, options) => __awaiter(void 0, void 0, void 0, functio
  console.log(myErr.stack);
  }
  }
- spinner.succeed('All consumers have been executed:');
  results.forEach(({ response, consumer, success, error }) => {
  if (success) {
- const { _stats: { size, elapsedMS } } = response;
- const rowCount = size;
+ const { elapsedMS, outputCount, resultUri } = response;
+ const rowCount = outputCount;
  const duration = Helper_1.default.formatDuration(elapsedMS);
  const performanceInfo = chalk_1.default.gray(` (${rowCount} rows, ${duration})`);
- if (response.fileUri)
- console.log(chalk_1.default.green(`• Consumer ${consumer.name} -> ${response.fileUri}`) + performanceInfo);
+ if (resultUri)
+ console.log(chalk_1.default.green(`• Consumer ${consumer.name} -> ${resultUri}`) + performanceInfo);
  else
  console.log(chalk_1.default.green(`• Consumer ${consumer.name} `) + performanceInfo);
  }
@@ -82,11 +86,11 @@ const run = (consumerName, options) => __awaiter(void 0, void 0, void 0, functio
  const successfulResults = results.filter(x => x.success);
  const totalRows = successfulResults.reduce((sum, result) => {
  var _a, _b;
- return sum + ((_b = (_a = result.response) === null || _a === void 0 ? void 0 : _a._stats.size) !== null && _b !== void 0 ? _b : 0);
+ return sum + ((_b = (_a = result.response) === null || _a === void 0 ? void 0 : _a.outputCount) !== null && _b !== void 0 ? _b : 0);
  }, 0);
  const totalDuration = successfulResults.reduce((sum, result) => {
- var _a, _b;
- return sum + (((_b = (_a = result.response) === null || _a === void 0 ? void 0 : _a._stats) === null || _b === void 0 ? void 0 : _b.elapsedMS) || 0);
+ var _a;
+ return sum + (((_a = result.response) === null || _a === void 0 ? void 0 : _a.elapsedMS) || 0);
  }, 0);
  const totalsInfo = chalk_1.default.gray(` (${totalRows} rows, ${Helper_1.default.formatDuration(totalDuration)})`);
  if (results.some(x => !x.success))
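
Editorial note: the CLI run path now goes through the new ExecutorOrchestrator instead of ConsumerEngine, and the result it reads exposes elapsedMS, outputCount and resultUri rather than _stats.size and fileUri. A hedged sketch of the call shape as it appears in this diff (the customer values are illustrative):

    const ExecutorOrchestrator = require('./executors/ExecutorOrchestrator').default;

    const runConsumer = async (consumer, customerId) => {
        const response = await ExecutorOrchestrator.launch({
            consumer,
            details: {
                invokedBy: 'CLI',
                user: { _id: customerId, name: customerId, type: 'licence' }
            },
            logProgress: true
        });
        // Fields read by run.js above: outputCount, elapsedMS and (optionally) resultUri.
        console.log(`${response.outputCount} rows in ${response.elapsedMS} ms` +
            (response.resultUri ? ` -> ${response.resultUri}` : ''));
        return response;
    };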
package/actions/sample.js CHANGED
@@ -93,7 +93,7 @@ const sampleFromConsumer = (consumer, sampleSize) => __awaiter(void 0, void 0, v
  const mappedData = rawSampleData.map(record => {
  const mappedRecord = new DatasetRecord_1.default('', [], record._delimiter);
  consumer.fields.forEach(field => {
- if (field.key !== '*' && !field.grouping) {
+ if (field.key !== '*') {
  const sourceValue = record.getValue(field.key);
  const outputKey = field.alias || field.key;
  mappedRecord.setValue(outputKey, sourceValue);
package/core/Algo.js CHANGED
@@ -122,13 +122,15 @@ const algo = {
  },
  mean: (numbers) => {
  (0, Affirm_1.default)(algo.hasVal(numbers), 'Array must not be null or undefined');
- (0, Affirm_1.default)(numbers.length > 0, 'Array must be non-empty');
+ if (numbers.length === 0)
+ return 0;
  const total = algo.sum(numbers);
  return total / numbers.length;
  },
  sum: (numbers) => {
  (0, Affirm_1.default)(algo.hasVal(numbers), 'Array must not be null or undefined');
- (0, Affirm_1.default)(numbers.length > 0, 'Array must be non-empty');
+ if (numbers.length === 0)
+ return 0;
  let total = 0;
  for (let i = 0; i < numbers.length; i++) {
  total += numbers[i];
@@ -142,12 +144,14 @@ const algo = {
  },
  min: (arr) => {
  (0, Affirm_1.default)(algo.hasVal(arr), 'Array must not be null or undefined');
- (0, Affirm_1.default)(arr.length > 0, 'Array must be non-empty');
+ if (arr.length === 0)
+ return 0;
  return Math.min(...arr);
  },
  max: (arr) => {
  (0, Affirm_1.default)(algo.hasVal(arr), 'Array must not be null or undefined');
- (0, Affirm_1.default)(arr.length > 0, 'Array must be non-empty');
+ if (arr.length === 0)
+ return 0;
  return Math.max(...arr);
  },
  replaceAll: (text, search, replace) => text.replace(new RegExp(search, 'g'), replace),
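
Editorial note: mean, sum, min and max no longer assert on empty arrays; they return 0 instead of throwing (null or undefined input is still rejected by the hasVal check). A small illustration, assuming Algo is loaded the way other modules in the package load it:

    const Algo = require('./core/Algo').default;

    // 0.2.6 threw 'Array must be non-empty' for all four calls; 1.0.0 returns 0.
    Algo.mean([]); // 0
    Algo.sum([]);  // 0
    Algo.min([]);  // 0 (not Infinity, and no error)
    Algo.max([]);  // 0
    Algo.mean([2, 4, 6]); // 4 — behaviour for non-empty arrays is unchanged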
package/definitions/ExecutorDefinitions.js CHANGED
@@ -0,0 +1,2 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
package/definitions/json_schemas/consumer-schema.json CHANGED
@@ -845,7 +845,7 @@
  },
  {
  "type": "object",
- "description": "Apply conditional logic to transform values based on comparison conditions",
+ "description": "Apply conditional logic to transform values based on comparison conditions.",
  "properties": {
  "conditional": {
  "type": "object",
package/definitions/json_schemas/producer-schema.json CHANGED
@@ -148,7 +148,7 @@
  "XML",
  "PARQUET"
  ],
- "description": "The type of file to read"
+ "description": "The type of file to read."
  },
  "delimiter": {
  "type": "string",
package/definitions/temp.js CHANGED
@@ -0,0 +1,2 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
package/drivers/DeltaShareDriver.js CHANGED
@@ -177,6 +177,10 @@ class DeltaShareSourceDriver {
  .map(x => JSON.parse(x));
  return deltaLines;
  });
+ this.ready = (producer) => __awaiter(this, void 0, void 0, function* () {
+ void producer;
+ throw new Error('DeltaShareSourceDriver.ready is not supported: Delta Sharing does not support readiness checks');
+ });
  }
  }
  exports.default = DeltaShareSourceDriver;
package/drivers/DriverFactory.js CHANGED
@@ -12,23 +12,24 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
  return (mod && mod.__esModule) ? mod : { "default": mod };
  };
  Object.defineProperty(exports, "__esModule", { value: true });
- const LocalDriver_1 = require("./LocalDriver");
  const RedshiftDriver_1 = __importDefault(require("./RedshiftDriver"));
- const S3Driver_1 = require("./S3Driver");
  const DeltaShareDriver_1 = __importDefault(require("./DeltaShareDriver"));
- const HttpApiDriver_1 = require("./HttpApiDriver");
+ const HttpApiDriver_1 = __importDefault(require("./HttpApiDriver"));
+ const LocalSourceDriver_1 = __importDefault(require("./files/LocalSourceDriver"));
+ const LocalDestinationDriver_1 = __importDefault(require("./files/LocalDestinationDriver"));
+ const S3SourceDriver_1 = __importDefault(require("./s3/S3SourceDriver"));
+ const S3DestinationDriver_1 = __importDefault(require("./s3/S3DestinationDriver"));
  class DriverFactoryClass {
  constructor() {
  this.instantiateSource = (source) => __awaiter(this, void 0, void 0, function* () {
  switch (source.engine) {
- // TODO: implement all the other engines
  case 'aws-redshift': {
  const driver = new RedshiftDriver_1.default();
  yield driver.init(source);
  return driver;
  }
  case 'aws-s3': {
- const driver = new S3Driver_1.S3SourceDriver();
+ const driver = new S3SourceDriver_1.default();
  yield driver.init(source);
  return driver;
  }
@@ -38,12 +39,12 @@ class DriverFactoryClass {
  return driver;
  }
  case 'local': {
- const driver = new LocalDriver_1.LocalSourceDriver();
+ const driver = new LocalSourceDriver_1.default();
  yield driver.init(source);
  return driver;
  }
  case 'http-api': {
- const driver = new HttpApiDriver_1.HttpApiSourceDriver();
+ const driver = new HttpApiDriver_1.default();
  yield driver.init(source);
  return driver;
  }
@@ -52,14 +53,13 @@ class DriverFactoryClass {
  });
  this.instantiateDestination = (source) => __awaiter(this, void 0, void 0, function* () {
  switch (source.engine) {
- // TODO: implement all the other engines
  case 'aws-s3': {
- const driver = new S3Driver_1.S3DestinationDriver();
+ const driver = new S3DestinationDriver_1.default();
  yield driver.init(source);
  return driver;
  }
  case 'local': {
- const driver = new LocalDriver_1.LocalDestinationDriver();
+ const driver = new LocalDestinationDriver_1.default();
  yield driver.init(source);
  return driver;
  }
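
Editorial note: beyond the factory itself, these hunks reflect a module reshuffle: the S3 and local drivers now live in dedicated files under drivers/s3/ and drivers/files/ and are default exports, and HttpApiDriver is consumed through its default export as well. A hedged before/after sketch for anyone requiring the drivers directly (relative paths shown as they are used inside the package):

    // 0.2.6 — named exports from the combined modules:
    // const { S3SourceDriver, S3DestinationDriver } = require('./drivers/S3Driver');
    // const { LocalSourceDriver, LocalDestinationDriver } = require('./drivers/LocalDriver');

    // 1.0.0 — default exports from dedicated modules, exactly as DriverFactory now requires them:
    const S3SourceDriver = require('./drivers/s3/S3SourceDriver').default;
    const S3DestinationDriver = require('./drivers/s3/S3DestinationDriver').default;
    const LocalSourceDriver = require('./drivers/files/LocalSourceDriver').default;
    const LocalDestinationDriver = require('./drivers/files/LocalDestinationDriver').default;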
package/drivers/DriverHelper.js CHANGED
@@ -23,12 +23,12 @@ const stream_1 = require("stream");
  const readline_1 = require("readline");
  const promises_1 = require("stream/promises");
  const fs_1 = require("fs");
+ const path_1 = __importDefault(require("path"));
  const Logger_1 = __importDefault(require("../helper/Logger"));
  const Affirm_1 = __importDefault(require("../core/Affirm"));
  const XLSParser_1 = __importDefault(require("../engines/parsing/XLSParser"));
- const path_1 = __importDefault(require("path"));
- const Constants_1 = __importDefault(require("../Constants"));
  const XMLParser_1 = __importDefault(require("../engines/parsing/XMLParser"));
+ const Constants_1 = __importDefault(require("../Constants"));
  const DriverHelper = {
  appendToUnifiedFile: (options) => __awaiter(void 0, void 0, void 0, function* () {
  (0, Affirm_1.default)(options, 'Invalid options');
@@ -37,7 +37,7 @@ const DriverHelper = {
  const keys = (fileType === 'JSON' || fileType === 'JSONL')
  ? Object.keys(JSON.parse(headerLine))
  : [];
- const shouldValidateHeader = fileType === 'CSV' || fileType === 'XLS' || (fileType === 'TXT' && hasHeaderRow === true);
+ const shouldValidateHeader = fileType === 'CSV' || (fileType === 'TXT' && hasHeaderRow === true);
  // When sourceFilename is set, the headerLine includes $source_filename at the end.
  // For validation, we need to compare against the original header without this suffix.
  const originalHeaderLine = sourceFilename
@@ -154,14 +154,9 @@
  return lineCount;
  }),
  quickReadFile: (filePath, lineCount) => __awaiter(void 0, void 0, void 0, function* () {
- const fileStream = (0, fs_1.createReadStream)(filePath);
- const lines = yield DriverHelper.quickReadStream(fileStream, lineCount);
- fileStream.close();
- return lines;
- }),
- quickReadStream: (stream, lineCount) => __awaiter(void 0, void 0, void 0, function* () {
  var _a, e_1, _b, _c;
- const rl = (0, readline_1.createInterface)({ input: stream, crlfDelay: Infinity });
+ const fileStream = (0, fs_1.createReadStream)(filePath);
+ const rl = (0, readline_1.createInterface)({ input: fileStream, crlfDelay: Infinity });
  const lines = [];
  let counter = 0;
  try {
@@ -184,6 +179,7 @@ const DriverHelper = {
  finally { if (e_1) throw e_1.error; }
  }
  rl.close();
+ fileStream.close();
  return lines;
  }),
  setHeaderFromFile: (fileKey, file, filePath, dataset) => __awaiter(void 0, void 0, void 0, function* () {
@@ -220,6 +216,33 @@
  default:
  throw new Error(`the fileType "${file.fileType}" is not implemented yet`);
  }
+ }),
+ quickReadStream: (stream, lineCount) => __awaiter(void 0, void 0, void 0, function* () {
+ var _a, e_2, _b, _c;
+ const rl = (0, readline_1.createInterface)({ input: stream, crlfDelay: Infinity });
+ const lines = [];
+ let counter = 0;
+ try {
+ for (var _d = true, rl_2 = __asyncValues(rl), rl_2_1; rl_2_1 = yield rl_2.next(), _a = rl_2_1.done, !_a; _d = true) {
+ _c = rl_2_1.value;
+ _d = false;
+ const line = _c;
+ lines.push(line);
+ counter++;
+ if (counter >= lineCount) {
+ break;
+ }
+ }
+ }
+ catch (e_2_1) { e_2 = { error: e_2_1 }; }
+ finally {
+ try {
+ if (!_d && !_a && (_b = rl_2.return)) yield _b.call(rl_2);
+ }
+ finally { if (e_2) throw e_2.error; }
+ }
+ rl.close();
+ return lines;
  })
  };
  exports.default = DriverHelper;
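
Editorial note: quickReadFile now owns its stream end to end (open, read, close), and a stream-based quickReadStream is reintroduced further down so callers with an already-open stream (for example an S3 response body) can peek at the first lines. A short usage sketch (the file path is illustrative):

    const DriverHelper = require('./drivers/DriverHelper').default;
    const { createReadStream } = require('fs');

    const preview = async () => {
        // Reads at most the first 5 lines of the file, then closes the readline interface and the stream.
        const fromFile = await DriverHelper.quickReadFile('./data/sample.csv', 5);
        // Same line-limited read over an existing readable stream; the stream itself is not closed here.
        const fromStream = await DriverHelper.quickReadStream(createReadStream('./data/sample.csv'), 5);
        return { fromFile, fromStream };
    };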
package/drivers/HttpApiDriver.js CHANGED
@@ -198,6 +198,10 @@ class HttpApiSourceDriver {
  }
  return itemsData;
  };
+ this.ready = (producer) => {
+ void producer;
+ throw new Error('Not implemented yet');
+ };
  }
  }
  exports.HttpApiSourceDriver = HttpApiSourceDriver;
package/drivers/LocalDriver.js CHANGED
@@ -59,13 +59,14 @@ const readline_1 = __importDefault(require("readline"));
  const Affirm_1 = __importDefault(require("../core/Affirm"));
  const Algo_1 = __importDefault(require("../core/Algo"));
  const xlsx_1 = __importDefault(require("xlsx"));
- const XMLParser_1 = __importDefault(require("../engines/parsing/XMLParser")); // Added XMLParser import
+ const XMLParser_1 = __importDefault(require("../engines/parsing/XMLParser"));
  const XLSParser_1 = __importDefault(require("../engines/parsing/XLSParser"));
  const Helper_1 = __importDefault(require("../helper/Helper"));
  const ParseHelper_1 = __importDefault(require("../engines/parsing/ParseHelper"));
  const FileExporter_1 = __importDefault(require("../engines/file/FileExporter"));
  const Logger_1 = __importDefault(require("../helper/Logger"));
  const DriverHelper_1 = __importDefault(require("./DriverHelper"));
+ const stream_1 = require("stream");
  class LocalSourceDriver {
  constructor() {
  this.init = (source) => __awaiter(this, void 0, void 0, function* () {
@@ -147,8 +148,7 @@ class LocalSourceDriver {
  sourceFilename
  });
  });
- // this function copy the local file on the temporary file and retrive the number of line.
- const handleFileAndGetLineCount = (fileKey, appendMode, fileType, sourceFilename) => __awaiter(this, void 0, void 0, function* () {
+ const getTotalLineCount = (fileKey, appendMode, fileType, sourceFilename) => __awaiter(this, void 0, void 0, function* () {
  let totalLineCount;
  let streamXLS;
  switch (fileType) {
@@ -158,7 +158,7 @@ class LocalSourceDriver {
  totalLineCount = yield copyLocally(fileKey, dataset.getFirstLine(), appendMode, sourceFilename, streamXLS);
  break;
  default:
- totalLineCount = yield copyLocally(fileKey, dataset.getFirstLine(), appendMode, sourceFilename);
+ totalLineCount = yield copyLocally(fileKey, dataset.getFirstLine(), false, sourceFilename);
  break;
  }
  return totalLineCount;
@@ -177,7 +177,7 @@ class LocalSourceDriver {
  const currentFileKey = allFileKeys[i];
  // Pass the filename (just the basename) if includeSourceFilename is enabled
  const sourceFilename = includeSourceFilename ? path_1.default.basename(currentFileKey) : undefined;
- totalLineCount += yield handleFileAndGetLineCount(currentFileKey, i > 0, file.fileType, sourceFilename); // Append mode for subsequent files
+ totalLineCount += yield getTotalLineCount(currentFileKey, true, file.fileType, sourceFilename); // Append mode for subsequent files
  }
  dataset.setCount(totalLineCount);
  return dataset;
@@ -185,7 +185,7 @@ class LocalSourceDriver {
  else {
  sourceFilename = includeSourceFilename ? path_1.default.basename(fileKey) : undefined;
  yield DriverHelper_1.default.setHeaderFromFile(fileKey, file, this._path, dataset);
- totalLineCount = (yield handleFileAndGetLineCount(fileKey, false, file.fileType, sourceFilename));
+ totalLineCount = (yield getTotalLineCount(fileKey, false, file.fileType, sourceFilename));
  dataset.setCount(totalLineCount);
  return dataset;
  }
@@ -376,6 +376,24 @@ class LocalSourceDriver {
  }
  fs.renameSync(sourceFilePath, destinationFilePath);
  };
+ this.ready = (producer) => __awaiter(this, void 0, void 0, function* () {
+ (0, Affirm_1.default)(producer, 'Invalid producer');
+ const { fileKey } = producer.settings;
+ if (fileKey.includes('%')) {
+ const allFileKeys = this.listFiles(fileKey);
+ const allFilePaths = allFileKeys.map(x => path_1.default.join(this._path, x));
+ const readStreams = allFilePaths.map(x => fs.createReadStream(x));
+ let pass = new stream_1.PassThrough();
+ for (const [index, stream] of readStreams.entries())
+ pass = stream.pipe(pass, { end: index === readStreams.length - 1 });
+ return pass;
+ }
+ else {
+ const sourceFilePath = path_1.default.join(this._path, fileKey);
+ const readStream = fs.createReadStream(sourceFilePath);
+ return readStream;
+ }
+ });
  }
  }
  exports.LocalSourceDriver = LocalSourceDriver;
@@ -470,6 +488,55 @@ class LocalDestinationDriver {
  const fileContent = yield s3Driver.downloadFile(sourceFileKey);
  yield this.saveFile(destinationFileKey, fileContent);
  });
+ this.ready = (destinationPath) => __awaiter(this, void 0, void 0, function* () {
+ return fs.createWriteStream(destinationPath);
+ });
+ this.move = (fromPath, toName) => __awaiter(this, void 0, void 0, function* () {
+ try {
+ const toFilePath = path_1.default.join(this._path, toName);
+ fs.renameSync(fromPath, toFilePath);
+ return { bucket: '', key: toFilePath, res: true };
+ }
+ catch (error) {
+ Logger_1.default.error(error);
+ return { bucket: '', key: '', res: false };
+ }
+ });
+ this.transformAndMove = (fromPath, transform, toName) => __awaiter(this, void 0, void 0, function* () {
+ var _a, e_2, _b, _c;
+ try {
+ const toFilePath = path_1.default.join(this._path, toName);
+ const decoder = new TextDecoder();
+ const reader = fs.createReadStream(fromPath);
+ const writer = fs.createWriteStream(toFilePath);
+ try {
+ for (var _d = true, reader_2 = __asyncValues(reader), reader_2_1; reader_2_1 = yield reader_2.next(), _a = reader_2_1.done, !_a; _d = true) {
+ _c = reader_2_1.value;
+ _d = false;
+ const chunk = _c;
+ const decoded = decoder.decode(chunk);
+ const lines = decoded.split('\n');
+ for (const line of lines) {
+ writer.write(transform(line) + '\n');
+ }
+ }
+ }
+ catch (e_2_1) { e_2 = { error: e_2_1 }; }
+ finally {
+ try {
+ if (!_d && !_a && (_b = reader_2.return)) yield _b.call(reader_2);
+ }
+ finally { if (e_2) throw e_2.error; }
+ }
+ writer.close();
+ reader.close();
+ return { bucket: '', key: toFilePath, res: true };
+ }
+ catch (error) {
+ Logger_1.default.error(error);
+ return { bucket: '', key: '', res: false };
+ }
+ });
  }
  }
  exports.LocalDestinationDriver = LocalDestinationDriver;
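
Editorial note: LocalSourceDriver.ready(producer) returns a raw read stream for the producer's file, or pipes every file matched by a '%' wildcard into a single PassThrough, while LocalDestinationDriver gains ready, move and transformAndMove; the other drivers above still stub ready() with a throw. A minimal usage sketch for transformAndMove, assuming the driver is initialised the same way DriverFactory initialises it (paths and the transform are illustrative):

    const { LocalDestinationDriver } = require('./drivers/LocalDriver');

    const finalizeOutput = async (destination) => {
        const driver = new LocalDestinationDriver();
        await driver.init(destination); // same init(source) call DriverFactory performs before returning a driver
        // Rewrites each line of the temp file while moving it under the driver's base path.
        const { res, key } = await driver.transformAndMove(
            '/tmp/consumer-output.partial',   // illustrative temp file produced by an executor
            line => line.trimEnd(),           // illustrative per-line transform
            'consumer-output.csv'             // final name, joined onto the driver's configured path
        );
        if (!res) throw new Error('transformAndMove failed (error was logged by the driver)');
        return key; // absolute path of the written file
    };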
package/drivers/RedshiftDriver.js CHANGED
@@ -175,6 +175,10 @@ class RedshiftDriver {
  }
  return records;
  };
+ this.ready = (producer) => __awaiter(this, void 0, void 0, function* () {
+ void producer;
+ throw new Error('Not implemented yet');
+ });
  }
  }
  exports.default = RedshiftDriver;
package/drivers/S3Driver.js CHANGED
@@ -34,7 +34,6 @@ const FileExporter_1 = __importDefault(require("../engines/file/FileExporter"));
  const DriverHelper_1 = __importDefault(require("./DriverHelper"));
  const Logger_1 = __importDefault(require("../helper/Logger"));
  const Constants_1 = __importDefault(require("../Constants"));
- const XLSParser_1 = __importDefault(require("../engines/parsing/XLSParser"));
  class S3DestinationDriver {
  constructor() {
  this.init = (source) => __awaiter(this, void 0, void 0, function* () {
@@ -225,16 +224,7 @@ class S3SourceDriver {
  });
  const response = yield this._client.send(command);
  (0, Affirm_1.default)(response.Body, 'Failed to fetch object from S3');
- let stream;
- switch (file.fileType) {
- case 'XLS':
- case 'XLSX':
- stream = yield XLSParser_1.default.parseXLSStream(response.Body, file.sheetName);
- break;
- default:
- stream = response.Body;
- break;
- }
+ const stream = response.Body;
  return DriverHelper_1.default.appendToUnifiedFile({
  stream,
  fileKey: fileUrl,
@@ -248,45 +238,6 @@ class S3SourceDriver {
  });
  });
  const { fileKey } = file;
- const setFirstLineFromStream = (stream) => __awaiter(this, void 0, void 0, function* () {
- var _a, e_1, _b, _c;
- const rl = readline_1.default.createInterface({ input: stream, crlfDelay: Infinity });
- let firstLine = '';
- switch (file.fileType) {
- case 'XLSX':
- case 'XLS':
- firstLine = yield XLSParser_1.default.getHeaderXlsFromStream(stream, file.sheetName);
- break;
- case 'CSV':
- case 'JSON':
- case 'JSONL':
- case 'TXT':
- try {
- for (var _d = true, rl_1 = __asyncValues(rl), rl_1_1; rl_1_1 = yield rl_1.next(), _a = rl_1_1.done, !_a; _d = true) {
- _c = rl_1_1.value;
- _d = false;
- const line = _c;
- firstLine = line;
- break;
- }
- }
- catch (e_1_1) { e_1 = { error: e_1_1 }; }
- finally {
- try {
- if (!_d && !_a && (_b = rl_1.return)) yield _b.call(rl_1);
- }
- finally { if (e_1) throw e_1.error; }
- }
- rl.close();
- break;
- }
- // If including source filename, append a placeholder column name to the header
- if (file.includeSourceFilename) {
- firstLine = firstLine + dataset.getDelimiter() + Constants_1.default.SOURCE_FILENAME_COLUMN;
- }
- dataset.setFirstLine(firstLine);
- return firstLine;
- });
  if (fileKey.includes('%')) {
  const allFileKeys = yield this.listFiles(fileKey);
  Logger_1.default.log(`Matched ${allFileKeys.length} files, copying locally and creating unified dataset.`);
@@ -299,7 +250,12 @@ class S3SourceDriver {
  const firstFileResponse = yield this._client.send(firstFileCommand);
  (0, Affirm_1.default)(firstFileResponse.Body, 'Failed to fetch first file from S3');
  const firstFileStream = firstFileResponse.Body;
- const headerLine = yield setFirstLineFromStream(firstFileStream);
+ let headerLine = yield this.getFirstLineFromStream(firstFileStream);
+ // If including source filename, append a placeholder column name to the header
+ if (includeSourceFilename) {
+ headerLine = headerLine + dataset.getDelimiter() + Constants_1.default.SOURCE_FILENAME_COLUMN;
+ }
+ dataset.setFirstLine(headerLine);
  let totalLineCount = 0;
  // Download files sequentially to avoid file conflicts
  for (let i = 0; i < allFileKeys.length; i++) {
@@ -320,7 +276,12 @@ class S3SourceDriver {
  const firstFileResponse = yield this._client.send(firstFileCommand);
  (0, Affirm_1.default)(firstFileResponse.Body, 'Failed to fetch first file from S3');
  const firstFileStream = firstFileResponse.Body;
- const headerLine = yield setFirstLineFromStream(firstFileStream);
+ let headerLine = yield this.getFirstLineFromStream(firstFileStream);
+ // If including source filename, append a placeholder column name to the header
+ if (includeSourceFilename) {
+ headerLine = headerLine + dataset.getDelimiter() + Constants_1.default.SOURCE_FILENAME_COLUMN;
+ }
+ dataset.setFirstLine(headerLine);
  // Pass the filename if includeSourceFilename is enabled
  const sourceFilename = includeSourceFilename ? path_1.default.basename(fileKey) : undefined;
  const totalLineCount = yield downloadLocally(fileKey, headerLine, false, sourceFilename);
@@ -328,6 +289,29 @@ class S3SourceDriver {
  return dataset;
  }
  });
+ this.getFirstLineFromStream = (stream) => __awaiter(this, void 0, void 0, function* () {
+ var _a, e_1, _b, _c;
+ const rl = readline_1.default.createInterface({ input: stream, crlfDelay: Infinity });
+ let firstLine = '';
+ try {
+ for (var _d = true, rl_1 = __asyncValues(rl), rl_1_1; rl_1_1 = yield rl_1.next(), _a = rl_1_1.done, !_a; _d = true) {
+ _c = rl_1_1.value;
+ _d = false;
+ const line = _c;
+ firstLine = line;
+ break;
+ }
+ }
+ catch (e_1_1) { e_1 = { error: e_1_1 }; }
+ finally {
+ try {
+ if (!_d && !_a && (_b = rl_1.return)) yield _b.call(rl_1);
+ }
+ finally { if (e_1) throw e_1.error; }
+ }
+ rl.close();
+ return firstLine;
+ });
  this.exist = (producer) => __awaiter(this, void 0, void 0, function* () {
  var _a;
  (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "connect()" first');
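
Editorial note: both S3 branches now read the header with the generic getFirstLineFromStream helper and, when includeSourceFilename is set, append the SOURCE_FILENAME_COLUMN placeholder before storing it on the dataset. A small illustration of that header handling (delimiter and column names are made up; the comments in DriverHelper above indicate the placeholder is $source_filename):

    const Constants = require('./Constants').default;

    const buildHeader = (headerLine, delimiter, includeSourceFilename) =>
        includeSourceFilename
            ? headerLine + delimiter + Constants.SOURCE_FILENAME_COLUMN
            : headerLine;

    // buildHeader('id,name,amount', ',', true) -> 'id,name,amount,$source_filename' (assuming that constant value)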