@forzalabs/remora 1.0.11 → 1.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -119,11 +119,11 @@ class ValidatorClass {
119
119
  // For now we only support connecting producers of the same engine type to a consumer, so we give an error if we detect different ones
120
120
  const uniqEngines = Algo_1.default.uniqBy(sources, 'engine');
121
121
  if (uniqEngines.length !== 1)
122
- errors.push(`Sources with different engines were used in the consumer "${consumer.name}" (${uniqEngines.join(', ')})`);
122
+ errors.push(`Sources with different engines are used in the consumer "${consumer.name}" (${uniqEngines.join(', ')})`);
123
123
  // For now we also only support consumers that have producers ALL having the same exact source
124
124
  const uniqNames = Algo_1.default.uniqBy(sources, 'name');
125
125
  if (uniqNames.length !== 1)
126
- errors.push(`Producers with different sources were used in the consumer "${consumer.name}" (${uniqNames.join(', ')})`);
126
+ errors.push(`Producers with different sources are used in the consumer "${consumer.name}" (${uniqNames.join(', ')})`);
127
127
  if (consumer.filters && consumer.filters.length > 0) {
128
128
  if (consumer.filters.some(x => x.sql && x.rule))
129
129
  errors.push(`A single consumer can't have both filters based on SQL and filters based on rules.`);
@@ -188,6 +188,11 @@ class ValidatorClass {
188
188
  errors.push(`The export destination "${output.exportDestination}" was not found in the sources.`);
189
189
  }
190
190
  }
191
+ // Validate distinct
192
+ if (consumer.options) {
193
+ if (Algo_1.default.hasVal(consumer.options.distinct) && Algo_1.default.hasVal(consumer.options.distinctOn))
194
+ errors.push(`Can't specify a "distinct" and a "distinctOn" clause on the same consumer (${consumer.name}); use one or the other.`);
195
+ }
191
196
  }
192
197
  catch (e) {
193
198
  if (errors.length === 0)
@@ -25,9 +25,11 @@ const readline_1 = __importDefault(require("readline"));
25
25
  const promises_1 = __importDefault(require("fs/promises"));
26
26
  const TransformationEngine_1 = __importDefault(require("../engines/transform/TransformationEngine"));
27
27
  const RequestExecutor_1 = __importDefault(require("../engines/execution/RequestExecutor"));
28
- const Affirm_1 = __importDefault(require("../core/Affirm"));
29
28
  const Constants_1 = __importDefault(require("../Constants"));
30
29
  const Algo_1 = __importDefault(require("../core/Algo"));
30
+ const LineParser_1 = __importDefault(require("../engines/parsing/LineParser"));
31
+ const OutputExecutor_1 = __importDefault(require("./OutputExecutor"));
32
+ const ConsumerManager_1 = __importDefault(require("../engines/consumer/ConsumerManager"));
31
33
  class ConsumerExecutorClass {
32
34
  constructor() {
33
35
  this._getWorkPath = (consumer, executionId) => {
@@ -64,20 +66,14 @@ class ConsumerExecutorClass {
64
66
  if (!fs_1.default.existsSync(pathUri))
65
67
  fs_1.default.writeFileSync(pathUri, '');
66
68
  };
67
- this.ready = (consumer, executionId) => {
68
- (0, Affirm_1.default)(consumer, 'Invalid consumer');
69
- const workPath = this._getWorkPath(consumer, executionId);
70
- this._ensurePath(workPath);
71
- return fs_1.default.createWriteStream(workPath);
72
- };
73
69
  this.processRecord = (options) => {
74
- var _a;
70
+ var _a, _b;
75
71
  const { consumer, fields, dimensions, producer, record, requestOptions } = options;
76
- // map to consumer fields and apply consumer field logic
72
+ // Map to consumer fields and apply consumer field logic
77
73
  for (const field of fields) {
78
74
  const { cField } = field;
79
75
  const fieldKey = (_a = cField.alias) !== null && _a !== void 0 ? _a : cField.key;
80
- // set the fixed default value for the field, or throw error if not present in the producer
76
+ // Set the fixed default value for the field, or throw error if not present in the producer
81
77
  const dimension = dimensions.find(x => x.name === cField.key);
82
78
  if (!dimension) {
83
79
  if (cField.fixed && Algo_1.default.hasVal(cField.default))
@@ -85,12 +81,16 @@ class ConsumerExecutorClass {
85
81
  else
86
82
  throw new Error(`The requested field "${cField.key}" from the consumer is not present in the underlying producer "${producer.name}" (${dimensions.map(x => x.name).join(', ')})`);
87
83
  }
88
- // change the name of the dimension
84
+ // Change the name of the dimension
89
85
  if (cField.alias && cField.alias !== dimension.name) {
90
86
  record[cField.alias] = record[dimension.name];
91
87
  delete record[dimension.name];
92
88
  }
93
- // apply transformations
89
+ }
90
+ // Transformations need to be applied after the mapping since they might refer to other fields with their new names
91
+ for (const field of fields) {
92
+ const { cField } = field;
93
+ const fieldKey = (_b = cField.alias) !== null && _b !== void 0 ? _b : cField.key;
94
94
  if (cField.transform)
95
95
  record[fieldKey] = TransformationEngine_1.default.applyTransformations(record[fieldKey], cField.transform, cField, record);
96
96
  }
@@ -114,12 +114,11 @@ class ConsumerExecutorClass {
114
114
  }
115
115
  return record;
116
116
  };
117
- this.processDistinct = (consumer, executionId) => __awaiter(this, void 0, void 0, function* () {
117
+ this.processDistinct = (datasetPath) => __awaiter(this, void 0, void 0, function* () {
118
118
  var _a, e_1, _b, _c;
119
- const workPath = this._getWorkPath(consumer, executionId);
120
- const reader = fs_1.default.createReadStream(workPath);
119
+ const reader = fs_1.default.createReadStream(datasetPath);
121
120
  const lineReader = readline_1.default.createInterface({ input: reader, crlfDelay: Infinity });
122
- const tempWorkPath = workPath + '_tmp';
121
+ const tempWorkPath = datasetPath + '_tmp';
123
122
  const writer = fs_1.default.createWriteStream(tempWorkPath);
124
123
  let newLineCount = 0;
125
124
  const seen = new Set();
@@ -144,9 +143,120 @@ class ConsumerExecutorClass {
144
143
  }
145
144
  writer.close();
146
145
  reader.close();
147
- fs_1.default.renameSync(tempWorkPath, workPath);
146
+ // Wait for the writer to finish before renaming
147
+ yield new Promise((resolve, reject) => {
148
+ writer.on('finish', resolve);
149
+ writer.on('error', reject);
150
+ writer.end();
151
+ });
152
+ fs_1.default.renameSync(tempWorkPath, datasetPath);
148
153
  return newLineCount;
149
154
  });
155
+ this.processDistinctOn = (consumer, datasetPath) => __awaiter(this, void 0, void 0, function* () {
156
+ var _a, e_2, _b, _c;
157
+ const reader = fs_1.default.createReadStream(datasetPath);
158
+ const lineReader = readline_1.default.createInterface({ input: reader, crlfDelay: Infinity });
159
+ const { distinctOn } = consumer.options;
160
+ const { keys, resolution } = distinctOn;
161
+ const { strategy, orderBy, direction = 'asc' } = resolution;
162
+ const internalRecordFormat = OutputExecutor_1.default._getInternalRecordFormat(consumer);
163
+ const internalFields = ConsumerManager_1.default.getExpandedFields(consumer);
164
+ // Map to store the winning record for each composite key
165
+ // Key: composite key string, Value: { record: parsed object, line: original JSON line }
166
+ const winners = new Map();
167
+ try {
168
+ for (var _d = true, lineReader_2 = __asyncValues(lineReader), lineReader_2_1; lineReader_2_1 = yield lineReader_2.next(), _a = lineReader_2_1.done, !_a; _d = true) {
169
+ _c = lineReader_2_1.value;
170
+ _d = false;
171
+ const line = _c;
172
+ const record = (internalRecordFormat === 'CSV' || internalRecordFormat === 'TXT')
173
+ ? LineParser_1.default._internalParseCSV(line, internalFields)
174
+ : LineParser_1.default._internalParseJSON(line);
175
+ const compositeKey = keys.map(k => { var _a; return String((_a = record[k]) !== null && _a !== void 0 ? _a : ''); }).join('|');
176
+ const existing = winners.get(compositeKey);
177
+ if (!existing) {
178
+ winners.set(compositeKey, { record, line });
179
+ continue;
180
+ }
181
+ const shouldReplace = this._shouldReplaceRecord(existing.record, record, strategy, orderBy, direction);
182
+ if (shouldReplace) {
183
+ winners.set(compositeKey, { record, line });
184
+ }
185
+ }
186
+ }
187
+ catch (e_2_1) { e_2 = { error: e_2_1 }; }
188
+ finally {
189
+ try {
190
+ if (!_d && !_a && (_b = lineReader_2.return)) yield _b.call(lineReader_2);
191
+ }
192
+ finally { if (e_2) throw e_2.error; }
193
+ }
194
+ reader.close();
195
+ // Write the winning records to the temp file
196
+ const tempWorkPath = datasetPath + '_tmp';
197
+ const writer = fs_1.default.createWriteStream(tempWorkPath);
198
+ for (const { line } of winners.values()) {
199
+ writer.write(line + '\n');
200
+ }
201
+ // Wait for the writer to finish before renaming
202
+ yield new Promise((resolve, reject) => {
203
+ writer.on('finish', resolve);
204
+ writer.on('error', reject);
205
+ writer.end();
206
+ });
207
+ fs_1.default.renameSync(tempWorkPath, datasetPath);
208
+ return winners.size;
209
+ });
210
+ /**
211
+ * Determines if the new record should replace the existing record based on the resolution strategy
212
+ */
213
+ this._shouldReplaceRecord = (existing, newRecord, strategy, orderBy, direction) => {
214
+ switch (strategy) {
215
+ case 'first':
216
+ return false;
217
+ case 'last':
218
+ return true;
219
+ case 'min': {
220
+ const existingVal = existing[orderBy];
221
+ const newVal = newRecord[orderBy];
222
+ const comparison = this._compareValues(newVal, existingVal);
223
+ // For 'min', we want the smallest value
224
+ // If direction is 'desc', we invert the logic (smallest becomes largest)
225
+ return direction === 'asc' ? comparison < 0 : comparison > 0;
226
+ }
227
+ case 'max': {
228
+ const existingVal = existing[orderBy];
229
+ const newVal = newRecord[orderBy];
230
+ const comparison = this._compareValues(newVal, existingVal);
231
+ // For 'max', we want the largest value
232
+ // If direction is 'desc', we invert the logic (largest becomes smallest)
233
+ return direction === 'asc' ? comparison > 0 : comparison < 0;
234
+ }
235
+ default:
236
+ return false;
237
+ }
238
+ };
239
+ /**
240
+ * Compares two values, handling numbers, strings, and dates
241
+ * Returns: negative if a < b, positive if a > b, 0 if equal
242
+ */
243
+ this._compareValues = (a, b) => {
244
+ // Handle null/undefined
245
+ if (a == null && b == null)
246
+ return 0;
247
+ if (a == null)
248
+ return -1;
249
+ if (b == null)
250
+ return 1;
251
+ // Try numeric comparison
252
+ const numA = Number(a);
253
+ const numB = Number(b);
254
+ if (!isNaN(numA) && !isNaN(numB)) {
255
+ return numA - numB;
256
+ }
257
+ // Fall back to string comparison
258
+ return String(a).localeCompare(String(b));
259
+ };
150
260
  }
151
261
  }
152
262
  const ConsumerExecutor = new ConsumerExecutorClass();
@@ -27,6 +27,7 @@ const Affirm_1 = __importDefault(require("../core/Affirm"));
27
27
  const OutputExecutor_1 = __importDefault(require("./OutputExecutor"));
28
28
  const ConsumerManager_1 = __importDefault(require("../engines/consumer/ConsumerManager"));
29
29
  const ExecutorPerformance_1 = __importDefault(require("./ExecutorPerformance"));
30
+ const ExecutorScope_1 = __importDefault(require("./ExecutorScope"));
30
31
  class Executor {
31
32
  constructor() {
32
33
  this._REPORT_WORK_AFTER_LINES = 1000;
@@ -38,9 +39,9 @@ class Executor {
38
39
  */
39
40
  this.run = (request) => __awaiter(this, void 0, void 0, function* () {
40
41
  var _a, e_1, _b, _c;
41
- var _d;
42
+ var _d, _e;
42
43
  (0, Affirm_1.default)(request, 'Invalid request');
43
- const { consumer, producer, prodDimensions, workerId, chunk, options, reportWork } = request;
44
+ const { consumer, producer, prodDimensions, workerId, chunk, options, scope, reportWork } = request;
44
45
  const counter = performance.now();
45
46
  const result = {
46
47
  executionId: workerId,
@@ -48,12 +49,13 @@ class Executor {
48
49
  elapsedMS: -1,
49
50
  inputCount: -1,
50
51
  outputCount: -1,
51
- resultUri: ConsumerExecutor_1.default._getWorkPath(consumer, workerId),
52
+ resultUri: ExecutorScope_1.default.getWorkerPath(scope, workerId),
52
53
  operations: {}
53
54
  };
55
+ ExecutorScope_1.default.ensurePath(result.resultUri);
54
56
  let totalOutputCount = 0, totalCycles = 1, perf = 0, lineIndex = 0;
55
57
  const readStream = this.openReadStream(chunk);
56
- const writeStream = this.openWriteStream(consumer, workerId);
58
+ const writeStream = this.openWriteStream(scope, workerId);
57
59
  const fields = ConsumerManager_1.default.getExpandedFields(consumer);
58
60
  const { isFirstChunk, start, end } = chunk;
59
61
  const totalBytes = end - start;
@@ -61,9 +63,9 @@ class Executor {
61
63
  // Process all the line-independent operations of the consumer in a single pass
62
64
  const lineStream = readline_1.default.createInterface({ input: readStream, crlfDelay: Infinity });
63
65
  try {
64
- for (var _e = true, lineStream_1 = __asyncValues(lineStream), lineStream_1_1; lineStream_1_1 = yield lineStream_1.next(), _a = lineStream_1_1.done, !_a; _e = true) {
66
+ for (var _f = true, lineStream_1 = __asyncValues(lineStream), lineStream_1_1; lineStream_1_1 = yield lineStream_1.next(), _a = lineStream_1_1.done, !_a; _f = true) {
65
67
  _c = lineStream_1_1.value;
66
- _e = false;
68
+ _f = false;
67
69
  const line = _c;
68
70
  if (lineIndex === 0 && isFirstChunk) {
69
71
  if (!this.shouldProcessFirstLine(producer)) {
@@ -115,22 +117,27 @@ class Executor {
115
117
  catch (e_1_1) { e_1 = { error: e_1_1 }; }
116
118
  finally {
117
119
  try {
118
- if (!_e && !_a && (_b = lineStream_1.return)) yield _b.call(lineStream_1);
120
+ if (!_f && !_a && (_b = lineStream_1.return)) yield _b.call(lineStream_1);
119
121
  }
120
122
  finally { if (e_1) throw e_1.error; }
121
123
  }
122
124
  // Process the operations that work on multiple lines
123
125
  if (((_d = consumer.options) === null || _d === void 0 ? void 0 : _d.distinct) === true) {
124
126
  perf = performance.now();
125
- totalOutputCount = yield ConsumerExecutor_1.default.processDistinct(consumer, workerId);
127
+ totalOutputCount = yield ConsumerExecutor_1.default.processDistinct(result.resultUri);
126
128
  this._performance.measure('process-distinct', performance.now() - perf);
127
129
  totalCycles++;
128
130
  }
131
+ if ((_e = consumer.options) === null || _e === void 0 ? void 0 : _e.distinctOn) {
132
+ perf = performance.now();
133
+ totalOutputCount = yield ConsumerExecutor_1.default.processDistinctOn(consumer, result.resultUri);
134
+ this._performance.measure('process-distinct-on', performance.now() - perf);
135
+ totalCycles++;
136
+ }
129
137
  result.elapsedMS = performance.now() - counter;
130
138
  result.cycles = totalCycles;
131
139
  result.inputCount = lineIndex;
132
140
  result.outputCount = totalOutputCount;
133
- result.resultUri = ConsumerExecutor_1.default._getWorkPath(consumer, workerId);
134
141
  result.operations = this._performance.getOperations();
135
142
  return result;
136
143
  });
@@ -138,8 +145,9 @@ class Executor {
138
145
  const { end, fileUri, start } = chunk;
139
146
  return fs_1.default.createReadStream(fileUri, { start, end: end });
140
147
  };
141
- this.openWriteStream = (consumer, executionId) => {
142
- return ConsumerExecutor_1.default.ready(consumer, executionId);
148
+ this.openWriteStream = (scope, workerId) => {
149
+ const workerPath = ExecutorScope_1.default.getWorkerPath(scope, workerId);
150
+ return fs_1.default.createWriteStream(workerPath);
143
151
  };
144
152
  this.shouldProcessFirstLine = (producer) => {
145
153
  (0, Affirm_1.default)(producer, 'Invalid producer');
@@ -32,6 +32,7 @@ const ExecutorPerformance_1 = __importDefault(require("./ExecutorPerformance"));
32
32
  const ExecutorProgress_1 = __importDefault(require("./ExecutorProgress"));
33
33
  const Algo_1 = __importDefault(require("../core/Algo"));
34
34
  const ConsumerOnFinishManager_1 = __importDefault(require("../engines/consumer/ConsumerOnFinishManager"));
35
+ const ExecutorScope_1 = __importDefault(require("./ExecutorScope"));
35
36
  class ExecutorOrchestratorClass {
36
37
  constructor() {
37
38
  this.init = () => {
@@ -48,7 +49,7 @@ class ExecutorOrchestratorClass {
48
49
  }
49
50
  };
50
51
  this.launch = (request) => __awaiter(this, void 0, void 0, function* () {
51
- var _a;
52
+ var _a, _b;
52
53
  (0, Affirm_1.default)(request, 'Invalid options');
53
54
  const { consumer, details, logProgress, options } = request;
54
55
  (0, Affirm_1.default)(consumer, 'Invalid consumer');
@@ -56,12 +57,14 @@ class ExecutorOrchestratorClass {
56
57
  const tracker = new ExecutorPerformance_1.default();
57
58
  const _progress = new ExecutorProgress_1.default(logProgress);
58
59
  const { usageId } = UsageManager_1.default.startUsage(consumer, details);
59
- const workersId = [];
60
+ const scope = { id: usageId, folder: `${consumer.name}_${usageId}`, workersId: [] };
60
61
  try {
61
62
  const start = performance.now();
62
63
  this.init();
63
64
  const executorResults = [];
64
- const sourceFilesByProducer = yield this.readySourceFiles(consumer);
65
+ let counter = performance.now();
66
+ const sourceFilesByProducer = yield this.readySourceFiles(consumer, scope);
67
+ tracker.measure('ready-producers', performance.now() - counter);
65
68
  let globalWorkerIndex = 0;
66
69
  for (const pair of sourceFilesByProducer) {
67
70
  const { prod, cProd, response } = pair;
@@ -86,15 +89,16 @@ class ExecutorOrchestratorClass {
86
89
  const currentWorkerIndex = globalWorkerIndex;
87
90
  globalWorkerIndex++;
88
91
  const workerData = {
92
+ producer: prod,
89
93
  chunk,
90
94
  consumer,
91
95
  prodDimensions,
92
- producer: prod,
93
- workerId: workerId,
94
- options: options
96
+ workerId,
97
+ scope,
98
+ options
95
99
  };
96
100
  _progress.register((currentWorkerIndex + 1).toString(), prod.name, fileIndex, totalFiles);
97
- workersId.push(workerId);
101
+ scope.workersId.push(workerId);
98
102
  workerThreads.push(this._executorPool.exec('executor', [workerData], {
99
103
  on: payload => this.onWorkAdvanced(payload, currentWorkerIndex, _progress)
100
104
  }));
@@ -105,28 +109,34 @@ class ExecutorOrchestratorClass {
105
109
  }
106
110
  }
107
111
  _progress.complete();
108
- yield this.reconcileExecutorThreadsResults(consumer, usageId, executorResults, tracker);
112
+ yield this.reconcileExecutorThreadsResults(scope, executorResults, tracker);
109
113
  // If there is more than one worker, then I need to redo the operations that are done on multiple lines (cause now the worker files have been merged together)
110
114
  const postOperation = { totalOutputCount: null };
111
115
  if (executorResults.length > 1) {
112
116
  if (((_a = consumer.options) === null || _a === void 0 ? void 0 : _a.distinct) === true) {
113
- const perf = performance.now();
114
- const unifiedOutputCount = yield ConsumerExecutor_1.default.processDistinct(consumer, usageId);
115
- tracker.measure('process-distinct:main', performance.now() - perf);
117
+ counter = performance.now();
118
+ const unifiedOutputCount = yield ConsumerExecutor_1.default.processDistinct(ExecutorScope_1.default.getMainPath(scope));
119
+ tracker.measure('process-distinct:main', performance.now() - counter);
120
+ postOperation.totalOutputCount = unifiedOutputCount;
121
+ }
122
+ if (((_b = consumer.options) === null || _b === void 0 ? void 0 : _b.distinctOn)) {
123
+ counter = performance.now();
124
+ const unifiedOutputCount = yield ConsumerExecutor_1.default.processDistinctOn(consumer, ExecutorScope_1.default.getMainPath(scope));
125
+ tracker.measure('process-distinct-on:main', performance.now() - counter);
116
126
  postOperation.totalOutputCount = unifiedOutputCount;
117
127
  }
118
128
  }
119
129
  // Export to the destination
120
- let perf = performance.now();
121
- const exportRes = yield OutputExecutor_1.default.exportResult(consumer, usageId, ConsumerManager_1.default.getExpandedFields(consumer));
122
- tracker.measure('export-result', performance.now() - perf);
130
+ counter = performance.now();
131
+ const exportRes = yield OutputExecutor_1.default.exportResult(consumer, ConsumerManager_1.default.getExpandedFields(consumer), scope);
132
+ tracker.measure('export-result', performance.now() - counter);
123
133
  // Perform on-success actions if any
124
134
  if (consumer.outputs.some(x => x.onSuccess)) {
125
- perf = performance.now();
135
+ counter = performance.now();
126
136
  yield ConsumerOnFinishManager_1.default.onConsumerSuccess(consumer, usageId);
127
- tracker.measure('on-success-actions', performance.now() - perf);
137
+ tracker.measure('on-success-actions', performance.now() - counter);
128
138
  }
129
- yield this.performCleanupOperations(consumer, usageId, executorResults.map(x => x.resultUri), tracker);
139
+ yield this.performCleanupOperations(scope, tracker);
130
140
  const finalResult = this.computeFinalResult(tracker, executorResults, usageId, exportRes.key);
131
141
  finalResult.elapsedMS = performance.now() - start;
132
142
  if (Algo_1.default.hasVal(postOperation.totalOutputCount))
@@ -136,7 +146,7 @@ class ExecutorOrchestratorClass {
136
146
  }
137
147
  catch (error) {
138
148
  yield ConsumerOnFinishManager_1.default.onConsumerError(consumer, usageId);
139
- yield this.performCleanupOperations(consumer, usageId, workersId.map(x => ConsumerExecutor_1.default._getWorkPath(consumer, x)), tracker);
149
+ yield this.performCleanupOperations(scope, tracker);
140
150
  UsageManager_1.default.failUsage(usageId, Helper_1.default.asError(error).message);
141
151
  throw error;
142
152
  }
@@ -210,12 +220,12 @@ class ExecutorOrchestratorClass {
210
220
  // No newline found, return file end
211
221
  return fileSize;
212
222
  };
213
- this.readySourceFiles = (consumer) => __awaiter(this, void 0, void 0, function* () {
223
+ this.readySourceFiles = (consumer, scope) => __awaiter(this, void 0, void 0, function* () {
214
224
  const results = [];
215
225
  for (let i = 0; i < consumer.producers.length; i++) {
216
226
  const cProd = consumer.producers[i];
217
227
  const prod = Environment_1.default.getProducer(cProd.name);
218
- results.push({ prod, cProd, response: yield ProducerExecutor_1.default.ready(prod) });
228
+ results.push({ prod, cProd, response: yield ProducerExecutor_1.default.ready(prod, scope) });
219
229
  }
220
230
  return results;
221
231
  });
@@ -239,13 +249,13 @@ class ExecutorOrchestratorClass {
239
249
  return path_1.default.resolve('./.build/workers');
240
250
  }
241
251
  };
242
- this.reconcileExecutorThreadsResults = (consumer, executionId, executorResults, tracker) => __awaiter(this, void 0, void 0, function* () {
243
- const workPath = ConsumerExecutor_1.default._getWorkPath(consumer, executionId);
244
- ConsumerExecutor_1.default._ensurePath(workPath);
252
+ this.reconcileExecutorThreadsResults = (scope, executorResults, tracker) => __awaiter(this, void 0, void 0, function* () {
253
+ const mainPath = ExecutorScope_1.default.getMainPath(scope);
254
+ ConsumerExecutor_1.default._ensurePath(mainPath);
245
255
  // Merge all the various files into a single one
246
256
  if (executorResults.length > 1) {
247
257
  const perf = performance.now();
248
- const output = fs_1.default.createWriteStream(workPath);
258
+ const output = fs_1.default.createWriteStream(mainPath);
249
259
  output.setMaxListeners(executorResults.length + 1);
250
260
  for (const workerResult of executorResults) {
251
261
  yield (0, promises_2.pipeline)(fs_1.default.createReadStream(workerResult.resultUri), output, { end: false });
@@ -256,14 +266,12 @@ class ExecutorOrchestratorClass {
256
266
  }
257
267
  else if (executorResults.length === 1) {
258
268
  // If there is only one worker, then just rename the worker .dataset to the general consumer one
259
- yield promises_1.default.rename(executorResults[0].resultUri, workPath);
269
+ yield promises_1.default.rename(executorResults[0].resultUri, mainPath);
260
270
  }
261
271
  });
262
- this.performCleanupOperations = (consumer, executionId, workersPath, tracker) => __awaiter(this, void 0, void 0, function* () {
263
- const workPath = ConsumerExecutor_1.default._getWorkPath(consumer, executionId);
272
+ this.performCleanupOperations = (scope, tracker) => __awaiter(this, void 0, void 0, function* () {
264
273
  const start = performance.now();
265
- yield Promise.all(workersPath.map(x => ConsumerExecutor_1.default._clearWorkPath(x)));
266
- yield ConsumerExecutor_1.default._clearWorkPath(workPath);
274
+ yield ExecutorScope_1.default.clearScope(scope);
267
275
  tracker.measure('cleanup-operations', performance.now() - start);
268
276
  });
269
277
  this.computeFinalResult = (tracker, executorResults, executionId, resultUri) => {
@@ -0,0 +1,52 @@
1
+ "use strict";
2
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
+ return new (P || (P = Promise))(function (resolve, reject) {
5
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
9
+ });
10
+ };
11
+ var __importDefault = (this && this.__importDefault) || function (mod) {
12
+ return (mod && mod.__esModule) ? mod : { "default": mod };
13
+ };
14
+ Object.defineProperty(exports, "__esModule", { value: true });
15
+ const path_1 = __importDefault(require("path"));
16
+ const fs_1 = __importDefault(require("fs"));
17
+ const promises_1 = __importDefault(require("fs/promises"));
18
+ const Constants_1 = __importDefault(require("../Constants"));
19
+ class ExecutorScopeClass {
20
+ constructor() {
21
+ this.WORKERS_FOLDER = 'workers';
22
+ this.PRODUCERS_FOLDER = 'producers';
23
+ this.getWorkerPath = (scope, workerId) => {
24
+ return path_1.default.join(Constants_1.default.defaults.REMORA_PATH, Constants_1.default.defaults.PRODUCER_TEMP_FOLDER,
25
+ // A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
26
+ scope.folder, this.WORKERS_FOLDER, `${workerId}.dataset`);
27
+ };
28
+ this.getProducerPath = (scope, producer, sourceFileKey) => {
29
+ return path_1.default.join(Constants_1.default.defaults.REMORA_PATH, Constants_1.default.defaults.PRODUCER_TEMP_FOLDER,
30
+ // A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
31
+ scope.folder, this.PRODUCERS_FOLDER, producer.name, `${sourceFileKey}.dataset`);
32
+ };
33
+ this.getMainPath = (scope) => {
34
+ return path_1.default.join(Constants_1.default.defaults.REMORA_PATH, Constants_1.default.defaults.PRODUCER_TEMP_FOLDER, scope.folder, 'main.dataset');
35
+ };
36
+ this.clearScope = (scope) => __awaiter(this, void 0, void 0, function* () {
37
+ const scopePath = path_1.default.join(Constants_1.default.defaults.REMORA_PATH, Constants_1.default.defaults.PRODUCER_TEMP_FOLDER, scope.folder);
38
+ if (fs_1.default.existsSync(scopePath)) {
39
+ yield promises_1.default.rm(scopePath, { recursive: true, force: true });
40
+ }
41
+ });
42
+ this.ensurePath = (fileUri) => {
43
+ const dir = path_1.default.dirname(fileUri);
44
+ if (!fs_1.default.existsSync(dir))
45
+ fs_1.default.mkdirSync(dir, { recursive: true });
46
+ if (!fs_1.default.existsSync(fileUri))
47
+ fs_1.default.writeFileSync(fileUri, '');
48
+ };
49
+ }
50
+ }
51
+ const ExecutorScope = new ExecutorScopeClass();
52
+ exports.default = ExecutorScope;
@@ -17,7 +17,7 @@ const DSTE_1 = __importDefault(require("../core/dste/DSTE"));
17
17
  const DriverFactory_1 = __importDefault(require("../drivers/DriverFactory"));
18
18
  const Environment_1 = __importDefault(require("../engines/Environment"));
19
19
  const CSVParser_1 = __importDefault(require("../engines/parsing/CSVParser"));
20
- const ConsumerExecutor_1 = __importDefault(require("./ConsumerExecutor"));
20
+ const ExecutorScope_1 = __importDefault(require("./ExecutorScope"));
21
21
  class OutputExecutorClass {
22
22
  constructor() {
23
23
  this._getInternalRecordFormat = (consumer) => {
@@ -62,13 +62,13 @@ class OutputExecutorClass {
62
62
  return JSON.stringify(record);
63
63
  }
64
64
  };
65
- this.exportResult = (consumer, executionId, fields) => __awaiter(this, void 0, void 0, function* () {
65
+ this.exportResult = (consumer, fields, scope) => __awaiter(this, void 0, void 0, function* () {
66
66
  const internalFormat = this._getInternalRecordFormat(consumer);
67
67
  for (const output of consumer.outputs) {
68
68
  const destination = Environment_1.default.getSource(output.exportDestination);
69
69
  const driver = yield DriverFactory_1.default.instantiateDestination(destination);
70
- const currentPath = ConsumerExecutor_1.default._getWorkPath(consumer, executionId);
71
- const destinationName = this._composeFileName(consumer, output, this._getExtension(output), executionId);
70
+ const currentPath = ExecutorScope_1.default.getMainPath(scope);
71
+ const destinationName = this._composeFileName(consumer, output, this._getExtension(output));
72
72
  if (output.format === internalFormat) {
73
73
  return yield driver.move(currentPath, destinationName);
74
74
  }
@@ -22,12 +22,12 @@ const LineParser_1 = __importDefault(require("../engines/parsing/LineParser"));
22
22
  const CryptoEngine_1 = __importDefault(require("../engines/CryptoEngine"));
23
23
  class ProducerExecutorClass {
24
24
  constructor() {
25
- this.ready = (producer) => __awaiter(this, void 0, void 0, function* () {
25
+ this.ready = (producer, scope) => __awaiter(this, void 0, void 0, function* () {
26
26
  (0, Affirm_1.default)(producer, 'Invalid producer');
27
27
  const source = Environment_1.default.getSource(producer.source);
28
28
  (0, Affirm_1.default)(source, `Invalid source ${producer.source} on producer ${producer.name}`);
29
29
  const driver = yield DriverFactory_1.default.instantiateSource(source);
30
- return yield driver.ready(producer);
30
+ return yield driver.ready({ producer, scope });
31
31
  });
32
32
  this.processHeader = (line, producer) => {
33
33
  const { settings: { fileType, hasHeaderRow, delimiter } } = producer;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forzalabs/remora",
3
- "version": "1.0.11",
3
+ "version": "1.0.14",
4
4
  "description": "A powerful CLI tool for seamless data translation.",
5
5
  "main": "index.js",
6
6
  "private": false,
@@ -21,7 +21,7 @@
21
21
  "debug": "npx tsx ./src/index.ts debug",
22
22
  "create-producer": "npx tsx ./src/index.ts create-producer",
23
23
  "copy-static-file": "npx tsx ./scripts/CopyStaticFile.js",
24
- "build": "npm i && npm run sync && tsc --outDir .build && npm run copy-static-file",
24
+ "build": "npm i && npm run sync && rm -rf .build && tsc --outDir .build && npm run copy-static-file",
25
25
  "fast-build": "tsc --outDir .build",
26
26
  "upload": "npm run build && cd .build && npm publish --access=public"
27
27
  },
@@ -21,11 +21,12 @@ dotenv_1.default.configDotenv();
21
21
  const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
22
22
  Environment_1.default.load('./');
23
23
  try {
24
- const { workerId, chunk, consumer, producer, prodDimensions, options } = workerData;
24
+ const { workerId, chunk, consumer, producer, prodDimensions, scope, options } = workerData;
25
25
  (0, Affirm_1.default)(workerId, `Invalid worker id`);
26
26
  (0, Affirm_1.default)(consumer, `Invalid consumer`);
27
27
  (0, Affirm_1.default)(producer, `Invalid producer`);
28
28
  (0, Affirm_1.default)(chunk, `Invalid chunk`);
29
+ (0, Affirm_1.default)(scope, `Invalid executor scope`);
29
30
  const executor = new Executor_1.default();
30
31
  const res = yield executor.run({
31
32
  consumer,
@@ -34,6 +35,7 @@ const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
34
35
  workerId,
35
36
  chunk,
36
37
  options,
38
+ scope,
37
39
  reportWork: packet => workerpool_1.default.workerEmit(packet)
38
40
  });
39
41
  return res;