@forzalabs/remora 1.0.13 → 1.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,9 +25,11 @@ const readline_1 = __importDefault(require("readline"));
  const promises_1 = __importDefault(require("fs/promises"));
  const TransformationEngine_1 = __importDefault(require("../engines/transform/TransformationEngine"));
  const RequestExecutor_1 = __importDefault(require("../engines/execution/RequestExecutor"));
- const Affirm_1 = __importDefault(require("../core/Affirm"));
  const Constants_1 = __importDefault(require("../Constants"));
  const Algo_1 = __importDefault(require("../core/Algo"));
+ const LineParser_1 = __importDefault(require("../engines/parsing/LineParser"));
+ const OutputExecutor_1 = __importDefault(require("./OutputExecutor"));
+ const ConsumerManager_1 = __importDefault(require("../engines/consumer/ConsumerManager"));
  class ConsumerExecutorClass {
  constructor() {
  this._getWorkPath = (consumer, executionId) => {
@@ -64,12 +66,6 @@ class ConsumerExecutorClass {
  if (!fs_1.default.existsSync(pathUri))
  fs_1.default.writeFileSync(pathUri, '');
  };
- this.ready = (consumer, executionId) => {
- (0, Affirm_1.default)(consumer, 'Invalid consumer');
- const workPath = this._getWorkPath(consumer, executionId);
- this._ensurePath(workPath);
- return fs_1.default.createWriteStream(workPath);
- };
  this.processRecord = (options) => {
  var _a, _b;
  const { consumer, fields, dimensions, producer, record, requestOptions } = options;
@@ -118,12 +114,11 @@ class ConsumerExecutorClass {
  }
  return record;
  };
- this.processDistinct = (consumer, executionId) => __awaiter(this, void 0, void 0, function* () {
+ this.processDistinct = (datasetPath) => __awaiter(this, void 0, void 0, function* () {
  var _a, e_1, _b, _c;
- const workPath = this._getWorkPath(consumer, executionId);
- const reader = fs_1.default.createReadStream(workPath);
+ const reader = fs_1.default.createReadStream(datasetPath);
  const lineReader = readline_1.default.createInterface({ input: reader, crlfDelay: Infinity });
- const tempWorkPath = workPath + '_tmp';
+ const tempWorkPath = datasetPath + '_tmp';
  const writer = fs_1.default.createWriteStream(tempWorkPath);
  let newLineCount = 0;
  const seen = new Set();
@@ -148,9 +143,120 @@ class ConsumerExecutorClass {
  }
  writer.close();
  reader.close();
- fs_1.default.renameSync(tempWorkPath, workPath);
+ // Wait for the writer to finish before renaming
+ yield new Promise((resolve, reject) => {
+ writer.on('finish', resolve);
+ writer.on('error', reject);
+ writer.end();
+ });
+ fs_1.default.renameSync(tempWorkPath, datasetPath);
  return newLineCount;
  });
+ this.processDistinctOn = (consumer, datasetPath) => __awaiter(this, void 0, void 0, function* () {
+ var _a, e_2, _b, _c;
+ const reader = fs_1.default.createReadStream(datasetPath);
+ const lineReader = readline_1.default.createInterface({ input: reader, crlfDelay: Infinity });
+ const { distinctOn } = consumer.options;
+ const { keys, resolution } = distinctOn;
+ const { strategy, orderBy, direction = 'asc' } = resolution;
+ const internalRecordFormat = OutputExecutor_1.default._getInternalRecordFormat(consumer);
+ const internalFields = ConsumerManager_1.default.getExpandedFields(consumer);
+ // Map to store the winning record for each composite key
+ // Key: composite key string, Value: { record: parsed object, line: original JSON line }
+ const winners = new Map();
+ try {
+ for (var _d = true, lineReader_2 = __asyncValues(lineReader), lineReader_2_1; lineReader_2_1 = yield lineReader_2.next(), _a = lineReader_2_1.done, !_a; _d = true) {
+ _c = lineReader_2_1.value;
+ _d = false;
+ const line = _c;
+ const record = (internalRecordFormat === 'CSV' || internalRecordFormat === 'TXT')
+ ? LineParser_1.default._internalParseCSV(line, internalFields)
+ : LineParser_1.default._internalParseJSON(line);
+ const compositeKey = keys.map(k => { var _a; return String((_a = record[k]) !== null && _a !== void 0 ? _a : ''); }).join('|');
+ const existing = winners.get(compositeKey);
+ if (!existing) {
+ winners.set(compositeKey, { record, line });
+ continue;
+ }
+ const shouldReplace = this._shouldReplaceRecord(existing.record, record, strategy, orderBy, direction);
+ if (shouldReplace) {
+ winners.set(compositeKey, { record, line });
+ }
+ }
+ }
+ catch (e_2_1) { e_2 = { error: e_2_1 }; }
+ finally {
+ try {
+ if (!_d && !_a && (_b = lineReader_2.return)) yield _b.call(lineReader_2);
+ }
+ finally { if (e_2) throw e_2.error; }
+ }
+ reader.close();
+ // Write the winning records to the temp file
+ const tempWorkPath = datasetPath + '_tmp';
+ const writer = fs_1.default.createWriteStream(tempWorkPath);
+ for (const { line } of winners.values()) {
+ writer.write(line + '\n');
+ }
+ // Wait for the writer to finish before renaming
+ yield new Promise((resolve, reject) => {
+ writer.on('finish', resolve);
+ writer.on('error', reject);
+ writer.end();
+ });
+ fs_1.default.renameSync(tempWorkPath, datasetPath);
+ return winners.size;
+ });
+ /**
+ * Determines if the new record should replace the existing record based on the resolution strategy
+ */
+ this._shouldReplaceRecord = (existing, newRecord, strategy, orderBy, direction) => {
+ switch (strategy) {
+ case 'first':
+ return false;
+ case 'last':
+ return true;
+ case 'min': {
+ const existingVal = existing[orderBy];
+ const newVal = newRecord[orderBy];
+ const comparison = this._compareValues(newVal, existingVal);
+ // For 'min', we want the smallest value
+ // If direction is 'desc', we invert the logic (smallest becomes largest)
+ return direction === 'asc' ? comparison < 0 : comparison > 0;
+ }
+ case 'max': {
+ const existingVal = existing[orderBy];
+ const newVal = newRecord[orderBy];
+ const comparison = this._compareValues(newVal, existingVal);
+ // For 'max', we want the largest value
+ // If direction is 'desc', we invert the logic (largest becomes smallest)
+ return direction === 'asc' ? comparison > 0 : comparison < 0;
+ }
+ default:
+ return false;
+ }
+ };
+ /**
+ * Compares two values, handling numbers, strings, and dates
+ * Returns: negative if a < b, positive if a > b, 0 if equal
+ */
+ this._compareValues = (a, b) => {
+ // Handle null/undefined
+ if (a == null && b == null)
+ return 0;
+ if (a == null)
+ return -1;
+ if (b == null)
+ return 1;
+ // Try numeric comparison
+ const numA = Number(a);
+ const numB = Number(b);
+ if (!isNaN(numA) && !isNaN(numB)) {
+ return numA - numB;
+ }
+ // Fall back to string comparison
+ return String(a).localeCompare(String(b));
+ };
  }
  }
  const ConsumerExecutor = new ConsumerExecutorClass();
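
Note: the most substantial addition in ConsumerExecutor is the distinctOn pass, which groups records by a composite key and keeps one winner per key according to a resolution strategy (first, last, min, max) with an optional orderBy field and sort direction. Below is a minimal TypeScript sketch of that resolution logic, re-expressed from the compiled output above; the option shape comes from the destructuring in processDistinctOn, while the sample records and the updated_at field are hypothetical.

// Sketch of the resolution logic added in ConsumerExecutor.processDistinctOn,
// re-expressed in TypeScript for readability. The option shape
// ({ keys, resolution: { strategy, orderBy, direction } }) is taken from the
// compiled code above; the sample rows and 'updated_at' field are hypothetical.
type Direction = 'asc' | 'desc';
type Strategy = 'first' | 'last' | 'min' | 'max';

interface DistinctOn {
  keys: string[];
  resolution: { strategy: Strategy; orderBy?: string; direction?: Direction };
}

type Rec = Record<string, unknown>;

// Negative if a < b, positive if a > b, 0 if equal; numeric when both sides parse as numbers.
const compareValues = (a: unknown, b: unknown): number => {
  if (a == null && b == null) return 0;
  if (a == null) return -1;
  if (b == null) return 1;
  const numA = Number(a), numB = Number(b);
  if (!isNaN(numA) && !isNaN(numB)) return numA - numB;
  return String(a).localeCompare(String(b));
};

// Decide whether an incoming record replaces the current winner for its composite key.
const shouldReplace = (existing: Rec, incoming: Rec, { strategy, orderBy, direction = 'asc' }: DistinctOn['resolution']): boolean => {
  switch (strategy) {
    case 'first': return false;                       // keep the first record seen
    case 'last': return true;                         // always take the latest record
    case 'min': {
      const cmp = compareValues(incoming[orderBy!], existing[orderBy!]);
      return direction === 'asc' ? cmp < 0 : cmp > 0; // 'desc' inverts what "min" means
    }
    case 'max': {
      const cmp = compareValues(incoming[orderBy!], existing[orderBy!]);
      return direction === 'asc' ? cmp > 0 : cmp < 0; // 'desc' inverts what "max" means
    }
    default: return false;
  }
};

// Dedupe rows on a composite key, keeping one winner per key.
const distinctOn = (rows: Rec[], opts: DistinctOn): Rec[] => {
  const winners = new Map<string, Rec>();
  for (const row of rows) {
    const key = opts.keys.map(k => String(row[k] ?? '')).join('|');
    const current = winners.get(key);
    if (!current || shouldReplace(current, row, opts.resolution)) winners.set(key, row);
  }
  return [...winners.values()];
};

// Hypothetical usage: keep the newest row per (customer_id, sku).
console.log(distinctOn(
  [
    { customer_id: 1, sku: 'A', updated_at: '2024-01-01' },
    { customer_id: 1, sku: 'A', updated_at: '2024-03-01' },
  ],
  { keys: ['customer_id', 'sku'], resolution: { strategy: 'max', orderBy: 'updated_at' } }
));

With strategy 'max' on updated_at, only the later of the two duplicate rows survives, which mirrors how processDistinctOn selects the line it writes back to the dataset.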
@@ -27,6 +27,7 @@ const Affirm_1 = __importDefault(require("../core/Affirm"));
  const OutputExecutor_1 = __importDefault(require("./OutputExecutor"));
  const ConsumerManager_1 = __importDefault(require("../engines/consumer/ConsumerManager"));
  const ExecutorPerformance_1 = __importDefault(require("./ExecutorPerformance"));
+ const ExecutorScope_1 = __importDefault(require("./ExecutorScope"));
  class Executor {
  constructor() {
  this._REPORT_WORK_AFTER_LINES = 1000;
@@ -38,9 +39,9 @@ class Executor {
  */
  this.run = (request) => __awaiter(this, void 0, void 0, function* () {
  var _a, e_1, _b, _c;
- var _d;
+ var _d, _e;
  (0, Affirm_1.default)(request, 'Invalid request');
- const { consumer, producer, prodDimensions, workerId, chunk, options, reportWork } = request;
+ const { consumer, producer, prodDimensions, workerId, chunk, options, scope, reportWork } = request;
  const counter = performance.now();
  const result = {
  executionId: workerId,
@@ -48,12 +49,13 @@ class Executor {
  elapsedMS: -1,
  inputCount: -1,
  outputCount: -1,
- resultUri: ConsumerExecutor_1.default._getWorkPath(consumer, workerId),
+ resultUri: ExecutorScope_1.default.getWorkerPath(scope, workerId),
  operations: {}
  };
+ ExecutorScope_1.default.ensurePath(result.resultUri);
  let totalOutputCount = 0, totalCycles = 1, perf = 0, lineIndex = 0;
  const readStream = this.openReadStream(chunk);
- const writeStream = this.openWriteStream(consumer, workerId);
+ const writeStream = this.openWriteStream(scope, workerId);
  const fields = ConsumerManager_1.default.getExpandedFields(consumer);
  const { isFirstChunk, start, end } = chunk;
  const totalBytes = end - start;
@@ -61,9 +63,9 @@ class Executor {
  // Process all the line-independent operations of the consumer in a single pass
  const lineStream = readline_1.default.createInterface({ input: readStream, crlfDelay: Infinity });
  try {
- for (var _e = true, lineStream_1 = __asyncValues(lineStream), lineStream_1_1; lineStream_1_1 = yield lineStream_1.next(), _a = lineStream_1_1.done, !_a; _e = true) {
+ for (var _f = true, lineStream_1 = __asyncValues(lineStream), lineStream_1_1; lineStream_1_1 = yield lineStream_1.next(), _a = lineStream_1_1.done, !_a; _f = true) {
  _c = lineStream_1_1.value;
- _e = false;
+ _f = false;
  const line = _c;
  if (lineIndex === 0 && isFirstChunk) {
  if (!this.shouldProcessFirstLine(producer)) {
@@ -115,22 +117,27 @@ class Executor {
  catch (e_1_1) { e_1 = { error: e_1_1 }; }
  finally {
  try {
- if (!_e && !_a && (_b = lineStream_1.return)) yield _b.call(lineStream_1);
+ if (!_f && !_a && (_b = lineStream_1.return)) yield _b.call(lineStream_1);
  }
  finally { if (e_1) throw e_1.error; }
  }
  // Process the operations that work on multiple lines
  if (((_d = consumer.options) === null || _d === void 0 ? void 0 : _d.distinct) === true) {
  perf = performance.now();
- totalOutputCount = yield ConsumerExecutor_1.default.processDistinct(consumer, workerId);
+ totalOutputCount = yield ConsumerExecutor_1.default.processDistinct(result.resultUri);
  this._performance.measure('process-distinct', performance.now() - perf);
  totalCycles++;
  }
+ if ((_e = consumer.options) === null || _e === void 0 ? void 0 : _e.distinctOn) {
+ perf = performance.now();
+ totalOutputCount = yield ConsumerExecutor_1.default.processDistinctOn(consumer, result.resultUri);
+ this._performance.measure('process-distinct-on', performance.now() - perf);
+ totalCycles++;
+ }
  result.elapsedMS = performance.now() - counter;
  result.cycles = totalCycles;
  result.inputCount = lineIndex;
  result.outputCount = totalOutputCount;
- result.resultUri = ConsumerExecutor_1.default._getWorkPath(consumer, workerId);
  result.operations = this._performance.getOperations();
  return result;
  });
@@ -138,8 +145,9 @@ class Executor {
  const { end, fileUri, start } = chunk;
  return fs_1.default.createReadStream(fileUri, { start, end: end });
  };
- this.openWriteStream = (consumer, executionId) => {
- return ConsumerExecutor_1.default.ready(consumer, executionId);
+ this.openWriteStream = (scope, workerId) => {
+ const workerPath = ExecutorScope_1.default.getWorkerPath(scope, workerId);
+ return fs_1.default.createWriteStream(workerPath);
  };
  this.shouldProcessFirstLine = (producer) => {
  (0, Affirm_1.default)(producer, 'Invalid producer');
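
Note: the distinct passes invoked at the end of Executor.run delegate to ConsumerExecutor, which now ends the temp-file write stream and waits for its 'finish' event before renaming it over the dataset, instead of renaming right after writer.close(). A minimal sketch of that flush-then-rename pattern, using only Node's fs API; the file path and sample lines are hypothetical.

// Minimal sketch of the "flush, then rename" pattern used by processDistinct /
// processDistinctOn in 1.0.14: end the write stream, wait for 'finish', and only
// then swap the temp file over the dataset. Paths and data are hypothetical.
import fs from 'fs';

const writeThenSwap = async (datasetPath: string, lines: string[]): Promise<void> => {
  const tempPath = datasetPath + '_tmp';
  const writer = fs.createWriteStream(tempPath);
  for (const line of lines) writer.write(line + '\n');

  // 'finish' fires once all buffered data has been flushed to the file,
  // so the rename below never races the final writes.
  await new Promise<void>((resolve, reject) => {
    writer.on('finish', resolve);
    writer.on('error', reject);
    writer.end();
  });

  fs.renameSync(tempPath, datasetPath);
};

// Hypothetical usage
writeThenSwap('./main.dataset', ['{"id":1}', '{"id":2}']).catch(console.error);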
@@ -32,6 +32,7 @@ const ExecutorPerformance_1 = __importDefault(require("./ExecutorPerformance"));
  const ExecutorProgress_1 = __importDefault(require("./ExecutorProgress"));
  const Algo_1 = __importDefault(require("../core/Algo"));
  const ConsumerOnFinishManager_1 = __importDefault(require("../engines/consumer/ConsumerOnFinishManager"));
+ const ExecutorScope_1 = __importDefault(require("./ExecutorScope"));
  class ExecutorOrchestratorClass {
  constructor() {
  this.init = () => {
@@ -48,7 +49,7 @@ class ExecutorOrchestratorClass {
  }
  };
  this.launch = (request) => __awaiter(this, void 0, void 0, function* () {
- var _a;
+ var _a, _b;
  (0, Affirm_1.default)(request, 'Invalid options');
  const { consumer, details, logProgress, options } = request;
  (0, Affirm_1.default)(consumer, 'Invalid consumer');
@@ -56,12 +57,14 @@ class ExecutorOrchestratorClass {
  const tracker = new ExecutorPerformance_1.default();
  const _progress = new ExecutorProgress_1.default(logProgress);
  const { usageId } = UsageManager_1.default.startUsage(consumer, details);
- const workersId = [];
+ const scope = { id: usageId, folder: `${consumer.name}_${usageId}`, workersId: [] };
  try {
  const start = performance.now();
  this.init();
  const executorResults = [];
- const sourceFilesByProducer = yield this.readySourceFiles(consumer);
+ let counter = performance.now();
+ const sourceFilesByProducer = yield this.readySourceFiles(consumer, scope);
+ tracker.measure('ready-producers', performance.now() - counter);
  let globalWorkerIndex = 0;
  for (const pair of sourceFilesByProducer) {
  const { prod, cProd, response } = pair;
@@ -86,15 +89,16 @@ class ExecutorOrchestratorClass {
  const currentWorkerIndex = globalWorkerIndex;
  globalWorkerIndex++;
  const workerData = {
+ producer: prod,
  chunk,
  consumer,
  prodDimensions,
- producer: prod,
- workerId: workerId,
- options: options
+ workerId,
+ scope,
+ options
  };
  _progress.register((currentWorkerIndex + 1).toString(), prod.name, fileIndex, totalFiles);
- workersId.push(workerId);
+ scope.workersId.push(workerId);
  workerThreads.push(this._executorPool.exec('executor', [workerData], {
  on: payload => this.onWorkAdvanced(payload, currentWorkerIndex, _progress)
  }));
@@ -105,28 +109,34 @@ class ExecutorOrchestratorClass {
  }
  }
  _progress.complete();
- yield this.reconcileExecutorThreadsResults(consumer, usageId, executorResults, tracker);
+ yield this.reconcileExecutorThreadsResults(scope, executorResults, tracker);
  // If there is more than one worker, then I need to redo the operations that are done on multiple lines (cause now the worker files have been merged together)
  const postOperation = { totalOutputCount: null };
  if (executorResults.length > 1) {
  if (((_a = consumer.options) === null || _a === void 0 ? void 0 : _a.distinct) === true) {
- const perf = performance.now();
- const unifiedOutputCount = yield ConsumerExecutor_1.default.processDistinct(consumer, usageId);
- tracker.measure('process-distinct:main', performance.now() - perf);
+ counter = performance.now();
+ const unifiedOutputCount = yield ConsumerExecutor_1.default.processDistinct(ExecutorScope_1.default.getMainPath(scope));
+ tracker.measure('process-distinct:main', performance.now() - counter);
+ postOperation.totalOutputCount = unifiedOutputCount;
+ }
+ if (((_b = consumer.options) === null || _b === void 0 ? void 0 : _b.distinct) === true) {
+ counter = performance.now();
+ const unifiedOutputCount = yield ConsumerExecutor_1.default.processDistinctOn(consumer, ExecutorScope_1.default.getMainPath(scope));
+ tracker.measure('process-distinct-on:main', performance.now() - counter);
  postOperation.totalOutputCount = unifiedOutputCount;
  }
  }
  // Export to the destination
- let perf = performance.now();
- const exportRes = yield OutputExecutor_1.default.exportResult(consumer, usageId, ConsumerManager_1.default.getExpandedFields(consumer));
- tracker.measure('export-result', performance.now() - perf);
+ counter = performance.now();
+ const exportRes = yield OutputExecutor_1.default.exportResult(consumer, ConsumerManager_1.default.getExpandedFields(consumer), scope);
+ tracker.measure('export-result', performance.now() - counter);
  // Perform on-success actions if any
  if (consumer.outputs.some(x => x.onSuccess)) {
- perf = performance.now();
+ counter = performance.now();
  yield ConsumerOnFinishManager_1.default.onConsumerSuccess(consumer, usageId);
- tracker.measure('on-success-actions', performance.now() - perf);
+ tracker.measure('on-success-actions', performance.now() - counter);
  }
- yield this.performCleanupOperations(consumer, usageId, executorResults.map(x => x.resultUri), tracker);
+ yield this.performCleanupOperations(scope, tracker);
  const finalResult = this.computeFinalResult(tracker, executorResults, usageId, exportRes.key);
  finalResult.elapsedMS = performance.now() - start;
  if (Algo_1.default.hasVal(postOperation.totalOutputCount))
@@ -136,7 +146,7 @@ class ExecutorOrchestratorClass {
  }
  catch (error) {
  yield ConsumerOnFinishManager_1.default.onConsumerError(consumer, usageId);
- yield this.performCleanupOperations(consumer, usageId, workersId.map(x => ConsumerExecutor_1.default._getWorkPath(consumer, x)), tracker);
+ yield this.performCleanupOperations(scope, tracker);
  UsageManager_1.default.failUsage(usageId, Helper_1.default.asError(error).message);
  throw error;
  }
@@ -210,12 +220,12 @@ class ExecutorOrchestratorClass {
  // No newline found, return file end
  return fileSize;
  };
- this.readySourceFiles = (consumer) => __awaiter(this, void 0, void 0, function* () {
+ this.readySourceFiles = (consumer, scope) => __awaiter(this, void 0, void 0, function* () {
  const results = [];
  for (let i = 0; i < consumer.producers.length; i++) {
  const cProd = consumer.producers[i];
  const prod = Environment_1.default.getProducer(cProd.name);
- results.push({ prod, cProd, response: yield ProducerExecutor_1.default.ready(prod) });
+ results.push({ prod, cProd, response: yield ProducerExecutor_1.default.ready(prod, scope) });
  }
  return results;
  });
@@ -239,13 +249,13 @@ class ExecutorOrchestratorClass {
  return path_1.default.resolve('./.build/workers');
  }
  };
- this.reconcileExecutorThreadsResults = (consumer, executionId, executorResults, tracker) => __awaiter(this, void 0, void 0, function* () {
- const workPath = ConsumerExecutor_1.default._getWorkPath(consumer, executionId);
- ConsumerExecutor_1.default._ensurePath(workPath);
+ this.reconcileExecutorThreadsResults = (scope, executorResults, tracker) => __awaiter(this, void 0, void 0, function* () {
+ const mainPath = ExecutorScope_1.default.getMainPath(scope);
+ ConsumerExecutor_1.default._ensurePath(mainPath);
  // Merge all the various files into a single one
  if (executorResults.length > 1) {
  const perf = performance.now();
- const output = fs_1.default.createWriteStream(workPath);
+ const output = fs_1.default.createWriteStream(mainPath);
  output.setMaxListeners(executorResults.length + 1);
  for (const workerResult of executorResults) {
  yield (0, promises_2.pipeline)(fs_1.default.createReadStream(workerResult.resultUri), output, { end: false });
@@ -256,14 +266,12 @@ class ExecutorOrchestratorClass {
  }
  else if (executorResults.length === 1) {
  // If there is only one worker, then just rename the worker .dataset to the general consumer one
- yield promises_1.default.rename(executorResults[0].resultUri, workPath);
+ yield promises_1.default.rename(executorResults[0].resultUri, mainPath);
  }
  });
- this.performCleanupOperations = (consumer, executionId, workersPath, tracker) => __awaiter(this, void 0, void 0, function* () {
- const workPath = ConsumerExecutor_1.default._getWorkPath(consumer, executionId);
+ this.performCleanupOperations = (scope, tracker) => __awaiter(this, void 0, void 0, function* () {
  const start = performance.now();
- yield Promise.all(workersPath.map(x => ConsumerExecutor_1.default._clearWorkPath(x)));
- yield ConsumerExecutor_1.default._clearWorkPath(workPath);
+ yield ExecutorScope_1.default.clearScope(scope);
  tracker.measure('cleanup-operations', performance.now() - start);
  });
  this.computeFinalResult = (tracker, executorResults, executionId, resultUri) => {
@@ -0,0 +1,52 @@
+ "use strict";
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
+ return new (P || (P = Promise))(function (resolve, reject) {
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
+ });
+ };
+ var __importDefault = (this && this.__importDefault) || function (mod) {
+ return (mod && mod.__esModule) ? mod : { "default": mod };
+ };
+ Object.defineProperty(exports, "__esModule", { value: true });
+ const path_1 = __importDefault(require("path"));
+ const fs_1 = __importDefault(require("fs"));
+ const promises_1 = __importDefault(require("fs/promises"));
+ const Constants_1 = __importDefault(require("../Constants"));
+ class ExecutorScopeClass {
+ constructor() {
+ this.WORKERS_FOLDER = 'workers';
+ this.PRODUCERS_FOLDER = 'producers';
+ this.getWorkerPath = (scope, workerId) => {
+ return path_1.default.join(Constants_1.default.defaults.REMORA_PATH, Constants_1.default.defaults.PRODUCER_TEMP_FOLDER,
+ // A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
+ scope.folder, this.WORKERS_FOLDER, `${workerId}.dataset`);
+ };
+ this.getProducerPath = (scope, producer, sourceFileKey) => {
+ return path_1.default.join(Constants_1.default.defaults.REMORA_PATH, Constants_1.default.defaults.PRODUCER_TEMP_FOLDER,
+ // A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
+ scope.folder, this.PRODUCERS_FOLDER, producer.name, `${sourceFileKey}.dataset`);
+ };
+ this.getMainPath = (scope) => {
+ return path_1.default.join(Constants_1.default.defaults.REMORA_PATH, Constants_1.default.defaults.PRODUCER_TEMP_FOLDER, scope.folder, 'main.dataset');
+ };
+ this.clearScope = (scope) => __awaiter(this, void 0, void 0, function* () {
+ const scopePath = path_1.default.join(Constants_1.default.defaults.REMORA_PATH, Constants_1.default.defaults.PRODUCER_TEMP_FOLDER, scope.folder);
+ if (fs_1.default.existsSync(scopePath)) {
+ yield promises_1.default.rm(scopePath, { recursive: true, force: true });
+ }
+ });
+ this.ensurePath = (fileUri) => {
+ const dir = path_1.default.dirname(fileUri);
+ if (!fs_1.default.existsSync(dir))
+ fs_1.default.mkdirSync(dir, { recursive: true });
+ if (!fs_1.default.existsSync(fileUri))
+ fs_1.default.writeFileSync(fileUri, '');
+ };
+ }
+ }
+ const ExecutorScope = new ExecutorScopeClass();
+ exports.default = ExecutorScope;
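
Note: ExecutorScope is the organizing idea behind most of the other changes. Each run gets a scope object ({ id: usageId, folder: "<consumer.name>_<usageId>", workersId }), and all of its intermediate files live under that folder, so cleanup collapses to a single recursive delete instead of clearing individual work paths. A TypeScript sketch of the implied layout follows; the <REMORA_PATH>/<PRODUCER_TEMP_FOLDER> placeholders stand in for Constants.defaults values not shown in this diff, and the sample ids are hypothetical.

// Sketch of the per-execution scope introduced in 1.0.14 and the on-disk layout
// implied by ExecutorScope. '<REMORA_PATH>/<PRODUCER_TEMP_FOLDER>' stands in for
// Constants.defaults.REMORA_PATH / PRODUCER_TEMP_FOLDER, whose values are not in this diff.
import path from 'path';

interface ExecutorScope {
  id: string;          // the usageId of the run
  folder: string;      // `${consumer.name}_${usageId}` — everything for the run lives here
  workersId: string[]; // worker ids registered as threads are launched
}

const BASE = path.join('<REMORA_PATH>', '<PRODUCER_TEMP_FOLDER>');

// Mirrors getWorkerPath / getProducerPath / getMainPath from the compiled module above.
const workerPath = (scope: ExecutorScope, workerId: string) =>
  path.join(BASE, scope.folder, 'workers', `${workerId}.dataset`);
const producerPath = (scope: ExecutorScope, producerName: string, sourceFileKey: string) =>
  path.join(BASE, scope.folder, 'producers', producerName, `${sourceFileKey}.dataset`);
const mainPath = (scope: ExecutorScope) =>
  path.join(BASE, scope.folder, 'main.dataset');

// Because the whole run sits under scope.folder, cleanup is a single recursive
// delete of that directory (ExecutorScope.clearScope) rather than per-file removal.
const scope: ExecutorScope = { id: 'usage-123', folder: 'my_consumer_usage-123', workersId: [] };
console.log(workerPath(scope, 'w1'));
console.log(producerPath(scope, 'orders', 'part-0'));
console.log(mainPath(scope));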
@@ -17,7 +17,7 @@ const DSTE_1 = __importDefault(require("../core/dste/DSTE"));
  const DriverFactory_1 = __importDefault(require("../drivers/DriverFactory"));
  const Environment_1 = __importDefault(require("../engines/Environment"));
  const CSVParser_1 = __importDefault(require("../engines/parsing/CSVParser"));
- const ConsumerExecutor_1 = __importDefault(require("./ConsumerExecutor"));
+ const ExecutorScope_1 = __importDefault(require("./ExecutorScope"));
  class OutputExecutorClass {
  constructor() {
  this._getInternalRecordFormat = (consumer) => {
@@ -62,13 +62,13 @@ class OutputExecutorClass {
  return JSON.stringify(record);
  }
  };
- this.exportResult = (consumer, executionId, fields) => __awaiter(this, void 0, void 0, function* () {
+ this.exportResult = (consumer, fields, scope) => __awaiter(this, void 0, void 0, function* () {
  const internalFormat = this._getInternalRecordFormat(consumer);
  for (const output of consumer.outputs) {
  const destination = Environment_1.default.getSource(output.exportDestination);
  const driver = yield DriverFactory_1.default.instantiateDestination(destination);
- const currentPath = ConsumerExecutor_1.default._getWorkPath(consumer, executionId);
- const destinationName = this._composeFileName(consumer, output, this._getExtension(output), executionId);
+ const currentPath = ExecutorScope_1.default.getMainPath(scope);
+ const destinationName = this._composeFileName(consumer, output, this._getExtension(output));
  if (output.format === internalFormat) {
  return yield driver.move(currentPath, destinationName);
  }
@@ -22,12 +22,12 @@ const LineParser_1 = __importDefault(require("../engines/parsing/LineParser"));
  const CryptoEngine_1 = __importDefault(require("../engines/CryptoEngine"));
  class ProducerExecutorClass {
  constructor() {
- this.ready = (producer) => __awaiter(this, void 0, void 0, function* () {
+ this.ready = (producer, scope) => __awaiter(this, void 0, void 0, function* () {
  (0, Affirm_1.default)(producer, 'Invalid producer');
  const source = Environment_1.default.getSource(producer.source);
  (0, Affirm_1.default)(source, `Invalid source ${producer.source} on producer ${producer.name}`);
  const driver = yield DriverFactory_1.default.instantiateSource(source);
- return yield driver.ready(producer);
+ return yield driver.ready({ producer, scope });
  });
  this.processHeader = (line, producer) => {
  const { settings: { fileType, hasHeaderRow, delimiter } } = producer;
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@forzalabs/remora",
- "version": "1.0.13",
+ "version": "1.0.14",
  "description": "A powerful CLI tool for seamless data translation.",
  "main": "index.js",
  "private": false,
@@ -21,7 +21,7 @@
  "debug": "npx tsx ./src/index.ts debug",
  "create-producer": "npx tsx ./src/index.ts create-producer",
  "copy-static-file": "npx tsx ./scripts/CopyStaticFile.js",
- "build": "npm i && npm run sync && tsc --outDir .build && npm run copy-static-file",
+ "build": "npm i && npm run sync && rm -rf .build && tsc --outDir .build && npm run copy-static-file",
  "fast-build": "tsc --outDir .build",
  "upload": "npm run build && cd .build && npm publish --access=public"
  },
@@ -21,11 +21,12 @@ dotenv_1.default.configDotenv();
  const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
  Environment_1.default.load('./');
  try {
- const { workerId, chunk, consumer, producer, prodDimensions, options } = workerData;
+ const { workerId, chunk, consumer, producer, prodDimensions, scope, options } = workerData;
  (0, Affirm_1.default)(workerId, `Invalid worker id`);
  (0, Affirm_1.default)(consumer, `Invalid consumer`);
  (0, Affirm_1.default)(producer, `Invalid producer`);
  (0, Affirm_1.default)(chunk, `Invalid chunk`);
+ (0, Affirm_1.default)(scope, `Invalid executor scope`);
  const executor = new Executor_1.default();
  const res = yield executor.run({
  consumer,
@@ -34,6 +35,7 @@ const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
  workerId,
  chunk,
  options,
+ scope,
  reportWork: packet => workerpool_1.default.workerEmit(packet)
  });
  return res;
@@ -1,48 +0,0 @@
- "use strict";
- var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
- function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
- return new (P || (P = Promise))(function (resolve, reject) {
- function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
- function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
- function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
- step((generator = generator.apply(thisArg, _arguments || [])).next());
- });
- };
- var __importDefault = (this && this.__importDefault) || function (mod) {
- return (mod && mod.__esModule) ? mod : { "default": mod };
- };
- Object.defineProperty(exports, "__esModule", { value: true });
- const Affirm_1 = __importDefault(require("../core/Affirm"));
- const DatabaseEngine_1 = __importDefault(require("../database/DatabaseEngine"));
- const UserManager_1 = __importDefault(require("../engines/UserManager"));
- const Settings_1 = __importDefault(require("../helper/Settings"));
- const bcryptjs_1 = __importDefault(require("bcryptjs"));
- const JWTManager_1 = __importDefault(require("./JWTManager"));
- class AdminManagerClass {
- constructor() {
- this.COLLECTION = Settings_1.default.db.collections.users;
- this.rootSignIn = (password) => __awaiter(this, void 0, void 0, function* () {
- (0, Affirm_1.default)(password, 'Invalid password');
- const rootUser = yield DatabaseEngine_1.default.findOne(this.COLLECTION, { isRoot: true });
- (0, Affirm_1.default)(rootUser, 'Incorrect system configuration: root user not found');
- const isSame = yield bcryptjs_1.default.compare(password, rootUser.rootPasswordHash);
- if (!isSame)
- throw new Error('Invalid credentials');
- rootUser.lastLogin = new Date().toJSON();
- yield UserManager_1.default.update(rootUser);
- const adminSecret = process.env.ADMIN_JWT_SECRET;
- (0, Affirm_1.default)(adminSecret, 'Wrong system config: missing admin jwt secret');
- const payload = {
- apiKeyId: rootUser._id,
- installationId: process.env.INSTALLATION_ID,
- name: rootUser.name,
- scopes: { consumers: ['*'], projects: ['*'] },
- isAdmin: true
- };
- const token = JWTManager_1.default.issue(adminSecret, payload, 8);
- return token;
- });
- }
- }
- const AdminManager = new AdminManagerClass();
- exports.default = AdminManager;
@@ -1,45 +0,0 @@
- "use strict";
- var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
- function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
- return new (P || (P = Promise))(function (resolve, reject) {
- function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
- function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
- function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
- step((generator = generator.apply(thisArg, _arguments || [])).next());
- });
- };
- var __importDefault = (this && this.__importDefault) || function (mod) {
- return (mod && mod.__esModule) ? mod : { "default": mod };
- };
- Object.defineProperty(exports, "__esModule", { value: true });
- const Affirm_1 = __importDefault(require("../core/Affirm"));
- const DatabaseEngine_1 = __importDefault(require("../database/DatabaseEngine"));
- const Helper_1 = __importDefault(require("../helper/Helper"));
- const Settings_1 = __importDefault(require("../helper/Settings"));
- const JWTManager_1 = __importDefault(require("./JWTManager"));
- class ApiKeysManagerClass {
- constructor() {
- this.COLLECTION = Settings_1.default.db.collections.apiKeys;
- this.create = (name, scopes) => __awaiter(this, void 0, void 0, function* () {
- (0, Affirm_1.default)(name, 'Invalid name');
- const apiKeyId = Helper_1.default.uuid();
- const apiSecret = JWTManager_1.default.sign(apiKeyId, name, scopes);
- const newApiKey = {
- _id: apiKeyId,
- _signature: '',
- createdAt: new Date().toJSON(),
- isActive: true,
- name: name,
- scopes: scopes,
- value: apiSecret
- };
- return yield DatabaseEngine_1.default.upsert(this.COLLECTION, newApiKey._id, newApiKey);
- });
- this.get = (apiKeyId) => __awaiter(this, void 0, void 0, function* () {
- (0, Affirm_1.default)(apiKeyId, 'Invalid api key id');
- return yield DatabaseEngine_1.default.get(this.COLLECTION, apiKeyId);
- });
- }
- }
- const ApiKeysManager = new ApiKeysManagerClass();
- exports.default = ApiKeysManager;