@forzalabs/remora 1.0.13 → 1.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Constants.js +2 -1
- package/database/DatabaseEngine.js +7 -6
- package/definitions/json_schemas/consumer-schema.json +39 -23
- package/drivers/DeltaShareDriver.js +2 -2
- package/drivers/HttpApiDriver.js +3 -3
- package/drivers/RedshiftDriver.js +2 -2
- package/drivers/files/LocalDestinationDriver.js +0 -55
- package/drivers/files/LocalSourceDriver.js +3 -2
- package/drivers/s3/S3DestinationDriver.js +107 -69
- package/drivers/s3/S3SourceDriver.js +44 -4
- package/engines/parsing/LineParser.js +19 -0
- package/engines/validation/Validator.js +7 -2
- package/executors/ConsumerExecutor.js +118 -12
- package/executors/Executor.js +19 -11
- package/executors/ExecutorOrchestrator.js +37 -29
- package/executors/ExecutorScope.js +52 -0
- package/executors/OutputExecutor.js +4 -4
- package/executors/ProducerExecutor.js +2 -2
- package/package.json +2 -2
- package/workers/ExecutorWorker.js +3 -1
- package/auth/AdminManager.js +0 -48
- package/auth/ApiKeysManager.js +0 -45
- package/auth/JWTManager.js +0 -56
- package/database/DatabaseInitializer.js +0 -80
- package/engines/file/FileExporter.js +0 -58
- package/workers/definitions.js +0 -2
|
package/executors/ConsumerExecutor.js
CHANGED
|
@@ -25,9 +25,11 @@ const readline_1 = __importDefault(require("readline"));
|
|
|
25
25
|
const promises_1 = __importDefault(require("fs/promises"));
|
|
26
26
|
const TransformationEngine_1 = __importDefault(require("../engines/transform/TransformationEngine"));
|
|
27
27
|
const RequestExecutor_1 = __importDefault(require("../engines/execution/RequestExecutor"));
|
|
28
|
-
const Affirm_1 = __importDefault(require("../core/Affirm"));
|
|
29
28
|
const Constants_1 = __importDefault(require("../Constants"));
|
|
30
29
|
const Algo_1 = __importDefault(require("../core/Algo"));
|
|
30
|
+
const LineParser_1 = __importDefault(require("../engines/parsing/LineParser"));
|
|
31
|
+
const OutputExecutor_1 = __importDefault(require("./OutputExecutor"));
|
|
32
|
+
const ConsumerManager_1 = __importDefault(require("../engines/consumer/ConsumerManager"));
|
|
31
33
|
class ConsumerExecutorClass {
|
|
32
34
|
constructor() {
|
|
33
35
|
this._getWorkPath = (consumer, executionId) => {
|
|
@@ -64,12 +66,6 @@ class ConsumerExecutorClass {
|
|
|
64
66
|
if (!fs_1.default.existsSync(pathUri))
|
|
65
67
|
fs_1.default.writeFileSync(pathUri, '');
|
|
66
68
|
};
|
|
67
|
-
this.ready = (consumer, executionId) => {
|
|
68
|
-
(0, Affirm_1.default)(consumer, 'Invalid consumer');
|
|
69
|
-
const workPath = this._getWorkPath(consumer, executionId);
|
|
70
|
-
this._ensurePath(workPath);
|
|
71
|
-
return fs_1.default.createWriteStream(workPath);
|
|
72
|
-
};
|
|
73
69
|
this.processRecord = (options) => {
|
|
74
70
|
var _a, _b;
|
|
75
71
|
const { consumer, fields, dimensions, producer, record, requestOptions } = options;
|
|
@@ -118,12 +114,11 @@ class ConsumerExecutorClass {
|
|
|
118
114
|
}
|
|
119
115
|
return record;
|
|
120
116
|
};
|
|
121
|
-
this.processDistinct = (
|
|
117
|
+
this.processDistinct = (datasetPath) => __awaiter(this, void 0, void 0, function* () {
|
|
122
118
|
var _a, e_1, _b, _c;
|
|
123
|
-
const
|
|
124
|
-
const reader = fs_1.default.createReadStream(workPath);
|
|
119
|
+
const reader = fs_1.default.createReadStream(datasetPath);
|
|
125
120
|
const lineReader = readline_1.default.createInterface({ input: reader, crlfDelay: Infinity });
|
|
126
|
-
const tempWorkPath =
|
|
121
|
+
const tempWorkPath = datasetPath + '_tmp';
|
|
127
122
|
const writer = fs_1.default.createWriteStream(tempWorkPath);
|
|
128
123
|
let newLineCount = 0;
|
|
129
124
|
const seen = new Set();
|
|
@@ -148,9 +143,120 @@ class ConsumerExecutorClass {
|
|
|
148
143
|
}
|
|
149
144
|
writer.close();
|
|
150
145
|
reader.close();
|
|
151
|
-
|
|
146
|
+
// Wait for the writer to finish before renaming
|
|
147
|
+
yield new Promise((resolve, reject) => {
|
|
148
|
+
writer.on('finish', resolve);
|
|
149
|
+
writer.on('error', reject);
|
|
150
|
+
writer.end();
|
|
151
|
+
});
|
|
152
|
+
fs_1.default.renameSync(tempWorkPath, datasetPath);
|
|
152
153
|
return newLineCount;
|
|
153
154
|
});
|
|
155
|
+
this.processDistinctOn = (consumer, datasetPath) => __awaiter(this, void 0, void 0, function* () {
|
|
156
|
+
var _a, e_2, _b, _c;
|
|
157
|
+
const reader = fs_1.default.createReadStream(datasetPath);
|
|
158
|
+
const lineReader = readline_1.default.createInterface({ input: reader, crlfDelay: Infinity });
|
|
159
|
+
const { distinctOn } = consumer.options;
|
|
160
|
+
const { keys, resolution } = distinctOn;
|
|
161
|
+
const { strategy, orderBy, direction = 'asc' } = resolution;
|
|
162
|
+
const internalRecordFormat = OutputExecutor_1.default._getInternalRecordFormat(consumer);
|
|
163
|
+
const internalFields = ConsumerManager_1.default.getExpandedFields(consumer);
|
|
164
|
+
// Map to store the winning record for each composite key
|
|
165
|
+
// Key: composite key string, Value: { record: parsed object, line: original JSON line }
|
|
166
|
+
const winners = new Map();
|
|
167
|
+
try {
|
|
168
|
+
for (var _d = true, lineReader_2 = __asyncValues(lineReader), lineReader_2_1; lineReader_2_1 = yield lineReader_2.next(), _a = lineReader_2_1.done, !_a; _d = true) {
|
|
169
|
+
_c = lineReader_2_1.value;
|
|
170
|
+
_d = false;
|
|
171
|
+
const line = _c;
|
|
172
|
+
const record = (internalRecordFormat === 'CSV' || internalRecordFormat === 'TXT')
|
|
173
|
+
? LineParser_1.default._internalParseCSV(line, internalFields)
|
|
174
|
+
: LineParser_1.default._internalParseJSON(line);
|
|
175
|
+
const compositeKey = keys.map(k => { var _a; return String((_a = record[k]) !== null && _a !== void 0 ? _a : ''); }).join('|');
|
|
176
|
+
const existing = winners.get(compositeKey);
|
|
177
|
+
if (!existing) {
|
|
178
|
+
winners.set(compositeKey, { record, line });
|
|
179
|
+
continue;
|
|
180
|
+
}
|
|
181
|
+
const shouldReplace = this._shouldReplaceRecord(existing.record, record, strategy, orderBy, direction);
|
|
182
|
+
if (shouldReplace) {
|
|
183
|
+
winners.set(compositeKey, { record, line });
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
catch (e_2_1) { e_2 = { error: e_2_1 }; }
|
|
188
|
+
finally {
|
|
189
|
+
try {
|
|
190
|
+
if (!_d && !_a && (_b = lineReader_2.return)) yield _b.call(lineReader_2);
|
|
191
|
+
}
|
|
192
|
+
finally { if (e_2) throw e_2.error; }
|
|
193
|
+
}
|
|
194
|
+
reader.close();
|
|
195
|
+
// Write the winning records to the temp file
|
|
196
|
+
const tempWorkPath = datasetPath + '_tmp';
|
|
197
|
+
const writer = fs_1.default.createWriteStream(tempWorkPath);
|
|
198
|
+
for (const { line } of winners.values()) {
|
|
199
|
+
writer.write(line + '\n');
|
|
200
|
+
}
|
|
201
|
+
// Wait for the writer to finish before renaming
|
|
202
|
+
yield new Promise((resolve, reject) => {
|
|
203
|
+
writer.on('finish', resolve);
|
|
204
|
+
writer.on('error', reject);
|
|
205
|
+
writer.end();
|
|
206
|
+
});
|
|
207
|
+
fs_1.default.renameSync(tempWorkPath, datasetPath);
|
|
208
|
+
return winners.size;
|
|
209
|
+
});
|
|
210
|
+
/**
|
|
211
|
+
* Determines if the new record should replace the existing record based on the resolution strategy
|
|
212
|
+
*/
|
|
213
|
+
this._shouldReplaceRecord = (existing, newRecord, strategy, orderBy, direction) => {
|
|
214
|
+
switch (strategy) {
|
|
215
|
+
case 'first':
|
|
216
|
+
return false;
|
|
217
|
+
case 'last':
|
|
218
|
+
return true;
|
|
219
|
+
case 'min': {
|
|
220
|
+
const existingVal = existing[orderBy];
|
|
221
|
+
const newVal = newRecord[orderBy];
|
|
222
|
+
const comparison = this._compareValues(newVal, existingVal);
|
|
223
|
+
// For 'min', we want the smallest value
|
|
224
|
+
// If direction is 'desc', we invert the logic (smallest becomes largest)
|
|
225
|
+
return direction === 'asc' ? comparison < 0 : comparison > 0;
|
|
226
|
+
}
|
|
227
|
+
case 'max': {
|
|
228
|
+
const existingVal = existing[orderBy];
|
|
229
|
+
const newVal = newRecord[orderBy];
|
|
230
|
+
const comparison = this._compareValues(newVal, existingVal);
|
|
231
|
+
// For 'max', we want the largest value
|
|
232
|
+
// If direction is 'desc', we invert the logic (largest becomes smallest)
|
|
233
|
+
return direction === 'asc' ? comparison > 0 : comparison < 0;
|
|
234
|
+
}
|
|
235
|
+
default:
|
|
236
|
+
return false;
|
|
237
|
+
}
|
|
238
|
+
};
|
|
239
|
+
/**
|
|
240
|
+
* Compares two values, handling numbers, strings, and dates
|
|
241
|
+
* Returns: negative if a < b, positive if a > b, 0 if equal
|
|
242
|
+
*/
|
|
243
|
+
this._compareValues = (a, b) => {
|
|
244
|
+
// Handle null/undefined
|
|
245
|
+
if (a == null && b == null)
|
|
246
|
+
return 0;
|
|
247
|
+
if (a == null)
|
|
248
|
+
return -1;
|
|
249
|
+
if (b == null)
|
|
250
|
+
return 1;
|
|
251
|
+
// Try numeric comparison
|
|
252
|
+
const numA = Number(a);
|
|
253
|
+
const numB = Number(b);
|
|
254
|
+
if (!isNaN(numA) && !isNaN(numB)) {
|
|
255
|
+
return numA - numB;
|
|
256
|
+
}
|
|
257
|
+
// Fall back to string comparison
|
|
258
|
+
return String(a).localeCompare(String(b));
|
|
259
|
+
};
|
|
154
260
|
}
|
|
155
261
|
}
|
|
156
262
|
const ConsumerExecutor = new ConsumerExecutorClass();
|
package/executors/Executor.js
CHANGED
|
@@ -27,6 +27,7 @@ const Affirm_1 = __importDefault(require("../core/Affirm"));
|
|
|
27
27
|
const OutputExecutor_1 = __importDefault(require("./OutputExecutor"));
|
|
28
28
|
const ConsumerManager_1 = __importDefault(require("../engines/consumer/ConsumerManager"));
|
|
29
29
|
const ExecutorPerformance_1 = __importDefault(require("./ExecutorPerformance"));
|
|
30
|
+
const ExecutorScope_1 = __importDefault(require("./ExecutorScope"));
|
|
30
31
|
class Executor {
|
|
31
32
|
constructor() {
|
|
32
33
|
this._REPORT_WORK_AFTER_LINES = 1000;
|
|
@@ -38,9 +39,9 @@ class Executor {
|
|
|
38
39
|
*/
|
|
39
40
|
this.run = (request) => __awaiter(this, void 0, void 0, function* () {
|
|
40
41
|
var _a, e_1, _b, _c;
|
|
41
|
-
var _d;
|
|
42
|
+
var _d, _e;
|
|
42
43
|
(0, Affirm_1.default)(request, 'Invalid request');
|
|
43
|
-
const { consumer, producer, prodDimensions, workerId, chunk, options, reportWork } = request;
|
|
44
|
+
const { consumer, producer, prodDimensions, workerId, chunk, options, scope, reportWork } = request;
|
|
44
45
|
const counter = performance.now();
|
|
45
46
|
const result = {
|
|
46
47
|
executionId: workerId,
|
|
@@ -48,12 +49,13 @@ class Executor {
|
|
|
48
49
|
elapsedMS: -1,
|
|
49
50
|
inputCount: -1,
|
|
50
51
|
outputCount: -1,
|
|
51
|
-
resultUri:
|
|
52
|
+
resultUri: ExecutorScope_1.default.getWorkerPath(scope, workerId),
|
|
52
53
|
operations: {}
|
|
53
54
|
};
|
|
55
|
+
ExecutorScope_1.default.ensurePath(result.resultUri);
|
|
54
56
|
let totalOutputCount = 0, totalCycles = 1, perf = 0, lineIndex = 0;
|
|
55
57
|
const readStream = this.openReadStream(chunk);
|
|
56
|
-
const writeStream = this.openWriteStream(
|
|
58
|
+
const writeStream = this.openWriteStream(scope, workerId);
|
|
57
59
|
const fields = ConsumerManager_1.default.getExpandedFields(consumer);
|
|
58
60
|
const { isFirstChunk, start, end } = chunk;
|
|
59
61
|
const totalBytes = end - start;
|
|
@@ -61,9 +63,9 @@ class Executor {
|
|
|
61
63
|
// Process all the line-independent operations of the consumer in a single pass
|
|
62
64
|
const lineStream = readline_1.default.createInterface({ input: readStream, crlfDelay: Infinity });
|
|
63
65
|
try {
|
|
64
|
-
for (var
|
|
66
|
+
for (var _f = true, lineStream_1 = __asyncValues(lineStream), lineStream_1_1; lineStream_1_1 = yield lineStream_1.next(), _a = lineStream_1_1.done, !_a; _f = true) {
|
|
65
67
|
_c = lineStream_1_1.value;
|
|
66
|
-
|
|
68
|
+
_f = false;
|
|
67
69
|
const line = _c;
|
|
68
70
|
if (lineIndex === 0 && isFirstChunk) {
|
|
69
71
|
if (!this.shouldProcessFirstLine(producer)) {
|
|
@@ -115,22 +117,27 @@ class Executor {
|
|
|
115
117
|
catch (e_1_1) { e_1 = { error: e_1_1 }; }
|
|
116
118
|
finally {
|
|
117
119
|
try {
|
|
118
|
-
if (!
|
|
120
|
+
if (!_f && !_a && (_b = lineStream_1.return)) yield _b.call(lineStream_1);
|
|
119
121
|
}
|
|
120
122
|
finally { if (e_1) throw e_1.error; }
|
|
121
123
|
}
|
|
122
124
|
// Process the operations that work on multiple lines
|
|
123
125
|
if (((_d = consumer.options) === null || _d === void 0 ? void 0 : _d.distinct) === true) {
|
|
124
126
|
perf = performance.now();
|
|
125
|
-
totalOutputCount = yield ConsumerExecutor_1.default.processDistinct(
|
|
127
|
+
totalOutputCount = yield ConsumerExecutor_1.default.processDistinct(result.resultUri);
|
|
126
128
|
this._performance.measure('process-distinct', performance.now() - perf);
|
|
127
129
|
totalCycles++;
|
|
128
130
|
}
|
|
131
|
+
if ((_e = consumer.options) === null || _e === void 0 ? void 0 : _e.distinctOn) {
|
|
132
|
+
perf = performance.now();
|
|
133
|
+
totalOutputCount = yield ConsumerExecutor_1.default.processDistinctOn(consumer, result.resultUri);
|
|
134
|
+
this._performance.measure('process-distinct-on', performance.now() - perf);
|
|
135
|
+
totalCycles++;
|
|
136
|
+
}
|
|
129
137
|
result.elapsedMS = performance.now() - counter;
|
|
130
138
|
result.cycles = totalCycles;
|
|
131
139
|
result.inputCount = lineIndex;
|
|
132
140
|
result.outputCount = totalOutputCount;
|
|
133
|
-
result.resultUri = ConsumerExecutor_1.default._getWorkPath(consumer, workerId);
|
|
134
141
|
result.operations = this._performance.getOperations();
|
|
135
142
|
return result;
|
|
136
143
|
});
|
|
@@ -138,8 +145,9 @@ class Executor {
|
|
|
138
145
|
const { end, fileUri, start } = chunk;
|
|
139
146
|
return fs_1.default.createReadStream(fileUri, { start, end: end });
|
|
140
147
|
};
|
|
141
|
-
this.openWriteStream = (
|
|
142
|
-
|
|
148
|
+
this.openWriteStream = (scope, workerId) => {
|
|
149
|
+
const workerPath = ExecutorScope_1.default.getWorkerPath(scope, workerId);
|
|
150
|
+
return fs_1.default.createWriteStream(workerPath);
|
|
143
151
|
};
|
|
144
152
|
this.shouldProcessFirstLine = (producer) => {
|
|
145
153
|
(0, Affirm_1.default)(producer, 'Invalid producer');
|
package/executors/ExecutorOrchestrator.js
CHANGED
|
@@ -32,6 +32,7 @@ const ExecutorPerformance_1 = __importDefault(require("./ExecutorPerformance"));
|
|
|
32
32
|
const ExecutorProgress_1 = __importDefault(require("./ExecutorProgress"));
|
|
33
33
|
const Algo_1 = __importDefault(require("../core/Algo"));
|
|
34
34
|
const ConsumerOnFinishManager_1 = __importDefault(require("../engines/consumer/ConsumerOnFinishManager"));
|
|
35
|
+
const ExecutorScope_1 = __importDefault(require("./ExecutorScope"));
|
|
35
36
|
class ExecutorOrchestratorClass {
|
|
36
37
|
constructor() {
|
|
37
38
|
this.init = () => {
|
|
@@ -48,7 +49,7 @@ class ExecutorOrchestratorClass {
|
|
|
48
49
|
}
|
|
49
50
|
};
|
|
50
51
|
this.launch = (request) => __awaiter(this, void 0, void 0, function* () {
|
|
51
|
-
var _a;
|
|
52
|
+
var _a, _b;
|
|
52
53
|
(0, Affirm_1.default)(request, 'Invalid options');
|
|
53
54
|
const { consumer, details, logProgress, options } = request;
|
|
54
55
|
(0, Affirm_1.default)(consumer, 'Invalid consumer');
|
|
@@ -56,12 +57,14 @@ class ExecutorOrchestratorClass {
|
|
|
56
57
|
const tracker = new ExecutorPerformance_1.default();
|
|
57
58
|
const _progress = new ExecutorProgress_1.default(logProgress);
|
|
58
59
|
const { usageId } = UsageManager_1.default.startUsage(consumer, details);
|
|
59
|
-
const
|
|
60
|
+
const scope = { id: usageId, folder: `${consumer.name}_${usageId}`, workersId: [] };
|
|
60
61
|
try {
|
|
61
62
|
const start = performance.now();
|
|
62
63
|
this.init();
|
|
63
64
|
const executorResults = [];
|
|
64
|
-
|
|
65
|
+
let counter = performance.now();
|
|
66
|
+
const sourceFilesByProducer = yield this.readySourceFiles(consumer, scope);
|
|
67
|
+
tracker.measure('ready-producers', performance.now() - counter);
|
|
65
68
|
let globalWorkerIndex = 0;
|
|
66
69
|
for (const pair of sourceFilesByProducer) {
|
|
67
70
|
const { prod, cProd, response } = pair;
|
|
@@ -86,15 +89,16 @@ class ExecutorOrchestratorClass {
|
|
|
86
89
|
const currentWorkerIndex = globalWorkerIndex;
|
|
87
90
|
globalWorkerIndex++;
|
|
88
91
|
const workerData = {
|
|
92
|
+
producer: prod,
|
|
89
93
|
chunk,
|
|
90
94
|
consumer,
|
|
91
95
|
prodDimensions,
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
options
|
|
96
|
+
workerId,
|
|
97
|
+
scope,
|
|
98
|
+
options
|
|
95
99
|
};
|
|
96
100
|
_progress.register((currentWorkerIndex + 1).toString(), prod.name, fileIndex, totalFiles);
|
|
97
|
-
workersId.push(workerId);
|
|
101
|
+
scope.workersId.push(workerId);
|
|
98
102
|
workerThreads.push(this._executorPool.exec('executor', [workerData], {
|
|
99
103
|
on: payload => this.onWorkAdvanced(payload, currentWorkerIndex, _progress)
|
|
100
104
|
}));
|
|
@@ -105,28 +109,34 @@ class ExecutorOrchestratorClass {
|
|
|
105
109
|
}
|
|
106
110
|
}
|
|
107
111
|
_progress.complete();
|
|
108
|
-
yield this.reconcileExecutorThreadsResults(
|
|
112
|
+
yield this.reconcileExecutorThreadsResults(scope, executorResults, tracker);
|
|
109
113
|
// If there is more than one worker, then I need to redo the operations that are done on multiple lines (cause now the worker files have been merged together)
|
|
110
114
|
const postOperation = { totalOutputCount: null };
|
|
111
115
|
if (executorResults.length > 1) {
|
|
112
116
|
if (((_a = consumer.options) === null || _a === void 0 ? void 0 : _a.distinct) === true) {
|
|
113
|
-
|
|
114
|
-
const unifiedOutputCount = yield ConsumerExecutor_1.default.processDistinct(
|
|
115
|
-
tracker.measure('process-distinct:main', performance.now() -
|
|
117
|
+
counter = performance.now();
|
|
118
|
+
const unifiedOutputCount = yield ConsumerExecutor_1.default.processDistinct(ExecutorScope_1.default.getMainPath(scope));
|
|
119
|
+
tracker.measure('process-distinct:main', performance.now() - counter);
|
|
120
|
+
postOperation.totalOutputCount = unifiedOutputCount;
|
|
121
|
+
}
|
|
122
|
+
if (((_b = consumer.options) === null || _b === void 0 ? void 0 : _b.distinct) === true) {
|
|
123
|
+
counter = performance.now();
|
|
124
|
+
const unifiedOutputCount = yield ConsumerExecutor_1.default.processDistinctOn(consumer, ExecutorScope_1.default.getMainPath(scope));
|
|
125
|
+
tracker.measure('process-distinct-on:main', performance.now() - counter);
|
|
116
126
|
postOperation.totalOutputCount = unifiedOutputCount;
|
|
117
127
|
}
|
|
118
128
|
}
|
|
119
129
|
// Export to the destination
|
|
120
|
-
|
|
121
|
-
const exportRes = yield OutputExecutor_1.default.exportResult(consumer,
|
|
122
|
-
tracker.measure('export-result', performance.now() -
|
|
130
|
+
counter = performance.now();
|
|
131
|
+
const exportRes = yield OutputExecutor_1.default.exportResult(consumer, ConsumerManager_1.default.getExpandedFields(consumer), scope);
|
|
132
|
+
tracker.measure('export-result', performance.now() - counter);
|
|
123
133
|
// Perform on-success actions if any
|
|
124
134
|
if (consumer.outputs.some(x => x.onSuccess)) {
|
|
125
|
-
|
|
135
|
+
counter = performance.now();
|
|
126
136
|
yield ConsumerOnFinishManager_1.default.onConsumerSuccess(consumer, usageId);
|
|
127
|
-
tracker.measure('on-success-actions', performance.now() -
|
|
137
|
+
tracker.measure('on-success-actions', performance.now() - counter);
|
|
128
138
|
}
|
|
129
|
-
yield this.performCleanupOperations(
|
|
139
|
+
yield this.performCleanupOperations(scope, tracker);
|
|
130
140
|
const finalResult = this.computeFinalResult(tracker, executorResults, usageId, exportRes.key);
|
|
131
141
|
finalResult.elapsedMS = performance.now() - start;
|
|
132
142
|
if (Algo_1.default.hasVal(postOperation.totalOutputCount))
|
|
@@ -136,7 +146,7 @@ class ExecutorOrchestratorClass {
|
|
|
136
146
|
}
|
|
137
147
|
catch (error) {
|
|
138
148
|
yield ConsumerOnFinishManager_1.default.onConsumerError(consumer, usageId);
|
|
139
|
-
yield this.performCleanupOperations(
|
|
149
|
+
yield this.performCleanupOperations(scope, tracker);
|
|
140
150
|
UsageManager_1.default.failUsage(usageId, Helper_1.default.asError(error).message);
|
|
141
151
|
throw error;
|
|
142
152
|
}
|
|
@@ -210,12 +220,12 @@ class ExecutorOrchestratorClass {
|
|
|
210
220
|
// No newline found, return file end
|
|
211
221
|
return fileSize;
|
|
212
222
|
};
|
|
213
|
-
this.readySourceFiles = (consumer) => __awaiter(this, void 0, void 0, function* () {
|
|
223
|
+
this.readySourceFiles = (consumer, scope) => __awaiter(this, void 0, void 0, function* () {
|
|
214
224
|
const results = [];
|
|
215
225
|
for (let i = 0; i < consumer.producers.length; i++) {
|
|
216
226
|
const cProd = consumer.producers[i];
|
|
217
227
|
const prod = Environment_1.default.getProducer(cProd.name);
|
|
218
|
-
results.push({ prod, cProd, response: yield ProducerExecutor_1.default.ready(prod) });
|
|
228
|
+
results.push({ prod, cProd, response: yield ProducerExecutor_1.default.ready(prod, scope) });
|
|
219
229
|
}
|
|
220
230
|
return results;
|
|
221
231
|
});
|
|
@@ -239,13 +249,13 @@ class ExecutorOrchestratorClass {
|
|
|
239
249
|
return path_1.default.resolve('./.build/workers');
|
|
240
250
|
}
|
|
241
251
|
};
|
|
242
|
-
this.reconcileExecutorThreadsResults = (
|
|
243
|
-
const
|
|
244
|
-
ConsumerExecutor_1.default._ensurePath(
|
|
252
|
+
this.reconcileExecutorThreadsResults = (scope, executorResults, tracker) => __awaiter(this, void 0, void 0, function* () {
|
|
253
|
+
const mainPath = ExecutorScope_1.default.getMainPath(scope);
|
|
254
|
+
ConsumerExecutor_1.default._ensurePath(mainPath);
|
|
245
255
|
// Merge all the various files into a single one
|
|
246
256
|
if (executorResults.length > 1) {
|
|
247
257
|
const perf = performance.now();
|
|
248
|
-
const output = fs_1.default.createWriteStream(
|
|
258
|
+
const output = fs_1.default.createWriteStream(mainPath);
|
|
249
259
|
output.setMaxListeners(executorResults.length + 1);
|
|
250
260
|
for (const workerResult of executorResults) {
|
|
251
261
|
yield (0, promises_2.pipeline)(fs_1.default.createReadStream(workerResult.resultUri), output, { end: false });
|
|
@@ -256,14 +266,12 @@ class ExecutorOrchestratorClass {
|
|
|
256
266
|
}
|
|
257
267
|
else if (executorResults.length === 1) {
|
|
258
268
|
// If there is only one worker, then just rename the worker .dataset to the general consumer one
|
|
259
|
-
yield promises_1.default.rename(executorResults[0].resultUri,
|
|
269
|
+
yield promises_1.default.rename(executorResults[0].resultUri, mainPath);
|
|
260
270
|
}
|
|
261
271
|
});
|
|
262
|
-
this.performCleanupOperations = (
|
|
263
|
-
const workPath = ConsumerExecutor_1.default._getWorkPath(consumer, executionId);
|
|
272
|
+
this.performCleanupOperations = (scope, tracker) => __awaiter(this, void 0, void 0, function* () {
|
|
264
273
|
const start = performance.now();
|
|
265
|
-
yield
|
|
266
|
-
yield ConsumerExecutor_1.default._clearWorkPath(workPath);
|
|
274
|
+
yield ExecutorScope_1.default.clearScope(scope);
|
|
267
275
|
tracker.measure('cleanup-operations', performance.now() - start);
|
|
268
276
|
});
|
|
269
277
|
this.computeFinalResult = (tracker, executorResults, executionId, resultUri) => {
|
package/executors/ExecutorScope.js
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
+
});
|
|
10
|
+
};
|
|
11
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
12
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
13
|
+
};
|
|
14
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
+
const path_1 = __importDefault(require("path"));
|
|
16
|
+
const fs_1 = __importDefault(require("fs"));
|
|
17
|
+
const promises_1 = __importDefault(require("fs/promises"));
|
|
18
|
+
const Constants_1 = __importDefault(require("../Constants"));
|
|
19
|
+
class ExecutorScopeClass {
|
|
20
|
+
constructor() {
|
|
21
|
+
this.WORKERS_FOLDER = 'workers';
|
|
22
|
+
this.PRODUCERS_FOLDER = 'producers';
|
|
23
|
+
this.getWorkerPath = (scope, workerId) => {
|
|
24
|
+
return path_1.default.join(Constants_1.default.defaults.REMORA_PATH, Constants_1.default.defaults.PRODUCER_TEMP_FOLDER,
|
|
25
|
+
// A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
|
|
26
|
+
scope.folder, this.WORKERS_FOLDER, `${workerId}.dataset`);
|
|
27
|
+
};
|
|
28
|
+
this.getProducerPath = (scope, producer, sourceFileKey) => {
|
|
29
|
+
return path_1.default.join(Constants_1.default.defaults.REMORA_PATH, Constants_1.default.defaults.PRODUCER_TEMP_FOLDER,
|
|
30
|
+
// A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
|
|
31
|
+
scope.folder, this.PRODUCERS_FOLDER, producer.name, `${sourceFileKey}.dataset`);
|
|
32
|
+
};
|
|
33
|
+
this.getMainPath = (scope) => {
|
|
34
|
+
return path_1.default.join(Constants_1.default.defaults.REMORA_PATH, Constants_1.default.defaults.PRODUCER_TEMP_FOLDER, scope.folder, 'main.dataset');
|
|
35
|
+
};
|
|
36
|
+
this.clearScope = (scope) => __awaiter(this, void 0, void 0, function* () {
|
|
37
|
+
const scopePath = path_1.default.join(Constants_1.default.defaults.REMORA_PATH, Constants_1.default.defaults.PRODUCER_TEMP_FOLDER, scope.folder);
|
|
38
|
+
if (fs_1.default.existsSync(scopePath)) {
|
|
39
|
+
yield promises_1.default.rm(scopePath, { recursive: true, force: true });
|
|
40
|
+
}
|
|
41
|
+
});
|
|
42
|
+
this.ensurePath = (fileUri) => {
|
|
43
|
+
const dir = path_1.default.dirname(fileUri);
|
|
44
|
+
if (!fs_1.default.existsSync(dir))
|
|
45
|
+
fs_1.default.mkdirSync(dir, { recursive: true });
|
|
46
|
+
if (!fs_1.default.existsSync(fileUri))
|
|
47
|
+
fs_1.default.writeFileSync(fileUri, '');
|
|
48
|
+
};
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
const ExecutorScope = new ExecutorScopeClass();
|
|
52
|
+
exports.default = ExecutorScope;
|
package/executors/OutputExecutor.js
CHANGED
|
@@ -17,7 +17,7 @@ const DSTE_1 = __importDefault(require("../core/dste/DSTE"));
|
|
|
17
17
|
const DriverFactory_1 = __importDefault(require("../drivers/DriverFactory"));
|
|
18
18
|
const Environment_1 = __importDefault(require("../engines/Environment"));
|
|
19
19
|
const CSVParser_1 = __importDefault(require("../engines/parsing/CSVParser"));
|
|
20
|
-
const
|
|
20
|
+
const ExecutorScope_1 = __importDefault(require("./ExecutorScope"));
|
|
21
21
|
class OutputExecutorClass {
|
|
22
22
|
constructor() {
|
|
23
23
|
this._getInternalRecordFormat = (consumer) => {
|
|
@@ -62,13 +62,13 @@ class OutputExecutorClass {
|
|
|
62
62
|
return JSON.stringify(record);
|
|
63
63
|
}
|
|
64
64
|
};
|
|
65
|
-
this.exportResult = (consumer,
|
|
65
|
+
this.exportResult = (consumer, fields, scope) => __awaiter(this, void 0, void 0, function* () {
|
|
66
66
|
const internalFormat = this._getInternalRecordFormat(consumer);
|
|
67
67
|
for (const output of consumer.outputs) {
|
|
68
68
|
const destination = Environment_1.default.getSource(output.exportDestination);
|
|
69
69
|
const driver = yield DriverFactory_1.default.instantiateDestination(destination);
|
|
70
|
-
const currentPath =
|
|
71
|
-
const destinationName = this._composeFileName(consumer, output, this._getExtension(output)
|
|
70
|
+
const currentPath = ExecutorScope_1.default.getMainPath(scope);
|
|
71
|
+
const destinationName = this._composeFileName(consumer, output, this._getExtension(output));
|
|
72
72
|
if (output.format === internalFormat) {
|
|
73
73
|
return yield driver.move(currentPath, destinationName);
|
|
74
74
|
}
|
package/executors/ProducerExecutor.js
CHANGED
|
@@ -22,12 +22,12 @@ const LineParser_1 = __importDefault(require("../engines/parsing/LineParser"));
|
|
|
22
22
|
const CryptoEngine_1 = __importDefault(require("../engines/CryptoEngine"));
|
|
23
23
|
class ProducerExecutorClass {
|
|
24
24
|
constructor() {
|
|
25
|
-
this.ready = (producer) => __awaiter(this, void 0, void 0, function* () {
|
|
25
|
+
this.ready = (producer, scope) => __awaiter(this, void 0, void 0, function* () {
|
|
26
26
|
(0, Affirm_1.default)(producer, 'Invalid producer');
|
|
27
27
|
const source = Environment_1.default.getSource(producer.source);
|
|
28
28
|
(0, Affirm_1.default)(source, `Invalid source ${producer.source} on producer ${producer.name}`);
|
|
29
29
|
const driver = yield DriverFactory_1.default.instantiateSource(source);
|
|
30
|
-
return yield driver.ready(producer);
|
|
30
|
+
return yield driver.ready({ producer, scope });
|
|
31
31
|
});
|
|
32
32
|
this.processHeader = (line, producer) => {
|
|
33
33
|
const { settings: { fileType, hasHeaderRow, delimiter } } = producer;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@forzalabs/remora",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.14",
|
|
4
4
|
"description": "A powerful CLI tool for seamless data translation.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"private": false,
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
"debug": "npx tsx ./src/index.ts debug",
|
|
22
22
|
"create-producer": "npx tsx ./src/index.ts create-producer",
|
|
23
23
|
"copy-static-file": "npx tsx ./scripts/CopyStaticFile.js",
|
|
24
|
-
"build": "npm i && npm run sync && tsc --outDir .build && npm run copy-static-file",
|
|
24
|
+
"build": "npm i && npm run sync && rm -rf .build && tsc --outDir .build && npm run copy-static-file",
|
|
25
25
|
"fast-build": "tsc --outDir .build",
|
|
26
26
|
"upload": "npm run build && cd .build && npm publish --access=public"
|
|
27
27
|
},
|
package/workers/ExecutorWorker.js
CHANGED
|
@@ -21,11 +21,12 @@ dotenv_1.default.configDotenv();
|
|
|
21
21
|
const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
|
|
22
22
|
Environment_1.default.load('./');
|
|
23
23
|
try {
|
|
24
|
-
const { workerId, chunk, consumer, producer, prodDimensions, options } = workerData;
|
|
24
|
+
const { workerId, chunk, consumer, producer, prodDimensions, scope, options } = workerData;
|
|
25
25
|
(0, Affirm_1.default)(workerId, `Invalid worker id`);
|
|
26
26
|
(0, Affirm_1.default)(consumer, `Invalid consumer`);
|
|
27
27
|
(0, Affirm_1.default)(producer, `Invalid producer`);
|
|
28
28
|
(0, Affirm_1.default)(chunk, `Invalid chunk`);
|
|
29
|
+
(0, Affirm_1.default)(scope, `Invalid executor scope`);
|
|
29
30
|
const executor = new Executor_1.default();
|
|
30
31
|
const res = yield executor.run({
|
|
31
32
|
consumer,
|
|
@@ -34,6 +35,7 @@ const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
|
|
|
34
35
|
workerId,
|
|
35
36
|
chunk,
|
|
36
37
|
options,
|
|
38
|
+
scope,
|
|
37
39
|
reportWork: packet => workerpool_1.default.workerEmit(packet)
|
|
38
40
|
});
|
|
39
41
|
return res;
|
package/auth/AdminManager.js
DELETED
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
-
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
-
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
-
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
-
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
-
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
-
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
-
});
|
|
10
|
-
};
|
|
11
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
12
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
13
|
-
};
|
|
14
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
-
const Affirm_1 = __importDefault(require("../core/Affirm"));
|
|
16
|
-
const DatabaseEngine_1 = __importDefault(require("../database/DatabaseEngine"));
|
|
17
|
-
const UserManager_1 = __importDefault(require("../engines/UserManager"));
|
|
18
|
-
const Settings_1 = __importDefault(require("../helper/Settings"));
|
|
19
|
-
const bcryptjs_1 = __importDefault(require("bcryptjs"));
|
|
20
|
-
const JWTManager_1 = __importDefault(require("./JWTManager"));
|
|
21
|
-
class AdminManagerClass {
|
|
22
|
-
constructor() {
|
|
23
|
-
this.COLLECTION = Settings_1.default.db.collections.users;
|
|
24
|
-
this.rootSignIn = (password) => __awaiter(this, void 0, void 0, function* () {
|
|
25
|
-
(0, Affirm_1.default)(password, 'Invalid password');
|
|
26
|
-
const rootUser = yield DatabaseEngine_1.default.findOne(this.COLLECTION, { isRoot: true });
|
|
27
|
-
(0, Affirm_1.default)(rootUser, 'Incorrect system configuration: root user not found');
|
|
28
|
-
const isSame = yield bcryptjs_1.default.compare(password, rootUser.rootPasswordHash);
|
|
29
|
-
if (!isSame)
|
|
30
|
-
throw new Error('Invalid credentials');
|
|
31
|
-
rootUser.lastLogin = new Date().toJSON();
|
|
32
|
-
yield UserManager_1.default.update(rootUser);
|
|
33
|
-
const adminSecret = process.env.ADMIN_JWT_SECRET;
|
|
34
|
-
(0, Affirm_1.default)(adminSecret, 'Wrong system config: missing admin jwt secret');
|
|
35
|
-
const payload = {
|
|
36
|
-
apiKeyId: rootUser._id,
|
|
37
|
-
installationId: process.env.INSTALLATION_ID,
|
|
38
|
-
name: rootUser.name,
|
|
39
|
-
scopes: { consumers: ['*'], projects: ['*'] },
|
|
40
|
-
isAdmin: true
|
|
41
|
-
};
|
|
42
|
-
const token = JWTManager_1.default.issue(adminSecret, payload, 8);
|
|
43
|
-
return token;
|
|
44
|
-
});
|
|
45
|
-
}
|
|
46
|
-
}
|
|
47
|
-
const AdminManager = new AdminManagerClass();
|
|
48
|
-
exports.default = AdminManager;
|
package/auth/ApiKeysManager.js
DELETED
|
@@ -1,45 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
-
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
-
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
-
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
-
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
-
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
-
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
-
});
|
|
10
|
-
};
|
|
11
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
12
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
13
|
-
};
|
|
14
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
-
const Affirm_1 = __importDefault(require("../core/Affirm"));
|
|
16
|
-
const DatabaseEngine_1 = __importDefault(require("../database/DatabaseEngine"));
|
|
17
|
-
const Helper_1 = __importDefault(require("../helper/Helper"));
|
|
18
|
-
const Settings_1 = __importDefault(require("../helper/Settings"));
|
|
19
|
-
const JWTManager_1 = __importDefault(require("./JWTManager"));
|
|
20
|
-
class ApiKeysManagerClass {
|
|
21
|
-
constructor() {
|
|
22
|
-
this.COLLECTION = Settings_1.default.db.collections.apiKeys;
|
|
23
|
-
this.create = (name, scopes) => __awaiter(this, void 0, void 0, function* () {
|
|
24
|
-
(0, Affirm_1.default)(name, 'Invalid name');
|
|
25
|
-
const apiKeyId = Helper_1.default.uuid();
|
|
26
|
-
const apiSecret = JWTManager_1.default.sign(apiKeyId, name, scopes);
|
|
27
|
-
const newApiKey = {
|
|
28
|
-
_id: apiKeyId,
|
|
29
|
-
_signature: '',
|
|
30
|
-
createdAt: new Date().toJSON(),
|
|
31
|
-
isActive: true,
|
|
32
|
-
name: name,
|
|
33
|
-
scopes: scopes,
|
|
34
|
-
value: apiSecret
|
|
35
|
-
};
|
|
36
|
-
return yield DatabaseEngine_1.default.upsert(this.COLLECTION, newApiKey._id, newApiKey);
|
|
37
|
-
});
|
|
38
|
-
this.get = (apiKeyId) => __awaiter(this, void 0, void 0, function* () {
|
|
39
|
-
(0, Affirm_1.default)(apiKeyId, 'Invalid api key id');
|
|
40
|
-
return yield DatabaseEngine_1.default.get(this.COLLECTION, apiKeyId);
|
|
41
|
-
});
|
|
42
|
-
}
|
|
43
|
-
}
|
|
44
|
-
const ApiKeysManager = new ApiKeysManagerClass();
|
|
45
|
-
exports.default = ApiKeysManager;
|