@forzalabs/remora 1.0.11 → 1.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Constants.js +2 -1
- package/database/DatabaseEngine.js +7 -6
- package/definitions/json_schemas/consumer-schema.json +40 -25
- package/definitions/json_schemas/producer-schema.json +1 -2
- package/definitions/json_schemas/source-schema.json +1 -2
- package/drivers/DeltaShareDriver.js +2 -2
- package/drivers/HttpApiDriver.js +3 -3
- package/drivers/RedshiftDriver.js +2 -2
- package/drivers/files/LocalDestinationDriver.js +0 -55
- package/drivers/files/LocalSourceDriver.js +3 -2
- package/drivers/s3/S3DestinationDriver.js +107 -69
- package/drivers/s3/S3SourceDriver.js +44 -4
- package/engines/parsing/LineParser.js +19 -0
- package/engines/validation/Validator.js +7 -2
- package/executors/ConsumerExecutor.js +127 -17
- package/executors/Executor.js +19 -11
- package/executors/ExecutorOrchestrator.js +37 -29
- package/executors/ExecutorScope.js +52 -0
- package/executors/OutputExecutor.js +4 -4
- package/executors/ProducerExecutor.js +2 -2
- package/package.json +2 -2
- package/workers/ExecutorWorker.js +3 -1
- package/auth/AdminManager.js +0 -48
- package/auth/ApiKeysManager.js +0 -45
- package/auth/JWTManager.js +0 -56
- package/database/DatabaseInitializer.js +0 -80
- package/engines/file/FileExporter.js +0 -58
- package/workers/definitions.js +0 -2
|
@@ -119,11 +119,11 @@ class ValidatorClass {
|
|
|
119
119
|
// For now we only support connecting producers of the same engine type to a consumer, so we give an error if we detect different ones
|
|
120
120
|
const uniqEngines = Algo_1.default.uniqBy(sources, 'engine');
|
|
121
121
|
if (uniqEngines.length !== 1)
|
|
122
|
-
errors.push(`Sources with different engines
|
|
122
|
+
errors.push(`Sources with different engines are used in the consumer "${consumer.name}" (${uniqEngines.join(', ')})`);
|
|
123
123
|
// For now we also only support consumers that have producers ALL having the same exact source
|
|
124
124
|
const uniqNames = Algo_1.default.uniqBy(sources, 'name');
|
|
125
125
|
if (uniqNames.length !== 1)
|
|
126
|
-
errors.push(`Producers with different sources
|
|
126
|
+
errors.push(`Producers with different sources are used in the consumer "${consumer.name}" (${uniqNames.join(', ')})`);
|
|
127
127
|
if (consumer.filters && consumer.filters.length > 0) {
|
|
128
128
|
if (consumer.filters.some(x => x.sql && x.rule))
|
|
129
129
|
errors.push(`A single consumer can't have both filters based on SQL and filters based on rules.`);
|
|
@@ -188,6 +188,11 @@ class ValidatorClass {
|
|
|
188
188
|
errors.push(`The export destination "${output.exportDestination}" was not found in the sources.`);
|
|
189
189
|
}
|
|
190
190
|
}
|
|
191
|
+
// Validate distinct
|
|
192
|
+
if (consumer.options) {
|
|
193
|
+
if (Algo_1.default.hasVal(consumer.options.distinct) && Algo_1.default.hasVal(consumer.options.distinctOn))
|
|
194
|
+
errors.push(`Can't specify a "distinct" and a "distinctOn" clause on the same consumer (${consumer.name}); use one or the other.`);
|
|
195
|
+
}
|
|
191
196
|
}
|
|
192
197
|
catch (e) {
|
|
193
198
|
if (errors.length === 0)
|
|
@@ -25,9 +25,11 @@ const readline_1 = __importDefault(require("readline"));
|
|
|
25
25
|
const promises_1 = __importDefault(require("fs/promises"));
|
|
26
26
|
const TransformationEngine_1 = __importDefault(require("../engines/transform/TransformationEngine"));
|
|
27
27
|
const RequestExecutor_1 = __importDefault(require("../engines/execution/RequestExecutor"));
|
|
28
|
-
const Affirm_1 = __importDefault(require("../core/Affirm"));
|
|
29
28
|
const Constants_1 = __importDefault(require("../Constants"));
|
|
30
29
|
const Algo_1 = __importDefault(require("../core/Algo"));
|
|
30
|
+
const LineParser_1 = __importDefault(require("../engines/parsing/LineParser"));
|
|
31
|
+
const OutputExecutor_1 = __importDefault(require("./OutputExecutor"));
|
|
32
|
+
const ConsumerManager_1 = __importDefault(require("../engines/consumer/ConsumerManager"));
|
|
31
33
|
class ConsumerExecutorClass {
|
|
32
34
|
constructor() {
|
|
33
35
|
this._getWorkPath = (consumer, executionId) => {
|
|
@@ -64,20 +66,14 @@ class ConsumerExecutorClass {
|
|
|
64
66
|
if (!fs_1.default.existsSync(pathUri))
|
|
65
67
|
fs_1.default.writeFileSync(pathUri, '');
|
|
66
68
|
};
|
|
67
|
-
this.ready = (consumer, executionId) => {
|
|
68
|
-
(0, Affirm_1.default)(consumer, 'Invalid consumer');
|
|
69
|
-
const workPath = this._getWorkPath(consumer, executionId);
|
|
70
|
-
this._ensurePath(workPath);
|
|
71
|
-
return fs_1.default.createWriteStream(workPath);
|
|
72
|
-
};
|
|
73
69
|
this.processRecord = (options) => {
|
|
74
|
-
var _a;
|
|
70
|
+
var _a, _b;
|
|
75
71
|
const { consumer, fields, dimensions, producer, record, requestOptions } = options;
|
|
76
|
-
//
|
|
72
|
+
// Map to consumer fields and apply consumer field logic
|
|
77
73
|
for (const field of fields) {
|
|
78
74
|
const { cField } = field;
|
|
79
75
|
const fieldKey = (_a = cField.alias) !== null && _a !== void 0 ? _a : cField.key;
|
|
80
|
-
//
|
|
76
|
+
// Set the fixed default value for the field, or throw error if not present in the producer
|
|
81
77
|
const dimension = dimensions.find(x => x.name === cField.key);
|
|
82
78
|
if (!dimension) {
|
|
83
79
|
if (cField.fixed && Algo_1.default.hasVal(cField.default))
|
|
@@ -85,12 +81,16 @@ class ConsumerExecutorClass {
|
|
|
85
81
|
else
|
|
86
82
|
throw new Error(`The requested field "${cField.key}" from the consumer is not present in the underlying producer "${producer.name}" (${dimensions.map(x => x.name).join(', ')})`);
|
|
87
83
|
}
|
|
88
|
-
//
|
|
84
|
+
// Change the name of the dimension
|
|
89
85
|
if (cField.alias && cField.alias !== dimension.name) {
|
|
90
86
|
record[cField.alias] = record[dimension.name];
|
|
91
87
|
delete record[dimension.name];
|
|
92
88
|
}
|
|
93
|
-
|
|
89
|
+
}
|
|
90
|
+
// Transformations need to be applied after the mapping since they might refer to other fields with their new names
|
|
91
|
+
for (const field of fields) {
|
|
92
|
+
const { cField } = field;
|
|
93
|
+
const fieldKey = (_b = cField.alias) !== null && _b !== void 0 ? _b : cField.key;
|
|
94
94
|
if (cField.transform)
|
|
95
95
|
record[fieldKey] = TransformationEngine_1.default.applyTransformations(record[fieldKey], cField.transform, cField, record);
|
|
96
96
|
}
|
|
@@ -114,12 +114,11 @@ class ConsumerExecutorClass {
|
|
|
114
114
|
}
|
|
115
115
|
return record;
|
|
116
116
|
};
|
|
117
|
-
this.processDistinct = (
|
|
117
|
+
this.processDistinct = (datasetPath) => __awaiter(this, void 0, void 0, function* () {
|
|
118
118
|
var _a, e_1, _b, _c;
|
|
119
|
-
const
|
|
120
|
-
const reader = fs_1.default.createReadStream(workPath);
|
|
119
|
+
const reader = fs_1.default.createReadStream(datasetPath);
|
|
121
120
|
const lineReader = readline_1.default.createInterface({ input: reader, crlfDelay: Infinity });
|
|
122
|
-
const tempWorkPath =
|
|
121
|
+
const tempWorkPath = datasetPath + '_tmp';
|
|
123
122
|
const writer = fs_1.default.createWriteStream(tempWorkPath);
|
|
124
123
|
let newLineCount = 0;
|
|
125
124
|
const seen = new Set();
|
|
@@ -144,9 +143,120 @@ class ConsumerExecutorClass {
|
|
|
144
143
|
}
|
|
145
144
|
writer.close();
|
|
146
145
|
reader.close();
|
|
147
|
-
|
|
146
|
+
// Wait for the writer to finish before renaming
|
|
147
|
+
yield new Promise((resolve, reject) => {
|
|
148
|
+
writer.on('finish', resolve);
|
|
149
|
+
writer.on('error', reject);
|
|
150
|
+
writer.end();
|
|
151
|
+
});
|
|
152
|
+
fs_1.default.renameSync(tempWorkPath, datasetPath);
|
|
148
153
|
return newLineCount;
|
|
149
154
|
});
|
|
155
|
+
/**
 * Rewrites the dataset at `datasetPath` in place so that it keeps a single line
 * per combination of the `consumer.options.distinctOn.keys` values.
 *
 * Every line is parsed (CSV/TXT or JSON, depending on the consumer's internal
 * record format), grouped by its key values, and the line to keep for each group
 * is chosen by `_shouldReplaceRecord` using `distinctOn.resolution`.
 * All winning lines are held in memory until the input has been fully read.
 *
 * @param consumer    Consumer definition; must carry `options.distinctOn` with `keys` and `resolution`.
 * @param datasetPath Path of the line-delimited dataset to deduplicate.
 * @returns Promise resolving to the number of lines written back (one per distinct key).
 */
this.processDistinctOn = (consumer, datasetPath) => __awaiter(this, void 0, void 0, function* () {
    var _a, e_2, _b, _c;
    const reader = fs_1.default.createReadStream(datasetPath);
    const lineReader = readline_1.default.createInterface({ input: reader, crlfDelay: Infinity });
    const { distinctOn } = consumer.options;
    const { keys, resolution } = distinctOn;
    const { strategy, orderBy, direction = 'asc' } = resolution;
    const internalRecordFormat = OutputExecutor_1.default._getInternalRecordFormat(consumer);
    const internalFields = ConsumerManager_1.default.getExpandedFields(consumer);
    const isDelimitedFormat = internalRecordFormat === 'CSV' || internalRecordFormat === 'TXT';
    // Winning entry per composite key. Value: { record: parsed object, line: original raw line }
    const winners = new Map();
    try {
        // Downleveled form of `for await (const line of lineReader)`
        for (var _d = true, lineReader_2 = __asyncValues(lineReader), lineReader_2_1; lineReader_2_1 = yield lineReader_2.next(), _a = lineReader_2_1.done, !_a; _d = true) {
            _c = lineReader_2_1.value;
            _d = false;
            const line = _c;
            const record = isDelimitedFormat
                ? LineParser_1.default._internalParseCSV(line, internalFields)
                : LineParser_1.default._internalParseJSON(line);
            // Encode the key tuple as JSON rather than joining with a separator: a plain
            // join would make ("a|b", "c") and ("a", "b|c") collide and merge unrelated records.
            const compositeKey = JSON.stringify(keys.map(k => {
                const value = record[k];
                return String(value !== null && value !== void 0 ? value : '');
            }));
            const existing = winners.get(compositeKey);
            if (!existing) {
                winners.set(compositeKey, { record, line });
                continue;
            }
            if (this._shouldReplaceRecord(existing.record, record, strategy, orderBy, direction)) {
                winners.set(compositeKey, { record, line });
            }
        }
    }
    catch (e_2_1) { e_2 = { error: e_2_1 }; }
    finally {
        try {
            if (!_d && !_a && (_b = lineReader_2.return)) yield _b.call(lineReader_2);
        }
        finally { if (e_2) throw e_2.error; }
    }
    reader.close();
    // Write the winning lines to a sibling temp file, then swap it over the original
    const tempWorkPath = datasetPath + '_tmp';
    const writer = fs_1.default.createWriteStream(tempWorkPath);
    for (const { line } of winners.values()) {
        writer.write(line + '\n');
    }
    // Wait for the writer to flush before renaming, otherwise the renamed file may be incomplete
    yield new Promise((resolve, reject) => {
        writer.on('finish', resolve);
        writer.on('error', reject);
        writer.end();
    });
    fs_1.default.renameSync(tempWorkPath, datasetPath);
    return winners.size;
});
|
|
210
|
+
/**
 * Decides whether `newRecord` should take the place of `existing` as the kept
 * record for a distinct-on key.
 *
 * - 'first': never replace (the earliest record stays).
 * - 'last':  always replace (the latest record wins).
 * - 'min' / 'max': compare the `orderBy` values of both records via `_compareValues`;
 *   any `direction` other than 'asc' inverts the choice (min behaves like max and vice versa).
 * - any other strategy: never replace.
 *
 * Ties (comparison of 0) never replace, so the earlier record is kept.
 */
this._shouldReplaceRecord = (existing, newRecord, strategy, orderBy, direction) => {
    if (strategy === 'last')
        return true;
    const isMin = strategy === 'min';
    if (!isMin && strategy !== 'max')
        return false;
    const existingVal = existing[orderBy];
    const newVal = newRecord[orderBy];
    const delta = this._compareValues(newVal, existingVal);
    // min+asc and max+desc prefer the smaller value; min+desc and max+asc prefer the larger one
    const preferSmaller = isMin === (direction === 'asc');
    return preferSmaller ? delta < 0 : delta > 0;
};
|
|
239
|
+
/**
 * Orders two values for the min/max distinct-on strategies.
 *
 * null/undefined sort before everything else (and equal to each other).
 * When both values coerce to a number via `Number(...)` they are compared
 * numerically; otherwise both are stringified and compared with `localeCompare`.
 *
 * NOTE: `Number('')` is 0, so an empty string takes the numeric path and counts as 0.
 *
 * @returns negative if a < b, positive if a > b, 0 if equal
 */
this._compareValues = (a, b) => {
    const aMissing = a === null || a === undefined;
    const bMissing = b === null || b === undefined;
    if (aMissing || bMissing) {
        if (aMissing && bMissing)
            return 0;
        return aMissing ? -1 : 1;
    }
    const left = Number(a);
    const right = Number(b);
    const bothNumeric = !(isNaN(left) || isNaN(right));
    return bothNumeric
        ? left - right
        : String(a).localeCompare(String(b));
};
|
|
150
260
|
}
|
|
151
261
|
}
|
|
152
262
|
const ConsumerExecutor = new ConsumerExecutorClass();
|
package/executors/Executor.js
CHANGED
|
@@ -27,6 +27,7 @@ const Affirm_1 = __importDefault(require("../core/Affirm"));
|
|
|
27
27
|
const OutputExecutor_1 = __importDefault(require("./OutputExecutor"));
|
|
28
28
|
const ConsumerManager_1 = __importDefault(require("../engines/consumer/ConsumerManager"));
|
|
29
29
|
const ExecutorPerformance_1 = __importDefault(require("./ExecutorPerformance"));
|
|
30
|
+
const ExecutorScope_1 = __importDefault(require("./ExecutorScope"));
|
|
30
31
|
class Executor {
|
|
31
32
|
constructor() {
|
|
32
33
|
this._REPORT_WORK_AFTER_LINES = 1000;
|
|
@@ -38,9 +39,9 @@ class Executor {
|
|
|
38
39
|
*/
|
|
39
40
|
this.run = (request) => __awaiter(this, void 0, void 0, function* () {
|
|
40
41
|
var _a, e_1, _b, _c;
|
|
41
|
-
var _d;
|
|
42
|
+
var _d, _e;
|
|
42
43
|
(0, Affirm_1.default)(request, 'Invalid request');
|
|
43
|
-
const { consumer, producer, prodDimensions, workerId, chunk, options, reportWork } = request;
|
|
44
|
+
const { consumer, producer, prodDimensions, workerId, chunk, options, scope, reportWork } = request;
|
|
44
45
|
const counter = performance.now();
|
|
45
46
|
const result = {
|
|
46
47
|
executionId: workerId,
|
|
@@ -48,12 +49,13 @@ class Executor {
|
|
|
48
49
|
elapsedMS: -1,
|
|
49
50
|
inputCount: -1,
|
|
50
51
|
outputCount: -1,
|
|
51
|
-
resultUri:
|
|
52
|
+
resultUri: ExecutorScope_1.default.getWorkerPath(scope, workerId),
|
|
52
53
|
operations: {}
|
|
53
54
|
};
|
|
55
|
+
ExecutorScope_1.default.ensurePath(result.resultUri);
|
|
54
56
|
let totalOutputCount = 0, totalCycles = 1, perf = 0, lineIndex = 0;
|
|
55
57
|
const readStream = this.openReadStream(chunk);
|
|
56
|
-
const writeStream = this.openWriteStream(
|
|
58
|
+
const writeStream = this.openWriteStream(scope, workerId);
|
|
57
59
|
const fields = ConsumerManager_1.default.getExpandedFields(consumer);
|
|
58
60
|
const { isFirstChunk, start, end } = chunk;
|
|
59
61
|
const totalBytes = end - start;
|
|
@@ -61,9 +63,9 @@ class Executor {
|
|
|
61
63
|
// Process all the line-independent operations of the consumer in a single pass
|
|
62
64
|
const lineStream = readline_1.default.createInterface({ input: readStream, crlfDelay: Infinity });
|
|
63
65
|
try {
|
|
64
|
-
for (var
|
|
66
|
+
for (var _f = true, lineStream_1 = __asyncValues(lineStream), lineStream_1_1; lineStream_1_1 = yield lineStream_1.next(), _a = lineStream_1_1.done, !_a; _f = true) {
|
|
65
67
|
_c = lineStream_1_1.value;
|
|
66
|
-
|
|
68
|
+
_f = false;
|
|
67
69
|
const line = _c;
|
|
68
70
|
if (lineIndex === 0 && isFirstChunk) {
|
|
69
71
|
if (!this.shouldProcessFirstLine(producer)) {
|
|
@@ -115,22 +117,27 @@ class Executor {
|
|
|
115
117
|
catch (e_1_1) { e_1 = { error: e_1_1 }; }
|
|
116
118
|
finally {
|
|
117
119
|
try {
|
|
118
|
-
if (!
|
|
120
|
+
if (!_f && !_a && (_b = lineStream_1.return)) yield _b.call(lineStream_1);
|
|
119
121
|
}
|
|
120
122
|
finally { if (e_1) throw e_1.error; }
|
|
121
123
|
}
|
|
122
124
|
// Process the operations that work on multiple lines
|
|
123
125
|
if (((_d = consumer.options) === null || _d === void 0 ? void 0 : _d.distinct) === true) {
|
|
124
126
|
perf = performance.now();
|
|
125
|
-
totalOutputCount = yield ConsumerExecutor_1.default.processDistinct(
|
|
127
|
+
totalOutputCount = yield ConsumerExecutor_1.default.processDistinct(result.resultUri);
|
|
126
128
|
this._performance.measure('process-distinct', performance.now() - perf);
|
|
127
129
|
totalCycles++;
|
|
128
130
|
}
|
|
131
|
+
if ((_e = consumer.options) === null || _e === void 0 ? void 0 : _e.distinctOn) {
|
|
132
|
+
perf = performance.now();
|
|
133
|
+
totalOutputCount = yield ConsumerExecutor_1.default.processDistinctOn(consumer, result.resultUri);
|
|
134
|
+
this._performance.measure('process-distinct-on', performance.now() - perf);
|
|
135
|
+
totalCycles++;
|
|
136
|
+
}
|
|
129
137
|
result.elapsedMS = performance.now() - counter;
|
|
130
138
|
result.cycles = totalCycles;
|
|
131
139
|
result.inputCount = lineIndex;
|
|
132
140
|
result.outputCount = totalOutputCount;
|
|
133
|
-
result.resultUri = ConsumerExecutor_1.default._getWorkPath(consumer, workerId);
|
|
134
141
|
result.operations = this._performance.getOperations();
|
|
135
142
|
return result;
|
|
136
143
|
});
|
|
@@ -138,8 +145,9 @@ class Executor {
|
|
|
138
145
|
const { end, fileUri, start } = chunk;
|
|
139
146
|
return fs_1.default.createReadStream(fileUri, { start, end: end });
|
|
140
147
|
};
|
|
141
|
-
this.openWriteStream = (
|
|
142
|
-
|
|
148
|
+
/**
 * Opens a write stream on the dataset file of the given worker inside the execution scope.
 * The file location is resolved by `ExecutorScope.getWorkerPath(scope, workerId)`.
 */
this.openWriteStream = (scope, workerId) => fs_1.default.createWriteStream(ExecutorScope_1.default.getWorkerPath(scope, workerId));
|
|
144
152
|
this.shouldProcessFirstLine = (producer) => {
|
|
145
153
|
(0, Affirm_1.default)(producer, 'Invalid producer');
|
|
@@ -32,6 +32,7 @@ const ExecutorPerformance_1 = __importDefault(require("./ExecutorPerformance"));
|
|
|
32
32
|
const ExecutorProgress_1 = __importDefault(require("./ExecutorProgress"));
|
|
33
33
|
const Algo_1 = __importDefault(require("../core/Algo"));
|
|
34
34
|
const ConsumerOnFinishManager_1 = __importDefault(require("../engines/consumer/ConsumerOnFinishManager"));
|
|
35
|
+
const ExecutorScope_1 = __importDefault(require("./ExecutorScope"));
|
|
35
36
|
class ExecutorOrchestratorClass {
|
|
36
37
|
constructor() {
|
|
37
38
|
this.init = () => {
|
|
@@ -48,7 +49,7 @@ class ExecutorOrchestratorClass {
|
|
|
48
49
|
}
|
|
49
50
|
};
|
|
50
51
|
this.launch = (request) => __awaiter(this, void 0, void 0, function* () {
|
|
51
|
-
var _a;
|
|
52
|
+
var _a, _b;
|
|
52
53
|
(0, Affirm_1.default)(request, 'Invalid options');
|
|
53
54
|
const { consumer, details, logProgress, options } = request;
|
|
54
55
|
(0, Affirm_1.default)(consumer, 'Invalid consumer');
|
|
@@ -56,12 +57,14 @@ class ExecutorOrchestratorClass {
|
|
|
56
57
|
const tracker = new ExecutorPerformance_1.default();
|
|
57
58
|
const _progress = new ExecutorProgress_1.default(logProgress);
|
|
58
59
|
const { usageId } = UsageManager_1.default.startUsage(consumer, details);
|
|
59
|
-
const
|
|
60
|
+
const scope = { id: usageId, folder: `${consumer.name}_${usageId}`, workersId: [] };
|
|
60
61
|
try {
|
|
61
62
|
const start = performance.now();
|
|
62
63
|
this.init();
|
|
63
64
|
const executorResults = [];
|
|
64
|
-
|
|
65
|
+
let counter = performance.now();
|
|
66
|
+
const sourceFilesByProducer = yield this.readySourceFiles(consumer, scope);
|
|
67
|
+
tracker.measure('ready-producers', performance.now() - counter);
|
|
65
68
|
let globalWorkerIndex = 0;
|
|
66
69
|
for (const pair of sourceFilesByProducer) {
|
|
67
70
|
const { prod, cProd, response } = pair;
|
|
@@ -86,15 +89,16 @@ class ExecutorOrchestratorClass {
|
|
|
86
89
|
const currentWorkerIndex = globalWorkerIndex;
|
|
87
90
|
globalWorkerIndex++;
|
|
88
91
|
const workerData = {
|
|
92
|
+
producer: prod,
|
|
89
93
|
chunk,
|
|
90
94
|
consumer,
|
|
91
95
|
prodDimensions,
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
options
|
|
96
|
+
workerId,
|
|
97
|
+
scope,
|
|
98
|
+
options
|
|
95
99
|
};
|
|
96
100
|
_progress.register((currentWorkerIndex + 1).toString(), prod.name, fileIndex, totalFiles);
|
|
97
|
-
workersId.push(workerId);
|
|
101
|
+
scope.workersId.push(workerId);
|
|
98
102
|
workerThreads.push(this._executorPool.exec('executor', [workerData], {
|
|
99
103
|
on: payload => this.onWorkAdvanced(payload, currentWorkerIndex, _progress)
|
|
100
104
|
}));
|
|
@@ -105,28 +109,34 @@ class ExecutorOrchestratorClass {
|
|
|
105
109
|
}
|
|
106
110
|
}
|
|
107
111
|
_progress.complete();
|
|
108
|
-
yield this.reconcileExecutorThreadsResults(
|
|
112
|
+
yield this.reconcileExecutorThreadsResults(scope, executorResults, tracker);
|
|
109
113
|
// If there is more than one worker, then I need to redo the operations that are done on multiple lines (cause now the worker files have been merged together)
|
|
110
114
|
const postOperation = { totalOutputCount: null };
|
|
111
115
|
if (executorResults.length > 1) {
|
|
112
116
|
if (((_a = consumer.options) === null || _a === void 0 ? void 0 : _a.distinct) === true) {
|
|
113
|
-
|
|
114
|
-
const unifiedOutputCount = yield ConsumerExecutor_1.default.processDistinct(
|
|
115
|
-
tracker.measure('process-distinct:main', performance.now() -
|
|
117
|
+
counter = performance.now();
|
|
118
|
+
const unifiedOutputCount = yield ConsumerExecutor_1.default.processDistinct(ExecutorScope_1.default.getMainPath(scope));
|
|
119
|
+
tracker.measure('process-distinct:main', performance.now() - counter);
|
|
120
|
+
postOperation.totalOutputCount = unifiedOutputCount;
|
|
121
|
+
}
|
|
122
|
+
if (((_b = consumer.options) === null || _b === void 0 ? void 0 : _b.distinct) === true) {
|
|
123
|
+
counter = performance.now();
|
|
124
|
+
const unifiedOutputCount = yield ConsumerExecutor_1.default.processDistinctOn(consumer, ExecutorScope_1.default.getMainPath(scope));
|
|
125
|
+
tracker.measure('process-distinct-on:main', performance.now() - counter);
|
|
116
126
|
postOperation.totalOutputCount = unifiedOutputCount;
|
|
117
127
|
}
|
|
118
128
|
}
|
|
119
129
|
// Export to the destination
|
|
120
|
-
|
|
121
|
-
const exportRes = yield OutputExecutor_1.default.exportResult(consumer,
|
|
122
|
-
tracker.measure('export-result', performance.now() -
|
|
130
|
+
counter = performance.now();
|
|
131
|
+
const exportRes = yield OutputExecutor_1.default.exportResult(consumer, ConsumerManager_1.default.getExpandedFields(consumer), scope);
|
|
132
|
+
tracker.measure('export-result', performance.now() - counter);
|
|
123
133
|
// Perform on-success actions if any
|
|
124
134
|
if (consumer.outputs.some(x => x.onSuccess)) {
|
|
125
|
-
|
|
135
|
+
counter = performance.now();
|
|
126
136
|
yield ConsumerOnFinishManager_1.default.onConsumerSuccess(consumer, usageId);
|
|
127
|
-
tracker.measure('on-success-actions', performance.now() -
|
|
137
|
+
tracker.measure('on-success-actions', performance.now() - counter);
|
|
128
138
|
}
|
|
129
|
-
yield this.performCleanupOperations(
|
|
139
|
+
yield this.performCleanupOperations(scope, tracker);
|
|
130
140
|
const finalResult = this.computeFinalResult(tracker, executorResults, usageId, exportRes.key);
|
|
131
141
|
finalResult.elapsedMS = performance.now() - start;
|
|
132
142
|
if (Algo_1.default.hasVal(postOperation.totalOutputCount))
|
|
@@ -136,7 +146,7 @@ class ExecutorOrchestratorClass {
|
|
|
136
146
|
}
|
|
137
147
|
catch (error) {
|
|
138
148
|
yield ConsumerOnFinishManager_1.default.onConsumerError(consumer, usageId);
|
|
139
|
-
yield this.performCleanupOperations(
|
|
149
|
+
yield this.performCleanupOperations(scope, tracker);
|
|
140
150
|
UsageManager_1.default.failUsage(usageId, Helper_1.default.asError(error).message);
|
|
141
151
|
throw error;
|
|
142
152
|
}
|
|
@@ -210,12 +220,12 @@ class ExecutorOrchestratorClass {
|
|
|
210
220
|
// No newline found, return file end
|
|
211
221
|
return fileSize;
|
|
212
222
|
};
|
|
213
|
-
this.readySourceFiles = (consumer) => __awaiter(this, void 0, void 0, function* () {
|
|
223
|
+
/**
 * Readies every producer referenced by the consumer, one after the other, and
 * returns one `{ prod, cProd, response }` entry per consumer producer, in order.
 *
 * `prod` is the producer looked up in the Environment by name, `cProd` is the
 * consumer-side producer reference and `response` is whatever
 * `ProducerExecutor.ready(prod, scope)` resolves to.
 */
this.readySourceFiles = (consumer, scope) => __awaiter(this, void 0, void 0, function* () {
    const readied = [];
    for (const cProd of consumer.producers) {
        const prod = Environment_1.default.getProducer(cProd.name);
        const response = yield ProducerExecutor_1.default.ready(prod, scope);
        readied.push({ prod, cProd, response });
    }
    return readied;
});
|
|
@@ -239,13 +249,13 @@ class ExecutorOrchestratorClass {
|
|
|
239
249
|
return path_1.default.resolve('./.build/workers');
|
|
240
250
|
}
|
|
241
251
|
};
|
|
242
|
-
this.reconcileExecutorThreadsResults = (
|
|
243
|
-
const
|
|
244
|
-
ConsumerExecutor_1.default._ensurePath(
|
|
252
|
+
this.reconcileExecutorThreadsResults = (scope, executorResults, tracker) => __awaiter(this, void 0, void 0, function* () {
|
|
253
|
+
const mainPath = ExecutorScope_1.default.getMainPath(scope);
|
|
254
|
+
ConsumerExecutor_1.default._ensurePath(mainPath);
|
|
245
255
|
// Merge all the various files into a single one
|
|
246
256
|
if (executorResults.length > 1) {
|
|
247
257
|
const perf = performance.now();
|
|
248
|
-
const output = fs_1.default.createWriteStream(
|
|
258
|
+
const output = fs_1.default.createWriteStream(mainPath);
|
|
249
259
|
output.setMaxListeners(executorResults.length + 1);
|
|
250
260
|
for (const workerResult of executorResults) {
|
|
251
261
|
yield (0, promises_2.pipeline)(fs_1.default.createReadStream(workerResult.resultUri), output, { end: false });
|
|
@@ -256,14 +266,12 @@ class ExecutorOrchestratorClass {
|
|
|
256
266
|
}
|
|
257
267
|
else if (executorResults.length === 1) {
|
|
258
268
|
// If there is only one worker, then just rename the worker .dataset to the general consumer one
|
|
259
|
-
yield promises_1.default.rename(executorResults[0].resultUri,
|
|
269
|
+
yield promises_1.default.rename(executorResults[0].resultUri, mainPath);
|
|
260
270
|
}
|
|
261
271
|
});
|
|
262
|
-
this.performCleanupOperations = (
|
|
263
|
-
const workPath = ConsumerExecutor_1.default._getWorkPath(consumer, executionId);
|
|
272
|
+
/**
 * Clears the execution scope via `ExecutorScope.clearScope` and records how long
 * that took under the 'cleanup-operations' measure of the tracker.
 */
this.performCleanupOperations = (scope, tracker) => __awaiter(this, void 0, void 0, function* () {
    const startedAt = performance.now();
    yield ExecutorScope_1.default.clearScope(scope);
    const elapsed = performance.now() - startedAt;
    tracker.measure('cleanup-operations', elapsed);
});
|
|
269
277
|
this.computeFinalResult = (tracker, executorResults, executionId, resultUri) => {
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
+
});
|
|
10
|
+
};
|
|
11
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
12
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
13
|
+
};
|
|
14
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
+
const path_1 = __importDefault(require("path"));
|
|
16
|
+
const fs_1 = __importDefault(require("fs"));
|
|
17
|
+
const promises_1 = __importDefault(require("fs/promises"));
|
|
18
|
+
const Constants_1 = __importDefault(require("../Constants"));
|
|
19
|
+
/**
 * Resolves and manages the temporary on-disk locations of a single execution ("scope").
 *
 * Everything belonging to one execution lives under
 * `<REMORA_PATH>/<PRODUCER_TEMP_FOLDER>/<scope.folder>`, so the whole execution can be
 * cleaned up by deleting that one folder.
 */
class ExecutorScopeClass {
    constructor() {
        this.WORKERS_FOLDER = 'workers';
        this.PRODUCERS_FOLDER = 'producers';
        /**
         * Root folder of the scope. A specific execution sits entirely in this folder,
         * so at the end it's safe to delete it entirely.
         */
        this.getScopePath = (scope) => {
            return path_1.default.join(Constants_1.default.defaults.REMORA_PATH, Constants_1.default.defaults.PRODUCER_TEMP_FOLDER, scope.folder);
        };
        /** Path of the dataset file written by the worker `workerId`. */
        this.getWorkerPath = (scope, workerId) => {
            return path_1.default.join(this.getScopePath(scope), this.WORKERS_FOLDER, `${workerId}.dataset`);
        };
        /** Path of the dataset file for the source file `sourceFileKey` of `producer`. */
        this.getProducerPath = (scope, producer, sourceFileKey) => {
            return path_1.default.join(this.getScopePath(scope), this.PRODUCERS_FOLDER, producer.name, `${sourceFileKey}.dataset`);
        };
        /** Path of the scope's `main.dataset` file. */
        this.getMainPath = (scope) => {
            return path_1.default.join(this.getScopePath(scope), 'main.dataset');
        };
        /**
         * Recursively deletes the scope folder. Resolves without error when the folder
         * does not exist: `force: true` already covers that case, so no racy
         * exists-then-delete check is needed.
         */
        this.clearScope = (scope) => __awaiter(this, void 0, void 0, function* () {
            yield promises_1.default.rm(this.getScopePath(scope), { recursive: true, force: true });
        });
        /**
         * Makes sure `fileUri` exists: creates the missing parent folders and, when the
         * file itself is missing, creates it empty. An existing file is left untouched.
         */
        this.ensurePath = (fileUri) => {
            // Recursive mkdir is a no-op for an existing directory, so no pre-check is required
            fs_1.default.mkdirSync(path_1.default.dirname(fileUri), { recursive: true });
            if (!fs_1.default.existsSync(fileUri))
                fs_1.default.writeFileSync(fileUri, '');
        };
    }
}
|
|
51
|
+
const ExecutorScope = new ExecutorScopeClass();
|
|
52
|
+
exports.default = ExecutorScope;
|
|
@@ -17,7 +17,7 @@ const DSTE_1 = __importDefault(require("../core/dste/DSTE"));
|
|
|
17
17
|
const DriverFactory_1 = __importDefault(require("../drivers/DriverFactory"));
|
|
18
18
|
const Environment_1 = __importDefault(require("../engines/Environment"));
|
|
19
19
|
const CSVParser_1 = __importDefault(require("../engines/parsing/CSVParser"));
|
|
20
|
-
const
|
|
20
|
+
const ExecutorScope_1 = __importDefault(require("./ExecutorScope"));
|
|
21
21
|
class OutputExecutorClass {
|
|
22
22
|
constructor() {
|
|
23
23
|
this._getInternalRecordFormat = (consumer) => {
|
|
@@ -62,13 +62,13 @@ class OutputExecutorClass {
|
|
|
62
62
|
return JSON.stringify(record);
|
|
63
63
|
}
|
|
64
64
|
};
|
|
65
|
-
this.exportResult = (consumer,
|
|
65
|
+
this.exportResult = (consumer, fields, scope) => __awaiter(this, void 0, void 0, function* () {
|
|
66
66
|
const internalFormat = this._getInternalRecordFormat(consumer);
|
|
67
67
|
for (const output of consumer.outputs) {
|
|
68
68
|
const destination = Environment_1.default.getSource(output.exportDestination);
|
|
69
69
|
const driver = yield DriverFactory_1.default.instantiateDestination(destination);
|
|
70
|
-
const currentPath =
|
|
71
|
-
const destinationName = this._composeFileName(consumer, output, this._getExtension(output)
|
|
70
|
+
const currentPath = ExecutorScope_1.default.getMainPath(scope);
|
|
71
|
+
const destinationName = this._composeFileName(consumer, output, this._getExtension(output));
|
|
72
72
|
if (output.format === internalFormat) {
|
|
73
73
|
return yield driver.move(currentPath, destinationName);
|
|
74
74
|
}
|
|
@@ -22,12 +22,12 @@ const LineParser_1 = __importDefault(require("../engines/parsing/LineParser"));
|
|
|
22
22
|
const CryptoEngine_1 = __importDefault(require("../engines/CryptoEngine"));
|
|
23
23
|
class ProducerExecutorClass {
|
|
24
24
|
constructor() {
|
|
25
|
-
this.ready = (producer) => __awaiter(this, void 0, void 0, function* () {
|
|
25
|
+
/**
 * Readies a producer for execution: validates the producer, looks up its source
 * in the Environment, instantiates the matching source driver and delegates to
 * the driver's `ready({ producer, scope })`.
 *
 * @returns Promise resolving to whatever the driver's `ready` resolves to.
 */
this.ready = (producer, scope) => __awaiter(this, void 0, void 0, function* () {
    (0, Affirm_1.default)(producer, 'Invalid producer');
    const source = Environment_1.default.getSource(producer.source);
    const invalidSourceMessage = `Invalid source ${producer.source} on producer ${producer.name}`;
    (0, Affirm_1.default)(source, invalidSourceMessage);
    const driver = yield DriverFactory_1.default.instantiateSource(source);
    const response = yield driver.ready({ producer, scope });
    return response;
});
|
|
32
32
|
this.processHeader = (line, producer) => {
|
|
33
33
|
const { settings: { fileType, hasHeaderRow, delimiter } } = producer;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@forzalabs/remora",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.14",
|
|
4
4
|
"description": "A powerful CLI tool for seamless data translation.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"private": false,
|
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
"debug": "npx tsx ./src/index.ts debug",
|
|
22
22
|
"create-producer": "npx tsx ./src/index.ts create-producer",
|
|
23
23
|
"copy-static-file": "npx tsx ./scripts/CopyStaticFile.js",
|
|
24
|
-
"build": "npm i && npm run sync && tsc --outDir .build && npm run copy-static-file",
|
|
24
|
+
"build": "npm i && npm run sync && rm -rf .build && tsc --outDir .build && npm run copy-static-file",
|
|
25
25
|
"fast-build": "tsc --outDir .build",
|
|
26
26
|
"upload": "npm run build && cd .build && npm publish --access=public"
|
|
27
27
|
},
|
|
@@ -21,11 +21,12 @@ dotenv_1.default.configDotenv();
|
|
|
21
21
|
const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
|
|
22
22
|
Environment_1.default.load('./');
|
|
23
23
|
try {
|
|
24
|
-
const { workerId, chunk, consumer, producer, prodDimensions, options } = workerData;
|
|
24
|
+
const { workerId, chunk, consumer, producer, prodDimensions, scope, options } = workerData;
|
|
25
25
|
(0, Affirm_1.default)(workerId, `Invalid worker id`);
|
|
26
26
|
(0, Affirm_1.default)(consumer, `Invalid consumer`);
|
|
27
27
|
(0, Affirm_1.default)(producer, `Invalid producer`);
|
|
28
28
|
(0, Affirm_1.default)(chunk, `Invalid chunk`);
|
|
29
|
+
(0, Affirm_1.default)(scope, `Invalid executor scope`);
|
|
29
30
|
const executor = new Executor_1.default();
|
|
30
31
|
const res = yield executor.run({
|
|
31
32
|
consumer,
|
|
@@ -34,6 +35,7 @@ const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
|
|
|
34
35
|
workerId,
|
|
35
36
|
chunk,
|
|
36
37
|
options,
|
|
38
|
+
scope,
|
|
37
39
|
reportWork: packet => workerpool_1.default.workerEmit(packet)
|
|
38
40
|
});
|
|
39
41
|
return res;
|