@forzalabs/remora 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Constants.js +1 -1
- package/definitions/json_schemas/consumer-schema.json +4 -0
- package/engines/consumer/ConsumerManager.js +25 -9
- package/engines/consumer/PostProcessor.js +11 -3
- package/engines/dataset/Dataset.js +23 -3
- package/engines/dataset/DatasetManager.js +35 -16
- package/engines/dataset/ParallelDataset.js +1 -1
- package/engines/execution/ExecutionEnvironment.js +3 -3
- package/engines/execution/ExecutionPlanner.js +10 -10
- package/engines/parsing/ParseManager.js +2 -4
- package/engines/producer/ProducerManager.js +15 -0
- package/engines/transform/JoinEngine.js +8 -3
- package/package.json +1 -1
- package/workers/FilterWorker.js +1 -1
- package/workers/ProjectionWorker.js +1 -1
- package/workers/TransformWorker.js +1 -1
package/Constants.js
CHANGED
package/definitions/json_schemas/consumer-schema.json
CHANGED
@@ -83,6 +83,10 @@
         "union": {
           "type": "boolean",
           "description": "Merges the data from the various producers in a single dataset. They must have the same output dimensions. If true, then you can't set any joins on any producer, since all producers are merged in a single dataset."
+        },
+        "isOptional": {
+          "type": "boolean",
+          "description": "If true, if the data for this producer is not available when executing the consumer (e.g. the file is not present) the consumer will not fail, but just create a placeholder dataset and continue execution."
         }
       },
       "required": [
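The consumer schema now accepts an "isOptional" flag alongside "union" on each producer entry. A minimal sketch of a consumer definition using both flags; field names other than union and isOptional are assumptions based on the rest of this diff:

// Hypothetical consumer definition; only "union" and "isOptional" come from the schema change above.
const consumer = {
    name: 'sales_report',
    producers: [
        { name: 'orders_2023', union: true },
        // If the orders_2024 data is missing at execution time, the consumer
        // no longer fails: a placeholder dataset is created instead.
        { name: 'orders_2024', union: true, isOptional: true }
    ]
};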
package/engines/consumer/ConsumerManager.js
CHANGED
@@ -82,18 +82,34 @@ class ConsumerManagerClass {
             (0, Affirm_1.default)(field, 'Invalid consumer field');
             const expandedFields = [];
             if (field.cField.key === '*') {
-
-
-
+                // If the producers are "union" then they have the same dimensions, meaning that I can use the "*"
+                if (consumer.producers.length > 1 && consumer.producers.every(x => x.union)) {
+                    const first = consumer.producers[0];
+                    const firstColumns = availableColumns.filter(x => x.owner === first.name);
+                    expandedFields.push(...firstColumns.map(x => ({
                         cField: {
-                            key:
-                            alias:
-                            from:
+                            key: x.nameInProducer,
+                            alias: x.nameInProducer,
+                            from: x.owner
                         },
-                        dimension:
-                        measure:
+                        dimension: x.dimension,
+                        measure: x.measure
+                    })));
+                }
+                else {
+                    const from = (_a = field.cField.from) !== null && _a !== void 0 ? _a : (consumer.producers.length === 1 ? consumer.producers[0].name : null);
+                    availableColumns.filter(x => x.owner === from).forEach(col => {
+                        expandedFields.push({
+                            cField: {
+                                key: col.nameInProducer,
+                                alias: col.nameInProducer,
+                                from: col.owner
+                            },
+                            dimension: col.dimension,
+                            measure: col.measure
+                        });
                     });
-                }
+                }
             }
             else if (field.cField.grouping) {
                 expandedFields.push({
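With this change, a "*" consumer field expands from the first producer's columns when every producer is marked union (they all share the same dimensions), and from the single matching producer otherwise. A small self-contained sketch of the union branch; the availableColumns array is hypothetical, only the property names nameInProducer, owner, dimension and measure come from the diff:

// Hypothetical columns for two union producers with identical dimensions.
const availableColumns = [
    { nameInProducer: 'id', owner: 'orders_2023', dimension: { type: 'number' }, measure: null },
    { nameInProducer: 'total', owner: 'orders_2023', dimension: { type: 'number' }, measure: null },
    { nameInProducer: 'id', owner: 'orders_2024', dimension: { type: 'number' }, measure: null },
    { nameInProducer: 'total', owner: 'orders_2024', dimension: { type: 'number' }, measure: null }
];
// When all producers have union: true, "*" expands using only the first
// producer's columns, since every producer exposes the same dimensions:
const expanded = availableColumns
    .filter(x => x.owner === 'orders_2023')
    .map(x => ({
        cField: { key: x.nameInProducer, alias: x.nameInProducer, from: x.owner },
        dimension: x.dimension,
        measure: x.measure
    }));
console.log(expanded.map(f => f.cField.key)); // ['id', 'total']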
package/engines/consumer/PostProcessor.js
CHANGED
@@ -37,7 +37,7 @@ class PostProcessorClass {
             const fields = ConsumerManager_1.default.getExpandedFields(consumer);
             const dimensionsUpdates = DatasetManager_1.default.computeDimensionsUpdates(dataset, consumer);
             let updatedDimensions = null;
-
+            dataset = yield dataset.map(record => {
                 var _a, _b;
                 // First apply the updates to the dimensions of this record
                 if (dimensionsUpdates.length > 0) {
@@ -62,8 +62,16 @@ class PostProcessorClass {
                 }
                 return record;
             }, options);
-
-
+            if (!updatedDimensions || updatedDimensions.length === 0) {
+                // This means that no updates were applied cause no records were present
+                // I need to force a fake update to get the new dimensions, since those might still have changed
+                const fakeRecord = new DatasetRecord_1.default('', dataset.getDimensions(), dataset.getDelimiter());
+                for (const update of dimensionsUpdates)
+                    fakeRecord.wholeUpdateDimension(update);
+                updatedDimensions = fakeRecord._dimensions;
+            }
+            dataset.setDimensions(updatedDimensions);
+            return dataset;
         });
         /**
          * Gets an array of objects (with potentially nested fields) and unpacks them to an array of objects with no nested fields
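The block added after the map call handles datasets with no records: the per-record callback never runs, so updatedDimensions would stay null even though the consumer can still rename or retype dimensions. A minimal sketch of that edge case in plain JavaScript; the update objects are invented stand-ins for the package's dimension updates:

// With zero records the mapping callback never fires, so nothing collects
// the updated dimension layout:
let updatedDimensions = null;
const records = [];
records.forEach(() => { updatedDimensions = ['order_id', 'order_total']; });
console.log(updatedDimensions); // null

// Pushing one synthetic record through the same updates recovers the layout:
const dimensionsUpdates = [{ from: 'id', to: 'order_id' }, { from: 'total', to: 'order_total' }];
updatedDimensions = ['id', 'total'].map(d => {
    const update = dimensionsUpdates.find(u => u.from === d);
    return update ? update.to : d;
});
console.log(updatedDimensions); // ['order_id', 'order_total']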
package/engines/dataset/Dataset.js
CHANGED
@@ -35,8 +35,9 @@ const Helper_1 = __importDefault(require("../../helper/Helper"));
 const Algo_1 = __importDefault(require("../../core/Algo"));
 const Environment_1 = __importDefault(require("../Environment"));
 const Logger_1 = __importDefault(require("../../helper/Logger"));
+const ProducerManager_1 = __importDefault(require("../producer/ProducerManager"));
 class Dataset {
-    constructor(
+    constructor(options) {
         var _a, _b;
         this.getPath = () => this._path;
         this.setPath = (path) => {
@@ -66,8 +67,25 @@ class Dataset {
         this.load = (source) => __awaiter(this, void 0, void 0, function* () {
             (0, Affirm_1.default)(source, 'Invalid source');
             this._startOperation('load', { source: source.engine });
-
-
+            try {
+                const driver = yield DriverFactory_1.default.instantiateSource(source);
+                yield driver.download(this);
+            }
+            catch (error) {
+                if (this._file.isOptional) {
+                    Logger_1.default.log(`Error loading dataset "${this.name}", creating default configuration and mock data because "isOptional" is true.`);
+                    if (!this.getDimensions() || this.getDimensions().length === 0)
+                        this.setDimensions(ProducerManager_1.default.getColumns(this._baseProducer).map((x, i) => { var _a; return ({ index: i, key: x.nameInProducer, name: x.aliasInProducer, hidden: false, type: (_a = x.dimension) === null || _a === void 0 ? void 0 : _a.type }); }));
+                    if (!this.getFirstLine() || this.getFirstLine().length === 0) {
+                        if (this._file.hasHeaderRow)
+                            this.setFirstLine(this.getDimensions().map(x => x.key).join(this.getDelimiter()));
+                        else
+                            this.setFirstLine('');
+                    }
+                }
+                else
+                    throw error;
+            }
             this._size = this._computeSize();
             this._finishOperation('load');
             return this;
@@ -839,9 +857,11 @@ class Dataset {
             return searchInOperations(this._operations);
         };
         this._computeSize = () => fs_1.default.statSync(this._path).size / (1024 * 1024);
+        const { name, baseProducer, file, batchSize, executionId } = options;
         this.name = name;
         this._file = file;
         this._executionId = executionId;
+        this._baseProducer = baseProducer;
         this._batchSize = (_a = batchSize !== null && batchSize !== void 0 ? batchSize : parseInt(Environment_1.default.get('MAX_ITEMS_IN_MEMORY'))) !== null && _a !== void 0 ? _a : Constants_1.default.defaults.MAX_ITEMS_IN_MEMORY;
         this._dimensions = [];
         this._firstLine = '';
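Dataset now takes a single options object instead of positional arguments, and load() falls back to a placeholder dataset when the underlying file is missing and marked optional. A rough sketch of the new options shape; the values are invented, only the option and file property names come from the diff:

// Hypothetical options for the new constructor.
const datasetOptions = {
    name: 'orders_2024',
    baseProducer: { name: 'orders_2024', dimensions: [{ name: 'id' }, { name: 'total' }] },
    file: { fileKey: 'orders_2024.csv', fileType: 'CSV', hasHeaderRow: true, delimiter: ',', isOptional: true },
    executionId: 'exec-123'
};
// With isOptional: true, a failed download no longer throws: dimensions are
// derived from baseProducer's dimensions and, because hasHeaderRow is true,
// the first line is synthesized from the dimension keys:
const syntheticHeader = datasetOptions.baseProducer.dimensions
    .map(d => d.name)
    .join(datasetOptions.file.delimiter);
console.log(syntheticHeader); // 'id,total'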
package/engines/dataset/DatasetManager.js
CHANGED
@@ -20,7 +20,7 @@ const FileCompiler_1 = __importDefault(require("../file/FileCompiler"));
 const ParseManager_1 = __importDefault(require("../parsing/ParseManager"));
 const Dataset_1 = __importDefault(require("./Dataset"));
 const promises_1 = require("stream/promises");
-const fs_1 = require("fs");
+const fs_1 = __importDefault(require("fs"));
 const DeveloperEngine_1 = __importDefault(require("../ai/DeveloperEngine"));
 const Constants_1 = __importDefault(require("../../Constants"));
 class DatasetManagerClass {
@@ -30,33 +30,41 @@ class DatasetManagerClass {
          * be isolated inside a sub-folder specific to that execution to avoid concurrency conflicts
          * when the same producer / consumer is executed multiple times in parallel.
          */
-        this.create = (producer,
+        this.create = (producer, options) => {
            var _a, _b;
            (0, Affirm_1.default)(producer, 'Invalid producer');
            const { name, settings: { delimiter, fileKey, fileType, hasHeaderRow, sheetName, httpApi } } = producer;
+            const executionId = options === null || options === void 0 ? void 0 : options.executionId;
+            const cProducer = options === null || options === void 0 ? void 0 : options.cProducer;
            // Check if any dimension has sourceFilename flag set to true
            const hasSourceFilenameDimension = (_b = (_a = producer.dimensions) === null || _a === void 0 ? void 0 : _a.some(d => d.sourceFilename === true)) !== null && _b !== void 0 ? _b : false;
-            const dataset = new Dataset_1.default(
-
-
-
-
-
-
-
-
+            const dataset = new Dataset_1.default({
+                name,
+                baseProducer: producer,
+                file: {
+                    fileKey,
+                    fileType,
+                    hasHeaderRow,
+                    sheetName,
+                    delimiter,
+                    httpApi,
+                    includeSourceFilename: hasSourceFilenameDimension,
+                    isOptional: cProducer === null || cProducer === void 0 ? void 0 : cProducer.isOptional
+                },
+                executionId
+            });
            return dataset;
        };
        this.buildDimensions = (dataset_1, producer_1, ...args_1) => __awaiter(this, [dataset_1, producer_1, ...args_1], void 0, function* (dataset, producer, discover = false) {
            (0, Affirm_1.default)(dataset, `Invalid dataset`);
            (0, Affirm_1.default)(producer, `Invalid producer`);
            const firstLine = dataset.getFirstLine();
-
+            Affirm_1.default.hasValue(firstLine, `The first line of the dataset was not set.`);
            return this.buildDimensionsFromFirstLine(firstLine, dataset.getFile(), producer, discover);
        });
        this.buildDimensionsFromFirstLine = (firstLine_1, dsFile_1, producer_1, ...args_1) => __awaiter(this, [firstLine_1, dsFile_1, producer_1, ...args_1], void 0, function* (firstLine, dsFile, producer, discover = false) {
            var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o;
-
+            Affirm_1.default.hasValue(firstLine, `Invalid first line`);
            (0, Affirm_1.default)(dsFile, `Invalid dataset file`);
            (0, Affirm_1.default)(producer, `Invalid producer`);
            const file = dsFile;
@@ -83,6 +91,13 @@ class DatasetManagerClass {
            const columns = FileCompiler_1.default.compileProducer(producer, source);
            const firstObject = JSON.parse(firstLine);
            const keys = Object.keys(firstObject);
+            // const columnsWithDot = columns.filter(x => x.aliasInProducer.includes('.'))
+            // if (columnsWithDot.length > 0) {
+            //     console.log(columns, keys, 'PAPAPAPP')
+            //     for (const colWithDot of columnsWithDot) {
+            //         console.log(colWithDot)
+            //     }
+            // }
            // If includeSourceFilename is enabled, the driver has added $source_filename column
            // We need to add it to the keys list so dimensions can reference it
            const includeSourceFilename = file.includeSourceFilename === true;
@@ -247,12 +262,16 @@ class DatasetManagerClass {
            const datasetPath = dataset.getPath();
            for (let i = 0; i < threadPaths.length; i++) {
                const path = threadPaths[i];
-
+                // If the thread skipped execution (maybe because no data needed to change), then the
+                // dataset file might not exist, in this case, just skip it
+                if (!fs_1.default.existsSync(path))
+                    continue;
+                const readStream = fs_1.default.createReadStream(path);
                // For the first file, create a new write stream
                // For subsequent files, append to the existing file
-                const writeStream =
+                const writeStream = fs_1.default.createWriteStream(datasetPath, { flags: i === 0 ? 'w' : 'a' });
                yield (0, promises_1.pipeline)(readStream, writeStream);
-
+                fs_1.default.unlinkSync(path);
            }
            return dataset;
        });
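Two behavioral fixes stand out in the merge loop: thread output files that were never written are skipped, and each part file is deleted after being appended. A small self-contained sketch of the same pattern; the function and file names are invented:

const fs = require('fs');
const { pipeline } = require('stream/promises');

// Hypothetical helper mirroring the merge loop above.
async function mergeParts(partPaths, targetPath) {
    for (let i = 0; i < partPaths.length; i++) {
        const path = partPaths[i];
        // A thread that skipped execution leaves no file behind: just skip it.
        if (!fs.existsSync(path))
            continue;
        // Part 0 truncates the target ('w'); later parts append ('a').
        const writeStream = fs.createWriteStream(targetPath, { flags: i === 0 ? 'w' : 'a' });
        await pipeline(fs.createReadStream(path), writeStream);
        // Clean up the per-thread part once it has been merged.
        fs.unlinkSync(path);
    }
}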
package/engines/dataset/ParallelDataset.js
CHANGED
@@ -63,7 +63,7 @@ class ParallelDatasetClass {
            const datasetCount = dataset.getCount();
            const batchSize = (_a = parseInt(Environment_1.default.get('MAX_ITEMS_IN_MEMORY'))) !== null && _a !== void 0 ? _a : Constants_1.default.defaults.MAX_ITEMS_IN_MEMORY;
            const workerChunkSize = batchSize * Math.round(Constants_1.default.defaults.INDICATIVE_THREAD_LINE_COUNT / batchSize);
-            const workerCount = Math.min(Math.ceil(datasetCount / workerChunkSize), Constants_1.default.defaults.MAX_THREAD_COUNT);
+            const workerCount = Math.max(Math.min(Math.ceil(datasetCount / workerChunkSize), Constants_1.default.defaults.MAX_THREAD_COUNT), 1);
            const adjustedWorkerCount = Math.ceil(datasetCount / workerCount);
            return { workerCount, adjustedWorkerCount };
        };
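The only change here wraps the worker count in Math.max(..., 1) so it can never reach zero. A quick worked example of why that matters; the chunk size and thread cap below are invented values, only the formula comes from the diff:

// Hypothetical values.
const workerChunkSize = 100000;
const MAX_THREAD_COUNT = 8;
const datasetCount = 0; // empty dataset

const oldCount = Math.min(Math.ceil(datasetCount / workerChunkSize), MAX_THREAD_COUNT);
const newCount = Math.max(oldCount, 1);
console.log(oldCount); // 0 -> adjustedWorkerCount = Math.ceil(0 / 0) = NaN
console.log(newCount); // 1 -> adjustedWorkerCount = Math.ceil(0 / 1) = 0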
package/engines/execution/ExecutionEnvironment.js
CHANGED
@@ -65,17 +65,17 @@ class ExecutionEnvironment {
                    (0, Affirm_1.default)(planStep.producer, `Invalid producer in execute-SQL step`);
                    const driver = yield DriverFactory_1.default.instantiateSource(planStep.source);
                    const queryData = (yield driver.query(this._envData.finalSQL)).rows;
-                    let dataset = DatasetManager_1.default.create(planStep.producer, this._executionId);
+                    let dataset = DatasetManager_1.default.create(planStep.producer, { cProducer: planStep.cProducer, executionId: this._executionId });
                    dataset = yield dataset.loadFromMemory(queryData, planStep.producer);
                    this._storeIntermidiate(planStep, dataset);
                    break;
                }
                case 'load-dataset': {
                    (0, Affirm_1.default)(planStep.producer, `Invalid producer in read-file-lines step`);
-                    const { producer } = planStep;
+                    const { producer, cProducer } = planStep;
                    const source = Environment_1.default.getSource(producer.source);
                    (0, Affirm_1.default)(source, `Source "${producer.source}" of producer "${producer.name}" not found.`);
-                    let dataset = DatasetManager_1.default.create(producer, this._executionId);
+                    let dataset = DatasetManager_1.default.create(producer, { cProducer, executionId: this._executionId });
                    dataset = yield dataset.load(source);
                    this._storeIntermidiate(planStep, dataset);
                    break;
package/engines/execution/ExecutionPlanner.js
CHANGED
@@ -85,9 +85,9 @@ class ExecutionPlannerClas {
            const uniqEngineClasses = Algo_1.default.uniq(engineClasses);
            const plan = [];
            if (uniqEngineClasses.length === 1 && uniqEngineClasses[0] === 'sql')
-                plan.push(...this._planProducer(producers[0], options));
+                plan.push(...this._planProducer(producers[0], consumer.producers[0], options));
            else
-                plan.push(...(producers.flatMap(x => this._planProducer(x, options))));
+                plan.push(...(producers.flatMap((x, i) => this._planProducer(x, consumer.producers[i], options))));
            // I technically don't need this, but I keep it to merge all the datasets to a single one
            // so the other steps of the plan can work with a single dataset variable
            plan.push({ type: 'join-producers-data' });
@@ -95,7 +95,7 @@ class ExecutionPlannerClas {
            plan.push({ type: 'apply-consumer-filters-on-JSON' });
            return plan;
        };
-        this._planProducer = (producer, options) => {
+        this._planProducer = (producer, cProducer, options) => {
            (0, Affirm_1.default)(producer, 'Invalid producer');
            const source = Environment_1.default.getSource(producer.source);
            (0, Affirm_1.default)(source, `Source "${producer.source}" of producer "${producer.name}" not found.`);
@@ -104,21 +104,21 @@ class ExecutionPlannerClas {
            switch (producerEngine) {
                case 'postgres':
                case 'aws-redshift': {
-                    plan.push({ type: 'compile-consumer-to-SQL', producer });
+                    plan.push({ type: 'compile-consumer-to-SQL', producer, cProducer });
                    if (Algo_1.default.hasVal(options))
-                        plan.push({ type: 'compile-execution-request-to-SQL', producer });
-                    plan.push({ type: 'execute-SQL', source: source, producer });
+                        plan.push({ type: 'compile-execution-request-to-SQL', producer, cProducer });
+                    plan.push({ type: 'execute-SQL', source: source, producer, cProducer });
                    break;
                }
                case 'local':
                case 'aws-s3':
                case 'delta-share':
                case 'http-api': {
-                    plan.push({ type: 'load-dataset', producer });
-                    plan.push({ type: 'prepare-dataset', producer });
+                    plan.push({ type: 'load-dataset', producer, cProducer });
+                    plan.push({ type: 'prepare-dataset', producer, cProducer });
                    if (producer.dimensions.some(x => { var _a, _b; return ((_a = x.alias) === null || _a === void 0 ? void 0 : _a.includes('{')) || ((_b = x.alias) === null || _b === void 0 ? void 0 : _b.includes('[')); }))
-                        plan.push({ type: 'nested-field-unpacking', producer });
-                    plan.push({ type: 'post-process-json', producer });
+                        plan.push({ type: 'nested-field-unpacking', producer, cProducer });
+                    plan.push({ type: 'post-process-json', producer, cProducer });
                    break;
                }
                default: throw new Error(`Engine "${producerEngine}" not supported`);
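Across ExecutionEnvironment and ExecutionPlanner, the consumer-side producer reference (cProducer) is now threaded through every plan step, so per-consumer flags such as isOptional reach DatasetManager.create. A rough sketch of the resulting step objects; the values are invented, the step types and property names come from the diff:

// Hypothetical plan fragment: the producer definition plus the consumer's
// view of it (cProducer), which carries flags like union and isOptional.
const producer = { name: 'orders_2024', source: 'warehouse-s3' };
const cProducer = { name: 'orders_2024', union: true, isOptional: true };
const plan = [
    { type: 'load-dataset', producer, cProducer },
    { type: 'prepare-dataset', producer, cProducer },
    { type: 'post-process-json', producer, cProducer }
];
// Each step can now forward cProducer, e.g. into
// DatasetManager.create(producer, { cProducer, executionId }).
console.log(plan.every(step => step.cProducer.isOptional)); // true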
package/engines/parsing/ParseManager.js
CHANGED
@@ -4,10 +4,9 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
 };
 Object.defineProperty(exports, "__esModule", { value: true });
 const Affirm_1 = __importDefault(require("../../core/Affirm"));
-const Environment_1 = __importDefault(require("../Environment"));
-const FileCompiler_1 = __importDefault(require("../file/FileCompiler"));
 const CSVParser_1 = __importDefault(require("./CSVParser"));
 const Constants_1 = __importDefault(require("../../Constants"));
+const ProducerManager_1 = __importDefault(require("../producer/ProducerManager"));
 class ParseManagerClass {
     constructor() {
         this._extractHeader = (headerLine, delimiter, producer, discover) => {
@@ -15,8 +14,7 @@ class ParseManagerClass {
            (0, Affirm_1.default)(headerLine, `Invalid CSV header line for producer "${producer.name}"`);
            (0, Affirm_1.default)(delimiter, 'Invalid CSV delimiter');
            (0, Affirm_1.default)(producer, 'Invalid producer');
-
-            let columns = FileCompiler_1.default.compileProducer(producer, source);
+            let columns = ProducerManager_1.default.getColumns(producer);
            const headerColumns = CSVParser_1.default.parseRow(headerLine, delimiter).map(x => x.trim());
            // If I'm discovering the file, then it means that the dimensions are not set, so I use the ones that I get from the file directly
            if (discover)
package/engines/producer/ProducerManager.js
CHANGED
@@ -7,6 +7,21 @@ const Affirm_1 = __importDefault(require("../../core/Affirm"));
 const SecretManager_1 = __importDefault(require("../SecretManager"));
 class ProducerManagerClass {
     constructor() {
+        this.getColumns = (producer) => {
+            var _a;
+            (0, Affirm_1.default)(producer, `Invalid producer`);
+            (0, Affirm_1.default)((_a = producer.settings.fileKey) !== null && _a !== void 0 ? _a : producer.settings.sqlTable, `Missing required file key in producer settings`);
+            (0, Affirm_1.default)(producer.settings.fileType, `Missing required file type in producer settings`);
+            const columns = producer.dimensions.map(x => ({
+                aliasInProducer: x.alias,
+                nameInProducer: x.name,
+                consumerAlias: null,
+                consumerKey: null,
+                owner: producer.name,
+                dimension: x
+            }));
+            return columns;
+        };
        this.getMask = (dimension) => {
            if (!dimension || !dimension.mask)
                return null;
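The new ProducerManager.getColumns builds column descriptors straight from the producer definition, which is what Dataset.load and ParseManager now rely on. A standalone sketch of the mapping it performs; the producer object is hypothetical, the output property names come from the diff:

// Hypothetical producer definition; the mapping mirrors getColumns above.
const producer = {
    name: 'orders_2024',
    settings: { fileKey: 'orders_2024.csv', fileType: 'CSV' },
    dimensions: [
        { name: 'id', alias: 'order_id', type: 'number' },
        { name: 'total', alias: 'order_total', type: 'number' }
    ]
};
const columns = producer.dimensions.map(x => ({
    aliasInProducer: x.alias,
    nameInProducer: x.name,
    consumerAlias: null,
    consumerKey: null,
    owner: producer.name,
    dimension: x
}));
console.log(columns[0]); // { aliasInProducer: 'order_id', nameInProducer: 'id', ..., owner: 'orders_2024' }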
package/engines/transform/JoinEngine.js
CHANGED
@@ -97,9 +97,14 @@ class JoinEngineClass {
        const consumerShape = ConsumerEngine_1.default.getOutputShape(consumer);
        const consumerColumns = ConsumerEngine_1.default.compile(consumer);
        // Create a new dataset for the joined result
-        const resultDataset = new Dataset_1.default(
-
-
+        const resultDataset = new Dataset_1.default({
+            name: `joined_${consumer.name}`,
+            file: {
+                fileKey: 'temp',
+                fileType: 'CSV'
+            },
+            baseProducer: Environment_1.default.getProducer(consumer.producers[0].name),
+            executionId: producedData[0].dataset.getExecutionId()
        });
        // Get dimensions for the result dataset based on consumer columns
        const resultDimensions = consumerColumns.map((col, index) => {
package/package.json
CHANGED
package/workers/FilterWorker.js
CHANGED
@@ -31,7 +31,7 @@ const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
    (0, Affirm_1.default)(datasetDimensions, `Invalid dataset dimensions`);
    (0, Affirm_1.default)(filter, `Invalid filter data`);
    (0, Affirm_1.default)(datasetDelimiter, `Invalid dataset delimter`);
-    const dataset = new Dataset_1.default(datasetName, datasetFile,
+    const dataset = new Dataset_1.default({ name: datasetName, file: datasetFile, baseProducer: null, executionId });
    dataset
        .setDimensions(datasetDimensions)
        .setDelimiter(datasetDelimiter);
package/workers/ProjectionWorker.js
CHANGED
@@ -33,7 +33,7 @@ const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
    (0, Affirm_1.default)(datasetDelimiter, `Invalid dataset delimter`);
    const consumer = Environment_1.default.getConsumer(projectionData.consumerName);
    (0, Affirm_1.default)(consumer, `Wrong consumer name sent to projection worker: "${projectionData.consumerName}" not found.`);
-    const dataset = new Dataset_1.default(datasetName, datasetFile,
+    const dataset = new Dataset_1.default({ name: datasetName, file: datasetFile, baseProducer: null, executionId });
    dataset
        .setDimensions(datasetDimensions)
        .setDelimiter(datasetDelimiter);
package/workers/TransformWorker.js
CHANGED
@@ -33,7 +33,7 @@ const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
    (0, Affirm_1.default)(datasetDelimiter, `Invalid dataset delimter`);
    const consumer = Environment_1.default.getConsumer(transformData.consumerName);
    (0, Affirm_1.default)(consumer, `Wrong consumer name sent to projection worker: "${transformData.consumerName}" not found.`);
-    const dataset = new Dataset_1.default(datasetName, datasetFile,
+    const dataset = new Dataset_1.default({ name: datasetName, file: datasetFile, baseProducer: null, executionId });
    dataset
        .setDimensions(datasetDimensions)
        .setDelimiter(datasetDelimiter);