@forzalabs/remora 0.0.50-nasco.3 → 0.0.51-nasco.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Constants.js
CHANGED
|
@@ -79,6 +79,10 @@
|
|
|
79
79
|
"code"
|
|
80
80
|
],
|
|
81
81
|
"additionalProperties": false
|
|
82
|
+
},
|
|
83
|
+
"union": {
|
|
84
|
+
"type": "boolean",
|
|
85
|
+
"description": "Merges the data from the various producers in a single dataset. They must have the same output dimensions. If true, then you can't set any joins on any producer, since all producers are merged in a single dataset."
|
|
82
86
|
}
|
|
83
87
|
},
|
|
84
88
|
"required": [
|
|
@@ -92,6 +92,8 @@ class JoinEngineClass {
|
|
|
92
92
|
(0, Affirm_1.default)(producedData, 'Invalid produced data');
|
|
93
93
|
if (consumer.producers.length <= 1)
|
|
94
94
|
return this.findProducerData(consumer.producers[0].name, producedData);
|
|
95
|
+
if (consumer.producers.some(x => x.union))
|
|
96
|
+
return yield this.union(consumer, producedData);
|
|
95
97
|
const consumerShape = ConsumerEngine_1.default.getOutputShape(consumer);
|
|
96
98
|
const consumerColumns = ConsumerEngine_1.default.compile(consumer);
|
|
97
99
|
// Create a new dataset for the joined result
|
|
@@ -132,6 +134,21 @@ class JoinEngineClass {
|
|
|
132
134
|
}
|
|
133
135
|
return resultDataset;
|
|
134
136
|
});
|
|
137
|
+
this.union = (consumer, producedData) => __awaiter(this, void 0, void 0, function* () {
|
|
138
|
+
const getDimensionsKey = (ds) => ds.getDimensions().map(x => x.name.trim()).join(';').trim();
|
|
139
|
+
const mainDataset = producedData[0].dataset;
|
|
140
|
+
const mainDimKey = getDimensionsKey(mainDataset);
|
|
141
|
+
const otherProducedData = producedData.slice(1);
|
|
142
|
+
for (const prodData of otherProducedData) {
|
|
143
|
+
const prodDimKey = getDimensionsKey(prodData.dataset);
|
|
144
|
+
if (mainDimKey !== prodDimKey)
|
|
145
|
+
throw new Error(`On consumer "${consumer.name}", can't union the dataset "${prodData.dataset['_name']}" (producer: ${prodData.producerKey}) because the dimensions are different from the main dataset "${mainDataset['_name']}" (producer: ${producedData[0].producerKey}). "${mainDimKey}" != "${prodDimKey}"`);
|
|
146
|
+
yield prodData.dataset.streamBatches((batch) => __awaiter(this, void 0, void 0, function* () {
|
|
147
|
+
yield mainDataset.append(batch);
|
|
148
|
+
}));
|
|
149
|
+
}
|
|
150
|
+
return mainDataset;
|
|
151
|
+
});
|
|
135
152
|
this.performStreamingJoin = (leftDataset, rightLookup, condition, relationship, consumerColumns, resultDataset) => __awaiter(this, void 0, void 0, function* () {
|
|
136
153
|
const joinedRecords = [];
|
|
137
154
|
const batchSize = leftDataset.getBatchSize();
|
|
@@ -90,6 +90,12 @@ class ValidatorClass {
|
|
|
90
90
|
errors.push('No producers found');
|
|
91
91
|
if (producers.some(x => !x))
|
|
92
92
|
errors.push(`Invalid producer found in consumer "${consumer.name}"`);
|
|
93
|
+
if (consumer.producers.length > 0) {
|
|
94
|
+
const withJoins = consumer.producers.filter(x => (Algo_1.default.hasVal(x.joins) && x.joins.length > 0) || !x.union);
|
|
95
|
+
const withUnions = consumer.producers.filter(x => x.union === true);
|
|
96
|
+
if (withJoins.length > 0 && withUnions.length)
|
|
97
|
+
errors.push(`Multiple producers in consumer have mixed "joins" and "union": you can either have multiple producers with "joins" or multiple producers with "union", but not both (joins: ${withJoins.map(x => x.name).join(', ')}; unions: ${withUnions.map(x => x.name).join(', ')})`);
|
|
98
|
+
}
|
|
93
99
|
// Validation on sources
|
|
94
100
|
const sources = producers.map(x => Environment_1.default.getSource(x.source));
|
|
95
101
|
if (sources.length === 0)
|