@forzalabs/remora 0.0.50-nasco.3 → 0.0.51-nasco.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Constants.js CHANGED
@@ -1,7 +1,7 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  const CONSTANTS = {
4
- cliVersion: '0.0.50-nasco',
4
+ cliVersion: '0.0.51-nasco',
5
5
  lambdaVersion: 1,
6
6
  port: 5069,
7
7
  defaults: {
@@ -79,6 +79,10 @@
79
79
  "code"
80
80
  ],
81
81
  "additionalProperties": false
82
+ },
83
+ "union": {
84
+ "type": "boolean",
85
+ "description": "Merges the data from the various producers in a single dataset. They must have the same output dimensions. If true, then you can't set any joins on any producer, since all producers are merged in a single dataset."
82
86
  }
83
87
  },
84
88
  "required": [
@@ -92,6 +92,8 @@ class JoinEngineClass {
92
92
  (0, Affirm_1.default)(producedData, 'Invalid produced data');
93
93
  if (consumer.producers.length <= 1)
94
94
  return this.findProducerData(consumer.producers[0].name, producedData);
95
+ if (consumer.producers.some(x => x.union))
96
+ return yield this.union(consumer, producedData);
95
97
  const consumerShape = ConsumerEngine_1.default.getOutputShape(consumer);
96
98
  const consumerColumns = ConsumerEngine_1.default.compile(consumer);
97
99
  // Create a new dataset for the joined result
@@ -132,6 +134,21 @@ class JoinEngineClass {
132
134
  }
133
135
  return resultDataset;
134
136
  });
// [review annotation — not part of the released package content]
// union(consumer, producedData): merges the datasets of all produced entries into the
// first one. It takes producedData[0].dataset as the "main" dataset, verifies that every
// other entry's dataset has the same dimension key, streams that dataset batch by batch
// and appends each batch to the main dataset, then resolves with the main dataset.
// Params: consumer — only consumer.name is read here (for the error message);
//         producedData — array of entries exposing `.dataset` and `.producerKey`.
// Throws: Error when a dataset's dimension key differs from the main dataset's.
// NOTE(review): there is no guard for an empty producedData here; producedData[0] is
// dereferenced unconditionally — confirm the caller always passes at least one entry.
137
+ this.union = (consumer, producedData) => __awaiter(this, void 0, void 0, function* () {
// Dimension key = trimmed dimension names joined with ';' in getDimensions() order,
// so the comparison below is sensitive to both the names and their order.
138
+ const getDimensionsKey = (ds) => ds.getDimensions().map(x => x.name.trim()).join(';').trim();
139
+ const mainDataset = producedData[0].dataset;
140
+ const mainDimKey = getDimensionsKey(mainDataset);
141
+ const otherProducedData = producedData.slice(1);
142
+ for (const prodData of otherProducedData) {
143
+ const prodDimKey = getDimensionsKey(prodData.dataset);
// The check runs per dataset inside the loop: a mismatch on a later entry is raised
// only after the earlier entries have already been appended to the main dataset.
144
+ if (mainDimKey !== prodDimKey)
// The message reads dataset['_name'] via bracket access — presumably a private
// field holding the dataset name; verify against the dataset class.
145
+ throw new Error(`On consumer "${consumer.name}", can't union the dataset "${prodData.dataset['_name']}" (producer: ${prodData.producerKey}) because the dimensions are different from the main dataset "${mainDataset['_name']}" (producer: ${producedData[0].producerKey}). "${mainDimKey}" != "${prodDimKey}"`);
// Datasets are processed one after another; each batch append is awaited (yield)
// before the streaming callback completes.
146
+ yield prodData.dataset.streamBatches((batch) => __awaiter(this, void 0, void 0, function* () {
147
+ yield mainDataset.append(batch);
148
+ }));
149
+ }
// The returned object is the same dataset instance taken from producedData[0].
150
+ return mainDataset;
151
+ });
135
152
  this.performStreamingJoin = (leftDataset, rightLookup, condition, relationship, consumerColumns, resultDataset) => __awaiter(this, void 0, void 0, function* () {
136
153
  const joinedRecords = [];
137
154
  const batchSize = leftDataset.getBatchSize();
@@ -90,6 +90,12 @@ class ValidatorClass {
90
90
  errors.push('No producers found');
91
91
  if (producers.some(x => !x))
92
92
  errors.push(`Invalid producer found in consumer "${consumer.name}"`);
93
+ if (consumer.producers.length > 0) {
94
+ const withJoins = consumer.producers.filter(x => (Algo_1.default.hasVal(x.joins) && x.joins.length > 0) || !x.union);
95
+ const withUnions = consumer.producers.filter(x => x.union === true);
96
+ if (withJoins.length > 0 && withUnions.length)
97
+ errors.push(`Multiple producers in consumer have mixed "joins" and "union": you can either have multiple producers with "joins" or multiple producers with "union", but not both (joins: ${withJoins.map(x => x.name).join(', ')}; unions: ${withUnions.map(x => x.name).join(', ')})`);
98
+ }
93
99
  // Validation on sources
94
100
  const sources = producers.map(x => Environment_1.default.getSource(x.source));
95
101
  if (sources.length === 0)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forzalabs/remora",
3
- "version": "0.0.50-nasco.3",
3
+ "version": "0.0.51-nasco.3",
4
4
  "description": "A powerful CLI tool for seamless data translation.",
5
5
  "main": "index.js",
6
6
  "private": false,