@forzalabs/remora 0.0.49-nasco.3 → 0.0.51-nasco.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Constants.js +1 -1
- package/core/Algo.js +2 -1
- package/definitions/json_schemas/consumer-schema.json +4 -0
- package/engines/consumer/PostProcessor.js +1 -35
- package/engines/dataset/Dataset.js +48 -60
- package/engines/dataset/DatasetRecord.js +35 -40
- package/engines/execution/ExecutionEnvironment.js +2 -24
- package/engines/transform/JoinEngine.js +17 -0
- package/engines/validation/Validator.js +6 -0
- package/package.json +2 -1
package/Constants.js
CHANGED
package/core/Algo.js
CHANGED
@@ -150,6 +150,7 @@ const algo = {
         (0, Affirm_1.default)(arr.length > 0, 'Array must be non-empty');
         return Math.max(...arr);
     },
-    replaceAll: (text, search, replace) => text.replace(new RegExp(search, 'g'), replace)
+    replaceAll: (text, search, replace) => text.replace(new RegExp(search, 'g'), replace),
+    deepClone: (data) => JSON.parse(JSON.stringify(data))
 };
 exports.default = algo;
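
The new Algo.deepClone relies on a JSON round-trip, which copies plain data deeply but has well-known limits. A quick illustrative sketch (not part of the package):

    // JSON round-trip cloning: nested objects become independent copies,
    // but functions and undefined values are dropped, and Dates turn into strings.
    const deepClone = (data) => JSON.parse(JSON.stringify(data));

    const original = { dims: [{ name: 'id', index: 0 }], when: new Date() };
    const copy = deepClone(original);
    copy.dims[0].name = 'renamed';

    console.log(original.dims[0].name); // 'id' (the copy is independent)
    console.log(typeof copy.when);      // 'string' (the Date did not survive)

That limitation appears acceptable here because the dimension descriptors being cloned (see Dataset.js below) look like plain JSON objects.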

package/definitions/json_schemas/consumer-schema.json
CHANGED

@@ -79,6 +79,10 @@
         "code"
       ],
       "additionalProperties": false
+    },
+    "union": {
+      "type": "boolean",
+      "description": "Merges the data from the various producers in a single dataset. They must have the same output dimensions. If true, then you can't set any joins on any producer, since all producers are merged in a single dataset."
     }
   },
   "required": [
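
For context, a consumer that opts into the new flag could look like the following hypothetical sketch (producer names and sources are invented; only "union" comes from the schema change above):

    // Both producers set union: true, so their rows are merged into a single
    // dataset instead of being joined. Per the description above, neither may
    // declare any joins.
    const consumer = {
        name: 'all-orders',
        producers: [
            { name: 'us-orders', source: 'warehouse-us', union: true },
            { name: 'eu-orders', source: 'warehouse-eu', union: true }
        ]
    };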

package/engines/consumer/PostProcessor.js
CHANGED

@@ -33,9 +33,7 @@ class PostProcessorClass {
         (0, Affirm_1.default)(consumer, 'Invalid consumer');
         (0, Affirm_1.default)(dataset, 'Invalid dataset');
         const fields = ConsumerManager_1.default.getExpandedFields(consumer);
-        let newDataset = yield
-        newDataset = this.updateDimensions(newDataset, consumer);
-        newDataset = yield this.reorderDimensions(newDataset, consumer);
+        let newDataset = yield dataset.wholeUpdateDimensions(fields);
         newDataset = yield newDataset.map(record => {
             var _a, _b;
             for (const field of fields) {

@@ -53,38 +51,6 @@ class PostProcessorClass {
             });
             return newDataset;
         });
-        this.updateDimensions = (dataset, consumer) => {
-            const fields = ConsumerManager_1.default.getExpandedFields(consumer);
-            dataset.updateDimensions(fields);
-            return dataset;
-        };
-        this.dropDimensions = (dataset, consumer) => __awaiter(this, void 0, void 0, function* () {
-            const initialDimensions = dataset.getDimensions();
-            const fields = ConsumerManager_1.default.getExpandedFields(consumer);
-            const dimensionsToKeep = new Set();
-            // First, identify which dimensions the consumer actually wants
-            for (const field of fields) {
-                const { key } = field.cField;
-                dimensionsToKeep.add(key);
-            }
-            // Create a copy to iterate over since we'll be modifying the original
-            const dimensionsCopy = [...initialDimensions];
-            const dimensionsToDrop = [];
-            for (const dim of dimensionsCopy) {
-                if (!dimensionsToKeep.has(dim.name)) {
-                    // This dimension is not wanted by the consumer, collect it for dropping
-                    dimensionsToDrop.push(dim.name);
-                }
-            }
-            // Drop all unwanted dimensions in a single optimized operation
-            if (dimensionsToDrop.length > 0)
-                yield dataset.dropDimensions(dimensionsToDrop);
-            return dataset;
-        });
-        this.reorderDimensions = (dataset, consumer) => __awaiter(this, void 0, void 0, function* () {
-            const fields = ConsumerManager_1.default.getExpandedFields(consumer);
-            return yield dataset.reorderDimensions(fields.map(x => { var _a; return ((_a = x.cField.alias) !== null && _a !== void 0 ? _a : x.cField.key); }));
-        });
         /**
          * Gets an array of objects (with potentially nested fields) and unpacks them to an array of objects with no nested fields
          * If some nested keys are lists, then a logic similar to a SQL JOIN is used and rows are duplicated
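
Taken together, these two hunks replace the old three-pass dimension pipeline (update, drop, reorder, each a separate streaming operation over the dataset) with a single wholeUpdateDimensions call driven by the consumer's expanded fields. A minimal sketch of that driving input, with invented values (the cField shape follows the diff):

    // Each expanded field describes what happens to one dimension in the
    // combined pass: key selects the dimension, alias renames it, hidden
    // toggles visibility, and the array position becomes the new index.
    const fields = [
        { cField: { key: 'member_id', alias: 'id', hidden: false } },
        { cField: { key: 'plan_code', hidden: true } }
    ];
    // Any dimension not listed here (say 'ssn') gets a toDelete update and
    // is removed in the same pass.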

package/engines/dataset/Dataset.js
CHANGED

@@ -34,6 +34,7 @@ const Helper_1 = __importDefault(require("../../helper/Helper"));
 const Algo_1 = __importDefault(require("../../core/Algo"));
 class Dataset {
     constructor(name, file, batchSize = Constants_1.default.defaults.MAX_ITEMS_IN_MEMORY) {
+        this._pipeline = [];
         this.getPath = () => this._path;
         this.setPath = (path) => {
             this._path = path;

@@ -122,13 +123,14 @@ class Dataset {
         const rl = (0, readline_1.createInterface)({ input: readStream, crlfDelay: Infinity });
         let batch = [];
         let lineCount = 0;
+        const dimensions = Algo_1.default.deepClone(this._dimensions);
         try {
             for (var _d = true, rl_1 = __asyncValues(rl), rl_1_1; rl_1_1 = yield rl_1.next(), _a = rl_1_1.done, !_a; _d = true) {
                 _c = rl_1_1.value;
                 _d = false;
                 const line = _c;
                 try {
-                    const record = new DatasetRecord_1.default(line,
+                    const record = new DatasetRecord_1.default(line, dimensions, this._delimiter);
                     batch.push(record);
                     lineCount++;
                     if (batch.length >= this._batchSize) {

@@ -520,6 +522,7 @@ class Dataset {
         if (batch.length > 0) {
             yield processor(batch, batchIndex);
         }
+        this._iterations++;
         this._finishOperation('stream-batches');
     });
     /**

@@ -679,68 +682,52 @@ class Dataset {
             return this;
         });
         this.getDimensions = () => this._dimensions;
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            }
+        /**
+         * - remove dimension
+         * - rename a dimension
+         * - change hidden flag
+         * - move a dimension
+         */
+        this.wholeUpdateDimensions = (fields) => __awaiter(this, void 0, void 0, function* () {
+            var _a;
+            let updates = [];
+            // Add all the updates
+            for (let i = 0; i < fields.length; i++) {
+                const { cField } = fields[i];
+                const currentMatch = structuredClone(this._dimensions.find(x => x.name === cField.key));
+                updates.push({
+                    currentDimension: currentMatch,
+                    newName: (_a = cField.alias) !== null && _a !== void 0 ? _a : cField.key,
+                    newHidden: cField.hidden,
+                    newPosition: i,
+                    toDelete: false
+                });
             }
-
-
-
-
-
-
-
-
-
-
-
-
-            this._dimensions = this._dimensions
-                .filter(x => !dimensionNames.includes(x.name))
-                .map((x, i) => (Object.assign(Object.assign({}, x), { index: i })));
-            this._finishOperation('drop-dimensions');
-            return this;
-        });
-        this.reorderDimensions = (dimensionNames) => __awaiter(this, void 0, void 0, function* () {
-            if (dimensionNames.length === 0)
+            // Add all the updates to remove dimensions
+            for (const dim of this._dimensions) {
+                if (!updates.find(x => { var _a; return ((_a = x.currentDimension) === null || _a === void 0 ? void 0 : _a.name) === dim.name; }))
+                    updates.push({ currentDimension: dim, toDelete: true });
+            }
+            // Now keep only the updates that actually change something
+            updates = updates.filter(x => x.toDelete
+                || !x.currentDimension
+                || (x.currentDimension && (x.currentDimension.name !== x.newName
+                    || (Algo_1.default.hasVal(x.newHidden) && x.newHidden !== x.currentDimension.hidden)
+                    || x.newPosition !== x.currentDimension.index)));
+            if (updates.length === 0)
                 return this;
-
-
-
-
-
-
-
-
-
-            const dim = this._dimensions.find(x => x.name === name);
-            const newDim = structuredClone(dim);
-            newDim.index = index;
-            return { newDimension: newDim, oldDimension: dim };
+            let updatedDimensions = null;
+            const newDataset = yield this.map(record => {
+                for (const update of updates) {
+                    record.wholeUpdateDimension(update);
+                }
+                record._dimensions.sort((a, b) => a.index - b.index);
+                if (!updatedDimensions)
+                    updatedDimensions = record._dimensions;
+                return record;
             });
-
-
-            // Reorder the data in the file using streaming approach
-            yield this.map(record => record.reorderDimensions(movements));
-            this._finishOperation('reorder-dimensions');
-            return this;
+            this._dimensions = updatedDimensions;
+            return newDataset;
         });
         this.print = (...args_1) => __awaiter(this, [...args_1], void 0, function* (count = 3, full = false) {
             console.log(`DS ${this._name} (${this._size} | ${this._iterations})`);

@@ -862,6 +849,7 @@ class Dataset {
         this._size = 0;
         this._iterations = 0;
         this._operations = [];
+        this._pipeline = [];
         const datasetName = this._name
             .replace(/[^a-zA-Z0-9_-]/g, '_')
             .replace(/_{2,}/g, '_')
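
Putting the Dataset changes together: wholeUpdateDimensions first builds one update per requested field, adds toDelete updates for leftover dimensions, filters out no-ops, and only then streams the file once via map. Continuing the invented example from the PostProcessor section, the filtered update list would look roughly like this:

    // Dimensions on disk: member_id (0), plan_code (1), ssn (2).
    const updates = [
        {   // rename member_id -> id, staying at position 0
            currentDimension: { name: 'member_id', key: 'member_id', index: 0 },
            newName: 'id', newHidden: false, newPosition: 0, toDelete: false
        },
        {   // plan_code keeps its name but becomes hidden
            currentDimension: { name: 'plan_code', key: 'plan_code', index: 1, hidden: false },
            newName: 'plan_code', newHidden: true, newPosition: 1, toDelete: false
        },
        {   // ssn is not requested by the consumer, so it is dropped
            currentDimension: { name: 'ssn', key: 'ssn', index: 2 }, toDelete: true
        }
    ];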

package/engines/dataset/DatasetRecord.js
CHANGED

@@ -6,44 +6,45 @@ Object.defineProperty(exports, "__esModule", { value: true });
 const Algo_1 = __importDefault(require("../../core/Algo"));
 class DatasetRecord {
     constructor(row, dimensions, delimiter) {
+        this.parse = (row, delimiter, dimensions) => {
+            if (!this.isEmpty() && dimensions.length > 0) {
+                const parts = row.split(delimiter);
+                for (let i = 0; i < dimensions.length; i++) {
+                    const dim = dimensions[i];
+                    this._value[dim.name] = parts[i];
+                }
+            }
+        };
         this.stringify = () => this._dimensions.map(x => this._value[x.name]).join(this._delimiter);
         this.isEmpty = () => { var _a; return ((_a = this._row) === null || _a === void 0 ? void 0 : _a.trim().length) === 0; };
         this.getRaw = () => this._row;
         this.getValue = (dimension) => this._value[dimension];
         this.setValue = (dimension, value) => this._value[dimension] = value;
-        this.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            // Add the dimension with its new index
-            this._dimensions.push(newDimension);
-            // Remap the value from the old position to the new dimension name
-            if (parts.length > oldDimension.index) {
-                newValue[newDimension.name] = parts[oldDimension.index];
-            }
+        this.wholeUpdateDimension = (update) => {
+            var _a;
+            if (update.toDelete) {
+                // To remove
+                delete this._value[update.currentDimension.name];
+                this._dimensions = this._dimensions.filter(x => x.key !== update.currentDimension.name);
+            }
+            else if (!update.currentDimension) {
+                // To create (at the right position)
+                const newDimension = { index: update.newPosition, key: update.newName, name: update.newName, hidden: update.newHidden };
+                this._value[newDimension.name] = null;
+                this._dimensions = [...this._dimensions, newDimension];
+            }
+            else {
+                // Change: name, hidden, position
+                const index = this._dimensions.findIndex(x => x.key === update.currentDimension.name);
+                const currentDim = this._dimensions[index];
+                const updatedDim = { name: update.newName, key: (_a = currentDim.key) !== null && _a !== void 0 ? _a : update.newName, hidden: update.newHidden, index: update.newPosition };
+                this._value[updatedDim.name] = this._value[currentDim.name];
+                if (updatedDim.name !== currentDim.name)
+                    delete this._value[currentDim.name];
+                const newDimensions = [...this._dimensions];
+                newDimensions.splice(index, 1, updatedDim);
+                this._dimensions = newDimensions;
             }
-            // Update the value mapping
-            this._value = newValue;
             return this;
         };
         this.toJSON = () => {

@@ -70,16 +71,10 @@ class DatasetRecord {
             return line;
         };
         this._row = row;
-        this._dimensions = dimensions
+        this._dimensions = dimensions;
         this._delimiter = delimiter;
         this._value = {};
-
-        const parts = row.split(delimiter);
-        for (let i = 0; i < dimensions.length; i++) {
-            const dim = dimensions[i];
-            this._value[dim.name] = parts[i];
-        }
-    }
+        this.parse(row, delimiter, this._dimensions);
     }
 }
 exports.default = DatasetRecord;
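
A small usage sketch of the reworked record (the constructor signature and accessors are as in the diff; the require path is inferred from the file list above, and the row data is invented):

    const DatasetRecord = require('@forzalabs/remora/engines/dataset/DatasetRecord').default;

    // parse() splits the row by the delimiter and maps each part onto the
    // dimension at the same position, so values are addressable by name.
    const dims = [
        { name: 'id', key: 'id', index: 0 },
        { name: 'amount', key: 'amount', index: 1 }
    ];
    const rec = new DatasetRecord('42;19.90', dims, ';');

    console.log(rec.getValue('amount')); // '19.90'
    console.log(rec.stringify());        // '42;19.90'

This is likely why the Dataset read loop above deep-clones the dimensions: wholeUpdateDimension replaces record._dimensions in place, so records should not share the dataset's live dimension array while an update pass is in flight.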

package/engines/execution/ExecutionEnvironment.js
CHANGED

@@ -17,7 +17,6 @@ const DriverFactory_1 = __importDefault(require("../../drivers/DriverFactory"));
 const ConsumerEngine_1 = __importDefault(require("../consumer/ConsumerEngine"));
 const PostProcessor_1 = __importDefault(require("../consumer/PostProcessor"));
 const FileExporter_1 = __importDefault(require("../file/FileExporter"));
-const ProducerEngine_1 = __importDefault(require("../producer/ProducerEngine"));
 const SQLBuilder_1 = __importDefault(require("../sql/SQLBuilder"));
 const SQLCompiler_1 = __importDefault(require("../sql/SQLCompiler"));
 const ExecutionPlanner_1 = __importDefault(require("./ExecutionPlanner"));

@@ -70,27 +69,6 @@ class ExecutionEnvironment {
                     this._storeIntermidiate(planStep, dataset);
                     break;
                 }
-                case 'read-file-whole': {
-                    (0, Affirm_1.default)(planStep.producer, `Invalid producer in read-file-whole step`);
-                    const fileData = yield ProducerEngine_1.default.readFile(planStep.producer, { readmode: 'all' });
-                    this._storeIntermidiate(planStep, fileData.dataset);
-                    break;
-                }
-                case 'read-file-lines': {
-                    (0, Affirm_1.default)(planStep.lines, `Invalid lines in read-file-lines step`);
-                    (0, Affirm_1.default)(planStep.producer, `Invalid producer in read-file-lines step`);
-                    const { producer, lines: { from, to } } = planStep;
-                    const fileData = yield ProducerEngine_1.default.readFile(producer, { readmode: 'lines', lines: { from, to } });
-                    this._storeIntermidiate(planStep, fileData.dataset);
-                    break;
-                }
-                case 'download-file-locally': {
-                    (0, Affirm_1.default)(planStep.producer, `Invalid producer in download-file-locally step`);
-                    const { producer } = planStep;
-                    const readRes = yield ProducerEngine_1.default.readFile(producer, { readmode: 'download' });
-                    this._storeIntermidiate(planStep, readRes.dataset);
-                    break;
-                }
                 case 'load-dataset': {
                     (0, Affirm_1.default)(planStep.producer, `Invalid producer in read-file-lines step`);
                     const { producer } = planStep;

@@ -171,14 +149,14 @@ class ExecutionEnvironment {
                 }
                 default: throw new Error(`Invalid execution plan step type "${planStep.type}"`);
             }
-            Logger_1.default.log(
+            Logger_1.default.log(`\tCompleted step: ${planStep.type}`);
         }
     }
     catch (error) {
         const ds = (_a = this._resultingDataset) !== null && _a !== void 0 ? _a : this._getIntermidiate(currentStep);
         if (ds)
             Logger_1.default.log(`Failed execution of consumer at step ${currentStep.type}:\n\tSize: ${ds.getSize()}\n\tCycles: ${ds.getCycles()}\n\tOperations: ${Logger_1.default.formatList(ds.getOperations())}`);
-        Logger_1.default.log(
+        Logger_1.default.log(`\tFailed step: ${currentStep.type}->\n\t${error}`);
         throw error;
     }
     Logger_1.default.log(`Completed execution of consumer:\n\tSize: ${result._stats.size}\n\tCycles: ${result._stats.cycles}\n\tTime: ${result._stats.elapsedMS}\n\tOperations: ${Logger_1.default.formatList(result._stats.operations)}`);

package/engines/transform/JoinEngine.js
CHANGED

@@ -92,6 +92,8 @@ class JoinEngineClass {
         (0, Affirm_1.default)(producedData, 'Invalid produced data');
         if (consumer.producers.length <= 1)
             return this.findProducerData(consumer.producers[0].name, producedData);
+        if (consumer.producers.some(x => x.union))
+            return yield this.union(consumer, producedData);
         const consumerShape = ConsumerEngine_1.default.getOutputShape(consumer);
         const consumerColumns = ConsumerEngine_1.default.compile(consumer);
         // Create a new dataset for the joined result

@@ -132,6 +134,21 @@ class JoinEngineClass {
         }
         return resultDataset;
     });
+    this.union = (consumer, producedData) => __awaiter(this, void 0, void 0, function* () {
+        const getDimensionsKey = (ds) => ds.getDimensions().map(x => x.name.trim()).join(';').trim();
+        const mainDataset = producedData[0].dataset;
+        const mainDimKey = getDimensionsKey(mainDataset);
+        const otherProducedData = producedData.slice(1);
+        for (const prodData of otherProducedData) {
+            const prodDimKey = getDimensionsKey(prodData.dataset);
+            if (mainDimKey !== prodDimKey)
+                throw new Error(`On consumer "${consumer.name}", can't union the dataset "${prodData.dataset['_name']}" (producer: ${prodData.producerKey}) because the dimensions are different from the main dataset "${mainDataset['_name']}" (producer: ${producedData[0].producerKey}). "${mainDimKey}" != "${prodDimKey}"`);
+            yield prodData.dataset.streamBatches((batch) => __awaiter(this, void 0, void 0, function* () {
+                yield mainDataset.append(batch);
+            }));
+        }
+        return mainDataset;
+    });
     this.performStreamingJoin = (leftDataset, rightLookup, condition, relationship, consumerColumns, resultDataset) => __awaiter(this, void 0, void 0, function* () {
         const joinedRecords = [];
         const batchSize = leftDataset.getBatchSize();
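
The union compatibility check reduces each dataset's dimension names to a single key and requires an exact match, order included. An isolated sketch of the helper's behavior (dimension lists invented; in the diff it takes a dataset and calls getDimensions()):

    const getDimensionsKey = (dims) => dims.map(x => x.name.trim()).join(';').trim();

    const main = [{ name: 'id' }, { name: 'amount' }];
    const other = [{ name: 'amount' }, { name: 'id' }]; // same columns, different order

    console.log(getDimensionsKey(main));  // 'id;amount'
    console.log(getDimensionsKey(other)); // 'amount;id', so union() would throw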

package/engines/validation/Validator.js
CHANGED

@@ -90,6 +90,12 @@ class ValidatorClass {
             errors.push('No producers found');
         if (producers.some(x => !x))
             errors.push(`Invalid producer found in consumer "${consumer.name}"`);
+        if (consumer.producers.length > 0) {
+            const withJoins = consumer.producers.filter(x => (Algo_1.default.hasVal(x.joins) && x.joins.length > 0) || !x.union);
+            const withUnions = consumer.producers.filter(x => x.union === true);
+            if (withJoins.length > 0 && withUnions.length)
+                errors.push(`Multiple producers in consumer have mixed "joins" and "union": you can either have multiple producers with "joins" or multiple producers with "union", but not both (joins: ${withJoins.map(x => x.name).join(', ')}; unions: ${withUnions.map(x => x.name).join(', ')})`);
+        }
         // Validation on sources
         const sources = producers.map(x => Environment_1.default.getSource(x.source));
         if (sources.length === 0)
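
A hypothetical consumer that the new check would reject (names and the join shape are invented): one producer declares joins while another opts into union, so the validator reports the mixed configuration instead of silently picking a strategy.

    const consumer = {
        name: 'mixed-example',
        producers: [
            // counted in withJoins: it has joins (and no union flag)
            { name: 'a', source: 's1', joins: [{ producer: 'b', on: 'id' }] },
            // counted in withUnions: union === true
            { name: 'b', source: 's2', union: true }
        ]
    };
    // -> 'Multiple producers in consumer have mixed "joins" and "union": ...'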
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@forzalabs/remora",
-  "version": "0.0.49-nasco.3",
+  "version": "0.0.51-nasco.3",
   "description": "A powerful CLI tool for seamless data translation.",
   "main": "index.js",
   "private": false,

@@ -9,6 +9,7 @@
   },
   "scripts": {
     "sync": "cd ../dev_ops && npm run sync",
+    "dev": "clear && npx tsx scripts/dev.ts",
    "tsc-check": "npx tsc --noemit",
    "init": "npx tsx ./src/index.ts init",
    "version": "npx tsx ./src/index.ts -v",