@forzalabs/remora 0.0.23 → 0.0.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Constants.js +1 -1
- package/actions/run.js +10 -3
- package/engines/ParseManager.js +9 -6
- package/engines/ProducerEngine.js +2 -2
- package/engines/ai/DeveloperEngine.js +1 -1
- package/engines/consumer/PostProcessor.js +9 -3
- package/engines/execution/ExecutionEnvironment.js +53 -24
- package/engines/execution/ExecutionPlanner.js +58 -33
- package/engines/transform/JoinEngine.js +149 -0
- package/engines/transform/TypeCaster.js +6 -0
- package/package.json +2 -1
package/Constants.js
CHANGED
package/actions/run.js
CHANGED
|
@@ -26,9 +26,16 @@ const run = (consumerName) => __awaiter(void 0, void 0, void 0, function* () {
|
|
|
26
26
|
console.log();
|
|
27
27
|
const spinner = (0, ora_1.default)(chalk_1.default.blue('Running consumer(s)...\n')).start();
|
|
28
28
|
const user = UserManager_1.default.getUser();
|
|
29
|
-
const consumersToExecute =
|
|
30
|
-
|
|
31
|
-
|
|
29
|
+
const consumersToExecute = [];
|
|
30
|
+
if (consumerName && consumerName.length > 0) {
|
|
31
|
+
const cons = Environment_1.default.getConsumer(consumerName);
|
|
32
|
+
if (!cons)
|
|
33
|
+
throw new Error(`Consumer with name "${consumerName}" was not found.`);
|
|
34
|
+
consumersToExecute.push(cons);
|
|
35
|
+
}
|
|
36
|
+
else {
|
|
37
|
+
consumersToExecute.push(...Environment_1.default._env.consumers);
|
|
38
|
+
}
|
|
32
39
|
const results = [];
|
|
33
40
|
for (let i = 0; i < consumersToExecute.length; i++) {
|
|
34
41
|
const consumer = consumersToExecute[i];
|
package/engines/ParseManager.js
CHANGED
|
@@ -11,16 +11,16 @@ class ParseManagerClass {
|
|
|
11
11
|
this.csvToJson = (csv, producer) => {
|
|
12
12
|
(0, Affirm_1.default)(csv, 'Invalid csv content');
|
|
13
13
|
Affirm_1.default.hasValue(csv.length, 'Invalid csv content length');
|
|
14
|
-
|
|
15
|
-
return this.csvLinesToJson(fileRows, producer);
|
|
14
|
+
return this.csvLinesToJson(csv, producer);
|
|
16
15
|
};
|
|
17
|
-
this.csvLinesToJson = (lines, producer) => {
|
|
16
|
+
this.csvLinesToJson = (lines, producer, discover) => {
|
|
18
17
|
var _a;
|
|
19
18
|
(0, Affirm_1.default)(lines, 'Invalid csv lines');
|
|
20
19
|
Affirm_1.default.hasValue(lines.length, 'Invalid csv lines length');
|
|
21
20
|
const delimiterChar = (_a = producer.settings.delimiter) !== null && _a !== void 0 ? _a : ',';
|
|
21
|
+
const headerRow = lines[0];
|
|
22
22
|
const rows = lines.slice(1).map(x => x.split(delimiterChar).map(k => k.trim()));
|
|
23
|
-
const headerColumns = this._extractHeader(
|
|
23
|
+
const headerColumns = this._extractHeader(headerRow, delimiterChar, producer, discover);
|
|
24
24
|
const result = [];
|
|
25
25
|
for (const row of rows) {
|
|
26
26
|
const rowObject = {};
|
|
@@ -32,14 +32,17 @@ class ParseManagerClass {
|
|
|
32
32
|
}
|
|
33
33
|
return result;
|
|
34
34
|
};
|
|
35
|
-
this._extractHeader = (headerLine, delimiter, producer) => {
|
|
35
|
+
this._extractHeader = (headerLine, delimiter, producer, discover) => {
|
|
36
36
|
var _a;
|
|
37
|
-
(0, Affirm_1.default)(headerLine,
|
|
37
|
+
(0, Affirm_1.default)(headerLine, `Invalid CSV header line for producer "${producer.name}"`);
|
|
38
38
|
(0, Affirm_1.default)(delimiter, 'Invalid CSV delimiter');
|
|
39
39
|
(0, Affirm_1.default)(producer, 'Invalid producer');
|
|
40
40
|
const source = Environment_1.default.getSource(producer.source);
|
|
41
41
|
const columns = FileCompiler_1.default.compileProducer(producer, source);
|
|
42
42
|
const headerColumns = headerLine.split(delimiter).map(x => x.trim());
|
|
43
|
+
// If I'm discovering the file, then it means that the dimensions are not set, so I use the ones that I get from the file directly
|
|
44
|
+
if (discover)
|
|
45
|
+
columns.push(...headerColumns.map(x => ({ nameInProducer: x })));
|
|
43
46
|
const csvColumns = [];
|
|
44
47
|
for (const pColumn of columns) {
|
|
45
48
|
const columnKey = (_a = pColumn.aliasInProducer) !== null && _a !== void 0 ? _a : pColumn.nameInProducer;
|
|
@@ -111,7 +111,7 @@ class ProducerEngineClass {
|
|
|
111
111
|
throw new Error(`Invalid file type "${producer.settings.fileType}" for engine type "${source.engine}" for producer "${producer.name}": not supported`);
|
|
112
112
|
}
|
|
113
113
|
});
|
|
114
|
-
this.readSampleData = (producer_1, ...args_1) => __awaiter(this, [producer_1, ...args_1], void 0, function* (producer, sampleSize = 10) {
|
|
114
|
+
this.readSampleData = (producer_1, ...args_1) => __awaiter(this, [producer_1, ...args_1], void 0, function* (producer, sampleSize = 10, discover = false) {
|
|
115
115
|
var _a, _b, _c;
|
|
116
116
|
(0, Affirm_1.default)(producer, 'Invalid producer');
|
|
117
117
|
(0, Affirm_1.default)(sampleSize > 0, 'Sample size must be greater than 0');
|
|
@@ -132,7 +132,7 @@ class ProducerEngineClass {
|
|
|
132
132
|
case 'aws-s3': {
|
|
133
133
|
const fileData = yield this.readFile(producer, { readmode: 'lines', lines: { from: 0, to: sampleSize } });
|
|
134
134
|
if (((_a = producer.settings.fileType) === null || _a === void 0 ? void 0 : _a.toUpperCase()) === 'CSV') {
|
|
135
|
-
sampleData = ParseManager_1.default.csvLinesToJson(fileData.data, producer);
|
|
135
|
+
sampleData = ParseManager_1.default.csvLinesToJson(fileData.data, producer, discover);
|
|
136
136
|
}
|
|
137
137
|
else if (((_b = producer.settings.fileType) === null || _b === void 0 ? void 0 : _b.toUpperCase()) === 'JSON' || ((_c = producer.settings.fileType) === null || _c === void 0 ? void 0 : _c.toUpperCase()) === 'JSONL') {
|
|
138
138
|
// With JSON or JSONL the readFile function already parses the strings
|
|
@@ -21,7 +21,7 @@ class DeveloperEngineClass {
|
|
|
21
21
|
this.discover = (producer) => __awaiter(this, void 0, void 0, function* () {
|
|
22
22
|
var _a;
|
|
23
23
|
(0, Affirm_1.default)(producer, 'Invalid producer');
|
|
24
|
-
const sampleData = yield ProducerEngine_1.default.readSampleData(producer);
|
|
24
|
+
const sampleData = yield ProducerEngine_1.default.readSampleData(producer, 10, true);
|
|
25
25
|
(0, Affirm_1.default)(sampleData, 'Discover process failed: no result found');
|
|
26
26
|
const typeDefinitions = this.extractFieldTypes(sampleData);
|
|
27
27
|
const mappedProducer = {
|
|
@@ -145,10 +145,16 @@ class PostProcessorClass {
|
|
|
145
145
|
return unpackedData;
|
|
146
146
|
};
|
|
147
147
|
this._getFieldValue = (record, field) => {
|
|
148
|
+
var _a, _b, _c;
|
|
148
149
|
const fieldValue = record[field.cField.key];
|
|
149
|
-
if (Algo_1.default.hasVal(fieldValue))
|
|
150
|
-
|
|
151
|
-
|
|
150
|
+
if (Algo_1.default.hasVal(fieldValue) && !isNaN(fieldValue)) {
|
|
151
|
+
const fieldType = (_b = (_a = field.dimension) === null || _a === void 0 ? void 0 : _a.type) !== null && _b !== void 0 ? _b : 'string';
|
|
152
|
+
if (fieldType === 'number' && typeof fieldValue === 'string' && fieldValue.length === 0)
|
|
153
|
+
return (_c = field.cField.default) !== null && _c !== void 0 ? _c : fieldValue;
|
|
154
|
+
else
|
|
155
|
+
return fieldValue;
|
|
156
|
+
}
|
|
157
|
+
else if ((!Algo_1.default.hasVal(fieldValue) || isNaN(fieldValue)) && Algo_1.default.hasVal(field.cField.default))
|
|
152
158
|
return field.cField.default;
|
|
153
159
|
else
|
|
154
160
|
return fieldValue;
|
|
@@ -24,6 +24,7 @@ const SQLCompiler_1 = __importDefault(require("../sql/SQLCompiler"));
|
|
|
24
24
|
const ExecutionPlanner_1 = __importDefault(require("./ExecutionPlanner"));
|
|
25
25
|
const RequestExecutor_1 = __importDefault(require("./RequestExecutor"));
|
|
26
26
|
const TransformationEngine_1 = __importDefault(require("../transform/TransformationEngine"));
|
|
27
|
+
const JoinEngine_1 = __importDefault(require("../transform/JoinEngine"));
|
|
27
28
|
class ExecutionEnvironment {
|
|
28
29
|
constructor(consumer) {
|
|
29
30
|
this.run = (options) => __awaiter(this, void 0, void 0, function* () {
|
|
@@ -37,28 +38,27 @@ class ExecutionEnvironment {
|
|
|
37
38
|
switch (planStep.type) {
|
|
38
39
|
case 'compile-consumer-to-SQL': {
|
|
39
40
|
const sql = SQLCompiler_1.default.getConsumerReference(this._consumer);
|
|
40
|
-
this.
|
|
41
|
-
this.
|
|
41
|
+
this._envData.consumerSQL = sql;
|
|
42
|
+
this._envData.finalSQL = sql;
|
|
42
43
|
break;
|
|
43
44
|
}
|
|
44
45
|
case 'compile-execution-request-to-SQL': {
|
|
45
46
|
const sql = SQLBuilder_1.default.buildConsumerQuery(options);
|
|
46
|
-
this.
|
|
47
|
-
this.
|
|
47
|
+
this._envData.executionRequestSQL = sql;
|
|
48
|
+
this._envData.finalSQL = `WITH consumer AS (${this._envData.consumerSQL})\nSELECT * FROM consumer${this._envData.executionRequestSQL}`;
|
|
48
49
|
break;
|
|
49
50
|
}
|
|
50
51
|
case 'execute-SQL': {
|
|
51
52
|
(0, Affirm_1.default)(planStep.source, `Invalid source in execute-SQL step`);
|
|
52
53
|
const driver = yield DriverFactory_1.default.instantiateSource(planStep.source);
|
|
53
|
-
|
|
54
|
-
this.
|
|
54
|
+
const queryData = (yield driver.query(this._envData.finalSQL)).rows;
|
|
55
|
+
this._storeIntermidiate(planStep, queryData);
|
|
55
56
|
break;
|
|
56
57
|
}
|
|
57
58
|
case 'read-file-whole': {
|
|
58
59
|
(0, Affirm_1.default)(planStep.producer, `Invalid producer in read-file-whole step`);
|
|
59
60
|
const fileData = yield ProducerEngine_1.default.readFile(planStep.producer, { readmode: 'all' });
|
|
60
|
-
this.
|
|
61
|
-
this._fetchedDataType = fileData.dataType;
|
|
61
|
+
this._storeIntermidiate(planStep, fileData.data);
|
|
62
62
|
break;
|
|
63
63
|
}
|
|
64
64
|
case 'read-file-lines': {
|
|
@@ -66,56 +66,85 @@ class ExecutionEnvironment {
|
|
|
66
66
|
(0, Affirm_1.default)(planStep.producer, `Invalid producer in read-file-lines step`);
|
|
67
67
|
const { producer, lines: { from, to } } = planStep;
|
|
68
68
|
const fileData = yield ProducerEngine_1.default.readFile(producer, { readmode: 'lines', lines: { from, to } });
|
|
69
|
-
this.
|
|
70
|
-
this._fetchedDataType = fileData.dataType;
|
|
69
|
+
this._storeIntermidiate(planStep, fileData.data);
|
|
71
70
|
break;
|
|
72
71
|
}
|
|
73
72
|
case 'nested-field-unpacking': {
|
|
74
73
|
(0, Affirm_1.default)(planStep.producer, `Invalid producer in nested-field-unpacking step`);
|
|
75
|
-
|
|
74
|
+
const unpackedData = PostProcessor_1.default.unpack(this._resultingData, planStep.producer);
|
|
75
|
+
this._storeIntermidiate(planStep, unpackedData);
|
|
76
76
|
break;
|
|
77
77
|
}
|
|
78
78
|
case 'post-process-json': {
|
|
79
|
-
this._fetchedData =
|
|
79
|
+
// this._fetchedData = PostProcessor.doProjection(this._consumer, this._fetchedData)
|
|
80
|
+
const myProdData = this._getIntermidiate(planStep);
|
|
81
|
+
const processedData = PostProcessor_1.default.doProjection(this._consumer, myProdData);
|
|
82
|
+
this._storeIntermidiate(planStep, processedData);
|
|
80
83
|
break;
|
|
81
84
|
}
|
|
82
85
|
case 'csv-to-json': {
|
|
83
|
-
(0, Affirm_1.default)(this.
|
|
84
|
-
(0, Affirm_1.default)(Array.isArray(this.
|
|
86
|
+
(0, Affirm_1.default)(this._resultingData, 'Invalid data');
|
|
87
|
+
(0, Affirm_1.default)(Array.isArray(this._resultingData), 'Invalid data type, must be an array');
|
|
85
88
|
(0, Affirm_1.default)(planStep.producer, `Invalid producer in csv-to-json step`);
|
|
86
|
-
const csv = this.
|
|
87
|
-
this._fetchedData =
|
|
89
|
+
const csv = this._getIntermidiate(planStep);
|
|
90
|
+
// this._fetchedData = ParseManager.csvToJson(csv, planStep.producer)
|
|
91
|
+
const jsonData = ParseManager_1.default.csvToJson(csv, planStep.producer);
|
|
92
|
+
this._storeIntermidiate(planStep, jsonData);
|
|
88
93
|
break;
|
|
89
94
|
}
|
|
90
95
|
case 'export-file': {
|
|
91
96
|
(0, Affirm_1.default)(planStep.output, `Invalid output in export-file step`);
|
|
92
|
-
const res = yield FileExporter_1.default.export(this._consumer, planStep.output, this.
|
|
97
|
+
const res = yield FileExporter_1.default.export(this._consumer, planStep.output, this._resultingData);
|
|
93
98
|
result.fileUri = res;
|
|
94
99
|
break;
|
|
95
100
|
}
|
|
96
101
|
case 'apply-execution-request-to-result': {
|
|
97
|
-
this.
|
|
102
|
+
this._resultingData = RequestExecutor_1.default.execute(this._resultingData, options);
|
|
98
103
|
break;
|
|
99
104
|
}
|
|
100
105
|
case 'apply-consumer-filters-on-JSON': {
|
|
101
|
-
this.
|
|
106
|
+
this._resultingData = RequestExecutor_1.default._applyFilters(this._resultingData, this._consumer.filters.map(x => x.rule));
|
|
102
107
|
break;
|
|
103
108
|
}
|
|
104
109
|
case 'apply-transformations': {
|
|
105
|
-
this.
|
|
110
|
+
this._resultingData = TransformationEngine_1.default.apply(this._consumer, this._resultingData);
|
|
111
|
+
break;
|
|
112
|
+
}
|
|
113
|
+
case 'join-producers-data': {
|
|
114
|
+
const joinedData = JoinEngine_1.default.join(this._consumer, this._producedData);
|
|
115
|
+
this._resultingData = joinedData;
|
|
106
116
|
break;
|
|
107
117
|
}
|
|
108
118
|
default: throw new Error(`Invalid execution plan step type "${planStep.type}"`);
|
|
109
119
|
}
|
|
110
120
|
}
|
|
111
|
-
result.data = this.
|
|
121
|
+
result.data = this._resultingData;
|
|
112
122
|
result._elapsedMS = performance.now() - start;
|
|
113
123
|
return result;
|
|
114
124
|
});
|
|
125
|
+
this._storeIntermidiate = (step, data) => {
|
|
126
|
+
var _a, _b;
|
|
127
|
+
(0, Affirm_1.default)(step, 'Invalid step');
|
|
128
|
+
const key = (_b = (_a = step.producer) === null || _a === void 0 ? void 0 : _a.name) !== null && _b !== void 0 ? _b : '_default_';
|
|
129
|
+
let pData = this._producedData.find(x => x.producerKey === key);
|
|
130
|
+
if (!pData) {
|
|
131
|
+
pData = { producerKey: key, data: [] };
|
|
132
|
+
this._producedData.push(pData);
|
|
133
|
+
}
|
|
134
|
+
pData.data = data;
|
|
135
|
+
};
|
|
136
|
+
this._getIntermidiate = (step) => {
|
|
137
|
+
var _a, _b;
|
|
138
|
+
(0, Affirm_1.default)(step, 'Invalid step');
|
|
139
|
+
const key = (_b = (_a = step.producer) === null || _a === void 0 ? void 0 : _a.name) !== null && _b !== void 0 ? _b : '_default_';
|
|
140
|
+
const produced = this._producedData.find(x => x.producerKey === key);
|
|
141
|
+
(0, Affirm_1.default)(produced, `No produced dataset found for step "${step.type}" of producer "${key}".`);
|
|
142
|
+
return produced.data;
|
|
143
|
+
};
|
|
115
144
|
this._consumer = consumer;
|
|
116
|
-
this.
|
|
117
|
-
this.
|
|
118
|
-
this.
|
|
145
|
+
this._envData = { consumerSQL: null, executionRequestSQL: null, finalSQL: null };
|
|
146
|
+
this._producedData = [];
|
|
147
|
+
this._resultingData = [];
|
|
119
148
|
}
|
|
120
149
|
}
|
|
121
150
|
exports.default = ExecutionEnvironment;
|
|
@@ -6,6 +6,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
6
6
|
const Affirm_1 = __importDefault(require("../../core/Affirm"));
|
|
7
7
|
const Algo_1 = __importDefault(require("../../core/Algo"));
|
|
8
8
|
const ConsumerManager_1 = __importDefault(require("../consumer/ConsumerManager"));
|
|
9
|
+
const Environment_1 = __importDefault(require("../Environment"));
|
|
9
10
|
class ExecutionPlannerClas {
|
|
10
11
|
constructor() {
|
|
11
12
|
this.getEngineClass = (engine) => {
|
|
@@ -18,39 +19,9 @@ class ExecutionPlannerClas {
|
|
|
18
19
|
}
|
|
19
20
|
};
|
|
20
21
|
this.plan = (consumer, options) => {
|
|
21
|
-
var _a, _b;
|
|
22
22
|
(0, Affirm_1.default)(consumer, 'Invalid consumer');
|
|
23
|
-
const
|
|
24
|
-
const
|
|
25
|
-
const plan = [];
|
|
26
|
-
switch (producerEngine) {
|
|
27
|
-
case 'postgres':
|
|
28
|
-
case 'aws-redshift': {
|
|
29
|
-
plan.push({ type: 'compile-consumer-to-SQL' });
|
|
30
|
-
if (Algo_1.default.hasVal(options))
|
|
31
|
-
plan.push({ type: 'compile-execution-request-to-SQL' });
|
|
32
|
-
plan.push({ type: 'execute-SQL', source: source });
|
|
33
|
-
break;
|
|
34
|
-
}
|
|
35
|
-
case 'local':
|
|
36
|
-
case 'aws-s3': {
|
|
37
|
-
const prod = producer;
|
|
38
|
-
if (Algo_1.default.hasVal(options) && (options.limit || options.offset))
|
|
39
|
-
plan.push({ type: 'read-file-lines', producer: prod, lines: { from: (_a = options.offset) !== null && _a !== void 0 ? _a : 0, to: options.limit ? (options.offset + options.limit) : undefined } });
|
|
40
|
-
else
|
|
41
|
-
plan.push({ type: 'read-file-whole', producer: prod });
|
|
42
|
-
if (((_b = prod.settings.fileType) === null || _b === void 0 ? void 0 : _b.toUpperCase()) === 'CSV') {
|
|
43
|
-
plan.push({ type: 'csv-to-json', producer: prod });
|
|
44
|
-
}
|
|
45
|
-
if (prod.dimensions.some(x => { var _a, _b; return ((_a = x.alias) === null || _a === void 0 ? void 0 : _a.includes('{')) || ((_b = x.alias) === null || _b === void 0 ? void 0 : _b.includes('[')); }))
|
|
46
|
-
plan.push({ type: 'nested-field-unpacking', producer: prod });
|
|
47
|
-
plan.push({ type: 'post-process-json' });
|
|
48
|
-
if (consumer.filters && consumer.filters.length > 0)
|
|
49
|
-
plan.push({ type: 'apply-consumer-filters-on-JSON' });
|
|
50
|
-
break;
|
|
51
|
-
}
|
|
52
|
-
default: throw new Error(`Engine "${producerEngine}" not supported`);
|
|
53
|
-
}
|
|
23
|
+
const producersPlan = this._planProducers(consumer, options);
|
|
24
|
+
const plan = [...producersPlan];
|
|
54
25
|
// At this point I have the data loaded in memory
|
|
55
26
|
// TODO: can I handle streaming data? (e.g. a file that is too big to fit in memory)
|
|
56
27
|
// TODO: how to handle pagination of SQL results?
|
|
@@ -58,7 +29,8 @@ class ExecutionPlannerClas {
|
|
|
58
29
|
// TODO: transformations can also be applied directly to the producer... how???
|
|
59
30
|
if (consumer.fields.some(x => Algo_1.default.hasVal(x.transform)))
|
|
60
31
|
plan.push({ type: 'apply-transformations' });
|
|
61
|
-
const
|
|
32
|
+
const [source] = ConsumerManager_1.default.getSource(consumer);
|
|
33
|
+
const engineClass = this.getEngineClass(source.engine);
|
|
62
34
|
for (const output of consumer.outputs) {
|
|
63
35
|
switch (output.format.toUpperCase()) {
|
|
64
36
|
case 'JSON': {
|
|
@@ -90,6 +62,59 @@ class ExecutionPlannerClas {
|
|
|
90
62
|
}
|
|
91
63
|
return plan;
|
|
92
64
|
};
|
|
65
|
+
this._planProducers = (consumer, options) => {
|
|
66
|
+
(0, Affirm_1.default)(consumer, 'Invalid consumer');
|
|
67
|
+
const producers = consumer.producers.map(x => Environment_1.default.getProducer(x.name));
|
|
68
|
+
(0, Affirm_1.default)(producers, `Invalid producers on consumer "${consumer.name}"`);
|
|
69
|
+
(0, Affirm_1.default)(producers.every(x => Algo_1.default.hasVal(x)), `One or more producers of consumer "${consumer.name}" not found.`);
|
|
70
|
+
const sources = producers.map(x => Environment_1.default.getSource(x.source));
|
|
71
|
+
(0, Affirm_1.default)(sources, `Invalid sources on consumer "${consumer.name}"`);
|
|
72
|
+
(0, Affirm_1.default)(sources.every(x => Algo_1.default.hasVal(x)), `One or more sources of consumer "${consumer.name}" not found.`);
|
|
73
|
+
const engineClasses = sources.map(x => this.getEngineClass(x.engine));
|
|
74
|
+
const uniqEngineClasses = Algo_1.default.uniq(engineClasses);
|
|
75
|
+
const plan = [];
|
|
76
|
+
if (uniqEngineClasses.length === 1 && uniqEngineClasses[0] === 'sql')
|
|
77
|
+
plan.push(...this._planProducer(producers[0], options));
|
|
78
|
+
else
|
|
79
|
+
plan.push(...(producers.flatMap(x => this._planProducer(x, options))));
|
|
80
|
+
plan.push({ type: 'join-producers-data' });
|
|
81
|
+
if (consumer.filters && consumer.filters.length > 0)
|
|
82
|
+
plan.push({ type: 'apply-consumer-filters-on-JSON' });
|
|
83
|
+
return plan;
|
|
84
|
+
};
|
|
85
|
+
this._planProducer = (producer, options) => {
|
|
86
|
+
var _a, _b;
|
|
87
|
+
(0, Affirm_1.default)(producer, 'Invalid producer');
|
|
88
|
+
const source = Environment_1.default.getSource(producer.source);
|
|
89
|
+
(0, Affirm_1.default)(source, `Source "${producer.source}" of producer "${producer.name}" not found.`);
|
|
90
|
+
const plan = [];
|
|
91
|
+
const producerEngine = source.engine;
|
|
92
|
+
switch (producerEngine) {
|
|
93
|
+
case 'postgres':
|
|
94
|
+
case 'aws-redshift': {
|
|
95
|
+
plan.push({ type: 'compile-consumer-to-SQL', producer });
|
|
96
|
+
if (Algo_1.default.hasVal(options))
|
|
97
|
+
plan.push({ type: 'compile-execution-request-to-SQL', producer });
|
|
98
|
+
plan.push({ type: 'execute-SQL', source: source, producer });
|
|
99
|
+
break;
|
|
100
|
+
}
|
|
101
|
+
case 'local':
|
|
102
|
+
case 'aws-s3': {
|
|
103
|
+
if (Algo_1.default.hasVal(options) && (options.limit || options.offset))
|
|
104
|
+
plan.push({ type: 'read-file-lines', producer, lines: { from: (_a = options.offset) !== null && _a !== void 0 ? _a : 0, to: options.limit ? (options.offset + options.limit) : undefined } });
|
|
105
|
+
else
|
|
106
|
+
plan.push({ type: 'read-file-whole', producer });
|
|
107
|
+
if (((_b = producer.settings.fileType) === null || _b === void 0 ? void 0 : _b.toUpperCase()) === 'CSV')
|
|
108
|
+
plan.push({ type: 'csv-to-json', producer });
|
|
109
|
+
if (producer.dimensions.some(x => { var _a, _b; return ((_a = x.alias) === null || _a === void 0 ? void 0 : _a.includes('{')) || ((_b = x.alias) === null || _b === void 0 ? void 0 : _b.includes('[')); }))
|
|
110
|
+
plan.push({ type: 'nested-field-unpacking', producer });
|
|
111
|
+
plan.push({ type: 'post-process-json', producer });
|
|
112
|
+
break;
|
|
113
|
+
}
|
|
114
|
+
default: throw new Error(`Engine "${producerEngine}" not supported`);
|
|
115
|
+
}
|
|
116
|
+
return plan;
|
|
117
|
+
};
|
|
93
118
|
}
|
|
94
119
|
}
|
|
95
120
|
const ExecutionPlanner = new ExecutionPlannerClas();
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
const Affirm_1 = __importDefault(require("../../core/Affirm"));
|
|
7
|
+
const Environment_1 = __importDefault(require("../Environment"));
|
|
8
|
+
const ConsumerEngine_1 = __importDefault(require("../consumer/ConsumerEngine"));
|
|
9
|
+
class JoinEngineClass {
|
|
10
|
+
constructor() {
|
|
11
|
+
this.validateFieldInProducer = (fieldName, producerName) => {
|
|
12
|
+
var _a, _b, _c, _d;
|
|
13
|
+
const producer = Environment_1.default.getProducer(producerName);
|
|
14
|
+
if (!producer) {
|
|
15
|
+
throw new Error(`Producer ${producerName} not found`);
|
|
16
|
+
}
|
|
17
|
+
// Check dimensions
|
|
18
|
+
const hasDimension = producer.dimensions.some(d => d.name === fieldName);
|
|
19
|
+
// Check measures
|
|
20
|
+
const hasMeasure = (_b = (_a = producer.measures) === null || _a === void 0 ? void 0 : _a.some(m => m.name === fieldName)) !== null && _b !== void 0 ? _b : false;
|
|
21
|
+
if (!hasDimension && !hasMeasure) {
|
|
22
|
+
throw new Error(`Field '${fieldName}' not found in producer '${producerName}'. Available fields: ${producer.dimensions.map(d => d.name).concat((_d = (_c = producer.measures) === null || _c === void 0 ? void 0 : _c.map(m => m.name)) !== null && _d !== void 0 ? _d : []).join(', ')}`);
|
|
23
|
+
}
|
|
24
|
+
};
|
|
25
|
+
this.validateFieldInConsumer = (fieldName, consumerShape) => {
|
|
26
|
+
const hasField = consumerShape.dimensions.find(x => x.name === fieldName);
|
|
27
|
+
if (!hasField)
|
|
28
|
+
throw new Error(`Field '${fieldName}' not found in consumer '${consumerShape.name}'. Your join condition must be on fields that are present in the consumer.`);
|
|
29
|
+
};
|
|
30
|
+
this.parseJoinCondition = (sql, producer) => {
|
|
31
|
+
// Extract field names from SQL condition like ${P.id} = ${orders.user_id}
|
|
32
|
+
const regex = /\${([^}]+)}/g;
|
|
33
|
+
const matches = Array.from(sql.matchAll(regex));
|
|
34
|
+
if (matches.length !== 2) {
|
|
35
|
+
throw new Error(`Invalid join condition: ${sql}. Expected format: \${P.field} = \${producer.field}`);
|
|
36
|
+
}
|
|
37
|
+
const [left, right] = matches.map(m => m[1]);
|
|
38
|
+
const [leftProducer, leftField] = left.split('.');
|
|
39
|
+
const [rightProducer, rightField] = right.split('.');
|
|
40
|
+
if (!leftField || !rightField) {
|
|
41
|
+
throw new Error(`Invalid join condition: ${sql}. Both sides must specify a field name after the dot.`);
|
|
42
|
+
}
|
|
43
|
+
// Replace P with actual producer name
|
|
44
|
+
const actualLeftProducer = leftProducer === 'P' ? producer.name : leftProducer;
|
|
45
|
+
const actualRightProducer = rightProducer === 'P' ? producer.name : rightProducer;
|
|
46
|
+
// Validate both fields exist in their respective producers
|
|
47
|
+
this.validateFieldInProducer(leftField, actualLeftProducer);
|
|
48
|
+
this.validateFieldInProducer(rightField, actualRightProducer);
|
|
49
|
+
return {
|
|
50
|
+
leftProducer: actualLeftProducer,
|
|
51
|
+
leftField: leftField,
|
|
52
|
+
rightProducer: actualRightProducer,
|
|
53
|
+
rightField: rightField
|
|
54
|
+
};
|
|
55
|
+
};
|
|
56
|
+
this.findProducerData = (producerName, producedData) => {
|
|
57
|
+
const data = producedData.find(pd => pd.producerKey === producerName);
|
|
58
|
+
if (!data) {
|
|
59
|
+
throw new Error(`No data found for producer: ${producerName}`);
|
|
60
|
+
}
|
|
61
|
+
return data.data;
|
|
62
|
+
};
|
|
63
|
+
this.createLookupMap = (data, key) => {
|
|
64
|
+
var _a;
|
|
65
|
+
const map = new Map();
|
|
66
|
+
for (const item of data) {
|
|
67
|
+
const row = item;
|
|
68
|
+
const keyValue = (_a = row[key]) === null || _a === void 0 ? void 0 : _a.toString();
|
|
69
|
+
if (keyValue === undefined)
|
|
70
|
+
continue;
|
|
71
|
+
const existing = map.get(keyValue);
|
|
72
|
+
if (existing)
|
|
73
|
+
existing.push(row);
|
|
74
|
+
else
|
|
75
|
+
map.set(keyValue, [row]);
|
|
76
|
+
}
|
|
77
|
+
return map;
|
|
78
|
+
};
|
|
79
|
+
this.join = (consumer, producedData) => {
|
|
80
|
+
var _a;
|
|
81
|
+
(0, Affirm_1.default)(consumer, 'Invalid consumer');
|
|
82
|
+
(0, Affirm_1.default)(producedData, 'Invalid produced data');
|
|
83
|
+
if (consumer.producers.length <= 1) {
|
|
84
|
+
return this.findProducerData(consumer.producers[0].name, producedData);
|
|
85
|
+
}
|
|
86
|
+
// Start with the first producer's data
|
|
87
|
+
// let result = this.findProducerData(consumer.producers[0].name, producedData)
|
|
88
|
+
let result = [];
|
|
89
|
+
const consumerShape = ConsumerEngine_1.default.getOutputShape(consumer);
|
|
90
|
+
const consumerColumns = ConsumerEngine_1.default.compile(consumer);
|
|
91
|
+
// Iterate through each producer and its joins
|
|
92
|
+
for (let i = 0; i < consumer.producers.length; i++) {
|
|
93
|
+
const producer = consumer.producers[i];
|
|
94
|
+
if (!producer.joins)
|
|
95
|
+
continue;
|
|
96
|
+
for (const join of producer.joins) {
|
|
97
|
+
const otherProducer = consumer.producers.find(p => p.name === join.otherName);
|
|
98
|
+
if (!otherProducer) {
|
|
99
|
+
throw new Error(`Producer ${join.otherName} not found`);
|
|
100
|
+
}
|
|
101
|
+
const condition = this.parseJoinCondition(join.sql, producer);
|
|
102
|
+
this.validateFieldInConsumer(condition.leftField, consumerShape);
|
|
103
|
+
this.validateFieldInConsumer(condition.rightField, consumerShape);
|
|
104
|
+
const leftData = this.findProducerData(condition.leftProducer, producedData);
|
|
105
|
+
const rightData = this.findProducerData(condition.rightProducer, producedData);
|
|
106
|
+
// Create lookup map for the right dataset
|
|
107
|
+
const rightLookup = this.createLookupMap(rightData, condition.rightField);
|
|
108
|
+
// Perform the join based on relationship type
|
|
109
|
+
const joinedResult = [];
|
|
110
|
+
for (const item of leftData) {
|
|
111
|
+
const leftRow = item;
|
|
112
|
+
const leftValue = (_a = leftRow[condition.leftField]) === null || _a === void 0 ? void 0 : _a.toString();
|
|
113
|
+
if (leftValue === undefined)
|
|
114
|
+
continue;
|
|
115
|
+
const rightRows = rightLookup.get(leftValue) || [];
|
|
116
|
+
if (rightRows.length === 0) {
|
|
117
|
+
if (join.relationship !== 'one-to-many') {
|
|
118
|
+
// For one-to-one and many-to-one, keep rows even without matches
|
|
119
|
+
joinedResult.push(leftRow);
|
|
120
|
+
}
|
|
121
|
+
continue;
|
|
122
|
+
}
|
|
123
|
+
// Merge rows based on relationship type and field ownership
|
|
124
|
+
for (const rightRow of rightRows) {
|
|
125
|
+
const mergedRow = {};
|
|
126
|
+
// For each field in the compiled consumer, get it from its owner
|
|
127
|
+
for (const column of consumerColumns) {
|
|
128
|
+
const fieldName = column.consumerAlias;
|
|
129
|
+
// Get the data from the owner producer
|
|
130
|
+
if (column.owner === condition.leftProducer) {
|
|
131
|
+
mergedRow[fieldName] = leftRow[fieldName];
|
|
132
|
+
}
|
|
133
|
+
else if (column.owner === condition.rightProducer) {
|
|
134
|
+
mergedRow[fieldName] = rightRow[fieldName];
|
|
135
|
+
}
|
|
136
|
+
// If neither has the field, it will be undefined
|
|
137
|
+
}
|
|
138
|
+
joinedResult.push(mergedRow);
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
result = joinedResult;
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
return result;
|
|
145
|
+
};
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
const JoinEngine = new JoinEngineClass();
|
|
149
|
+
exports.default = JoinEngine;
|
|
@@ -1,10 +1,16 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
2
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
const Algo_1 = __importDefault(require("../../core/Algo"));
|
|
3
7
|
class TypeCasterClass {
|
|
4
8
|
/**
|
|
5
9
|
* Casts the value to the requested type (only if needed)
|
|
6
10
|
*/
|
|
7
11
|
cast(value, type) {
|
|
12
|
+
if (!Algo_1.default.hasVal(value))
|
|
13
|
+
return value;
|
|
8
14
|
switch (type) {
|
|
9
15
|
case 'boolean': {
|
|
10
16
|
if (typeof value === 'boolean')
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@forzalabs/remora",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.25",
|
|
4
4
|
"description": "A powerful CLI tool for seamless data translation.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"private": false,
|
|
@@ -17,6 +17,7 @@
|
|
|
17
17
|
"deploy": "npx tsx ./src/index.ts deploy",
|
|
18
18
|
"debug": "npx tsx ./src/index.ts debug",
|
|
19
19
|
"automap": "npx tsx ./src/index.ts automap",
|
|
20
|
+
"create-producer": "npx tsx ./src/index.ts create-producer",
|
|
20
21
|
"copy-static-file": "npx tsx ./scripts/CopyStaticFile.js",
|
|
21
22
|
"build": "npm i && npm run sync && tsc --outDir .build && npm run copy-static-file",
|
|
22
23
|
"upload": "npm run build && cd .build && npm publish --access=public"
|