@forzalabs/remora 0.0.24 → 0.0.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Constants.js +1 -1
- package/engines/ParseManager.js +3 -2
- package/engines/execution/ExecutionEnvironment.js +51 -24
- package/engines/execution/ExecutionPlanner.js +58 -33
- package/engines/transform/JoinEngine.js +144 -0
- package/engines/transform/TransformationEngine.js +6 -4
- package/engines/transform/TypeCaster.js +6 -0
- package/package.json +2 -1
package/Constants.js
CHANGED
package/engines/ParseManager.js
CHANGED

@@ -18,8 +18,9 @@ class ParseManagerClass {
             (0, Affirm_1.default)(lines, 'Invalid csv lines');
             Affirm_1.default.hasValue(lines.length, 'Invalid csv lines length');
             const delimiterChar = (_a = producer.settings.delimiter) !== null && _a !== void 0 ? _a : ',';
+            const headerRow = lines[0];
             const rows = lines.slice(1).map(x => x.split(delimiterChar).map(k => k.trim()));
-            const headerColumns = this._extractHeader(
+            const headerColumns = this._extractHeader(headerRow, delimiterChar, producer, discover);
             const result = [];
             for (const row of rows) {
                 const rowObject = {};

@@ -33,7 +34,7 @@ class ParseManagerClass {
         };
         this._extractHeader = (headerLine, delimiter, producer, discover) => {
             var _a;
-            (0, Affirm_1.default)(headerLine,
+            (0, Affirm_1.default)(headerLine, `Invalid CSV header line for producer "${producer.name}"`);
             (0, Affirm_1.default)(delimiter, 'Invalid CSV delimiter');
             (0, Affirm_1.default)(producer, 'Invalid producer');
             const source = Environment_1.default.getSource(producer.source);
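
Note: the csvToJson change above splits the header row out before parsing the remaining lines, then hands it to _extractHeader explicitly. A minimal standalone sketch of that split, with made-up input (the 'id,name' lines are illustrative, not from the package):

    const lines = ['id,name', '1,Ada', '2,Grace'];
    const delimiterChar = ',';       // the producer.settings.delimiter fallback
    const headerRow = lines[0];      // 'id,name'
    const rows = lines.slice(1).map(x => x.split(delimiterChar).map(k => k.trim()));
    // rows → [['1', 'Ada'], ['2', 'Grace']]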

package/engines/execution/ExecutionEnvironment.js
CHANGED

@@ -24,6 +24,7 @@ const SQLCompiler_1 = __importDefault(require("../sql/SQLCompiler"));
 const ExecutionPlanner_1 = __importDefault(require("./ExecutionPlanner"));
 const RequestExecutor_1 = __importDefault(require("./RequestExecutor"));
 const TransformationEngine_1 = __importDefault(require("../transform/TransformationEngine"));
+const JoinEngine_1 = __importDefault(require("../transform/JoinEngine"));
 class ExecutionEnvironment {
     constructor(consumer) {
         this.run = (options) => __awaiter(this, void 0, void 0, function* () {

@@ -37,28 +38,27 @@ class ExecutionEnvironment {
                 switch (planStep.type) {
                     case 'compile-consumer-to-SQL': {
                         const sql = SQLCompiler_1.default.getConsumerReference(this._consumer);
-                        this.
-                        this.
+                        this._envData.consumerSQL = sql;
+                        this._envData.finalSQL = sql;
                         break;
                     }
                     case 'compile-execution-request-to-SQL': {
                         const sql = SQLBuilder_1.default.buildConsumerQuery(options);
-                        this.
-                        this.
+                        this._envData.executionRequestSQL = sql;
+                        this._envData.finalSQL = `WITH consumer AS (${this._envData.consumerSQL})\nSELECT * FROM consumer${this._envData.executionRequestSQL}`;
                         break;
                     }
                     case 'execute-SQL': {
                         (0, Affirm_1.default)(planStep.source, `Invalid source in execute-SQL step`);
                         const driver = yield DriverFactory_1.default.instantiateSource(planStep.source);
-
-                        this.
+                        const queryData = (yield driver.query(this._envData.finalSQL)).rows;
+                        this._storeIntermidiate(planStep, queryData);
                         break;
                     }
                     case 'read-file-whole': {
                         (0, Affirm_1.default)(planStep.producer, `Invalid producer in read-file-whole step`);
                         const fileData = yield ProducerEngine_1.default.readFile(planStep.producer, { readmode: 'all' });
-                        this.
-                        this._fetchedDataType = fileData.dataType;
+                        this._storeIntermidiate(planStep, fileData.data);
                         break;
                     }
                     case 'read-file-lines': {
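
Note: the compile-execution-request-to-SQL step now composes finalSQL by wrapping the consumer query in a CTE and appending the request SQL to it. A sketch of that composition with hypothetical fragments (the SELECT strings are invented for illustration):

    const consumerSQL = 'SELECT id, name FROM users_table';
    const executionRequestSQL = ' WHERE id > 10 LIMIT 5';
    const finalSQL = `WITH consumer AS (${consumerSQL})\nSELECT * FROM consumer${executionRequestSQL}`;
    // WITH consumer AS (SELECT id, name FROM users_table)
    // SELECT * FROM consumer WHERE id > 10 LIMIT 5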

@@ -66,56 +66,83 @@
                         (0, Affirm_1.default)(planStep.producer, `Invalid producer in read-file-lines step`);
                         const { producer, lines: { from, to } } = planStep;
                         const fileData = yield ProducerEngine_1.default.readFile(producer, { readmode: 'lines', lines: { from, to } });
-                        this.
-                        this._fetchedDataType = fileData.dataType;
+                        this._storeIntermidiate(planStep, fileData.data);
                         break;
                     }
                     case 'nested-field-unpacking': {
                         (0, Affirm_1.default)(planStep.producer, `Invalid producer in nested-field-unpacking step`);
-
+                        const unpackedData = PostProcessor_1.default.unpack(this._resultingData, planStep.producer);
+                        this._storeIntermidiate(planStep, unpackedData);
                         break;
                     }
                     case 'post-process-json': {
-
+                        const myProdData = this._getIntermidiate(planStep);
+                        const processedData = PostProcessor_1.default.doProjection(this._consumer, myProdData);
+                        this._storeIntermidiate(planStep, processedData);
                         break;
                     }
                     case 'csv-to-json': {
-                        (0, Affirm_1.default)(this.
-                        (0, Affirm_1.default)(Array.isArray(this.
+                        (0, Affirm_1.default)(this._resultingData, 'Invalid data');
+                        (0, Affirm_1.default)(Array.isArray(this._resultingData), 'Invalid data type, must be an array');
                         (0, Affirm_1.default)(planStep.producer, `Invalid producer in csv-to-json step`);
-                        const csv = this.
-
+                        const csv = this._getIntermidiate(planStep);
+                        const jsonData = ParseManager_1.default.csvToJson(csv, planStep.producer);
+                        this._storeIntermidiate(planStep, jsonData);
                         break;
                     }
                     case 'export-file': {
                         (0, Affirm_1.default)(planStep.output, `Invalid output in export-file step`);
-                        const res = yield FileExporter_1.default.export(this._consumer, planStep.output, this.
+                        const res = yield FileExporter_1.default.export(this._consumer, planStep.output, this._resultingData);
                         result.fileUri = res;
                         break;
                     }
                     case 'apply-execution-request-to-result': {
-                        this.
+                        this._resultingData = RequestExecutor_1.default.execute(this._resultingData, options);
                         break;
                     }
                     case 'apply-consumer-filters-on-JSON': {
-                        this.
+                        this._resultingData = RequestExecutor_1.default._applyFilters(this._resultingData, this._consumer.filters.map(x => x.rule));
                         break;
                     }
                     case 'apply-transformations': {
-                        this.
+                        this._resultingData = TransformationEngine_1.default.apply(this._consumer, this._resultingData);
+                        break;
+                    }
+                    case 'join-producers-data': {
+                        const joinedData = JoinEngine_1.default.join(this._consumer, this._producedData);
+                        this._resultingData = joinedData;
                         break;
                     }
                     default: throw new Error(`Invalid execution plan step type "${planStep.type}"`);
                 }
             }
-            result.data = this.
+            result.data = this._resultingData;
             result._elapsedMS = performance.now() - start;
             return result;
         });
+        this._storeIntermidiate = (step, data) => {
+            var _a, _b;
+            (0, Affirm_1.default)(step, 'Invalid step');
+            const key = (_b = (_a = step.producer) === null || _a === void 0 ? void 0 : _a.name) !== null && _b !== void 0 ? _b : '_default_';
+            let pData = this._producedData.find(x => x.producerKey === key);
+            if (!pData) {
+                pData = { producerKey: key, data: [] };
+                this._producedData.push(pData);
+            }
+            pData.data = data;
+        };
+        this._getIntermidiate = (step) => {
+            var _a, _b;
+            (0, Affirm_1.default)(step, 'Invalid step');
+            const key = (_b = (_a = step.producer) === null || _a === void 0 ? void 0 : _a.name) !== null && _b !== void 0 ? _b : '_default_';
+            const produced = this._producedData.find(x => x.producerKey === key);
+            (0, Affirm_1.default)(produced, `No produced dataset found for step "${step.type}" of producer "${key}".`);
+            return produced.data;
+        };
         this._consumer = consumer;
-        this.
-        this.
-        this.
+        this._envData = { consumerSQL: null, executionRequestSQL: null, finalSQL: null };
+        this._producedData = [];
+        this._resultingData = [];
     }
 }
 exports.default = ExecutionEnvironment;
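
Note: the new _storeIntermidiate/_getIntermidiate helpers replace the removed per-run fields (the truncated `this.` deletions above) with a store keyed by producer name, so several producers can hold intermediate results in the same run. A runnable standalone sketch of that behavior (the 'users'/'orders' producers are made up; '_default_' is the fallback key from the source):

    const producedData = [];
    const storeIntermediate = (step, data) => {
        const key = step.producer?.name ?? '_default_';
        let slot = producedData.find(x => x.producerKey === key);
        if (!slot) {
            slot = { producerKey: key, data: [] };
            producedData.push(slot);
        }
        slot.data = data;
    };
    const getIntermediate = (step) => {
        const key = step.producer?.name ?? '_default_';
        const slot = producedData.find(x => x.producerKey === key);
        if (!slot) throw new Error(`No produced dataset for producer "${key}"`);
        return slot.data;
    };
    // Two producers no longer overwrite each other's intermediate rows:
    storeIntermediate({ producer: { name: 'users' } }, [{ id: 1 }]);
    storeIntermediate({ producer: { name: 'orders' } }, [{ user_id: 1 }]);
    getIntermediate({ producer: { name: 'users' } });   // [{ id: 1 }]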

package/engines/execution/ExecutionPlanner.js
CHANGED

@@ -6,6 +6,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
 const Affirm_1 = __importDefault(require("../../core/Affirm"));
 const Algo_1 = __importDefault(require("../../core/Algo"));
 const ConsumerManager_1 = __importDefault(require("../consumer/ConsumerManager"));
+const Environment_1 = __importDefault(require("../Environment"));
 class ExecutionPlannerClas {
     constructor() {
         this.getEngineClass = (engine) => {

@@ -18,39 +19,9 @@ class ExecutionPlannerClas {
             }
         };
         this.plan = (consumer, options) => {
-            var _a, _b;
             (0, Affirm_1.default)(consumer, 'Invalid consumer');
-            const
-            const
-            const plan = [];
-            switch (producerEngine) {
-                case 'postgres':
-                case 'aws-redshift': {
-                    plan.push({ type: 'compile-consumer-to-SQL' });
-                    if (Algo_1.default.hasVal(options))
-                        plan.push({ type: 'compile-execution-request-to-SQL' });
-                    plan.push({ type: 'execute-SQL', source: source });
-                    break;
-                }
-                case 'local':
-                case 'aws-s3': {
-                    const prod = producer;
-                    if (Algo_1.default.hasVal(options) && (options.limit || options.offset))
-                        plan.push({ type: 'read-file-lines', producer: prod, lines: { from: (_a = options.offset) !== null && _a !== void 0 ? _a : 0, to: options.limit ? (options.offset + options.limit) : undefined } });
-                    else
-                        plan.push({ type: 'read-file-whole', producer: prod });
-                    if (((_b = prod.settings.fileType) === null || _b === void 0 ? void 0 : _b.toUpperCase()) === 'CSV') {
-                        plan.push({ type: 'csv-to-json', producer: prod });
-                    }
-                    if (prod.dimensions.some(x => { var _a, _b; return ((_a = x.alias) === null || _a === void 0 ? void 0 : _a.includes('{')) || ((_b = x.alias) === null || _b === void 0 ? void 0 : _b.includes('[')); }))
-                        plan.push({ type: 'nested-field-unpacking', producer: prod });
-                    plan.push({ type: 'post-process-json' });
-                    if (consumer.filters && consumer.filters.length > 0)
-                        plan.push({ type: 'apply-consumer-filters-on-JSON' });
-                    break;
-                }
-                default: throw new Error(`Engine "${producerEngine}" not supported`);
-            }
+            const producersPlan = this._planProducers(consumer, options);
+            const plan = [...producersPlan];
             // At this point I have the data loaded in memory
             // TODO: can I handle streaming data? (e.g. a file that is too big to fit in memory)
             // TODO: how to handle pagination of SQL results?

@@ -58,7 +29,8 @@ class ExecutionPlannerClas {
             // TODO: transformations can also be applied directly to the producer... how???
             if (consumer.fields.some(x => Algo_1.default.hasVal(x.transform)))
                 plan.push({ type: 'apply-transformations' });
-            const
+            const [source] = ConsumerManager_1.default.getSource(consumer);
+            const engineClass = this.getEngineClass(source.engine);
             for (const output of consumer.outputs) {
                 switch (output.format.toUpperCase()) {
                     case 'JSON': {

@@ -90,6 +62,59 @@ class ExecutionPlannerClas {
             }
             return plan;
         };
+        this._planProducers = (consumer, options) => {
+            (0, Affirm_1.default)(consumer, 'Invalid consumer');
+            const producers = consumer.producers.map(x => Environment_1.default.getProducer(x.name));
+            (0, Affirm_1.default)(producers, `Invalid producers on consumer "${consumer.name}"`);
+            (0, Affirm_1.default)(producers.every(x => Algo_1.default.hasVal(x)), `One or more producers of consumer "${consumer.name}" not found.`);
+            const sources = producers.map(x => Environment_1.default.getSource(x.source));
+            (0, Affirm_1.default)(sources, `Invalid sources on consumer "${consumer.name}"`);
+            (0, Affirm_1.default)(sources.every(x => Algo_1.default.hasVal(x)), `One or more sources of consumer "${consumer.name}" not found.`);
+            const engineClasses = sources.map(x => this.getEngineClass(x.engine));
+            const uniqEngineClasses = Algo_1.default.uniq(engineClasses);
+            const plan = [];
+            if (uniqEngineClasses.length === 1 && uniqEngineClasses[0] === 'sql')
+                plan.push(...this._planProducer(producers[0], options));
+            else
+                plan.push(...(producers.flatMap(x => this._planProducer(x, options))));
+            plan.push({ type: 'join-producers-data' });
+            if (consumer.filters && consumer.filters.length > 0)
+                plan.push({ type: 'apply-consumer-filters-on-JSON' });
+            return plan;
+        };
+        this._planProducer = (producer, options) => {
+            var _a, _b;
+            (0, Affirm_1.default)(producer, 'Invalid producer');
+            const source = Environment_1.default.getSource(producer.source);
+            (0, Affirm_1.default)(source, `Source "${producer.source}" of producer "${producer.name}" not found.`);
+            const plan = [];
+            const producerEngine = source.engine;
+            switch (producerEngine) {
+                case 'postgres':
+                case 'aws-redshift': {
+                    plan.push({ type: 'compile-consumer-to-SQL', producer });
+                    if (Algo_1.default.hasVal(options))
+                        plan.push({ type: 'compile-execution-request-to-SQL', producer });
+                    plan.push({ type: 'execute-SQL', source: source, producer });
+                    break;
+                }
+                case 'local':
+                case 'aws-s3': {
+                    if (Algo_1.default.hasVal(options) && (options.limit || options.offset))
+                        plan.push({ type: 'read-file-lines', producer, lines: { from: (_a = options.offset) !== null && _a !== void 0 ? _a : 0, to: options.limit ? (options.offset + options.limit) : undefined } });
+                    else
+                        plan.push({ type: 'read-file-whole', producer });
+                    if (((_b = producer.settings.fileType) === null || _b === void 0 ? void 0 : _b.toUpperCase()) === 'CSV')
+                        plan.push({ type: 'csv-to-json', producer });
+                    if (producer.dimensions.some(x => { var _a, _b; return ((_a = x.alias) === null || _a === void 0 ? void 0 : _a.includes('{')) || ((_b = x.alias) === null || _b === void 0 ? void 0 : _b.includes('[')); }))
+                        plan.push({ type: 'nested-field-unpacking', producer });
+                    plan.push({ type: 'post-process-json', producer });
+                    break;
+                }
+                default: throw new Error(`Engine "${producerEngine}" not supported`);
+            }
+            return plan;
+        };
     }
 }
 const ExecutionPlanner = new ExecutionPlannerClas();
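
Note: plan() now delegates per-producer planning to _planProducers/_planProducer and appends a join-producers-data step, instead of planning a single producer inline. For a hypothetical consumer with two local CSV producers ('users' and 'orders' are invented names), the producer phase of the plan would look roughly like:

    const users = { name: 'users' };
    const orders = { name: 'orders' };
    const plan = [
        { type: 'read-file-whole', producer: users },
        { type: 'csv-to-json', producer: users },
        { type: 'post-process-json', producer: users },
        { type: 'read-file-whole', producer: orders },
        { type: 'csv-to-json', producer: orders },
        { type: 'post-process-json', producer: orders },
        { type: 'join-producers-data' },
        // then, as before: apply-consumer-filters-on-JSON (if the consumer
        // has filters), apply-transformations (if any field has a transform),
        // and one export step per configured output
    ];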

package/engines/transform/JoinEngine.js
ADDED

@@ -0,0 +1,144 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+const Affirm_1 = __importDefault(require("../../core/Affirm"));
+const Environment_1 = __importDefault(require("../Environment"));
+const ConsumerEngine_1 = __importDefault(require("../consumer/ConsumerEngine"));
+class JoinEngineClass {
+    constructor() {
+        this.validateFieldInProducer = (fieldName, producerName) => {
+            var _a, _b, _c, _d;
+            const producer = Environment_1.default.getProducer(producerName);
+            if (!producer) {
+                throw new Error(`Producer ${producerName} not found`);
+            }
+            // Check dimensions
+            const hasDimension = producer.dimensions.some(d => d.name === fieldName);
+            // Check measures
+            const hasMeasure = (_b = (_a = producer.measures) === null || _a === void 0 ? void 0 : _a.some(m => m.name === fieldName)) !== null && _b !== void 0 ? _b : false;
+            if (!hasDimension && !hasMeasure) {
+                throw new Error(`Field '${fieldName}' not found in producer '${producerName}'. Available fields: ${producer.dimensions.map(d => d.name).concat((_d = (_c = producer.measures) === null || _c === void 0 ? void 0 : _c.map(m => m.name)) !== null && _d !== void 0 ? _d : []).join(', ')}`);
+            }
+        };
+        this.validateFieldInConsumer = (fieldName, consumerShape) => {
+            const hasField = consumerShape.dimensions.find(x => x.name === fieldName);
+            if (!hasField)
+                throw new Error(`Field '${fieldName}' not found in consumer '${consumerShape.name}'. Your join condition must be on fields that are present in the consumer.`);
+        };
+        this.parseJoinCondition = (sql, producer) => {
+            // Extract field names from SQL condition like ${P.id} = ${orders.user_id}
+            const regex = /\${([^}]+)}/g;
+            const matches = Array.from(sql.matchAll(regex));
+            if (matches.length !== 2)
+                throw new Error(`Invalid join condition: ${sql}. Expected format: \${P.field} = \${producer.field}`);
+            const [left, right] = matches.map(m => m[1]);
+            const [leftProducer, leftField] = left.split('.');
+            const [rightProducer, rightField] = right.split('.');
+            if (!leftField || !rightField)
+                throw new Error(`Invalid join condition: ${sql}. Both sides must specify a field name after the dot.`);
+            // Replace P with actual producer name
+            const actualLeftProducer = leftProducer === 'P' ? producer.name : leftProducer;
+            const actualRightProducer = rightProducer === 'P' ? producer.name : rightProducer;
+            // Validate both fields exist in their respective producers
+            this.validateFieldInProducer(leftField, actualLeftProducer);
+            this.validateFieldInProducer(rightField, actualRightProducer);
+            return {
+                leftProducer: actualLeftProducer,
+                leftField: leftField,
+                rightProducer: actualRightProducer,
+                rightField: rightField
+            };
+        };
+        this.findProducerData = (producerName, producedData) => {
+            const data = producedData.find(pd => pd.producerKey === producerName);
+            if (!data)
+                throw new Error(`No data found for producer: ${producerName}`);
+            return data.data;
+        };
+        this.createLookupMap = (data, key) => {
+            var _a;
+            const map = new Map();
+            for (const item of data) {
+                const row = item;
+                const keyValue = (_a = row[key]) === null || _a === void 0 ? void 0 : _a.toString();
+                if (keyValue === undefined)
+                    continue;
+                const existing = map.get(keyValue);
+                if (existing)
+                    existing.push(row);
+                else
+                    map.set(keyValue, [row]);
+            }
+            return map;
+        };
+        this.join = (consumer, producedData) => {
+            var _a;
+            (0, Affirm_1.default)(consumer, 'Invalid consumer');
+            (0, Affirm_1.default)(producedData, 'Invalid produced data');
+            if (consumer.producers.length <= 1)
+                return this.findProducerData(consumer.producers[0].name, producedData);
+            // Start with the first producer's data
+            let result = [];
+            const consumerShape = ConsumerEngine_1.default.getOutputShape(consumer);
+            const consumerColumns = ConsumerEngine_1.default.compile(consumer);
+            // Iterate through each producer and its joins
+            for (let i = 0; i < consumer.producers.length; i++) {
+                const producer = consumer.producers[i];
+                if (!producer.joins)
+                    continue;
+                for (const join of producer.joins) {
+                    const otherProducer = consumer.producers.find(p => p.name === join.otherName);
+                    if (!otherProducer) {
+                        throw new Error(`Producer ${join.otherName} not found`);
+                    }
+                    const condition = this.parseJoinCondition(join.sql, producer);
+                    this.validateFieldInConsumer(condition.leftField, consumerShape);
+                    this.validateFieldInConsumer(condition.rightField, consumerShape);
+                    const leftData = this.findProducerData(condition.leftProducer, producedData);
+                    const rightData = this.findProducerData(condition.rightProducer, producedData);
+                    // Create lookup map for the right dataset
+                    const rightLookup = this.createLookupMap(rightData, condition.rightField);
+                    // Perform the join based on relationship type
+                    const joinedResult = [];
+                    for (const item of leftData) {
+                        const leftRow = item;
+                        const leftValue = (_a = leftRow[condition.leftField]) === null || _a === void 0 ? void 0 : _a.toString();
+                        if (leftValue === undefined)
+                            continue;
+                        const rightRows = rightLookup.get(leftValue) || [];
+                        if (rightRows.length === 0) {
+                            if (join.relationship !== 'one-to-many') {
+                                // For one-to-one and many-to-one, keep rows even without matches
+                                joinedResult.push(leftRow);
+                            }
+                            continue;
+                        }
+                        // Merge rows based on relationship type and field ownership
+                        for (const rightRow of rightRows) {
+                            const mergedRow = {};
+                            // For each field in the compiled consumer, get it from its owner
+                            for (const column of consumerColumns) {
+                                const fieldName = column.consumerAlias;
+                                // Get the data from the owner producer
+                                if (column.owner === condition.leftProducer) {
+                                    mergedRow[fieldName] = leftRow[fieldName];
+                                }
+                                else if (column.owner === condition.rightProducer) {
+                                    mergedRow[fieldName] = rightRow[fieldName];
+                                }
+                                // If neither has the field, it will be undefined
+                            }
+                            joinedResult.push(mergedRow);
+                        }
+                    }
+                    result = joinedResult;
+                }
+            }
+            return result;
+        };
+    }
+}
+const JoinEngine = new JoinEngineClass();
+exports.default = JoinEngine;
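
Note: parseJoinCondition above expects join conditions of the form ${P.field} = ${producer.field}, where P stands for the producer declaring the join. An illustrative join declaration (the producer names, and any join properties beyond otherName/relationship/sql, are assumptions based on the code above):

    const join = {
        otherName: 'orders',
        relationship: 'one-to-many',
        sql: '${P.id} = ${orders.user_id}'
    };
    // Declared on a producer named 'users', this resolves to:
    // { leftProducer: 'users', leftField: 'id',
    //   rightProducer: 'orders', rightField: 'user_id' }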

package/engines/transform/TransformationEngine.js
CHANGED

@@ -9,6 +9,7 @@ const TypeCaster_1 = __importDefault(require("./TypeCaster"));
 class TransformationEngineClass {
     constructor() {
         this.apply = (consumer, data) => {
+            var _a;
             (0, Affirm_1.default)(consumer, 'Invalid consumer');
             Affirm_1.default.hasValue(data, 'Invalid data');
             const fieldsToTransform = consumer.fields.filter(field => Algo_1.default.hasVal(field.transform));

@@ -18,18 +19,19 @@
             for (const field of fieldsToTransform) {
                 if (!field.transform)
                     continue;
-                const
+                const fieldKey = (_a = field.alias) !== null && _a !== void 0 ? _a : field.key;
+                const value = record[fieldKey];
                 if (!Algo_1.default.hasVal(value) && Algo_1.default.hasVal(field.default))
-                    record[
+                    record[fieldKey] = field.default;
                 else if (!Algo_1.default.hasVal(value))
                     continue;
                 try {
-                    record[
+                    record[fieldKey] = this.applyTransformations(value, field.transform, field);
                 }
                 catch (error) {
                     switch (field.onError) {
                         case 'set_default':
-                            record[
+                            record[fieldKey] = field.default;
                             break;
                         case 'skip':
                             break;
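
Note: the transform loop now resolves the record key through the field alias before falling back to the field key. A minimal sketch of that lookup with a hypothetical field and record:

    const field = { key: 'created_at', alias: 'signup_date' };
    const record = { signup_date: '2024-01-01' };
    const fieldKey = field.alias ?? field.key;   // 'signup_date'
    record[fieldKey];   // '2024-01-01' (read and written under the alias)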

package/engines/transform/TypeCaster.js
CHANGED

@@ -1,10 +1,16 @@
 "use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
 Object.defineProperty(exports, "__esModule", { value: true });
+const Algo_1 = __importDefault(require("../../core/Algo"));
 class TypeCasterClass {
     /**
      * Casts the value to the requested type (only if needed)
      */
     cast(value, type) {
+        if (!Algo_1.default.hasVal(value))
+            return value;
         switch (type) {
             case 'boolean': {
                 if (typeof value === 'boolean')
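
Note: cast() now short-circuits on empty values before entering the type switch. A self-contained sketch of the guard, assuming Algo.hasVal treats null and undefined as absent (consistent with its use elsewhere in this diff):

    const hasVal = (v) => v !== null && v !== undefined;   // assumed Algo.hasVal semantics
    const cast = (value, type) => {
        if (!hasVal(value))
            return value;   // new guard: empty values skip the type switch
        /* ...type switch as in the module... */
        return value;
    };
    cast(null, 'boolean');   // → null, returned untouched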
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@forzalabs/remora",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.26",
|
|
4
4
|
"description": "A powerful CLI tool for seamless data translation.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"private": false,
|
|
@@ -17,6 +17,7 @@
|
|
|
17
17
|
"deploy": "npx tsx ./src/index.ts deploy",
|
|
18
18
|
"debug": "npx tsx ./src/index.ts debug",
|
|
19
19
|
"automap": "npx tsx ./src/index.ts automap",
|
|
20
|
+
"create-producer": "npx tsx ./src/index.ts create-producer",
|
|
20
21
|
"copy-static-file": "npx tsx ./scripts/CopyStaticFile.js",
|
|
21
22
|
"build": "npm i && npm run sync && tsc --outDir .build && npm run copy-static-file",
|
|
22
23
|
"upload": "npm run build && cd .build && npm publish --access=public"
|