@forzalabs/remora 0.2.5 → 1.0.0
This diff compares the contents of publicly available package versions as they appear in their respective public registries, and is provided for informational purposes only.
- package/Constants.js +10 -2
- package/actions/debug.js +1 -0
- package/actions/deploy.js +1 -0
- package/actions/run.js +17 -13
- package/actions/sample.js +1 -1
- package/core/Algo.js +8 -4
- package/definitions/ExecutorDefinitions.js +2 -0
- package/definitions/json_schemas/consumer-schema.json +1 -1
- package/definitions/json_schemas/producer-schema.json +1 -1
- package/definitions/temp.js +2 -0
- package/drivers/DeltaShareDriver.js +4 -0
- package/drivers/DriverFactory.js +10 -10
- package/drivers/DriverHelper.js +33 -10
- package/drivers/HttpApiDriver.js +4 -0
- package/drivers/LocalDriver.js +72 -5
- package/drivers/RedshiftDriver.js +4 -0
- package/drivers/S3Driver.js +36 -52
- package/drivers/files/LocalDestinationDriver.js +200 -0
- package/drivers/files/LocalSourceDriver.js +394 -0
- package/drivers/s3/S3DestinationDriver.js +159 -0
- package/drivers/s3/S3SourceDriver.js +455 -0
- package/engines/ai/LLM.js +0 -11
- package/engines/consumer/ConsumerEngine.js +0 -77
- package/engines/consumer/ConsumerManager.js +61 -36
- package/engines/consumer/ConsumerOnFinishManager.js +14 -0
- package/engines/consumer/PostProcessor.js +1 -7
- package/engines/dataset/Dataset.js +0 -61
- package/engines/dataset/DatasetManager.js +16 -76
- package/engines/dataset/DatasetRecord.js +4 -3
- package/engines/deployment/DeploymentPlanner.js +0 -7
- package/engines/execution/ExecutionPlanner.js +2 -2
- package/engines/execution/RequestExecutor.js +4 -45
- package/engines/file/FileExporter.js +7 -32
- package/engines/parsing/CSVParser.js +27 -26
- package/engines/parsing/LineParser.js +52 -0
- package/engines/parsing/XMLParser.js +1 -1
- package/engines/producer/ProducerEngine.js +0 -45
- package/engines/scheduler/CronScheduler.js +12 -4
- package/engines/scheduler/QueueManager.js +11 -4
- package/engines/sql/SQLCompiler.js +4 -4
- package/engines/transform/JoinEngine.js +3 -3
- package/engines/transform/TransformationEngine.js +3 -86
- package/engines/usage/UsageManager.js +8 -6
- package/engines/validation/Validator.js +12 -18
- package/executors/ConsumerExecutor.js +152 -0
- package/executors/Executor.js +168 -0
- package/executors/ExecutorOrchestrator.js +315 -0
- package/executors/ExecutorPerformance.js +17 -0
- package/executors/ExecutorProgress.js +52 -0
- package/executors/OutputExecutor.js +118 -0
- package/executors/ProducerExecutor.js +108 -0
- package/package.json +3 -3
- package/workers/ExecutorWorker.js +48 -0

package/engines/consumer/ConsumerEngine.js
@@ -1,28 +1,11 @@
 "use strict";
-var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
-    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
-    return new (P || (P = Promise))(function (resolve, reject) {
-        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
-        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
-        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
-        step((generator = generator.apply(thisArg, _arguments || [])).next());
-    });
-};
 var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
 const Affirm_1 = __importDefault(require("../../core/Affirm"));
-const Algo_1 = __importDefault(require("../../core/Algo"));
-const DriverFactory_1 = __importDefault(require("../../drivers/DriverFactory"));
-const Helper_1 = __importDefault(require("../../helper/Helper"));
-const DeploymentPlanner_1 = __importDefault(require("../deployment/DeploymentPlanner"));
 const Environment_1 = __importDefault(require("../Environment"));
-const ExecutionEnvironment_1 = __importDefault(require("../execution/ExecutionEnvironment"));
 const ProducerManager_1 = __importDefault(require("../producer/ProducerManager"));
-const SQLCompiler_1 = __importDefault(require("../sql/SQLCompiler"));
-const SQLUtils_1 = __importDefault(require("../sql/SQLUtils"));
-const UsageManager_1 = __importDefault(require("../usage/UsageManager"));
 const ConsumerManager_1 = __importDefault(require("./ConsumerManager"));
 class ConsumerEngineClass {
     constructor() {
@@ -70,10 +53,6 @@ class ConsumerEngineClass {
                     selectedColumns.push(col);
                 });
             }
-            else if (field.grouping) {
-                // This field should be ignored since it is only created when building the output for supported formats (json)
-                continue;
-            }
             else {
                 const col = ConsumerManager_1.default.searchFieldInColumns(field, availableColumns, consumer);
                 (0, Affirm_1.default)(col, `Consumer "${consumer.name}" misconfiguration: the requested field "${field.key}" is not found in any of the specified producers ("${consumer.producers.map(x => x.name).join(', ')}")`);
@@ -86,62 +65,6 @@
             (0, Affirm_1.default)(columnsWithNoAlias.length === 0, `Consumer "${consumer.name}" compilation error: some selected fields don't have a correct alias or key (${columnsWithNoAlias.map(x => x.nameInProducer).join(', ')})`);
             return selectedColumns;
         };
-        this.deploy = (consumer) => __awaiter(this, void 0, void 0, function* () {
-            (0, Affirm_1.default)(consumer, `Invalid consumer`);
-            const firstProd = Environment_1.default.getFirstProducer(consumer.producers[0].name);
-            (0, Affirm_1.default)(firstProd, `Missing producer in consumer "${consumer.name}"`);
-            // For now we only support connecting producers of the same engine type to a consumer, so we give an error if we detect different ones
-            const allSources = consumer.producers.map(x => Environment_1.default.getSource(Environment_1.default.getProducer(x.name).source));
-            const uniqEngines = Algo_1.default.uniqBy(allSources, 'engine');
-            (0, Affirm_1.default)(uniqEngines.length === 1, `Sources with different engines were used in a single consumer (${uniqEngines.join(', ')})`);
-            // For now we also only support consumers that have producers ALL having the same exact source
-            const uniqNames = Algo_1.default.uniqBy(allSources, 'name');
-            (0, Affirm_1.default)(uniqNames.length === 1, `Producers with different sources were used in a single consumer (${uniqNames.join(', ')})`);
-            const source = Environment_1.default.getSource(firstProd.source);
-            const driver = yield DriverFactory_1.default.instantiateSource(source);
-            const plan = DeploymentPlanner_1.default.planConsumer(consumer);
-            for (const planStep of plan) {
-                switch (planStep.type) {
-                    case 'create-materialized-view': {
-                        const sql = SQLCompiler_1.default.compileConsumer(consumer);
-                        (0, Affirm_1.default)(sql, `Invalid SQL from deployment compilation for consumer "${consumer.name}"`);
-                        const internalSchema = Environment_1.default.get('REMORA_SCHEMA');
-                        (0, Affirm_1.default)(internalSchema, `Missing "REMORA_SCHEMA" on project settings (needed due to "${consumer.name}" wanting to create a view)`);
-                        // TODO When I want to update a materialize view there is no way except killing it and recreating it. The problem is that: 1) it is not said that it can be deleted since that materialize view could have some dependencies 2) we should find a way to update it without it going completely offline.
-                        const mvSQL = `
-                        DROP MATERIALIZED VIEW IF EXISTS "${internalSchema}"."${SQLUtils_1.default.acceleratedViewName(consumer.name)}";
-                        CREATE MATERIALIZED VIEW "${internalSchema}"."${SQLUtils_1.default.acceleratedViewName(consumer.name)}" AS ${sql}`;
-                        yield driver.execute(mvSQL);
-                        break;
-                    }
-                    case 'create-view': {
-                        const sql = SQLCompiler_1.default.compileConsumer(consumer);
-                        (0, Affirm_1.default)(sql, `Invalid SQL from deployment compilation for consumer "${consumer.name}"`);
-                        const internalSchema = Environment_1.default.get('REMORA_SCHEMA');
-                        (0, Affirm_1.default)(internalSchema, `Missing "REMORA_SCHEMA" on project settings (needed due to "${consumer.name}" wanting to create a view)`);
-                        const vSQL = `CREATE OR REPLACE VIEW "${internalSchema}"."${SQLUtils_1.default.sanitizeName(consumer.name)}" AS ${sql}`;
-                        yield driver.execute(vSQL);
-                        break;
-                    }
-                    default: throw new Error(`Invalid execution consumer plan step type "${planStep.type}"`);
-                }
-            }
-        });
-        this.execute = (consumer, options, user, details) => __awaiter(this, void 0, void 0, function* () {
-            (0, Affirm_1.default)(consumer, `Invalid consumer`);
-            (0, Affirm_1.default)(options, `Invalid execute consume options`);
-            const { usageId } = UsageManager_1.default.startUsage(consumer, user, details);
-            try {
-                const execution = new ExecutionEnvironment_1.default(consumer, usageId);
-                const result = yield execution.run(options);
-                UsageManager_1.default.endUsage(usageId, result._stats.size);
-                return result;
-            }
-            catch (error) {
-                UsageManager_1.default.failUsage(usageId, Helper_1.default.asError(error).message);
-                throw error;
-            }
-        });
         this.getOutputShape = (consumer) => {
             (0, Affirm_1.default)(consumer, `Invalid consumer`);
             const compiled = this.compile(consumer);

package/engines/consumer/ConsumerManager.js
@@ -6,21 +6,9 @@ Object.defineProperty(exports, "__esModule", { value: true });
 const Affirm_1 = __importDefault(require("../../core/Affirm"));
 const Algo_1 = __importDefault(require("../../core/Algo"));
 const Environment_1 = __importDefault(require("../Environment"));
+const ProducerManager_1 = __importDefault(require("../producer/ProducerManager"));
 class ConsumerManagerClass {
     constructor() {
-        this.getConsumerFlatFields = (consumer) => {
-            (0, Affirm_1.default)(consumer, 'Invalid consumer');
-            return this.getFlatFields(consumer.fields);
-        };
-        this.getFlatFields = (list) => {
-            let result = [...list];
-            for (let i = 0; i < list.length; i++) {
-                const field = list[i];
-                if (field.grouping && field.grouping.subFields && field.grouping.subFields.length > 0)
-                    result = [...result, ...this.getFlatFields(field.grouping.subFields)];
-            }
-            return result;
-        };
         /**
          * Returns the full list of fields that are used by a consumer, while keeping the nested structure of fields.
          * If there are *, then replace them with the actual fields found in the underlying producer/consumer
@@ -28,17 +16,16 @@ class ConsumerManagerClass {
         this.getExpandedFields = (consumer) => {
             (0, Affirm_1.default)(consumer, 'Invalid consumer');
             const availableColumns = this.getAvailableColumns(consumer);
-            const convertedFields =
+            const convertedFields = consumer.fields.map(x => {
+                var _a;
+                return ({
+                    cField: x,
+                    finalKey: (_a = x.alias) !== null && _a !== void 0 ? _a : x.key
+                });
+            });
             const expandedFields = convertedFields.flatMap(x => this.expandField(consumer, x, availableColumns));
             return expandedFields;
         };
-        this.convertFields = (fieldsToConvert) => {
-            (0, Affirm_1.default)(fieldsToConvert, 'Invalid fields');
-            const convertedFields = fieldsToConvert.map(x => ({
-                cField: x
-            }));
-            return convertedFields;
-        };
         /**
          * Return all the available columns (dimensions and measures) to the consumer given its producers
          */
@@ -92,6 +79,7 @@
                     alias: x.nameInProducer,
                     from: x.owner
                 },
+                finalKey: x.nameInProducer,
                 dimension: x.dimension,
                 measure: x.measure
             })));
@@ -105,27 +93,13 @@
                             alias: col.nameInProducer,
                             from: col.owner
                         },
+                        finalKey: col.nameInProducer,
                         dimension: col.dimension,
                         measure: col.measure
                     });
                 });
             }
         }
-            else if (field.cField.grouping) {
-                expandedFields.push({
-                    cField: {
-                        key: field.cField.key,
-                        alias: field.cField.alias,
-                        from: field.cField.from,
-                        grouping: {
-                            groupingKey: field.cField.grouping.groupingKey,
-                            subFields: field.cField.grouping.subFields.flatMap(x => this.expandField(consumer, { cField: x }, availableColumns)).map(x => x.cField)
-                        }
-                    },
-                    dimension: field.dimension,
-                    measure: field.measure
-                });
-            }
             else {
                 const col = ConsumerManager.searchFieldInColumns(field.cField, availableColumns, consumer);
                 (0, Affirm_1.default)(col, `Consumer "${consumer.name}" misconfiguration: the requested field "${field.cField.key}" is not found in any of the specified producers ("${consumer.producers.map(x => x.name).join(', ')}")`);
@@ -187,6 +161,57 @@
             (0, Affirm_1.default)(uniqNames.length === 1, `Producers with different sources were used in the consumer "${consumer.name}" (${uniqNames.join(', ')})`);
             return [sources[0], producers[0]];
         };
+        this.compile = (consumer) => {
+            var _a, _b;
+            (0, Affirm_1.default)(consumer, `Invalid consumer`);
+            const availableColumns = this.getAvailableColumns(consumer);
+            const selectedColumns = [];
+            const flat = consumer.fields;
+            for (let i = 0; i < flat.length; i++) {
+                const field = flat[i];
+                // TODO: replace with the new funcitons in the consumermanager to reduce diplicate code
+                if (field.key === '*') {
+                    const from = (_a = field.from) !== null && _a !== void 0 ? _a : (consumer.producers.length === 1 ? consumer.producers[0].name : null);
+                    availableColumns.filter(x => x.owner === from).forEach(col => {
+                        col.consumerKey = col.nameInProducer;
+                        col.consumerAlias = col.nameInProducer;
+                        selectedColumns.push(col);
+                    });
+                }
+                else {
+                    const col = ConsumerManager.searchFieldInColumns(field, availableColumns, consumer);
+                    (0, Affirm_1.default)(col, `Consumer "${consumer.name}" misconfiguration: the requested field "${field.key}" is not found in any of the specified producers ("${consumer.producers.map(x => x.name).join(', ')}")`);
+                    col.consumerKey = field.key;
+                    col.consumerAlias = (_b = field.alias) !== null && _b !== void 0 ? _b : field.key;
+                    selectedColumns.push(col);
+                }
+            }
+            const columnsWithNoAlias = selectedColumns.filter(x => !x.consumerAlias || !x.consumerKey);
+            (0, Affirm_1.default)(columnsWithNoAlias.length === 0, `Consumer "${consumer.name}" compilation error: some selected fields don't have a correct alias or key (${columnsWithNoAlias.map(x => x.nameInProducer).join(', ')})`);
+            return selectedColumns;
+        };
+        this.getOutputShape = (consumer) => {
+            (0, Affirm_1.default)(consumer, `Invalid consumer`);
+            const compiled = this.compile(consumer);
+            const outDimensions = compiled.map(x => {
+                var _a, _b, _c, _d, _e, _f, _g;
+                return ({
+                    name: (_a = x.consumerAlias) !== null && _a !== void 0 ? _a : x.consumerKey,
+                    type: (_b = x.dimension) === null || _b === void 0 ? void 0 : _b.type,
+                    classification: (_c = x.dimension) === null || _c === void 0 ? void 0 : _c.classification,
+                    description: (_e = (_d = x.dimension) === null || _d === void 0 ? void 0 : _d.description) !== null && _e !== void 0 ? _e : (_f = x.measure) === null || _f === void 0 ? void 0 : _f.description,
+                    mask: ProducerManager_1.default.getMask(x.dimension),
+                    pk: (_g = x.dimension) === null || _g === void 0 ? void 0 : _g.pk
+                });
+            });
+            return {
+                _version: consumer._version,
+                name: consumer.name,
+                description: consumer.description,
+                metadata: consumer.metadata,
+                dimensions: outDimensions
+            };
+        };
     }
 }
 const ConsumerManager = new ConsumerManagerClass();

package/engines/consumer/ConsumerOnFinishManager.js
@@ -17,6 +17,20 @@ const Environment_1 = __importDefault(require("../Environment"));
 const DriverFactory_1 = __importDefault(require("../../drivers/DriverFactory"));
 class ConsumerOnFinishManagerClass {
     constructor() {
+        this.onConsumerSuccess = (consumer, executionId) => __awaiter(this, void 0, void 0, function* () {
+            void executionId;
+            for (const output of consumer.outputs) {
+                if (output.onSuccess)
+                    yield this.performOnSuccessActions(consumer, output);
+            }
+        });
+        this.onConsumerError = (consumer, executionId) => __awaiter(this, void 0, void 0, function* () {
+            void executionId;
+            for (const output of consumer.outputs) {
+                if (output.onError)
+                    yield this.performOnSuccessActions(consumer, output);
+            }
+        });
         this.performOnSuccessActions = (consumer, output) => __awaiter(this, void 0, void 0, function* () {
             (0, Affirm_1.default)(consumer, 'Invalid consumer');
             (0, Affirm_1.default)(output, 'Invalid output');

package/engines/consumer/PostProcessor.js
@@ -46,9 +46,8 @@ class PostProcessorClass {
                 }
                 record.sortDimensions();
             }
-                if (!updatedDimensions)
+            if (!updatedDimensions)
                 updatedDimensions = record._dimensions;
-            }
             // Finally apply the rules and changes of the consumer fields to the record
             for (const field of fields) {
                 const { key, alias } = field.cField;
@@ -71,11 +70,6 @@
                 fakeRecord.wholeUpdateDimension(update);
                 updatedDimensions = fakeRecord._dimensions;
             }
-            // Validate that dimensions have sequential indexes with no gaps
-            const indexes = updatedDimensions.map(d => d.index).sort((a, b) => a - b);
-            for (let i = 0; i < indexes.length; i++) {
-                (0, Affirm_1.default)(indexes[i] === i, `Missing or duplicate dimension index: expected index ${i} but found ${indexes[i]}. See dimension updates applied on consumer "${consumer.name}".`);
-            }
             dataset.setDimensions(updatedDimensions);
             return dataset;
         });

package/engines/dataset/Dataset.js
@@ -27,9 +27,7 @@ const Constants_1 = __importDefault(require("../../Constants"));
 const DatasetManager_1 = __importDefault(require("./DatasetManager"));
 const DatasetRecord_1 = __importDefault(require("./DatasetRecord"));
 const DatasetRecordPool_1 = __importDefault(require("./DatasetRecordPool"));
-const xlsx_1 = __importDefault(require("xlsx"));
 const Affirm_1 = __importDefault(require("../../core/Affirm"));
-const XMLParser_1 = __importDefault(require("../parsing/XMLParser"));
 const DriverFactory_1 = __importDefault(require("../../drivers/DriverFactory"));
 const Helper_1 = __importDefault(require("../../helper/Helper"));
 const Algo_1 = __importDefault(require("../../core/Algo"));
@@ -665,65 +663,6 @@ class Dataset {
             this._finishOperation('read-lines');
             return results;
         });
-        /**
-         * - computes dimensions + delimiter
-         * - cleans data (removes empty rows) and headers
-         * - parses json, xml, xls
-         */
-        this.prepare = (producer) => __awaiter(this, void 0, void 0, function* () {
-            this._startOperation('prepare');
-            const dimsRes = yield DatasetManager_1.default.buildDimensions(this, producer);
-            yield this.prepareWithDimensions(dimsRes);
-            this._finishOperation('prepare');
-            return this;
-        });
-        this.prepareWithDimensions = (dimResult) => __awaiter(this, void 0, void 0, function* () {
-            this._startOperation('prepare-with-dimensions');
-            const { delimiter, dimensions } = dimResult;
-            this._delimiter = delimiter;
-            this._dimensions = dimensions;
-            switch (this._file.fileType) {
-                case 'TXT':
-                case 'CSV':
-                case 'JSON':
-                case 'JSONL':
-                    break;
-                case 'XLS':
-                case 'XLSX': {
-                    const excel = xlsx_1.default.readFile(this._path);
-                    let targetSheetName = this._file.sheetName;
-                    if (!targetSheetName) {
-                        (0, Affirm_1.default)(excel.SheetNames.length > 0, 'The Excel file has no sheets.');
-                        targetSheetName = excel.SheetNames[0];
-                    }
-                    else {
-                        (0, Affirm_1.default)(excel.SheetNames.includes(targetSheetName), `The sheet "${targetSheetName}" doesn't exist in the excel (available: ${excel.SheetNames.join(', ')})`);
-                    }
-                    const sheet = excel.Sheets[targetSheetName];
-                    const csv = xlsx_1.default.utils.sheet_to_csv(sheet);
-                    const lines = csv.split('\n');
-                    this.clear();
-                    const records = lines.map(x => new DatasetRecord_1.default(x, this._dimensions, this._delimiter));
-                    yield this.append(records);
-                    break;
-                }
-                case 'XML': {
-                    const fileContent = fs_1.default.readFileSync(this._path, 'utf-8');
-                    const jsonData = XMLParser_1.default.xmlToJson(fileContent);
-                    // Convert JSON data to string lines. This might need adjustment based on XML structure.
-                    // Assuming jsonData is an array of objects, where each object is a record.
-                    const lines = Array.isArray(jsonData)
-                        ? jsonData.map(item => JSON.stringify(item))
-                        : [JSON.stringify(jsonData)];
-                    this.clear();
-                    const records = lines.map(x => new DatasetRecord_1.default(x, this._dimensions, this._delimiter));
-                    yield this.append(records);
-                    break;
-                }
-            }
-            this._finishOperation('prepare-with-dimensions');
-            return this;
-        });
         this.getDimensions = () => this._dimensions;
         this.setDimensions = (dimensions) => {
             this._dimensions = dimensions;

package/engines/dataset/DatasetManager.js
@@ -13,16 +13,11 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
 };
 Object.defineProperty(exports, "__esModule", { value: true });
 const Affirm_1 = __importDefault(require("../../core/Affirm"));
-const Algo_1 = __importDefault(require("../../core/Algo"));
-const ConsumerManager_1 = __importDefault(require("../consumer/ConsumerManager"));
-const Environment_1 = __importDefault(require("../Environment"));
-const FileCompiler_1 = __importDefault(require("../file/FileCompiler"));
 const ParseManager_1 = __importDefault(require("../parsing/ParseManager"));
 const Dataset_1 = __importDefault(require("./Dataset"));
-const promises_1 = require("stream/promises");
-const fs_1 = __importDefault(require("fs"));
 const DeveloperEngine_1 = __importDefault(require("../ai/DeveloperEngine"));
 const Constants_1 = __importDefault(require("../../Constants"));
+const ProducerManager_1 = __importDefault(require("../producer/ProducerManager"));
 class DatasetManagerClass {
     constructor() {
         /**
@@ -55,13 +50,6 @@ class DatasetManagerClass {
             });
             return dataset;
         };
-        this.buildDimensions = (dataset_1, producer_1, ...args_1) => __awaiter(this, [dataset_1, producer_1, ...args_1], void 0, function* (dataset, producer, discover = false) {
-            (0, Affirm_1.default)(dataset, `Invalid dataset`);
-            (0, Affirm_1.default)(producer, `Invalid producer`);
-            const firstLine = dataset.getFirstLine();
-            Affirm_1.default.hasValue(firstLine, `The first line of the dataset was not set.`);
-            return this.buildDimensionsFromFirstLine(firstLine, dataset.getFile(), producer, discover);
-        });
         this.buildDimensionsFromFirstLine = (firstLine_1, dsFile_1, producer_1, ...args_1) => __awaiter(this, [firstLine_1, dsFile_1, producer_1, ...args_1], void 0, function* (firstLine, dsFile, producer, discover = false) {
             var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o;
             Affirm_1.default.hasValue(firstLine, `Invalid first line`);
@@ -69,8 +57,6 @@
             (0, Affirm_1.default)(producer, `Invalid producer`);
             const file = dsFile;
             switch (file.fileType) {
-                case 'XLS':
-                case 'XLSX':
                 case 'CSV': {
                     const delimiterChar = (_a = file.delimiter) !== null && _a !== void 0 ? _a : ',';
                     const headerLine = firstLine;
@@ -88,12 +74,17 @@
                 }
                 case 'PARQUET':
                 case 'JSONL':
-                case 'XML':
                 case 'JSON': {
-                    const
-                    const columns = FileCompiler_1.default.compileProducer(producer, source);
+                    const columns = ProducerManager_1.default.getColumns(producer);
                     const firstObject = JSON.parse(firstLine);
                     const keys = Object.keys(firstObject);
+                    // const columnsWithDot = columns.filter(x => x.aliasInProducer.includes('.'))
+                    // if (columnsWithDot.length > 0) {
+                    //     console.log(columns, keys, 'PAPAPAPP')
+                    //     for (const colWithDot of columnsWithDot) {
+                    //         console.log(colWithDot)
+                    //     }
+                    // }
                     // If includeSourceFilename is enabled, the driver has added $source_filename column
                     // We need to add it to the keys list so dimensions can reference it
                     const includeSourceFilename = file.includeSourceFilename === true;
@@ -146,8 +137,7 @@
                 if (!file.hasHeaderRow) {
                     // If the file is a TXT and there isn't an header row, then I add a fake one that maps directly to the producer
                     const delimiterChar = (_k = file.delimiter) !== null && _k !== void 0 ? _k : ',';
-                    const
-                    const columns = FileCompiler_1.default.compileProducer(producer, source);
+                    const columns = ProducerManager_1.default.getColumns(producer);
                     const includeSourceFilename = file.includeSourceFilename === true;
                     if (discover) {
                         // Since I don't have an header, and I'm discovering, I just create placeholder dimensions based on the same number of columns of the txt
@@ -207,64 +197,14 @@
                         };
                     }
                 }
+                case 'XLS':
+                    break;
+                case 'XLSX':
+                    break;
+                case 'XML':
+                    break;
             }
         });
-        this.computeDimensionsUpdates = (dataset, consumer) => {
-            var _a;
-            (0, Affirm_1.default)(dataset, 'Invalid dataset');
-            (0, Affirm_1.default)(consumer, 'Invalid consumer');
-            const fields = ConsumerManager_1.default.getExpandedFields(consumer);
-            const dimensions = dataset.getDimensions();
-            let updates = [];
-            // Add all the updates
-            for (let i = 0; i < fields.length; i++) {
-                const { cField } = fields[i];
-                const currentMatch = structuredClone(dimensions.find(x => x.name === cField.key));
-                if (!currentMatch && !cField.fixed)
-                    throw new Error(`The requested field "${cField.key}" from the consumer is not present in the underlying dataset "${dataset.name}" (${dimensions.map(x => x.name).join(', ')})`);
-                updates.push({
-                    currentDimension: currentMatch,
-                    newName: (_a = cField.alias) !== null && _a !== void 0 ? _a : cField.key,
-                    newHidden: cField.hidden,
-                    newPosition: i,
-                    toDelete: false
-                });
-            }
-            // Add all the updates to remove dimensions
-            for (const dim of dimensions) {
-                if (!updates.find(x => { var _a; return ((_a = x.currentDimension) === null || _a === void 0 ? void 0 : _a.name) === dim.name; }))
-                    updates.push({ currentDimension: dim, toDelete: true });
-            }
-            // Now keep only the updates that actually change something
-            updates = updates.filter(x => x.toDelete
-                || !x.currentDimension
-                || (x.currentDimension && (x.currentDimension.name !== x.newName
-                    || (Algo_1.default.hasVal(x.newHidden) && x.newHidden !== x.currentDimension.hidden)
-                    || x.newPosition !== x.currentDimension.index)));
-            return updates;
-        };
-        /**
-         * Each worker threads writes to his own dataset file to avoid concurrency and data loss,
-         * at the end of their work, I merge their results to a single file
-         */
-        this.mergeWorkersPaths = (threadPaths, dataset) => __awaiter(this, void 0, void 0, function* () {
-            dataset.clear();
-            const datasetPath = dataset.getPath();
-            for (let i = 0; i < threadPaths.length; i++) {
-                const path = threadPaths[i];
-                // If the thread skipped execution (maybe because no data needed to change), then the
-                // dataset file might not exist, in this case, just skip it
-                if (!fs_1.default.existsSync(path))
-                    continue;
-                const readStream = fs_1.default.createReadStream(path);
-                // For the first file, create a new write stream
-                // For subsequent files, append to the existing file
-                const writeStream = fs_1.default.createWriteStream(datasetPath, { flags: i === 0 ? 'w' : 'a' });
-                yield (0, promises_1.pipeline)(readStream, writeStream);
-                fs_1.default.unlinkSync(path);
-            }
-            return dataset;
-        });
     }
 }
 const DatasetManager = new DatasetManagerClass();

package/engines/dataset/DatasetRecord.js
@@ -11,7 +11,8 @@ class DatasetRecord {
         this.parse = (row, delimiter, dimensions) => {
             if (!this.isEmpty() && dimensions.length > 0) {
                 const parts = CSVParser_1.default.parseRow(row, delimiter);
-                for (
+                for (let i = 0; i < dimensions.length; i++) {
+                    const dim = dimensions[i];
                     // Use dim.index to get the correct column from the file, not the loop index
                     this._value[dim.name] = TypeCaster_1.default.cast(parts[dim.index], dim.type, dim.format);
                 }
@@ -52,8 +53,8 @@ class DatasetRecord {
                 index: update.newPosition,
                 key: update.newName,
                 name: update.newName,
-
-
+                hidden: update.newHidden,
+                type: (_b = (_a = update.currentDimension) === null || _a === void 0 ? void 0 : _a.type) !== null && _b !== void 0 ? _b : 'string'
             };
             this._value[newDimension.name] = null;
             this._dimensions = [...this._dimensions, newDimension];

package/engines/deployment/DeploymentPlanner.js
@@ -14,13 +14,6 @@ class DeploymentPlannerClass {
             const output = consumer.outputs[i];
             switch (output.format) {
                 // csv, json, parquet outputs do not need to generate anything at deploy
-                case 'SQL': {
-                    if (output.accelerated && !output.direct)
-                        plan.push({ type: 'create-materialized-view', output: output });
-                    else if (!output.direct)
-                        plan.push({ type: 'create-view', output: output });
-                    break;
-                }
                 case 'API': {
                     throw new Error(`Invalid consumer "${consumer.name}" format "${output.format}": not implemented yet.`);
                 }

package/engines/execution/ExecutionPlanner.js
@@ -7,7 +7,7 @@ const Affirm_1 = __importDefault(require("../../core/Affirm"));
 const Algo_1 = __importDefault(require("../../core/Algo"));
 const ConsumerManager_1 = __importDefault(require("../consumer/ConsumerManager"));
 const Environment_1 = __importDefault(require("../Environment"));
-class ExecutionPlannerClas {
+class ExecutionPlannerClass {
     constructor() {
         this.getEngineClass = (engine) => {
             switch (engine) {
@@ -127,5 +127,5 @@ class ExecutionPlannerClas {
         };
     }
 }
-const ExecutionPlanner = new ExecutionPlannerClas();
+const ExecutionPlanner = new ExecutionPlannerClass();
 exports.default = ExecutionPlanner;

package/engines/execution/RequestExecutor.js
@@ -1,41 +1,11 @@
 "use strict";
-var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
-    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
-    return new (P || (P = Promise))(function (resolve, reject) {
-        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
-        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
-        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
-        step((generator = generator.apply(thisArg, _arguments || [])).next());
-    });
-};
-var __importDefault = (this && this.__importDefault) || function (mod) {
-    return (mod && mod.__esModule) ? mod : { "default": mod };
-};
 Object.defineProperty(exports, "__esModule", { value: true });
-const Affirm_1 = __importDefault(require("../../core/Affirm"));
 class RequestExecutorClass {
     constructor() {
-        /**
-         * Applies the filters, limit, offset and order on the in-memory-data
-         */
-        this.execute = (dataset, request) => __awaiter(this, void 0, void 0, function* () {
-            (0, Affirm_1.default)(dataset, 'Invalid data');
-            (0, Affirm_1.default)(request, 'Invalid request');
-            if (request.filters)
-                dataset = yield this.applyFilters(dataset, request.filters);
-            if (request.order)
-                dataset = yield this._applyOrdering(dataset, request.order);
-            return dataset;
-        });
-        this.applyFilters = (dataset_1, filters_1, ...args_1) => __awaiter(this, [dataset_1, filters_1, ...args_1], void 0, function* (dataset, filters, options = {}) {
-            return yield dataset.filter(record => {
-                return filters.every(filter => this._evaluateFilter(record, filter));
-            }, options);
-        });
-        this._evaluateFilter = (record, filter) => {
+        this.evaluateFilter = (record, filter) => {
             const evaluate = (baseRecord, baseFilter) => {
                 const { member, operator, values } = baseFilter;
-                const value = baseRecord
+                const value = baseRecord[member];
                 const singleValue = values[0];
                 switch (operator) {
                     case 'equals':
|
|
|
85
55
|
const { and, or } = filter;
|
|
86
56
|
const baseResult = evaluate(record, filter);
|
|
87
57
|
if (and)
|
|
88
|
-
return baseResult && and.every(subFilter => this.
|
|
58
|
+
return baseResult && and.every(subFilter => this.evaluateFilter(record, subFilter));
|
|
89
59
|
if (or)
|
|
90
|
-
return baseResult || or.some(subFilter => this.
|
|
60
|
+
return baseResult || or.some(subFilter => this.evaluateFilter(record, subFilter));
|
|
91
61
|
else
|
|
92
62
|
return baseResult;
|
|
93
63
|
};
|
|
94
|
-
this._applyOrdering = (dataset, order) => __awaiter(this, void 0, void 0, function* () {
|
|
95
|
-
return yield dataset.sort((a, b) => {
|
|
96
|
-
for (const [field, direction] of order) {
|
|
97
|
-
if (a[field] < b[field])
|
|
98
|
-
return direction === 'asc' ? -1 : 1;
|
|
99
|
-
if (a[field] > b[field])
|
|
100
|
-
return direction === 'asc' ? 1 : -1;
|
|
101
|
-
}
|
|
102
|
-
return 0;
|
|
103
|
-
});
|
|
104
|
-
});
|
|
105
64
|
}
|
|
106
65
|
}
|
|
107
66
|
const RequestExecutor = new RequestExecutorClass();
|