@forzalabs/remora 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/engines/scheduler/CronScheduler.js +2 -2
- package/engines/scheduler/QueueManager.js +2 -2
- package/package.json +1 -1
- package/settings.js +12 -0
- package/documentation/default_resources/schema.json +0 -36
- package/drivers/LocalDriver.js +0 -542
- package/drivers/S3Driver.js +0 -563
- package/drivers/S3SourceDriver.js +0 -132
- package/engines/DataframeManager.js +0 -55
- package/engines/ParseManager.js +0 -75
- package/engines/ProducerEngine.js +0 -160
- package/engines/UsageDataManager.js +0 -110
- package/engines/UsageManager.js +0 -61
- package/engines/Validator.js +0 -157
- package/engines/consumer/ConsumerEngine.js +0 -128
- package/engines/consumer/PostProcessor.js +0 -253
- package/engines/dataset/ParallelDataset.js +0 -184
- package/engines/dataset/TransformWorker.js +0 -2
- package/engines/dataset/definitions.js +0 -2
- package/engines/dataset/example-parallel-transform.js +0 -2
- package/engines/dataset/test-parallel.js +0 -2
- package/engines/deployment/DeploymentPlanner.js +0 -39
- package/engines/execution/ExecutionEnvironment.js +0 -209
- package/engines/execution/ExecutionPlanner.js +0 -131
- package/engines/file/FileCompiler.js +0 -29
- package/engines/file/FileContentBuilder.js +0 -34
- package/engines/schema/SchemaEngine.js +0 -33
- package/engines/sql/SQLBuilder.js +0 -96
- package/engines/sql/SQLCompiler.js +0 -141
- package/engines/sql/SQLUtils.js +0 -22
- package/workers/FilterWorker.js +0 -62
- package/workers/ProjectionWorker.js +0 -63
- package/workers/TransformWorker.js +0 -63
- package/workers/TsWorker.js +0 -14
package/engines/Validator.js
DELETED
@@ -1,157 +0,0 @@
-"use strict";
-var __importDefault = (this && this.__importDefault) || function (mod) {
-    return (mod && mod.__esModule) ? mod : { "default": mod };
-};
-Object.defineProperty(exports, "__esModule", { value: true });
-const Affirm_1 = __importDefault(require("../core/Affirm"));
-const Algo_1 = __importDefault(require("../core/Algo"));
-const ConsumerManager_1 = __importDefault(require("./consumer/ConsumerManager"));
-const Environment_1 = __importDefault(require("./Environment"));
-const ExecutionPlanner_1 = __importDefault(require("./execution/ExecutionPlanner"));
-class ValidatorClass {
-    constructor() {
-        this.validateSources = (sources) => {
-            (0, Affirm_1.default)(sources, 'Invalid sources');
-            const errors = [];
-            try {
-                const dupes = Algo_1.default.duplicatesObject(sources, 'name');
-                if (dupes.length > 0)
-                    errors.push(`Duplicate name(s) found in sources: "${dupes.map(x => x.name).join(', ')}"`);
-                for (let i = 0; i < sources.length; i++) {
-                    const source = sources[i];
-                    if (source.engine === 'local' && !source.authentication.path)
-                        errors.push(`For source ${source.name}, the path has not been configured`);
-                }
-            }
-            catch (e) {
-                if (errors.length === 0)
-                    errors.push(`There was an error in the validation Sources. (error: ${e})`);
-            }
-            return errors;
-        };
-        this.validateProducers = (producers) => {
-            (0, Affirm_1.default)(producers, 'Invalid producers');
-            const errors = [];
-            try {
-                const dupes = Algo_1.default.duplicatesObject(producers, 'name');
-                if (dupes.length > 0)
-                    errors.push(`Duplicate name(s) found in producers: "${dupes.map(x => x.name).join(', ')}"`);
-            }
-            catch (e) {
-                if (errors.length === 0)
-                    errors.push(`There was an error in the validation Producers. (error: ${e})`);
-            }
-            return errors;
-        };
-        this.validateProducer = (producer) => {
-            (0, Affirm_1.default)(producer, 'Invalid producer');
-            const errors = [];
-            try {
-                if (!producer.source || producer.source.length === 0)
-                    errors.push(`Missing parameter "source" in producer`);
-                if (producer.dimensions.some(x => x.name.includes('{') || x.name.includes('[')))
-                    errors.push(`Invalid dimension name found in producer "${producer.name}": can't use characters "{" or "[" in dimension names`);
-            }
-            catch (e) {
-                if (errors.length === 0)
-                    errors.push(`There was an error in the validation Producer. (error: ${e})`);
-            }
-            return errors;
-        };
-        this.validateConsumers = (consumers) => {
-            (0, Affirm_1.default)(consumers, 'Invalid consumers');
-            const errors = [];
-            try {
-                const dupes = Algo_1.default.duplicatesObject(consumers, 'name');
-                if (dupes.length > 0)
-                    errors.push(`Duplicate name(s) found in consumers: "${dupes.map(x => x.name).join(', ')}"`);
-            }
-            catch (e) {
-                if (errors.length === 0)
-                    errors.push(`There was an error in the validation Consumers. (error: ${e})`);
-            }
-            return errors;
-        };
-        this.validateConsumer = (consumer) => {
-            (0, Affirm_1.default)(consumer, 'Invalid consumer');
-            const errors = [];
-            try {
-                // TODO: check that a consumer doesn't consume hitself
-                const allFieldsWithNoFrom = consumer.fields.filter(x => x.key === '*' && !x.from);
-                if (allFieldsWithNoFrom.length > 0 && consumer.producers.length > 1)
-                    errors.push(`Field with key "*" was used without specifying the "from" producer and multiple producers were found.`);
-                if (consumer.fields.some(x => x.key === '*' && x.grouping))
-                    errors.push(`Field with key "*" can't be used for "grouping". Either remove the grouping or change the key.`);
-                // Validation on producers
-                if (consumer.producers.length === 0)
-                    errors.push(`Consumer must have at least 1 producer.`);
-                const producers = consumer.producers.map(x => Environment_1.default.getProducer(x.name));
-                if (producers.length === 0)
-                    errors.push('No producers found');
-                if (producers.some(x => !x))
-                    errors.push(`Invalid producer found in consumer "${consumer.name}"`);
-                // Validation on sources
-                const sources = producers.map(x => Environment_1.default.getSource(x.source));
-                if (sources.length === 0)
-                    errors.push('No sources found');
-                if (sources.some(x => !x))
-                    errors.push(`Invalid source found in consumer "${consumer.name}"`);
-                // For now we only support connecting producers of the same engine type to a consumer, so we give an error if we detect different ones
-                const uniqEngines = Algo_1.default.uniqBy(sources, 'engine');
-                if (uniqEngines.length !== 1)
-                    errors.push(`Sources with different engines were used in the consumer "${consumer.name}" (${uniqEngines.join(', ')})`);
-                // For now we also only support consumers that have producers ALL having the same exact source
-                const uniqNames = Algo_1.default.uniqBy(sources, 'name');
-                if (uniqNames.length !== 1)
-                    errors.push(`Producers with different sources were used in the consumer "${consumer.name}" (${uniqNames.join(', ')})`);
-                if (consumer.filters && consumer.filters.length > 0) {
-                    if (consumer.filters.some(x => x.sql && x.rule))
-                        errors.push(`A single consumer can't have both filters based on SQL and filters based on rules.`);
-                    const [source] = ConsumerManager_1.default.getSource(consumer);
-                    const engineClass = ExecutionPlanner_1.default.getEngineClass(source.engine);
-                    if (engineClass === 'file' && consumer.filters.some(x => x.sql))
-                        errors.push(`Filters based on SQL are only valid for SQL based sources. (source: ${source.name})`);
-                    if (engineClass === 'sql' && consumer.filters.some(x => x.rule))
-                        errors.push(`Filters based on rules are only valid for non-SQL based sources. (source: ${source.name})`);
-                }
-                // Validation on fields
-                const validateGroupingLevels = (fields, level = 0) => {
-                    let errors = [];
-                    const groupingFields = fields.filter(x => x.grouping);
-                    if (groupingFields.length > 1)
-                        errors.push(`There can't be 2 fields with grouping defined at the same level (${groupingFields.map(x => x.key).join(', ')}). Level: ${level}`);
-                    groupingFields.forEach((field) => {
-                        if (field.grouping)
-                            errors = [...errors, ...validateGroupingLevels(field.grouping.subFields, level + 1)];
-                    });
-                    return errors;
-                };
-                errors.push(...validateGroupingLevels(consumer.fields));
-                // Validation outputs
-                const duplicatesOutputs = Algo_1.default.duplicatesObject(consumer.outputs, 'format');
-                if (duplicatesOutputs.length > 0) {
-                    const duplicatesTypes = Algo_1.default.uniq(duplicatesOutputs.map(x => x.format));
-                    errors.push(`There are outputs with the same type. (duplicates type: ${duplicatesTypes.join(' and ')})`);
-                }
-                for (const output of consumer.outputs) {
-                    const format = output.format.toUpperCase();
-                    if (format === 'SQL' && output.accellerated && output.direct)
-                        errors.push(`An output SQL cannot be both direct and accelerated (output: ${format})`);
-                    if ((format === 'CSV' || format === 'JSON' || format === 'PARQUET')) {
-                        if (!output.exportDestination)
-                            errors.push(`A static file output must have an export destination set (${format})`);
-                        else if (!Environment_1.default.getSource(output.exportDestination))
-                            errors.push(`The export destination "${output.exportDestination}" was not found in the sources.`);
-                    }
-                }
-            }
-            catch (e) {
-                if (errors.length === 0)
-                    errors.push(`There was an error in the validation Consumer. (error: ${e})`);
-            }
-            return errors;
-        };
-    }
-}
-const Validator = new ValidatorClass();
-exports.default = Validator;
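Note: every validate* method above leans on Algo.duplicatesObject from core/Algo, which is not included in this diff. A minimal sketch of what that helper presumably does, written purely as an illustration (the implementation below is an assumption, not the package's actual code):

// Assumed behavior of Algo.duplicatesObject(items, key): return every item
// whose value for `key` occurs more than once in the array.
const duplicatesObject = (items, key) => {
    const counts = {};
    for (const item of items)
        counts[item[key]] = (counts[item[key]] || 0) + 1;
    return items.filter(item => counts[item[key]] > 1);
};

// Example: two sources share the name "warehouse", so both are flagged.
// duplicatesObject([{ name: 'warehouse' }, { name: 'warehouse' }, { name: 'lake' }], 'name')
// -> [{ name: 'warehouse' }, { name: 'warehouse' }]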
package/engines/consumer/ConsumerEngine.js
DELETED
@@ -1,128 +0,0 @@
-"use strict";
-var __importDefault = (this && this.__importDefault) || function (mod) {
-    return (mod && mod.__esModule) ? mod : { "default": mod };
-};
-Object.defineProperty(exports, "__esModule", { value: true });
-const Affirm_1 = __importDefault(require("../../core/Affirm"));
-const Environment_1 = __importDefault(require("../Environment"));
-const ProducerManager_1 = __importDefault(require("../producer/ProducerManager"));
-const ConsumerManager_1 = __importDefault(require("./ConsumerManager"));
-class ConsumerEngineClass {
-    constructor() {
-        this.compile = (consumer) => {
-            var _a, _b;
-            (0, Affirm_1.default)(consumer, `Invalid consumer`);
-            const availableColumns = consumer.producers.flatMap(cProd => {
-                var _a, _b;
-                const producer = Environment_1.default.getProducer(cProd.name);
-                if (!producer) {
-                    const subConsumer = Environment_1.default.getConsumer(cProd.name);
-                    (0, Affirm_1.default)(subConsumer, `No producer found with name "${cProd.name}"`);
-                    return this.compile(subConsumer);
-                }
-                else {
-                    const dims = producer.dimensions.map(x => ({
-                        consumerAlias: null,
-                        consumerKey: null,
-                        nameInProducer: x.name,
-                        aliasInProducer: x.alias,
-                        dimension: x,
-                        owner: cProd.name
-                    }));
-                    const meas = (_b = (_a = producer.measures) === null || _a === void 0 ? void 0 : _a.map(x => ({
-                        consumerAlias: null,
-                        consumerKey: null,
-                        nameInProducer: x.name,
-                        aliasInProducer: x.name,
-                        measure: x,
-                        owner: cProd.name
-                    }))) !== null && _b !== void 0 ? _b : [];
-                    return [...dims, ...meas];
-                }
-            });
-            const selectedColumns = [];
-            const flat = ConsumerManager_1.default.getConsumerFlatFields(consumer);
-            for (let i = 0; i < flat.length; i++) {
-                const field = flat[i];
-                // TODO: replace with the new funcitons in the consumermanager to reduce diplicate code
-                if (field.key === '*') {
-                    const from = (_a = field.from) !== null && _a !== void 0 ? _a : (consumer.producers.length === 1 ? consumer.producers[0].name : null);
-                    availableColumns.filter(x => x.owner === from).forEach(col => {
-                        col.consumerKey = col.nameInProducer;
-                        col.consumerAlias = col.nameInProducer;
-                        selectedColumns.push(col);
-                    });
-                }
-                else {
-                    const col = ConsumerManager_1.default.searchFieldInColumns(field, availableColumns, consumer);
-                    (0, Affirm_1.default)(col, `Consumer "${consumer.name}" misconfiguration: the requested field "${field.key}" is not found in any of the specified producers ("${consumer.producers.map(x => x.name).join(', ')}")`);
-                    col.consumerKey = field.key;
-                    col.consumerAlias = (_b = field.alias) !== null && _b !== void 0 ? _b : field.key;
-                    selectedColumns.push(col);
-                }
-            }
-            const columnsWithNoAlias = selectedColumns.filter(x => !x.consumerAlias || !x.consumerKey);
-            (0, Affirm_1.default)(columnsWithNoAlias.length === 0, `Consumer "${consumer.name}" compilation error: some selected fields don't have a correct alias or key (${columnsWithNoAlias.map(x => x.nameInProducer).join(', ')})`);
-            return selectedColumns;
-        };
-        this.getOutputShape = (consumer) => {
-            (0, Affirm_1.default)(consumer, `Invalid consumer`);
-            const compiled = this.compile(consumer);
-            const outDimensions = compiled.map(x => {
-                var _a, _b, _c, _d, _e, _f, _g;
-                return ({
-                    name: (_a = x.consumerAlias) !== null && _a !== void 0 ? _a : x.consumerKey,
-                    type: (_b = x.dimension) === null || _b === void 0 ? void 0 : _b.type,
-                    classification: (_c = x.dimension) === null || _c === void 0 ? void 0 : _c.classification,
-                    description: (_e = (_d = x.dimension) === null || _d === void 0 ? void 0 : _d.description) !== null && _e !== void 0 ? _e : (_f = x.measure) === null || _f === void 0 ? void 0 : _f.description,
-                    mask: ProducerManager_1.default.getMask(x.dimension),
-                    pk: (_g = x.dimension) === null || _g === void 0 ? void 0 : _g.pk
-                });
-            });
-            return {
-                _version: consumer._version,
-                name: consumer.name,
-                description: consumer.description,
-                metadata: consumer.metadata,
-                dimensions: outDimensions
-            };
-        };
-        /**
-         * Given a consumer, create the entire dependency chain of all the sub-consumers, producers and finally sources that are used by this consumer
-         */
-        this.getDependencyChain = (consumer, depth = 0) => {
-            (0, Affirm_1.default)(consumer, `Invalid consumer`);
-            const chain = [];
-            for (let i = 0; i < consumer.producers.length; i++) {
-                const cProd = consumer.producers[i];
-                const producer = Environment_1.default.getProducer(cProd.name);
-                if (!producer) {
-                    const subConsumer = Environment_1.default.getConsumer(cProd.name);
-                    (0, Affirm_1.default)(subConsumer, `No producer found with name "${cProd.name}"`);
-                    chain.push({
-                        depth: depth,
-                        from: { name: consumer.name, type: 'consumer' },
-                        to: { name: subConsumer.name, type: 'consumer' }
-                    });
-                    if (subConsumer.producers && subConsumer.producers.length > 0)
-                        return [...chain, ...this.getDependencyChain(subConsumer, depth + 1)];
-                }
-                else {
-                    chain.push({
-                        depth: depth,
-                        from: { name: consumer.name, type: 'consumer' },
-                        to: { name: producer.name, type: 'producer' }
-                    });
-                    chain.push({
-                        depth: depth + 1,
-                        from: { name: producer.name, type: 'producer' },
-                        to: { name: producer.source, type: 'source' }
-                    });
-                }
-            }
-            return chain.sort((a, b) => a.depth - b.depth);
-        };
-    }
-}
-const ConsumerEngine = new ConsumerEngineClass();
-exports.default = ConsumerEngine;
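For context, getDependencyChain above resolves producer and source names through Environment, which this diff does not include. A self-contained sketch of the edge list it produced, using hypothetical names (daily_report, orders_raw, and s3_lake are invented for illustration; the object shape is copied from the deleted code):

// Hypothetical setup: consumer "daily_report" reads producer "orders_raw",
// which in turn reads source "s3_lake".
const consumer = { name: 'daily_report', producers: [{ name: 'orders_raw' }] };
const producer = { name: 'orders_raw', source: 's3_lake' };

// The chain getDependencyChain would build for this setup, sorted by depth:
// one consumer -> producer edge at depth 0, one producer -> source edge at depth 1.
const chain = [
    { depth: 0, from: { name: consumer.name, type: 'consumer' }, to: { name: producer.name, type: 'producer' } },
    { depth: 1, from: { name: producer.name, type: 'producer' }, to: { name: producer.source, type: 'source' } }
].sort((a, b) => a.depth - b.depth);
console.log(chain);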
package/engines/consumer/PostProcessor.js
DELETED
@@ -1,253 +0,0 @@
-"use strict";
-var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
-    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
-    return new (P || (P = Promise))(function (resolve, reject) {
-        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
-        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
-        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
-        step((generator = generator.apply(thisArg, _arguments || [])).next());
-    });
-};
-var __importDefault = (this && this.__importDefault) || function (mod) {
-    return (mod && mod.__esModule) ? mod : { "default": mod };
-};
-Object.defineProperty(exports, "__esModule", { value: true });
-const Affirm_1 = __importDefault(require("../../core/Affirm"));
-const Algo_1 = __importDefault(require("../../core/Algo"));
-const CryptoEngine_1 = __importDefault(require("../CryptoEngine"));
-const DatasetManager_1 = __importDefault(require("../dataset/DatasetManager"));
-const DatasetRecord_1 = __importDefault(require("../dataset/DatasetRecord"));
-const Environment_1 = __importDefault(require("../Environment"));
-const FileCompiler_1 = __importDefault(require("../file/FileCompiler"));
-const ProducerManager_1 = __importDefault(require("../producer/ProducerManager"));
-const TypeCaster_1 = __importDefault(require("../transform/TypeCaster"));
-const ConsumerManager_1 = __importDefault(require("./ConsumerManager"));
-class PostProcessorClass {
-    constructor() {
-        /**
-         * Maps an array of objects and projects it to another array of objects but with different shape:
-         * - updates the dimensions of the dataset (drop, rename, reorder, hide)
-         * - type casting
-         * - default field values
-         * - masking/hashing of data
-         */
-        this.doProjection = (consumer, dataset, options) => __awaiter(this, void 0, void 0, function* () {
-            (0, Affirm_1.default)(consumer, 'Invalid consumer');
-            (0, Affirm_1.default)(dataset, 'Invalid dataset');
-            const fields = ConsumerManager_1.default.getExpandedFields(consumer);
-            const dimensionsUpdates = DatasetManager_1.default.computeDimensionsUpdates(dataset, consumer);
-            let updatedDimensions = null;
-            dataset = yield dataset.map(record => {
-                var _a, _b;
-                // First apply the updates to the dimensions of this record
-                if (dimensionsUpdates.length > 0) {
-                    for (const update of dimensionsUpdates) {
-                        record.wholeUpdateDimension(update);
-                    }
-                    record.sortDimensions();
-                }
-                if (!updatedDimensions)
-                    updatedDimensions = record._dimensions;
-                // Finally apply the rules and changes of the consumer fields to the record
-                for (const field of fields) {
-                    const { key, alias } = field.cField;
-                    const fieldKey = alias !== null && alias !== void 0 ? alias : key;
-                    const maskType = ProducerManager_1.default.getMask(field.dimension);
-                    const fieldType = (_b = (_a = field.dimension) === null || _a === void 0 ? void 0 : _a.type) !== null && _b !== void 0 ? _b : 'string';
-                    const fieldValue = this._getFieldValue(record, field);
-                    if (Algo_1.default.hasVal(maskType))
-                        record.setValue(fieldKey, CryptoEngine_1.default.hashValue(maskType, fieldValue, fieldType));
-                    else
-                        record.setValue(fieldKey, TypeCaster_1.default.cast(fieldValue, fieldType));
-                }
-                return record;
-            }, options);
-            if (!updatedDimensions || updatedDimensions.length === 0) {
-                // This means that no updates were applied cause no records were present
-                // I need to force a fake update to get the new dimensions, since those might still have changed
-                const fakeRecord = new DatasetRecord_1.default('', dataset.getDimensions(), dataset.getDelimiter());
-                for (const update of dimensionsUpdates)
-                    fakeRecord.wholeUpdateDimension(update);
-                updatedDimensions = fakeRecord._dimensions;
-            }
-            dataset.setDimensions(updatedDimensions);
-            return dataset;
-        });
-        /**
-         * Gets an array of objects (with potentially nested fields) and unpacks them to an array of objects with no nested fields
-         * If some nested keys are lists, then a logic similar to a SQL JOIN is used and rows are duplicated
-         */
-        this.unpack = (dataset, producer) => __awaiter(this, void 0, void 0, function* () {
-            (0, Affirm_1.default)(dataset, 'Invalid dataset');
-            (0, Affirm_1.default)(producer, 'Invalid producer');
-            const source = Environment_1.default.getSource(producer.source);
-            (0, Affirm_1.default)(source, `No source found for producer "${producer.name}" with name "${producer.source}"`);
-            const columns = FileCompiler_1.default.compileProducer(producer, source);
-            (0, Affirm_1.default)(columns, `Invalid columns from compilation for producer "${producer.name}"`);
-            const unpackDimension = (item, dimension) => {
-                var _a, _b;
-                const { nameInProducer, aliasInProducer } = dimension;
-                const maskType = ProducerManager_1.default.getMask(dimension.dimension);
-                const fieldType = (_b = (_a = dimension.dimension) === null || _a === void 0 ? void 0 : _a.type) !== null && _b !== void 0 ? _b : 'string';
-                const keys = aliasInProducer.split('.');
-                // Parse the JSON content from the DatasetRecord first
-                let prevValue;
-                try {
-                    // Try to parse as JSON if the raw content looks like JSON
-                    const rawContent = item.getRaw();
-                    if (rawContent.trim().startsWith('{') || rawContent.trim().startsWith('[')) {
-                        prevValue = JSON.parse(rawContent);
-                    }
-                    else {
-                        // For non-JSON data, create an object from the record's values
-                        const recordObj = {};
-                        const dimensions = dataset.getDimensions();
-                        for (const dim of dimensions) {
-                            recordObj[dim.name] = item.getValue(dim.name);
-                        }
-                        prevValue = recordObj;
-                    }
-                }
-                catch (_c) {
-                    // Fallback to using the record's values directly
-                    const recordObj = {};
-                    const dimensions = dataset.getDimensions();
-                    for (const dim of dimensions) {
-                        recordObj[dim.name] = item.getValue(dim.name);
-                    }
-                    prevValue = recordObj;
-                }
-                // Navigate through the nested structure
-                for (const key of keys) {
-                    if (key.includes('{')) {
-                        const cleanedKey = key.replace('{', '').replace('}', '');
-                        if (Array.isArray(prevValue))
-                            prevValue = prevValue === null || prevValue === void 0 ? void 0 : prevValue.map((x) => x[cleanedKey]);
-                        else
-                            prevValue = prevValue === null || prevValue === void 0 ? void 0 : prevValue[cleanedKey];
-                    }
-                    else if (key.includes('[')) {
-                        const cleanedKey = key.replace('[', '').replace(']', '');
-                        if (Array.isArray(prevValue))
-                            prevValue = prevValue === null || prevValue === void 0 ? void 0 : prevValue.flatMap((x) => x[cleanedKey]);
-                        else
-                            prevValue = prevValue === null || prevValue === void 0 ? void 0 : prevValue[cleanedKey];
-                    }
-                    else {
-                        if (Array.isArray(prevValue))
-                            prevValue = prevValue === null || prevValue === void 0 ? void 0 : prevValue.map((x) => x[key]);
-                        else
-                            prevValue = prevValue === null || prevValue === void 0 ? void 0 : prevValue[key];
-                    }
-                }
-                // Apply masking/hashing
-                const valueAsString = prevValue !== null && prevValue !== undefined ? String(prevValue) : '';
-                prevValue = CryptoEngine_1.default.hashValue(maskType, valueAsString, fieldType);
-                const res = { [nameInProducer]: prevValue };
-                return res;
-            };
-            const splitArrayFields = (item) => {
-                const keysWithArrayValues = Object.keys(item).filter(key => Array.isArray(item[key]));
-                if (keysWithArrayValues.length === 0)
-                    return [item];
-                const key = keysWithArrayValues[0];
-                const values = item[key];
-                const remainingItem = Object.assign({}, item);
-                delete remainingItem[key];
-                const splitRemaining = splitArrayFields(remainingItem);
-                return values.flatMap((value) => {
-                    return splitRemaining.map((remaining) => {
-                        return Object.assign(Object.assign({}, remaining), { [key]: value });
-                    });
-                });
-            };
-            const unpackSingle = (item) => {
-                var _a;
-                const unpackedRecord = {};
-                // Extract values from all columns
-                for (const column of columns) {
-                    const value = unpackDimension(item, column);
-                    Object.assign(unpackedRecord, value);
-                }
-                // Split array fields to create multiple records
-                const splitRecords = splitArrayFields(unpackedRecord);
-                // Convert back to DatasetRecord objects
-                const resultRecords = [];
-                // Get the expected field names from all columns to maintain consistent dimensions
-                const expectedFieldNames = columns.map(col => col.nameInProducer);
-                for (const splitRecord of splitRecords) {
-                    // Ensure all expected fields are present, filling with empty string if missing
-                    const normalizedRecord = {};
-                    for (const fieldName of expectedFieldNames) {
-                        normalizedRecord[fieldName] = (_a = splitRecord[fieldName]) !== null && _a !== void 0 ? _a : '';
-                    }
-                    // Create dimensions based on the expected field names
-                    const newDimensions = expectedFieldNames.map((key, index) => {
-                        var _a, _b, _c;
-                        return ({
-                            name: key,
-                            key: key,
-                            index: index,
-                            hidden: null,
-                            type: (_c = (_b = (_a = columns[index]) === null || _a === void 0 ? void 0 : _a.dimension) === null || _b === void 0 ? void 0 : _b.type) !== null && _c !== void 0 ? _c : 'string'
-                        });
-                    });
-                    // Create the row string
-                    const values = newDimensions.map(dim => {
-                        const value = normalizedRecord[dim.name];
-                        return value !== null && value !== undefined ? String(value) : '';
-                    });
-                    const rowString = values.join(dataset.getDelimiter());
-                    // Create new DatasetRecord
-                    const newRecord = new DatasetRecord_1.default(rowString, newDimensions, dataset.getDelimiter());
-                    resultRecords.push(newRecord);
-                }
-                return resultRecords;
-            };
-            const resDataset = yield dataset.flatMap(record => unpackSingle(record));
-            // Update the dataset dimensions to match the unpacked structure
-            // TODO: 99% certain this will cause a bug
-            if (columns.length > 0) {
-                const newDimensions = columns.map((col, index) => {
-                    var _a;
-                    return ({
-                        name: col.nameInProducer,
-                        key: col.nameInProducer,
-                        index: index,
-                        hidden: null,
-                        type: (_a = col.dimension) === null || _a === void 0 ? void 0 : _a.type
-                    });
-                });
-                resDataset.setDimensions(newDimensions);
-            }
-            return resDataset;
-        });
-        this._getFieldValue = (record, field) => {
-            var _a, _b, _c;
-            const { key, alias, fixed, default: defaultValue } = field.cField;
-            if (fixed && Algo_1.default.hasVal(defaultValue))
-                return defaultValue;
-            const fieldKey = alias !== null && alias !== void 0 ? alias : key;
-            const fieldValue = record.getValue(fieldKey);
-            const fieldType = (_b = (_a = field.dimension) === null || _a === void 0 ? void 0 : _a.type) !== null && _b !== void 0 ? _b : 'string';
-            if (Algo_1.default.hasVal(fieldValue) && !isNaN(fieldValue)) {
-                if (fieldType === 'number' && typeof fieldValue === 'string' && fieldValue.length === 0)
-                    return (_c = field.cField.default) !== null && _c !== void 0 ? _c : fieldValue;
-                else
-                    return fieldValue;
-            }
-            else if ((!Algo_1.default.hasVal(fieldValue) || (isNaN(fieldValue) && fieldType === 'number')) && Algo_1.default.hasVal(field.cField.default)) {
-                return field.cField.default;
-            }
-            else {
-                return fieldValue;
-            }
-        };
-        this.distinct = (dataset) => __awaiter(this, void 0, void 0, function* () {
-            (0, Affirm_1.default)(dataset, 'Invalid dataset');
-            return yield dataset.distinct();
-        });
-    }
-}
-const PostProcessor = new PostProcessorClass();
-exports.default = PostProcessor;
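The most intricate piece of the removed unpack logic is splitArrayFields, which fans a record with array-valued keys out into one flat record per combination, much like a SQL JOIN duplicates rows. The helper is self-contained, so it can be reproduced from the deleted code and run on its own; the only change below is modern spread syntax in place of the compiled Object.assign calls:

// Reproduced from the deleted PostProcessor above. Recursively splits the
// first array-valued key, then cross-joins each of its values with every
// record produced from the remaining keys.
const splitArrayFields = (item) => {
    const keysWithArrayValues = Object.keys(item).filter(key => Array.isArray(item[key]));
    if (keysWithArrayValues.length === 0)
        return [item];
    const key = keysWithArrayValues[0];
    const values = item[key];
    const remainingItem = { ...item };
    delete remainingItem[key];
    const splitRemaining = splitArrayFields(remainingItem);
    return values.flatMap(value => splitRemaining.map(remaining => ({ ...remaining, [key]: value })));
};

// Example: 2 tags x 2 regions -> 4 flat records.
// splitArrayFields({ id: 1, tags: ['a', 'b'], regions: ['eu', 'us'] })
// -> [{ id: 1, regions: 'eu', tags: 'a' }, { id: 1, regions: 'us', tags: 'a' },
//     { id: 1, regions: 'eu', tags: 'b' }, { id: 1, regions: 'us', tags: 'b' }]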