@forzalabs/remora 0.0.45-nasco.3 → 0.0.47-nasco.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Constants.js +1 -1
- package/definitions/json_schemas/consumer-schema.json +46 -0
- package/drivers/LocalDriver.js +2 -0
- package/engines/consumer/ConsumerManager.js +1 -1
- package/engines/consumer/PostProcessor.js +10 -15
- package/engines/dataset/Dataset.js +24 -6
- package/engines/dataset/DatasetManager.js +4 -4
- package/engines/dataset/DatasetRecord.js +16 -1
- package/engines/execution/ExecutionEnvironment.js +1 -0
- package/engines/transform/JoinEngine.js +2 -1
- package/engines/transform/TransformationEngine.js +36 -3
- package/engines/validation/Validator.js +12 -1
- package/package.json +1 -1
package/definitions/json_schemas/consumer-schema.json
CHANGED
@@ -173,6 +173,14 @@
         },
         "default": {
             "description": "Default value of the field if it is missing (or on error if specified)"
+        },
+        "hidden": {
+            "type": "boolean",
+            "description": "If set, the field is kept and used during processing, but omitted when exporting the data"
+        },
+        "fixed": {
+            "type": "boolean",
+            "description": "If set, \"default\" must have a value. This field is not searched in the underlying dataset, but is a fixed value set by the \"default\" prop."
         }
       },
       "required": [
@@ -428,6 +436,14 @@
         },
         "default": {
             "description": "Default value of the field if it is missing (or on error if specified)"
+        },
+        "hidden": {
+            "type": "boolean",
+            "description": "If set, the field is kept and used during processing, but omitted when exporting the data"
+        },
+        "fixed": {
+            "type": "boolean",
+            "description": "If set, \"default\" must have a value. This field is not searched in the underlying dataset, but is a fixed value set by the \"default\" prop."
         }
       },
       "required": [
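
Together, the two new flags cover two distinct needs: a hidden field flows through processing (joins, transformations, validation) but is stripped when the data is exported, while a fixed field is a constant column whose value always comes from "default" and is never searched in the underlying dataset. A minimal sketch of field definitions using the new properties; the field names and values here are hypothetical:

    // Used during processing, omitted from the exported file
    { "key": "internal_id", "hidden": true }

    // Constant column: "default" is required and is the value every record gets
    { "key": "source_system", "fixed": true, "default": "NASCO" }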
@@ -732,6 +748,36 @@
       },
       "required": ["append"],
       "additionalProperties": false
+    },
+    {
+      "type": "object",
+      "properties": {
+        "combine_fields": {
+          "type": "object",
+          "properties": {
+            "fields": {
+              "type": "array",
+              "items": {
+                "type": "string"
+              },
+              "description": "Array of field names to combine",
+              "minItems": 1
+            },
+            "separator": {
+              "type": "string",
+              "description": "Optional separator between fields (default: empty string)"
+            },
+            "template": {
+              "type": "string",
+              "description": "Optional template string with placeholders like '{field1} - {field2}'"
+            }
+          },
+          "required": ["fields"],
+          "additionalProperties": false
+        }
+      },
+      "required": ["combine_fields"],
+      "additionalProperties": false
     }
   ]
 }
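
This new branch of the transformation union adds combine_fields: a required fields array naming the inputs, plus an optional separator (defaults to the empty string) or template. A sketch of both shapes, with hypothetical field names:

    // Join values with a separator
    { "combine_fields": { "fields": ["first_name", "last_name"], "separator": " " } }

    // Or format them through a template with {placeholders}
    { "combine_fields": { "fields": ["city", "state"], "template": "{city} - {state}" } }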
package/drivers/LocalDriver.js
CHANGED
@@ -63,6 +63,7 @@ const XMLParser_1 = __importDefault(require("../engines/parsing/XMLParser")); //
 const Helper_1 = __importDefault(require("../helper/Helper"));
 const ParseHelper_1 = __importDefault(require("../engines/parsing/ParseHelper"));
 const FileExporter_1 = __importDefault(require("../engines/file/FileExporter"));
+const Logger_1 = __importDefault(require("../helper/Logger"));
 class LocalSourceDriver {
     constructor() {
         this.init = (source) => __awaiter(this, void 0, void 0, function* () {

@@ -131,6 +132,7 @@ class LocalSourceDriver {
             const { fileKey } = file;
             if (fileKey.includes('%')) {
                 const allFileKeys = this.listFiles(fileKey);
+                Logger_1.default.log(`Matched ${allFileKeys.length} files, copying to locally and creating unified dataset.`);
                 // Copy files sequentially to avoid file conflicts
                 for (let i = 0; i < allFileKeys.length; i++) {
                     yield copyLocally(allFileKeys[i], i > 0); // Append mode for subsequent files
package/engines/consumer/ConsumerManager.js
CHANGED

@@ -138,7 +138,7 @@ class ConsumerManagerClass {
             if (!column) {
                 // If the consumer doesn't find the field in the producer but has a default value AND set_default onError
                 // then instead of failing, create a placeholder column for the producer
-                if (field.
+                if (field.fixed === true && Algo_1.default.hasVal(field.default)) {
                     column = {
                         aliasInProducer: field.key,
                         nameInProducer: (_a = field.alias) !== null && _a !== void 0 ? _a : field.key,
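
With this guard, a fixed field that carries a default no longer needs a matching column in the producer: a placeholder column is fabricated so the rest of the pipeline can proceed. A sketch of an input that takes the new branch (values hypothetical; the fabricated column is shown only as far as the hunk reveals it):

    // field = { key: 'source_system', fixed: true, default: 'NASCO' }   // no such column in the producer
    // -> column = { aliasInProducer: 'source_system', nameInProducer: 'source_system', ... }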
package/engines/consumer/PostProcessor.js
CHANGED

@@ -34,7 +34,7 @@ class PostProcessorClass {
             (0, Affirm_1.default)(dataset, 'Invalid dataset');
             const fields = ConsumerManager_1.default.getExpandedFields(consumer);
             let newDataset = yield this.dropDimensions(dataset, consumer);
-            newDataset = this.
+            newDataset = this.updateDimensions(newDataset, consumer);
             newDataset = yield this.reorderDimensions(newDataset, consumer);
             newDataset = yield newDataset.map(record => {
                 var _a, _b;

@@ -53,18 +53,9 @@ class PostProcessorClass {
             });
             return newDataset;
         });
-        this.
-            const dimensions = dataset.getDimensions();
+        this.updateDimensions = (dataset, consumer) => {
             const fields = ConsumerManager_1.default.getExpandedFields(consumer);
-
-            // This dimension is wanted by the consumer, check if it needs renaming
-            const consumerField = fields.find(x => x.cField.key === dim.name);
-            if (consumerField) {
-                const { cField: { key, alias } } = consumerField;
-                if (key !== alias && Algo_1.default.hasVal(alias) && alias.length > 0)
-                    dataset.renameDimension(key, alias);
-            }
-        }
+            dataset.updateDimensions(fields);
             return dataset;
         };
         this.dropDimensions = (dataset, consumer) => __awaiter(this, void 0, void 0, function* () {

@@ -206,7 +197,8 @@ class PostProcessorClass {
             const newDimensions = expectedFieldNames.map((key, index) => ({
                 name: key,
                 key: key,
-                index: index
+                index: index,
+                hidden: null
             }));
             // Create the row string
             const values = newDimensions.map(dim => {

@@ -227,7 +219,8 @@ class PostProcessorClass {
             const newDimensions = columns.map((col, index) => ({
                 name: col.nameInProducer,
                 key: col.nameInProducer,
-                index: index
+                index: index,
+                hidden: null
             }));
             // Update the dataset dimensions
             resDataset['_dimensions'] = newDimensions;

@@ -236,7 +229,9 @@ class PostProcessorClass {
         });
         this._getFieldValue = (record, field) => {
             var _a, _b, _c;
-            const { key, alias } = field.cField;
+            const { key, alias, fixed, default: defaultValue } = field.cField;
+            if (fixed && Algo_1.default.hasVal(defaultValue))
+                return defaultValue;
             const fieldKey = alias !== null && alias !== void 0 ? alias : key;
             const fieldValue = record.getValue(fieldKey);
             if (Algo_1.default.hasVal(fieldValue) && !isNaN(fieldValue)) {
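
The practical effect of the _getFieldValue change: a fixed field with a default resolves before the record is consulted at all. A worked sketch with hypothetical data:

    // field.cField = { key: 'source_system', fixed: true, default: 'NASCO' }
    // record holds { member_id: '123' }; 'source_system' appears nowhere in the data
    this._getFieldValue(record, field);   // -> 'NASCO'; record.getValue() is never reached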
package/engines/dataset/Dataset.js
CHANGED

@@ -31,6 +31,7 @@ const Affirm_1 = __importDefault(require("../../core/Affirm"));
 const XMLParser_1 = __importDefault(require("../parsing/XMLParser"));
 const DriverFactory_1 = __importDefault(require("../../drivers/DriverFactory"));
 const Helper_1 = __importDefault(require("../../helper/Helper"));
+const Algo_1 = __importDefault(require("../../core/Algo"));
 class Dataset {
     constructor(name, file, batchSize = Constants_1.default.defaults.MAX_ITEMS_IN_MEMORY) {
         this.getPath = () => this._path;

@@ -678,12 +679,29 @@
             return this;
         });
         this.getDimensions = () => this._dimensions;
-        this.
-
-
-
-
-
+        this.updateDimensions = (fields) => {
+            this._startOperation('update-dimensions');
+            for (const field of fields) {
+                const { cField: { key, alias, hidden, fixed, default: defaultValue } } = field;
+                const currentDim = this._dimensions.find(x => x.name === key);
+                if (currentDim) {
+                    currentDim.name = alias !== null && alias !== void 0 ? alias : key;
+                    currentDim.hidden = hidden;
+                }
+                else if (fixed && Algo_1.default.hasVal(defaultValue)) {
+                    this._dimensions.push({
+                        hidden: hidden,
+                        index: this._dimensions.length,
+                        key: key,
+                        name: alias !== null && alias !== void 0 ? alias : key
+                    });
+                }
+                else {
+                    throw new Error(`Trying to update the dataset dimension "${(alias !== null && alias !== void 0 ? alias : key)}", but none was found (${this._dimensions.map(x => x.name).join(', ')})`);
+                }
+            }
+            this._finishOperation('update-dimensions');
+            return this;
         };
         this.dropDimensions = (dimensionNames) => __awaiter(this, void 0, void 0, function* () {
             if (dimensionNames.length === 0)
package/engines/dataset/DatasetManager.js
CHANGED

@@ -49,7 +49,7 @@ class DatasetManagerClass {
         const headerLine = firstLine;
         const rawDimensions = ParseManager_1.default._extractHeader(headerLine, delimiterChar, producer, discover);
         return {
-            dimensions: rawDimensions.map(x => ({ key: x.name, name: x.saveAs, index: x.index })),
+            dimensions: rawDimensions.map(x => ({ key: x.name, name: x.saveAs, index: x.index, hidden: null })),
             delimiter: delimiterChar
         };
     }

@@ -64,7 +64,7 @@ class DatasetManagerClass {
             const columnKey = (_b = pColumn.aliasInProducer) !== null && _b !== void 0 ? _b : pColumn.nameInProducer;
             const csvColumnIndex = keys.findIndex(x => x === columnKey);
             (0, Affirm_1.default)(csvColumnIndex > -1, `The column "${pColumn.nameInProducer}" (with key "${columnKey}") of producer "${producer.name}" doesn't exist in the underlying dataset.`);
-            dimensions.push({ index: csvColumnIndex, key: columnKey, name: pColumn.nameInProducer });
+            dimensions.push({ index: csvColumnIndex, key: columnKey, name: pColumn.nameInProducer, hidden: null });
         }
         const delimiterChar = (_c = file.delimiter) !== null && _c !== void 0 ? _c : ',';
         return { dimensions, delimiter: delimiterChar };

@@ -76,7 +76,7 @@ class DatasetManagerClass {
         const source = Environment_1.default.getSource(producer.source);
         const columns = FileCompiler_1.default.compileProducer(producer, source);
         return {
-            dimensions: columns.map((x, i) => { var _a; return ({ key: (_a = x.aliasInProducer) !== null && _a !== void 0 ? _a : x.nameInProducer, name: x.nameInProducer, index: i }); }),
+            dimensions: columns.map((x, i) => { var _a; return ({ key: (_a = x.aliasInProducer) !== null && _a !== void 0 ? _a : x.nameInProducer, name: x.nameInProducer, index: i, hidden: null }); }),
             delimiter: delimiterChar
         };
     }

@@ -84,7 +84,7 @@ class DatasetManagerClass {
         const delimiterChar = (_e = producer.settings.delimiter) !== null && _e !== void 0 ? _e : ',';
         const rawDimensions = ParseManager_1.default._extractHeader(firstLine, delimiterChar, producer, discover);
         return {
-            dimensions: rawDimensions.map(x => ({ key: x.name, name: x.saveAs, index: x.index })),
+            dimensions: rawDimensions.map(x => ({ key: x.name, name: x.saveAs, index: x.index, hidden: null })),
             delimiter: delimiterChar
         };
     }
package/engines/dataset/DatasetRecord.js
CHANGED

@@ -46,10 +46,25 @@ class DatasetRecord {
             this._value = newValue;
             return this;
         };
-        this.toJSON = () =>
+        this.toJSON = () => {
+            if (this._dimensions.some(x => x.hidden)) {
+                // remove the not wanted dimension
+                const clonedValue = structuredClone(this._value);
+                for (const dim of this._dimensions) {
+                    if (dim.hidden)
+                        delete clonedValue[dim.name];
+                }
+                return JSON.stringify(clonedValue);
+            }
+            else {
+                return JSON.stringify(this._value);
+            }
+        };
         this.toCSV = (delimiter) => {
             const myDelimtier = delimiter !== null && delimiter !== void 0 ? delimiter : this._delimiter;
+            // remove the not wanted dimension
             const line = this._dimensions
+                .filter(x => !x.hidden)
                 .map(x => { var _a, _b; return `"${Algo_1.default.replaceAll((_b = (_a = this._value[x.name]) === null || _a === void 0 ? void 0 : _a.toString()) !== null && _b !== void 0 ? _b : '', '"', '""')}"`; })
                 .join(myDelimtier);
             return line;
package/engines/execution/ExecutionEnvironment.js
CHANGED

@@ -123,6 +123,7 @@ class ExecutionEnvironment {
             }
             case 'export-file': {
                 (0, Affirm_1.default)(planStep.output, `Invalid output in export-file step`);
+                (0, Affirm_1.default)(this._resultingDataset, 'Invalid resulting dataset in export-file step');
                 const res = yield FileExporter_1.default.export(this._consumer, planStep.output, this._resultingDataset);
                 result.fileUri = res;
                 break;
package/engines/transform/JoinEngine.js
CHANGED

@@ -103,7 +103,8 @@ class JoinEngineClass {
         const resultDimensions = consumerColumns.map((col, index) => ({
             name: col.consumerAlias || col.consumerKey,
             key: col.consumerAlias || col.consumerKey,
-            index
+            index,
+            hidden: null
         }));
         // Initialize the result dataset with proper dimensions
         resultDataset.getDimensions().length = 0;
package/engines/transform/TransformationEngine.js
CHANGED

@@ -31,10 +31,13 @@ class TransformationEngineClass {
                 const value = record.getValue(fieldKey);
                 if (!Algo_1.default.hasVal(value) && Algo_1.default.hasVal(field.default))
                     record.setValue(fieldKey, field.default);
+                else if (!Algo_1.default.hasVal(value) && this.isFieldCombinationTransformation(field.transform))
+                    // For field combination transformations, we don't skip null values as they might combine with other fields
+                    continue;
                 else if (!Algo_1.default.hasVal(value))
                     continue;
                 try {
-                    record.setValue(fieldKey, this.applyTransformations(value, field.transform, field));
+                    record.setValue(fieldKey, this.applyTransformations(value, field.transform, field, record));
                 }
                 catch (error) {
                     switch (field.onError) {

@@ -52,13 +55,19 @@ class TransformationEngineClass {
                 return record;
             });
         });
-        this.
+        this.isFieldCombinationTransformation = (transformation) => {
+            if (Array.isArray(transformation)) {
+                return transformation.some(t => this.isFieldCombinationTransformation(t));
+            }
+            return 'combine_fields' in transformation;
+        };
+        this.applyTransformations = (value, transformations, field, record) => {
             var _a;
             if (Array.isArray(transformations)) {
                 // Process array transformations without creating intermediate arrays
                 let result = value;
                 for (const transform of transformations) {
-                    result = this.applyTransformations(result, transform, field);
+                    result = this.applyTransformations(result, transform, field, record);
                 }
                 return result;
             }

@@ -190,6 +199,30 @@ class TransformationEngineClass {
                 return transformations.prepend + TypeCaster_1.default.cast(value, 'string');
             if ('append' in transformations)
                 return TypeCaster_1.default.cast(value, 'string') + transformations.append;
+            if ('combine_fields' in transformations) {
+                if (!record) {
+                    throw new Error(`Cannot apply combine_fields transformation without record context in field '${field.key}'`);
+                }
+                const { fields, separator = '', template } = transformations.combine_fields;
+                // Get values from the specified fields
+                const fieldValues = fields.map(fieldName => {
+                    const fieldValue = record.getValue(fieldName);
+                    return fieldValue !== null && fieldValue !== undefined ? String(fieldValue) : '';
+                });
+                // If template is provided, use it for formatting
+                if (template) {
+                    let result = template;
+                    for (let i = 0; i < fields.length; i++) {
+                        const placeholder = `{${fields[i]}}`;
+                        result = result.replace(new RegExp(placeholder, 'g'), fieldValues[i]);
+                    }
+                    return result;
+                }
+                else {
+                    // Otherwise, join with separator
+                    return fieldValues.join(separator);
+                }
+            }
             if ('conditional' in transformations) {
                 for (const clause of transformations.conditional.clauses) {
                     if (this.evaluateCondition(value, clause.if)) {
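
At runtime, combine_fields pulls its inputs from the current record (hence the new record parameter threaded through applyTransformations), maps null/undefined to empty strings, and lets template take precedence over separator. One caveat visible in the code: placeholders are replaced via new RegExp(placeholder, 'g'), so a field name containing regex metacharacters would be interpreted as a pattern. A worked sketch with a hypothetical record:

    // record = { first_name: 'Ada', last_name: 'Lovelace', middle_name: null }
    // { combine_fields: { fields: ['first_name', 'last_name'], separator: ' ' } }    -> 'Ada Lovelace'
    // { combine_fields: { fields: ['first_name', 'middle_name'], separator: '-' } }  -> 'Ada-'  (null becomes '')
    // { combine_fields: { fields: ['last_name', 'first_name'],
    //                     template: '{last_name}, {first_name}' } }                  -> 'Lovelace, Ada'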
package/engines/validation/Validator.js
CHANGED

@@ -138,6 +138,7 @@ class ValidatorClass {
             return errors;
         };
         const validateTransformations = (fields) => {
+            var _a;
             const errors = [];
             const trxsFields = fields.filter(x => x.transform);
             for (const field of trxsFields) {

@@ -149,7 +150,17 @@ class ValidatorClass {
                 for (const trans of trxToValidate) {
                     const trxKeys = Object.keys(trans);
                     if (trxKeys.length !== 1)
-                        errors.push(`There can only be 1 transformation type in your transformation pipeline. Field "${field.key}" got ${trxKeys.length}
+                        errors.push(`There can only be 1 transformation type in your transformation pipeline. Field "${field.key}" got ${trxKeys.length}.`);
+                    if ('combine_fields' in trans) {
+                        const { combine_fields } = trans;
+                        if (!combine_fields.fields || combine_fields.fields.length === 0)
+                            errors.push(`The "combine_field" transformation is missing the "fields" property ("${field.key}").`);
+                        const missingFieldsInConsumer = combine_fields.fields
+                            .map(x => ({ field: x, found: fields.find(k => { var _a; return ((_a = k.alias) !== null && _a !== void 0 ? _a : k.key) === x; }) }))
+                            .filter(x => !x.found);
+                        if (missingFieldsInConsumer.length > 0)
+                            errors.push(`The requested field(s) for a transformation is missing in the consumer -> missing field(s): "${missingFieldsInConsumer.map(x => x.field).join(', ')}"; field transformation: "${(_a = field.alias) !== null && _a !== void 0 ? _a : field.key}";`);
+                    }
                 }
             }
             return errors;