@forzalabs/remora 0.0.48-nasco.3 → 0.0.50-nasco.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Constants.js +1 -1
- package/core/Algo.js +2 -1
- package/drivers/DriverHelper.js +5 -3
- package/drivers/LocalDriver.js +27 -7
- package/drivers/S3Driver.js +11 -1
- package/engines/consumer/PostProcessor.js +1 -35
- package/engines/dataset/Dataset.js +48 -60
- package/engines/dataset/DatasetRecord.js +35 -40
- package/engines/execution/ExecutionEnvironment.js +2 -24
- package/package.json +2 -1
package/Constants.js
CHANGED
package/core/Algo.js
CHANGED

@@ -150,6 +150,7 @@ const algo = {
         (0, Affirm_1.default)(arr.length > 0, 'Array must be non-empty');
         return Math.max(...arr);
     },
-    replaceAll: (text, search, replace) => text.replace(new RegExp(search, 'g'), replace)
+    replaceAll: (text, search, replace) => text.replace(new RegExp(search, 'g'), replace),
+    deepClone: (data) => JSON.parse(JSON.stringify(data))
 };
 exports.default = algo;

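Note: deepClone is a JSON round trip, so (as a general JavaScript caveat, not something this diff states) functions, undefined values, Dates, Maps/Sets and circular references do not survive it, and replaceAll builds a RegExp from its search argument. A minimal sketch exercising the two one-liners from the hunk above, with invented sample data:

// Sketch only: the two helpers added/changed in this hunk, exercised in isolation.
const algo = {
    replaceAll: (text, search, replace) => text.replace(new RegExp(search, 'g'), replace),
    deepClone: (data) => JSON.parse(JSON.stringify(data))
};

const dims = [{ name: 'id', index: 0 }, { name: 'amount', index: 1 }];
const copy = algo.deepClone(dims);                // independent deep copy, safe to mutate
copy[0].name = 'identifier';
console.log(dims[0].name);                        // 'id' - the original is untouched
console.log(algo.replaceAll('a-b-c', '-', '_'));  // 'a_b_c'
// Regex metacharacters in `search` ('.', '*', '$', ...) are treated as a pattern,
// not a literal string.
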
package/drivers/DriverHelper.js
CHANGED

@@ -24,9 +24,12 @@ const readline_1 = require("readline");
 const promises_1 = require("stream/promises");
 const fs_1 = require("fs");
 const Logger_1 = __importDefault(require("../helper/Logger"));
+const Affirm_1 = __importDefault(require("../core/Affirm"));
 const DriverHelper = {
-    appendToUnifiedFile: (
-
+    appendToUnifiedFile: (options) => __awaiter(void 0, void 0, void 0, function* () {
+        (0, Affirm_1.default)(options, 'Invalid options');
+        const { append, destinationPath, fileKey, headerLine, stream, fileType, hasHeaderRow } = options;
+        const shouldValidateHeader = fileType === 'CSV' || (fileType === 'TXT' && hasHeaderRow === true);
         let isFirstLine = true;
         let hasValidatedHeader = shouldValidateHeader ? false : true;
         const headerValidationTransform = new stream_1.Transform({
@@ -36,7 +39,6 @@ const DriverHelper = {
                 const lines = chunkStr.split('\n');
                 if (isFirstLine && lines.length > 0) {
                     const firstLine = lines[0];
-                    // Validate header only for CSV and TXT files
                     if (shouldValidateHeader && headerLine && headerLine.trim() !== '' && firstLine.trim() !== headerLine.trim()) {
                         const msg = `Error creating unified dataset: file "${fileKey}" has a different header line than the other files in this dataset\n\t-${fileKey}: ${firstLine}\n\t-main: ${headerLine}`;
                         Logger_1.default.log(msg);

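Note: appendToUnifiedFile now takes a single options object instead of positional arguments. A hedged call sketch based on the option names destructured above; the file paths and values are invented, only the option names and the CSV/TXT header-validation rule come from this diff:

// Sketch: how a caller is expected to invoke the refactored helper.
const fs = require('fs');
const DriverHelper = require('./DriverHelper').default;

async function appendOneFile() {
    const readStream = fs.createReadStream('/tmp/part-0001.csv'); // hypothetical source file
    return DriverHelper.appendToUnifiedFile({
        stream: readStream,               // source stream to copy
        fileKey: 'part-0001.csv',         // used in error messages
        destinationPath: '/tmp/unified.csv',
        append: true,                     // false would start a fresh unified file
        headerLine: 'id,amount,currency', // header of the first file in the pattern
        fileType: 'CSV',                  // header validation runs for CSV, or TXT with hasHeaderRow
        hasHeaderRow: true
    });
}

appendOneFile().catch(console.error);
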
package/drivers/LocalDriver.js
CHANGED

@@ -83,12 +83,15 @@ class LocalSourceDriver {
             const { fileKey } = request;
             if (fileKey.includes('%')) {
                 const allFileKeys = this.listFiles(fileKey);
-
+                Logger_1.default.log(`Matched ${allFileKeys.length} files, copying to locally and creating unified dataset.`);
+                const firstPath = path_1.default.join(this._path, allFileKeys[0]);
+                const headerLine = (yield DriverHelper_1.default.quickReadFile(firstPath, 1))[0];
+                const promises = allFileKeys.map((x, i) => this._get(Object.assign(Object.assign({}, request), { fileKey: x }), headerLine, i));
                 const results = yield Promise.all(promises);
                 return results.flat();
             }
             else {
-                return yield this._get(request);
+                return yield this._get(request, '');
             }
         });
         this.readLinesInRange = (request) => __awaiter(this, void 0, void 0, function* () {
@@ -102,12 +105,15 @@ class LocalSourceDriver {
             const { fileKey } = request;
             if (fileKey.includes('%')) {
                 const allFileKeys = this.listFiles(fileKey);
-
+                Logger_1.default.log(`Matched ${allFileKeys.length} files, copying to locally and creating unified dataset.`);
+                const firstPath = path_1.default.join(this._path, allFileKeys[0]);
+                const headerLine = (yield DriverHelper_1.default.quickReadFile(firstPath, 1))[0];
+                const promises = allFileKeys.map((x, i) => this._get(Object.assign(Object.assign({}, request), { fileKey: x }), headerLine, i));
                 const results = yield Promise.all(promises);
                 return results.flat();
             }
             else {
-                return yield this._get(request);
+                return yield this._get(request, '');
             }
         });
         this.download = (dataset) => __awaiter(this, void 0, void 0, function* () {
@@ -122,12 +128,20 @@ class LocalSourceDriver {
             (0, Affirm_1.default)(fs.existsSync(sourceFilePath), `Source file does not exist: ${sourceFilePath}`);
             // Copy and validate header in a single stream pass
             const readStream = fs.createReadStream(sourceFilePath);
-            return DriverHelper_1.default.appendToUnifiedFile(
+            return DriverHelper_1.default.appendToUnifiedFile({
+                stream: readStream,
+                fileKey,
+                destinationPath: dataset.getPath(),
+                append: appendMode,
+                headerLine,
+                fileType: file.fileType,
+                hasHeaderRow: file.hasHeaderRow
+            });
         });
         const { fileKey } = file;
         if (fileKey.includes('%')) {
             const allFileKeys = this.listFiles(fileKey);
-            Logger_1.default.log(`Matched ${allFileKeys.length} files, copying
+            Logger_1.default.log(`Matched ${allFileKeys.length} files, copying locally and creating unified dataset.`);
             // Get header line from the first file
             const headerLine = (yield DriverHelper_1.default.quickReadFile(path_1.default.join(this._path, allFileKeys[0]), 1))[0];
             // Copy files sequentially to avoid file conflicts
@@ -222,7 +236,7 @@ class LocalSourceDriver {
             }
             return lines;
         });
-        this._get = (request, index) => __awaiter(this, void 0, void 0, function* () {
+        this._get = (request, headerLine, index) => __awaiter(this, void 0, void 0, function* () {
             const { fileKey, fileType, options } = request;
             let lineFrom, lineTo, sheetName, hasHeaderRow;
             if (options) {
@@ -257,6 +271,12 @@ class LocalSourceDriver {
                     lines = yield this._readXmlLines(fileUrl);
                     break;
                 }
+            const firstLine = lines[0];
+            if (headerLine && headerLine.trim() !== '' && firstLine.trim() !== headerLine.trim()) {
+                const msg = `Error creating unified dataset: file "${fileKey}" has a different header line than the other files in this dataset\n\t-${fileKey}: ${firstLine}\n\t-main: ${headerLine}`;
+                Logger_1.default.log(msg);
+                throw new Error(msg);
+            }
             // If this is not the first file read in a pattern match AND the file type has an header,
             // then I need to remove the header from the resulting lines or the header will be duplicated
             if (index > 0 && ParseHelper_1.default.shouldHaveHeader(fileType, hasHeaderRow)) {
package/drivers/S3Driver.js
CHANGED

@@ -31,6 +31,7 @@ const Helper_1 = __importDefault(require("../helper/Helper"));
 const ParseHelper_1 = __importDefault(require("../engines/parsing/ParseHelper"));
 const FileExporter_1 = __importDefault(require("../engines/file/FileExporter"));
 const DriverHelper_1 = __importDefault(require("./DriverHelper"));
+const Logger_1 = __importDefault(require("../helper/Logger"));
 class S3DestinationDriver {
     constructor() {
         this.init = (source) => __awaiter(this, void 0, void 0, function* () {
@@ -235,11 +236,20 @@ class S3SourceDriver {
             const response = yield this._client.send(command);
             (0, Affirm_1.default)(response.Body, 'Failed to fetch object from S3');
             const stream = response.Body;
-            return DriverHelper_1.default.appendToUnifiedFile(
+            return DriverHelper_1.default.appendToUnifiedFile({
+                stream,
+                fileKey: fileUrl,
+                destinationPath: dataset.getPath(),
+                append: appendMode,
+                headerLine,
+                fileType: file.fileType,
+                hasHeaderRow: file.hasHeaderRow
+            });
         });
         const { fileKey } = file;
         if (fileKey.includes('%')) {
             const allFileKeys = yield this.listFiles(fileKey);
+            Logger_1.default.log(`Matched ${allFileKeys.length} files, copying locally and creating unified dataset.`);
             // Get header line from the first file
             const firstFileCommand = new client_s3_1.GetObjectCommand({
                 Bucket: this._bucketName,
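Note: the S3 path mirrors LocalDriver, streaming the GetObjectCommand response body straight into appendToUnifiedFile. A hedged sketch assuming @aws-sdk/client-s3 (which the compiled code imports as client_s3_1); the bucket, key and fileType values are invented:

// Sketch: appending one S3 object to a unified dataset file.
const { S3Client, GetObjectCommand } = require('@aws-sdk/client-s3');
const DriverHelper = require('./DriverHelper').default;

async function appendS3Object(bucket, key, headerLine, destinationPath, append) {
    const client = new S3Client({});
    const response = await client.send(new GetObjectCommand({ Bucket: bucket, Key: key }));
    return DriverHelper.appendToUnifiedFile({
        stream: response.Body,   // readable stream of the object body in the Node runtime
        fileKey: key,
        destinationPath,
        append,
        headerLine,
        fileType: 'CSV',         // assumed; the driver takes this from the file definition
        hasHeaderRow: true
    });
}
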
package/engines/consumer/PostProcessor.js
CHANGED

@@ -33,9 +33,7 @@ class PostProcessorClass {
             (0, Affirm_1.default)(consumer, 'Invalid consumer');
             (0, Affirm_1.default)(dataset, 'Invalid dataset');
             const fields = ConsumerManager_1.default.getExpandedFields(consumer);
-            let newDataset = yield
-            newDataset = this.updateDimensions(newDataset, consumer);
-            newDataset = yield this.reorderDimensions(newDataset, consumer);
+            let newDataset = yield dataset.wholeUpdateDimensions(fields);
             newDataset = yield newDataset.map(record => {
                 var _a, _b;
                 for (const field of fields) {
@@ -53,38 +51,6 @@ class PostProcessorClass {
             });
             return newDataset;
         });
-        this.updateDimensions = (dataset, consumer) => {
-            const fields = ConsumerManager_1.default.getExpandedFields(consumer);
-            dataset.updateDimensions(fields);
-            return dataset;
-        };
-        this.dropDimensions = (dataset, consumer) => __awaiter(this, void 0, void 0, function* () {
-            const initialDimensions = dataset.getDimensions();
-            const fields = ConsumerManager_1.default.getExpandedFields(consumer);
-            const dimensionsToKeep = new Set();
-            // First, identify which dimensions the consumer actually wants
-            for (const field of fields) {
-                const { key } = field.cField;
-                dimensionsToKeep.add(key);
-            }
-            // Create a copy to iterate over since we'll be modifying the original
-            const dimensionsCopy = [...initialDimensions];
-            const dimensionsToDrop = [];
-            for (const dim of dimensionsCopy) {
-                if (!dimensionsToKeep.has(dim.name)) {
-                    // This dimension is not wanted by the consumer, collect it for dropping
-                    dimensionsToDrop.push(dim.name);
-                }
-            }
-            // Drop all unwanted dimensions in a single optimized operation
-            if (dimensionsToDrop.length > 0)
-                yield dataset.dropDimensions(dimensionsToDrop);
-            return dataset;
-        });
-        this.reorderDimensions = (dataset, consumer) => __awaiter(this, void 0, void 0, function* () {
-            const fields = ConsumerManager_1.default.getExpandedFields(consumer);
-            return yield dataset.reorderDimensions(fields.map(x => { var _a; return ((_a = x.cField.alias) !== null && _a !== void 0 ? _a : x.cField.key); }));
-        });
         /**
          * Gets an array of objects (with potentially nested fields) and unpacks them to an array of objects with no nested fields
          * If some nested keys are lists, then a logic similar to a SQL JOIN is used and rows are duplicated

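Note: the three per-dataset passes removed above (updateDimensions, dropDimensions, reorderDimensions) are replaced by a single call to the new Dataset.wholeUpdateDimensions(fields). A hedged before/after sketch; the cField values are invented and the "before" flow is paraphrased because its first line is truncated in this diff:

// Sketch: the consolidated post-processing pass (assumes an async context and
// an already-initialised Dataset instance named `dataset`).
async function postProcess(dataset) {
    // Before (0.0.48-nasco.3), roughly:
    //   let ds = yield <read step>;                (first line truncated in the diff)
    //   ds = this.updateDimensions(ds, consumer);
    //   ds = yield this.reorderDimensions(ds, consumer);
    // After (0.0.50-nasco.3): one pass that renames, hides, moves and drops dimensions together.
    const fields = [
        { cField: { key: 'amount', alias: 'total', hidden: false } }, // rename amount -> total, position 0
        { cField: { key: 'currency', hidden: true } }                 // keep currency, hide it, position 1
    ];
    const newDataset = await dataset.wholeUpdateDimensions(fields);
    // Dimensions not named in `fields` are marked toDelete and removed from every record.
    return newDataset;
}
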
package/engines/dataset/Dataset.js
CHANGED

@@ -34,6 +34,7 @@ const Helper_1 = __importDefault(require("../../helper/Helper"));
 const Algo_1 = __importDefault(require("../../core/Algo"));
 class Dataset {
     constructor(name, file, batchSize = Constants_1.default.defaults.MAX_ITEMS_IN_MEMORY) {
+        this._pipeline = [];
         this.getPath = () => this._path;
         this.setPath = (path) => {
             this._path = path;
@@ -122,13 +123,14 @@ class Dataset {
             const rl = (0, readline_1.createInterface)({ input: readStream, crlfDelay: Infinity });
             let batch = [];
             let lineCount = 0;
+            const dimensions = Algo_1.default.deepClone(this._dimensions);
             try {
                 for (var _d = true, rl_1 = __asyncValues(rl), rl_1_1; rl_1_1 = yield rl_1.next(), _a = rl_1_1.done, !_a; _d = true) {
                     _c = rl_1_1.value;
                     _d = false;
                     const line = _c;
                     try {
-                        const record = new DatasetRecord_1.default(line,
+                        const record = new DatasetRecord_1.default(line, dimensions, this._delimiter);
                         batch.push(record);
                         lineCount++;
                         if (batch.length >= this._batchSize) {
@@ -520,6 +522,7 @@ class Dataset {
             if (batch.length > 0) {
                 yield processor(batch, batchIndex);
             }
+            this._iterations++;
             this._finishOperation('stream-batches');
         });
         /**
@@ -679,68 +682,52 @@ class Dataset {
             return this;
         });
         this.getDimensions = () => this._dimensions;
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        }
+        /**
+         * - remove dimension
+         * - rename a dimension
+         * - change hidden flag
+         * - move a dimension
+         */
+        this.wholeUpdateDimensions = (fields) => __awaiter(this, void 0, void 0, function* () {
+            var _a;
+            let updates = [];
+            // Add all the updates
+            for (let i = 0; i < fields.length; i++) {
+                const { cField } = fields[i];
+                const currentMatch = structuredClone(this._dimensions.find(x => x.name === cField.key));
+                updates.push({
+                    currentDimension: currentMatch,
+                    newName: (_a = cField.alias) !== null && _a !== void 0 ? _a : cField.key,
+                    newHidden: cField.hidden,
+                    newPosition: i,
+                    toDelete: false
+                });
             }
-
-
-
-
-
-
-
-
-
-
-
-
-            this._dimensions = this._dimensions
-                .filter(x => !dimensionNames.includes(x.name))
-                .map((x, i) => (Object.assign(Object.assign({}, x), { index: i })));
-            this._finishOperation('drop-dimensions');
-            return this;
-        });
-        this.reorderDimensions = (dimensionNames) => __awaiter(this, void 0, void 0, function* () {
-            if (dimensionNames.length === 0)
+            // Add all the updates to remove dimensions
+            for (const dim of this._dimensions) {
+                if (!updates.find(x => { var _a; return ((_a = x.currentDimension) === null || _a === void 0 ? void 0 : _a.name) === dim.name; }))
+                    updates.push({ currentDimension: dim, toDelete: true });
+            }
+            // Now keep only the updates that actually change something
+            updates = updates.filter(x => x.toDelete
+                || !x.currentDimension
+                || (x.currentDimension && (x.currentDimension.name !== x.newName
+                    || (Algo_1.default.hasVal(x.newHidden) && x.newHidden !== x.currentDimension.hidden)
+                    || x.newPosition !== x.currentDimension.index)));
+            if (updates.length === 0)
                 return this;
-
-
-
-
-
-
-
-
-
-            const dim = this._dimensions.find(x => x.name === name);
-            const newDim = structuredClone(dim);
-            newDim.index = index;
-            return { newDimension: newDim, oldDimension: dim };
+            let updatedDimensions = null;
+            const newDataset = yield this.map(record => {
+                for (const update of updates) {
+                    record.wholeUpdateDimension(update);
+                }
+                record._dimensions.sort((a, b) => a.index - b.index);
+                if (!updatedDimensions)
+                    updatedDimensions = record._dimensions;
+                return record;
             });
-
-
-            // Reorder the data in the file using streaming approach
-            yield this.map(record => record.reorderDimensions(movements));
-            this._finishOperation('reorder-dimensions');
-            return this;
+            this._dimensions = updatedDimensions;
+            return newDataset;
         });
         this.print = (...args_1) => __awaiter(this, [...args_1], void 0, function* (count = 3, full = false) {
             console.log(`DS ${this._name} (${this._size} | ${this._iterations})`);
@@ -862,6 +849,7 @@ class Dataset {
             this._size = 0;
             this._iterations = 0;
             this._operations = [];
+            this._pipeline = [];
             const datasetName = this._name
                 .replace(/[^a-zA-Z0-9_-]/g, '_')
                 .replace(/_{2,}/g, '_')

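Note: wholeUpdateDimensions first builds one update per consumer field, marks every unmatched existing dimension for deletion, filters out no-ops, and only then runs a single streaming map over the records. A simplified sketch of the updates it would produce for invented dimensions; Algo.hasVal is assumed to be a null/undefined check, as its use in the filter suggests:

// Sketch: the shape of the `updates` array computed before the streaming pass.
const dimensions = [
    { name: 'id', index: 0, hidden: false },
    { name: 'amount', index: 1, hidden: false },
    { name: 'legacy_flag', index: 2, hidden: false }
];
const fields = [
    { cField: { key: 'id' } },                                   // unchanged -> filtered out as a no-op
    { cField: { key: 'amount', alias: 'total', hidden: false } } // rename, keep position 1
];

// After the no-op filter, roughly:
const updates = [
    { currentDimension: dimensions[1], newName: 'total', newHidden: false, newPosition: 1, toDelete: false },
    { currentDimension: dimensions[2], toDelete: true }           // not requested by any field
];
// Each record then applies every update via record.wholeUpdateDimension(update)
// and re-sorts its dimensions by index.
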
package/engines/dataset/DatasetRecord.js
CHANGED

@@ -6,44 +6,45 @@ Object.defineProperty(exports, "__esModule", { value: true });
 const Algo_1 = __importDefault(require("../../core/Algo"));
 class DatasetRecord {
     constructor(row, dimensions, delimiter) {
+        this.parse = (row, delimiter, dimensions) => {
+            if (!this.isEmpty() && dimensions.length > 0) {
+                const parts = row.split(delimiter);
+                for (let i = 0; i < dimensions.length; i++) {
+                    const dim = dimensions[i];
+                    this._value[dim.name] = parts[i];
+                }
+            }
+        };
         this.stringify = () => this._dimensions.map(x => this._value[x.name]).join(this._delimiter);
         this.isEmpty = () => { var _a; return ((_a = this._row) === null || _a === void 0 ? void 0 : _a.trim().length) === 0; };
         this.getRaw = () => this._row;
         this.getValue = (dimension) => this._value[dimension];
         this.setValue = (dimension, value) => this._value[dimension] = value;
-        this.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            // Add the dimension with its new index
-            this._dimensions.push(newDimension);
-            // Remap the value from the old position to the new dimension name
-            if (parts.length > oldDimension.index) {
-                newValue[newDimension.name] = parts[oldDimension.index];
-            }
+        this.wholeUpdateDimension = (update) => {
+            var _a;
+            if (update.toDelete) {
+                // To remove
+                delete this._value[update.currentDimension.name];
+                this._dimensions = this._dimensions.filter(x => x.key !== update.currentDimension.name);
+            }
+            else if (!update.currentDimension) {
+                // To create (at the right position)
+                const newDimension = { index: update.newPosition, key: update.newName, name: update.newName, hidden: update.newHidden };
+                this._value[newDimension.name] = null;
+                this._dimensions = [...this._dimensions, newDimension];
+            }
+            else {
+                // Change: name, hidden, position
+                const index = this._dimensions.findIndex(x => x.key === update.currentDimension.name);
+                const currentDim = this._dimensions[index];
+                const updatedDim = { name: update.newName, key: (_a = currentDim.key) !== null && _a !== void 0 ? _a : update.newName, hidden: update.newHidden, index: update.newPosition };
+                this._value[updatedDim.name] = this._value[currentDim.name];
+                if (updatedDim.name !== currentDim.name)
+                    delete this._value[currentDim.name];
+                const newDimensions = [...this._dimensions];
+                newDimensions.splice(index, 1, updatedDim);
+                this._dimensions = newDimensions;
             }
-            // Update the value mapping
-            this._value = newValue;
             return this;
         };
         this.toJSON = () => {
@@ -70,16 +71,10 @@ class DatasetRecord {
             return line;
         };
         this._row = row;
-        this._dimensions = dimensions
+        this._dimensions = dimensions;
         this._delimiter = delimiter;
         this._value = {};
-
-        const parts = row.split(delimiter);
-        for (let i = 0; i < dimensions.length; i++) {
-            const dim = dimensions[i];
-            this._value[dim.name] = parts[i];
-        }
-        }
+        this.parse(row, delimiter, this._dimensions);
     }
 }
 exports.default = DatasetRecord;

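Note: per record, wholeUpdateDimension covers the three cases shown above: delete, create, and in-place change of name/hidden/index. A hedged walkthrough with invented values; the constructor signature (row, dimensions, delimiter) and the value-remapping behaviour are read off the diff, and the require path is illustrative:

// Sketch: applying a rename update to a single record.
const DatasetRecord = require('./DatasetRecord').default;

const dims = [
    { name: 'id', key: 'id', index: 0, hidden: false },
    { name: 'amount', key: 'amount', index: 1, hidden: false }
];
const record = new DatasetRecord('42|19.90', dims, '|');
// record.getValue('amount') === '19.90'

record.wholeUpdateDimension({
    currentDimension: dims[1],
    newName: 'total',
    newHidden: false,
    newPosition: 1,
    toDelete: false
});
// The value is remapped to the new name and the old key is removed:
// record.getValue('total') === '19.90', record.getValue('amount') === undefined
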
package/engines/execution/ExecutionEnvironment.js
CHANGED

@@ -17,7 +17,6 @@ const DriverFactory_1 = __importDefault(require("../../drivers/DriverFactory"));
 const ConsumerEngine_1 = __importDefault(require("../consumer/ConsumerEngine"));
 const PostProcessor_1 = __importDefault(require("../consumer/PostProcessor"));
 const FileExporter_1 = __importDefault(require("../file/FileExporter"));
-const ProducerEngine_1 = __importDefault(require("../producer/ProducerEngine"));
 const SQLBuilder_1 = __importDefault(require("../sql/SQLBuilder"));
 const SQLCompiler_1 = __importDefault(require("../sql/SQLCompiler"));
 const ExecutionPlanner_1 = __importDefault(require("./ExecutionPlanner"));
@@ -70,27 +69,6 @@ class ExecutionEnvironment {
                     this._storeIntermidiate(planStep, dataset);
                     break;
                 }
-                case 'read-file-whole': {
-                    (0, Affirm_1.default)(planStep.producer, `Invalid producer in read-file-whole step`);
-                    const fileData = yield ProducerEngine_1.default.readFile(planStep.producer, { readmode: 'all' });
-                    this._storeIntermidiate(planStep, fileData.dataset);
-                    break;
-                }
-                case 'read-file-lines': {
-                    (0, Affirm_1.default)(planStep.lines, `Invalid lines in read-file-lines step`);
-                    (0, Affirm_1.default)(planStep.producer, `Invalid producer in read-file-lines step`);
-                    const { producer, lines: { from, to } } = planStep;
-                    const fileData = yield ProducerEngine_1.default.readFile(producer, { readmode: 'lines', lines: { from, to } });
-                    this._storeIntermidiate(planStep, fileData.dataset);
-                    break;
-                }
-                case 'download-file-locally': {
-                    (0, Affirm_1.default)(planStep.producer, `Invalid producer in download-file-locally step`);
-                    const { producer } = planStep;
-                    const readRes = yield ProducerEngine_1.default.readFile(producer, { readmode: 'download' });
-                    this._storeIntermidiate(planStep, readRes.dataset);
-                    break;
-                }
                 case 'load-dataset': {
                     (0, Affirm_1.default)(planStep.producer, `Invalid producer in read-file-lines step`);
                     const { producer } = planStep;
@@ -171,14 +149,14 @@ class ExecutionEnvironment {
                 }
                 default: throw new Error(`Invalid execution plan step type "${planStep.type}"`);
             }
-            Logger_1.default.log(
+            Logger_1.default.log(`\tCompleted step: ${planStep.type}`);
         }
     }
     catch (error) {
         const ds = (_a = this._resultingDataset) !== null && _a !== void 0 ? _a : this._getIntermidiate(currentStep);
         if (ds)
             Logger_1.default.log(`Failed execution of consumer at step ${currentStep.type}:\n\tSize: ${ds.getSize()}\n\tCycles: ${ds.getCycles()}\n\tOperations: ${Logger_1.default.formatList(ds.getOperations())}`);
-        Logger_1.default.log(
+        Logger_1.default.log(`\tFailed step: ${currentStep.type}->\n\t${error}`);
         throw error;
     }
     Logger_1.default.log(`Completed execution of consumer:\n\tSize: ${result._stats.size}\n\tCycles: ${result._stats.cycles}\n\tTime: ${result._stats.elapsedMS}\n\tOperations: ${Logger_1.default.formatList(result._stats.operations)}`);

package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@forzalabs/remora",
-  "version": "0.0.48-nasco.3",
+  "version": "0.0.50-nasco.3",
   "description": "A powerful CLI tool for seamless data translation.",
   "main": "index.js",
   "private": false,
@@ -9,6 +9,7 @@
   },
   "scripts": {
     "sync": "cd ../dev_ops && npm run sync",
+    "dev": "clear && npx tsx scripts/dev.ts",
    "tsc-check": "npx tsc --noemit",
    "init": "npx tsx ./src/index.ts init",
    "version": "npx tsx ./src/index.ts -v",