@forzalabs/remora 0.0.63-nasco.3 → 0.1.1-nasco.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Constants.js +4 -2
- package/drivers/DriverHelper.js +3 -0
- package/drivers/LocalDriver.js +5 -2
- package/drivers/S3Driver.js +5 -2
- package/engines/consumer/PostProcessor.js +2 -2
- package/engines/dataset/Dataset.js +50 -20
- package/engines/dataset/DatasetManager.js +22 -2
- package/engines/dataset/ParallelDataset.js +142 -0
- package/engines/execution/ExecutionEnvironment.js +6 -7
- package/engines/execution/RequestExecutor.js +2 -2
- package/engines/transform/JoinEngine.js +1 -1
- package/engines/transform/TransformationEngine.js +2 -2
- package/package.json +4 -2
- package/workers/FilterWorker.js +62 -0
- package/workers/ProjectionWorker.js +63 -0
- package/workers/TransformWorker.js +63 -0
- package/workers/TsWorker.js +14 -0
- package/workers/definitions.js +2 -0
package/Constants.js
CHANGED
@@ -1,7 +1,7 @@
  "use strict";
  Object.defineProperty(exports, "__esModule", { value: true });
  const CONSTANTS = {
-     cliVersion: '0.
+     cliVersion: '0.1.1-nasco',
      lambdaVersion: 1,
      port: 5069,
      defaults: {
@@ -10,7 +10,9 @@ const CONSTANTS = {
      STRING_MAX_CHARACTERS_LENGTH: 10000000,
      MAX_ITEMS_IN_MEMORY: 200000,
      MIN_RUNTIME_HEAP_MB: 4000,
-     RECOMMENDED_RUNTIME_HEAP_MB: 8000
+     RECOMMENDED_RUNTIME_HEAP_MB: 8000,
+     INDICATIVE_THREAD_LINE_COUNT: 750000,
+     MAX_THREAD_COUNT: 8
      }
  };
  exports.default = CONSTANTS;
package/drivers/DriverHelper.js
CHANGED
@@ -38,6 +38,7 @@ const DriverHelper = {
      let hasValidatedHeader = shouldValidateHeader ? false : true;
      let leftoverData = '';
      let globalIndex = 0;
+     let lineCount = 0;
      const headerValidationTransform = new stream_1.Transform({
          transform(chunk, encoding, callback) {
              const chunkStr = leftoverData + chunk.toString();
@@ -99,6 +100,7 @@ const DriverHelper = {
          return true;
      };
      const processLine = (line) => {
+         lineCount++;
          switch (fileType) {
              case 'JSON':
              case 'JSONL': {
@@ -118,6 +120,7 @@ const DriverHelper = {
      const writeOptions = append ? { flags: 'a' } : {};
      const writeStream = (0, fs_1.createWriteStream)(destinationPath, writeOptions);
      yield (0, promises_1.pipeline)(stream, headerValidationTransform, writeStream);
+     return lineCount;
  }),
  quickReadFile: (filePath, lineCount) => __awaiter(void 0, void 0, void 0, function* () {
      var _a, e_1, _b, _c;
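Note: the change above makes the download/copy helper count the lines that flow through its header-validation Transform and return that count, so the drivers can report dataset size without a second pass over the file. The following is a minimal standalone sketch of that pattern (a hypothetical helper, not the package's actual DriverHelper), using only Node's stream APIs:

```js
const { Transform } = require('stream');
const { createReadStream, createWriteStream } = require('fs');
const { pipeline } = require('stream/promises');

// Copies src to dest while counting the newline-delimited records that pass through.
async function copyAndCountLines(src, dest) {
    let lineCount = 0;
    let leftover = '';
    const counter = new Transform({
        transform(chunk, _encoding, callback) {
            const text = leftover + chunk.toString();
            const lines = text.split('\n');
            leftover = lines.pop(); // keep the partial last line for the next chunk
            lineCount += lines.length;
            callback(null, chunk); // pass the data through unchanged
        }
    });
    await pipeline(createReadStream(src), counter, createWriteStream(dest));
    return lineCount + (leftover.length > 0 ? 1 : 0); // count a trailing partial line
}
```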
package/drivers/LocalDriver.js
CHANGED
@@ -146,17 +146,20 @@ class LocalSourceDriver {
      // Get header line from the first file
      const headerLine = (yield DriverHelper_1.default.quickReadFile(path_1.default.join(this._path, allFileKeys[0]), 1))[0];
      dataset.setFirstLine(headerLine);
+     let totalLineCount = 0;
      // Copy files sequentially to avoid file conflicts
      for (let i = 0; i < allFileKeys.length; i++) {
-         yield copyLocally(allFileKeys[i], headerLine, i > 0); // Append mode for subsequent files
+         totalLineCount += yield copyLocally(allFileKeys[i], headerLine, i > 0); // Append mode for subsequent files
      }
+     dataset.setCount(totalLineCount);
      return dataset;
  }
  else {
      // For single file, no header validation needed
      const headerLine = (yield DriverHelper_1.default.quickReadFile(path_1.default.join(this._path, fileKey), 1))[0];
      dataset.setFirstLine(headerLine);
-     yield copyLocally(fileKey, headerLine, false);
+     const totalLineCount = yield copyLocally(fileKey, headerLine, false);
+     dataset.setCount(totalLineCount);
      return dataset;
  }
  });
package/drivers/S3Driver.js
CHANGED
@@ -226,10 +226,12 @@ class S3SourceDriver {
      const firstFileStream = firstFileResponse.Body;
      const headerLine = yield this.getFirstLineFromStream(firstFileStream);
      dataset.setFirstLine(headerLine);
+     let totalLineCount = 0;
      // Download files sequentially to avoid file conflicts
      for (let i = 0; i < allFileKeys.length; i++) {
-         yield downloadLocally(allFileKeys[i], headerLine, i > 0); // Append mode for subsequent files
+         totalLineCount += yield downloadLocally(allFileKeys[i], headerLine, i > 0); // Append mode for subsequent files
      }
+     dataset.setCount(totalLineCount);
      return dataset;
  }
  else {
@@ -244,7 +246,8 @@ class S3SourceDriver {
      const headerLine = yield this.getFirstLineFromStream(firstFileStream);
      dataset.setFirstLine(headerLine);
      // For single file, no header validation needed
-     yield downloadLocally(fileKey, headerLine);
+     const totalLineCount = yield downloadLocally(fileKey, headerLine);
+     dataset.setCount(totalLineCount);
      return dataset;
  }
  });
package/engines/consumer/PostProcessor.js
CHANGED
@@ -31,7 +31,7 @@ class PostProcessorClass {
      * - default field values
      * - masking/hashing of data
      */
-     this.doProjection = (consumer, dataset) => __awaiter(this, void 0, void 0, function* () {
+     this.doProjection = (consumer, dataset, options) => __awaiter(this, void 0, void 0, function* () {
          (0, Affirm_1.default)(consumer, 'Invalid consumer');
          (0, Affirm_1.default)(dataset, 'Invalid dataset');
          const fields = ConsumerManager_1.default.getExpandedFields(consumer);
@@ -61,7 +61,7 @@ class PostProcessorClass {
          record.setValue(fieldKey, TypeCaster_1.default.cast(fieldValue, fieldType));
      }
      return record;
-     });
+     }, options);
      newDataset.setDimensinons(updatedDimensions);
      return newDataset;
  });
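`doProjection` now accepts an `options` argument that it forwards to the underlying stream transform. A hedged sketch of the call shape, matching how the new ProjectionWorker further down this diff invokes it (the path and range values here are illustrative, not taken from the package):

```js
// Illustrative call; outputPath and the line range are example values only.
const projected = await PostProcessor.doProjection(consumer, dataset, {
    outputPath: '/tmp/dataset.tmp.worker_projection_0', // hypothetical worker output file
    range: { fromLine: 0, toLine: 750000 }              // half-open line window for this worker
});
```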
package/engines/dataset/Dataset.js
CHANGED
@@ -49,15 +49,24 @@ class Dataset {
      return this;
  };
  this.getFirstLine = () => this._firstLine;
- this.
+ this.getCount = () => this._count;
+ this.setCount = (count) => {
+     this._count = count;
+     return this;
+ };
  this.getCycles = () => this._iterations;
  this.getDelimiter = () => this._delimiter;
+ this.setDelimiter = (delimiter) => {
+     this._delimiter = delimiter;
+     return this;
+ };
  this.getOperations = () => this._operations;
  this.load = (source) => __awaiter(this, void 0, void 0, function* () {
      (0, Affirm_1.default)(source, 'Invalid source');
      this._startOperation('load', { source: source.engine });
      const driver = yield DriverFactory_1.default.instantiateSource(source);
      yield driver.download(this);
+     this._size = this._computeSize();
      this._finishOperation('load');
      return this;
  });
@@ -101,7 +110,8 @@ class Dataset {
      }
      // Write all records to the dataset file
      yield this.append(records);
-     this._size =
+     this._size = this._computeSize();
+     this._count = data.length;
      this._finishOperation('load-from-memory');
      return this;
  }
@@ -115,12 +125,14 @@ class Dataset {
  */
  this.transformStream = (transformer_1, ...args_1) => __awaiter(this, [transformer_1, ...args_1], void 0, function* (transformer, options = {}) {
      var _a, e_1, _b, _c;
+     var _d, _e, _f, _g;
      const inputPath = options.inputPath || this._path;
      const outputPath = options.outputPath || this._tempPath;
+     const fromLine = (_e = (_d = options.range) === null || _d === void 0 ? void 0 : _d.fromLine) !== null && _e !== void 0 ? _e : -1;
+     const toLine = (_g = (_f = options.range) === null || _f === void 0 ? void 0 : _f.toLine) !== null && _g !== void 0 ? _g : Infinity;
      this.ensureFile(outputPath);
-     if (!fs_1.default.existsSync(inputPath))
+     if (!fs_1.default.existsSync(inputPath))
          throw new Error(`Input file does not exist: ${inputPath}`);
-     }
      this._startOperation('transform-stream');
      const readStream = (0, fs_2.createReadStream)(inputPath);
      const writeStream = (0, fs_2.createWriteStream)(outputPath);
@@ -128,12 +140,22 @@ class Dataset {
      const dimensions = Algo_1.default.deepClone(this._dimensions);
      let batch = [];
      let lineCount = 0;
+     let index = 0;
      try {
-         for (var
+         for (var _h = true, rl_1 = __asyncValues(rl), rl_1_1; rl_1_1 = yield rl_1.next(), _a = rl_1_1.done, !_a; _h = true) {
              _c = rl_1_1.value;
-
+             _h = false;
              const line = _c;
              try {
+                 if (index < fromLine) {
+                     index++;
+                     continue;
+                 }
+                 else if (index >= toLine) {
+                     index++;
+                     break;
+                 }
+                 index++;
                  // Reuse record from pool and reinitialize it with new line data
                  const record = this._recordPool.getNext(line, dimensions, this._delimiter);
                  batch.push(record);
@@ -144,7 +166,7 @@ class Dataset {
                  writeStream.write(transformedRecord.stringify() + '\n');
              }
              batch = [];
-             this._recordPool.reset();
+             this._recordPool.reset();
          }
      }
      catch (error) {
@@ -155,7 +177,7 @@ class Dataset {
      catch (e_1_1) { e_1 = { error: e_1_1 }; }
      finally {
          try {
-             if (!
+             if (!_h && !_a && (_b = rl_1.return)) yield _b.call(rl_1);
          }
          finally { if (e_1) throw e_1.error; }
      }
@@ -176,14 +198,15 @@ class Dataset {
      if (outputPath === this._tempPath) {
          fs_1.default.renameSync(this._tempPath, this._path);
      }
-     this.
+     this._count = lineCount;
+     this._size = this._computeSize();
      this._iterations++;
      this._finishOperation('transform-stream');
  });
  /**
  * Filter items in the file using batched streaming
  */
- this.filter = (
+ this.filter = (predicate_1, ...args_1) => __awaiter(this, [predicate_1, ...args_1], void 0, function* (predicate, options = {}) {
      this._startOperation('filter');
      let globalIndex = 0;
      yield this.transformStream((batch) => __awaiter(this, void 0, void 0, function* () {
@@ -195,7 +218,7 @@ class Dataset {
          globalIndex++;
      }
      return filteredBatch;
- }));
+ }), options);
      this._finishOperation('filter');
      return this;
  });
@@ -617,7 +640,6 @@ class Dataset {
      }
      rl.close();
      readStream.close();
-     this._size = lineCount;
      this._finishOperation('read-lines');
      return results;
  });
@@ -628,7 +650,14 @@ class Dataset {
  */
  this.prepare = (producer) => __awaiter(this, void 0, void 0, function* () {
      this._startOperation('prepare');
-     const
+     const dimsRes = yield DatasetManager_1.default.buildDimensions(this, producer);
+     yield this.prepareWithDimensions(dimsRes);
+     this._finishOperation('prepare');
+     return this;
+ });
+ this.prepareWithDimensions = (dimResult) => __awaiter(this, void 0, void 0, function* () {
+     this._startOperation('prepare-with-dimensions');
+     const { delimiter, dimensions } = dimResult;
      this._delimiter = delimiter;
      this._dimensions = dimensions;
      switch (this._file.fileType) {
@@ -670,7 +699,7 @@ class Dataset {
          break;
      }
  }
- this._finishOperation('prepare');
+ this._finishOperation('prepare-with-dimensions');
  return this;
  });
  this.getDimensions = () => this._dimensions;
@@ -686,7 +715,7 @@ class Dataset {
      this._recordPool.updateDimensions(this._dimensions, this._delimiter);
  };
  this.print = (...args_1) => __awaiter(this, [...args_1], void 0, function* (count = 3, full = false) {
-     console.log(`DS ${this.
+     console.log(`DS ${this.name} (${this._count} | ${this._iterations})`);
      console.log(this._dimensions.map(x => x.name).join(this._delimiter));
      const records = yield this.readLines(count);
      records.forEach(x => console.log(full ? x : x.stringify()));
@@ -695,7 +724,7 @@ class Dataset {
  this.printStats = () => {
      var _a, _b;
      const total = ((_b = (_a = this._operations) === null || _a === void 0 ? void 0 : _a.map(x => x.elapsedMs)) !== null && _b !== void 0 ? _b : []).reduce((sum, ms) => sum + ms, 0);
-     console.log(`DS[stats] ${this.
+     console.log(`DS[stats] ${this.name} (size: ${this._count} | cycles: ${this._iterations} | ms: ${Helper_1.default.formatDuration(total)})`);
      console.log(`Operations: ${this._operations.length}`);
      console.log(JSON.stringify(this._operations, null, 4));
  };
@@ -760,7 +789,7 @@ class Dataset {
      const finishedOperation = this._findRunningOperation(name);
      if (finishedOperation) {
          finishedOperation.status = 'completed';
-         finishedOperation.count = this.
+         finishedOperation.count = this._count;
          finishedOperation.elapsedMs = performance.now() - finishedOperation.elapsedMs;
      }
      else {
@@ -797,18 +826,19 @@ class Dataset {
      };
      return searchInOperations(this._operations);
  };
- this.
+ this._computeSize = () => fs_1.default.statSync(this._path).size / (1024 * 1024);
+ this.name = name;
  this._file = file;
  this._batchSize = (_a = batchSize !== null && batchSize !== void 0 ? batchSize : parseInt(Environment_1.default.get('MAX_ITEMS_IN_MEMORY'))) !== null && _a !== void 0 ? _a : Constants_1.default.defaults.MAX_ITEMS_IN_MEMORY;
  this._dimensions = [];
  this._firstLine = '';
  this._delimiter = ',';
- this.
+ this._count = 0;
  this._iterations = 0;
  this._operations = [];
  // Initialize record pool for optimization
  this._recordPool = new DatasetRecordPool_1.default(this._batchSize);
- const datasetName = this.
+ const datasetName = this.name
      .replace(/[^a-zA-Z0-9_-]/g, '_')
      .replace(/_{2,}/g, '_')
      .replace(/^_+|_+$/g, '')
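The new `options.range` in `transformStream` skips every line whose index is below `fromLine` and stops reading once the index reaches `toLine`, so each worker processes the half-open window `[fromLine, toLine)` of the file. A small illustration of the same bounds applied to a plain array (illustrative only, not package code):

```js
// Illustration of the half-open [fromLine, toLine) window used by transformStream.
function sliceByRange(lines, fromLine = -1, toLine = Infinity) {
    const out = [];
    for (let index = 0; index < lines.length; index++) {
        if (index < fromLine) continue; // not yet inside this worker's window
        if (index >= toLine) break;     // past the window: stop reading
        out.push(lines[index]);
    }
    return out;
}

// sliceByRange(['a', 'b', 'c', 'd'], 1, 3) -> ['b', 'c']
```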
package/engines/dataset/DatasetManager.js
CHANGED
@@ -19,6 +19,8 @@ const Environment_1 = __importDefault(require("../Environment"));
  const FileCompiler_1 = __importDefault(require("../file/FileCompiler"));
  const ParseManager_1 = __importDefault(require("../parsing/ParseManager"));
  const Dataset_1 = __importDefault(require("./Dataset"));
+ const promises_1 = require("stream/promises");
+ const fs_1 = require("fs");
  class DatasetManagerClass {
      constructor() {
          this.create = (producer) => {
@@ -29,7 +31,7 @@ class DatasetManagerClass {
      fileType,
      hasHeaderRow,
      sheetName,
-     delimiter
+     delimiter
  });
  return dataset;
  };
@@ -125,7 +127,7 @@ class DatasetManagerClass {
      const { cField } = fields[i];
      const currentMatch = structuredClone(dimensions.find(x => x.name === cField.key));
      if (!currentMatch && !cField.fixed)
-         throw new Error(`The requested field "${cField.key}" from the consumer is not present in the underlying dataset "${dataset
+         throw new Error(`The requested field "${cField.key}" from the consumer is not present in the underlying dataset "${dataset.name}" (${dimensions.map(x => x.name).join(', ')})`);
      updates.push({
          currentDimension: currentMatch,
          newName: (_a = cField.alias) !== null && _a !== void 0 ? _a : cField.key,
@@ -147,6 +149,24 @@ class DatasetManagerClass {
          || x.newPosition !== x.currentDimension.index)));
      return updates;
  };
+ /**
+  * Each worker thread writes to its own dataset file to avoid concurrency issues and data loss;
+  * at the end of their work, their results are merged into a single file.
+  */
+ this.mergeWorkersPaths = (threadPaths, dataset) => __awaiter(this, void 0, void 0, function* () {
+     dataset.clear();
+     const datasetPath = dataset.getPath();
+     for (let i = 0; i < threadPaths.length; i++) {
+         const path = threadPaths[i];
+         const readStream = (0, fs_1.createReadStream)(path);
+         // For the first file, create a new write stream
+         // For subsequent files, append to the existing file
+         const writeStream = (0, fs_1.createWriteStream)(datasetPath, { flags: i === 0 ? 'w' : 'a' });
+         yield (0, promises_1.pipeline)(readStream, writeStream);
+         (0, fs_1.unlinkSync)(path);
+     }
+     return dataset;
+ });
  }
  }
  const DatasetManager = new DatasetManagerClass();
package/engines/dataset/ParallelDataset.js
ADDED
@@ -0,0 +1,142 @@
+ "use strict";
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
+     function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
+     return new (P || (P = Promise))(function (resolve, reject) {
+         function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
+         function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
+         function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
+         step((generator = generator.apply(thisArg, _arguments || [])).next());
+     });
+ };
+ var __importDefault = (this && this.__importDefault) || function (mod) {
+     return (mod && mod.__esModule) ? mod : { "default": mod };
+ };
+ Object.defineProperty(exports, "__esModule", { value: true });
+ const Constants_1 = __importDefault(require("../../Constants"));
+ const Affirm_1 = __importDefault(require("../../core/Affirm"));
+ const Environment_1 = __importDefault(require("../Environment"));
+ const workerpool_1 = __importDefault(require("workerpool"));
+ const DatasetManager_1 = __importDefault(require("./DatasetManager"));
+ class ParallelDatasetClass {
+     constructor() {
+         this._scopeWork = (dataset) => {
+             var _a;
+             const datasetCount = dataset.getCount();
+             const batchSize = (_a = parseInt(Environment_1.default.get('MAX_ITEMS_IN_MEMORY'))) !== null && _a !== void 0 ? _a : Constants_1.default.defaults.MAX_ITEMS_IN_MEMORY;
+             const workerChunkSize = batchSize * Math.round(Constants_1.default.defaults.INDICATIVE_THREAD_LINE_COUNT / batchSize);
+             const workerCount = Math.min(Math.ceil(datasetCount / workerChunkSize), Constants_1.default.defaults.MAX_THREAD_COUNT);
+             const adjustedWorkerCount = Math.ceil(datasetCount / workerCount);
+             return { workerCount, adjustedWorkerCount };
+         };
+         this.filter = (dataset, filters) => __awaiter(this, void 0, void 0, function* () {
+             (0, Affirm_1.default)(dataset, `Invalid dataset`);
+             (0, Affirm_1.default)(filters, `Invalid filters`);
+             // Distribute the work of the filter among the various workers, trying to have them match the batch size
+             const { adjustedWorkerCount, workerCount } = this._scopeWork(dataset);
+             dataset._startOperation('filter-parallel', { workerCount });
+             const threads = [];
+             for (let i = 0; i < workerCount; i++) {
+                 const workerId = `worker_filter_${i}`;
+                 const fromLine = adjustedWorkerCount * i;
+                 const toLine = (i === workerCount - 1)
+                     ? Infinity
+                     : (adjustedWorkerCount * i) + adjustedWorkerCount;
+                 const workerData = {
+                     datasetDimensions: dataset.getDimensions(),
+                     datasetFile: dataset.getFile(),
+                     datasetName: dataset.name,
+                     datasetDelimiter: dataset.getDelimiter(),
+                     fromLine: fromLine,
+                     toLine: toLine,
+                     workerId: workerId,
+                     filterData: {
+                         rules: filters
+                     }
+                 };
+                 threads.push(this._filterPool.exec('filter', [workerData]));
+             }
+             const results = yield Promise.all(threads);
+             yield this._filterPool.terminate();
+             (0, Affirm_1.default)(results.every(x => x.success), `Error in processing the dataset on multiple threads: filter ${dataset.name}`);
+             yield DatasetManager_1.default.mergeWorkersPaths(results.map(x => x.datasetPath), dataset);
+             dataset
+                 .setDelimiter(results[0].datasetDelimiter)
+                 .setDimensinons(results[0].datasetDimensions);
+             dataset._finishOperation('filter-parallel');
+             return dataset;
+         });
+         this.projection = (dataset, consumer) => __awaiter(this, void 0, void 0, function* () {
+             (0, Affirm_1.default)(dataset, `Invalid dataset`);
+             (0, Affirm_1.default)(consumer, `Invalid consumer`);
+             const { adjustedWorkerCount, workerCount } = this._scopeWork(dataset);
+             dataset._startOperation('projection-parallel', { workerCount });
+             const threads = [];
+             for (let i = 0; i < workerCount; i++) {
+                 const workerId = `worker_projection_${i}`;
+                 const fromLine = adjustedWorkerCount * i;
+                 const toLine = (i === workerCount - 1)
+                     ? Infinity
+                     : (adjustedWorkerCount * i) + adjustedWorkerCount;
+                 const workerData = {
+                     datasetDimensions: dataset.getDimensions(),
+                     datasetFile: dataset.getFile(),
+                     datasetName: dataset.name,
+                     datasetDelimiter: dataset.getDelimiter(),
+                     fromLine: fromLine,
+                     toLine: toLine,
+                     workerId: workerId,
+                     projectionData: { consumerName: consumer.name }
+                 };
+                 threads.push(this._projectionPool.exec('projection', [workerData]));
+             }
+             const results = yield Promise.all(threads);
+             yield this._projectionPool.terminate();
+             (0, Affirm_1.default)(results.every(x => x.success), `Error in processing the dataset on multiple threads: projection ${dataset.name}`);
+             yield DatasetManager_1.default.mergeWorkersPaths(results.map(x => x.datasetPath), dataset);
+             dataset
+                 .setDelimiter(results[0].datasetDelimiter)
+                 .setDimensinons(results[0].datasetDimensions);
+             dataset._finishOperation('projection-parallel');
+             return dataset;
+         });
+         this.transform = (dataset, consumer) => __awaiter(this, void 0, void 0, function* () {
+             (0, Affirm_1.default)(dataset, `Invalid dataset`);
+             (0, Affirm_1.default)(consumer, `Invalid consumer`);
+             const { adjustedWorkerCount, workerCount } = this._scopeWork(dataset);
+             dataset._startOperation('transform-parallel', { workerCount });
+             const threads = [];
+             for (let i = 0; i < workerCount; i++) {
+                 const workerId = `worker_transform_${i}`;
+                 const fromLine = adjustedWorkerCount * i;
+                 const toLine = (i === workerCount - 1)
+                     ? Infinity
+                     : (adjustedWorkerCount * i) + adjustedWorkerCount;
+                 const workerData = {
+                     datasetDimensions: dataset.getDimensions(),
+                     datasetFile: dataset.getFile(),
+                     datasetName: dataset.name,
+                     datasetDelimiter: dataset.getDelimiter(),
+                     fromLine: fromLine,
+                     toLine: toLine,
+                     workerId: workerId,
+                     transformData: { consumerName: consumer.name }
+                 };
+                 threads.push(this._transformPool.exec('transform', [workerData]));
+             }
+             const results = yield Promise.all(threads);
+             yield this._transformPool.terminate();
+             (0, Affirm_1.default)(results.every(x => x.success), `Error in processing the dataset on multiple threads: projection ${dataset.name}`);
+             yield DatasetManager_1.default.mergeWorkersPaths(results.map(x => x.datasetPath), dataset);
+             dataset
+                 .setDelimiter(results[0].datasetDelimiter)
+                 .setDimensinons(results[0].datasetDimensions);
+             dataset._finishOperation('transform-parallel');
+             return dataset;
+         });
+         this._filterPool = workerpool_1.default.pool('./.build/workers/FilterWorker.js');
+         this._projectionPool = workerpool_1.default.pool('./.build/workers/ProjectionWorker.js');
+         this._transformPool = workerpool_1.default.pool('./.build/workers/TransformWorker.js');
+     }
+ }
+ const ParallelDataset = new ParallelDatasetClass();
+ exports.default = ParallelDataset;
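The `_scopeWork` helper above sizes the thread pool from the new constants (`INDICATIVE_THREAD_LINE_COUNT: 750000`, `MAX_THREAD_COUNT: 8`) and the dataset's line count. A worked example with assumed inputs (a 3,000,000-line dataset and the default `MAX_ITEMS_IN_MEMORY` of 200,000):

```js
// Worked example of _scopeWork with assumed inputs (not package code).
const datasetCount = 3000000;                 // assumed dataset line count
const batchSize = 200000;                     // default MAX_ITEMS_IN_MEMORY
const INDICATIVE_THREAD_LINE_COUNT = 750000;
const MAX_THREAD_COUNT = 8;

const workerChunkSize = batchSize * Math.round(INDICATIVE_THREAD_LINE_COUNT / batchSize); // 800000
const workerCount = Math.min(Math.ceil(datasetCount / workerChunkSize), MAX_THREAD_COUNT); // 4
const adjustedWorkerCount = Math.ceil(datasetCount / workerCount);                         // 750000 lines per worker

// Ranges handed to the workers: [0, 750000), [750000, 1500000), [1500000, 2250000),
// and the last worker takes [2250000, Infinity) to pick up any remainder.
```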
package/engines/execution/ExecutionEnvironment.js
CHANGED
@@ -21,12 +21,12 @@ const SQLBuilder_1 = __importDefault(require("../sql/SQLBuilder"));
  const SQLCompiler_1 = __importDefault(require("../sql/SQLCompiler"));
  const ExecutionPlanner_1 = __importDefault(require("./ExecutionPlanner"));
  const RequestExecutor_1 = __importDefault(require("./RequestExecutor"));
- const TransformationEngine_1 = __importDefault(require("../transform/TransformationEngine"));
  const JoinEngine_1 = __importDefault(require("../transform/JoinEngine"));
  const DatasetManager_1 = __importDefault(require("../dataset/DatasetManager"));
  const Environment_1 = __importDefault(require("../Environment"));
  const Algo_1 = __importDefault(require("../../core/Algo"));
  const Logger_1 = __importDefault(require("../../helper/Logger"));
+ const ParallelDataset_1 = __importDefault(require("../dataset/ParallelDataset"));
  class ExecutionEnvironment {
      constructor(consumer) {
          this.run = (options) => __awaiter(this, void 0, void 0, function* () {
@@ -95,7 +95,7 @@ class ExecutionEnvironment {
      }
      case 'post-process-json': {
          const dataset = this._getIntermidiate(planStep);
-         const newDataset = yield
+         const newDataset = yield ParallelDataset_1.default.projection(dataset, this._consumer);
          this._storeIntermidiate(planStep, newDataset);
          break;
      }
@@ -111,12 +111,11 @@ class ExecutionEnvironment {
          break;
      }
      case 'apply-consumer-filters-on-JSON': {
-
-         this._resultingDataset = yield RequestExecutor_1.default.applyFilters(this._resultingDataset, rules);
+         this._resultingDataset = yield ParallelDataset_1.default.filter(this._resultingDataset, this._consumer.filters);
          break;
      }
      case 'apply-transformations': {
-         this._resultingDataset = yield
+         this._resultingDataset = yield ParallelDataset_1.default.transform(this._resultingDataset, this._consumer);
          break;
      }
      case 'join-producers-data': {
@@ -134,7 +133,7 @@ class ExecutionEnvironment {
      cycles: this._resultingDataset.getCycles(),
      elapsedMS: performance.now() - start,
      operations: structuredClone(this._resultingDataset.getOperations()),
-     size: this._resultingDataset.
+     size: this._resultingDataset.getCount()
  };
  break;
  }
@@ -155,7 +154,7 @@ class ExecutionEnvironment {
  catch (error) {
      const ds = (_c = this._resultingDataset) !== null && _c !== void 0 ? _c : (_d = this._producedData.at(-1)) === null || _d === void 0 ? void 0 : _d.dataset;
      if (ds)
-         Logger_1.default.log(`Failed execution of consumer at step ${currentStep.type}:\n\tSize: ${ds.
+         Logger_1.default.log(`Failed execution of consumer at step ${currentStep.type}:\n\tSize: ${ds.getCount()}\n\tCycles: ${ds.getCycles()}\n\tOperations: ${Logger_1.default.formatList(ds.getOperations())}`);
      Logger_1.default.log(`\tFailed step: ${currentStep.type}->\n\t${error}`);
      throw error;
  }
package/engines/execution/RequestExecutor.js
CHANGED
@@ -27,10 +27,10 @@ class RequestExecutorClass {
      dataset = yield this._applyOrdering(dataset, request.order);
      return dataset;
  });
- this.applyFilters = (
+ this.applyFilters = (dataset_1, filters_1, ...args_1) => __awaiter(this, [dataset_1, filters_1, ...args_1], void 0, function* (dataset, filters, options = {}) {
      return yield dataset.filter(record => {
          return filters.every(filter => this._evaluateFilter(record, filter));
-     });
+     }, options);
  });
  this._evaluateFilter = (record, filter) => {
      const evaluate = (baseRecord, baseFilter) => {
package/engines/transform/JoinEngine.js
CHANGED
@@ -142,7 +142,7 @@ class JoinEngineClass {
  for (const prodData of otherProducedData) {
      const prodDimKey = getDimensionsKey(prodData.dataset);
      if (mainDimKey !== prodDimKey)
-         throw new Error(`On consumer "${consumer.name}", can't union the dataset "${prodData.dataset
+         throw new Error(`On consumer "${consumer.name}", can't union the dataset "${prodData.dataset.name}" (producer: ${prodData.producerKey}) because the dimensions are different from the main dataset "${mainDataset.name}" (producer: ${producedData[0].producerKey}). "${mainDimKey}" != "${prodDimKey}"`);
      yield prodData.dataset.streamBatches((batch) => __awaiter(this, void 0, void 0, function* () {
          yield mainDataset.append(batch);
      }));
package/engines/transform/TransformationEngine.js
CHANGED
@@ -17,7 +17,7 @@ const Algo_1 = __importDefault(require("../../core/Algo"));
  const TypeCaster_1 = __importDefault(require("./TypeCaster"));
  class TransformationEngineClass {
      constructor() {
-         this.apply = (consumer, dataset) => __awaiter(this, void 0, void 0, function* () {
+         this.apply = (consumer, dataset, options) => __awaiter(this, void 0, void 0, function* () {
              (0, Affirm_1.default)(consumer, 'Invalid consumer');
              (0, Affirm_1.default)(dataset, 'Invalid data');
              const fieldsToTransform = consumer.fields.filter(field => Algo_1.default.hasVal(field.transform));
@@ -53,7 +53,7 @@ class TransformationEngineClass {
              }
          }
          return record;
-     });
+     }, options);
  });
  this.isFieldCombinationTransformation = (transformation) => {
      if (Array.isArray(transformation)) {
package/package.json
CHANGED
@@ -1,6 +1,6 @@
  {
      "name": "@forzalabs/remora",
-     "version": "0.
+     "version": "0.1.1-nasco.3",
      "description": "A powerful CLI tool for seamless data translation.",
      "main": "index.js",
      "private": false,
@@ -9,7 +9,7 @@
  },
  "scripts": {
      "sync": "cd ../dev_ops && npm run sync",
-     "dev": "clear && npx tsx scripts/
+     "dev": "clear && npm run fast-build && clear && npx tsx scripts/dev.ts",
      "tsc-check": "npx tsc --noemit",
      "init": "npx tsx ./src/index.ts init",
      "version": "npx tsx ./src/index.ts -v",
@@ -21,6 +21,7 @@
      "create-producer": "npx tsx ./src/index.ts create-producer",
      "copy-static-file": "npx tsx ./scripts/CopyStaticFile.js",
      "build": "npm i && npm run sync && tsc --outDir .build && npm run copy-static-file",
+     "fast-build": "tsc --outDir .build",
      "upload": "npm run build && cd .build && npm publish --tag nasco --access=public"
  },
  "keywords": [
@@ -55,6 +56,7 @@
      "react": "^18.2.0",
      "react-dom": "^18.2.0",
      "seedrandom": "^3.0.5",
+     "workerpool": "^9.3.3",
      "xlsx": "^0.18.5",
      "zod": "^3.24.2"
  },
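The new `workerpool` dependency is what backs the thread pools in ParallelDataset and the worker files below. A minimal, self-contained sketch of the pool/worker pattern as it is used in this release (the worker path is the one the package uses; the surrounding function is illustrative):

```js
// main side: dispatch a task to a pooled worker and wait for its result (sketch).
const workerpool = require('workerpool');
const pool = workerpool.pool('./.build/workers/FilterWorker.js');

async function runFilter(workerData) {
    // 'filter' is the method name registered by the worker file below
    const result = await pool.exec('filter', [workerData]);
    await pool.terminate();
    return result;
}

// worker side: the worker file registers its handlers like the workers in this diff:
// workerpool.worker({ filter: async (workerData) => { /* ... */ } });
```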
package/workers/FilterWorker.js
ADDED
@@ -0,0 +1,62 @@
+ "use strict";
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
+     function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
+     return new (P || (P = Promise))(function (resolve, reject) {
+         function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
+         function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
+         function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
+         step((generator = generator.apply(thisArg, _arguments || [])).next());
+     });
+ };
+ var __importDefault = (this && this.__importDefault) || function (mod) {
+     return (mod && mod.__esModule) ? mod : { "default": mod };
+ };
+ Object.defineProperty(exports, "__esModule", { value: true });
+ const workerpool_1 = __importDefault(require("workerpool"));
+ const dotenv_1 = __importDefault(require("dotenv"));
+ const Affirm_1 = __importDefault(require("../core/Affirm"));
+ const Dataset_1 = __importDefault(require("../engines/dataset/Dataset"));
+ const Environment_1 = __importDefault(require("../engines/Environment"));
+ const RequestExecutor_1 = __importDefault(require("../engines/execution/RequestExecutor"));
+ dotenv_1.default.configDotenv();
+ const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
+     Environment_1.default.load('./');
+     try {
+         const { datasetName, fromLine, toLine, workerId, datasetFile, datasetDimensions, datasetDelimiter, filterData: filter } = workerData;
+         Affirm_1.default.hasValue(fromLine, `Invalid from line`);
+         Affirm_1.default.hasValue(toLine, `Invalid to line`);
+         (0, Affirm_1.default)(datasetName, `Invalid dataset name`);
+         (0, Affirm_1.default)(workerId, `Invalid worker id`);
+         (0, Affirm_1.default)(datasetFile, `Invalid dataset file`);
+         (0, Affirm_1.default)(datasetDimensions, `Invalid dataset dimensions`);
+         (0, Affirm_1.default)(filter, `Invalid filter data`);
+         (0, Affirm_1.default)(datasetDelimiter, `Invalid dataset delimter`);
+         const dataset = new Dataset_1.default(datasetName, datasetFile);
+         dataset
+             .setDimensinons(datasetDimensions)
+             .setDelimiter(datasetDelimiter);
+         const outputPath = dataset['_tempPath'] + workerId;
+         const rules = filter.rules.map(x => x.rule);
+         yield RequestExecutor_1.default.applyFilters(dataset, rules, { outputPath, range: { fromLine, toLine } });
+         const result = {
+             success: true,
+             datasetDelimiter: dataset.getDelimiter(),
+             datasetDimensions: dataset.getDimensions(),
+             datasetFile: dataset.getFile(),
+             datasetName: dataset.name,
+             datasetPath: outputPath
+         };
+         return result;
+     }
+     catch (error) {
+         console.error(error);
+         const result = {
+             success: false,
+             error
+         };
+         return result;
+     }
+ });
+ workerpool_1.default.worker({
+     filter: run
+ });
package/workers/ProjectionWorker.js
ADDED
@@ -0,0 +1,63 @@
+ "use strict";
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
+     function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
+     return new (P || (P = Promise))(function (resolve, reject) {
+         function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
+         function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
+         function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
+         step((generator = generator.apply(thisArg, _arguments || [])).next());
+     });
+ };
+ var __importDefault = (this && this.__importDefault) || function (mod) {
+     return (mod && mod.__esModule) ? mod : { "default": mod };
+ };
+ Object.defineProperty(exports, "__esModule", { value: true });
+ const workerpool_1 = __importDefault(require("workerpool"));
+ const dotenv_1 = __importDefault(require("dotenv"));
+ const Affirm_1 = __importDefault(require("../core/Affirm"));
+ const Dataset_1 = __importDefault(require("../engines/dataset/Dataset"));
+ const Environment_1 = __importDefault(require("../engines/Environment"));
+ const PostProcessor_1 = __importDefault(require("../engines/consumer/PostProcessor"));
+ dotenv_1.default.configDotenv();
+ const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
+     Environment_1.default.load('./');
+     try {
+         const { datasetName, fromLine, toLine, workerId, datasetFile, datasetDimensions, datasetDelimiter, projectionData } = workerData;
+         Affirm_1.default.hasValue(fromLine, `Invalid from line`);
+         Affirm_1.default.hasValue(toLine, `Invalid to line`);
+         (0, Affirm_1.default)(datasetName, `Invalid dataset name`);
+         (0, Affirm_1.default)(workerId, `Invalid worker id`);
+         (0, Affirm_1.default)(datasetFile, `Invalid dataset file`);
+         (0, Affirm_1.default)(datasetDimensions, `Invalid dataset dimensions`);
+         (0, Affirm_1.default)(projectionData, `Invalid projection data`);
+         (0, Affirm_1.default)(datasetDelimiter, `Invalid dataset delimter`);
+         const consumer = Environment_1.default.getConsumer(projectionData.consumerName);
+         (0, Affirm_1.default)(consumer, `Wrong consumer name sent to projection worker: "${projectionData.consumerName}" not found.`);
+         const dataset = new Dataset_1.default(datasetName, datasetFile);
+         dataset
+             .setDimensinons(datasetDimensions)
+             .setDelimiter(datasetDelimiter);
+         const outputPath = dataset['_tempPath'] + workerId;
+         yield PostProcessor_1.default.doProjection(consumer, dataset, { outputPath, range: { fromLine, toLine } });
+         const result = {
+             success: true,
+             datasetDelimiter: dataset.getDelimiter(),
+             datasetDimensions: dataset.getDimensions(),
+             datasetFile: dataset.getFile(),
+             datasetName: dataset.name,
+             datasetPath: outputPath
+         };
+         return result;
+     }
+     catch (error) {
+         console.error(error);
+         const result = {
+             success: false,
+             error
+         };
+         return result;
+     }
+ });
+ workerpool_1.default.worker({
+     projection: run
+ });
package/workers/TransformWorker.js
ADDED
@@ -0,0 +1,63 @@
+ "use strict";
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
+     function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
+     return new (P || (P = Promise))(function (resolve, reject) {
+         function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
+         function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
+         function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
+         step((generator = generator.apply(thisArg, _arguments || [])).next());
+     });
+ };
+ var __importDefault = (this && this.__importDefault) || function (mod) {
+     return (mod && mod.__esModule) ? mod : { "default": mod };
+ };
+ Object.defineProperty(exports, "__esModule", { value: true });
+ const workerpool_1 = __importDefault(require("workerpool"));
+ const dotenv_1 = __importDefault(require("dotenv"));
+ const Affirm_1 = __importDefault(require("../core/Affirm"));
+ const Dataset_1 = __importDefault(require("../engines/dataset/Dataset"));
+ const Environment_1 = __importDefault(require("../engines/Environment"));
+ const TransformationEngine_1 = __importDefault(require("../engines/transform/TransformationEngine"));
+ dotenv_1.default.configDotenv();
+ const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
+     Environment_1.default.load('./');
+     try {
+         const { datasetName, fromLine, toLine, workerId, datasetFile, datasetDimensions, datasetDelimiter, transformData } = workerData;
+         Affirm_1.default.hasValue(fromLine, `Invalid from line`);
+         Affirm_1.default.hasValue(toLine, `Invalid to line`);
+         (0, Affirm_1.default)(datasetName, `Invalid dataset name`);
+         (0, Affirm_1.default)(workerId, `Invalid worker id`);
+         (0, Affirm_1.default)(datasetFile, `Invalid dataset file`);
+         (0, Affirm_1.default)(datasetDimensions, `Invalid dataset dimensions`);
+         (0, Affirm_1.default)(transformData, `Invalid transform data`);
+         (0, Affirm_1.default)(datasetDelimiter, `Invalid dataset delimter`);
+         const consumer = Environment_1.default.getConsumer(transformData.consumerName);
+         (0, Affirm_1.default)(consumer, `Wrong consumer name sent to projection worker: "${transformData.consumerName}" not found.`);
+         const dataset = new Dataset_1.default(datasetName, datasetFile);
+         dataset
+             .setDimensinons(datasetDimensions)
+             .setDelimiter(datasetDelimiter);
+         const outputPath = dataset['_tempPath'] + workerId;
+         yield TransformationEngine_1.default.apply(consumer, dataset, { outputPath, range: { fromLine, toLine } });
+         const result = {
+             success: true,
+             datasetDelimiter: dataset.getDelimiter(),
+             datasetDimensions: dataset.getDimensions(),
+             datasetFile: dataset.getFile(),
+             datasetName: dataset.name,
+             datasetPath: outputPath
+         };
+         return result;
+     }
+     catch (error) {
+         console.error(error);
+         const result = {
+             success: false,
+             error
+         };
+         return result;
+     }
+ });
+ workerpool_1.default.worker({
+     transform: run
+ });
package/workers/TsWorker.js
ADDED
@@ -0,0 +1,14 @@
+ "use strict";
+ var __importDefault = (this && this.__importDefault) || function (mod) {
+     return (mod && mod.__esModule) ? mod : { "default": mod };
+ };
+ Object.defineProperty(exports, "__esModule", { value: true });
+ const workerpool_1 = __importDefault(require("workerpool"));
+ const DatasetRecord_1 = __importDefault(require("../engines/dataset/DatasetRecord"));
+ workerpool_1.default.worker({
+     ts: () => {
+         const tt = new DatasetRecord_1.default('bububub,bububbu', [{ hidden: false, index: 0, key: '11', name: '11' }], ',');
+         console.log(tt);
+         console.log('hello form typescript', tt.stringify());
+     }
+ });