@forzalabs/remora 1.0.21 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/actions/automap.js +26 -42
- package/actions/compile.js +27 -43
- package/actions/create_consumer.js +24 -40
- package/actions/create_producer.js +16 -32
- package/actions/debug.js +18 -34
- package/actions/deploy.js +30 -46
- package/actions/discover.js +13 -29
- package/actions/init.js +29 -45
- package/actions/mock.js +16 -32
- package/actions/run.js +34 -52
- package/actions/sample.js +42 -58
- package/index.js +38 -43
- package/package.json +4 -4
- package/workers/ExecutorWorker.js +18 -32
- package/Constants.js +0 -34
- package/core/Affirm.js +0 -42
- package/core/Algo.js +0 -160
- package/core/dste/DSTE.js +0 -113
- package/core/logger/DebugLogService.js +0 -48
- package/core/logger/DevelopmentLogService.js +0 -70
- package/core/logger/LocalLogService.js +0 -70
- package/core/logger/Logger.js +0 -54
- package/database/DatabaseEngine.js +0 -149
- package/database/DatabaseStructure.js +0 -27
- package/definitions/DatasetDefinitions.js +0 -2
- package/definitions/ExecutorDefinitions.js +0 -2
- package/definitions/ProcessENV.js +0 -2
- package/definitions/agents/DestinationDriver.js +0 -2
- package/definitions/agents/SourceDriver.js +0 -2
- package/definitions/cli.js +0 -2
- package/definitions/database/ApiKeys.js +0 -2
- package/definitions/database/Stored.js +0 -7
- package/definitions/database/UsageStat.js +0 -2
- package/definitions/database/User.js +0 -2
- package/definitions/json_schemas/consumer-schema.json +0 -1226
- package/definitions/json_schemas/producer-schema.json +0 -308
- package/definitions/json_schemas/project-schema.json +0 -100
- package/definitions/json_schemas/source-schema.json +0 -249
- package/definitions/requests/ConsumerRequest.js +0 -2
- package/definitions/requests/Developer.js +0 -2
- package/definitions/requests/Mapping.js +0 -2
- package/definitions/requests/ProducerRequest.js +0 -2
- package/definitions/requests/Request.js +0 -2
- package/definitions/resources/Compiled.js +0 -2
- package/definitions/resources/Consumer.js +0 -2
- package/definitions/resources/Environment.js +0 -2
- package/definitions/resources/Library.js +0 -2
- package/definitions/resources/Producer.js +0 -2
- package/definitions/resources/Project.js +0 -2
- package/definitions/resources/Schema.js +0 -2
- package/definitions/resources/Source.js +0 -2
- package/definitions/temp.js +0 -2
- package/definitions/transform/Transformations.js +0 -2
- package/drivers/DeltaShareDriver.js +0 -186
- package/drivers/DriverFactory.js +0 -72
- package/drivers/DriverHelper.js +0 -248
- package/drivers/HttpApiDriver.js +0 -208
- package/drivers/RedshiftDriver.js +0 -184
- package/drivers/files/LocalDestinationDriver.js +0 -146
- package/drivers/files/LocalSourceDriver.js +0 -405
- package/drivers/s3/S3DestinationDriver.js +0 -197
- package/drivers/s3/S3SourceDriver.js +0 -495
- package/engines/CryptoEngine.js +0 -75
- package/engines/Environment.js +0 -170
- package/engines/ProcessENVManager.js +0 -83
- package/engines/RandomEngine.js +0 -47
- package/engines/SecretManager.js +0 -23
- package/engines/UserManager.js +0 -66
- package/engines/ai/AutoMapperEngine.js +0 -37
- package/engines/ai/DeveloperEngine.js +0 -497
- package/engines/ai/LLM.js +0 -255
- package/engines/consumer/ConsumerManager.js +0 -218
- package/engines/consumer/ConsumerOnFinishManager.js +0 -202
- package/engines/dataset/Dataset.js +0 -824
- package/engines/dataset/DatasetManager.js +0 -211
- package/engines/dataset/DatasetRecord.js +0 -120
- package/engines/dataset/DatasetRecordPool.js +0 -77
- package/engines/execution/RequestExecutor.js +0 -67
- package/engines/parsing/CSVParser.js +0 -60
- package/engines/parsing/LineParser.js +0 -71
- package/engines/parsing/ParseCompression.js +0 -101
- package/engines/parsing/ParseHelper.js +0 -18
- package/engines/parsing/ParseManager.js +0 -54
- package/engines/parsing/XLSParser.js +0 -87
- package/engines/parsing/XMLParser.js +0 -115
- package/engines/producer/ProducerEngine.js +0 -127
- package/engines/producer/ProducerManager.js +0 -43
- package/engines/scheduler/CronScheduler.js +0 -222
- package/engines/scheduler/QueueManager.js +0 -314
- package/engines/schema/SchemaValidator.js +0 -67
- package/engines/transform/JoinEngine.js +0 -232
- package/engines/transform/TransformationEngine.js +0 -277
- package/engines/transform/TypeCaster.js +0 -59
- package/engines/usage/DataframeManager.js +0 -55
- package/engines/usage/UsageDataManager.js +0 -151
- package/engines/usage/UsageManager.js +0 -65
- package/engines/validation/Validator.js +0 -216
- package/executors/ConsumerExecutor.js +0 -280
- package/executors/Executor.js +0 -177
- package/executors/ExecutorOrchestrator.js +0 -331
- package/executors/ExecutorPerformance.js +0 -17
- package/executors/ExecutorProgress.js +0 -54
- package/executors/ExecutorScope.js +0 -52
- package/executors/OutputExecutor.js +0 -118
- package/executors/ProducerExecutor.js +0 -108
- package/helper/Helper.js +0 -149
- package/helper/Logger.js +0 -84
- package/helper/Runtime.js +0 -20
- package/helper/Settings.js +0 -13
- package/licencing/LicenceManager.js +0 -64
- package/settings.js +0 -12
|
@@ -1,331 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
-
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
-
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
-
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
-
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
-
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
-
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
-
});
|
|
10
|
-
};
|
|
11
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
12
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
13
|
-
};
|
|
14
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
-
const os_1 = __importDefault(require("os"));
|
|
16
|
-
const fs_1 = __importDefault(require("fs"));
|
|
17
|
-
const promises_1 = __importDefault(require("fs/promises"));
|
|
18
|
-
const path_1 = __importDefault(require("path"));
|
|
19
|
-
const promises_2 = require("stream/promises");
|
|
20
|
-
const workerpool_1 = __importDefault(require("workerpool"));
|
|
21
|
-
const Affirm_1 = __importDefault(require("../core/Affirm"));
|
|
22
|
-
const UsageManager_1 = __importDefault(require("../engines/usage/UsageManager"));
|
|
23
|
-
const Helper_1 = __importDefault(require("../helper/Helper"));
|
|
24
|
-
const Environment_1 = __importDefault(require("../engines/Environment"));
|
|
25
|
-
const ProducerExecutor_1 = __importDefault(require("./ProducerExecutor"));
|
|
26
|
-
const Constants_1 = __importDefault(require("../Constants"));
|
|
27
|
-
const DriverHelper_1 = __importDefault(require("../drivers/DriverHelper"));
|
|
28
|
-
const ConsumerExecutor_1 = __importDefault(require("./ConsumerExecutor"));
|
|
29
|
-
const OutputExecutor_1 = __importDefault(require("./OutputExecutor"));
|
|
30
|
-
const ConsumerManager_1 = __importDefault(require("../engines/consumer/ConsumerManager"));
|
|
31
|
-
const ExecutorPerformance_1 = __importDefault(require("./ExecutorPerformance"));
|
|
32
|
-
const ExecutorProgress_1 = __importDefault(require("./ExecutorProgress"));
|
|
33
|
-
const Algo_1 = __importDefault(require("../core/Algo"));
|
|
34
|
-
const ConsumerOnFinishManager_1 = __importDefault(require("../engines/consumer/ConsumerOnFinishManager"));
|
|
35
|
-
const ExecutorScope_1 = __importDefault(require("./ExecutorScope"));
|
|
36
|
-
const ProcessENVManager_1 = __importDefault(require("../engines/ProcessENVManager"));
|
|
37
|
-
class ExecutorOrchestratorClass {
|
|
38
|
-
constructor() {
|
|
39
|
-
this.init = () => {
|
|
40
|
-
if (!this._executorPool) {
|
|
41
|
-
const options = {
|
|
42
|
-
workerThreadOpts: {
|
|
43
|
-
resourceLimits: {
|
|
44
|
-
maxOldGenerationSizeMb: Constants_1.default.defaults.MIN_RUNTIME_HEAP_MB
|
|
45
|
-
}
|
|
46
|
-
}
|
|
47
|
-
};
|
|
48
|
-
const workerPath = this._getWorkerPath();
|
|
49
|
-
this._executorPool = workerpool_1.default.pool(path_1.default.join(workerPath, 'ExecutorWorker.js'), options);
|
|
50
|
-
}
|
|
51
|
-
};
|
|
52
|
-
this.launch = (request) => __awaiter(this, void 0, void 0, function* () {
|
|
53
|
-
var _a, _b;
|
|
54
|
-
(0, Affirm_1.default)(request, 'Invalid options');
|
|
55
|
-
const { consumer, details, logProgress, options } = request;
|
|
56
|
-
(0, Affirm_1.default)(consumer, 'Invalid consumer');
|
|
57
|
-
(0, Affirm_1.default)(details, 'Invalid execution details');
|
|
58
|
-
const tracker = new ExecutorPerformance_1.default();
|
|
59
|
-
const _progress = new ExecutorProgress_1.default(logProgress);
|
|
60
|
-
const { usageId } = UsageManager_1.default.startUsage(consumer, details);
|
|
61
|
-
const scope = { id: usageId, folder: `${consumer.name}_${usageId}`, workersId: [] };
|
|
62
|
-
try {
|
|
63
|
-
const start = performance.now();
|
|
64
|
-
this.init();
|
|
65
|
-
const executorResults = [];
|
|
66
|
-
let counter = performance.now();
|
|
67
|
-
const sourceFilesByProducer = yield this.readySourceFiles(consumer, scope);
|
|
68
|
-
tracker.measure('ready-producers', performance.now() - counter);
|
|
69
|
-
let globalWorkerIndex = 0;
|
|
70
|
-
for (const pair of sourceFilesByProducer) {
|
|
71
|
-
const { prod, cProd, response } = pair;
|
|
72
|
-
// Make sure that the data files are there, if missing and isOptional = true, then skip
|
|
73
|
-
if (!fs_1.default.existsSync(response.files[0].fullUri)) {
|
|
74
|
-
if (!cProd.isOptional)
|
|
75
|
-
throw new Error(`Expected data file ${response.files[0].fullUri} of producer ${prod.name} in consumer ${consumer.name} is missing.`);
|
|
76
|
-
else if (cProd.isOptional === true)
|
|
77
|
-
continue;
|
|
78
|
-
}
|
|
79
|
-
console.log('Starting operations on ', response.files[0].fullUri);
|
|
80
|
-
// Extract the dimensions for this producer just once
|
|
81
|
-
const firstLine = (yield DriverHelper_1.default.quickReadFile(response.files[0].fullUri, 1))[0];
|
|
82
|
-
const header = ProducerExecutor_1.default.processHeader(firstLine, prod);
|
|
83
|
-
const prodDimensions = ProducerExecutor_1.default.reconcileHeader(header, prod);
|
|
84
|
-
const totalFiles = response.files.length;
|
|
85
|
-
for (const [fileIndex, file] of response.files.entries()) {
|
|
86
|
-
const chunks = ExecutorOrchestrator.scopeWork(file.fullUri);
|
|
87
|
-
const workerThreads = [];
|
|
88
|
-
for (const chunk of chunks) {
|
|
89
|
-
// Spawn off thread
|
|
90
|
-
const workerId = `${usageId}_${globalWorkerIndex}`;
|
|
91
|
-
const currentWorkerIndex = globalWorkerIndex;
|
|
92
|
-
globalWorkerIndex++;
|
|
93
|
-
const workerData = {
|
|
94
|
-
producer: prod,
|
|
95
|
-
chunk,
|
|
96
|
-
consumer,
|
|
97
|
-
prodDimensions,
|
|
98
|
-
workerId,
|
|
99
|
-
scope,
|
|
100
|
-
options
|
|
101
|
-
};
|
|
102
|
-
_progress.register((currentWorkerIndex + 1).toString(), prod.name, fileIndex, totalFiles);
|
|
103
|
-
scope.workersId.push(workerId);
|
|
104
|
-
workerThreads.push(this._executorPool.exec('executor', [workerData], {
|
|
105
|
-
on: payload => this.onWorkAdvanced(payload, currentWorkerIndex, _progress)
|
|
106
|
-
}));
|
|
107
|
-
}
|
|
108
|
-
executorResults.push(...yield Promise.all(workerThreads));
|
|
109
|
-
// WARNING: will this not create problems when multiple are executed together at the same time since this is a singleton?!?
|
|
110
|
-
yield this._executorPool.terminate();
|
|
111
|
-
}
|
|
112
|
-
}
|
|
113
|
-
_progress.complete();
|
|
114
|
-
if (executorResults.some(x => !Algo_1.default.hasVal(x)))
|
|
115
|
-
throw new Error(`${executorResults.filter(x => !Algo_1.default.hasVal(x)).length} worker(s) failed to produce valid results`);
|
|
116
|
-
yield this.reconcileExecutorThreadsResults(scope, executorResults, tracker);
|
|
117
|
-
// If there is more than one worker, then I need to redo the operations that are done on multiple lines (cause now the worker files have been merged together)
|
|
118
|
-
const postOperation = { totalOutputCount: null };
|
|
119
|
-
if (executorResults.length > 1) {
|
|
120
|
-
if (((_a = consumer.options) === null || _a === void 0 ? void 0 : _a.distinct) === true) {
|
|
121
|
-
counter = performance.now();
|
|
122
|
-
const unifiedOutputCount = yield ConsumerExecutor_1.default.processDistinct(ExecutorScope_1.default.getMainPath(scope));
|
|
123
|
-
tracker.measure('process-distinct:main', performance.now() - counter);
|
|
124
|
-
postOperation.totalOutputCount = unifiedOutputCount;
|
|
125
|
-
}
|
|
126
|
-
if ((_b = consumer.options) === null || _b === void 0 ? void 0 : _b.distinctOn) {
|
|
127
|
-
counter = performance.now();
|
|
128
|
-
const unifiedOutputCount = yield ConsumerExecutor_1.default.processDistinctOn(consumer, ExecutorScope_1.default.getMainPath(scope));
|
|
129
|
-
tracker.measure('process-distinct-on:main', performance.now() - counter);
|
|
130
|
-
postOperation.totalOutputCount = unifiedOutputCount;
|
|
131
|
-
}
|
|
132
|
-
}
|
|
133
|
-
// Export to the destination
|
|
134
|
-
counter = performance.now();
|
|
135
|
-
const exportRes = yield OutputExecutor_1.default.exportResult(consumer, ConsumerManager_1.default.getExpandedFields(consumer), scope);
|
|
136
|
-
tracker.measure('export-result', performance.now() - counter);
|
|
137
|
-
// Perform on-success actions if any
|
|
138
|
-
if (consumer.outputs.some(x => x.onSuccess)) {
|
|
139
|
-
counter = performance.now();
|
|
140
|
-
yield ConsumerOnFinishManager_1.default.onConsumerSuccess(consumer, usageId);
|
|
141
|
-
tracker.measure('on-success-actions', performance.now() - counter);
|
|
142
|
-
}
|
|
143
|
-
yield this.performCleanupOperations(scope, tracker);
|
|
144
|
-
const finalResult = this.computeFinalResult(tracker, executorResults, usageId, exportRes.key);
|
|
145
|
-
finalResult.elapsedMS = performance.now() - start;
|
|
146
|
-
if (Algo_1.default.hasVal(postOperation.totalOutputCount))
|
|
147
|
-
finalResult.outputCount = postOperation.totalOutputCount;
|
|
148
|
-
UsageManager_1.default.endUsage(usageId, finalResult.outputCount, finalResult);
|
|
149
|
-
return finalResult;
|
|
150
|
-
}
|
|
151
|
-
catch (error) {
|
|
152
|
-
yield ConsumerOnFinishManager_1.default.onConsumerError(consumer, usageId);
|
|
153
|
-
yield this.performCleanupOperations(scope, tracker);
|
|
154
|
-
UsageManager_1.default.failUsage(usageId, Helper_1.default.asError(error).message);
|
|
155
|
-
throw error;
|
|
156
|
-
}
|
|
157
|
-
});
|
|
158
|
-
/**
|
|
159
|
-
* Calculates line-aligned chunk offsets for parallel file processing.
|
|
160
|
-
* Each chunk boundary is adjusted to the next newline to avoid breaking lines.
|
|
161
|
-
* Returns a single chunk for small files where parallelism overhead isn't worth it.
|
|
162
|
-
*/
|
|
163
|
-
this.scopeWork = (fileUri, numChunks) => {
|
|
164
|
-
const fileSize = fs_1.default.statSync(fileUri).size;
|
|
165
|
-
if (fileSize === 0)
|
|
166
|
-
return [];
|
|
167
|
-
// Small files: single chunk, parallelism overhead not worth it
|
|
168
|
-
if (fileSize < Constants_1.default.defaults.MIN_FILE_SIZE_FOR_PARALLEL) {
|
|
169
|
-
return [{ start: 0, end: fileSize, isFirstChunk: true, fileUri }];
|
|
170
|
-
}
|
|
171
|
-
// Calculate optimal chunk count based on file size and CPU cores (-1 cause it is used by the main thread)
|
|
172
|
-
const cpus = numChunks !== null && numChunks !== void 0 ? numChunks : (os_1.default.cpus().length - 1);
|
|
173
|
-
const maxChunksBySize = Math.floor(fileSize / Constants_1.default.defaults.MIN_CHUNK_SIZE);
|
|
174
|
-
const effectiveChunks = Math.min(cpus, maxChunksBySize);
|
|
175
|
-
if (effectiveChunks <= 1)
|
|
176
|
-
return [{ start: 0, end: fileSize, isFirstChunk: true, fileUri }];
|
|
177
|
-
const targetChunkSize = Math.floor(fileSize / effectiveChunks);
|
|
178
|
-
const fd = fs_1.default.openSync(fileUri, 'r');
|
|
179
|
-
try {
|
|
180
|
-
const offsets = [];
|
|
181
|
-
let currentStart = 0;
|
|
182
|
-
for (let i = 0; i < cpus - 1; i++) {
|
|
183
|
-
const targetEnd = currentStart + targetChunkSize;
|
|
184
|
-
// Don't overshoot file size
|
|
185
|
-
if (targetEnd >= fileSize) {
|
|
186
|
-
break;
|
|
187
|
-
}
|
|
188
|
-
// Find next newline after target boundary
|
|
189
|
-
const alignedEnd = this.findNextNewline(fd, targetEnd, fileSize);
|
|
190
|
-
offsets.push({ start: currentStart, end: alignedEnd, isFirstChunk: i === 0, fileUri });
|
|
191
|
-
currentStart = alignedEnd;
|
|
192
|
-
}
|
|
193
|
-
// Final chunk goes to end of file
|
|
194
|
-
if (currentStart < fileSize) {
|
|
195
|
-
offsets.push({ start: currentStart, end: fileSize, isFirstChunk: offsets.length === 0, fileUri });
|
|
196
|
-
}
|
|
197
|
-
return offsets;
|
|
198
|
-
}
|
|
199
|
-
finally {
|
|
200
|
-
fs_1.default.closeSync(fd);
|
|
201
|
-
}
|
|
202
|
-
};
|
|
203
|
-
/**
|
|
204
|
-
* Efficiently finds the next newline character starting from a position.
|
|
205
|
-
* Uses small buffer reads for speed.
|
|
206
|
-
*/
|
|
207
|
-
this.findNextNewline = (fd, position, fileSize) => {
|
|
208
|
-
const BUFFER_SIZE = 8192; // 8KB buffer for scanning
|
|
209
|
-
const buffer = Buffer.allocUnsafe(BUFFER_SIZE);
|
|
210
|
-
let currentPos = position;
|
|
211
|
-
while (currentPos < fileSize) {
|
|
212
|
-
const bytesToRead = Math.min(BUFFER_SIZE, fileSize - currentPos);
|
|
213
|
-
const bytesRead = fs_1.default.readSync(fd, buffer, 0, bytesToRead, currentPos);
|
|
214
|
-
if (bytesRead === 0)
|
|
215
|
-
break;
|
|
216
|
-
// Scan buffer for newline
|
|
217
|
-
for (let i = 0; i < bytesRead; i++) {
|
|
218
|
-
if (buffer[i] === 0x0A) { // \n
|
|
219
|
-
return currentPos + i + 1; // Position after the newline
|
|
220
|
-
}
|
|
221
|
-
}
|
|
222
|
-
currentPos += bytesRead;
|
|
223
|
-
}
|
|
224
|
-
// No newline found, return file end
|
|
225
|
-
return fileSize;
|
|
226
|
-
};
|
|
227
|
-
this.readySourceFiles = (consumer, scope) => __awaiter(this, void 0, void 0, function* () {
|
|
228
|
-
const results = [];
|
|
229
|
-
for (let i = 0; i < consumer.producers.length; i++) {
|
|
230
|
-
const cProd = consumer.producers[i];
|
|
231
|
-
const prod = Environment_1.default.getProducer(cProd.name);
|
|
232
|
-
results.push({ prod, cProd, response: yield ProducerExecutor_1.default.ready(prod, scope) });
|
|
233
|
-
}
|
|
234
|
-
return results;
|
|
235
|
-
});
|
|
236
|
-
this._getWorkerPath = () => {
|
|
237
|
-
// Get the current file's directory
|
|
238
|
-
const currentDir = __dirname;
|
|
239
|
-
if (ProcessENVManager_1.default.getEnvVariable('NODE_ENV') === 'dev' || ProcessENVManager_1.default.getEnvVariable('NODE_ENV') === 'development')
|
|
240
|
-
return path_1.default.resolve('./.build/workers');
|
|
241
|
-
const forcedPath = ProcessENVManager_1.default.getEnvVariable('REMORA_WORKERS_PATH');
|
|
242
|
-
if (forcedPath && forcedPath.length > 0)
|
|
243
|
-
return path_1.default.join(__dirname, forcedPath);
|
|
244
|
-
// Check if we're in a published npm package (no .build in path)
|
|
245
|
-
if (!currentDir.includes('.build')) {
|
|
246
|
-
// We're in the published package, workers are relative to package root
|
|
247
|
-
// __dirname is something like: /path/to/package/executors
|
|
248
|
-
// Workers are at /path/to/package/workers (sibling folder)
|
|
249
|
-
return path_1.default.join(__dirname, '../workers');
|
|
250
|
-
}
|
|
251
|
-
else {
|
|
252
|
-
// We're in development, workers are in ./.build/workers
|
|
253
|
-
return path_1.default.resolve('./.build/workers');
|
|
254
|
-
}
|
|
255
|
-
};
|
|
256
|
-
this.reconcileExecutorThreadsResults = (scope, executorResults, tracker) => __awaiter(this, void 0, void 0, function* () {
|
|
257
|
-
const mainPath = ExecutorScope_1.default.getMainPath(scope);
|
|
258
|
-
ConsumerExecutor_1.default._ensurePath(mainPath);
|
|
259
|
-
// Merge all the various files into a single one
|
|
260
|
-
if (executorResults.length > 1) {
|
|
261
|
-
const perf = performance.now();
|
|
262
|
-
const output = fs_1.default.createWriteStream(mainPath);
|
|
263
|
-
output.setMaxListeners(executorResults.length + 1);
|
|
264
|
-
for (const workerResult of executorResults) {
|
|
265
|
-
yield (0, promises_2.pipeline)(fs_1.default.createReadStream(workerResult.resultUri), output, { end: false });
|
|
266
|
-
}
|
|
267
|
-
output.end();
|
|
268
|
-
output.close();
|
|
269
|
-
tracker.measure('merge-workers', performance.now() - perf);
|
|
270
|
-
}
|
|
271
|
-
else if (executorResults.length === 1) {
|
|
272
|
-
// If there is only one worker, then just rename the worker .dataset to the general consumer one
|
|
273
|
-
yield promises_1.default.rename(executorResults[0].resultUri, mainPath);
|
|
274
|
-
}
|
|
275
|
-
});
|
|
276
|
-
this.performCleanupOperations = (scope, tracker) => __awaiter(this, void 0, void 0, function* () {
|
|
277
|
-
const start = performance.now();
|
|
278
|
-
yield ExecutorScope_1.default.clearScope(scope);
|
|
279
|
-
tracker.measure('cleanup-operations', performance.now() - start);
|
|
280
|
-
});
|
|
281
|
-
this.computeFinalResult = (tracker, executorResults, executionId, resultUri) => {
|
|
282
|
-
const result = {
|
|
283
|
-
cycles: Algo_1.default.max(executorResults.map(x => x.cycles)),
|
|
284
|
-
elapsedMS: Algo_1.default.sum(executorResults.map(x => x.elapsedMS)),
|
|
285
|
-
inputCount: Algo_1.default.sum(executorResults.map(x => x.inputCount)),
|
|
286
|
-
outputCount: Algo_1.default.sum(executorResults.map(x => x.outputCount)),
|
|
287
|
-
workerCount: executorResults.length,
|
|
288
|
-
executionId,
|
|
289
|
-
resultUri,
|
|
290
|
-
operations: {}
|
|
291
|
-
};
|
|
292
|
-
for (const res of executorResults) {
|
|
293
|
-
for (const opKey of Object.keys(res.operations)) {
|
|
294
|
-
const op = res.operations[opKey];
|
|
295
|
-
let label = result.operations[opKey];
|
|
296
|
-
if (!label) {
|
|
297
|
-
result.operations[opKey] = { avg: -1, max: -1, min: -1, elapsedMS: [] };
|
|
298
|
-
label = result.operations[opKey];
|
|
299
|
-
}
|
|
300
|
-
label.elapsedMS.push(op.elapsedMS);
|
|
301
|
-
}
|
|
302
|
-
// Calculate min, max, avg for all operations after collecting all data
|
|
303
|
-
for (const opKey of Object.keys(result.operations)) {
|
|
304
|
-
const operation = result.operations[opKey];
|
|
305
|
-
if (operation.elapsedMS.length > 0) {
|
|
306
|
-
operation.min = Math.min(...operation.elapsedMS);
|
|
307
|
-
operation.max = Math.max(...operation.elapsedMS);
|
|
308
|
-
operation.avg = Algo_1.default.mean(operation.elapsedMS);
|
|
309
|
-
}
|
|
310
|
-
}
|
|
311
|
-
}
|
|
312
|
-
// Add tracker operations to result
|
|
313
|
-
const trackerOperations = tracker.getOperations();
|
|
314
|
-
for (const opKey of Object.keys(trackerOperations)) {
|
|
315
|
-
const trackerOp = trackerOperations[opKey];
|
|
316
|
-
const value = trackerOp.elapsedMS;
|
|
317
|
-
if (!result.operations[opKey]) {
|
|
318
|
-
result.operations[opKey] = { avg: value, max: value, min: value, elapsedMS: [] };
|
|
319
|
-
}
|
|
320
|
-
result.operations[opKey].elapsedMS.push(value);
|
|
321
|
-
}
|
|
322
|
-
return result;
|
|
323
|
-
};
|
|
324
|
-
this.onWorkAdvanced = (packet, index, progress) => {
|
|
325
|
-
const { processed, total } = packet;
|
|
326
|
-
progress.update((index + 1).toString(), processed / total);
|
|
327
|
-
};
|
|
328
|
-
}
|
|
329
|
-
}
|
|
330
|
-
const ExecutorOrchestrator = new ExecutorOrchestratorClass();
|
|
331
|
-
exports.default = ExecutorOrchestrator;
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
class ExecutorPerformance {
|
|
4
|
-
constructor() {
|
|
5
|
-
this.measure = (name, elapsedMS) => {
|
|
6
|
-
let tracker = this._operations[name];
|
|
7
|
-
if (!tracker) {
|
|
8
|
-
this._operations[name] = { elapsedMS: 0 };
|
|
9
|
-
tracker = this._operations[name];
|
|
10
|
-
}
|
|
11
|
-
tracker.elapsedMS += elapsedMS;
|
|
12
|
-
};
|
|
13
|
-
this.getOperations = () => this._operations;
|
|
14
|
-
this._operations = {};
|
|
15
|
-
}
|
|
16
|
-
}
|
|
17
|
-
exports.default = ExecutorPerformance;
|
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
class ExecutorProgress {
|
|
4
|
-
constructor(isEnabled) {
|
|
5
|
-
this._isEnabled = false;
|
|
6
|
-
this._FPS = 2;
|
|
7
|
-
this._lastRenderTime = 0;
|
|
8
|
-
this._lastRenderedLines = -1;
|
|
9
|
-
this.register = (name, producerName, fileIndex, totalFiles) => {
|
|
10
|
-
this.workers[name] = { progress: 0, producerName, fileIndex, totalFiles };
|
|
11
|
-
};
|
|
12
|
-
this.update = (name, value) => {
|
|
13
|
-
if (this.workers[name])
|
|
14
|
-
this.workers[name].progress = value;
|
|
15
|
-
const now = Date.now();
|
|
16
|
-
const interval = 1000 / this._FPS;
|
|
17
|
-
if (now - this._lastRenderTime >= interval) {
|
|
18
|
-
this._lastRenderTime = now;
|
|
19
|
-
this.render();
|
|
20
|
-
}
|
|
21
|
-
};
|
|
22
|
-
this.complete = () => {
|
|
23
|
-
for (const key of Object.keys(this.workers)) {
|
|
24
|
-
this.workers[key].progress = 1;
|
|
25
|
-
}
|
|
26
|
-
this.render();
|
|
27
|
-
};
|
|
28
|
-
this.render = () => {
|
|
29
|
-
if (!this._isEnabled)
|
|
30
|
-
return;
|
|
31
|
-
if (this._lastRenderedLines > 0) {
|
|
32
|
-
for (let i = 0; i < this._lastRenderedLines; i++) {
|
|
33
|
-
process.stdout.moveCursor(0, -1);
|
|
34
|
-
process.stdout.clearLine(1);
|
|
35
|
-
}
|
|
36
|
-
}
|
|
37
|
-
this._lastRenderedLines = 0;
|
|
38
|
-
for (const key of Object.keys(this.workers)) {
|
|
39
|
-
const worker = this.workers[key];
|
|
40
|
-
const percentage = Math.min(100, Math.max(0, worker.progress * 100));
|
|
41
|
-
const barWidth = 30;
|
|
42
|
-
const filledWidth = Math.floor((percentage / 100) * barWidth);
|
|
43
|
-
const emptyWidth = barWidth - filledWidth;
|
|
44
|
-
const bar = '#'.repeat(filledWidth) + '-'.repeat(emptyWidth);
|
|
45
|
-
const fileInfo = worker.totalFiles > 1 ? ` [${worker.fileIndex + 1}/${worker.totalFiles}]` : '';
|
|
46
|
-
console.log(`Worker ${key.padStart(2, '0')}: [${bar}] ${percentage.toFixed(2)}% (${worker.producerName}${fileInfo})`);
|
|
47
|
-
this._lastRenderedLines++;
|
|
48
|
-
}
|
|
49
|
-
};
|
|
50
|
-
this._isEnabled = isEnabled;
|
|
51
|
-
this.workers = {};
|
|
52
|
-
}
|
|
53
|
-
}
|
|
54
|
-
exports.default = ExecutorProgress;
|
|
@@ -1,52 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
-
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
-
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
-
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
-
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
-
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
-
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
-
});
|
|
10
|
-
};
|
|
11
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
12
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
13
|
-
};
|
|
14
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
-
const path_1 = __importDefault(require("path"));
|
|
16
|
-
const fs_1 = __importDefault(require("fs"));
|
|
17
|
-
const promises_1 = __importDefault(require("fs/promises"));
|
|
18
|
-
const Constants_1 = __importDefault(require("../Constants"));
|
|
19
|
-
class ExecutorScopeClass {
|
|
20
|
-
constructor() {
|
|
21
|
-
this.WORKERS_FOLDER = 'workers';
|
|
22
|
-
this.PRODUCERS_FOLDER = 'producers';
|
|
23
|
-
this.getWorkerPath = (scope, workerId) => {
|
|
24
|
-
return path_1.default.join(Constants_1.default.defaults.REMORA_PATH, Constants_1.default.defaults.PRODUCER_TEMP_FOLDER,
|
|
25
|
-
// A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
|
|
26
|
-
scope.folder, this.WORKERS_FOLDER, `${workerId}.dataset`);
|
|
27
|
-
};
|
|
28
|
-
this.getProducerPath = (scope, producer, sourceFileKey) => {
|
|
29
|
-
return path_1.default.join(Constants_1.default.defaults.REMORA_PATH, Constants_1.default.defaults.PRODUCER_TEMP_FOLDER,
|
|
30
|
-
// A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
|
|
31
|
-
scope.folder, this.PRODUCERS_FOLDER, producer.name, `${sourceFileKey}.dataset`);
|
|
32
|
-
};
|
|
33
|
-
this.getMainPath = (scope) => {
|
|
34
|
-
return path_1.default.join(Constants_1.default.defaults.REMORA_PATH, Constants_1.default.defaults.PRODUCER_TEMP_FOLDER, scope.folder, 'main.dataset');
|
|
35
|
-
};
|
|
36
|
-
this.clearScope = (scope) => __awaiter(this, void 0, void 0, function* () {
|
|
37
|
-
const scopePath = path_1.default.join(Constants_1.default.defaults.REMORA_PATH, Constants_1.default.defaults.PRODUCER_TEMP_FOLDER, scope.folder);
|
|
38
|
-
if (fs_1.default.existsSync(scopePath)) {
|
|
39
|
-
yield promises_1.default.rm(scopePath, { recursive: true, force: true });
|
|
40
|
-
}
|
|
41
|
-
});
|
|
42
|
-
this.ensurePath = (fileUri) => {
|
|
43
|
-
const dir = path_1.default.dirname(fileUri);
|
|
44
|
-
if (!fs_1.default.existsSync(dir))
|
|
45
|
-
fs_1.default.mkdirSync(dir, { recursive: true });
|
|
46
|
-
if (!fs_1.default.existsSync(fileUri))
|
|
47
|
-
fs_1.default.writeFileSync(fileUri, '');
|
|
48
|
-
};
|
|
49
|
-
}
|
|
50
|
-
}
|
|
51
|
-
const ExecutorScope = new ExecutorScopeClass();
|
|
52
|
-
exports.default = ExecutorScope;
|
|
@@ -1,118 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
-
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
-
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
-
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
-
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
-
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
-
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
-
});
|
|
10
|
-
};
|
|
11
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
12
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
13
|
-
};
|
|
14
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
-
const Algo_1 = __importDefault(require("../core/Algo"));
|
|
16
|
-
const DSTE_1 = __importDefault(require("../core/dste/DSTE"));
|
|
17
|
-
const DriverFactory_1 = __importDefault(require("../drivers/DriverFactory"));
|
|
18
|
-
const Environment_1 = __importDefault(require("../engines/Environment"));
|
|
19
|
-
const CSVParser_1 = __importDefault(require("../engines/parsing/CSVParser"));
|
|
20
|
-
const ExecutorScope_1 = __importDefault(require("./ExecutorScope"));
|
|
21
|
-
class OutputExecutorClass {
    constructor() {
        /**
         * Maps a consumer's declared output format to the format records are
         * staged in internally while the export runs:
         *   API     -> JSON (records buffered as JSON lines)
         *   PARQUET -> CSV  (records buffered as CSV lines)
         *   others  -> unchanged.
         * NOTE(review): only outputs[0] is consulted — assumes every output of
         * a consumer shares one internal format; confirm against callers.
         * @param {object} consumer - consumer definition with an `outputs` array.
         * @returns {string} internal record format ('JSON' | 'CSV' | ...).
         */
        this._getInternalRecordFormat = (consumer) => {
            const output = consumer.outputs[0];
            const format = output.format === 'API'
                ? 'JSON'
                : output.format === 'PARQUET'
                    ? 'CSV'
                    : output.format;
            return format;
        };
        /**
         * Serializes one record into a single line in the internal format.
         * @throws {Error} when the internal format has no serializer yet.
         */
        this.outputRecord = (record, consumer, fields) => {
            const format = this._getInternalRecordFormat(consumer);
            switch (format) {
                case 'CSV':
                    return this.toCSV(record, fields, ',');
                case 'JSON':
                    return this.toJSON(record, fields);
                default:
                    throw new Error(`Export format ${format} not implemented yet.`);
            }
        };
        /**
         * Renders a record as one CSV line. Hidden fields are skipped; every
         * value is double-quoted with embedded quotes doubled (RFC 4180 style);
         * null/undefined values become empty strings.
         * @param {string} [delimiter] - column separator, defaults to ','.
         */
        this.toCSV = (record, fields, delimiter) => {
            const sep = delimiter !== null && delimiter !== void 0 ? delimiter : ',';
            const line = fields
                .filter(x => !x.cField.hidden)
                .map(x => { var _a, _b; return `"${Algo_1.default.replaceAll((_b = (_a = record[x.finalKey]) === null || _a === void 0 ? void 0 : _a.toString()) !== null && _b !== void 0 ? _b : '', '"', '""')}"`; })
                .join(sep);
            return line;
        };
        /**
         * Renders a record as one JSON line, omitting hidden fields.
         * Fix: operates on a shallow copy instead of `delete`-ing keys from the
         * caller's record — the previous version mutated its input.
         */
        this.toJSON = (record, fields) => {
            if (!fields.some(x => x.cField.hidden))
                return JSON.stringify(record);
            const visible = Object.assign({}, record);
            for (const dim of fields) {
                if (dim.cField.hidden)
                    delete visible[dim.finalKey];
            }
            return JSON.stringify(visible);
        };
        /**
         * Moves the staged result file to the output's destination, transcoding
         * line-by-line when the destination format differs from the internal
         * staging format.
         * FIXME(review): the `return` statements inside the loop mean only the
         * FIRST entry of consumer.outputs is ever exported; later outputs are
         * silently skipped. Left as-is to preserve the current return value,
         * but this looks like a latent bug — confirm multi-output semantics.
         * @throws {Error} for destination formats not implemented (e.g. PARQUET).
         */
        this.exportResult = (consumer, fields, scope) => __awaiter(this, void 0, void 0, function* () {
            const internalFormat = this._getInternalRecordFormat(consumer);
            for (const output of consumer.outputs) {
                const destination = Environment_1.default.getSource(output.exportDestination);
                const driver = yield DriverFactory_1.default.instantiateDestination(destination);
                const currentPath = ExecutorScope_1.default.getMainPath(scope);
                const destinationName = this._composeFileName(consumer, output, this._getExtension(output));
                // Formats match: a plain move is enough, no transcoding needed.
                if (output.format === internalFormat) {
                    return yield driver.move(currentPath, destinationName);
                }
                switch (output.format) {
                    case 'CSV':
                        // JSON line -> CSV line. Fix: embedded double quotes are
                        // now doubled and null values become '', matching toCSV's
                        // escaping (old code emitted broken CSV for values that
                        // contained '"').
                        return yield driver.transformAndMove(currentPath, line => {
                            const parsed = JSON.parse(line);
                            return Object.keys(parsed)
                                .map(k => { var _a, _b; return `"${Algo_1.default.replaceAll((_b = (_a = parsed[k]) === null || _a === void 0 ? void 0 : _a.toString()) !== null && _b !== void 0 ? _b : '', '"', '""')}"`; })
                                .join(',');
                        }, destinationName);
                    case 'API':
                    case 'JSON':
                        // CSV line -> JSON line, keyed by positional field order.
                        return yield driver.transformAndMove(currentPath, line => {
                            const parts = CSVParser_1.default.parseRow(line, ',');
                            const value = {};
                            for (const [index, field] of fields.entries())
                                value[field.finalKey] = parts[index];
                            return JSON.stringify(value);
                        }, destinationName);
                    case 'PARQUET':
                    default:
                        throw new Error(`Export result to format ${output.format} not implemented yet.`);
                }
            }
        });
        /**
         * File extension for an output format; 'txt' is the fallback for
         * formats with no dedicated extension.
         */
        this._getExtension = (output) => {
            return output.format === 'CSV'
                ? 'csv'
                : output.format === 'JSON'
                    ? 'jsonl'
                    : 'txt';
        };
        /**
         * Builds the destination file name. An explicit, non-blank exportName
         * wins (its own extension is stripped first so we never emit
         * "name.csv.csv"); otherwise the name is
         * `<consumer>_<timestamp>[_<executionId>].<extension>`.
         * @param {string} [executionId] - optional suffix for uniqueness.
         */
        this._composeFileName = (consumer, output, extension, executionId) => {
            if (output.exportName && output.exportName.trim().length > 0) {
                // Ensure no extension duplication
                const sanitized = output.exportName.replace(/\.[^.]+$/, '');
                return `${sanitized}.${extension}`;
            }
            // ISO timestamp with ':' swapped for '-' so it is filesystem-safe.
            const baseTs = Algo_1.default.replaceAll(DSTE_1.default.now().toISOString().split('.')[0], ':', '-');
            const suffix = executionId ? `_${executionId}` : '';
            return `${consumer.name}_${baseTs}${suffix}.${extension}`;
        };
    }
}
|
|
117
|
-
// Module-level singleton: one shared executor instance for all importers.
const OutputExecutor = new OutputExecutorClass();
exports.default = OutputExecutor;
|