@forzalabs/remora 1.0.22 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. package/index.js +8313 -118
  2. package/package.json +9 -26
  3. package/workers/ExecutorWorker.js +7643 -46
  4. package/Constants.js +0 -34
  5. package/actions/automap.js +0 -77
  6. package/actions/compile.js +0 -57
  7. package/actions/create_consumer.js +0 -75
  8. package/actions/create_producer.js +0 -41
  9. package/actions/debug.js +0 -62
  10. package/actions/deploy.js +0 -96
  11. package/actions/discover.js +0 -36
  12. package/actions/init.js +0 -77
  13. package/actions/mock.js +0 -43
  14. package/actions/run.js +0 -116
  15. package/actions/sample.js +0 -176
  16. package/core/Affirm.js +0 -42
  17. package/core/Algo.js +0 -160
  18. package/core/dste/DSTE.js +0 -113
  19. package/core/logger/DebugLogService.js +0 -48
  20. package/core/logger/DevelopmentLogService.js +0 -70
  21. package/core/logger/LocalLogService.js +0 -70
  22. package/core/logger/Logger.js +0 -54
  23. package/database/DatabaseEngine.js +0 -149
  24. package/database/DatabaseStructure.js +0 -27
  25. package/definitions/DatasetDefinitions.js +0 -2
  26. package/definitions/ExecutorDefinitions.js +0 -2
  27. package/definitions/ProcessENV.js +0 -169
  28. package/definitions/agents/DestinationDriver.js +0 -2
  29. package/definitions/agents/SourceDriver.js +0 -2
  30. package/definitions/cli.js +0 -2
  31. package/definitions/database/ApiKeys.js +0 -2
  32. package/definitions/database/Stored.js +0 -7
  33. package/definitions/database/UsageStat.js +0 -2
  34. package/definitions/database/User.js +0 -2
  35. package/definitions/requests/ConsumerRequest.js +0 -2
  36. package/definitions/requests/Developer.js +0 -2
  37. package/definitions/requests/Mapping.js +0 -2
  38. package/definitions/requests/ProducerRequest.js +0 -2
  39. package/definitions/requests/Request.js +0 -2
  40. package/definitions/resources/Compiled.js +0 -2
  41. package/definitions/resources/Consumer.js +0 -2
  42. package/definitions/resources/Environment.js +0 -2
  43. package/definitions/resources/Library.js +0 -2
  44. package/definitions/resources/Producer.js +0 -2
  45. package/definitions/resources/Project.js +0 -2
  46. package/definitions/resources/Schema.js +0 -2
  47. package/definitions/resources/Source.js +0 -2
  48. package/definitions/temp.js +0 -2
  49. package/definitions/transform/Transformations.js +0 -2
  50. package/drivers/DeltaShareDriver.js +0 -186
  51. package/drivers/DriverFactory.js +0 -72
  52. package/drivers/DriverHelper.js +0 -248
  53. package/drivers/HttpApiDriver.js +0 -208
  54. package/drivers/RedshiftDriver.js +0 -184
  55. package/drivers/files/LocalDestinationDriver.js +0 -146
  56. package/drivers/files/LocalSourceDriver.js +0 -405
  57. package/drivers/s3/S3DestinationDriver.js +0 -197
  58. package/drivers/s3/S3SourceDriver.js +0 -495
  59. package/engines/CryptoEngine.js +0 -75
  60. package/engines/Environment.js +0 -170
  61. package/engines/ProcessENVManager.js +0 -112
  62. package/engines/RandomEngine.js +0 -47
  63. package/engines/SecretManager.js +0 -23
  64. package/engines/UserManager.js +0 -66
  65. package/engines/ai/AutoMapperEngine.js +0 -37
  66. package/engines/ai/DeveloperEngine.js +0 -497
  67. package/engines/ai/LLM.js +0 -255
  68. package/engines/consumer/ConsumerManager.js +0 -218
  69. package/engines/consumer/ConsumerOnFinishManager.js +0 -202
  70. package/engines/dataset/Dataset.js +0 -824
  71. package/engines/dataset/DatasetManager.js +0 -211
  72. package/engines/dataset/DatasetRecord.js +0 -120
  73. package/engines/dataset/DatasetRecordPool.js +0 -77
  74. package/engines/execution/RequestExecutor.js +0 -67
  75. package/engines/parsing/CSVParser.js +0 -60
  76. package/engines/parsing/LineParser.js +0 -71
  77. package/engines/parsing/ParseCompression.js +0 -101
  78. package/engines/parsing/ParseHelper.js +0 -18
  79. package/engines/parsing/ParseManager.js +0 -54
  80. package/engines/parsing/XLSParser.js +0 -87
  81. package/engines/parsing/XMLParser.js +0 -115
  82. package/engines/producer/ProducerEngine.js +0 -127
  83. package/engines/producer/ProducerManager.js +0 -43
  84. package/engines/scheduler/CronScheduler.js +0 -222
  85. package/engines/scheduler/QueueManager.js +0 -314
  86. package/engines/schema/SchemaValidator.js +0 -67
  87. package/engines/transform/JoinEngine.js +0 -232
  88. package/engines/transform/TransformationEngine.js +0 -277
  89. package/engines/transform/TypeCaster.js +0 -59
  90. package/engines/usage/DataframeManager.js +0 -55
  91. package/engines/usage/UsageDataManager.js +0 -151
  92. package/engines/usage/UsageManager.js +0 -65
  93. package/engines/validation/Validator.js +0 -216
  94. package/executors/ConsumerExecutor.js +0 -280
  95. package/executors/Executor.js +0 -177
  96. package/executors/ExecutorOrchestrator.js +0 -331
  97. package/executors/ExecutorPerformance.js +0 -17
  98. package/executors/ExecutorProgress.js +0 -54
  99. package/executors/ExecutorScope.js +0 -52
  100. package/executors/OutputExecutor.js +0 -118
  101. package/executors/ProducerExecutor.js +0 -108
  102. package/helper/Helper.js +0 -149
  103. package/helper/Logger.js +0 -84
  104. package/helper/Runtime.js +0 -20
  105. package/helper/Settings.js +0 -13
  106. package/licencing/LicenceManager.js +0 -64
  107. package/settings.js +0 -12
  108. /package/{definitions/json_schemas → json_schemas}/consumer-schema.json +0 -0
  109. /package/{definitions/json_schemas → json_schemas}/producer-schema.json +0 -0
  110. /package/{definitions/json_schemas → json_schemas}/project-schema.json +0 -0
  111. /package/{definitions/json_schemas → json_schemas}/source-schema.json +0 -0
@@ -1,331 +0,0 @@
1
- "use strict";
2
/**
 * Drives a generator as if it were an async function: every yielded value is awaited and
 * its settled result is fed back into the generator; the returned promise settles with the
 * generator's return value or with the first error that escapes it.
 *
 * @param {*} thisArg - `this` value used when invoking the generator function.
 * @param {Array|undefined} _arguments - Arguments forwarded to the generator function.
 * @param {Function|undefined} P - Promise constructor to use; defaults to the global Promise.
 * @param {Function} generator - Generator function to run.
 * @returns {Promise<*>} Promise for the generator's return value.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    const PromiseCtor = P || Promise;
    // Wrap plain values so that `.then` is always available on what the generator yielded
    const adopt = (value) => {
        if (value instanceof PromiseCtor)
            return value;
        return new PromiseCtor((resolve) => { resolve(value); });
    };
    return new PromiseCtor((resolve, reject) => {
        const step = (result) => {
            if (result.done) {
                resolve(result.value);
                return;
            }
            adopt(result.value).then(onFulfilled, onRejected);
        };
        const onFulfilled = (value) => {
            try {
                step(generator.next(value));
            }
            catch (e) {
                reject(e);
            }
        };
        const onRejected = (reason) => {
            try {
                step(generator["throw"](reason));
            }
            catch (e) {
                reject(e);
            }
        };
        // From here on `generator` holds the iterator produced by the generator function
        generator = generator.apply(thisArg, _arguments || []);
        step(generator.next());
    });
};
/**
 * Normalizes a required module so that its default export can be read from `.default`:
 * modules flagged with `__esModule` are returned untouched, anything else is wrapped.
 *
 * @param {*} mod - Value returned by `require`.
 * @returns {*} `mod` itself, or `{ default: mod }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule)
        return mod;
    return { "default": mod };
};
14
- Object.defineProperty(exports, "__esModule", { value: true });
15
- const os_1 = __importDefault(require("os"));
16
- const fs_1 = __importDefault(require("fs"));
17
- const promises_1 = __importDefault(require("fs/promises"));
18
- const path_1 = __importDefault(require("path"));
19
- const promises_2 = require("stream/promises");
20
- const workerpool_1 = __importDefault(require("workerpool"));
21
- const Affirm_1 = __importDefault(require("../core/Affirm"));
22
- const UsageManager_1 = __importDefault(require("../engines/usage/UsageManager"));
23
- const Helper_1 = __importDefault(require("../helper/Helper"));
24
- const Environment_1 = __importDefault(require("../engines/Environment"));
25
- const ProducerExecutor_1 = __importDefault(require("./ProducerExecutor"));
26
- const Constants_1 = __importDefault(require("../Constants"));
27
- const DriverHelper_1 = __importDefault(require("../drivers/DriverHelper"));
28
- const ConsumerExecutor_1 = __importDefault(require("./ConsumerExecutor"));
29
- const OutputExecutor_1 = __importDefault(require("./OutputExecutor"));
30
- const ConsumerManager_1 = __importDefault(require("../engines/consumer/ConsumerManager"));
31
- const ExecutorPerformance_1 = __importDefault(require("./ExecutorPerformance"));
32
- const ExecutorProgress_1 = __importDefault(require("./ExecutorProgress"));
33
- const Algo_1 = __importDefault(require("../core/Algo"));
34
- const ConsumerOnFinishManager_1 = __importDefault(require("../engines/consumer/ConsumerOnFinishManager"));
35
- const ExecutorScope_1 = __importDefault(require("./ExecutorScope"));
36
- const ProcessENVManager_1 = __importDefault(require("../engines/ProcessENVManager"));
37
/**
 * Orchestrates the execution of a consumer: readies the source files of its producers,
 * splits every file into line-aligned chunks, runs one pool task per chunk, merges the
 * per-worker result files into a single dataset, applies the post-merge distinct steps,
 * exports the result and reports timings.
 *
 * Members are arrow functions assigned in the constructor, so they stay bound to the
 * instance even when passed around detached.
 */
class ExecutorOrchestratorClass {
    constructor() {
        /**
         * Creates the worker pool on first use; later calls are no-ops while
         * `_executorPool` is set.
         */
        this.init = () => {
            if (this._executorPool)
                return;
            const options = {
                workerThreadOpts: {
                    resourceLimits: {
                        maxOldGenerationSizeMb: Constants_1.default.defaults.MIN_RUNTIME_HEAP_MB
                    }
                }
            };
            const workerPath = this._getWorkerPath();
            this._executorPool = workerpool_1.default.pool(path_1.default.join(workerPath, 'ExecutorWorker.js'), options);
        };
        /**
         * Runs a consumer end to end.
         *
         * @param {object} request - `{ consumer, details, logProgress, options }`.
         * @returns {Promise<object>} The aggregated result built by `computeFinalResult`, with
         *   `elapsedMS` replaced by the wall-clock duration of this call and `outputCount`
         *   replaced by the post-merge distinct count when one was computed.
         * @throws Rethrows whatever failed. On failure the on-error hook is invoked, the
         *   temporary scope is cleared and the usage is marked as failed first.
         */
        this.launch = async (request) => {
            (0, Affirm_1.default)(request, 'Invalid options');
            const { consumer, details, logProgress, options } = request;
            (0, Affirm_1.default)(consumer, 'Invalid consumer');
            (0, Affirm_1.default)(details, 'Invalid execution details');
            const tracker = new ExecutorPerformance_1.default();
            const _progress = new ExecutorProgress_1.default(logProgress);
            const { usageId } = UsageManager_1.default.startUsage(consumer, details);
            const scope = { id: usageId, folder: `${consumer.name}_${usageId}`, workersId: [] };
            try {
                const start = performance.now();
                this.init();
                const executorResults = [];
                let counter = performance.now();
                const sourceFilesByProducer = await this.readySourceFiles(consumer, scope);
                tracker.measure('ready-producers', performance.now() - counter);
                let globalWorkerIndex = 0;
                for (const { prod, cProd, response } of sourceFilesByProducer) {
                    // Make sure that the data files are there; a missing (or empty) file list is
                    // skipped for optional producers and is an error for every other producer
                    const files = response.files;
                    const firstFileUri = files && files.length > 0 ? files[0].fullUri : undefined;
                    if (!firstFileUri || !fs_1.default.existsSync(firstFileUri)) {
                        if (!cProd.isOptional)
                            throw new Error(`Expected data file ${firstFileUri} of producer ${prod.name} in consumer ${consumer.name} is missing.`);
                        continue;
                    }
                    console.log('Starting operations on ', firstFileUri);
                    // Extract the dimensions for this producer just once
                    const firstLine = (await DriverHelper_1.default.quickReadFile(firstFileUri, 1))[0];
                    const header = ProducerExecutor_1.default.processHeader(firstLine, prod);
                    const prodDimensions = ProducerExecutor_1.default.reconcileHeader(header, prod);
                    const totalFiles = files.length;
                    for (const [fileIndex, file] of files.entries()) {
                        const chunks = this.scopeWork(file.fullUri);
                        const workerThreads = [];
                        for (const chunk of chunks) {
                            // Spawn off one pool task per chunk
                            const workerId = `${usageId}_${globalWorkerIndex}`;
                            const currentWorkerIndex = globalWorkerIndex;
                            globalWorkerIndex++;
                            const workerData = {
                                producer: prod,
                                chunk,
                                consumer,
                                prodDimensions,
                                workerId,
                                scope,
                                options
                            };
                            _progress.register((currentWorkerIndex + 1).toString(), prod.name, fileIndex, totalFiles);
                            scope.workersId.push(workerId);
                            workerThreads.push(this._executorPool.exec('executor', [workerData], {
                                on: payload => this.onWorkAdvanced(payload, currentWorkerIndex, _progress)
                            }));
                        }
                        executorResults.push(...await Promise.all(workerThreads));
                        // WARNING: will this not create problems when multiple are executed together at the same time since this is a singleton?!?
                        await this._executorPool.terminate();
                    }
                }
                _progress.complete();
                const invalidResults = executorResults.filter(x => !Algo_1.default.hasVal(x)).length;
                if (invalidResults > 0)
                    throw new Error(`${invalidResults} worker(s) failed to produce valid results`);
                await this.reconcileExecutorThreadsResults(scope, executorResults, tracker);
                // If there is more than one worker, the operations that span multiple lines must be redone (the worker files have now been merged together)
                const postOperation = { totalOutputCount: null };
                if (executorResults.length > 1) {
                    const consumerOptions = consumer.options;
                    if (consumerOptions && consumerOptions.distinct === true) {
                        counter = performance.now();
                        const unifiedOutputCount = await ConsumerExecutor_1.default.processDistinct(ExecutorScope_1.default.getMainPath(scope));
                        tracker.measure('process-distinct:main', performance.now() - counter);
                        postOperation.totalOutputCount = unifiedOutputCount;
                    }
                    if (consumerOptions && consumerOptions.distinctOn) {
                        counter = performance.now();
                        const unifiedOutputCount = await ConsumerExecutor_1.default.processDistinctOn(consumer, ExecutorScope_1.default.getMainPath(scope));
                        tracker.measure('process-distinct-on:main', performance.now() - counter);
                        postOperation.totalOutputCount = unifiedOutputCount;
                    }
                }
                // Export to the destination
                counter = performance.now();
                const exportRes = await OutputExecutor_1.default.exportResult(consumer, ConsumerManager_1.default.getExpandedFields(consumer), scope);
                tracker.measure('export-result', performance.now() - counter);
                // Perform on-success actions if any
                if (consumer.outputs.some(x => x.onSuccess)) {
                    counter = performance.now();
                    await ConsumerOnFinishManager_1.default.onConsumerSuccess(consumer, usageId);
                    tracker.measure('on-success-actions', performance.now() - counter);
                }
                await this.performCleanupOperations(scope, tracker);
                const finalResult = this.computeFinalResult(tracker, executorResults, usageId, exportRes.key);
                finalResult.elapsedMS = performance.now() - start;
                if (Algo_1.default.hasVal(postOperation.totalOutputCount))
                    finalResult.outputCount = postOperation.totalOutputCount;
                UsageManager_1.default.endUsage(usageId, finalResult.outputCount, finalResult);
                return finalResult;
            }
            catch (error) {
                // Nested finally blocks: even when the on-error hook or the cleanup throws, the
                // remaining failure steps still run, so temp files never outlive a failed
                // execution and the usage is never left open.
                try {
                    await ConsumerOnFinishManager_1.default.onConsumerError(consumer, usageId);
                }
                finally {
                    try {
                        await this.performCleanupOperations(scope, tracker);
                    }
                    finally {
                        UsageManager_1.default.failUsage(usageId, Helper_1.default.asError(error).message);
                    }
                }
                throw error;
            }
        };
        /**
         * Calculates line-aligned chunk offsets for parallel file processing.
         * Each chunk boundary is moved forward to just after the next newline so no line is split.
         * Returns no chunk for an empty file and a single chunk when the file is too small for
         * parallelism to be worth it.
         *
         * @param {string} fileUri - Path of the file to split.
         * @param {number} [numChunks] - Upper bound for the number of chunks; defaults to the CPU count minus one.
         * @returns {Array<{start: number, end: number, isFirstChunk: boolean, fileUri: string}>}
         *   Contiguous byte ranges (`end` exclusive) covering the whole file.
         */
        this.scopeWork = (fileUri, numChunks) => {
            const fileSize = fs_1.default.statSync(fileUri).size;
            if (fileSize === 0)
                return [];
            const wholeFile = () => [{ start: 0, end: fileSize, isFirstChunk: true, fileUri }];
            // Small files: single chunk, parallelism overhead not worth it
            if (fileSize < Constants_1.default.defaults.MIN_FILE_SIZE_FOR_PARALLEL)
                return wholeFile();
            // Chunk count is bounded by the CPU cores (-1 because one is used by the main thread) and by the minimum chunk size
            const cpus = numChunks !== null && numChunks !== undefined ? numChunks : (os_1.default.cpus().length - 1);
            const maxChunksBySize = Math.floor(fileSize / Constants_1.default.defaults.MIN_CHUNK_SIZE);
            const effectiveChunks = Math.min(cpus, maxChunksBySize);
            if (effectiveChunks <= 1)
                return wholeFile();
            const targetChunkSize = Math.floor(fileSize / effectiveChunks);
            const fd = fs_1.default.openSync(fileUri, 'r');
            try {
                const offsets = [];
                let currentStart = 0;
                // All chunks but the last are cut here; the last one takes whatever is left
                for (let i = 0; i < effectiveChunks - 1; i++) {
                    const targetEnd = currentStart + targetChunkSize;
                    // Don't overshoot file size
                    if (targetEnd >= fileSize)
                        break;
                    // Find next newline after target boundary
                    const alignedEnd = this.findNextNewline(fd, targetEnd, fileSize);
                    offsets.push({ start: currentStart, end: alignedEnd, isFirstChunk: i === 0, fileUri });
                    currentStart = alignedEnd;
                }
                // Final chunk goes to end of file
                if (currentStart < fileSize)
                    offsets.push({ start: currentStart, end: fileSize, isFirstChunk: offsets.length === 0, fileUri });
                return offsets;
            }
            finally {
                fs_1.default.closeSync(fd);
            }
        };
        /**
         * Finds the first newline byte (0x0A) at or after `position`, scanning the file in 8KB reads.
         *
         * @param {number} fd - Open file descriptor.
         * @param {number} position - Byte offset where the scan starts.
         * @param {number} fileSize - Total size of the file in bytes.
         * @returns {number} Offset of the byte right after the newline, or `fileSize` when none is found.
         */
        this.findNextNewline = (fd, position, fileSize) => {
            const BUFFER_SIZE = 8192; // 8KB buffer for scanning
            const buffer = Buffer.allocUnsafe(BUFFER_SIZE);
            let currentPos = position;
            while (currentPos < fileSize) {
                const bytesToRead = Math.min(BUFFER_SIZE, fileSize - currentPos);
                const bytesRead = fs_1.default.readSync(fd, buffer, 0, bytesToRead, currentPos);
                if (bytesRead === 0)
                    break;
                // Only the freshly read part is searched: the rest of the buffer is stale/uninitialized
                const newlineIndex = buffer.subarray(0, bytesRead).indexOf(0x0A);
                if (newlineIndex !== -1)
                    return currentPos + newlineIndex + 1; // Position after the newline
                currentPos += bytesRead;
            }
            // No newline found, return file end
            return fileSize;
        };
        /**
         * Resolves every producer of the consumer and readies its source files, one producer
         * at a time and in declaration order.
         *
         * @returns {Promise<Array<{prod: object, cProd: object, response: object}>>}
         */
        this.readySourceFiles = async (consumer, scope) => {
            const results = [];
            for (const cProd of consumer.producers) {
                const prod = Environment_1.default.getProducer(cProd.name);
                const response = await ProducerExecutor_1.default.ready(prod, scope);
                results.push({ prod, cProd, response });
            }
            return results;
        };
        /**
         * Resolves the folder that contains `ExecutorWorker.js`:
         * 1. `./.build/workers` (relative to the working directory) when NODE_ENV is `dev`/`development`;
         * 2. `REMORA_WORKERS_PATH` joined onto this file's directory when that variable is set;
         * 3. the sibling `../workers` folder when this file's path has no `.build` segment (published package);
         * 4. `./.build/workers` otherwise.
         */
        this._getWorkerPath = () => {
            const nodeEnv = ProcessENVManager_1.default.getEnvVariable('NODE_ENV');
            if (nodeEnv === 'dev' || nodeEnv === 'development')
                return path_1.default.resolve('./.build/workers');
            const forcedPath = ProcessENVManager_1.default.getEnvVariable('REMORA_WORKERS_PATH');
            if (forcedPath && forcedPath.length > 0)
                return path_1.default.join(__dirname, forcedPath);
            // __dirname is something like /path/to/package/executors, workers live in the sibling folder
            if (!__dirname.includes('.build'))
                return path_1.default.join(__dirname, '../workers');
            // We're in development, workers are in ./.build/workers
            return path_1.default.resolve('./.build/workers');
        };
        /**
         * Produces the main dataset of the scope out of the worker result files: several
         * results are concatenated in order, a single result is simply renamed.
         * Resolves only once the merged file has been completely written, because the caller
         * reads it right afterwards.
         */
        this.reconcileExecutorThreadsResults = async (scope, executorResults, tracker) => {
            const mainPath = ExecutorScope_1.default.getMainPath(scope);
            ConsumerExecutor_1.default._ensurePath(mainPath);
            if (executorResults.length > 1) {
                // Merge all the various files into a single one
                const perf = performance.now();
                const output = fs_1.default.createWriteStream(mainPath);
                output.setMaxListeners(executorResults.length + 1);
                try {
                    for (const workerResult of executorResults) {
                        // end: false keeps the destination open for the next worker file
                        await (0, promises_2.pipeline)(fs_1.default.createReadStream(workerResult.resultUri), output, { end: false });
                    }
                    output.end();
                    // Wait until the buffered data has been flushed before anyone reads the file
                    await (0, promises_2.finished)(output);
                }
                catch (error) {
                    // Don't leak the file descriptor when one of the copies fails
                    output.destroy();
                    throw error;
                }
                tracker.measure('merge-workers', performance.now() - perf);
            }
            else if (executorResults.length === 1) {
                // If there is only one worker, then just rename the worker .dataset to the general consumer one
                await promises_1.default.rename(executorResults[0].resultUri, mainPath);
            }
        };
        /** Clears the temporary scope of the execution and records how long that took. */
        this.performCleanupOperations = async (scope, tracker) => {
            const start = performance.now();
            await ExecutorScope_1.default.clearScope(scope);
            tracker.measure('cleanup-operations', performance.now() - start);
        };
        /**
         * Aggregates the worker results and the orchestrator timings into one result object.
         *
         * @param {object} tracker - Exposes `getOperations()` returning `{ [name]: { elapsedMS } }`.
         * @param {Array<object>} executorResults - Worker results (`cycles`, `elapsedMS`, `inputCount`, `outputCount`, `operations`).
         * @param {*} executionId - Stored as `executionId`.
         * @param {*} resultUri - Stored as `resultUri`.
         * @returns {object} Totals plus, per operation, every sample in `elapsedMS` and its min/max/avg.
         */
        this.computeFinalResult = (tracker, executorResults, executionId, resultUri) => {
            const result = {
                cycles: Algo_1.default.max(executorResults.map(x => x.cycles)),
                elapsedMS: Algo_1.default.sum(executorResults.map(x => x.elapsedMS)),
                inputCount: Algo_1.default.sum(executorResults.map(x => x.inputCount)),
                outputCount: Algo_1.default.sum(executorResults.map(x => x.outputCount)),
                workerCount: executorResults.length,
                executionId,
                resultUri,
                operations: {}
            };
            const refreshStats = (operation) => {
                if (operation.elapsedMS.length === 0)
                    return;
                operation.min = Math.min(...operation.elapsedMS);
                operation.max = Math.max(...operation.elapsedMS);
                operation.avg = Algo_1.default.mean(operation.elapsedMS);
            };
            // Collect the samples of every worker first...
            for (const res of executorResults) {
                for (const opKey of Object.keys(res.operations)) {
                    if (!result.operations[opKey])
                        result.operations[opKey] = { avg: -1, max: -1, min: -1, elapsedMS: [] };
                    result.operations[opKey].elapsedMS.push(res.operations[opKey].elapsedMS);
                }
            }
            // ...then compute min, max, avg once per operation
            for (const opKey of Object.keys(result.operations))
                refreshStats(result.operations[opKey]);
            // Add tracker operations to result
            const trackerOperations = tracker.getOperations();
            for (const opKey of Object.keys(trackerOperations)) {
                const value = trackerOperations[opKey].elapsedMS;
                const existing = result.operations[opKey];
                if (!existing) {
                    result.operations[opKey] = { avg: value, max: value, min: value, elapsedMS: [value] };
                    continue;
                }
                // Same name already reported by the workers: keep its statistics in sync with the samples
                existing.elapsedMS.push(value);
                refreshStats(existing);
            }
            return result;
        };
        /** Forwards a worker progress packet (`processed` out of `total`) to the progress display. */
        this.onWorkAdvanced = (packet, index, progress) => {
            const { processed, total } = packet;
            progress.update((index + 1).toString(), processed / total);
        };
    }
}
330
- const ExecutorOrchestrator = new ExecutorOrchestratorClass();
331
- exports.default = ExecutorOrchestrator;
@@ -1,17 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
/**
 * Accumulates elapsed time per named operation.
 */
class ExecutorPerformance {
    constructor() {
        /**
         * Adds `elapsedMS` to the running total of the operation `name`, creating the entry
         * on first use.
         *
         * @param {string} name - Operation label.
         * @param {number} elapsedMS - Duration to add, in milliseconds.
         */
        this.measure = (name, elapsedMS) => {
            // Own-property check: a label such as "constructor" or "toString" must not resolve
            // to a member inherited from Object.prototype (the time would be written onto that
            // shared function instead of being recorded here).
            if (!Object.prototype.hasOwnProperty.call(this._operations, name))
                this._operations[name] = { elapsedMS: 0 };
            this._operations[name].elapsedMS += elapsedMS;
        };
        /**
         * @returns {Object<string, {elapsedMS: number}>} The live map of accumulated timings
         *   (not a copy).
         */
        this.getOperations = () => this._operations;
        this._operations = {};
    }
}
17
- exports.default = ExecutorPerformance;
@@ -1,54 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
/**
 * Console progress display: one bar per registered worker, redrawn in place at most
 * `_FPS` times per second.
 */
class ExecutorProgress {
    /**
     * @param {boolean} isEnabled - When falsy nothing is ever printed.
     */
    constructor(isEnabled) {
        this._isEnabled = false;
        this._FPS = 2;
        this._lastRenderTime = 0;
        this._lastRenderedLines = -1;
        /**
         * Registers (or resets) the worker `name` with a progress of 0.
         *
         * @param {string} name - Worker label shown in the bar.
         * @param {string} producerName - Producer the worker is reading.
         * @param {number} fileIndex - Zero-based index of the file being processed.
         * @param {number} totalFiles - Number of files of the producer.
         */
        this.register = (name, producerName, fileIndex, totalFiles) => {
            this.workers[name] = { progress: 0, producerName, fileIndex, totalFiles };
        };
        /**
         * Stores the progress (0..1) of a registered worker and redraws when the last redraw
         * is at least `1000 / _FPS` milliseconds old. Unknown workers are ignored.
         */
        this.update = (name, value) => {
            if (this.workers[name])
                this.workers[name].progress = value;
            const now = Date.now();
            const interval = 1000 / this._FPS;
            if (now - this._lastRenderTime >= interval) {
                this._lastRenderTime = now;
                this.render();
            }
        };
        /** Marks every worker as done and draws the final state. */
        this.complete = () => {
            for (const key of Object.keys(this.workers))
                this.workers[key].progress = 1;
            this.render();
        };
        /** Prints one bar per worker, replacing the previously printed bars when possible. */
        this.render = () => {
            if (!this._isEnabled)
                return;
            // Cursor control only exists on TTY streams: when stdout is piped or redirected
            // the previous bars are left in place and the new ones are appended.
            const out = process.stdout;
            const canRewind = Boolean(out.isTTY) && typeof out.moveCursor === 'function' && typeof out.clearLine === 'function';
            if (canRewind) {
                for (let i = 0; i < this._lastRenderedLines; i++) {
                    out.moveCursor(0, -1);
                    out.clearLine(1);
                }
            }
            this._lastRenderedLines = 0;
            const barWidth = 30;
            for (const key of Object.keys(this.workers)) {
                const worker = this.workers[key];
                // NaN (e.g. 0 processed out of 0) is shown as 0% rather than "NaN%"
                const ratio = Number.isNaN(worker.progress) ? 0 : worker.progress;
                const percentage = Math.min(100, Math.max(0, ratio * 100));
                const filledWidth = Math.floor((percentage / 100) * barWidth);
                const bar = '#'.repeat(filledWidth) + '-'.repeat(barWidth - filledWidth);
                const fileInfo = worker.totalFiles > 1 ? ` [${worker.fileIndex + 1}/${worker.totalFiles}]` : '';
                console.log(`Worker ${key.padStart(2, '0')}: [${bar}] ${percentage.toFixed(2)}% (${worker.producerName}${fileInfo})`);
                this._lastRenderedLines++;
            }
        };
        this._isEnabled = isEnabled;
        this.workers = {};
    }
}
54
- exports.default = ExecutorProgress;
@@ -1,52 +0,0 @@
1
- "use strict";
2
/**
 * Drives a generator as if it were an async function: every yielded value is awaited and
 * its settled result is fed back into the generator; the returned promise settles with the
 * generator's return value or with the first error that escapes it.
 *
 * @param {*} thisArg - `this` value used when invoking the generator function.
 * @param {Array|undefined} _arguments - Arguments forwarded to the generator function.
 * @param {Function|undefined} P - Promise constructor to use; defaults to the global Promise.
 * @param {Function} generator - Generator function to run.
 * @returns {Promise<*>} Promise for the generator's return value.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    const PromiseCtor = P || Promise;
    // Wrap plain values so that `.then` is always available on what the generator yielded
    const adopt = (value) => {
        if (value instanceof PromiseCtor)
            return value;
        return new PromiseCtor((resolve) => { resolve(value); });
    };
    return new PromiseCtor((resolve, reject) => {
        const step = (result) => {
            if (result.done) {
                resolve(result.value);
                return;
            }
            adopt(result.value).then(onFulfilled, onRejected);
        };
        const onFulfilled = (value) => {
            try {
                step(generator.next(value));
            }
            catch (e) {
                reject(e);
            }
        };
        const onRejected = (reason) => {
            try {
                step(generator["throw"](reason));
            }
            catch (e) {
                reject(e);
            }
        };
        // From here on `generator` holds the iterator produced by the generator function
        generator = generator.apply(thisArg, _arguments || []);
        step(generator.next());
    });
};
/**
 * Normalizes a required module so that its default export can be read from `.default`:
 * modules flagged with `__esModule` are returned untouched, anything else is wrapped.
 *
 * @param {*} mod - Value returned by `require`.
 * @returns {*} `mod` itself, or `{ default: mod }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule)
        return mod;
    return { "default": mod };
};
14
- Object.defineProperty(exports, "__esModule", { value: true });
15
- const path_1 = __importDefault(require("path"));
16
- const fs_1 = __importDefault(require("fs"));
17
- const promises_1 = __importDefault(require("fs/promises"));
18
- const Constants_1 = __importDefault(require("../Constants"));
19
/**
 * Builds the paths of the temporary files of one execution and removes them afterwards.
 * Everything belonging to an execution lives under
 * `<REMORA_PATH>/<PRODUCER_TEMP_FOLDER>/<scope.folder>`, so deleting that folder removes
 * the whole execution.
 */
class ExecutorScopeClass {
    constructor() {
        this.WORKERS_FOLDER = 'workers';
        this.PRODUCERS_FOLDER = 'producers';
        /** @returns {string} Path of the result file of the worker `workerId`. */
        this.getWorkerPath = (scope, workerId) => {
            const { REMORA_PATH, PRODUCER_TEMP_FOLDER } = Constants_1.default.defaults;
            const fileName = `${workerId}.dataset`;
            return path_1.default.join(REMORA_PATH, PRODUCER_TEMP_FOLDER, scope.folder, this.WORKERS_FOLDER, fileName);
        };
        /** @returns {string} Path of the local copy of the source file `sourceFileKey` of `producer`. */
        this.getProducerPath = (scope, producer, sourceFileKey) => {
            const { REMORA_PATH, PRODUCER_TEMP_FOLDER } = Constants_1.default.defaults;
            const fileName = `${sourceFileKey}.dataset`;
            return path_1.default.join(REMORA_PATH, PRODUCER_TEMP_FOLDER, scope.folder, this.PRODUCERS_FOLDER, producer.name, fileName);
        };
        /** @returns {string} Path of the merged dataset of the execution. */
        this.getMainPath = (scope) => {
            const { REMORA_PATH, PRODUCER_TEMP_FOLDER } = Constants_1.default.defaults;
            return path_1.default.join(REMORA_PATH, PRODUCER_TEMP_FOLDER, scope.folder, 'main.dataset');
        };
        /** Recursively deletes the folder of the execution, if it exists. */
        this.clearScope = async (scope) => {
            const { REMORA_PATH, PRODUCER_TEMP_FOLDER } = Constants_1.default.defaults;
            const scopePath = path_1.default.join(REMORA_PATH, PRODUCER_TEMP_FOLDER, scope.folder);
            if (!fs_1.default.existsSync(scopePath))
                return;
            await promises_1.default.rm(scopePath, { recursive: true, force: true });
        };
        /** Creates the parent folders of `fileUri` and an empty file at `fileUri` when they are missing. */
        this.ensurePath = (fileUri) => {
            const parentDir = path_1.default.dirname(fileUri);
            const hasDir = fs_1.default.existsSync(parentDir);
            if (!hasDir)
                fs_1.default.mkdirSync(parentDir, { recursive: true });
            const hasFile = fs_1.default.existsSync(fileUri);
            if (!hasFile)
                fs_1.default.writeFileSync(fileUri, '');
        };
    }
}
51
- const ExecutorScope = new ExecutorScopeClass();
52
- exports.default = ExecutorScope;
@@ -1,118 +0,0 @@
1
- "use strict";
2
/**
 * Drives a generator as if it were an async function: every yielded value is awaited and
 * its settled result is fed back into the generator; the returned promise settles with the
 * generator's return value or with the first error that escapes it.
 *
 * @param {*} thisArg - `this` value used when invoking the generator function.
 * @param {Array|undefined} _arguments - Arguments forwarded to the generator function.
 * @param {Function|undefined} P - Promise constructor to use; defaults to the global Promise.
 * @param {Function} generator - Generator function to run.
 * @returns {Promise<*>} Promise for the generator's return value.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    const PromiseCtor = P || Promise;
    // Wrap plain values so that `.then` is always available on what the generator yielded
    const adopt = (value) => {
        if (value instanceof PromiseCtor)
            return value;
        return new PromiseCtor((resolve) => { resolve(value); });
    };
    return new PromiseCtor((resolve, reject) => {
        const step = (result) => {
            if (result.done) {
                resolve(result.value);
                return;
            }
            adopt(result.value).then(onFulfilled, onRejected);
        };
        const onFulfilled = (value) => {
            try {
                step(generator.next(value));
            }
            catch (e) {
                reject(e);
            }
        };
        const onRejected = (reason) => {
            try {
                step(generator["throw"](reason));
            }
            catch (e) {
                reject(e);
            }
        };
        // From here on `generator` holds the iterator produced by the generator function
        generator = generator.apply(thisArg, _arguments || []);
        step(generator.next());
    });
};
/**
 * Normalizes a required module so that its default export can be read from `.default`:
 * modules flagged with `__esModule` are returned untouched, anything else is wrapped.
 *
 * @param {*} mod - Value returned by `require`.
 * @returns {*} `mod` itself, or `{ default: mod }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule)
        return mod;
    return { "default": mod };
};
14
- Object.defineProperty(exports, "__esModule", { value: true });
15
- const Algo_1 = __importDefault(require("../core/Algo"));
16
- const DSTE_1 = __importDefault(require("../core/dste/DSTE"));
17
- const DriverFactory_1 = __importDefault(require("../drivers/DriverFactory"));
18
- const Environment_1 = __importDefault(require("../engines/Environment"));
19
- const CSVParser_1 = __importDefault(require("../engines/parsing/CSVParser"));
20
- const ExecutorScope_1 = __importDefault(require("./ExecutorScope"));
21
/**
 * Serializes records into the internal dataset format of a consumer and exports the
 * merged dataset to the consumer's destination.
 */
class OutputExecutorClass {
    constructor() {
        // Quotes a single CSV cell: null/undefined become an empty cell, embedded quotes are doubled
        const quoteCsvValue = (value) => {
            const raw = value === null || value === undefined ? undefined : value.toString();
            const text = raw === null || raw === undefined ? '' : raw;
            return `"${Algo_1.default.replaceAll(text, '"', '""')}"`;
        };
        /**
         * Format in which records are stored while the consumer runs. It is derived from the
         * FIRST output only: `API` is stored as `JSON`, `PARQUET` as `CSV`, everything else as is.
         */
        this._getInternalRecordFormat = (consumer) => {
            const { format } = consumer.outputs[0];
            if (format === 'API')
                return 'JSON';
            if (format === 'PARQUET')
                return 'CSV';
            return format;
        };
        /**
         * Serializes one record in the internal format of the consumer.
         *
         * @throws {Error} When the internal format is neither `CSV` nor `JSON`.
         */
        this.outputRecord = (record, consumer, fields) => {
            const format = this._getInternalRecordFormat(consumer);
            switch (format) {
                case 'CSV':
                    return this.toCSV(record, fields, ',');
                case 'JSON':
                    return this.toJSON(record, fields);
                default:
                    throw new Error(`Export format ${format} not implemented yet.`);
            }
        };
        /**
         * Renders the visible fields of a record as one CSV line; every cell is quoted.
         *
         * @param {object} record - Values keyed by `finalKey`.
         * @param {Array<{finalKey: string, cField: {hidden?: boolean}}>} fields - Output fields, in column order.
         * @param {string} [delimiter=','] - Column separator.
         */
        this.toCSV = (record, fields, delimiter) => {
            const separator = delimiter === null || delimiter === undefined ? ',' : delimiter;
            return fields
                .filter(field => !field.cField.hidden) // remove the not wanted dimensions
                .map(field => quoteCsvValue(record[field.finalKey]))
                .join(separator);
        };
        /**
         * Renders a record as a JSON string without its hidden fields.
         * The record passed by the caller is left untouched.
         */
        this.toJSON = (record, fields) => {
            const hiddenKeys = fields.filter(field => field.cField.hidden).map(field => String(field.finalKey));
            if (hiddenKeys.length === 0)
                return JSON.stringify(record);
            const visible = {};
            for (const key of Object.keys(record)) {
                if (!hiddenKeys.includes(key))
                    visible[key] = record[key];
            }
            return JSON.stringify(visible);
        };
        /**
         * Exports the merged dataset of the scope to the destination of the consumer output.
         * The file is moved as is when the output is stored in the internal format (an `API`
         * output is stored as `JSON`), otherwise every line is converted while moving.
         *
         * @returns {Promise<*>} Whatever the destination driver returns.
         * @throws {Error} When the output format cannot be produced (e.g. `PARQUET`).
         */
        this.exportResult = async (consumer, fields, scope) => {
            const internalFormat = this._getInternalRecordFormat(consumer);
            // NOTE(review): every path below returns or throws during the first iteration, so only
            // consumer.outputs[0] is ever exported — confirm whether multi-output consumers are expected.
            for (const output of consumer.outputs) {
                const destination = Environment_1.default.getSource(output.exportDestination);
                const driver = await DriverFactory_1.default.instantiateDestination(destination);
                const currentPath = ExecutorScope_1.default.getMainPath(scope);
                const destinationName = this._composeFileName(consumer, output, this._getExtension(output));
                // API results are stored as JSON lines: they must not be re-parsed as CSV
                const storedFormat = output.format === 'API' ? 'JSON' : output.format;
                if (storedFormat === internalFormat)
                    return await driver.move(currentPath, destinationName);
                switch (output.format) {
                    case 'CSV':
                        // Stored as JSON lines, wanted as CSV (same quoting rules as toCSV)
                        return await driver.transformAndMove(currentPath, line => {
                            const parsed = JSON.parse(line);
                            return Object.keys(parsed).map(key => quoteCsvValue(parsed[key])).join(',');
                        }, destinationName);
                    case 'API':
                    case 'JSON':
                        // Stored as CSV, wanted as JSON lines
                        return await driver.transformAndMove(currentPath, line => {
                            const parts = CSVParser_1.default.parseRow(line, ',');
                            const value = {};
                            for (const [index, field] of fields.entries())
                                value[field.finalKey] = parts[index];
                            return JSON.stringify(value);
                        }, destinationName);
                    case 'PARQUET':
                    default:
                        throw new Error(`Export result to format ${output.format} not implemented yet.`);
                }
            }
        };
        /** File extension of an output: `csv` for CSV, `jsonl` for JSON, `txt` for anything else. */
        this._getExtension = (output) => {
            if (output.format === 'CSV')
                return 'csv';
            if (output.format === 'JSON')
                return 'jsonl';
            return 'txt';
        };
        /**
         * Name of the exported file: the configured `exportName` (its own extension replaced by
         * `extension`) or `<consumer>_<timestamp>[_<executionId>].<extension>`.
         */
        this._composeFileName = (consumer, output, extension, executionId) => {
            if (output.exportName && output.exportName.trim().length > 0) {
                // Ensure no extension duplication
                const sanitized = output.exportName.replace(/\.[^.]+$/, '');
                return `${sanitized}.${extension}`;
            }
            const baseTs = Algo_1.default.replaceAll(DSTE_1.default.now().toISOString().split('.')[0], ':', '-');
            const suffix = executionId ? `_${executionId}` : '';
            return `${consumer.name}_${baseTs}${suffix}.${extension}`;
        };
    }
}
117
- const OutputExecutor = new OutputExecutorClass();
118
- exports.default = OutputExecutor;