@forzalabs/remora 1.0.21 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. package/actions/automap.js +26 -42
  2. package/actions/compile.js +27 -43
  3. package/actions/create_consumer.js +24 -40
  4. package/actions/create_producer.js +16 -32
  5. package/actions/debug.js +18 -34
  6. package/actions/deploy.js +30 -46
  7. package/actions/discover.js +13 -29
  8. package/actions/init.js +29 -45
  9. package/actions/mock.js +16 -32
  10. package/actions/run.js +34 -52
  11. package/actions/sample.js +42 -58
  12. package/index.js +38 -43
  13. package/package.json +4 -4
  14. package/workers/ExecutorWorker.js +18 -32
  15. package/Constants.js +0 -34
  16. package/core/Affirm.js +0 -42
  17. package/core/Algo.js +0 -160
  18. package/core/dste/DSTE.js +0 -113
  19. package/core/logger/DebugLogService.js +0 -48
  20. package/core/logger/DevelopmentLogService.js +0 -70
  21. package/core/logger/LocalLogService.js +0 -70
  22. package/core/logger/Logger.js +0 -54
  23. package/database/DatabaseEngine.js +0 -149
  24. package/database/DatabaseStructure.js +0 -27
  25. package/definitions/DatasetDefinitions.js +0 -2
  26. package/definitions/ExecutorDefinitions.js +0 -2
  27. package/definitions/ProcessENV.js +0 -2
  28. package/definitions/agents/DestinationDriver.js +0 -2
  29. package/definitions/agents/SourceDriver.js +0 -2
  30. package/definitions/cli.js +0 -2
  31. package/definitions/database/ApiKeys.js +0 -2
  32. package/definitions/database/Stored.js +0 -7
  33. package/definitions/database/UsageStat.js +0 -2
  34. package/definitions/database/User.js +0 -2
  35. package/definitions/json_schemas/consumer-schema.json +0 -1226
  36. package/definitions/json_schemas/producer-schema.json +0 -308
  37. package/definitions/json_schemas/project-schema.json +0 -100
  38. package/definitions/json_schemas/source-schema.json +0 -249
  39. package/definitions/requests/ConsumerRequest.js +0 -2
  40. package/definitions/requests/Developer.js +0 -2
  41. package/definitions/requests/Mapping.js +0 -2
  42. package/definitions/requests/ProducerRequest.js +0 -2
  43. package/definitions/requests/Request.js +0 -2
  44. package/definitions/resources/Compiled.js +0 -2
  45. package/definitions/resources/Consumer.js +0 -2
  46. package/definitions/resources/Environment.js +0 -2
  47. package/definitions/resources/Library.js +0 -2
  48. package/definitions/resources/Producer.js +0 -2
  49. package/definitions/resources/Project.js +0 -2
  50. package/definitions/resources/Schema.js +0 -2
  51. package/definitions/resources/Source.js +0 -2
  52. package/definitions/temp.js +0 -2
  53. package/definitions/transform/Transformations.js +0 -2
  54. package/drivers/DeltaShareDriver.js +0 -186
  55. package/drivers/DriverFactory.js +0 -72
  56. package/drivers/DriverHelper.js +0 -248
  57. package/drivers/HttpApiDriver.js +0 -208
  58. package/drivers/RedshiftDriver.js +0 -184
  59. package/drivers/files/LocalDestinationDriver.js +0 -146
  60. package/drivers/files/LocalSourceDriver.js +0 -405
  61. package/drivers/s3/S3DestinationDriver.js +0 -197
  62. package/drivers/s3/S3SourceDriver.js +0 -495
  63. package/engines/CryptoEngine.js +0 -75
  64. package/engines/Environment.js +0 -170
  65. package/engines/ProcessENVManager.js +0 -83
  66. package/engines/RandomEngine.js +0 -47
  67. package/engines/SecretManager.js +0 -23
  68. package/engines/UserManager.js +0 -66
  69. package/engines/ai/AutoMapperEngine.js +0 -37
  70. package/engines/ai/DeveloperEngine.js +0 -497
  71. package/engines/ai/LLM.js +0 -255
  72. package/engines/consumer/ConsumerManager.js +0 -218
  73. package/engines/consumer/ConsumerOnFinishManager.js +0 -202
  74. package/engines/dataset/Dataset.js +0 -824
  75. package/engines/dataset/DatasetManager.js +0 -211
  76. package/engines/dataset/DatasetRecord.js +0 -120
  77. package/engines/dataset/DatasetRecordPool.js +0 -77
  78. package/engines/execution/RequestExecutor.js +0 -67
  79. package/engines/parsing/CSVParser.js +0 -60
  80. package/engines/parsing/LineParser.js +0 -71
  81. package/engines/parsing/ParseCompression.js +0 -101
  82. package/engines/parsing/ParseHelper.js +0 -18
  83. package/engines/parsing/ParseManager.js +0 -54
  84. package/engines/parsing/XLSParser.js +0 -87
  85. package/engines/parsing/XMLParser.js +0 -115
  86. package/engines/producer/ProducerEngine.js +0 -127
  87. package/engines/producer/ProducerManager.js +0 -43
  88. package/engines/scheduler/CronScheduler.js +0 -222
  89. package/engines/scheduler/QueueManager.js +0 -314
  90. package/engines/schema/SchemaValidator.js +0 -67
  91. package/engines/transform/JoinEngine.js +0 -232
  92. package/engines/transform/TransformationEngine.js +0 -277
  93. package/engines/transform/TypeCaster.js +0 -59
  94. package/engines/usage/DataframeManager.js +0 -55
  95. package/engines/usage/UsageDataManager.js +0 -151
  96. package/engines/usage/UsageManager.js +0 -65
  97. package/engines/validation/Validator.js +0 -216
  98. package/executors/ConsumerExecutor.js +0 -280
  99. package/executors/Executor.js +0 -177
  100. package/executors/ExecutorOrchestrator.js +0 -331
  101. package/executors/ExecutorPerformance.js +0 -17
  102. package/executors/ExecutorProgress.js +0 -54
  103. package/executors/ExecutorScope.js +0 -52
  104. package/executors/OutputExecutor.js +0 -118
  105. package/executors/ProducerExecutor.js +0 -108
  106. package/helper/Helper.js +0 -149
  107. package/helper/Logger.js +0 -84
  108. package/helper/Runtime.js +0 -20
  109. package/helper/Settings.js +0 -13
  110. package/licencing/LicenceManager.js +0 -64
  111. package/settings.js +0 -12
@@ -1,331 +0,0 @@
1
- "use strict";
2
- var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
- function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
- return new (P || (P = Promise))(function (resolve, reject) {
5
- function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
- function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
- function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
- step((generator = generator.apply(thisArg, _arguments || [])).next());
9
- });
10
- };
11
- var __importDefault = (this && this.__importDefault) || function (mod) {
12
- return (mod && mod.__esModule) ? mod : { "default": mod };
13
- };
14
- Object.defineProperty(exports, "__esModule", { value: true });
15
- const os_1 = __importDefault(require("os"));
16
- const fs_1 = __importDefault(require("fs"));
17
- const promises_1 = __importDefault(require("fs/promises"));
18
- const path_1 = __importDefault(require("path"));
19
- const promises_2 = require("stream/promises");
20
- const workerpool_1 = __importDefault(require("workerpool"));
21
- const Affirm_1 = __importDefault(require("../core/Affirm"));
22
- const UsageManager_1 = __importDefault(require("../engines/usage/UsageManager"));
23
- const Helper_1 = __importDefault(require("../helper/Helper"));
24
- const Environment_1 = __importDefault(require("../engines/Environment"));
25
- const ProducerExecutor_1 = __importDefault(require("./ProducerExecutor"));
26
- const Constants_1 = __importDefault(require("../Constants"));
27
- const DriverHelper_1 = __importDefault(require("../drivers/DriverHelper"));
28
- const ConsumerExecutor_1 = __importDefault(require("./ConsumerExecutor"));
29
- const OutputExecutor_1 = __importDefault(require("./OutputExecutor"));
30
- const ConsumerManager_1 = __importDefault(require("../engines/consumer/ConsumerManager"));
31
- const ExecutorPerformance_1 = __importDefault(require("./ExecutorPerformance"));
32
- const ExecutorProgress_1 = __importDefault(require("./ExecutorProgress"));
33
- const Algo_1 = __importDefault(require("../core/Algo"));
34
- const ConsumerOnFinishManager_1 = __importDefault(require("../engines/consumer/ConsumerOnFinishManager"));
35
- const ExecutorScope_1 = __importDefault(require("./ExecutorScope"));
36
- const ProcessENVManager_1 = __importDefault(require("../engines/ProcessENVManager"));
37
- class ExecutorOrchestratorClass {
38
- constructor() {
39
- this.init = () => {
40
- if (!this._executorPool) {
41
- const options = {
42
- workerThreadOpts: {
43
- resourceLimits: {
44
- maxOldGenerationSizeMb: Constants_1.default.defaults.MIN_RUNTIME_HEAP_MB
45
- }
46
- }
47
- };
48
- const workerPath = this._getWorkerPath();
49
- this._executorPool = workerpool_1.default.pool(path_1.default.join(workerPath, 'ExecutorWorker.js'), options);
50
- }
51
- };
52
- this.launch = (request) => __awaiter(this, void 0, void 0, function* () {
53
- var _a, _b;
54
- (0, Affirm_1.default)(request, 'Invalid options');
55
- const { consumer, details, logProgress, options } = request;
56
- (0, Affirm_1.default)(consumer, 'Invalid consumer');
57
- (0, Affirm_1.default)(details, 'Invalid execution details');
58
- const tracker = new ExecutorPerformance_1.default();
59
- const _progress = new ExecutorProgress_1.default(logProgress);
60
- const { usageId } = UsageManager_1.default.startUsage(consumer, details);
61
- const scope = { id: usageId, folder: `${consumer.name}_${usageId}`, workersId: [] };
62
- try {
63
- const start = performance.now();
64
- this.init();
65
- const executorResults = [];
66
- let counter = performance.now();
67
- const sourceFilesByProducer = yield this.readySourceFiles(consumer, scope);
68
- tracker.measure('ready-producers', performance.now() - counter);
69
- let globalWorkerIndex = 0;
70
- for (const pair of sourceFilesByProducer) {
71
- const { prod, cProd, response } = pair;
72
- // Make sure that the data files are there, if missing and isOptional = true, then skip
73
- if (!fs_1.default.existsSync(response.files[0].fullUri)) {
74
- if (!cProd.isOptional)
75
- throw new Error(`Expected data file ${response.files[0].fullUri} of producer ${prod.name} in consumer ${consumer.name} is missing.`);
76
- else if (cProd.isOptional === true)
77
- continue;
78
- }
79
- console.log('Starting operations on ', response.files[0].fullUri);
80
- // Extract the dimensions for this producer just once
81
- const firstLine = (yield DriverHelper_1.default.quickReadFile(response.files[0].fullUri, 1))[0];
82
- const header = ProducerExecutor_1.default.processHeader(firstLine, prod);
83
- const prodDimensions = ProducerExecutor_1.default.reconcileHeader(header, prod);
84
- const totalFiles = response.files.length;
85
- for (const [fileIndex, file] of response.files.entries()) {
86
- const chunks = ExecutorOrchestrator.scopeWork(file.fullUri);
87
- const workerThreads = [];
88
- for (const chunk of chunks) {
89
- // Spawn off thread
90
- const workerId = `${usageId}_${globalWorkerIndex}`;
91
- const currentWorkerIndex = globalWorkerIndex;
92
- globalWorkerIndex++;
93
- const workerData = {
94
- producer: prod,
95
- chunk,
96
- consumer,
97
- prodDimensions,
98
- workerId,
99
- scope,
100
- options
101
- };
102
- _progress.register((currentWorkerIndex + 1).toString(), prod.name, fileIndex, totalFiles);
103
- scope.workersId.push(workerId);
104
- workerThreads.push(this._executorPool.exec('executor', [workerData], {
105
- on: payload => this.onWorkAdvanced(payload, currentWorkerIndex, _progress)
106
- }));
107
- }
108
- executorResults.push(...yield Promise.all(workerThreads));
109
- // WARNING: will this not create problems when multiple are executed together at the same time since this is a singleton?!?
110
- yield this._executorPool.terminate();
111
- }
112
- }
113
- _progress.complete();
114
- if (executorResults.some(x => !Algo_1.default.hasVal(x)))
115
- throw new Error(`${executorResults.filter(x => !Algo_1.default.hasVal(x)).length} worker(s) failed to produce valid results`);
116
- yield this.reconcileExecutorThreadsResults(scope, executorResults, tracker);
117
- // If there is more than one worker, then I need to redo the operations that are done on multiple lines (cause now the worker files have been merged together)
118
- const postOperation = { totalOutputCount: null };
119
- if (executorResults.length > 1) {
120
- if (((_a = consumer.options) === null || _a === void 0 ? void 0 : _a.distinct) === true) {
121
- counter = performance.now();
122
- const unifiedOutputCount = yield ConsumerExecutor_1.default.processDistinct(ExecutorScope_1.default.getMainPath(scope));
123
- tracker.measure('process-distinct:main', performance.now() - counter);
124
- postOperation.totalOutputCount = unifiedOutputCount;
125
- }
126
- if ((_b = consumer.options) === null || _b === void 0 ? void 0 : _b.distinctOn) {
127
- counter = performance.now();
128
- const unifiedOutputCount = yield ConsumerExecutor_1.default.processDistinctOn(consumer, ExecutorScope_1.default.getMainPath(scope));
129
- tracker.measure('process-distinct-on:main', performance.now() - counter);
130
- postOperation.totalOutputCount = unifiedOutputCount;
131
- }
132
- }
133
- // Export to the destination
134
- counter = performance.now();
135
- const exportRes = yield OutputExecutor_1.default.exportResult(consumer, ConsumerManager_1.default.getExpandedFields(consumer), scope);
136
- tracker.measure('export-result', performance.now() - counter);
137
- // Perform on-success actions if any
138
- if (consumer.outputs.some(x => x.onSuccess)) {
139
- counter = performance.now();
140
- yield ConsumerOnFinishManager_1.default.onConsumerSuccess(consumer, usageId);
141
- tracker.measure('on-success-actions', performance.now() - counter);
142
- }
143
- yield this.performCleanupOperations(scope, tracker);
144
- const finalResult = this.computeFinalResult(tracker, executorResults, usageId, exportRes.key);
145
- finalResult.elapsedMS = performance.now() - start;
146
- if (Algo_1.default.hasVal(postOperation.totalOutputCount))
147
- finalResult.outputCount = postOperation.totalOutputCount;
148
- UsageManager_1.default.endUsage(usageId, finalResult.outputCount, finalResult);
149
- return finalResult;
150
- }
151
- catch (error) {
152
- yield ConsumerOnFinishManager_1.default.onConsumerError(consumer, usageId);
153
- yield this.performCleanupOperations(scope, tracker);
154
- UsageManager_1.default.failUsage(usageId, Helper_1.default.asError(error).message);
155
- throw error;
156
- }
157
- });
158
- /**
159
- * Calculates line-aligned chunk offsets for parallel file processing.
160
- * Each chunk boundary is adjusted to the next newline to avoid breaking lines.
161
- * Returns a single chunk for small files where parallelism overhead isn't worth it.
162
- */
163
- this.scopeWork = (fileUri, numChunks) => {
164
- const fileSize = fs_1.default.statSync(fileUri).size;
165
- if (fileSize === 0)
166
- return [];
167
- // Small files: single chunk, parallelism overhead not worth it
168
- if (fileSize < Constants_1.default.defaults.MIN_FILE_SIZE_FOR_PARALLEL) {
169
- return [{ start: 0, end: fileSize, isFirstChunk: true, fileUri }];
170
- }
171
- // Calculate optimal chunk count based on file size and CPU cores (-1 cause it is used by the main thread)
172
- const cpus = numChunks !== null && numChunks !== void 0 ? numChunks : (os_1.default.cpus().length - 1);
173
- const maxChunksBySize = Math.floor(fileSize / Constants_1.default.defaults.MIN_CHUNK_SIZE);
174
- const effectiveChunks = Math.min(cpus, maxChunksBySize);
175
- if (effectiveChunks <= 1)
176
- return [{ start: 0, end: fileSize, isFirstChunk: true, fileUri }];
177
- const targetChunkSize = Math.floor(fileSize / effectiveChunks);
178
- const fd = fs_1.default.openSync(fileUri, 'r');
179
- try {
180
- const offsets = [];
181
- let currentStart = 0;
182
- for (let i = 0; i < cpus - 1; i++) {
183
- const targetEnd = currentStart + targetChunkSize;
184
- // Don't overshoot file size
185
- if (targetEnd >= fileSize) {
186
- break;
187
- }
188
- // Find next newline after target boundary
189
- const alignedEnd = this.findNextNewline(fd, targetEnd, fileSize);
190
- offsets.push({ start: currentStart, end: alignedEnd, isFirstChunk: i === 0, fileUri });
191
- currentStart = alignedEnd;
192
- }
193
- // Final chunk goes to end of file
194
- if (currentStart < fileSize) {
195
- offsets.push({ start: currentStart, end: fileSize, isFirstChunk: offsets.length === 0, fileUri });
196
- }
197
- return offsets;
198
- }
199
- finally {
200
- fs_1.default.closeSync(fd);
201
- }
202
- };
203
- /**
204
- * Efficiently finds the next newline character starting from a position.
205
- * Uses small buffer reads for speed.
206
- */
207
- this.findNextNewline = (fd, position, fileSize) => {
208
- const BUFFER_SIZE = 8192; // 8KB buffer for scanning
209
- const buffer = Buffer.allocUnsafe(BUFFER_SIZE);
210
- let currentPos = position;
211
- while (currentPos < fileSize) {
212
- const bytesToRead = Math.min(BUFFER_SIZE, fileSize - currentPos);
213
- const bytesRead = fs_1.default.readSync(fd, buffer, 0, bytesToRead, currentPos);
214
- if (bytesRead === 0)
215
- break;
216
- // Scan buffer for newline
217
- for (let i = 0; i < bytesRead; i++) {
218
- if (buffer[i] === 0x0A) { // \n
219
- return currentPos + i + 1; // Position after the newline
220
- }
221
- }
222
- currentPos += bytesRead;
223
- }
224
- // No newline found, return file end
225
- return fileSize;
226
- };
227
- this.readySourceFiles = (consumer, scope) => __awaiter(this, void 0, void 0, function* () {
228
- const results = [];
229
- for (let i = 0; i < consumer.producers.length; i++) {
230
- const cProd = consumer.producers[i];
231
- const prod = Environment_1.default.getProducer(cProd.name);
232
- results.push({ prod, cProd, response: yield ProducerExecutor_1.default.ready(prod, scope) });
233
- }
234
- return results;
235
- });
236
- this._getWorkerPath = () => {
237
- // Get the current file's directory
238
- const currentDir = __dirname;
239
- if (ProcessENVManager_1.default.getEnvVariable('NODE_ENV') === 'dev' || ProcessENVManager_1.default.getEnvVariable('NODE_ENV') === 'development')
240
- return path_1.default.resolve('./.build/workers');
241
- const forcedPath = ProcessENVManager_1.default.getEnvVariable('REMORA_WORKERS_PATH');
242
- if (forcedPath && forcedPath.length > 0)
243
- return path_1.default.join(__dirname, forcedPath);
244
- // Check if we're in a published npm package (no .build in path)
245
- if (!currentDir.includes('.build')) {
246
- // We're in the published package, workers are relative to package root
247
- // __dirname is something like: /path/to/package/executors
248
- // Workers are at /path/to/package/workers (sibling folder)
249
- return path_1.default.join(__dirname, '../workers');
250
- }
251
- else {
252
- // We're in development, workers are in ./.build/workers
253
- return path_1.default.resolve('./.build/workers');
254
- }
255
- };
256
- this.reconcileExecutorThreadsResults = (scope, executorResults, tracker) => __awaiter(this, void 0, void 0, function* () {
257
- const mainPath = ExecutorScope_1.default.getMainPath(scope);
258
- ConsumerExecutor_1.default._ensurePath(mainPath);
259
- // Merge all the various files into a single one
260
- if (executorResults.length > 1) {
261
- const perf = performance.now();
262
- const output = fs_1.default.createWriteStream(mainPath);
263
- output.setMaxListeners(executorResults.length + 1);
264
- for (const workerResult of executorResults) {
265
- yield (0, promises_2.pipeline)(fs_1.default.createReadStream(workerResult.resultUri), output, { end: false });
266
- }
267
- output.end();
268
- output.close();
269
- tracker.measure('merge-workers', performance.now() - perf);
270
- }
271
- else if (executorResults.length === 1) {
272
- // If there is only one worker, then just rename the worker .dataset to the general consumer one
273
- yield promises_1.default.rename(executorResults[0].resultUri, mainPath);
274
- }
275
- });
276
- this.performCleanupOperations = (scope, tracker) => __awaiter(this, void 0, void 0, function* () {
277
- const start = performance.now();
278
- yield ExecutorScope_1.default.clearScope(scope);
279
- tracker.measure('cleanup-operations', performance.now() - start);
280
- });
281
- this.computeFinalResult = (tracker, executorResults, executionId, resultUri) => {
282
- const result = {
283
- cycles: Algo_1.default.max(executorResults.map(x => x.cycles)),
284
- elapsedMS: Algo_1.default.sum(executorResults.map(x => x.elapsedMS)),
285
- inputCount: Algo_1.default.sum(executorResults.map(x => x.inputCount)),
286
- outputCount: Algo_1.default.sum(executorResults.map(x => x.outputCount)),
287
- workerCount: executorResults.length,
288
- executionId,
289
- resultUri,
290
- operations: {}
291
- };
292
- for (const res of executorResults) {
293
- for (const opKey of Object.keys(res.operations)) {
294
- const op = res.operations[opKey];
295
- let label = result.operations[opKey];
296
- if (!label) {
297
- result.operations[opKey] = { avg: -1, max: -1, min: -1, elapsedMS: [] };
298
- label = result.operations[opKey];
299
- }
300
- label.elapsedMS.push(op.elapsedMS);
301
- }
302
- // Calculate min, max, avg for all operations after collecting all data
303
- for (const opKey of Object.keys(result.operations)) {
304
- const operation = result.operations[opKey];
305
- if (operation.elapsedMS.length > 0) {
306
- operation.min = Math.min(...operation.elapsedMS);
307
- operation.max = Math.max(...operation.elapsedMS);
308
- operation.avg = Algo_1.default.mean(operation.elapsedMS);
309
- }
310
- }
311
- }
312
- // Add tracker operations to result
313
- const trackerOperations = tracker.getOperations();
314
- for (const opKey of Object.keys(trackerOperations)) {
315
- const trackerOp = trackerOperations[opKey];
316
- const value = trackerOp.elapsedMS;
317
- if (!result.operations[opKey]) {
318
- result.operations[opKey] = { avg: value, max: value, min: value, elapsedMS: [] };
319
- }
320
- result.operations[opKey].elapsedMS.push(value);
321
- }
322
- return result;
323
- };
324
- this.onWorkAdvanced = (packet, index, progress) => {
325
- const { processed, total } = packet;
326
- progress.update((index + 1).toString(), processed / total);
327
- };
328
- }
329
- }
330
- const ExecutorOrchestrator = new ExecutorOrchestratorClass();
331
- exports.default = ExecutorOrchestrator;
@@ -1,17 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- class ExecutorPerformance {
4
- constructor() {
5
- this.measure = (name, elapsedMS) => {
6
- let tracker = this._operations[name];
7
- if (!tracker) {
8
- this._operations[name] = { elapsedMS: 0 };
9
- tracker = this._operations[name];
10
- }
11
- tracker.elapsedMS += elapsedMS;
12
- };
13
- this.getOperations = () => this._operations;
14
- this._operations = {};
15
- }
16
- }
17
- exports.default = ExecutorPerformance;
@@ -1,54 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- class ExecutorProgress {
4
- constructor(isEnabled) {
5
- this._isEnabled = false;
6
- this._FPS = 2;
7
- this._lastRenderTime = 0;
8
- this._lastRenderedLines = -1;
9
- this.register = (name, producerName, fileIndex, totalFiles) => {
10
- this.workers[name] = { progress: 0, producerName, fileIndex, totalFiles };
11
- };
12
- this.update = (name, value) => {
13
- if (this.workers[name])
14
- this.workers[name].progress = value;
15
- const now = Date.now();
16
- const interval = 1000 / this._FPS;
17
- if (now - this._lastRenderTime >= interval) {
18
- this._lastRenderTime = now;
19
- this.render();
20
- }
21
- };
22
- this.complete = () => {
23
- for (const key of Object.keys(this.workers)) {
24
- this.workers[key].progress = 1;
25
- }
26
- this.render();
27
- };
28
- this.render = () => {
29
- if (!this._isEnabled)
30
- return;
31
- if (this._lastRenderedLines > 0) {
32
- for (let i = 0; i < this._lastRenderedLines; i++) {
33
- process.stdout.moveCursor(0, -1);
34
- process.stdout.clearLine(1);
35
- }
36
- }
37
- this._lastRenderedLines = 0;
38
- for (const key of Object.keys(this.workers)) {
39
- const worker = this.workers[key];
40
- const percentage = Math.min(100, Math.max(0, worker.progress * 100));
41
- const barWidth = 30;
42
- const filledWidth = Math.floor((percentage / 100) * barWidth);
43
- const emptyWidth = barWidth - filledWidth;
44
- const bar = '#'.repeat(filledWidth) + '-'.repeat(emptyWidth);
45
- const fileInfo = worker.totalFiles > 1 ? ` [${worker.fileIndex + 1}/${worker.totalFiles}]` : '';
46
- console.log(`Worker ${key.padStart(2, '0')}: [${bar}] ${percentage.toFixed(2)}% (${worker.producerName}${fileInfo})`);
47
- this._lastRenderedLines++;
48
- }
49
- };
50
- this._isEnabled = isEnabled;
51
- this.workers = {};
52
- }
53
- }
54
- exports.default = ExecutorProgress;
@@ -1,52 +0,0 @@
1
- "use strict";
2
- var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
- function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
- return new (P || (P = Promise))(function (resolve, reject) {
5
- function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
- function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
- function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
- step((generator = generator.apply(thisArg, _arguments || [])).next());
9
- });
10
- };
11
- var __importDefault = (this && this.__importDefault) || function (mod) {
12
- return (mod && mod.__esModule) ? mod : { "default": mod };
13
- };
14
- Object.defineProperty(exports, "__esModule", { value: true });
15
- const path_1 = __importDefault(require("path"));
16
- const fs_1 = __importDefault(require("fs"));
17
- const promises_1 = __importDefault(require("fs/promises"));
18
- const Constants_1 = __importDefault(require("../Constants"));
19
- class ExecutorScopeClass {
20
- constructor() {
21
- this.WORKERS_FOLDER = 'workers';
22
- this.PRODUCERS_FOLDER = 'producers';
23
- this.getWorkerPath = (scope, workerId) => {
24
- return path_1.default.join(Constants_1.default.defaults.REMORA_PATH, Constants_1.default.defaults.PRODUCER_TEMP_FOLDER,
25
- // A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
26
- scope.folder, this.WORKERS_FOLDER, `${workerId}.dataset`);
27
- };
28
- this.getProducerPath = (scope, producer, sourceFileKey) => {
29
- return path_1.default.join(Constants_1.default.defaults.REMORA_PATH, Constants_1.default.defaults.PRODUCER_TEMP_FOLDER,
30
- // A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
31
- scope.folder, this.PRODUCERS_FOLDER, producer.name, `${sourceFileKey}.dataset`);
32
- };
33
- this.getMainPath = (scope) => {
34
- return path_1.default.join(Constants_1.default.defaults.REMORA_PATH, Constants_1.default.defaults.PRODUCER_TEMP_FOLDER, scope.folder, 'main.dataset');
35
- };
36
- this.clearScope = (scope) => __awaiter(this, void 0, void 0, function* () {
37
- const scopePath = path_1.default.join(Constants_1.default.defaults.REMORA_PATH, Constants_1.default.defaults.PRODUCER_TEMP_FOLDER, scope.folder);
38
- if (fs_1.default.existsSync(scopePath)) {
39
- yield promises_1.default.rm(scopePath, { recursive: true, force: true });
40
- }
41
- });
42
- this.ensurePath = (fileUri) => {
43
- const dir = path_1.default.dirname(fileUri);
44
- if (!fs_1.default.existsSync(dir))
45
- fs_1.default.mkdirSync(dir, { recursive: true });
46
- if (!fs_1.default.existsSync(fileUri))
47
- fs_1.default.writeFileSync(fileUri, '');
48
- };
49
- }
50
- }
51
- const ExecutorScope = new ExecutorScopeClass();
52
- exports.default = ExecutorScope;
@@ -1,118 +0,0 @@
1
- "use strict";
2
- var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
- function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
- return new (P || (P = Promise))(function (resolve, reject) {
5
- function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
- function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
- function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
- step((generator = generator.apply(thisArg, _arguments || [])).next());
9
- });
10
- };
11
- var __importDefault = (this && this.__importDefault) || function (mod) {
12
- return (mod && mod.__esModule) ? mod : { "default": mod };
13
- };
14
- Object.defineProperty(exports, "__esModule", { value: true });
15
- const Algo_1 = __importDefault(require("../core/Algo"));
16
- const DSTE_1 = __importDefault(require("../core/dste/DSTE"));
17
- const DriverFactory_1 = __importDefault(require("../drivers/DriverFactory"));
18
- const Environment_1 = __importDefault(require("../engines/Environment"));
19
- const CSVParser_1 = __importDefault(require("../engines/parsing/CSVParser"));
20
- const ExecutorScope_1 = __importDefault(require("./ExecutorScope"));
21
- class OutputExecutorClass {
22
- constructor() {
23
- this._getInternalRecordFormat = (consumer) => {
24
- const output = consumer.outputs[0];
25
- const format = output.format === 'API'
26
- ? 'JSON'
27
- : output.format === 'PARQUET'
28
- ? 'CSV'
29
- : output.format;
30
- return format;
31
- };
32
- this.outputRecord = (record, consumer, fields) => {
33
- const format = this._getInternalRecordFormat(consumer);
34
- switch (format) {
35
- case 'CSV':
36
- return this.toCSV(record, fields, ',');
37
- case 'JSON':
38
- return this.toJSON(record, fields);
39
- default:
40
- throw new Error(`Export format ${format} not implemented yet.`);
41
- }
42
- };
43
- this.toCSV = (record, fields, delimiter) => {
44
- const myDelimtier = delimiter !== null && delimiter !== void 0 ? delimiter : ',';
45
- // remove the not wanted dimension
46
- const line = fields
47
- .filter(x => !x.cField.hidden)
48
- .map(x => { var _a, _b; return `"${Algo_1.default.replaceAll((_b = (_a = record[x.finalKey]) === null || _a === void 0 ? void 0 : _a.toString()) !== null && _b !== void 0 ? _b : '', '"', '""')}"`; })
49
- .join(myDelimtier);
50
- return line;
51
- };
52
- this.toJSON = (record, fields) => {
53
- if (fields.some(x => x.cField.hidden)) {
54
- // remove the un-wanted dimensions
55
- for (const dim of fields) {
56
- if (dim.cField.hidden)
57
- delete record[dim.finalKey];
58
- }
59
- return JSON.stringify(record);
60
- }
61
- else {
62
- return JSON.stringify(record);
63
- }
64
- };
65
- this.exportResult = (consumer, fields, scope) => __awaiter(this, void 0, void 0, function* () {
66
- const internalFormat = this._getInternalRecordFormat(consumer);
67
- for (const output of consumer.outputs) {
68
- const destination = Environment_1.default.getSource(output.exportDestination);
69
- const driver = yield DriverFactory_1.default.instantiateDestination(destination);
70
- const currentPath = ExecutorScope_1.default.getMainPath(scope);
71
- const destinationName = this._composeFileName(consumer, output, this._getExtension(output));
72
- if (output.format === internalFormat) {
73
- return yield driver.move(currentPath, destinationName);
74
- }
75
- else {
76
- switch (output.format) {
77
- case 'CSV':
78
- return yield driver.transformAndMove(currentPath, line => {
79
- const parsed = JSON.parse(line);
80
- return Object.keys(parsed).map(x => `"${parsed[x]}"`).join(',');
81
- }, destinationName);
82
- case 'API':
83
- case 'JSON':
84
- return yield driver.transformAndMove(currentPath, line => {
85
- const parts = CSVParser_1.default.parseRow(line, ',');
86
- const value = {};
87
- for (const [index, field] of fields.entries())
88
- value[field.finalKey] = parts[index];
89
- return JSON.stringify(value);
90
- }, destinationName);
91
- case 'PARQUET':
92
- default:
93
- throw new Error(`Export result to format ${output.format} not implemented yet.`);
94
- }
95
- }
96
- }
97
- });
98
- this._getExtension = (output) => {
99
- return output.format === 'CSV'
100
- ? 'csv'
101
- : output.format === 'JSON'
102
- ? 'jsonl'
103
- : 'txt';
104
- };
105
- this._composeFileName = (consumer, output, extension, executionId) => {
106
- if (output.exportName && output.exportName.trim().length > 0) {
107
- // Ensure no extension duplication
108
- const sanitized = output.exportName.replace(/\.[^.]+$/, '');
109
- return `${sanitized}.${extension}`;
110
- }
111
- const baseTs = Algo_1.default.replaceAll(DSTE_1.default.now().toISOString().split('.')[0], ':', '-');
112
- const suffix = executionId ? `_${executionId}` : '';
113
- return `${consumer.name}_${baseTs}${suffix}.${extension}`;
114
- };
115
- }
116
- }
117
- const OutputExecutor = new OutputExecutorClass();
118
- exports.default = OutputExecutor;