@forzalabs/remora 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,184 +0,0 @@
- "use strict";
- var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
-     function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
-     return new (P || (P = Promise))(function (resolve, reject) {
-         function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
-         function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
-         function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
-         step((generator = generator.apply(thisArg, _arguments || [])).next());
-     });
- };
- var __importDefault = (this && this.__importDefault) || function (mod) {
-     return (mod && mod.__esModule) ? mod : { "default": mod };
- };
- Object.defineProperty(exports, "__esModule", { value: true });
- const Constants_1 = __importDefault(require("../../Constants"));
- const Affirm_1 = __importDefault(require("../../core/Affirm"));
- const Environment_1 = __importDefault(require("../Environment"));
- const workerpool_1 = __importDefault(require("workerpool"));
- const DatasetManager_1 = __importDefault(require("./DatasetManager"));
- const path_1 = __importDefault(require("path"));
- class ParallelDatasetClass {
-     constructor() {
-         this.init = () => {
-             /**
-              * I need the init to be called after all the setup has been completed because I need the .env to be loaded
-              */
-             if (!this._filterPool || !this._projectionPool || !this._transformPool) {
-                 const options = {
-                     workerThreadOpts: {
-                         resourceLimits: {
-                             maxOldGenerationSizeMb: Constants_1.default.defaults.MIN_RUNTIME_HEAP_MB
-                         }
-                     }
-                 };
-                 const workerPath = this._getWorkerPath();
-                 this._filterPool = workerpool_1.default.pool(path_1.default.join(workerPath, 'FilterWorker.js'), options);
-                 this._projectionPool = workerpool_1.default.pool(path_1.default.join(workerPath, 'ProjectionWorker.js'), options);
-                 this._transformPool = workerpool_1.default.pool(path_1.default.join(workerPath, 'TransformWorker.js'), options);
-             }
-         };
-         this._getWorkerPath = () => {
-             // Get the current file's directory
-             const currentDir = __dirname;
-             if (process.env.NODE_ENV === 'dev' || process.env.NODE_ENV === 'development')
-                 return path_1.default.resolve('./.build/workers');
-             const forcedPath = process.env.REMORA_WORKERS_PATH;
-             if (forcedPath && forcedPath.length > 0)
-                 return path_1.default.join(__dirname, forcedPath);
-             // Check if we're in a published npm package (no .build in path)
-             if (!currentDir.includes('.build')) {
-                 // We're in the published package, workers are relative to package root
-                 // __dirname is something like: /path/to/package/engines/dataset
-                 // We need to go up to package root and then to workers
-                 return path_1.default.join(__dirname, '../../workers');
-             }
-             else {
-                 // We're in development, workers are in ./.build/workers
-                 return path_1.default.resolve('./.build/workers');
-             }
-         };
-         this._scopeWork = (dataset) => {
-             var _a;
-             const datasetCount = dataset.getCount();
-             const batchSize = (_a = parseInt(Environment_1.default.get('MAX_ITEMS_IN_MEMORY'))) !== null && _a !== void 0 ? _a : Constants_1.default.defaults.MAX_ITEMS_IN_MEMORY;
-             const workerChunkSize = batchSize * Math.round(Constants_1.default.defaults.INDICATIVE_THREAD_LINE_COUNT / batchSize);
-             const workerCount = Math.max(Math.min(Math.ceil(datasetCount / workerChunkSize), Constants_1.default.defaults.MAX_THREAD_COUNT), 1);
-             const adjustedWorkerCount = Math.ceil(datasetCount / workerCount);
-             return { workerCount, adjustedWorkerCount };
-         };
-         this.filter = (dataset, filters) => __awaiter(this, void 0, void 0, function* () {
-             (0, Affirm_1.default)(dataset, `Invalid dataset`);
-             (0, Affirm_1.default)(filters, `Invalid filters`);
-             this.init();
-             // Distribute the work of the filter among the various workers, trying to have them match the batch size
-             const { adjustedWorkerCount, workerCount } = this._scopeWork(dataset);
-             dataset._startOperation('filter-parallel', { workerCount });
-             const threads = [];
-             for (let i = 0; i < workerCount; i++) {
-                 const workerId = `worker_filter_${i}`;
-                 const fromLine = adjustedWorkerCount * i;
-                 const toLine = (i === workerCount - 1)
-                     ? Infinity
-                     : (adjustedWorkerCount * i) + adjustedWorkerCount;
-                 const workerData = {
-                     datasetDimensions: dataset.getDimensions(),
-                     datasetFile: dataset.getFile(),
-                     executionId: dataset.getExecutionId(),
-                     datasetName: dataset.name,
-                     datasetDelimiter: dataset.getDelimiter(),
-                     fromLine: fromLine,
-                     toLine: toLine,
-                     workerId: workerId,
-                     filterData: {
-                         rules: filters
-                     }
-                 };
-                 threads.push(this._filterPool.exec('filter', [workerData]));
-             }
-             const results = yield Promise.all(threads);
-             yield this._filterPool.terminate();
-             (0, Affirm_1.default)(results.every(x => x.success), `Error in processing the dataset on multiple threads: filter ${dataset.name}`);
-             yield DatasetManager_1.default.mergeWorkersPaths(results.map(x => x.datasetPath), dataset);
-             dataset
-                 .setDelimiter(results[0].datasetDelimiter)
-                 .setDimensions(results[0].datasetDimensions);
-             dataset._finishOperation('filter-parallel');
-             return dataset;
-         });
-         this.projection = (dataset, consumer) => __awaiter(this, void 0, void 0, function* () {
-             (0, Affirm_1.default)(dataset, `Invalid dataset`);
-             (0, Affirm_1.default)(consumer, `Invalid consumer`);
-             this.init();
-             const { adjustedWorkerCount, workerCount } = this._scopeWork(dataset);
-             dataset._startOperation('projection-parallel', { workerCount });
-             const threads = [];
-             for (let i = 0; i < workerCount; i++) {
-                 const workerId = `worker_projection_${i}`;
-                 const fromLine = adjustedWorkerCount * i;
-                 const toLine = (i === workerCount - 1)
-                     ? Infinity
-                     : (adjustedWorkerCount * i) + adjustedWorkerCount;
-                 const workerData = {
-                     datasetDimensions: dataset.getDimensions(),
-                     datasetFile: dataset.getFile(),
-                     executionId: dataset.getExecutionId(),
-                     datasetName: dataset.name,
-                     datasetDelimiter: dataset.getDelimiter(),
-                     fromLine: fromLine,
-                     toLine: toLine,
-                     workerId: workerId,
-                     projectionData: { consumerName: consumer.name }
-                 };
-                 threads.push(this._projectionPool.exec('projection', [workerData]));
-             }
-             const results = yield Promise.all(threads);
-             yield this._projectionPool.terminate();
-             (0, Affirm_1.default)(results.every(x => x.success), `Error in processing the dataset on multiple threads: projection ${dataset.name}`);
-             yield DatasetManager_1.default.mergeWorkersPaths(results.map(x => x.datasetPath), dataset);
-             dataset
-                 .setDelimiter(results[0].datasetDelimiter)
-                 .setDimensions(results[0].datasetDimensions);
-             dataset._finishOperation('projection-parallel');
-             return dataset;
-         });
-         this.transform = (dataset, consumer) => __awaiter(this, void 0, void 0, function* () {
-             (0, Affirm_1.default)(dataset, `Invalid dataset`);
-             (0, Affirm_1.default)(consumer, `Invalid consumer`);
-             this.init();
-             const { adjustedWorkerCount, workerCount } = this._scopeWork(dataset);
-             dataset._startOperation('transform-parallel', { workerCount });
-             const threads = [];
-             for (let i = 0; i < workerCount; i++) {
-                 const workerId = `worker_transform_${i}`;
-                 const fromLine = adjustedWorkerCount * i;
-                 const toLine = (i === workerCount - 1)
-                     ? Infinity
-                     : (adjustedWorkerCount * i) + adjustedWorkerCount;
-                 const workerData = {
-                     datasetDimensions: dataset.getDimensions(),
-                     datasetFile: dataset.getFile(),
-                     executionId: dataset.getExecutionId(),
-                     datasetName: dataset.name,
-                     datasetDelimiter: dataset.getDelimiter(),
-                     fromLine: fromLine,
-                     toLine: toLine,
-                     workerId: workerId,
-                     transformData: { consumerName: consumer.name }
-                 };
-                 threads.push(this._transformPool.exec('transform', [workerData]));
-             }
-             const results = yield Promise.all(threads);
-             yield this._transformPool.terminate();
-             (0, Affirm_1.default)(results.every(x => x.success), `Error in processing the dataset on multiple threads: transform ${dataset.name}`);
-             yield DatasetManager_1.default.mergeWorkersPaths(results.map(x => x.datasetPath), dataset);
-             dataset
-                 .setDelimiter(results[0].datasetDelimiter)
-                 .setDimensions(results[0].datasetDimensions);
-             dataset._finishOperation('transform-parallel');
-             return dataset;
-         });
-     }
- }
- const ParallelDataset = new ParallelDatasetClass();
- exports.default = ParallelDataset;
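Note on the removed parallel engine: `_scopeWork` sizes the worker pool so each thread handles roughly `INDICATIVE_THREAD_LINE_COUNT` lines, rounded to a whole number of read batches, and `filter`/`projection`/`transform` then hand worker i the line range [linesPerWorker * i, linesPerWorker * (i + 1)), with the last worker reading to Infinity. A minimal standalone sketch of that arithmetic (the constant values are illustrative assumptions, not the package's actual defaults):

// Sketch of ParallelDataset._scopeWork's partitioning arithmetic.
// Constant values are illustrative assumptions only.
const MAX_ITEMS_IN_MEMORY = 10000;           // batch size per read
const INDICATIVE_THREAD_LINE_COUNT = 100000; // target lines per worker
const MAX_THREAD_COUNT = 8;

function scopeWork(datasetCount) {
    // Round the per-worker chunk to a whole number of batches.
    const workerChunkSize = MAX_ITEMS_IN_MEMORY * Math.round(INDICATIVE_THREAD_LINE_COUNT / MAX_ITEMS_IN_MEMORY);
    // Enough workers to cover the dataset, capped at MAX_THREAD_COUNT, floored at 1.
    const workerCount = Math.max(Math.min(Math.ceil(datasetCount / workerChunkSize), MAX_THREAD_COUNT), 1);
    // Lines assigned to each worker; the last worker absorbs any remainder.
    const linesPerWorker = Math.ceil(datasetCount / workerCount);
    return { workerCount, linesPerWorker };
}

console.log(scopeWork(1000000)); // { workerCount: 8, linesPerWorker: 125000 }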
@@ -1,2 +0,0 @@
- "use strict";
- Object.defineProperty(exports, "__esModule", { value: true });
@@ -1,2 +0,0 @@
- "use strict";
- Object.defineProperty(exports, "__esModule", { value: true });
@@ -1,2 +0,0 @@
- "use strict";
- Object.defineProperty(exports, "__esModule", { value: true });
@@ -1,2 +0,0 @@
- "use strict";
- Object.defineProperty(exports, "__esModule", { value: true });
@@ -1,39 +0,0 @@
- "use strict";
- var __importDefault = (this && this.__importDefault) || function (mod) {
-     return (mod && mod.__esModule) ? mod : { "default": mod };
- };
- Object.defineProperty(exports, "__esModule", { value: true });
- const ConsumerManager_1 = __importDefault(require("../consumer/ConsumerManager"));
- const Environment_1 = __importDefault(require("../Environment"));
- class DeploymentPlannerClass {
-     constructor() {
-         this.planConsumer = (consumer) => {
-             const [source, producer] = ConsumerManager_1.default.getSource(consumer);
-             const plan = [];
-             for (let i = 0; i < consumer.outputs.length; i++) {
-                 const output = consumer.outputs[i];
-                 switch (output.format) {
-                     // csv, json, parquet outputs do not need to generate anything at deploy
-                     case 'API': {
-                         throw new Error(`Invalid consumer "${consumer.name}" format "${output.format}": not implemented yet.`);
-                     }
-                 }
-             }
-             return plan;
-         };
-         this.planProducer = (producer) => {
-             const source = Environment_1.default.getSource(producer.source);
-             const plan = [];
-             switch (source.engine) {
-                 case 'aws-redshift': {
-                     if (!producer.settings.direct)
-                         plan.push({ type: 'create-view', producer: producer });
-                     break;
-                 }
-             }
-             return plan;
-         };
-     }
- }
- const DeploymentPlanner = new DeploymentPlannerClass();
- exports.default = DeploymentPlanner;
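Illustratively, a hypothetical producer bound to an `aws-redshift` source and not marked `settings.direct` is the only case where `planProducer` emits work; every other engine yields an empty deploy plan:

// Hypothetical example of DeploymentPlanner.planProducer's output;
// 'orders' and 'warehouse' are invented names, assuming the 'warehouse'
// source resolves to engine 'aws-redshift'.
const producer = { name: 'orders', source: 'warehouse', settings: { direct: false } };
// planProducer(producer) -> [{ type: 'create-view', producer }]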
@@ -1,209 +0,0 @@
- "use strict";
- var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
-     function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
-     return new (P || (P = Promise))(function (resolve, reject) {
-         function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
-         function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
-         function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
-         step((generator = generator.apply(thisArg, _arguments || [])).next());
-     });
- };
- var __importDefault = (this && this.__importDefault) || function (mod) {
-     return (mod && mod.__esModule) ? mod : { "default": mod };
- };
- Object.defineProperty(exports, "__esModule", { value: true });
- const Affirm_1 = __importDefault(require("../../core/Affirm"));
- const DriverFactory_1 = __importDefault(require("../../drivers/DriverFactory"));
- const ConsumerEngine_1 = __importDefault(require("../consumer/ConsumerEngine"));
- const PostProcessor_1 = __importDefault(require("../consumer/PostProcessor"));
- const FileExporter_1 = __importDefault(require("../file/FileExporter"));
- const SQLBuilder_1 = __importDefault(require("../sql/SQLBuilder"));
- const SQLCompiler_1 = __importDefault(require("../sql/SQLCompiler"));
- const ExecutionPlanner_1 = __importDefault(require("./ExecutionPlanner"));
- const RequestExecutor_1 = __importDefault(require("./RequestExecutor"));
- const JoinEngine_1 = __importDefault(require("../transform/JoinEngine"));
- const DatasetManager_1 = __importDefault(require("../dataset/DatasetManager"));
- const Environment_1 = __importDefault(require("../Environment"));
- const Algo_1 = __importDefault(require("../../core/Algo"));
- const Logger_1 = __importDefault(require("../../helper/Logger"));
- const ParallelDataset_1 = __importDefault(require("../dataset/ParallelDataset"));
- const ConsumerOnFinishManager_1 = __importDefault(require("../consumer/ConsumerOnFinishManager"));
- class ExecutionEnvironment {
-     constructor(consumer, executionId) {
-         this.run = (options) => __awaiter(this, void 0, void 0, function* () {
-             var _a, _b, _c, _d;
-             (0, Affirm_1.default)(this._consumer, 'Invalid consumer');
-             const plan = ExecutionPlanner_1.default.plan(this._consumer, options);
-             (0, Affirm_1.default)(plan, `Invalid execution plan`);
-             (0, Affirm_1.default)(plan.length > 0, `Empty execution plan`);
-             Logger_1.default.log(`Starting execution of consumer with plan:\n${plan.map(x => `\t- ${x.type}`).join('\n')}`);
-             const start = performance.now();
-             const result = {
-                 shape: ConsumerEngine_1.default.getOutputShape(this._consumer),
-                 _stats: { cycles: -1, elapsedMS: -1, operations: [], size: -1 }
-             };
-             let currentStep = null;
-             try {
-                 for (const planStep of plan) {
-                     currentStep = planStep;
-                     Logger_1.default.log(`Started step: ${planStep.type} "${(_b = (_a = planStep.producer) === null || _a === void 0 ? void 0 : _a.name) !== null && _b !== void 0 ? _b : this._consumer.name}"`);
-                     switch (planStep.type) {
-                         case 'compile-consumer-to-SQL': {
-                             const sql = SQLCompiler_1.default.getConsumerReference(this._consumer);
-                             this._envData.consumerSQL = sql;
-                             this._envData.finalSQL = sql;
-                             break;
-                         }
-                         case 'compile-execution-request-to-SQL': {
-                             const sql = SQLBuilder_1.default.buildConsumerQuery(options);
-                             this._envData.executionRequestSQL = sql;
-                             this._envData.finalSQL = `WITH consumer AS (${this._envData.consumerSQL})\nSELECT * FROM consumer${this._envData.executionRequestSQL}`;
-                             break;
-                         }
-                         case 'execute-SQL': {
-                             (0, Affirm_1.default)(planStep.source, `Invalid source in execute-SQL step`);
-                             (0, Affirm_1.default)(planStep.producer, `Invalid producer in execute-SQL step`);
-                             const driver = yield DriverFactory_1.default.instantiateSource(planStep.source);
-                             const queryData = (yield driver.query(this._envData.finalSQL)).rows;
-                             let dataset = DatasetManager_1.default.create(planStep.producer, { cProducer: planStep.cProducer, executionId: this._executionId });
-                             dataset = yield dataset.loadFromMemory(queryData, planStep.producer);
-                             this._storeIntermidiate(planStep, dataset);
-                             break;
-                         }
-                         case 'load-dataset': {
-                             (0, Affirm_1.default)(planStep.producer, `Invalid producer in load-dataset step`);
-                             const { producer, cProducer } = planStep;
-                             const source = Environment_1.default.getSource(producer.source);
-                             (0, Affirm_1.default)(source, `Source "${producer.source}" of producer "${producer.name}" not found.`);
-                             let dataset = DatasetManager_1.default.create(producer, { cProducer, executionId: this._executionId });
-                             dataset = yield dataset.load(source);
-                             this._storeIntermidiate(planStep, dataset);
-                             break;
-                         }
-                         case 'prepare-dataset': {
-                             const rawDataset = this._getIntermidiate(planStep);
-                             (0, Affirm_1.default)(rawDataset, `Invalid dataset in prepare-dataset step`);
-                             const { producer } = planStep;
-                             const dataset = yield rawDataset.prepare(producer);
-                             this._storeIntermidiate(planStep, dataset);
-                             break;
-                         }
-                         case 'nested-field-unpacking': {
-                             (0, Affirm_1.default)(planStep.producer, `Invalid producer in nested-field-unpacking step`);
-                             const unpackedData = yield PostProcessor_1.default.unpack(this._resultingDataset, planStep.producer);
-                             this._storeIntermidiate(planStep, unpackedData);
-                             break;
-                         }
-                         case 'post-process-json': {
-                             const dataset = this._getIntermidiate(planStep);
-                             const newDataset = yield ParallelDataset_1.default.projection(dataset, this._consumer);
-                             this._storeIntermidiate(planStep, newDataset);
-                             break;
-                         }
-                         case 'export-file': {
-                             (0, Affirm_1.default)(planStep.output, `Invalid output in export-file step`);
-                             (0, Affirm_1.default)(this._resultingDataset, 'Invalid resulting dataset in export-file step');
-                             const res = yield FileExporter_1.default.export(this._consumer, planStep.output, this._resultingDataset, this._executionId);
-                             result.fileUri = res;
-                             break;
-                         }
-                         case 'apply-execution-request-to-result': {
-                             this._resultingDataset = yield RequestExecutor_1.default.execute(this._resultingDataset, options);
-                             break;
-                         }
-                         case 'apply-consumer-filters-on-JSON': {
-                             this._resultingDataset = yield ParallelDataset_1.default.filter(this._resultingDataset, this._consumer.filters);
-                             break;
-                         }
-                         case 'apply-transformations': {
-                             this._resultingDataset = yield ParallelDataset_1.default.transform(this._resultingDataset, this._consumer);
-                             break;
-                         }
-                         case 'join-producers-data': {
-                             const dataset = yield JoinEngine_1.default.join(this._consumer, this._producedData);
-                             this._resultingDataset = dataset;
-                             break;
-                         }
-                         case 'apply-consumer-distinct': {
-                             this._resultingDataset = yield PostProcessor_1.default.distinct(this._resultingDataset);
-                             break;
-                         }
-                         case 'perform-on-success-actions': {
-                             yield ConsumerOnFinishManager_1.default.performOnSuccessActions(this._consumer, planStep.output);
-                             break;
-                         }
-                         case 'save-execution-stats': {
-                             (0, Affirm_1.default)(this._resultingDataset, `Invalid result dataset in save-execution-stats`);
-                             result._stats = {
-                                 cycles: this._resultingDataset.getCycles(),
-                                 elapsedMS: performance.now() - start,
-                                 operations: structuredClone(this._resultingDataset.getOperations()),
-                                 size: this._resultingDataset.getCount()
-                             };
-                             break;
-                         }
-                         case 'clean-datasets': {
-                             const datasets = [
-                                 ...this._producedData.map(x => x.dataset),
-                                 this._resultingDataset
-                             ].filter(Algo_1.default.hasVal);
-                             const promises = datasets.map(x => x.destroy());
-                             yield Promise.all(promises);
-                             break;
-                         }
-                         default: throw new Error(`Invalid execution plan step type "${planStep.type}"`);
-                     }
-                     Logger_1.default.log(`\tCompleted step: ${planStep.type}`);
-                 }
-             }
-             catch (error) {
-                 const ds = (_c = this._resultingDataset) !== null && _c !== void 0 ? _c : (_d = this._producedData.at(-1)) === null || _d === void 0 ? void 0 : _d.dataset;
-                 if (ds)
-                     Logger_1.default.log(`Failed execution of consumer at step ${currentStep.type}:\n\tSize: ${ds.getCount()}\n\tCycles: ${ds.getCycles()}\n\tOperations: ${Logger_1.default.formatList(ds.getOperations())}`);
-                 Logger_1.default.log(`\tFailed step: ${currentStep.type}->\n\t${error}`);
-                 try {
-                     yield ConsumerOnFinishManager_1.default.performOnErrorActions(this._consumer, currentStep.output);
-                 }
-                 catch (error) {
-                     Logger_1.default.log(`Error when trying to perform onError actions on failed consumer ${error}`);
-                 }
-                 // IMPORTANT: cleanup all the datasets to not leave any data around and to avoid memory leaks
-                 const datasets = [
-                     ...this._producedData.map(x => x.dataset),
-                     this._resultingDataset
-                 ].filter(Algo_1.default.hasVal);
-                 const promises = datasets.map(x => x.destroy());
-                 yield Promise.all(promises);
-                 throw error;
-             }
-             Logger_1.default.log(`Completed execution of consumer:\n\tSize: ${result._stats.size}\n\tCycles: ${result._stats.cycles}\n\tTime: ${result._stats.elapsedMS}\n\tOperations: ${Logger_1.default.formatList(result._stats.operations)}`);
-             return result;
-         });
-         this._storeIntermidiate = (step, dataset) => {
-             var _a, _b;
-             (0, Affirm_1.default)(step, 'Invalid step');
-             const key = (_b = (_a = step.producer) === null || _a === void 0 ? void 0 : _a.name) !== null && _b !== void 0 ? _b : '_default_';
-             let pData = this._producedData.find(x => x.producerKey === key);
-             if (!pData) {
-                 pData = { producerKey: key, dataset: null };
-                 this._producedData.push(pData);
-             }
-             pData.dataset = dataset;
-         };
-         this._getIntermidiate = (step) => {
-             var _a, _b;
-             (0, Affirm_1.default)(step, 'Invalid step');
-             const key = (_b = (_a = step.producer) === null || _a === void 0 ? void 0 : _a.name) !== null && _b !== void 0 ? _b : '_default_';
-             const produced = this._producedData.find(x => x.producerKey === key);
-             (0, Affirm_1.default)(produced, `No produced dataset found for step "${step.type}" of producer "${key}".`);
-             return produced.dataset;
-         };
-         this._consumer = consumer;
-         this._envData = { consumerSQL: null, executionRequestSQL: null, finalSQL: null };
-         this._producedData = [];
-         this._resultingDataset = null;
-         // A short unique id to isolate temp dataset files & output names
-         this._executionId = executionId;
-     }
- }
- exports.default = ExecutionEnvironment;
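The removed `run` method is a small plan interpreter: it walks the typed steps in order, dispatches on `planStep.type`, and on any failure runs the consumer's onError actions and destroys every intermediate dataset before rethrowing. A stripped-down sketch of that control flow (the `handlers` and `cleanup` arguments are placeholders, not the package's API):

// Sketch of the plan-interpreter control flow in ExecutionEnvironment.run.
async function runPlan(plan, handlers, cleanup) {
    let currentStep = null;
    try {
        for (const step of plan) {
            currentStep = step;
            const handler = handlers[step.type];
            if (!handler)
                throw new Error(`Invalid execution plan step type "${step.type}"`);
            await handler(step); // e.g. execute-SQL, load-dataset, export-file
        }
    }
    catch (error) {
        console.log(`Failed step: ${currentStep?.type} -> ${error}`);
        await cleanup(); // destroy intermediate datasets to avoid leaks
        throw error;
    }
}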
@@ -1,131 +0,0 @@
- "use strict";
- var __importDefault = (this && this.__importDefault) || function (mod) {
-     return (mod && mod.__esModule) ? mod : { "default": mod };
- };
- Object.defineProperty(exports, "__esModule", { value: true });
- const Affirm_1 = __importDefault(require("../../core/Affirm"));
- const Algo_1 = __importDefault(require("../../core/Algo"));
- const ConsumerManager_1 = __importDefault(require("../consumer/ConsumerManager"));
- const Environment_1 = __importDefault(require("../Environment"));
- class ExecutionPlannerClass {
-     constructor() {
-         this.getEngineClass = (engine) => {
-             switch (engine) {
-                 case 'aws-dynamodb': return 'no-sql';
-                 case 'aws-redshift':
-                 case 'postgres': return 'sql';
-                 case 'delta-share':
-                 case 'aws-s3': return 'file';
-                 case 'local': return 'local';
-             }
-         };
-         this.plan = (consumer, options) => {
-             var _a, _b, _c;
-             (0, Affirm_1.default)(consumer, 'Invalid consumer');
-             const producersPlan = this._planProducers(consumer, options);
-             const plan = [...producersPlan];
-             // At this point I have created the .dataset and can perform operations on it
-             // TODO: how to handle pagination of SQL results?
-             // Apply the transformations to the fields of the consumer
-             // TODO: transformations can also be applied directly to the producer... how???
-             if (consumer.fields.some(x => Algo_1.default.hasVal(x.transform)))
-                 plan.push({ type: 'apply-transformations' });
-             const [source] = ConsumerManager_1.default.getSource(consumer);
-             const engineClass = this.getEngineClass(source.engine);
-             for (const output of consumer.outputs) {
-                 switch (output.format.toUpperCase()) {
-                     case 'JSON': {
-                         if (engineClass === 'file' && Algo_1.default.hasVal(options))
-                             plan.push({ type: 'apply-execution-request-to-result' });
-                         // TODO: test if it is needed and if it doesn't break something else
-                         if (engineClass === 'sql')
-                             plan.push({ type: 'post-process-json', output });
-                         if ((_a = consumer.options) === null || _a === void 0 ? void 0 : _a.distinct)
-                             plan.push({ type: 'apply-consumer-distinct' });
-                         plan.push({ type: 'export-file', output });
-                         break;
-                     }
-                     case 'CSV':
-                     case 'PARQUET': {
-                         if ((_b = consumer.options) === null || _b === void 0 ? void 0 : _b.distinct)
-                             plan.push({ type: 'apply-consumer-distinct' });
-                         plan.push({ type: 'export-file', output });
-                         break;
-                     }
-                     case 'API': {
-                         if (engineClass === 'file' && Algo_1.default.hasVal(options))
-                             plan.push({ type: 'apply-execution-request-to-result' });
-                         if ((_c = consumer.options) === null || _c === void 0 ? void 0 : _c.distinct)
-                             plan.push({ type: 'apply-consumer-distinct' });
-                         break;
-                     }
-                     case 'SQL': {
-                         // TODO: what should I do here?? do I need to do anything?
-                         break;
-                     }
-                     default:
-                         throw new Error(`Output format "${output.format}" not supported`);
-                 }
-                 if (output.onSuccess && output.onSuccess.length > 0)
-                     plan.push({ type: 'perform-on-success-actions', output });
-             }
-             plan.push({ type: 'clean-datasets' });
-             plan.push({ type: 'save-execution-stats' });
-             return plan;
-         };
-         this._planProducers = (consumer, options) => {
-             (0, Affirm_1.default)(consumer, 'Invalid consumer');
-             const producers = consumer.producers.map(x => Environment_1.default.getProducer(x.name));
-             (0, Affirm_1.default)(producers, `Invalid producers on consumer "${consumer.name}"`);
-             (0, Affirm_1.default)(producers.every(x => Algo_1.default.hasVal(x)), `One or more producers of consumer "${consumer.name}" not found.`);
-             const sources = producers.map(x => Environment_1.default.getSource(x.source));
-             (0, Affirm_1.default)(sources, `Invalid sources on consumer "${consumer.name}"`);
-             (0, Affirm_1.default)(sources.every(x => Algo_1.default.hasVal(x)), `One or more sources of consumer "${consumer.name}" not found.`);
-             const engineClasses = sources.map(x => this.getEngineClass(x.engine));
-             const uniqEngineClasses = Algo_1.default.uniq(engineClasses);
-             const plan = [];
-             if (uniqEngineClasses.length === 1 && uniqEngineClasses[0] === 'sql')
-                 plan.push(...this._planProducer(producers[0], consumer.producers[0], options));
-             else
-                 plan.push(...(producers.flatMap((x, i) => this._planProducer(x, consumer.producers[i], options))));
-             // I technically don't need this, but I keep it to merge all the datasets to a single one
-             // so the other steps of the plan can work with a single dataset variable
-             plan.push({ type: 'join-producers-data' });
-             if (consumer.filters && consumer.filters.length > 0)
-                 plan.push({ type: 'apply-consumer-filters-on-JSON' });
-             return plan;
-         };
-         this._planProducer = (producer, cProducer, options) => {
-             (0, Affirm_1.default)(producer, 'Invalid producer');
-             const source = Environment_1.default.getSource(producer.source);
-             (0, Affirm_1.default)(source, `Source "${producer.source}" of producer "${producer.name}" not found.`);
-             const plan = [];
-             const producerEngine = source.engine;
-             switch (producerEngine) {
-                 case 'postgres':
-                 case 'aws-redshift': {
-                     plan.push({ type: 'compile-consumer-to-SQL', producer, cProducer });
-                     if (Algo_1.default.hasVal(options))
-                         plan.push({ type: 'compile-execution-request-to-SQL', producer, cProducer });
-                     plan.push({ type: 'execute-SQL', source: source, producer, cProducer });
-                     break;
-                 }
-                 case 'local':
-                 case 'aws-s3':
-                 case 'delta-share':
-                 case 'http-api': {
-                     plan.push({ type: 'load-dataset', producer, cProducer });
-                     plan.push({ type: 'prepare-dataset', producer, cProducer });
-                     if (producer.dimensions.some(x => { var _a, _b; return ((_a = x.alias) === null || _a === void 0 ? void 0 : _a.includes('{')) || ((_b = x.alias) === null || _b === void 0 ? void 0 : _b.includes('[')); }))
-                         plan.push({ type: 'nested-field-unpacking', producer, cProducer });
-                     plan.push({ type: 'post-process-json', producer, cProducer });
-                     break;
-                 }
-                 default: throw new Error(`Engine "${producerEngine}" not supported`);
-             }
-             return plan;
-         };
-     }
- }
- const ExecutionPlanner = new ExecutionPlannerClass();
- exports.default = ExecutionPlanner;
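To make the planner concrete: a hypothetical consumer with a single Postgres-backed producer, no execution-request options, no filters, no transformations, and one CSV output would produce roughly this step order (payloads omitted):

// Hypothetical output of ExecutionPlanner.plan for one postgres
// producer and a CSV output, following the branches above.
const examplePlan = [
    { type: 'compile-consumer-to-SQL' }, // from _planProducer (sql engine)
    { type: 'execute-SQL' },             // run the compiled SQL on the driver
    { type: 'join-producers-data' },     // collapse producer datasets into one
    { type: 'export-file' },             // CSV output branch
    { type: 'clean-datasets' },
    { type: 'save-execution-stats' }
];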
@@ -1,29 +0,0 @@
- "use strict";
- var __importDefault = (this && this.__importDefault) || function (mod) {
-     return (mod && mod.__esModule) ? mod : { "default": mod };
- };
- Object.defineProperty(exports, "__esModule", { value: true });
- const Affirm_1 = __importDefault(require("../../core/Affirm"));
- class FileCompilerClass {
-     constructor() {
-         this.compileProducer = (producer, source) => {
-             var _a;
-             (0, Affirm_1.default)(producer, `Invalid producer`);
-             (0, Affirm_1.default)(source, `Invalid source`);
-             (0, Affirm_1.default)((_a = producer.settings.fileKey) !== null && _a !== void 0 ? _a : producer.settings.sqlTable, `Missing required file key in producer settings`);
-             (0, Affirm_1.default)(producer.settings.fileType, `Missing required file type in producer settings`);
-             (0, Affirm_1.default)(!producer.measures || producer.measures.length === 0, `Cannot use "measure" with a producer linked to a file (only dimensions are allowed).`);
-             const columns = producer.dimensions.map(x => ({
-                 aliasInProducer: x.alias,
-                 nameInProducer: x.name,
-                 consumerAlias: null,
-                 consumerKey: null,
-                 owner: producer.name,
-                 dimension: x
-             }));
-             return columns;
-         };
-     }
- }
- const FileCompiler = new FileCompilerClass();
- exports.default = FileCompiler;
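As a sketch of the returned shape, a hypothetical file-backed producer with one dimension (invented values) compiles to one column descriptor per dimension:

// Hypothetical input/output for FileCompiler.compileProducer.
const producer = {
    name: 'orders',
    source: 'datalake',
    settings: { fileKey: 'orders.csv', fileType: 'csv' },
    dimensions: [{ name: 'order_id', alias: 'id' }],
    measures: []
};
// compileProducer(producer, source) ->
// [{ aliasInProducer: 'id', nameInProducer: 'order_id',
//    consumerAlias: null, consumerKey: null, owner: 'orders',
//    dimension: { name: 'order_id', alias: 'id' } }]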
@@ -1,34 +0,0 @@
- "use strict";
- var __importDefault = (this && this.__importDefault) || function (mod) {
-     return (mod && mod.__esModule) ? mod : { "default": mod };
- };
- Object.defineProperty(exports, "__esModule", { value: true });
- const Affirm_1 = __importDefault(require("../../core/Affirm"));
- const Environment_1 = __importDefault(require("../Environment"));
- class FileContentBuilderClass {
-     constructor() {
-         /**
-          * Converts an array of strings into a set of chunk strings joined with the separator.
-          * In the V8 engine there is a maximum length to a string so I can't just join it all.
-          * I use this to create chunks that are not too long.
-          */
-         this.compose = (lines, separator) => {
-             Affirm_1.default.hasValue(lines, 'Invalid lines');
-             Affirm_1.default.hasValue(separator, 'Invalid separator');
-             const maxStringLength = parseInt(Environment_1.default.get('STRING_MAX_CHARACTERS_LENGTH'));
-             const chunks = [];
-             let currentChunk = '';
-             for (const line of lines) {
-                 currentChunk += (line + separator);
-                 if (currentChunk.length >= maxStringLength) {
-                     chunks.push(currentChunk);
-                     currentChunk = '';
-                 }
-             }
-             chunks.push(currentChunk);
-             return chunks;
-         };
-     }
- }
- const FileContentBuilder = new FileContentBuilderClass();
- exports.default = FileContentBuilder;
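A runnable sketch of the same chunking idea, with the length cap passed in directly rather than read from the environment (the default cap is an illustrative assumption):

// Join lines with a separator, but cut a new chunk whenever the running
// string reaches the cap, since V8 limits how long a single string can grow.
function compose(lines, separator, maxStringLength = 256 * 1024 * 1024) {
    const chunks = [];
    let currentChunk = '';
    for (const line of lines) {
        currentChunk += line + separator;
        if (currentChunk.length >= maxStringLength) {
            chunks.push(currentChunk);
            currentChunk = '';
        }
    }
    chunks.push(currentChunk); // flush the final partial chunk
    return chunks;
}

console.log(compose(['a', 'b', 'c'], '\n', 4)); // ['a\nb\n', 'c\n']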