@forzalabs/remora 0.2.5 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/Constants.js +10 -2
  2. package/actions/debug.js +1 -0
  3. package/actions/deploy.js +1 -0
  4. package/actions/run.js +17 -13
  5. package/actions/sample.js +1 -1
  6. package/core/Algo.js +8 -4
  7. package/definitions/ExecutorDefinitions.js +2 -0
  8. package/definitions/json_schemas/consumer-schema.json +1 -1
  9. package/definitions/json_schemas/producer-schema.json +1 -1
  10. package/definitions/temp.js +2 -0
  11. package/drivers/DeltaShareDriver.js +4 -0
  12. package/drivers/DriverFactory.js +10 -10
  13. package/drivers/DriverHelper.js +33 -10
  14. package/drivers/HttpApiDriver.js +4 -0
  15. package/drivers/LocalDriver.js +72 -5
  16. package/drivers/RedshiftDriver.js +4 -0
  17. package/drivers/S3Driver.js +36 -52
  18. package/drivers/files/LocalDestinationDriver.js +200 -0
  19. package/drivers/files/LocalSourceDriver.js +394 -0
  20. package/drivers/s3/S3DestinationDriver.js +159 -0
  21. package/drivers/s3/S3SourceDriver.js +455 -0
  22. package/engines/ai/LLM.js +0 -11
  23. package/engines/consumer/ConsumerEngine.js +0 -77
  24. package/engines/consumer/ConsumerManager.js +61 -36
  25. package/engines/consumer/ConsumerOnFinishManager.js +14 -0
  26. package/engines/consumer/PostProcessor.js +1 -7
  27. package/engines/dataset/Dataset.js +0 -61
  28. package/engines/dataset/DatasetManager.js +16 -76
  29. package/engines/dataset/DatasetRecord.js +4 -3
  30. package/engines/deployment/DeploymentPlanner.js +0 -7
  31. package/engines/execution/ExecutionPlanner.js +2 -2
  32. package/engines/execution/RequestExecutor.js +4 -45
  33. package/engines/file/FileExporter.js +7 -32
  34. package/engines/parsing/CSVParser.js +27 -26
  35. package/engines/parsing/LineParser.js +52 -0
  36. package/engines/parsing/XMLParser.js +1 -1
  37. package/engines/producer/ProducerEngine.js +0 -45
  38. package/engines/scheduler/CronScheduler.js +12 -4
  39. package/engines/scheduler/QueueManager.js +11 -4
  40. package/engines/sql/SQLCompiler.js +4 -4
  41. package/engines/transform/JoinEngine.js +3 -3
  42. package/engines/transform/TransformationEngine.js +3 -86
  43. package/engines/usage/UsageManager.js +8 -6
  44. package/engines/validation/Validator.js +12 -18
  45. package/executors/ConsumerExecutor.js +152 -0
  46. package/executors/Executor.js +168 -0
  47. package/executors/ExecutorOrchestrator.js +315 -0
  48. package/executors/ExecutorPerformance.js +17 -0
  49. package/executors/ExecutorProgress.js +52 -0
  50. package/executors/OutputExecutor.js +118 -0
  51. package/executors/ProducerExecutor.js +108 -0
  52. package/package.json +3 -3
  53. package/workers/ExecutorWorker.js +48 -0
@@ -1,28 +1,11 @@
1
1
  "use strict";
2
- var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
- function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
- return new (P || (P = Promise))(function (resolve, reject) {
5
- function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
- function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
- function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
- step((generator = generator.apply(thisArg, _arguments || [])).next());
9
- });
10
- };
11
2
  var __importDefault = (this && this.__importDefault) || function (mod) {
12
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
13
4
  };
14
5
  Object.defineProperty(exports, "__esModule", { value: true });
15
6
  const Affirm_1 = __importDefault(require("../../core/Affirm"));
16
- const Algo_1 = __importDefault(require("../../core/Algo"));
17
- const DriverFactory_1 = __importDefault(require("../../drivers/DriverFactory"));
18
- const Helper_1 = __importDefault(require("../../helper/Helper"));
19
- const DeploymentPlanner_1 = __importDefault(require("../deployment/DeploymentPlanner"));
20
7
  const Environment_1 = __importDefault(require("../Environment"));
21
- const ExecutionEnvironment_1 = __importDefault(require("../execution/ExecutionEnvironment"));
22
8
  const ProducerManager_1 = __importDefault(require("../producer/ProducerManager"));
23
- const SQLCompiler_1 = __importDefault(require("../sql/SQLCompiler"));
24
- const SQLUtils_1 = __importDefault(require("../sql/SQLUtils"));
25
- const UsageManager_1 = __importDefault(require("../usage/UsageManager"));
26
9
  const ConsumerManager_1 = __importDefault(require("./ConsumerManager"));
27
10
  class ConsumerEngineClass {
28
11
  constructor() {
@@ -70,10 +53,6 @@ class ConsumerEngineClass {
70
53
  selectedColumns.push(col);
71
54
  });
72
55
  }
73
- else if (field.grouping) {
74
- // This field should be ignored since it is only created when building the output for supported formats (json)
75
- continue;
76
- }
77
56
  else {
78
57
  const col = ConsumerManager_1.default.searchFieldInColumns(field, availableColumns, consumer);
79
58
  (0, Affirm_1.default)(col, `Consumer "${consumer.name}" misconfiguration: the requested field "${field.key}" is not found in any of the specified producers ("${consumer.producers.map(x => x.name).join(', ')}")`);
@@ -86,62 +65,6 @@ class ConsumerEngineClass {
86
65
  (0, Affirm_1.default)(columnsWithNoAlias.length === 0, `Consumer "${consumer.name}" compilation error: some selected fields don't have a correct alias or key (${columnsWithNoAlias.map(x => x.nameInProducer).join(', ')})`);
87
66
  return selectedColumns;
88
67
  };
89
- this.deploy = (consumer) => __awaiter(this, void 0, void 0, function* () {
90
- (0, Affirm_1.default)(consumer, `Invalid consumer`);
91
- const firstProd = Environment_1.default.getFirstProducer(consumer.producers[0].name);
92
- (0, Affirm_1.default)(firstProd, `Missing producer in consumer "${consumer.name}"`);
93
- // For now we only support connecting producers of the same engine type to a consumer, so we give an error if we detect different ones
94
- const allSources = consumer.producers.map(x => Environment_1.default.getSource(Environment_1.default.getProducer(x.name).source));
95
- const uniqEngines = Algo_1.default.uniqBy(allSources, 'engine');
96
- (0, Affirm_1.default)(uniqEngines.length === 1, `Sources with different engines were used in a single consumer (${uniqEngines.join(', ')})`);
97
- // For now we also only support consumers that have producers ALL having the same exact source
98
- const uniqNames = Algo_1.default.uniqBy(allSources, 'name');
99
- (0, Affirm_1.default)(uniqNames.length === 1, `Producers with different sources were used in a single consumer (${uniqNames.join(', ')})`);
100
- const source = Environment_1.default.getSource(firstProd.source);
101
- const driver = yield DriverFactory_1.default.instantiateSource(source);
102
- const plan = DeploymentPlanner_1.default.planConsumer(consumer);
103
- for (const planStep of plan) {
104
- switch (planStep.type) {
105
- case 'create-materialized-view': {
106
- const sql = SQLCompiler_1.default.compileConsumer(consumer);
107
- (0, Affirm_1.default)(sql, `Invalid SQL from deployment compilation for consumer "${consumer.name}"`);
108
- const internalSchema = Environment_1.default.get('REMORA_SCHEMA');
109
- (0, Affirm_1.default)(internalSchema, `Missing "REMORA_SCHEMA" on project settings (needed due to "${consumer.name}" wanting to create a view)`);
110
- // TODO When I want to update a materialize view there is no way except killing it and recreating it. The problem is that: 1) it is not said that it can be deleted since that materialize view could have some dependencies 2) we should find a way to update it without it going completely offline.
111
- const mvSQL = `
112
- DROP MATERIALIZED VIEW IF EXISTS "${internalSchema}"."${SQLUtils_1.default.acceleratedViewName(consumer.name)}";
113
- CREATE MATERIALIZED VIEW "${internalSchema}"."${SQLUtils_1.default.acceleratedViewName(consumer.name)}" AS ${sql}`;
114
- yield driver.execute(mvSQL);
115
- break;
116
- }
117
- case 'create-view': {
118
- const sql = SQLCompiler_1.default.compileConsumer(consumer);
119
- (0, Affirm_1.default)(sql, `Invalid SQL from deployment compilation for consumer "${consumer.name}"`);
120
- const internalSchema = Environment_1.default.get('REMORA_SCHEMA');
121
- (0, Affirm_1.default)(internalSchema, `Missing "REMORA_SCHEMA" on project settings (needed due to "${consumer.name}" wanting to create a view)`);
122
- const vSQL = `CREATE OR REPLACE VIEW "${internalSchema}"."${SQLUtils_1.default.sanitizeName(consumer.name)}" AS ${sql}`;
123
- yield driver.execute(vSQL);
124
- break;
125
- }
126
- default: throw new Error(`Invalid execution consumer plan step type "${planStep.type}"`);
127
- }
128
- }
129
- });
130
- this.execute = (consumer, options, user, details) => __awaiter(this, void 0, void 0, function* () {
131
- (0, Affirm_1.default)(consumer, `Invalid consumer`);
132
- (0, Affirm_1.default)(options, `Invalid execute consume options`);
133
- const { usageId } = UsageManager_1.default.startUsage(consumer, user, details);
134
- try {
135
- const execution = new ExecutionEnvironment_1.default(consumer, usageId);
136
- const result = yield execution.run(options);
137
- UsageManager_1.default.endUsage(usageId, result._stats.size);
138
- return result;
139
- }
140
- catch (error) {
141
- UsageManager_1.default.failUsage(usageId, Helper_1.default.asError(error).message);
142
- throw error;
143
- }
144
- });
145
68
  this.getOutputShape = (consumer) => {
146
69
  (0, Affirm_1.default)(consumer, `Invalid consumer`);
147
70
  const compiled = this.compile(consumer);
@@ -6,21 +6,9 @@ Object.defineProperty(exports, "__esModule", { value: true });
6
6
  const Affirm_1 = __importDefault(require("../../core/Affirm"));
7
7
  const Algo_1 = __importDefault(require("../../core/Algo"));
8
8
  const Environment_1 = __importDefault(require("../Environment"));
9
+ const ProducerManager_1 = __importDefault(require("../producer/ProducerManager"));
9
10
  class ConsumerManagerClass {
10
11
  constructor() {
11
- this.getConsumerFlatFields = (consumer) => {
12
- (0, Affirm_1.default)(consumer, 'Invalid consumer');
13
- return this.getFlatFields(consumer.fields);
14
- };
15
- this.getFlatFields = (list) => {
16
- let result = [...list];
17
- for (let i = 0; i < list.length; i++) {
18
- const field = list[i];
19
- if (field.grouping && field.grouping.subFields && field.grouping.subFields.length > 0)
20
- result = [...result, ...this.getFlatFields(field.grouping.subFields)];
21
- }
22
- return result;
23
- };
24
12
  /**
25
13
  * Returns the full list of fields that are used by a consumer, while keeping the nested structure of fields.
26
14
  * If there are *, then replace them with the actual fields found in the underlying producer/consumer
@@ -28,17 +16,16 @@ class ConsumerManagerClass {
28
16
  this.getExpandedFields = (consumer) => {
29
17
  (0, Affirm_1.default)(consumer, 'Invalid consumer');
30
18
  const availableColumns = this.getAvailableColumns(consumer);
31
- const convertedFields = this.convertFields(consumer.fields);
19
+ const convertedFields = consumer.fields.map(x => {
20
+ var _a;
21
+ return ({
22
+ cField: x,
23
+ finalKey: (_a = x.alias) !== null && _a !== void 0 ? _a : x.key
24
+ });
25
+ });
32
26
  const expandedFields = convertedFields.flatMap(x => this.expandField(consumer, x, availableColumns));
33
27
  return expandedFields;
34
28
  };
35
- this.convertFields = (fieldsToConvert) => {
36
- (0, Affirm_1.default)(fieldsToConvert, 'Invalid fields');
37
- const convertedFields = fieldsToConvert.map(x => ({
38
- cField: x
39
- }));
40
- return convertedFields;
41
- };
42
29
  /**
43
30
  * Return all the available columns (dimensions and measures) to the consumer given its producers
44
31
  */
@@ -92,6 +79,7 @@ class ConsumerManagerClass {
92
79
  alias: x.nameInProducer,
93
80
  from: x.owner
94
81
  },
82
+ finalKey: x.nameInProducer,
95
83
  dimension: x.dimension,
96
84
  measure: x.measure
97
85
  })));
@@ -105,27 +93,13 @@ class ConsumerManagerClass {
105
93
  alias: col.nameInProducer,
106
94
  from: col.owner
107
95
  },
96
+ finalKey: col.nameInProducer,
108
97
  dimension: col.dimension,
109
98
  measure: col.measure
110
99
  });
111
100
  });
112
101
  }
113
102
  }
114
- else if (field.cField.grouping) {
115
- expandedFields.push({
116
- cField: {
117
- key: field.cField.key,
118
- alias: field.cField.alias,
119
- from: field.cField.from,
120
- grouping: {
121
- groupingKey: field.cField.grouping.groupingKey,
122
- subFields: field.cField.grouping.subFields.flatMap(x => this.expandField(consumer, { cField: x }, availableColumns)).map(x => x.cField)
123
- }
124
- },
125
- dimension: field.dimension,
126
- measure: field.measure
127
- });
128
- }
129
103
  else {
130
104
  const col = ConsumerManager.searchFieldInColumns(field.cField, availableColumns, consumer);
131
105
  (0, Affirm_1.default)(col, `Consumer "${consumer.name}" misconfiguration: the requested field "${field.cField.key}" is not found in any of the specified producers ("${consumer.producers.map(x => x.name).join(', ')}")`);
@@ -187,6 +161,57 @@ class ConsumerManagerClass {
187
161
  (0, Affirm_1.default)(uniqNames.length === 1, `Producers with different sources were used in the consumer "${consumer.name}" (${uniqNames.join(', ')})`);
188
162
  return [sources[0], producers[0]];
189
163
  };
164
+ this.compile = (consumer) => {
165
+ var _a, _b;
166
+ (0, Affirm_1.default)(consumer, `Invalid consumer`);
167
+ const availableColumns = this.getAvailableColumns(consumer);
168
+ const selectedColumns = [];
169
+ const flat = consumer.fields;
170
+ for (let i = 0; i < flat.length; i++) {
171
+ const field = flat[i];
172
+ // TODO: replace with the new funcitons in the consumermanager to reduce diplicate code
173
+ if (field.key === '*') {
174
+ const from = (_a = field.from) !== null && _a !== void 0 ? _a : (consumer.producers.length === 1 ? consumer.producers[0].name : null);
175
+ availableColumns.filter(x => x.owner === from).forEach(col => {
176
+ col.consumerKey = col.nameInProducer;
177
+ col.consumerAlias = col.nameInProducer;
178
+ selectedColumns.push(col);
179
+ });
180
+ }
181
+ else {
182
+ const col = ConsumerManager.searchFieldInColumns(field, availableColumns, consumer);
183
+ (0, Affirm_1.default)(col, `Consumer "${consumer.name}" misconfiguration: the requested field "${field.key}" is not found in any of the specified producers ("${consumer.producers.map(x => x.name).join(', ')}")`);
184
+ col.consumerKey = field.key;
185
+ col.consumerAlias = (_b = field.alias) !== null && _b !== void 0 ? _b : field.key;
186
+ selectedColumns.push(col);
187
+ }
188
+ }
189
+ const columnsWithNoAlias = selectedColumns.filter(x => !x.consumerAlias || !x.consumerKey);
190
+ (0, Affirm_1.default)(columnsWithNoAlias.length === 0, `Consumer "${consumer.name}" compilation error: some selected fields don't have a correct alias or key (${columnsWithNoAlias.map(x => x.nameInProducer).join(', ')})`);
191
+ return selectedColumns;
192
+ };
193
+ this.getOutputShape = (consumer) => {
194
+ (0, Affirm_1.default)(consumer, `Invalid consumer`);
195
+ const compiled = this.compile(consumer);
196
+ const outDimensions = compiled.map(x => {
197
+ var _a, _b, _c, _d, _e, _f, _g;
198
+ return ({
199
+ name: (_a = x.consumerAlias) !== null && _a !== void 0 ? _a : x.consumerKey,
200
+ type: (_b = x.dimension) === null || _b === void 0 ? void 0 : _b.type,
201
+ classification: (_c = x.dimension) === null || _c === void 0 ? void 0 : _c.classification,
202
+ description: (_e = (_d = x.dimension) === null || _d === void 0 ? void 0 : _d.description) !== null && _e !== void 0 ? _e : (_f = x.measure) === null || _f === void 0 ? void 0 : _f.description,
203
+ mask: ProducerManager_1.default.getMask(x.dimension),
204
+ pk: (_g = x.dimension) === null || _g === void 0 ? void 0 : _g.pk
205
+ });
206
+ });
207
+ return {
208
+ _version: consumer._version,
209
+ name: consumer.name,
210
+ description: consumer.description,
211
+ metadata: consumer.metadata,
212
+ dimensions: outDimensions
213
+ };
214
+ };
190
215
  }
191
216
  }
192
217
  const ConsumerManager = new ConsumerManagerClass();
@@ -17,6 +17,20 @@ const Environment_1 = __importDefault(require("../Environment"));
17
17
  const DriverFactory_1 = __importDefault(require("../../drivers/DriverFactory"));
18
18
  class ConsumerOnFinishManagerClass {
19
19
  constructor() {
20
+ this.onConsumerSuccess = (consumer, executionId) => __awaiter(this, void 0, void 0, function* () {
21
+ void executionId;
22
+ for (const output of consumer.outputs) {
23
+ if (output.onSuccess)
24
+ yield this.performOnSuccessActions(consumer, output);
25
+ }
26
+ });
27
+ this.onConsumerError = (consumer, executionId) => __awaiter(this, void 0, void 0, function* () {
28
+ void executionId;
29
+ for (const output of consumer.outputs) {
30
+ if (output.onError)
31
+ yield this.performOnSuccessActions(consumer, output);
32
+ }
33
+ });
20
34
  this.performOnSuccessActions = (consumer, output) => __awaiter(this, void 0, void 0, function* () {
21
35
  (0, Affirm_1.default)(consumer, 'Invalid consumer');
22
36
  (0, Affirm_1.default)(output, 'Invalid output');
@@ -46,9 +46,8 @@ class PostProcessorClass {
46
46
  }
47
47
  record.sortDimensions();
48
48
  }
49
- if (!updatedDimensions) {
49
+ if (!updatedDimensions)
50
50
  updatedDimensions = record._dimensions;
51
- }
52
51
  // Finally apply the rules and changes of the consumer fields to the record
53
52
  for (const field of fields) {
54
53
  const { key, alias } = field.cField;
@@ -71,11 +70,6 @@ class PostProcessorClass {
71
70
  fakeRecord.wholeUpdateDimension(update);
72
71
  updatedDimensions = fakeRecord._dimensions;
73
72
  }
74
- // Validate that dimensions have sequential indexes with no gaps
75
- const indexes = updatedDimensions.map(d => d.index).sort((a, b) => a - b);
76
- for (let i = 0; i < indexes.length; i++) {
77
- (0, Affirm_1.default)(indexes[i] === i, `Missing or duplicate dimension index: expected index ${i} but found ${indexes[i]}. See dimension updates applied on consumer "${consumer.name}".`);
78
- }
79
73
  dataset.setDimensions(updatedDimensions);
80
74
  return dataset;
81
75
  });
@@ -27,9 +27,7 @@ const Constants_1 = __importDefault(require("../../Constants"));
27
27
  const DatasetManager_1 = __importDefault(require("./DatasetManager"));
28
28
  const DatasetRecord_1 = __importDefault(require("./DatasetRecord"));
29
29
  const DatasetRecordPool_1 = __importDefault(require("./DatasetRecordPool"));
30
- const xlsx_1 = __importDefault(require("xlsx"));
31
30
  const Affirm_1 = __importDefault(require("../../core/Affirm"));
32
- const XMLParser_1 = __importDefault(require("../parsing/XMLParser"));
33
31
  const DriverFactory_1 = __importDefault(require("../../drivers/DriverFactory"));
34
32
  const Helper_1 = __importDefault(require("../../helper/Helper"));
35
33
  const Algo_1 = __importDefault(require("../../core/Algo"));
@@ -665,65 +663,6 @@ class Dataset {
665
663
  this._finishOperation('read-lines');
666
664
  return results;
667
665
  });
668
- /**
669
- * - computes dimensions + delimiter
670
- * - cleans data (removes empty rows) and headers
671
- * - parses json, xml, xls
672
- */
673
- this.prepare = (producer) => __awaiter(this, void 0, void 0, function* () {
674
- this._startOperation('prepare');
675
- const dimsRes = yield DatasetManager_1.default.buildDimensions(this, producer);
676
- yield this.prepareWithDimensions(dimsRes);
677
- this._finishOperation('prepare');
678
- return this;
679
- });
680
- this.prepareWithDimensions = (dimResult) => __awaiter(this, void 0, void 0, function* () {
681
- this._startOperation('prepare-with-dimensions');
682
- const { delimiter, dimensions } = dimResult;
683
- this._delimiter = delimiter;
684
- this._dimensions = dimensions;
685
- switch (this._file.fileType) {
686
- case 'TXT':
687
- case 'CSV':
688
- case 'JSON':
689
- case 'JSONL':
690
- break;
691
- case 'XLS':
692
- case 'XLSX': {
693
- const excel = xlsx_1.default.readFile(this._path);
694
- let targetSheetName = this._file.sheetName;
695
- if (!targetSheetName) {
696
- (0, Affirm_1.default)(excel.SheetNames.length > 0, 'The Excel file has no sheets.');
697
- targetSheetName = excel.SheetNames[0];
698
- }
699
- else {
700
- (0, Affirm_1.default)(excel.SheetNames.includes(targetSheetName), `The sheet "${targetSheetName}" doesn't exist in the excel (available: ${excel.SheetNames.join(', ')})`);
701
- }
702
- const sheet = excel.Sheets[targetSheetName];
703
- const csv = xlsx_1.default.utils.sheet_to_csv(sheet);
704
- const lines = csv.split('\n');
705
- this.clear();
706
- const records = lines.map(x => new DatasetRecord_1.default(x, this._dimensions, this._delimiter));
707
- yield this.append(records);
708
- break;
709
- }
710
- case 'XML': {
711
- const fileContent = fs_1.default.readFileSync(this._path, 'utf-8');
712
- const jsonData = XMLParser_1.default.xmlToJson(fileContent);
713
- // Convert JSON data to string lines. This might need adjustment based on XML structure.
714
- // Assuming jsonData is an array of objects, where each object is a record.
715
- const lines = Array.isArray(jsonData)
716
- ? jsonData.map(item => JSON.stringify(item))
717
- : [JSON.stringify(jsonData)];
718
- this.clear();
719
- const records = lines.map(x => new DatasetRecord_1.default(x, this._dimensions, this._delimiter));
720
- yield this.append(records);
721
- break;
722
- }
723
- }
724
- this._finishOperation('prepare-with-dimensions');
725
- return this;
726
- });
727
666
  this.getDimensions = () => this._dimensions;
728
667
  this.setDimensions = (dimensions) => {
729
668
  this._dimensions = dimensions;
@@ -13,16 +13,11 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
13
13
  };
14
14
  Object.defineProperty(exports, "__esModule", { value: true });
15
15
  const Affirm_1 = __importDefault(require("../../core/Affirm"));
16
- const Algo_1 = __importDefault(require("../../core/Algo"));
17
- const ConsumerManager_1 = __importDefault(require("../consumer/ConsumerManager"));
18
- const Environment_1 = __importDefault(require("../Environment"));
19
- const FileCompiler_1 = __importDefault(require("../file/FileCompiler"));
20
16
  const ParseManager_1 = __importDefault(require("../parsing/ParseManager"));
21
17
  const Dataset_1 = __importDefault(require("./Dataset"));
22
- const promises_1 = require("stream/promises");
23
- const fs_1 = __importDefault(require("fs"));
24
18
  const DeveloperEngine_1 = __importDefault(require("../ai/DeveloperEngine"));
25
19
  const Constants_1 = __importDefault(require("../../Constants"));
20
+ const ProducerManager_1 = __importDefault(require("../producer/ProducerManager"));
26
21
  class DatasetManagerClass {
27
22
  constructor() {
28
23
  /**
@@ -55,13 +50,6 @@ class DatasetManagerClass {
55
50
  });
56
51
  return dataset;
57
52
  };
58
- this.buildDimensions = (dataset_1, producer_1, ...args_1) => __awaiter(this, [dataset_1, producer_1, ...args_1], void 0, function* (dataset, producer, discover = false) {
59
- (0, Affirm_1.default)(dataset, `Invalid dataset`);
60
- (0, Affirm_1.default)(producer, `Invalid producer`);
61
- const firstLine = dataset.getFirstLine();
62
- Affirm_1.default.hasValue(firstLine, `The first line of the dataset was not set.`);
63
- return this.buildDimensionsFromFirstLine(firstLine, dataset.getFile(), producer, discover);
64
- });
65
53
  this.buildDimensionsFromFirstLine = (firstLine_1, dsFile_1, producer_1, ...args_1) => __awaiter(this, [firstLine_1, dsFile_1, producer_1, ...args_1], void 0, function* (firstLine, dsFile, producer, discover = false) {
66
54
  var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o;
67
55
  Affirm_1.default.hasValue(firstLine, `Invalid first line`);
@@ -69,8 +57,6 @@ class DatasetManagerClass {
69
57
  (0, Affirm_1.default)(producer, `Invalid producer`);
70
58
  const file = dsFile;
71
59
  switch (file.fileType) {
72
- case 'XLS':
73
- case 'XLSX':
74
60
  case 'CSV': {
75
61
  const delimiterChar = (_a = file.delimiter) !== null && _a !== void 0 ? _a : ',';
76
62
  const headerLine = firstLine;
@@ -88,12 +74,17 @@ class DatasetManagerClass {
88
74
  }
89
75
  case 'PARQUET':
90
76
  case 'JSONL':
91
- case 'XML':
92
77
  case 'JSON': {
93
- const source = Environment_1.default.getSource(producer.source);
94
- const columns = FileCompiler_1.default.compileProducer(producer, source);
78
+ const columns = ProducerManager_1.default.getColumns(producer);
95
79
  const firstObject = JSON.parse(firstLine);
96
80
  const keys = Object.keys(firstObject);
81
+ // const columnsWithDot = columns.filter(x => x.aliasInProducer.includes('.'))
82
+ // if (columnsWithDot.length > 0) {
83
+ // console.log(columns, keys, 'PAPAPAPP')
84
+ // for (const colWithDot of columnsWithDot) {
85
+ // console.log(colWithDot)
86
+ // }
87
+ // }
97
88
  // If includeSourceFilename is enabled, the driver has added $source_filename column
98
89
  // We need to add it to the keys list so dimensions can reference it
99
90
  const includeSourceFilename = file.includeSourceFilename === true;
@@ -146,8 +137,7 @@ class DatasetManagerClass {
146
137
  if (!file.hasHeaderRow) {
147
138
  // If the file is a TXT and there isn't an header row, then I add a fake one that maps directly to the producer
148
139
  const delimiterChar = (_k = file.delimiter) !== null && _k !== void 0 ? _k : ',';
149
- const source = Environment_1.default.getSource(producer.source);
150
- const columns = FileCompiler_1.default.compileProducer(producer, source);
140
+ const columns = ProducerManager_1.default.getColumns(producer);
151
141
  const includeSourceFilename = file.includeSourceFilename === true;
152
142
  if (discover) {
153
143
  // Since I don't have an header, and I'm discovering, I just create placeholder dimensions based on the same number of columns of the txt
@@ -207,64 +197,14 @@ class DatasetManagerClass {
207
197
  };
208
198
  }
209
199
  }
200
+ case 'XLS':
201
+ break;
202
+ case 'XLSX':
203
+ break;
204
+ case 'XML':
205
+ break;
210
206
  }
211
207
  });
212
- this.computeDimensionsUpdates = (dataset, consumer) => {
213
- var _a;
214
- (0, Affirm_1.default)(dataset, 'Invalid dataset');
215
- (0, Affirm_1.default)(consumer, 'Invalid consumer');
216
- const fields = ConsumerManager_1.default.getExpandedFields(consumer);
217
- const dimensions = dataset.getDimensions();
218
- let updates = [];
219
- // Add all the updates
220
- for (let i = 0; i < fields.length; i++) {
221
- const { cField } = fields[i];
222
- const currentMatch = structuredClone(dimensions.find(x => x.name === cField.key));
223
- if (!currentMatch && !cField.fixed)
224
- throw new Error(`The requested field "${cField.key}" from the consumer is not present in the underlying dataset "${dataset.name}" (${dimensions.map(x => x.name).join(', ')})`);
225
- updates.push({
226
- currentDimension: currentMatch,
227
- newName: (_a = cField.alias) !== null && _a !== void 0 ? _a : cField.key,
228
- newHidden: cField.hidden,
229
- newPosition: i,
230
- toDelete: false
231
- });
232
- }
233
- // Add all the updates to remove dimensions
234
- for (const dim of dimensions) {
235
- if (!updates.find(x => { var _a; return ((_a = x.currentDimension) === null || _a === void 0 ? void 0 : _a.name) === dim.name; }))
236
- updates.push({ currentDimension: dim, toDelete: true });
237
- }
238
- // Now keep only the updates that actually change something
239
- updates = updates.filter(x => x.toDelete
240
- || !x.currentDimension
241
- || (x.currentDimension && (x.currentDimension.name !== x.newName
242
- || (Algo_1.default.hasVal(x.newHidden) && x.newHidden !== x.currentDimension.hidden)
243
- || x.newPosition !== x.currentDimension.index)));
244
- return updates;
245
- };
246
- /**
247
- * Each worker threads writes to his own dataset file to avoid concurrency and data loss,
248
- * at the end of their work, I merge their results to a single file
249
- */
250
- this.mergeWorkersPaths = (threadPaths, dataset) => __awaiter(this, void 0, void 0, function* () {
251
- dataset.clear();
252
- const datasetPath = dataset.getPath();
253
- for (let i = 0; i < threadPaths.length; i++) {
254
- const path = threadPaths[i];
255
- // If the thread skipped execution (maybe because no data needed to change), then the
256
- // dataset file might not exist, in this case, just skip it
257
- if (!fs_1.default.existsSync(path))
258
- continue;
259
- const readStream = fs_1.default.createReadStream(path);
260
- // For the first file, create a new write stream
261
- // For subsequent files, append to the existing file
262
- const writeStream = fs_1.default.createWriteStream(datasetPath, { flags: i === 0 ? 'w' : 'a' });
263
- yield (0, promises_1.pipeline)(readStream, writeStream);
264
- fs_1.default.unlinkSync(path);
265
- }
266
- return dataset;
267
- });
268
208
  }
269
209
  }
270
210
  const DatasetManager = new DatasetManagerClass();
@@ -11,7 +11,8 @@ class DatasetRecord {
11
11
  this.parse = (row, delimiter, dimensions) => {
12
12
  if (!this.isEmpty() && dimensions.length > 0) {
13
13
  const parts = CSVParser_1.default.parseRow(row, delimiter);
14
- for (const dim of dimensions) {
14
+ for (let i = 0; i < dimensions.length; i++) {
15
+ const dim = dimensions[i];
15
16
  // Use dim.index to get the correct column from the file, not the loop index
16
17
  this._value[dim.name] = TypeCaster_1.default.cast(parts[dim.index], dim.type, dim.format);
17
18
  }
@@ -52,8 +53,8 @@ class DatasetRecord {
52
53
  index: update.newPosition,
53
54
  key: update.newName,
54
55
  name: update.newName,
55
- type: (_b = (_a = update.currentDimension) === null || _a === void 0 ? void 0 : _a.type) !== null && _b !== void 0 ? _b : 'string',
56
- hidden: update.newHidden
56
+ hidden: update.newHidden,
57
+ type: (_b = (_a = update.currentDimension) === null || _a === void 0 ? void 0 : _a.type) !== null && _b !== void 0 ? _b : 'string'
57
58
  };
58
59
  this._value[newDimension.name] = null;
59
60
  this._dimensions = [...this._dimensions, newDimension];
@@ -14,13 +14,6 @@ class DeploymentPlannerClass {
14
14
  const output = consumer.outputs[i];
15
15
  switch (output.format) {
16
16
  // csv, json, parquet outputs do not need to generate anything at deploy
17
- case 'SQL': {
18
- if (output.accelerated && !output.direct)
19
- plan.push({ type: 'create-materialized-view', output: output });
20
- else if (!output.direct)
21
- plan.push({ type: 'create-view', output: output });
22
- break;
23
- }
24
17
  case 'API': {
25
18
  throw new Error(`Invalid consumer "${consumer.name}" format "${output.format}": not implemented yet.`);
26
19
  }
@@ -7,7 +7,7 @@ const Affirm_1 = __importDefault(require("../../core/Affirm"));
7
7
  const Algo_1 = __importDefault(require("../../core/Algo"));
8
8
  const ConsumerManager_1 = __importDefault(require("../consumer/ConsumerManager"));
9
9
  const Environment_1 = __importDefault(require("../Environment"));
10
- class ExecutionPlannerClas {
10
+ class ExecutionPlannerClass {
11
11
  constructor() {
12
12
  this.getEngineClass = (engine) => {
13
13
  switch (engine) {
@@ -127,5 +127,5 @@ class ExecutionPlannerClas {
127
127
  };
128
128
  }
129
129
  }
130
- const ExecutionPlanner = new ExecutionPlannerClas();
130
+ const ExecutionPlanner = new ExecutionPlannerClass();
131
131
  exports.default = ExecutionPlanner;
@@ -1,41 +1,11 @@
1
1
  "use strict";
2
- var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
- function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
- return new (P || (P = Promise))(function (resolve, reject) {
5
- function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
- function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
- function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
- step((generator = generator.apply(thisArg, _arguments || [])).next());
9
- });
10
- };
11
- var __importDefault = (this && this.__importDefault) || function (mod) {
12
- return (mod && mod.__esModule) ? mod : { "default": mod };
13
- };
14
2
  Object.defineProperty(exports, "__esModule", { value: true });
15
- const Affirm_1 = __importDefault(require("../../core/Affirm"));
16
3
  class RequestExecutorClass {
17
4
  constructor() {
18
- /**
19
- * Applies the filters, limit, offset and order on the in-memory-data
20
- */
21
- this.execute = (dataset, request) => __awaiter(this, void 0, void 0, function* () {
22
- (0, Affirm_1.default)(dataset, 'Invalid data');
23
- (0, Affirm_1.default)(request, 'Invalid request');
24
- if (request.filters)
25
- dataset = yield this.applyFilters(dataset, request.filters);
26
- if (request.order)
27
- dataset = yield this._applyOrdering(dataset, request.order);
28
- return dataset;
29
- });
30
- this.applyFilters = (dataset_1, filters_1, ...args_1) => __awaiter(this, [dataset_1, filters_1, ...args_1], void 0, function* (dataset, filters, options = {}) {
31
- return yield dataset.filter(record => {
32
- return filters.every(filter => this._evaluateFilter(record, filter));
33
- }, options);
34
- });
35
- this._evaluateFilter = (record, filter) => {
5
+ this.evaluateFilter = (record, filter) => {
36
6
  const evaluate = (baseRecord, baseFilter) => {
37
7
  const { member, operator, values } = baseFilter;
38
- const value = baseRecord.getValue(member);
8
+ const value = baseRecord[member];
39
9
  const singleValue = values[0];
40
10
  switch (operator) {
41
11
  case 'equals':
@@ -85,23 +55,12 @@ class RequestExecutorClass {
85
55
  const { and, or } = filter;
86
56
  const baseResult = evaluate(record, filter);
87
57
  if (and)
88
- return baseResult && and.every(subFilter => this._evaluateFilter(record, subFilter));
58
+ return baseResult && and.every(subFilter => this.evaluateFilter(record, subFilter));
89
59
  if (or)
90
- return baseResult || or.some(subFilter => this._evaluateFilter(record, subFilter));
60
+ return baseResult || or.some(subFilter => this.evaluateFilter(record, subFilter));
91
61
  else
92
62
  return baseResult;
93
63
  };
94
- this._applyOrdering = (dataset, order) => __awaiter(this, void 0, void 0, function* () {
95
- return yield dataset.sort((a, b) => {
96
- for (const [field, direction] of order) {
97
- if (a[field] < b[field])
98
- return direction === 'asc' ? -1 : 1;
99
- if (a[field] > b[field])
100
- return direction === 'asc' ? 1 : -1;
101
- }
102
- return 0;
103
- });
104
- });
105
64
  }
106
65
  }
107
66
  const RequestExecutor = new RequestExecutorClass();