@forzalabs/remora 0.2.6 → 1.0.1
This diff shows the contents of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- package/Constants.js +10 -2
- package/README.md +0 -14
- package/actions/debug.js +1 -0
- package/actions/deploy.js +1 -0
- package/actions/run.js +17 -13
- package/actions/sample.js +1 -1
- package/core/Algo.js +8 -4
- package/definitions/ExecutorDefinitions.js +2 -0
- package/definitions/json_schemas/consumer-schema.json +1 -1
- package/definitions/json_schemas/producer-schema.json +1 -1
- package/definitions/temp.js +2 -0
- package/drivers/DeltaShareDriver.js +4 -0
- package/drivers/DriverFactory.js +10 -10
- package/drivers/DriverHelper.js +33 -10
- package/drivers/HttpApiDriver.js +4 -0
- package/drivers/LocalDriver.js +73 -6
- package/drivers/RedshiftDriver.js +4 -0
- package/drivers/S3Driver.js +36 -52
- package/drivers/files/LocalDestinationDriver.js +200 -0
- package/drivers/files/LocalSourceDriver.js +394 -0
- package/drivers/s3/S3DestinationDriver.js +159 -0
- package/drivers/s3/S3SourceDriver.js +455 -0
- package/engines/ai/LLM.js +0 -11
- package/engines/consumer/ConsumerEngine.js +0 -77
- package/engines/consumer/ConsumerManager.js +61 -36
- package/engines/consumer/ConsumerOnFinishManager.js +14 -0
- package/engines/consumer/PostProcessor.js +1 -7
- package/engines/dataset/Dataset.js +0 -61
- package/engines/dataset/DatasetManager.js +16 -76
- package/engines/dataset/DatasetRecord.js +4 -3
- package/engines/deployment/DeploymentPlanner.js +0 -7
- package/engines/execution/ExecutionPlanner.js +2 -2
- package/engines/execution/RequestExecutor.js +4 -45
- package/engines/file/FileExporter.js +7 -32
- package/engines/parsing/CSVParser.js +27 -26
- package/engines/parsing/LineParser.js +52 -0
- package/engines/parsing/XMLParser.js +1 -1
- package/engines/producer/ProducerEngine.js +0 -45
- package/engines/scheduler/CronScheduler.js +12 -4
- package/engines/scheduler/QueueManager.js +11 -4
- package/engines/sql/SQLCompiler.js +4 -4
- package/engines/transform/JoinEngine.js +3 -3
- package/engines/transform/TransformationEngine.js +3 -89
- package/engines/usage/UsageManager.js +8 -6
- package/engines/validation/Validator.js +12 -18
- package/executors/ConsumerExecutor.js +152 -0
- package/executors/Executor.js +168 -0
- package/executors/ExecutorOrchestrator.js +315 -0
- package/executors/ExecutorPerformance.js +17 -0
- package/executors/ExecutorProgress.js +52 -0
- package/executors/OutputExecutor.js +118 -0
- package/executors/ProducerExecutor.js +108 -0
- package/package.json +3 -3
- package/workers/ExecutorWorker.js +48 -0
package/engines/file/FileExporter.js
@@ -1,45 +1,13 @@
 "use strict";
-var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
-    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
-    return new (P || (P = Promise))(function (resolve, reject) {
-        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
-        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
-        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
-        step((generator = generator.apply(thisArg, _arguments || [])).next());
-    });
-};
 var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
 const Constants_1 = __importDefault(require("../../Constants"));
-const Affirm_1 = __importDefault(require("../../core/Affirm"));
 const Algo_1 = __importDefault(require("../../core/Algo"));
 const DSTE_1 = __importDefault(require("../../core/dste/DSTE"));
-const DriverFactory_1 = __importDefault(require("../../drivers/DriverFactory"));
-const Environment_1 = __importDefault(require("../Environment"));
 class FileExporterClass {
     constructor() {
-        this.export = (consumer, output, dataset, executionId) => __awaiter(this, void 0, void 0, function* () {
-            (0, Affirm_1.default)(consumer, `Invalid consumer`);
-            (0, Affirm_1.default)(output, `Invalid output`);
-            (0, Affirm_1.default)(dataset, `Invalid export dataset`);
-            const source = Environment_1.default.getSource(output.exportDestination);
-            (0, Affirm_1.default)(source, `Invalid consumer "${consumer.name}" export location source. Make sure that the export location is an available source.`);
-            const driver = yield DriverFactory_1.default.instantiateDestination(source);
-            const extension = output.format === 'CSV'
-                ? 'csv'
-                : output.format === 'JSON'
-                    ? 'jsonl'
-                    : 'txt';
-            const name = this._composeFileName(consumer, output, extension, executionId);
-            const uploadRes = yield driver.uploadStream({
-                dataset,
-                name,
-                recordProjection: { format: output.format, delimiter: ',' }
-            });
-            return uploadRes.key;
-        });
         this.prepareBatch = (batch, options) => {
             switch (options.recordProjection.format) {
                 case 'JSON': {
@@ -67,6 +35,13 @@ class FileExporterClass {
             }
             return chunks;
         };
+        this._extension = (output) => {
+            return output.format === 'CSV'
+                ? 'csv'
+                : output.format === 'JSON'
+                    ? 'jsonl'
+                    : 'txt';
+        };
         this._composeFileName = (consumer, output, extension, executionId) => {
             if (output.exportName && output.exportName.trim().length > 0) {
                 // Ensure no extension duplication
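The format-to-extension mapping that the removed export method computed inline now lives in the private _extension helper, so other call sites can reuse it. Illustrative calls (invoking the private helper directly is for demonstration only):

    FileExporter._extension({ format: 'CSV' });  // → 'csv'
    FileExporter._extension({ format: 'JSON' }); // → 'jsonl' (JSON output is written as JSON Lines)
    FileExporter._extension({ format: 'XML' });  // → 'txt' (any other format falls back to txt)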
package/engines/parsing/CSVParser.js
@@ -10,47 +10,48 @@ class CSVParserClass {
             (0, Affirm_1.default)(row, 'Invalid row');
             (0, Affirm_1.default)(delimiter, 'Invalid delimiter');
             const fields = [];
-            let currentField = '';
+            const len = row.length;
+            let fieldStart = 0;
+            let fieldEnd = 0;
             let inQuotes = false;
+            let hasQuotes = false;
             let i = 0;
-            while (i < row.length) {
-                const char = row[i];
-
-                if (char === '"') {
+            while (i < len) {
+                const char = row.charCodeAt(i);
+                if (char === 34) { // '"'
                     if (!inQuotes) {
-                        // Starting a quoted field
                         inQuotes = true;
+                        hasQuotes = true;
+                        fieldStart = i + 1;
                     }
-                    else if (row[i + 1] === '"') {
-                        //
-                        currentField += '"';
-                        i++; // Skip the next quote
+                    else if (row.charCodeAt(i + 1) === 34) {
+                        i++; // Skip escaped quote, will handle in extraction
                    }
                     else {
-                        // Ending a quoted field
                         inQuotes = false;
+                        fieldEnd = i;
                     }
                 }
-                else if (char === delimiter && !inQuotes) {
-                    //
-                    fields.push(currentField.trim());
-                    currentField = '';
+                else if (char === delimiter.charCodeAt(0) && !inQuotes) {
+                    // Extract field
+                    const field = hasQuotes
+                        ? row.slice(fieldStart, fieldEnd).replaceAll('""', '"')
+                        : row.slice(fieldStart, i).trim();
+                    fields.push(field);
+                    fieldStart = i + 1;
+                    fieldEnd = 0;
+                    hasQuotes = false;
                 }
-                else if (char === '\n' || char === '\r') {
-
-                    if (!inQuotes) {
-                        break;
-                    }
-                    currentField += char;
-                }
-                else {
-                    // Regular character
-                    currentField += char;
+                else if ((char === 13 || char === 10) && !inQuotes) { // \r or \n
+                    break;
                 }
                 i++;
             }
             // Add the last field
-            fields.push(currentField.trim());
+            const field = hasQuotes
+                ? row.slice(fieldStart, fieldEnd).replaceAll('""', '"')
+                : row.slice(fieldStart, i).trim();
+            fields.push(field);
             return fields;
         };
     }
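The rewritten parseRow scans char codes and extracts each field as a slice, collapsing escaped quotes ("" → ") in one replaceAll, instead of appending characters one at a time. A minimal behavioral sketch (the require path is illustrative; the signature matches the LineParser call site below):

    const CSVParser = require('./engines/parsing/CSVParser').default;
    // Quoted field containing the delimiter and an escaped quote:
    CSVParser.parseRow('a,"b,""c""",d', ','); // → ['a', 'b,"c"', 'd']
    // Unquoted fields are trimmed; a \r or \n outside quotes ends the row:
    CSVParser.parseRow(' x , y \r\nignored', ','); // → ['x', 'y']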
package/engines/parsing/LineParser.js
@@ -0,0 +1,52 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+const TypeCaster_1 = __importDefault(require("../transform/TypeCaster"));
+const CSVParser_1 = __importDefault(require("./CSVParser"));
+class LineParserClass {
+    constructor() {
+        this.parse = (line, producer, dimensions, tracker) => {
+            var _a;
+            const { settings: { fileType, delimiter } } = producer;
+            switch (fileType) {
+                case 'PARQUET':
+                case 'TXT':
+                case 'XML':
+                case 'XLS':
+                case 'XLSX':
+                case 'CSV': {
+                    let counter = performance.now();
+                    const parts = CSVParser_1.default.parseRow(line, delimiter !== null && delimiter !== void 0 ? delimiter : ',');
+                    tracker.measure('process-line:parse-csv-row', performance.now() - counter);
+                    counter = performance.now();
+                    const value = {};
+                    for (const dim of dimensions) {
+                        value[dim.name] = TypeCaster_1.default.cast(parts[dim.index], dim.prodDimension.type);
+                    }
+                    tracker.measure('process-line:cast&build-record', performance.now() - counter);
+                    return value;
+                }
+                case 'JSON':
+                case 'JSONL': {
+                    let counter = performance.now();
+                    const parsed = JSON.parse(line);
+                    tracker.measure('process-line:parse-json', performance.now() - counter);
+                    counter = performance.now();
+                    const value = {};
+                    for (const dim of dimensions) {
+                        const key = (_a = dim.prodDimension.alias) !== null && _a !== void 0 ? _a : dim.prodDimension.name;
+                        value[dim.name] = TypeCaster_1.default.cast(parsed[key], dim.prodDimension.type);
+                    }
+                    tracker.measure('process-line:cast&build-record', performance.now() - counter);
+                    return value;
+                }
+                default:
+                    throw new Error(`File type ${fileType} not implemented yet.`);
+            }
+        };
+    }
+}
+const LineParser = new LineParserClass();
+exports.default = LineParser;
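LineParser centralizes per-line record building: CSV-family lines go through CSVParser.parseRow, JSON/JSONL lines through JSON.parse, and each dimension is cast via TypeCaster while timings are reported to a tracker. A hypothetical call, with the producer/dimension/tracker shapes inferred from the code above (the field names and types are made up):

    const LineParser = require('./engines/parsing/LineParser').default;
    const tracker = { measure: (label, ms) => { /* collect timings */ } }; // anything exposing measure(label, ms)
    const producer = { settings: { fileType: 'CSV', delimiter: ',' } };
    const dimensions = [
        { name: 'id', index: 0, prodDimension: { type: 'number' } },
        { name: 'email', index: 1, prodDimension: { type: 'string' } }
    ];
    LineParser.parse('42,jane@example.com', producer, dimensions, tracker);
    // → { id: 42, email: 'jane@example.com' }, assuming TypeCaster.cast coerces '42' to the number 42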
package/engines/parsing/XMLParser.js
@@ -47,8 +47,8 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
 Object.defineProperty(exports, "__esModule", { value: true });
 const fast_xml_parser_1 = require("fast-xml-parser");
 const Affirm_1 = __importDefault(require("../../core/Affirm"));
-const fs = __importStar(require("fs"));
 const Algo_1 = __importDefault(require("../../core/Algo"));
+const fs = __importStar(require("fs"));
 const DEFAULT_OPTIONS = {
     attributeNamePrefix: '@_',
     ignoreAttributes: false,
package/engines/producer/ProducerEngine.js
@@ -14,56 +14,11 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
 Object.defineProperty(exports, "__esModule", { value: true });
 const Affirm_1 = __importDefault(require("../../core/Affirm"));
 const DriverFactory_1 = __importDefault(require("../../drivers/DriverFactory"));
-const DeploymentPlanner_1 = __importDefault(require("../deployment/DeploymentPlanner"));
 const Environment_1 = __importDefault(require("../Environment"));
-const FileCompiler_1 = __importDefault(require("../file/FileCompiler"));
-const SQLCompiler_1 = __importDefault(require("../sql/SQLCompiler"));
-const SQLUtils_1 = __importDefault(require("../sql/SQLUtils"));
 const DatasetManager_1 = __importDefault(require("../dataset/DatasetManager"));
 const Logger_1 = __importDefault(require("../../helper/Logger"));
 class ProducerEngineClass {
     constructor() {
-        this.compile = (producer) => {
-            (0, Affirm_1.default)(producer, 'Invalid producer');
-            const source = Environment_1.default.getSource(producer.source);
-            (0, Affirm_1.default)(source, `No source found for producer "${producer.name}" with name "${producer.source}"`);
-            switch (source.engine) {
-                case 'aws-redshift':
-                case 'postgres': {
-                    const sql = SQLCompiler_1.default.compileProducer(producer, source);
-                    (0, Affirm_1.default)(sql, `Invalid SQL from compilation for producer "${producer.name}"`);
-                    return sql;
-                }
-                case 'aws-s3':
-                case 'delta-share': {
-                    const columns = FileCompiler_1.default.compileProducer(producer, source);
-                    (0, Affirm_1.default)(columns, `Invalid columns from compilation for producer "${producer.name}"`);
-                    break;
-                }
-                default: throw new Error(`Invalid engine type "${source.engine}" for producer "${producer.name}": not implemented yet`);
-            }
-        };
-        this.deploy = (producer) => __awaiter(this, void 0, void 0, function* () {
-            (0, Affirm_1.default)(producer, 'Invalid producer');
-            const source = Environment_1.default.getSource(producer.source);
-            (0, Affirm_1.default)(source, `No source found for producer "${producer.name}" with name "${producer.source}"`);
-            const driver = yield DriverFactory_1.default.instantiateSource(source);
-            (0, Affirm_1.default)(driver, `No driver found for producer "${producer.name}" with driver type "${source.engine}"`);
-            const plan = DeploymentPlanner_1.default.planProducer(producer);
-            for (const planStep of plan) {
-                switch (planStep.type) {
-                    case 'create-view': {
-                        const internalSchema = Environment_1.default.get('REMORA_SCHEMA');
-                        (0, Affirm_1.default)(internalSchema, `Missing "REMORA_SCHEMA" on project settings (needed due to "${producer.name}" wanting to create a view)`);
-                        const sql = SQLCompiler_1.default.compileProducer(producer, source);
-                        const vSQL = `CREATE OR REPLACE VIEW "${internalSchema}"."${SQLUtils_1.default.viewName(producer.name)}" AS ${sql}`;
-                        yield driver.execute(vSQL);
-                        break;
-                    }
-                    default: throw new Error(`Invalid execution consumer plan step type "${planStep.type}"`);
-                }
-            }
-        });
         this.readFile = (producer, options) => __awaiter(this, void 0, void 0, function* () {
             var _a, _b, _c;
             (0, Affirm_1.default)(producer, 'Invalid producer');
package/engines/scheduler/CronScheduler.js
@@ -47,8 +47,8 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
 Object.defineProperty(exports, "__esModule", { value: true });
 const cron = __importStar(require("node-cron"));
 const Environment_1 = __importDefault(require("../Environment"));
-const ConsumerEngine_1 = __importDefault(require("../consumer/ConsumerEngine"));
 const UserManager_1 = __importDefault(require("../UserManager"));
+const ExecutorOrchestrator_1 = __importDefault(require("../../executors/ExecutorOrchestrator"));
 class CronScheduler {
     constructor() {
         this.scheduledJobs = new Map();
@@ -125,11 +125,19 @@ class CronScheduler {
             try {
                 console.log(`Executing CRON job for consumer "${consumer.name}" output ${outputIndex}`);
                 const user = UserManager_1.default.getRemoraWorkerUser();
-                const
+                const runner = { _id: user._id, name: user.name, type: 'actor' };
+                const result = yield ExecutorOrchestrator_1.default.launch({
+                    consumer,
+                    details: {
+                        invokedBy: 'CRON',
+                        user: runner
+                    },
+                    logProgress: false
+                });
                 console.log(`CRON job completed successfully for consumer "${consumer.name}" output ${outputIndex}`);
                 // Log execution statistics
-                if (result
-                console.log(`CRON job stats: ${result.
+                if (result) {
+                    console.log(`CRON job stats: ${result.elapsedMS}ms, size: ${result.outputCount}, cycles: ${result.cycles}`);
                 }
             }
             catch (error) {
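Both schedulers now funnel executions through the same ExecutorOrchestrator.launch entry point instead of the removed ConsumerEngine. A sketch of a programmatic trigger using the argument and result shapes visible at the two call sites (whether invokedBy values other than 'CRON' and 'QUEUE' are accepted is an assumption):

    const ExecutorOrchestrator = require('./executors/ExecutorOrchestrator').default;
    // Inside an async function:
    const result = await ExecutorOrchestrator.launch({
        consumer, // a consumer definition, as in the call sites above
        details: { invokedBy: 'CRON', user: { _id: 'worker-1', name: 'remora-worker', type: 'actor' } },
        logProgress: false
    });
    if (result)
        console.log(`${result.elapsedMS}ms, size: ${result.outputCount}, cycles: ${result.cycles}`);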
package/engines/scheduler/QueueManager.js
@@ -14,9 +14,9 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
 Object.defineProperty(exports, "__esModule", { value: true });
 const client_sqs_1 = require("@aws-sdk/client-sqs");
 const Environment_1 = __importDefault(require("../Environment"));
-const ConsumerEngine_1 = __importDefault(require("../consumer/ConsumerEngine"));
 const UserManager_1 = __importDefault(require("../UserManager"));
 const SecretManager_1 = __importDefault(require("../SecretManager"));
+const ExecutorOrchestrator_1 = __importDefault(require("../../executors/ExecutorOrchestrator"));
 class QueueManager {
     constructor() {
         this.queueMappings = new Map();
@@ -199,11 +199,18 @@ class QueueManager {
                 }
                 console.log(`Processing queue message for consumer "${mapping.consumer.name}" output ${mapping.outputIndex}`);
                 const user = UserManager_1.default.getRemoraWorkerUser();
-                const result = yield
+                const result = yield ExecutorOrchestrator_1.default.launch({
+                    consumer: mapping.consumer,
+                    details: {
+                        invokedBy: 'QUEUE',
+                        user: { _id: user._id, name: user.name, type: 'actor' }
+                    },
+                    logProgress: false
+                });
                 console.log(`Queue trigger completed successfully for consumer "${mapping.consumer.name}" output ${mapping.outputIndex}`);
                 // Log execution statistics
-                if (result
-                console.log(`Queue trigger stats: ${result.
+                if (result) {
+                    console.log(`Queue trigger stats: ${result.elapsedMS}ms, size: ${result.outputCount}, cycles: ${result.cycles}`);
                 }
                 messageProcessedByAnyConsumer = true;
             }
package/engines/sql/SQLCompiler.js
@@ -78,10 +78,10 @@ class SQLCompilerClass {
         };
         this.getConsumerReference = (consumer) => {
             (0, Affirm_1.default)(consumer, 'Invalid consumer');
-            if (consumer.outputs.some(x => x.format === 'SQL' && x.accelerated))
-                return `SELECT * FROM "av_remora_${SQLUtils.sanitizeName(consumer.name)}"`
-            if (consumer.outputs.some(x => x.format === 'SQL' && !x.direct))
-                return `SELECT * FROM "v_remora_${SQLUtils.sanitizeName(consumer.name)}"`
+            // if (consumer.outputs.some(x => x.format === 'SQL' && x.accelerated))
+            //     return `SELECT * FROM "av_remora_${SQLUtils.sanitizeName(consumer.name)}"`
+            // if (consumer.outputs.some(x => x.format === 'SQL' && !x.direct))
+            //     return `SELECT * FROM "v_remora_${SQLUtils.sanitizeName(consumer.name)}"`
             return `SELECT * FROM (${this.compileConsumer(consumer)})`;
         };
         this.compileConsumer = (consumer) => {
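With those shortcuts commented out, getConsumerReference no longer special-cases accelerated or non-direct SQL outputs; every reference now inlines the compiled consumer query:

    SQLCompiler.getConsumerReference(consumer);
    // → 'SELECT * FROM (' + SQLCompiler.compileConsumer(consumer) + ')'
    // The "av_remora_*" / "v_remora_*" view references are no longer emitted.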
package/engines/transform/JoinEngine.js
@@ -14,9 +14,9 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
 Object.defineProperty(exports, "__esModule", { value: true });
 const Affirm_1 = __importDefault(require("../../core/Affirm"));
 const Environment_1 = __importDefault(require("../Environment"));
-const ConsumerEngine_1 = __importDefault(require("../consumer/ConsumerEngine"));
 const Dataset_1 = __importDefault(require("../dataset/Dataset"));
 const DatasetRecord_1 = __importDefault(require("../dataset/DatasetRecord"));
+const ConsumerManager_1 = __importDefault(require("../consumer/ConsumerManager"));
 class JoinEngineClass {
     constructor() {
         this.validateFieldInProducer = (fieldName, producerName) => {
@@ -94,8 +94,8 @@ class JoinEngineClass {
                 return this.findProducerData(consumer.producers[0].name, producedData);
             if (consumer.producers.some(x => x.union))
                 return yield this.union(consumer, producedData);
-            const consumerShape =
-            const consumerColumns =
+            const consumerShape = ConsumerManager_1.default.getOutputShape(consumer);
+            const consumerColumns = ConsumerManager_1.default.compile(consumer);
             // Create a new dataset for the joined result
             const resultDataset = new Dataset_1.default({
                 name: `joined_${consumer.name}`,
package/engines/transform/TransformationEngine.js
@@ -1,82 +1,14 @@
 "use strict";
-var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
-    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
-    return new (P || (P = Promise))(function (resolve, reject) {
-        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
-        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
-        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
-        step((generator = generator.apply(thisArg, _arguments || [])).next());
-    });
-};
 var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-const Affirm_1 = __importDefault(require("../../core/Affirm"));
 const Algo_1 = __importDefault(require("../../core/Algo"));
 const TypeCaster_1 = __importDefault(require("./TypeCaster"));
 const CryptoEngine_1 = __importDefault(require("../CryptoEngine"));
 const DeveloperEngine_1 = __importDefault(require("../ai/DeveloperEngine"));
 class TransformationEngineClass {
     constructor() {
-        this.apply = (consumer, dataset, options) => __awaiter(this, void 0, void 0, function* () {
-            (0, Affirm_1.default)(consumer, 'Invalid consumer');
-            (0, Affirm_1.default)(dataset, 'Invalid data');
-            const fieldsToTransform = consumer.fields.filter(field => Algo_1.default.hasVal(field.transform));
-            Affirm_1.default.hasItems(fieldsToTransform, 'No fields with transformations');
-            yield dataset.map(record => {
-                var _a;
-                for (const field of fieldsToTransform) {
-                    if (!field.transform)
-                        continue;
-                    const fieldKey = (_a = field.alias) !== null && _a !== void 0 ? _a : field.key;
-                    const value = record.getValue(fieldKey);
-                    if (!Algo_1.default.hasVal(value) && Algo_1.default.hasVal(field.default))
-                        record.setValue(fieldKey, field.default);
-                    else if (!Algo_1.default.hasVal(value) && this.isFieldCombinationTransformation(field.transform))
-                        // For field combination transformations, we don't skip null values as they might combine with other fields
-                        continue;
-                    else if (!Algo_1.default.hasVal(value))
-                        continue;
-                    try {
-                        record.setValue(fieldKey, this.applyTransformations(value, field.transform, field, record));
-                    }
-                    catch (error) {
-                        switch (field.onError) {
-                            case 'set_default':
-                                record.setValue(fieldKey, field.default);
-                                break;
-                            case 'skip':
-                                break;
-                            case 'fail':
-                            default:
-                                if (field.transform)
-                                    throw new Error(`an error occured during the transformation ${field.transform} on filedKey: ${fieldKey} the error is the following: ${error}`);
-                                else
-                                    throw new Error(`an error occured at filedKey: ${fieldKey} the error is the following: ${error}`);
-                        }
-                    }
-                }
-                return record;
-            }, options);
-            /**
-             * Some transformations (for now only "cast") change the underlying type of the dataset dimension
-             * Here I update the dimension type of the dataset.
-             * TODO: I think that we may have a bug if you cast AND then do an operation on the number, since it reverts back to being a string in the same trnasformation chain, since the dimension type update is applied only at the end of all the transformations
-             */
-            for (const field of fieldsToTransform) {
-                if (!field.transform)
-                    continue;
-                this.applyDimensionsChanges(field.transform, field, dataset);
-            }
-            return dataset;
-        });
-        this.isFieldCombinationTransformation = (transformation) => {
-            if (Array.isArray(transformation)) {
-                return transformation.some(t => this.isFieldCombinationTransformation(t));
-            }
-            return 'combine_fields' in transformation;
-        };
         this.applyTransformations = (value, transformations, field, record) => {
             var _a;
             if (Array.isArray(transformations)) {
@@ -109,7 +41,7 @@ class TransformationEngineClass {
             }
             const { fields } = transformations.multiplyBy;
             const fieldValues = fields.map(fieldName => {
-                const fieldValue = record.getValue(fieldName);
+                const fieldValue = record[fieldName];
                 return fieldValue !== null && fieldValue !== undefined ? TypeCaster_1.default.cast(fieldValue, 'number') : 1;
             });
             const product = fieldValues.reduce((accumulator, value) => accumulator * value, 1);
@@ -127,7 +59,7 @@ class TransformationEngineClass {
             }
             const { fields } = transformations.addBy;
             const fieldValues = fields.map(fieldName => {
-                const fieldValue = record.getValue(fieldName);
+                const fieldValue = record[fieldName];
                 return fieldValue !== null && fieldValue !== undefined ? TypeCaster_1.default.cast(fieldValue, 'number') : 1;
             });
             const sum = fieldValues.reduce((accumulator, value) => accumulator + value);
@@ -249,7 +181,7 @@ class TransformationEngineClass {
             const { fields, separator = '', template } = transformations.combine_fields;
             // Get values from the specified fields
             const fieldValues = fields.map(fieldName => {
-                const fieldValue = record.getValue(fieldName);
+                const fieldValue = record[fieldName];
                 return fieldValue !== null && fieldValue !== undefined ? String(fieldValue) : '';
             });
             // If template is provided, use it for formatting
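All three value-combining transformations (multiplyBy, addBy, combine_fields) now read sibling values with plain property access (record[fieldName]) rather than through a record object with accessors, matching the bare records built by LineParser. A hypothetical combine_fields call against such a plain record (joining with the separator when no template is given is an assumption based on the destructured defaults above):

    const record = { first: 'Ada', last: 'Lovelace' };
    TransformationEngine.applyTransformations(
        record.first,
        { combine_fields: { fields: ['first', 'last'], separator: ' ' } },
        { key: 'first' },
        record
    ); // → 'Ada Lovelace'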
@@ -337,24 +269,6 @@ class TransformationEngineClass {
                 throw new Error(`Failed to apply masking transformation '${maskType}' to field '${field.key}': ${error.message}`);
             }
         };
-        this.applyDimensionsChanges = (transformations, field, dataset) => {
-            if (Array.isArray(transformations)) {
-                for (const transform of transformations) {
-                    this.applyDimensionsChanges(transform, field, dataset);
-                }
-                return dataset;
-            }
-            // Single transformation
-            if ('cast' in transformations) {
-                const { cast, format } = transformations;
-                let oldDimension = dataset.getDimensions().find(x => x.name === field.key);
-                if (!oldDimension)
-                    oldDimension = dataset.getDimensions().find(x => x.key === field.key);
-                const newDimension = Object.assign(Object.assign({}, structuredClone(oldDimension)), { type: cast, format: format });
-                dataset.setSingleDimension(newDimension, oldDimension);
-            }
-            return dataset;
-        };
     }
 }
 const TransformationEngine = new TransformationEngineClass();
package/engines/usage/UsageManager.js
@@ -18,8 +18,8 @@ class UsageManagerClass {
             const now = DSTE_1.default.now();
             return `${consumer.name}_${now.getUTCFullYear()}_${now.getUTCMonth()}_${now.getUTCDate()}`.toLowerCase();
         };
-        this.startUsage = (consumer,
-
+        this.startUsage = (consumer, details) => {
+            const { user, invokedBy } = details;
             const newUsage = {
                 _id: Helper_1.default.uuid(),
                 consumer: consumer.name,
@@ -27,19 +27,21 @@ class UsageManagerClass {
                 executedBy: user,
                 itemsCount: -1,
                 status: 'started',
-                invokedBy:
-                _signature: ''
+                invokedBy: invokedBy !== null && invokedBy !== void 0 ? invokedBy : 'UNKNOWN',
+                _signature: '',
+                stats: null
             };
             if (Helper_1.default.isDev())
                 return { usageId: newUsage._id, usage: Promise.resolve(newUsage) };
             const updateRes = DatabaseEngine_1.default.upsert(Settings_1.default.db.collections.usage, newUsage._id, newUsage);
             return { usageId: newUsage._id, usage: updateRes };
         };
-        this.endUsage = (usageId, itemsCount) => {
+        this.endUsage = (usageId, itemsCount, stats) => {
             const update = {
                 itemsCount: itemsCount,
                 status: 'success',
-                finishedAt: DSTE_1.default.now()
+                finishedAt: DSTE_1.default.now(),
+                stats
             };
             if (Helper_1.default.isDev())
                 return { usageId: null, usage: Promise.resolve(update) };
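startUsage now takes a single details object, and usage documents carry a stats payload that endUsage fills in. A sketch of the new call pair (the stats shape is an assumption, mirroring the executor result fields logged by the schedulers):

    const { usageId } = UsageManager.startUsage(consumer, {
        user: { _id: 'worker-1', name: 'remora-worker', type: 'actor' },
        invokedBy: 'CRON' // omitted or undefined falls back to 'UNKNOWN'
    });
    // ... run the execution ...
    UsageManager.endUsage(usageId, itemsCount, { elapsedMS, outputCount, cycles });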
package/engines/validation/Validator.js
@@ -7,7 +7,6 @@ const Affirm_1 = __importDefault(require("../../core/Affirm"));
 const Algo_1 = __importDefault(require("../../core/Algo"));
 const ConsumerManager_1 = __importDefault(require("../consumer/ConsumerManager"));
 const Environment_1 = __importDefault(require("../Environment"));
-const ExecutionPlanner_1 = __importDefault(require("../execution/ExecutionPlanner"));
 class ValidatorClass {
     constructor() {
         this.validateSources = (sources) => {
@@ -97,8 +96,6 @@ class ValidatorClass {
             const allFieldsWithNoFrom = consumer.fields.filter(x => x.key === '*' && !x.from);
             if (allFieldsWithNoFrom.length > 0 && consumer.producers.length > 1)
                 errors.push(`Field with key "*" was used without specifying the "from" producer and multiple producers were found.`);
-            if (consumer.fields.some(x => x.key === '*' && x.grouping))
-                errors.push(`Field with key "*" can't be used for "grouping". Either remove the grouping or change the key.`);
             // Validation on producers
             if (consumer.producers.length === 0)
                 errors.push(`Consumer must have at least 1 producer.`);
@@ -131,13 +128,13 @@ class ValidatorClass {
             if (consumer.filters.some(x => x.sql && x.rule))
                 errors.push(`A single consumer can't have both filters based on SQL and filters based on rules.`);
             const [source] = ConsumerManager_1.default.getSource(consumer);
-            const engineClass =
+            const engineClass = this.getEngineClass(source.engine);
             if (engineClass === 'file' && consumer.filters.some(x => x.sql))
                 errors.push(`Filters based on SQL are only valid for SQL based sources. (source: ${source.name})`);
             if (engineClass === 'sql' && consumer.filters.some(x => x.rule))
                 errors.push(`Filters based on rules are only valid for non-SQL based sources. (source: ${source.name})`);
             // Check that the members of the rules are present in the consumer
-            const allFields =
+            const allFields = consumer.fields;
             const ruleBasedFilters = consumer.filters.filter(x => x.rule);
             const rulesWithMatchingFields = ruleBasedFilters.map(x => ({
                 match: allFields.find(k => { var _a; return ((_a = k.alias) !== null && _a !== void 0 ? _a : k.key) === x.rule.member; }),
@@ -148,18 +145,6 @@ class ValidatorClass {
                 errors.push(`Filter(s) on member(s) "${missingRules.map(x => x.rule.rule.member).join(', ')}" is invalid since the member specified is not present in the consumer. Check the member value or add the missing field to the consumer.`);
             }
         }
-            // Validation on fields
-            const validateGroupingLevels = (fields, level = 0) => {
-                let errors = [];
-                const groupingFields = fields.filter(x => x.grouping);
-                if (groupingFields.length > 1)
-                    errors.push(`There can't be 2 fields with grouping defined at the same level (${groupingFields.map(x => x.key).join(', ')}). Level: ${level}`);
-                groupingFields.forEach(field => {
-                    if (field.grouping)
-                        errors = [...errors, ...validateGroupingLevels(field.grouping.subFields, level + 1)];
-                });
-                return errors;
-            };
             const validateTransformations = (fields) => {
                 var _a;
                 const errors = [];
@@ -185,7 +170,6 @@ class ValidatorClass {
                 }
                 return errors;
             };
-            errors.push(...validateGroupingLevels(consumer.fields));
             errors.push(...validateTransformations(consumer.fields));
             // Validation outputs
             const duplicatesOutputs = Algo_1.default.duplicatesObject(consumer.outputs, 'format');
@@ -211,6 +195,16 @@ class ValidatorClass {
             }
             return errors;
         };
+        this.getEngineClass = (engine) => {
+            switch (engine) {
+                case 'aws-dynamodb': return 'no-sql';
+                case 'aws-redshift':
+                case 'postgres': return 'sql';
+                case 'delta-share':
+                case 'aws-s3': return 'file';
+                case 'local': return 'local';
+            }
+        };
     }
 }
 const Validator = new ValidatorClass();
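The engine-class lookup that previously lived in ExecutionPlanner is now a local helper and drives the SQL-vs-rule filter checks above. Quick reference (engines outside the switch yield undefined):

    Validator.getEngineClass('aws-redshift'); // → 'sql'
    Validator.getEngineClass('aws-s3');       // → 'file'
    Validator.getEngineClass('aws-dynamodb'); // → 'no-sql'
    Validator.getEngineClass('local');        // → 'local'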