@forzalabs/remora 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/documentation/default_resources/schema.json +0 -36
- package/drivers/LocalDriver.js +0 -542
- package/drivers/S3Driver.js +0 -563
- package/drivers/S3SourceDriver.js +0 -132
- package/engines/DataframeManager.js +0 -55
- package/engines/ParseManager.js +0 -75
- package/engines/ProducerEngine.js +0 -160
- package/engines/UsageDataManager.js +0 -110
- package/engines/UsageManager.js +0 -61
- package/engines/Validator.js +0 -157
- package/engines/consumer/ConsumerEngine.js +0 -128
- package/engines/consumer/PostProcessor.js +0 -253
- package/engines/dataset/ParallelDataset.js +0 -184
- package/engines/dataset/TransformWorker.js +0 -2
- package/engines/dataset/definitions.js +0 -2
- package/engines/dataset/example-parallel-transform.js +0 -2
- package/engines/dataset/test-parallel.js +0 -2
- package/engines/deployment/DeploymentPlanner.js +0 -39
- package/engines/execution/ExecutionEnvironment.js +0 -209
- package/engines/execution/ExecutionPlanner.js +0 -131
- package/engines/file/FileCompiler.js +0 -29
- package/engines/file/FileContentBuilder.js +0 -34
- package/engines/schema/SchemaEngine.js +0 -33
- package/engines/sql/SQLBuilder.js +0 -96
- package/engines/sql/SQLCompiler.js +0 -141
- package/engines/sql/SQLUtils.js +0 -22
- package/workers/FilterWorker.js +0 -62
- package/workers/ProjectionWorker.js +0 -63
- package/workers/TransformWorker.js +0 -63
- package/workers/TsWorker.js +0 -14
package/drivers/S3SourceDriver.js
DELETED
@@ -1,132 +0,0 @@
-"use strict";
-var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
-    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
-    return new (P || (P = Promise))(function (resolve, reject) {
-        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
-        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
-        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
-        step((generator = generator.apply(thisArg, _arguments || [])).next());
-    });
-};
-var __asyncValues = (this && this.__asyncValues) || function (o) {
-    if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
-    var m = o[Symbol.asyncIterator], i;
-    return m ? m.call(o) : (o = typeof __values === "function" ? __values(o) : o[Symbol.iterator](), i = {}, verb("next"), verb("throw"), verb("return"), i[Symbol.asyncIterator] = function () { return this; }, i);
-    function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }
-    function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }
-};
-var __importDefault = (this && this.__importDefault) || function (mod) {
-    return (mod && mod.__esModule) ? mod : { "default": mod };
-};
-Object.defineProperty(exports, "__esModule", { value: true });
-const client_s3_1 = require("@aws-sdk/client-s3");
-const readline_1 = __importDefault(require("readline"));
-const Affirm_1 = __importDefault(require("../core/Affirm"));
-const SecretManager_1 = __importDefault(require("../engines/SecretManager"));
-class S3SourceDriver {
-    constructor() {
-        this.init = (source) => __awaiter(this, void 0, void 0, function* () {
-            this._bucketName = source.authentication['bucket'];
-            const sessionToken = SecretManager_1.default.replaceSecret(source.authentication['sessionToken']);
-            const config = {
-                region: source.authentication['region'],
-                credentials: {
-                    accessKeyId: SecretManager_1.default.replaceSecret(source.authentication['accessKey']),
-                    secretAccessKey: SecretManager_1.default.replaceSecret(source.authentication['secretKey']),
-                    sessionToken: sessionToken ? sessionToken : undefined
-                }
-            };
-            this._client = new client_s3_1.S3Client(config);
-            // TODO: is there a way to test if the connection was successful? like a query or scan that I can do?
-            return this;
-        });
-        this.download = (request) => __awaiter(this, void 0, void 0, function* () {
-            var _a, e_1, _b, _c;
-            (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "connect()" first');
-            (0, Affirm_1.default)(request, `Invalid download request`);
-            (0, Affirm_1.default)(request.fileKey, `Invalid file key for download request`);
-            const { fileKey } = request;
-            const bucket = this._bucketName;
-            const response = yield this._client.send(new client_s3_1.GetObjectCommand({
-                Bucket: bucket,
-                Key: fileKey
-            }));
-            (0, Affirm_1.default)(response.Body, 'Failed to fetch object from S3');
-            const stream = response.Body;
-            const reader = readline_1.default.createInterface({ input: stream, crlfDelay: Infinity });
-            const lines = [];
-            try {
-                for (var _d = true, reader_1 = __asyncValues(reader), reader_1_1; reader_1_1 = yield reader_1.next(), _a = reader_1_1.done, !_a; _d = true) {
-                    _c = reader_1_1.value;
-                    _d = false;
-                    const line = _c;
-                    lines.push(line);
-                }
-            }
-            catch (e_1_1) { e_1 = { error: e_1_1 }; }
-            finally {
-                try {
-                    if (!_d && !_a && (_b = reader_1.return)) yield _b.call(reader_1);
-                }
-                finally { if (e_1) throw e_1.error; }
-            }
-            reader.close();
-            return lines;
-        });
-        this.readLinesInRange = (readOptions) => __awaiter(this, void 0, void 0, function* () {
-            var _a, e_2, _b, _c;
-            (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "connect()" first');
-            (0, Affirm_1.default)(readOptions, 'Invalid read options');
-            const { fileKey, lineFrom, lineTo } = readOptions;
-            const bucket = this._bucketName;
-            const response = yield this._client.send(new client_s3_1.GetObjectCommand({
-                Bucket: bucket,
-                Key: fileKey
-            }));
-            (0, Affirm_1.default)(response.Body, 'Failed to fetch object from S3');
-            const stream = response.Body;
-            const reader = readline_1.default.createInterface({ input: stream, crlfDelay: Infinity });
-            const lines = [];
-            let lineCounter = 0;
-            try {
-                for (var _d = true, reader_2 = __asyncValues(reader), reader_2_1; reader_2_1 = yield reader_2.next(), _a = reader_2_1.done, !_a; _d = true) {
-                    _c = reader_2_1.value;
-                    _d = false;
-                    const line = _c;
-                    if (lineCounter >= lineFrom && lineCounter < lineTo)
-                        lines.push(line);
-                    if (lineCounter > lineTo)
-                        break;
-                    lineCounter++;
-                }
-            }
-            catch (e_2_1) { e_2 = { error: e_2_1 }; }
-            finally {
-                try {
-                    if (!_d && !_a && (_b = reader_2.return)) yield _b.call(reader_2);
-                }
-                finally { if (e_2) throw e_2.error; }
-            }
-            reader.close();
-            return lines;
-        });
-        this.exist = (producer) => __awaiter(this, void 0, void 0, function* () {
-            var _a;
-            (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "connect()" first');
-            (0, Affirm_1.default)(producer, 'Invalid read producer');
-            const bucket = this._bucketName;
-            const fileKey = producer.settings.fileKey;
-            (0, Affirm_1.default)(fileKey, `Invalid file key for download request`);
-            try {
-                yield this._client.send(new client_s3_1.HeadObjectCommand({ Bucket: bucket, Key: fileKey }));
-                return true;
-            }
-            catch (error) {
-                if (((_a = error.$metadata) === null || _a === void 0 ? void 0 : _a.httpStatusCode) === 404 || error.name === 'NotFound')
-                    return false;
-                throw error;
-            }
-        });
-    }
-}
-exports.default = S3SourceDriver;
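Note: the `__asyncValues` loops in the removed driver are the down-leveled form of a plain `for await … of` over a readline interface. A minimal sketch of the equivalent modern source, assuming only the `@aws-sdk/client-s3` and `readline` APIs already used above (function and variable names are illustrative, not from the package):

    const { S3Client, GetObjectCommand } = require('@aws-sdk/client-s3');
    const readline = require('readline');

    // Stream an S3 object line by line instead of buffering the whole body.
    async function readObjectLines(client, bucket, key) {
        const response = await client.send(new GetObjectCommand({ Bucket: bucket, Key: key }));
        // In the Node runtime, response.Body is a Readable stream.
        const reader = readline.createInterface({ input: response.Body, crlfDelay: Infinity });
        const lines = [];
        for await (const line of reader) lines.push(line);
        reader.close();
        return lines;
    }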
package/engines/DataframeManager.js
DELETED
@@ -1,55 +0,0 @@
-"use strict";
-var __importDefault = (this && this.__importDefault) || function (mod) {
-    return (mod && mod.__esModule) ? mod : { "default": mod };
-};
-Object.defineProperty(exports, "__esModule", { value: true });
-const Algo_1 = __importDefault(require("../core/Algo"));
-const Helper_1 = __importDefault(require("../helper/Helper"));
-class DataframeManagerClass {
-    fill(points, from, to, onlyLastValue, maintainLastValue) {
-        const min = from !== null && from !== void 0 ? from : this.getMinDate(points);
-        const max = to !== null && to !== void 0 ? to : this.getMaxDate(points);
-        const orderPoints = points.length > 0 ? Algo_1.default.orderBy(points, 'x') : [];
-        const filledPoints = [];
-        const currentDate = new Date(min);
-        while (currentDate <= max) {
-            const monthKey = Helper_1.default.formatDateToYYYYMM(currentDate);
-            filledPoints.push({ x: monthKey, y: 0 });
-            currentDate.setMonth(currentDate.getMonth() + 1);
-        }
-        for (let i = 0; i < orderPoints.length; i++) {
-            const point = orderPoints[i];
-            const date = new Date(point.x);
-            const filledPoint = filledPoints.find(x => x.x === Helper_1.default.formatDateToYYYYMM(date));
-            if (filledPoint) {
-                if (!onlyLastValue)
-                    filledPoint.y += point.y;
-                else
-                    filledPoint.y = point.y;
-                if (maintainLastValue) {
-                    const index = filledPoints.findIndex(x => x.x === Helper_1.default.formatDateToYYYYMM(date));
-                    for (let k = index; k < filledPoints.length; k++) {
-                        const nextFilledPoint = filledPoints[k];
-                        nextFilledPoint.y = filledPoint.y;
-                    }
-                }
-            }
-        }
-        return filledPoints;
-    }
-    getMinDate(points) {
-        if (!points || points.length === 0) {
-            const currentDate = new Date();
-            return new Date(currentDate.getFullYear() - 1, currentDate.getMonth(), currentDate.getDate());
-        }
-        return points.reduce((min, point) => (new Date(point.x) < min ? new Date(point === null || point === void 0 ? void 0 : point.x) : min), new Date(points[0].x));
-    }
-    getMaxDate(points) {
-        if (!points || points.length === 0) {
-            return new Date();
-        }
-        return points.reduce((max, point) => (new Date(point.x) > max ? new Date(point.x) : max), new Date(points[0].x));
-    }
-}
-const DataframeManager = new DataframeManagerClass();
-exports.default = DataframeManager;
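For context, `fill()` above builds one zero-valued bucket per month between `from` and `to`, then accumulates each point's `y` into the bucket matching its month key. A standalone sketch of that idea (not the package's code; its `Helper.formatDateToYYYYMM` is assumed to produce '2024-01'-style keys):

    // One bucket per month in range, then accumulate y values by month key.
    function fillMonths(points, from, to) {
        const key = d => `${d.getUTCFullYear()}-${String(d.getUTCMonth() + 1).padStart(2, '0')}`;
        const buckets = [];
        for (const d = new Date(from); d <= to; d.setUTCMonth(d.getUTCMonth() + 1))
            buckets.push({ x: key(d), y: 0 });
        for (const p of points) {
            const b = buckets.find(b => b.x === key(new Date(p.x)));
            if (b) b.y += p.y;
        }
        return buckets;
    }
    // fillMonths([{ x: '2024-01-15', y: 2 }, { x: '2024-01-20', y: 3 }],
    //            new Date('2024-01-01'), new Date('2024-02-15'))
    // -> [ { x: '2024-01', y: 5 }, { x: '2024-02', y: 0 } ]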
package/engines/ParseManager.js
DELETED
@@ -1,75 +0,0 @@
-"use strict";
-var __importDefault = (this && this.__importDefault) || function (mod) {
-    return (mod && mod.__esModule) ? mod : { "default": mod };
-};
-Object.defineProperty(exports, "__esModule", { value: true });
-const Affirm_1 = __importDefault(require("../core/Affirm"));
-const Environment_1 = __importDefault(require("./Environment"));
-const FileCompiler_1 = __importDefault(require("./file/FileCompiler"));
-class ParseManagerClass {
-    constructor() {
-        this.csvToJson = (csv, producer) => {
-            (0, Affirm_1.default)(csv, 'Invalid csv content');
-            Affirm_1.default.hasValue(csv.length, 'Invalid csv content length');
-            return this.csvLinesToJson(csv, producer);
-        };
-        this.csvLinesToJson = (lines, producer, discover) => {
-            var _a;
-            (0, Affirm_1.default)(lines, 'Invalid csv lines');
-            Affirm_1.default.hasValue(lines.length, 'Invalid csv lines length');
-            const delimiterChar = (_a = producer.settings.delimiter) !== null && _a !== void 0 ? _a : ',';
-            const { header, records } = this._getClassifiedRows(lines, delimiterChar, producer);
-            const headerColumns = this._extractHeader(header, delimiterChar, producer, discover);
-            const rows = records.map(x => x.split(delimiterChar).map(k => k.trim()));
-            const result = [];
-            for (const row of rows) {
-                const rowObject = {};
-                for (let i = 0; i < headerColumns.length; i++) {
-                    const column = headerColumns[i];
-                    rowObject[column.saveAs] = row[column.index];
-                }
-                result.push(rowObject);
-            }
-            return result;
-        };
-        this._getClassifiedRows = (lines, delimiterChar, producer) => {
-            if (producer.settings.fileType === 'TXT' && !producer.settings.hasHeaderRow) {
-                // If the file is a TXT and there isn't an header row, then I add a fake one that maps directly to the producer
-                const source = Environment_1.default.getSource(producer.source);
-                const columns = FileCompiler_1.default.compileProducer(producer, source);
-                return {
-                    header: columns.map(x => x.nameInProducer).join(delimiterChar),
-                    records: lines
-                };
-            }
-            else {
-                return {
-                    header: lines[0],
-                    records: lines.slice(1)
-                };
-            }
-        };
-        this._extractHeader = (headerLine, delimiter, producer, discover) => {
-            var _a;
-            (0, Affirm_1.default)(headerLine, `Invalid CSV header line for producer "${producer.name}"`);
-            (0, Affirm_1.default)(delimiter, 'Invalid CSV delimiter');
-            (0, Affirm_1.default)(producer, 'Invalid producer');
-            const source = Environment_1.default.getSource(producer.source);
-            const columns = FileCompiler_1.default.compileProducer(producer, source);
-            const headerColumns = headerLine.split(delimiter).map(x => x.trim());
-            // If I'm discovering the file, then it means that the dimensions are not set, so I use the ones that I get from the file directly
-            if (discover)
-                columns.push(...headerColumns.map(x => ({ nameInProducer: x })));
-            const csvColumns = [];
-            for (const pColumn of columns) {
-                const columnKey = (_a = pColumn.aliasInProducer) !== null && _a !== void 0 ? _a : pColumn.nameInProducer;
-                const csvColumnIndex = headerColumns.findIndex(x => x === columnKey);
-                (0, Affirm_1.default)(csvColumnIndex > -1, `The column "${pColumn.nameInProducer}" (with key "${columnKey}") of producer "${producer.name}" doesn't exist in the underlying dataset.`);
-                csvColumns.push({ index: csvColumnIndex, name: columnKey, saveAs: pColumn.nameInProducer });
-            }
-            return csvColumns;
-        };
-    }
-}
-const ParseManager = new ParseManagerClass();
-exports.default = ParseManager;
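For context, `_extractHeader` and `csvLinesToJson` above locate each producer column in the CSV header by `aliasInProducer` (falling back to `nameInProducer`) and save the matching cell under `nameInProducer`. A standalone illustration of that mapping (sample data, not from the package):

    const lines = ['id,amt,region', '1,10,eu', '2,7,us'];
    const columns = [{ nameInProducer: 'amount', aliasInProducer: 'amt' }];
    const header = lines[0].split(',').map(s => s.trim());
    const rows = lines.slice(1).map(l => l.split(',').map(s => s.trim()));
    const result = rows.map(row => {
        const out = {};
        // Look up each column by its alias (or name) and rename it on save.
        for (const c of columns)
            out[c.nameInProducer] = row[header.indexOf(c.aliasInProducer ?? c.nameInProducer)];
        return out;
    });
    // result: [ { amount: '10' }, { amount: '7' } ]  (cells stay strings; the parser does no type coercion)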
package/engines/ProducerEngine.js
DELETED
@@ -1,160 +0,0 @@
-"use strict";
-var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
-    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
-    return new (P || (P = Promise))(function (resolve, reject) {
-        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
-        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
-        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
-        step((generator = generator.apply(thisArg, _arguments || [])).next());
-    });
-};
-var __importDefault = (this && this.__importDefault) || function (mod) {
-    return (mod && mod.__esModule) ? mod : { "default": mod };
-};
-Object.defineProperty(exports, "__esModule", { value: true });
-const Affirm_1 = __importDefault(require("../core/Affirm"));
-const DriverFactory_1 = __importDefault(require("../drivers/DriverFactory"));
-const DeploymentPlanner_1 = __importDefault(require("./deployment/DeploymentPlanner"));
-const Environment_1 = __importDefault(require("./Environment"));
-const FileCompiler_1 = __importDefault(require("./file/FileCompiler"));
-const SQLCompiler_1 = __importDefault(require("./sql/SQLCompiler"));
-const SQLUtils_1 = __importDefault(require("./sql/SQLUtils"));
-const DatasetManager_1 = __importDefault(require("./dataset/DatasetManager"));
-const Logger_1 = __importDefault(require("../helper/Logger"));
-class ProducerEngineClass {
-    constructor() {
-        this.compile = (producer) => {
-            (0, Affirm_1.default)(producer, 'Invalid producer');
-            const source = Environment_1.default.getSource(producer.source);
-            (0, Affirm_1.default)(source, `No source found for producer "${producer.name}" with name "${producer.source}"`);
-            switch (source.engine) {
-                case 'aws-redshift':
-                case 'postgres': {
-                    const sql = SQLCompiler_1.default.compileProducer(producer, source);
-                    (0, Affirm_1.default)(sql, `Invalid SQL from compilation for producer "${producer.name}"`);
-                    return sql;
-                }
-                case 'aws-s3': {
-                    const columns = FileCompiler_1.default.compileProducer(producer, source);
-                    (0, Affirm_1.default)(columns, `Invalid columns from compilation for producer "${producer.name}"`);
-                    break;
-                }
-                default: throw new Error(`Invalid engine type "${source.engine}" for producer "${producer.name}": not implemented yet`);
-            }
-        };
-        this.deploy = (producer) => __awaiter(this, void 0, void 0, function* () {
-            (0, Affirm_1.default)(producer, 'Invalid producer');
-            const source = Environment_1.default.getSource(producer.source);
-            (0, Affirm_1.default)(source, `No source found for producer "${producer.name}" with name "${producer.source}"`);
-            const driver = yield DriverFactory_1.default.instantiateSource(source);
-            (0, Affirm_1.default)(driver, `No driver found for producer "${producer.name}" with driver type "${source.engine}"`);
-            const plan = DeploymentPlanner_1.default.planProducer(producer);
-            for (const planStep of plan) {
-                switch (planStep.type) {
-                    case 'create-view': {
-                        const internalSchema = Environment_1.default.get('REMORA_SCHEMA');
-                        (0, Affirm_1.default)(internalSchema, `Missing "REMORA_SCHEMA" on project settings (needed due to "${producer.name}" wanting to create a view)`);
-                        const sql = SQLCompiler_1.default.compileProducer(producer, source);
-                        const vSQL = `CREATE OR REPLACE VIEW "${internalSchema}"."${SQLUtils_1.default.viewName(producer.name)}" AS ${sql}`;
-                        yield driver.execute(vSQL);
-                        break;
-                    }
-                    default: throw new Error(`Invalid execution consumer plan step type "${planStep.type}"`);
-                }
-            }
-        });
-        this.readFile = (producer, options) => __awaiter(this, void 0, void 0, function* () {
-            var _a;
-            (0, Affirm_1.default)(producer, 'Invalid producer');
-            (0, Affirm_1.default)(options, 'Invalid options');
-            if (options.readmode === 'lines')
-                (0, Affirm_1.default)(options.lines, 'Invalid lines');
-            const source = Environment_1.default.getSource(producer.source);
-            (0, Affirm_1.default)(source, `No source found for producer "${producer.name}" with name "${producer.source}"`);
-            const driver = yield DriverFactory_1.default.instantiateSource(source);
-            (0, Affirm_1.default)(driver, `No driver found for producer "${producer.name}" with driver type "${source.engine}"`);
-            const { settings: { fileKey, fileType, sheetName, hasHeaderRow } } = producer;
-            let dataset = DatasetManager_1.default.create(producer);
-            let lines = [];
-            switch (options.readmode) {
-                case 'lines':
-                    lines = yield driver.readLinesInRange({ fileKey, fileType, options: { lineFrom: options.lines.from, lineTo: options.lines.to, sheetName, hasHeaderRow } });
-                    break;
-                case 'all':
-                    lines = yield driver.readAll({ fileKey, fileType, options: { sheetName, hasHeaderRow } });
-                    break;
-                case 'download':
-                    dataset = yield driver.download(dataset);
-                    break;
-            }
-            switch ((_a = producer.settings.fileType) === null || _a === void 0 ? void 0 : _a.toUpperCase()) {
-                case 'CSV':
-                case 'TXT':
-                    return { data: lines, dataset, dataType: 'lines-of-text' };
-                case 'XLS':
-                case 'XLSX':
-                    return { data: lines, dataset, dataType: 'lines-of-text' };
-                case 'JSONL':
-                case 'JSON': {
-                    if (lines.length === 1) {
-                        // Attempt to handle cases where a single line might contain multiple JSON objects separated by newlines
-                        // Or if the entire file content is a single JSON array stringified.
-                        try {
-                            const parsedAsArray = JSON.parse(lines[0]);
-                            if (Array.isArray(parsedAsArray)) {
-                                return { data: parsedAsArray, dataset, dataType: 'array-of-json' };
-                            }
-                        }
-                        catch (error) {
-                            // If parsing as array fails, proceed to split by newline
-                            console.warn('Failed to parse single line as JSON array, splitting by newline:', error);
-                        }
-                        lines = lines[0].split('\\n');
-                    }
-                    const json = lines.filter(line => line.trim() !== '').map(x => JSON.parse(x));
-                    return { data: json, dataset, dataType: 'array-of-json' };
-                }
-                case 'XML': {
-                    // The driver's _readXmlLines method now returns an array of JSON strings.
-                    // Each string needs to be parsed into a JSON object.
-                    const json = lines.filter(line => line.trim() !== '').map(x => JSON.parse(x));
-                    return { data: json, dataset, dataType: 'array-of-json' };
-                }
-                default:
-                    throw new Error(`Invalid file type "${producer.settings.fileType}" for engine type "${source.engine}" for producer "${producer.name}": not supported`);
-            }
-        });
-        this.readSampleData = (producer_1, ...args_1) => __awaiter(this, [producer_1, ...args_1], void 0, function* (producer, sampleSize = 10, discover = false) {
-            (0, Affirm_1.default)(producer, 'Invalid producer');
-            (0, Affirm_1.default)(sampleSize > 0, 'Sample size must be greater than 0');
-            const source = Environment_1.default.getSource(producer.source);
-            (0, Affirm_1.default)(source, `No source found for producer "${producer.name}" with name "${producer.source}"`);
-            let dataset = DatasetManager_1.default.create(producer);
-            switch (source.engine) {
-                case 'aws-redshift': {
-                    const sql = `SELECT * FROM "${source.authentication['schema']}"."${producer.settings.sqlTable}" LIMIT ${sampleSize}`;
-                    (0, Affirm_1.default)(sql, `Invalid SQL from deployment compilation for producer "${producer.name}"`);
-                    const driver = yield DriverFactory_1.default.instantiateSource(source);
-                    (0, Affirm_1.default)(driver, `No driver found for producer "${producer.name}" with driver type "${source.engine}"`);
-                    const res = yield driver.query(sql);
-                    dataset = yield dataset.loadFromMemory(res.rows, producer, discover);
-                    break;
-                }
-                case 'local':
-                case 'aws-s3': {
-                    const fileData = yield this.readFile(producer, { readmode: 'lines', lines: { from: 0, to: sampleSize } });
-                    dataset = yield dataset.loadFromMemory(fileData.data, producer, discover);
-                    break;
-                }
-                default:
-                    throw new Error(`Invalid engine type "${source.engine}" for producer "${producer.name}": not supported`);
-            }
-            const sampleData = [...yield dataset.readLines(sampleSize)];
-            dataset.destroy();
-            Logger_1.default.log(`Finished reading sample dataset:\n${dataset.printStats()}`);
-            return sampleData;
-        });
-    }
-}
-const ProducerEngine = new ProducerEngineClass();
-exports.default = ProducerEngine;
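The only deployment step type the removed `deploy()` handles is 'create-view'. The statement it issues has this shape, shown here with placeholder values (the real schema comes from the `REMORA_SCHEMA` setting, the view name from `SQLUtils.viewName`, and the body from `SQLCompiler.compileProducer`):

    const internalSchema = 'remora_internal';              // placeholder
    const viewName = 'vw_orders';                          // placeholder
    const producerSQL = 'SELECT * FROM "public"."orders"'; // placeholder
    const vSQL = `CREATE OR REPLACE VIEW "${internalSchema}"."${viewName}" AS ${producerSQL}`;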
package/engines/UsageDataManager.js
DELETED
@@ -1,110 +0,0 @@
-"use strict";
-var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
-    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
-    return new (P || (P = Promise))(function (resolve, reject) {
-        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
-        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
-        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
-        step((generator = generator.apply(thisArg, _arguments || [])).next());
-    });
-};
-var __importDefault = (this && this.__importDefault) || function (mod) {
-    return (mod && mod.__esModule) ? mod : { "default": mod };
-};
-Object.defineProperty(exports, "__esModule", { value: true });
-const DSTE_1 = __importDefault(require("../core/dste/DSTE"));
-const DatabaseEngine_1 = __importDefault(require("../database/DatabaseEngine"));
-const DataframeManager_1 = __importDefault(require("./DataframeManager"));
-class UsageDataManager {
-    getUsageDetails() {
-        return __awaiter(this, void 0, void 0, function* () {
-            const now = DSTE_1.default.now();
-            const from = new Date(now.getTime() - 30 * 24 * 60 * 60 * 1000);
-            const prevMonthFrom = new Date(now.getTime() - 60 * 24 * 60 * 60 * 1000);
-            const yearAgo = new Date(now.getFullYear(), now.getMonth() - 11, 1);
-            const collection = 'usage';
-            // Aggregate status counts for current and previous month
-            const getStatusCounts = (start, end) => __awaiter(this, void 0, void 0, function* () {
-                const results = yield DatabaseEngine_1.default.aggregate(collection, [
-                    { $match: { startedAt: { $gte: start, $lte: end } } },
-                    { $group: { _id: '$status', count: { $sum: 1 } } }
-                ]);
-                let success = 0, failed = 0, total = 0;
-                results.forEach((r) => {
-                    total += r.count;
-                    if (r._id === 'success')
-                        success = r.count;
-                    if (r._id === 'failed')
-                        failed = r.count;
-                });
-                return { total, success, failed };
-            });
-            const statusesRequests = yield getStatusCounts(from, now);
-            const prevStatusesRequests = yield getStatusCounts(prevMonthFrom, from);
-            // Monthly success and fails for last 12 months
-            const monthlySuccessPipeline = [
-                { $match: { status: 'success', startedAt: { $gte: yearAgo, $lte: now } } },
-                { $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
-                { $group: { _id: { year: '$year', month: '$month' }, count: { $sum: 1 } } },
-                { $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$count' } },
-                { $sort: { x: 1 } }
-            ];
-            const monthlyFailsPipeline = [
-                { $match: { status: 'failed', startedAt: { $gte: yearAgo, $lte: now } } },
-                { $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
-                { $group: { _id: { year: '$year', month: '$month' }, count: { $sum: 1 } } },
-                { $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$count' } },
-                { $sort: { x: 1 } }
-            ];
-            const rawMonthlySuccess = yield DatabaseEngine_1.default.aggregate(collection, monthlySuccessPipeline);
-            const rawMonthlyFails = yield DatabaseEngine_1.default.aggregate(collection, monthlyFailsPipeline);
-            // Top lines per month for last 12 months
-            const topLinesPipeline = [
-                { $match: { startedAt: { $gte: yearAgo, $lte: now } } },
-                { $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
-                { $group: { _id: { year: '$year', month: '$month' }, itemsCount: { $max: '$itemsCount' } } },
-                { $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$itemsCount' } },
-                { $sort: { x: 1 } }
-            ];
-            const topLines = yield DatabaseEngine_1.default.aggregate(collection, topLinesPipeline);
-            // Top times per month for last 12 months
-            const topTimePipeline = [
-                { $match: { startedAt: { $gte: yearAgo, $lte: now } } },
-                { $addFields: { durationMs: { $subtract: ['$finishedAt', '$startedAt'] }, year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
-                { $group: { _id: { year: '$year', month: '$month' }, maxDuration: { $max: '$durationMs' } } },
-                { $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$maxDuration' } },
-                { $sort: { x: 1 } }
-            ];
-            const topTime = yield DatabaseEngine_1.default.aggregate(collection, topTimePipeline);
-            // Monthly consumers: for each consumer, per month count
-            const consumerPipeline = [
-                { $match: { startedAt: { $gte: yearAgo, $lte: now } } },
-                { $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
-                { $group: { _id: { consumer: '$consumer', year: '$year', month: '$month' }, count: { $sum: 1 } } },
-                { $project: { _id: 0, consumer: '$_id.consumer', x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$count' } },
-                { $sort: { consumer: 1, x: 1 } }
-            ];
-            const consumersData = yield DatabaseEngine_1.default.aggregate(collection, consumerPipeline);
-            // transform to consumer array
-            const consumerMap = {};
-            consumersData.forEach((r) => {
-                consumerMap[r.consumer] = consumerMap[r.consumer] || [];
-                consumerMap[r.consumer].push({ x: r.x, y: r.y });
-            });
-            const consumers = Object.entries(consumerMap).map(([name, data]) => ({ name, data: DataframeManager_1.default.fill(data !== null && data !== void 0 ? data : [], yearAgo, now) }));
-            // Recent executions
-            const recentExecution = yield DatabaseEngine_1.default.query(collection, { startedAt: { $gte: from, $lte: now } }, { sort: { startedAt: -1 }, limit: 10 });
-            return {
-                statusesRequests,
-                prevStatusesRequests,
-                monthlySuccess: DataframeManager_1.default.fill(rawMonthlySuccess !== null && rawMonthlySuccess !== void 0 ? rawMonthlySuccess : [], yearAgo, now),
-                monthlyFails: DataframeManager_1.default.fill(rawMonthlyFails !== null && rawMonthlyFails !== void 0 ? rawMonthlyFails : [], yearAgo, now),
-                consumers: consumers,
-                topLine: DataframeManager_1.default.fill(topLines !== null && topLines !== void 0 ? topLines : [], yearAgo, now),
-                topTime: DataframeManager_1.default.fill(topTime !== null && topTime !== void 0 ? topTime : [], yearAgo, now),
-                recentExecution
-            };
-        });
-    }
-}
-exports.default = new UsageDataManager();
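One detail of the pipelines above: the `$concat` keys use unpadded `$toString` months, so `x` values come out as '2024-3' rather than '2024-03', and the `$sort` on `x` is therefore plain string order:

    ['2024-1', '2024-10', '2024-2'].sort(); // -> ['2024-1', '2024-10', '2024-2']

`DataframeManager.fill` is then applied to these points, re-bucketing them onto its own monthly axis.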
package/engines/UsageManager.js
DELETED
@@ -1,61 +0,0 @@
-"use strict";
-var __importDefault = (this && this.__importDefault) || function (mod) {
-    return (mod && mod.__esModule) ? mod : { "default": mod };
-};
-Object.defineProperty(exports, "__esModule", { value: true });
-const Affirm_1 = __importDefault(require("../core/Affirm"));
-const DSTE_1 = __importDefault(require("../core/dste/DSTE"));
-const Helper_1 = __importDefault(require("../helper/Helper"));
-const DatabaseEngine_1 = __importDefault(require("../database/DatabaseEngine"));
-const Settings_1 = __importDefault(require("../helper/Settings"));
-class UsageManagerClass {
-    constructor() {
-        /**
-         * TODO: I need to group the usage stats into a bucket daily. When and how I do it is still a question...
-         */
-        this.getTodayBucketId = (consumer) => {
-            (0, Affirm_1.default)(consumer, `Invalid consumer`);
-            const now = DSTE_1.default.now();
-            return `${consumer.name}_${now.getUTCFullYear()}_${now.getUTCMonth()}_${now.getUTCDate()}`.toLowerCase();
-        };
-        this.startUsage = (consumer, user) => {
-            const newUsage = {
-                _id: Helper_1.default.uuid(),
-                consumer: consumer.name,
-                startedAt: DSTE_1.default.now(),
-                executedBy: { name: user.name, _id: user._id },
-                itemsCount: -1,
-                status: 'started',
-                _signature: ''
-            };
-            if (Helper_1.default.isDev())
-                return { usageId: newUsage._id, usage: Promise.resolve(newUsage) };
-            const updateRes = DatabaseEngine_1.default.upsert(Settings_1.default.db.collections.usage, newUsage._id, newUsage);
-            return { usageId: newUsage._id, usage: updateRes };
-        };
-        this.endUsage = (usageId, itemsCount) => {
-            const update = {
-                itemsCount: itemsCount,
-                status: 'success',
-                finishedAt: DSTE_1.default.now()
-            };
-            if (Helper_1.default.isDev())
-                return { usageId: null, usage: Promise.resolve(update) };
-            const updateRes = DatabaseEngine_1.default.upsert(Settings_1.default.db.collections.usage, usageId, update);
-            return { usageId: usageId, usage: updateRes };
-        };
-        this.failUsage = (usageId, error) => {
-            const update = {
-                status: 'failed',
-                error: error,
-                finishedAt: DSTE_1.default.now()
-            };
-            if (Helper_1.default.isDev())
-                return { usageId: null, usage: Promise.resolve(update) };
-            const updateRes = DatabaseEngine_1.default.upsert(Settings_1.default.db.collections.usage, usageId, update);
-            return { usageId: usageId, usage: updateRes };
-        };
-    }
-}
-const UsageManager = new UsageManagerClass();
-exports.default = UsageManager;
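A note on `getTodayBucketId` above: `getUTCMonth()` is zero-based, so the generated bucket ids use months 0-11. A quick standalone check of the equivalent format (not the package's code):

    function todayBucketId(consumerName, now = new Date()) {
        // getUTCMonth() is zero-based, so March contributes "_2_".
        return `${consumerName}_${now.getUTCFullYear()}_${now.getUTCMonth()}_${now.getUTCDate()}`.toLowerCase();
    }
    // todayBucketId('Orders', new Date(Date.UTC(2024, 2, 5))) === 'orders_2024_2_5'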