@forzalabs/remora 0.1.3-nasco.3 → 0.1.5-nasco.3
- package/Constants.js +1 -1
- package/definitions/json_schemas/consumer-schema.json +9 -1
- package/definitions/json_schemas/producer-schema.json +2 -1
- package/definitions/json_schemas/source-schema.json +14 -1
- package/documentation/README.md +1 -0
- package/documentation/default_resources/consumer.json +7 -7
- package/drivers/DeltaShareDriver.js +178 -0
- package/drivers/DriverFactory.js +6 -0
- package/drivers/DriverHelper.js +16 -1
- package/drivers/LocalDriver.js +1 -0
- package/drivers/S3Driver.js +1 -0
- package/engines/ai/DeveloperEngine.js +90 -1
- package/engines/consumer/ConsumerEngine.js +1 -1
- package/engines/consumer/PostProcessor.js +27 -18
- package/engines/dataset/Dataset.js +18 -7
- package/engines/dataset/DatasetManager.js +58 -12
- package/engines/dataset/DatasetRecord.js +17 -4
- package/engines/dataset/ParallelDataset.js +29 -7
- package/engines/execution/ExecutionEnvironment.js +13 -4
- package/engines/execution/ExecutionPlanner.js +2 -1
- package/engines/file/FileCompiler.js +2 -1
- package/engines/file/FileExporter.js +12 -3
- package/engines/parsing/ParseManager.js +7 -2
- package/engines/producer/ProducerEngine.js +4 -2
- package/engines/transform/JoinEngine.js +10 -6
- package/engines/transform/TransformationEngine.js +35 -3
- package/engines/transform/TypeCaster.js +20 -9
- package/engines/usage/UsageDataManager.js +110 -0
- package/engines/validation/Validator.js +0 -3
- package/package.json +3 -1
- package/workers/FilterWorker.js +3 -3
- package/workers/ProjectionWorker.js +3 -3
- package/workers/TransformWorker.js +3 -3
package/engines/transform/TypeCaster.js
CHANGED

@@ -4,34 +4,45 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
 };
 Object.defineProperty(exports, "__esModule", { value: true });
 const Algo_1 = __importDefault(require("../../core/Algo"));
+const dayjs_1 = __importDefault(require("dayjs"));
+const customParseFormat_1 = __importDefault(require("dayjs/plugin/customParseFormat"));
+const utc_1 = __importDefault(require("dayjs/plugin/utc"));
+dayjs_1.default.extend(customParseFormat_1.default);
+dayjs_1.default.extend(utc_1.default);
 class TypeCasterClass {
     /**
      * Casts the value to the requested type (only if needed)
+     * Optional format parameter currently supports:
+     * - Parsing dates (type 'date'/'datetime') from string with tokens: yyyy, mm, dd
+     * - Formatting dates when casting to string with same tokens
      */
-    cast(value, type) {
+    cast(value, type, format) {
         if (!Algo_1.default.hasVal(value))
             return value;
         switch (type) {
             case 'boolean': {
                 if (typeof value === 'boolean')
                     return value;
-
-                return Boolean(value);
+                return Boolean(value);
             }
             case 'datetime':
-            case 'date':
-
+            case 'date': {
+                let dateValue = null;
+                if (format && typeof value === 'string')
+                    dateValue = dayjs_1.default.utc(value, format, true).toDate();
+                else
+                    dateValue = new Date(value);
+                return dateValue.toISOString();
+            }
             case 'number': {
                 if (typeof value === 'number')
                     return value;
-
-                return Number(value);
+                return Number(value);
            }
             case 'string': {
                 if (typeof value === 'string')
                     return value;
-
-                return String(value);
+                return String(value);
             }
         }
     }
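The new optional format argument only changes behavior for string inputs; everything else still flows through new Date(value). A minimal usage sketch follows (the require path and the instance export are assumptions, not confirmed by the diff). Note that dayjs's customParseFormat plugin matches uppercase tokens (YYYY, MM, DD), so unless the lowercase tokens from the docblock are normalized elsewhere, a caller would pass the uppercase form:

// Hypothetical usage sketch; the import path is assumed, not confirmed.
const TypeCaster = require('@forzalabs/remora/engines/transform/TypeCaster').default;

// Without a format: falls back to new Date(value), engine/locale dependent.
TypeCaster.cast('03/01/2024', 'date');               // ISO string via Date parsing

// With a format: strict UTC parse through dayjs (uppercase tokens).
TypeCaster.cast('03/01/2024', 'date', 'MM/DD/YYYY'); // '2024-03-01T00:00:00.000Z'

// Caveat: a string that fails the strict parse yields an Invalid Date,
// so the subsequent toISOString() throws a RangeError.
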
package/engines/usage/UsageDataManager.js
ADDED

@@ -0,0 +1,110 @@
+"use strict";
+var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
+    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
+    return new (P || (P = Promise))(function (resolve, reject) {
+        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
+        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
+        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
+        step((generator = generator.apply(thisArg, _arguments || [])).next());
+    });
+};
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+const DSTE_1 = __importDefault(require("../../core/dste/DSTE"));
+const DatabaseEngine_1 = __importDefault(require("../../database/DatabaseEngine"));
+const DataframeManager_1 = __importDefault(require("./DataframeManager"));
+class UsageDataManager {
+    getUsageDetails() {
+        return __awaiter(this, void 0, void 0, function* () {
+            const now = DSTE_1.default.now();
+            const from = new Date(now.getTime() - 30 * 24 * 60 * 60 * 1000);
+            const prevMonthFrom = new Date(now.getTime() - 60 * 24 * 60 * 60 * 1000);
+            const yearAgo = new Date(now.getFullYear(), now.getMonth() - 11, 1);
+            const collection = 'usage';
+            // Aggregate status counts for current and previous month
+            const getStatusCounts = (start, end) => __awaiter(this, void 0, void 0, function* () {
+                const results = yield DatabaseEngine_1.default.aggregate(collection, [
+                    { $match: { startedAt: { $gte: start, $lte: end } } },
+                    { $group: { _id: '$status', count: { $sum: 1 } } }
+                ]);
+                let success = 0, failed = 0, total = 0;
+                results.forEach(r => {
+                    total += r.count;
+                    if (r._id === 'success')
+                        success = r.count;
+                    if (r._id === 'failed')
+                        failed = r.count;
+                });
+                return { total, success, failed };
+            });
+            const statusesRequests = yield getStatusCounts(from, now);
+            const prevStatusesRequests = yield getStatusCounts(prevMonthFrom, from);
+            // Monthly success and fails for last 12 months
+            const monthlySuccessPipeline = [
+                { $match: { status: 'success', startedAt: { $gte: yearAgo, $lte: now } } },
+                { $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
+                { $group: { _id: { year: '$year', month: '$month' }, count: { $sum: 1 } } },
+                { $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$count' } },
+                { $sort: { x: 1 } }
+            ];
+            const monthlyFailsPipeline = [
+                { $match: { status: 'failed', startedAt: { $gte: yearAgo, $lte: now } } },
+                { $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
+                { $group: { _id: { year: '$year', month: '$month' }, count: { $sum: 1 } } },
+                { $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$count' } },
+                { $sort: { x: 1 } }
+            ];
+            const rawMonthlySuccess = yield DatabaseEngine_1.default.aggregate(collection, monthlySuccessPipeline);
+            const rawMonthlyFails = yield DatabaseEngine_1.default.aggregate(collection, monthlyFailsPipeline);
+            // Top lines per month for last 12 months
+            const topLinesPipeline = [
+                { $match: { startedAt: { $gte: yearAgo, $lte: now } } },
+                { $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
+                { $group: { _id: { year: '$year', month: '$month' }, itemsCount: { $max: '$itemsCount' } } },
+                { $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$itemsCount' } },
+                { $sort: { x: 1 } }
+            ];
+            const topLines = yield DatabaseEngine_1.default.aggregate(collection, topLinesPipeline);
+            // Top times per month for last 12 months
+            const topTimePipeline = [
+                { $match: { startedAt: { $gte: yearAgo, $lte: now } } },
+                { $addFields: { durationMs: { $subtract: ['$finishedAt', '$startedAt'] }, year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
+                { $group: { _id: { year: '$year', month: '$month' }, maxDuration: { $max: '$durationMs' } } },
+                { $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$maxDuration' } },
+                { $sort: { x: 1 } }
+            ];
+            const topTime = yield DatabaseEngine_1.default.aggregate(collection, topTimePipeline);
+            // Monthly consumers: for each consumer, per month count
+            const consumerPipeline = [
+                { $match: { startedAt: { $gte: yearAgo, $lte: now } } },
+                { $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
+                { $group: { _id: { consumer: '$consumer', year: '$year', month: '$month' }, count: { $sum: 1 } } },
+                { $project: { _id: 0, consumer: '$_id.consumer', x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$count' } },
+                { $sort: { consumer: 1, x: 1 } }
+            ];
+            const consumersData = yield DatabaseEngine_1.default.aggregate(collection, consumerPipeline);
+            // transform to consumer array
+            const consumerMap = {};
+            consumersData.forEach(r => {
+                consumerMap[r.consumer] = consumerMap[r.consumer] || [];
+                consumerMap[r.consumer].push({ x: r.x, y: r.y });
+            });
+            const consumers = Object.entries(consumerMap).map(([name, data]) => ({ name, data: DataframeManager_1.default.fill(data !== null && data !== void 0 ? data : [], yearAgo, now) }));
+            // Recent executions
+            const recentExecution = yield DatabaseEngine_1.default.query(collection, { startedAt: { $gte: from, $lte: now } }, { sort: { startedAt: -1 }, limit: 10 });
+            return {
+                statusesRequests,
+                prevStatusesRequests,
+                monthlySuccess: DataframeManager_1.default.fill(rawMonthlySuccess !== null && rawMonthlySuccess !== void 0 ? rawMonthlySuccess : [], yearAgo, now),
+                monthlyFails: DataframeManager_1.default.fill(rawMonthlyFails !== null && rawMonthlyFails !== void 0 ? rawMonthlyFails : [], yearAgo, now),
+                consumers: consumers,
+                topLine: DataframeManager_1.default.fill(topLines !== null && topLines !== void 0 ? topLines : [], yearAgo, now),
+                topTime: DataframeManager_1.default.fill(topTime !== null && topTime !== void 0 ? topTime : [], yearAgo, now),
+                recentExecution
+            };
+        });
+    }
+}
+exports.default = new UsageDataManager();
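One behavioral detail in these pipelines: $toString on the numeric $month yields unpadded keys ('2024-3', not '2024-03'), so the $sort on x is lexicographic rather than chronological once October through December enter the window; presumably DataframeManager_1.default.fill re-aligns the buckets. A standalone illustration (not package code):

// Illustrative only: reproduces the x keys the $project stages emit.
const monthKey = (d) => `${d.getUTCFullYear()}-${d.getUTCMonth() + 1}`;

console.log(monthKey(new Date(Date.UTC(2024, 1, 15)))); // "2024-2"
console.log(monthKey(new Date(Date.UTC(2024, 9, 1))));  // "2024-10"

// Lexicographic order differs from chronological order for these keys:
console.log(['2024-10', '2024-2'].sort()); // [ '2024-10', '2024-2' ]
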
package/engines/validation/Validator.js
CHANGED

@@ -154,9 +154,6 @@ class ValidatorClass {
             else
                 trxToValidate.push(field.transform);
             for (const trans of trxToValidate) {
-                const trxKeys = Object.keys(trans);
-                if (trxKeys.length !== 1)
-                    errors.push(`There can only be 1 transformation type in your transformation pipeline. Field "${field.key}" got ${trxKeys.length}.`);
                 if ('combine_fields' in trans) {
                     const { combine_fields } = trans;
                     if (!combine_fields.fields || combine_fields.fields.length === 0)
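The net effect of the removed check: a transform entry carrying more than one key no longer fails validation at this stage, while each recognized key (such as combine_fields) is still validated on its own. An illustrative sketch; the second key below is a placeholder, not a real transformation type from the package:

// Illustrative only: "some_other_transform" is a placeholder key.
const trans = {
    combine_fields: { fields: ['first_name', 'last_name'] },
    some_other_transform: {}
};
// 0.1.3-nasco.3: rejected with 'There can only be 1 transformation type...'
// 0.1.5-nasco.3: passes this check; each known key is still validated individually.
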
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@forzalabs/remora",
-  "version": "0.1.3-nasco.3",
+  "version": "0.1.5-nasco.3",
   "description": "A powerful CLI tool for seamless data translation.",
   "main": "index.js",
   "private": false,
@@ -43,9 +43,11 @@
     "chalk": "^4.1.2",
     "commander": "^10.0.0",
     "cross-env": "^7.0.3",
+    "dayjs": "^1.11.13",
     "dotenv": "^16.0.3",
     "fast-xml-parser": "^5.2.3",
     "fs-extra": "^11.1.0",
+    "hyparquet": "^1.17.4",
     "inquirer": "^8.2.5",
     "json-schema": "^0.4.0",
     "jsonwebtoken": "^9.0.2",
package/workers/FilterWorker.js
CHANGED

@@ -22,7 +22,7 @@ dotenv_1.default.configDotenv();
 const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
     Environment_1.default.load('./');
     try {
-        const { datasetName, fromLine, toLine, workerId, datasetFile, datasetDimensions, datasetDelimiter, filterData: filter } = workerData;
+        const { datasetName, fromLine, toLine, workerId, executionId, datasetFile, datasetDimensions, datasetDelimiter, filterData: filter } = workerData;
         Affirm_1.default.hasValue(fromLine, `Invalid from line`);
         Affirm_1.default.hasValue(toLine, `Invalid to line`);
         (0, Affirm_1.default)(datasetName, `Invalid dataset name`);
@@ -31,9 +31,9 @@ const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
         (0, Affirm_1.default)(datasetDimensions, `Invalid dataset dimensions`);
         (0, Affirm_1.default)(filter, `Invalid filter data`);
         (0, Affirm_1.default)(datasetDelimiter, `Invalid dataset delimter`);
-        const dataset = new Dataset_1.default(datasetName, datasetFile);
+        const dataset = new Dataset_1.default(datasetName, datasetFile, undefined, executionId);
         dataset
-            .
+            .setDimensions(datasetDimensions)
             .setDelimiter(datasetDelimiter);
         const outputPath = dataset['_tempPath'] + workerId;
         const rules = filter.rules.map(x => x.rule);
package/workers/ProjectionWorker.js
CHANGED

@@ -22,7 +22,7 @@ dotenv_1.default.configDotenv();
 const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
     Environment_1.default.load('./');
     try {
-        const { datasetName, fromLine, toLine, workerId, datasetFile, datasetDimensions, datasetDelimiter, projectionData } = workerData;
+        const { datasetName, fromLine, toLine, workerId, datasetFile, executionId, datasetDimensions, datasetDelimiter, projectionData } = workerData;
         Affirm_1.default.hasValue(fromLine, `Invalid from line`);
         Affirm_1.default.hasValue(toLine, `Invalid to line`);
         (0, Affirm_1.default)(datasetName, `Invalid dataset name`);
@@ -33,9 +33,9 @@ const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
         (0, Affirm_1.default)(datasetDelimiter, `Invalid dataset delimter`);
         const consumer = Environment_1.default.getConsumer(projectionData.consumerName);
         (0, Affirm_1.default)(consumer, `Wrong consumer name sent to projection worker: "${projectionData.consumerName}" not found.`);
-        const dataset = new Dataset_1.default(datasetName, datasetFile);
+        const dataset = new Dataset_1.default(datasetName, datasetFile, undefined, executionId);
         dataset
-            .
+            .setDimensions(datasetDimensions)
             .setDelimiter(datasetDelimiter);
         const outputPath = dataset['_tempPath'] + workerId;
         yield PostProcessor_1.default.doProjection(consumer, dataset, { outputPath, range: { fromLine, toLine } });
package/workers/TransformWorker.js
CHANGED

@@ -22,7 +22,7 @@ dotenv_1.default.configDotenv();
 const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
     Environment_1.default.load('./');
     try {
-        const { datasetName, fromLine, toLine, workerId, datasetFile, datasetDimensions, datasetDelimiter, transformData } = workerData;
+        const { datasetName, fromLine, toLine, workerId, executionId, datasetFile, datasetDimensions, datasetDelimiter, transformData } = workerData;
         Affirm_1.default.hasValue(fromLine, `Invalid from line`);
         Affirm_1.default.hasValue(toLine, `Invalid to line`);
         (0, Affirm_1.default)(datasetName, `Invalid dataset name`);
@@ -33,9 +33,9 @@ const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
         (0, Affirm_1.default)(datasetDelimiter, `Invalid dataset delimter`);
         const consumer = Environment_1.default.getConsumer(transformData.consumerName);
         (0, Affirm_1.default)(consumer, `Wrong consumer name sent to projection worker: "${transformData.consumerName}" not found.`);
-        const dataset = new Dataset_1.default(datasetName, datasetFile);
+        const dataset = new Dataset_1.default(datasetName, datasetFile, undefined, executionId);
         dataset
-            .
+            .setDimensions(datasetDimensions)
             .setDelimiter(datasetDelimiter);
         const outputPath = dataset['_tempPath'] + workerId;
         yield TransformationEngine_1.default.apply(consumer, dataset, { outputPath, range: { fromLine, toLine } });
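
All three workers now destructure executionId from workerData and forward it as the fourth Dataset constructor argument (the third, still undefined here, is unchanged). A hedged sketch of a calling side, assuming the stock worker_threads wiring; only the workerData keys are confirmed by the diff, while the values and the dimensions shape are invented:

// Hypothetical invocation sketch; field values are illustrative only.
const { Worker } = require('worker_threads');

const worker = new Worker('./workers/FilterWorker.js', {
    workerData: {
        datasetName: 'orders',
        datasetFile: '/tmp/orders.csv',
        datasetDimensions: { /* shape assumed; checked by the Affirm calls above */ },
        datasetDelimiter: ',',
        fromLine: 0,
        toLine: 499,
        workerId: 1,
        executionId: 'exec-123', // new in this version; forwarded into Dataset
        filterData: { rules: [] }
    }
});
worker.on('error', (err) => console.error(err));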