@forzalabs/remora 0.1.4-nasco.3 → 0.1.6-nasco.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Constants.js +1 -1
- package/definitions/json_schemas/consumer-schema.json +6 -2
- package/definitions/json_schemas/producer-schema.json +2 -1
- package/definitions/json_schemas/source-schema.json +14 -1
- package/documentation/README.md +1 -0
- package/documentation/default_resources/consumer.json +7 -7
- package/drivers/DeltaShareDriver.js +178 -0
- package/drivers/DriverFactory.js +6 -0
- package/drivers/DriverHelper.js +15 -0
- package/engines/ai/DeveloperEngine.js +90 -1
- package/engines/consumer/ConsumerEngine.js +1 -1
- package/engines/consumer/PostProcessor.js +22 -15
- package/engines/dataset/Dataset.js +22 -8
- package/engines/dataset/DatasetManager.js +58 -12
- package/engines/dataset/DatasetRecord.js +17 -4
- package/engines/dataset/ParallelDataset.js +16 -6
- package/engines/execution/ExecutionEnvironment.js +13 -4
- package/engines/execution/ExecutionPlanner.js +2 -1
- package/engines/file/FileCompiler.js +2 -1
- package/engines/file/FileExporter.js +12 -3
- package/engines/parsing/ParseManager.js +7 -2
- package/engines/producer/ProducerEngine.js +4 -2
- package/engines/transform/JoinEngine.js +10 -6
- package/engines/transform/TransformationEngine.js +31 -2
- package/engines/transform/TypeCaster.js +12 -4
- package/engines/usage/UsageDataManager.js +110 -0
- package/package.json +2 -1
- package/workers/FilterWorker.js +3 -3
- package/workers/ProjectionWorker.js +3 -3
- package/workers/TransformWorker.js +3 -3
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
+
});
|
|
10
|
+
};
|
|
11
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
12
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
13
|
+
};
|
|
14
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
+
const DSTE_1 = __importDefault(require("../../core/dste/DSTE"));
|
|
16
|
+
const DatabaseEngine_1 = __importDefault(require("../../database/DatabaseEngine"));
|
|
17
|
+
const DataframeManager_1 = __importDefault(require("./DataframeManager"));
|
|
18
|
+
class UsageDataManager {
    /**
     * Collect dashboard usage statistics from the `usage` collection.
     *
     * Returns a Promise resolving to:
     *  - statusesRequests / prevStatusesRequests: {total, success, failed} for the
     *    last 30 days and the 30 days before that
     *  - monthlySuccess / monthlyFails: per-month {x: 'YYYY-M', y: count} series
     *    over the rolling 12-month window
     *  - consumers: one {name, data} series per consumer over the same window
     *  - topLine / topTime: per-month max itemsCount and max duration (ms)
     *  - recentExecution: the 10 most recent execution records
     */
    getUsageDetails() {
        return __awaiter(this, void 0, void 0, function* () {
            const now = DSTE_1.default.now();
            const DAY_MS = 24 * 60 * 60 * 1000;
            const from = new Date(now.getTime() - 30 * DAY_MS);
            const prevMonthFrom = new Date(now.getTime() - 60 * DAY_MS);
            // First day of the month 11 months back => 12 calendar months including the current one.
            const yearAgo = new Date(now.getFullYear(), now.getMonth() - 11, 1);
            const collection = 'usage';
            // Count executions by status inside [start, end].
            // NOTE(review): both bounds are inclusive, so a record timestamped exactly at
            // `from` is counted in BOTH the current and previous 30-day windows — confirm
            // whether that edge overlap is intended.
            const getStatusCounts = (start, end) => __awaiter(this, void 0, void 0, function* () {
                const results = yield DatabaseEngine_1.default.aggregate(collection, [
                    { $match: { startedAt: { $gte: start, $lte: end } } },
                    { $group: { _id: '$status', count: { $sum: 1 } } }
                ]);
                let success = 0, failed = 0, total = 0;
                results.forEach(r => {
                    total += r.count;
                    if (r._id === 'success')
                        success = r.count;
                    if (r._id === 'failed')
                        failed = r.count;
                });
                return { total, success, failed };
            });
            // Shared shape of every per-month series pipeline.
            // BUG FIX: the sort runs on numeric _id.year/_id.month BEFORE projecting the
            // 'YYYY-M' label — the label's month is not zero-padded, so a lexicographic
            // $sort on it would order '2024-10' before '2024-2'.
            const monthlyPipeline = (match, accumulator, valueField, extraFields = {}) => [
                { $match: match },
                { $addFields: Object.assign({ year: { $year: '$startedAt' }, month: { $month: '$startedAt' } }, extraFields) },
                { $group: Object.assign({ _id: { year: '$year', month: '$month' } }, accumulator) },
                { $sort: { '_id.year': 1, '_id.month': 1 } },
                { $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: valueField } }
            ];
            const inWindow = { startedAt: { $gte: yearAgo, $lte: now } };
            // Per-consumer monthly execution counts (same chronological-sort fix as above).
            const consumerPipeline = [
                { $match: inWindow },
                { $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
                { $group: { _id: { consumer: '$consumer', year: '$year', month: '$month' }, count: { $sum: 1 } } },
                { $sort: { '_id.consumer': 1, '_id.year': 1, '_id.month': 1 } },
                { $project: { _id: 0, consumer: '$_id.consumer', x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$count' } }
            ];
            // All eight queries are independent of one another — run them in parallel
            // instead of awaiting each sequentially.
            const [statusesRequests, prevStatusesRequests, rawMonthlySuccess, rawMonthlyFails, topLines, topTime, consumersData, recentExecution] = yield Promise.all([
                getStatusCounts(from, now),
                getStatusCounts(prevMonthFrom, from),
                DatabaseEngine_1.default.aggregate(collection, monthlyPipeline(Object.assign({ status: 'success' }, inWindow), { count: { $sum: 1 } }, '$count')),
                DatabaseEngine_1.default.aggregate(collection, monthlyPipeline(Object.assign({ status: 'failed' }, inWindow), { count: { $sum: 1 } }, '$count')),
                DatabaseEngine_1.default.aggregate(collection, monthlyPipeline(inWindow, { itemsCount: { $max: '$itemsCount' } }, '$itemsCount')),
                // Duration is derived per-document before grouping: finishedAt - startedAt (ms).
                DatabaseEngine_1.default.aggregate(collection, monthlyPipeline(inWindow, { maxDuration: { $max: '$durationMs' } }, '$maxDuration', { durationMs: { $subtract: ['$finishedAt', '$startedAt'] } })),
                DatabaseEngine_1.default.aggregate(collection, consumerPipeline),
                DatabaseEngine_1.default.query(collection, { startedAt: { $gte: from, $lte: now } }, { sort: { startedAt: -1 }, limit: 10 })
            ]);
            // Pivot the flat per-consumer rows into one {name, data} series per consumer.
            const consumerMap = {};
            consumersData.forEach(r => {
                (consumerMap[r.consumer] = consumerMap[r.consumer] || []).push({ x: r.x, y: r.y });
            });
            // Pad a (possibly null/undefined) series so every month in the window is present.
            const fill = (series) => DataframeManager_1.default.fill(series !== null && series !== void 0 ? series : [], yearAgo, now);
            const consumers = Object.entries(consumerMap).map(([name, data]) => ({ name, data: fill(data) }));
            return {
                statusesRequests,
                prevStatusesRequests,
                monthlySuccess: fill(rawMonthlySuccess),
                monthlyFails: fill(rawMonthlyFails),
                consumers: consumers,
                topLine: fill(topLines),
                topTime: fill(topTime),
                recentExecution
            };
        });
    }
}
exports.default = new UsageDataManager();
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@forzalabs/remora",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.6-nasco.3",
|
|
4
4
|
"description": "A powerful CLI tool for seamless data translation.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"private": false,
|
|
@@ -47,6 +47,7 @@
|
|
|
47
47
|
"dotenv": "^16.0.3",
|
|
48
48
|
"fast-xml-parser": "^5.2.3",
|
|
49
49
|
"fs-extra": "^11.1.0",
|
|
50
|
+
"hyparquet": "^1.17.4",
|
|
50
51
|
"inquirer": "^8.2.5",
|
|
51
52
|
"json-schema": "^0.4.0",
|
|
52
53
|
"jsonwebtoken": "^9.0.2",
|
package/workers/FilterWorker.js
CHANGED
|
@@ -22,7 +22,7 @@ dotenv_1.default.configDotenv();
|
|
|
22
22
|
const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
|
|
23
23
|
Environment_1.default.load('./');
|
|
24
24
|
try {
|
|
25
|
-
const { datasetName, fromLine, toLine, workerId, datasetFile, datasetDimensions, datasetDelimiter, filterData: filter } = workerData;
|
|
25
|
+
const { datasetName, fromLine, toLine, workerId, executionId, datasetFile, datasetDimensions, datasetDelimiter, filterData: filter } = workerData;
|
|
26
26
|
Affirm_1.default.hasValue(fromLine, `Invalid from line`);
|
|
27
27
|
Affirm_1.default.hasValue(toLine, `Invalid to line`);
|
|
28
28
|
(0, Affirm_1.default)(datasetName, `Invalid dataset name`);
|
|
@@ -31,9 +31,9 @@ const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
|
|
|
31
31
|
(0, Affirm_1.default)(datasetDimensions, `Invalid dataset dimensions`);
|
|
32
32
|
(0, Affirm_1.default)(filter, `Invalid filter data`);
|
|
33
33
|
(0, Affirm_1.default)(datasetDelimiter, `Invalid dataset delimter`);
|
|
34
|
-
const dataset = new Dataset_1.default(datasetName, datasetFile);
|
|
34
|
+
const dataset = new Dataset_1.default(datasetName, datasetFile, undefined, executionId);
|
|
35
35
|
dataset
|
|
36
|
-
.
|
|
36
|
+
.setDimensions(datasetDimensions)
|
|
37
37
|
.setDelimiter(datasetDelimiter);
|
|
38
38
|
const outputPath = dataset['_tempPath'] + workerId;
|
|
39
39
|
const rules = filter.rules.map(x => x.rule);
|
|
@@ -22,7 +22,7 @@ dotenv_1.default.configDotenv();
|
|
|
22
22
|
const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
|
|
23
23
|
Environment_1.default.load('./');
|
|
24
24
|
try {
|
|
25
|
-
const { datasetName, fromLine, toLine, workerId, datasetFile, datasetDimensions, datasetDelimiter, projectionData } = workerData;
|
|
25
|
+
const { datasetName, fromLine, toLine, workerId, datasetFile, executionId, datasetDimensions, datasetDelimiter, projectionData } = workerData;
|
|
26
26
|
Affirm_1.default.hasValue(fromLine, `Invalid from line`);
|
|
27
27
|
Affirm_1.default.hasValue(toLine, `Invalid to line`);
|
|
28
28
|
(0, Affirm_1.default)(datasetName, `Invalid dataset name`);
|
|
@@ -33,9 +33,9 @@ const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
|
|
|
33
33
|
(0, Affirm_1.default)(datasetDelimiter, `Invalid dataset delimter`);
|
|
34
34
|
const consumer = Environment_1.default.getConsumer(projectionData.consumerName);
|
|
35
35
|
(0, Affirm_1.default)(consumer, `Wrong consumer name sent to projection worker: "${projectionData.consumerName}" not found.`);
|
|
36
|
-
const dataset = new Dataset_1.default(datasetName, datasetFile);
|
|
36
|
+
const dataset = new Dataset_1.default(datasetName, datasetFile, undefined, executionId);
|
|
37
37
|
dataset
|
|
38
|
-
.
|
|
38
|
+
.setDimensions(datasetDimensions)
|
|
39
39
|
.setDelimiter(datasetDelimiter);
|
|
40
40
|
const outputPath = dataset['_tempPath'] + workerId;
|
|
41
41
|
yield PostProcessor_1.default.doProjection(consumer, dataset, { outputPath, range: { fromLine, toLine } });
|
|
@@ -22,7 +22,7 @@ dotenv_1.default.configDotenv();
|
|
|
22
22
|
const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
|
|
23
23
|
Environment_1.default.load('./');
|
|
24
24
|
try {
|
|
25
|
-
const { datasetName, fromLine, toLine, workerId, datasetFile, datasetDimensions, datasetDelimiter, transformData } = workerData;
|
|
25
|
+
const { datasetName, fromLine, toLine, workerId, executionId, datasetFile, datasetDimensions, datasetDelimiter, transformData } = workerData;
|
|
26
26
|
Affirm_1.default.hasValue(fromLine, `Invalid from line`);
|
|
27
27
|
Affirm_1.default.hasValue(toLine, `Invalid to line`);
|
|
28
28
|
(0, Affirm_1.default)(datasetName, `Invalid dataset name`);
|
|
@@ -33,9 +33,9 @@ const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
|
|
|
33
33
|
(0, Affirm_1.default)(datasetDelimiter, `Invalid dataset delimter`);
|
|
34
34
|
const consumer = Environment_1.default.getConsumer(transformData.consumerName);
|
|
35
35
|
(0, Affirm_1.default)(consumer, `Wrong consumer name sent to projection worker: "${transformData.consumerName}" not found.`);
|
|
36
|
-
const dataset = new Dataset_1.default(datasetName, datasetFile);
|
|
36
|
+
const dataset = new Dataset_1.default(datasetName, datasetFile, undefined, executionId);
|
|
37
37
|
dataset
|
|
38
|
-
.
|
|
38
|
+
.setDimensions(datasetDimensions)
|
|
39
39
|
.setDelimiter(datasetDelimiter);
|
|
40
40
|
const outputPath = dataset['_tempPath'] + workerId;
|
|
41
41
|
yield TransformationEngine_1.default.apply(consumer, dataset, { outputPath, range: { fromLine, toLine } });
|