@forzalabs/remora 0.1.3-nasco.3 → 0.1.5-nasco.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. package/Constants.js +1 -1
  2. package/definitions/json_schemas/consumer-schema.json +9 -1
  3. package/definitions/json_schemas/producer-schema.json +2 -1
  4. package/definitions/json_schemas/source-schema.json +14 -1
  5. package/documentation/README.md +1 -0
  6. package/documentation/default_resources/consumer.json +7 -7
  7. package/drivers/DeltaShareDriver.js +178 -0
  8. package/drivers/DriverFactory.js +6 -0
  9. package/drivers/DriverHelper.js +16 -1
  10. package/drivers/LocalDriver.js +1 -0
  11. package/drivers/S3Driver.js +1 -0
  12. package/engines/ai/DeveloperEngine.js +90 -1
  13. package/engines/consumer/ConsumerEngine.js +1 -1
  14. package/engines/consumer/PostProcessor.js +27 -18
  15. package/engines/dataset/Dataset.js +18 -7
  16. package/engines/dataset/DatasetManager.js +58 -12
  17. package/engines/dataset/DatasetRecord.js +17 -4
  18. package/engines/dataset/ParallelDataset.js +29 -7
  19. package/engines/execution/ExecutionEnvironment.js +13 -4
  20. package/engines/execution/ExecutionPlanner.js +2 -1
  21. package/engines/file/FileCompiler.js +2 -1
  22. package/engines/file/FileExporter.js +12 -3
  23. package/engines/parsing/ParseManager.js +7 -2
  24. package/engines/producer/ProducerEngine.js +4 -2
  25. package/engines/transform/JoinEngine.js +10 -6
  26. package/engines/transform/TransformationEngine.js +35 -3
  27. package/engines/transform/TypeCaster.js +20 -9
  28. package/engines/usage/UsageDataManager.js +110 -0
  29. package/engines/validation/Validator.js +0 -3
  30. package/package.json +3 -1
  31. package/workers/FilterWorker.js +3 -3
  32. package/workers/ProjectionWorker.js +3 -3
  33. package/workers/TransformWorker.js +3 -3
package/engines/transform/TypeCaster.js CHANGED
@@ -4,34 +4,45 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
6
  const Algo_1 = __importDefault(require("../../core/Algo"));
7
+ const dayjs_1 = __importDefault(require("dayjs"));
8
+ const customParseFormat_1 = __importDefault(require("dayjs/plugin/customParseFormat"));
9
+ const utc_1 = __importDefault(require("dayjs/plugin/utc"));
10
+ dayjs_1.default.extend(customParseFormat_1.default);
11
+ dayjs_1.default.extend(utc_1.default);
7
12
  class TypeCasterClass {
8
13
  /**
9
14
  * Casts the value to the requested type (only if needed)
15
+ * Optional format parameter currently supports:
16
+ * - Parsing dates (type 'date'/'datetime') from string with tokens: yyyy, mm, dd
17
+ * - Formatting dates when casting to string with same tokens
10
18
  */
11
- cast(value, type) {
19
+ cast(value, type, format) {
12
20
  if (!Algo_1.default.hasVal(value))
13
21
  return value;
14
22
  switch (type) {
15
23
  case 'boolean': {
16
24
  if (typeof value === 'boolean')
17
25
  return value;
18
- else
19
- return Boolean(value);
26
+ return Boolean(value);
20
27
  }
21
28
  case 'datetime':
22
- case 'date':
23
- return new Date(value);
29
+ case 'date': {
30
+ let dateValue = null;
31
+ if (format && typeof value === 'string')
32
+ dateValue = dayjs_1.default.utc(value, format, true).toDate();
33
+ else
34
+ dateValue = new Date(value);
35
+ return dateValue.toISOString();
36
+ }
24
37
  case 'number': {
25
38
  if (typeof value === 'number')
26
39
  return value;
27
- else
28
- return Number(value);
40
+ return Number(value);
29
41
  }
30
42
  case 'string': {
31
43
  if (typeof value === 'string')
32
44
  return value;
33
- else
34
- return String(value);
45
+ return String(value);
35
46
  }
36
47
  }
37
48
  }
package/engines/usage/UsageDataManager.js ADDED
@@ -0,0 +1,110 @@
1
+ "use strict";
2
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
+ return new (P || (P = Promise))(function (resolve, reject) {
5
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
9
+ });
10
+ };
11
+ var __importDefault = (this && this.__importDefault) || function (mod) {
12
+ return (mod && mod.__esModule) ? mod : { "default": mod };
13
+ };
14
+ Object.defineProperty(exports, "__esModule", { value: true });
15
+ const DSTE_1 = __importDefault(require("../../core/dste/DSTE"));
16
+ const DatabaseEngine_1 = __importDefault(require("../../database/DatabaseEngine"));
17
+ const DataframeManager_1 = __importDefault(require("./DataframeManager"));
18
+ class UsageDataManager {
19
+ getUsageDetails() {
20
+ return __awaiter(this, void 0, void 0, function* () {
21
+ const now = DSTE_1.default.now();
22
+ const from = new Date(now.getTime() - 30 * 24 * 60 * 60 * 1000);
23
+ const prevMonthFrom = new Date(now.getTime() - 60 * 24 * 60 * 60 * 1000);
24
+ const yearAgo = new Date(now.getFullYear(), now.getMonth() - 11, 1);
25
+ const collection = 'usage';
26
+ // Aggregate status counts for current and previous month
27
+ const getStatusCounts = (start, end) => __awaiter(this, void 0, void 0, function* () {
28
+ const results = yield DatabaseEngine_1.default.aggregate(collection, [
29
+ { $match: { startedAt: { $gte: start, $lte: end } } },
30
+ { $group: { _id: '$status', count: { $sum: 1 } } }
31
+ ]);
32
+ let success = 0, failed = 0, total = 0;
33
+ results.forEach(r => {
34
+ total += r.count;
35
+ if (r._id === 'success')
36
+ success = r.count;
37
+ if (r._id === 'failed')
38
+ failed = r.count;
39
+ });
40
+ return { total, success, failed };
41
+ });
42
+ const statusesRequests = yield getStatusCounts(from, now);
43
+ const prevStatusesRequests = yield getStatusCounts(prevMonthFrom, from);
44
+ // Monthly success and fails for last 12 months
45
+ const monthlySuccessPipeline = [
46
+ { $match: { status: 'success', startedAt: { $gte: yearAgo, $lte: now } } },
47
+ { $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
48
+ { $group: { _id: { year: '$year', month: '$month' }, count: { $sum: 1 } } },
49
+ { $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$count' } },
50
+ { $sort: { x: 1 } }
51
+ ];
52
+ const monthlyFailsPipeline = [
53
+ { $match: { status: 'failed', startedAt: { $gte: yearAgo, $lte: now } } },
54
+ { $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
55
+ { $group: { _id: { year: '$year', month: '$month' }, count: { $sum: 1 } } },
56
+ { $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$count' } },
57
+ { $sort: { x: 1 } }
58
+ ];
59
+ const rawMonthlySuccess = yield DatabaseEngine_1.default.aggregate(collection, monthlySuccessPipeline);
60
+ const rawMonthlyFails = yield DatabaseEngine_1.default.aggregate(collection, monthlyFailsPipeline);
61
+ // Top lines per month for last 12 months
62
+ const topLinesPipeline = [
63
+ { $match: { startedAt: { $gte: yearAgo, $lte: now } } },
64
+ { $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
65
+ { $group: { _id: { year: '$year', month: '$month' }, itemsCount: { $max: '$itemsCount' } } },
66
+ { $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$itemsCount' } },
67
+ { $sort: { x: 1 } }
68
+ ];
69
+ const topLines = yield DatabaseEngine_1.default.aggregate(collection, topLinesPipeline);
70
+ // Top times per month for last 12 months
71
+ const topTimePipeline = [
72
+ { $match: { startedAt: { $gte: yearAgo, $lte: now } } },
73
+ { $addFields: { durationMs: { $subtract: ['$finishedAt', '$startedAt'] }, year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
74
+ { $group: { _id: { year: '$year', month: '$month' }, maxDuration: { $max: '$durationMs' } } },
75
+ { $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$maxDuration' } },
76
+ { $sort: { x: 1 } }
77
+ ];
78
+ const topTime = yield DatabaseEngine_1.default.aggregate(collection, topTimePipeline);
79
+ // Monthly consumers: for each consumer, per month count
80
+ const consumerPipeline = [
81
+ { $match: { startedAt: { $gte: yearAgo, $lte: now } } },
82
+ { $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
83
+ { $group: { _id: { consumer: '$consumer', year: '$year', month: '$month' }, count: { $sum: 1 } } },
84
+ { $project: { _id: 0, consumer: '$_id.consumer', x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$count' } },
85
+ { $sort: { consumer: 1, x: 1 } }
86
+ ];
87
+ const consumersData = yield DatabaseEngine_1.default.aggregate(collection, consumerPipeline);
88
+ // transform to consumer array
89
+ const consumerMap = {};
90
+ consumersData.forEach(r => {
91
+ consumerMap[r.consumer] = consumerMap[r.consumer] || [];
92
+ consumerMap[r.consumer].push({ x: r.x, y: r.y });
93
+ });
94
+ const consumers = Object.entries(consumerMap).map(([name, data]) => ({ name, data: DataframeManager_1.default.fill(data !== null && data !== void 0 ? data : [], yearAgo, now) }));
95
+ // Recent executions
96
+ const recentExecution = yield DatabaseEngine_1.default.query(collection, { startedAt: { $gte: from, $lte: now } }, { sort: { startedAt: -1 }, limit: 10 });
97
+ return {
98
+ statusesRequests,
99
+ prevStatusesRequests,
100
+ monthlySuccess: DataframeManager_1.default.fill(rawMonthlySuccess !== null && rawMonthlySuccess !== void 0 ? rawMonthlySuccess : [], yearAgo, now),
101
+ monthlyFails: DataframeManager_1.default.fill(rawMonthlyFails !== null && rawMonthlyFails !== void 0 ? rawMonthlyFails : [], yearAgo, now),
102
+ consumers: consumers,
103
+ topLine: DataframeManager_1.default.fill(topLines !== null && topLines !== void 0 ? topLines : [], yearAgo, now),
104
+ topTime: DataframeManager_1.default.fill(topTime !== null && topTime !== void 0 ? topTime : [], yearAgo, now),
105
+ recentExecution
106
+ };
107
+ });
108
+ }
109
+ }
110
+ exports.default = new UsageDataManager();
package/engines/validation/Validator.js CHANGED
@@ -154,9 +154,6 @@ class ValidatorClass {
154
154
  else
155
155
  trxToValidate.push(field.transform);
156
156
  for (const trans of trxToValidate) {
157
- const trxKeys = Object.keys(trans);
158
- if (trxKeys.length !== 1)
159
- errors.push(`There can only be 1 transformation type in your transformation pipeline. Field "${field.key}" got ${trxKeys.length}.`);
160
157
  if ('combine_fields' in trans) {
161
158
  const { combine_fields } = trans;
162
159
  if (!combine_fields.fields || combine_fields.fields.length === 0)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forzalabs/remora",
3
- "version": "0.1.3-nasco.3",
3
+ "version": "0.1.5-nasco.3",
4
4
  "description": "A powerful CLI tool for seamless data translation.",
5
5
  "main": "index.js",
6
6
  "private": false,
@@ -43,9 +43,11 @@
43
43
  "chalk": "^4.1.2",
44
44
  "commander": "^10.0.0",
45
45
  "cross-env": "^7.0.3",
46
+ "dayjs": "^1.11.13",
46
47
  "dotenv": "^16.0.3",
47
48
  "fast-xml-parser": "^5.2.3",
48
49
  "fs-extra": "^11.1.0",
50
+ "hyparquet": "^1.17.4",
49
51
  "inquirer": "^8.2.5",
50
52
  "json-schema": "^0.4.0",
51
53
  "jsonwebtoken": "^9.0.2",
package/workers/FilterWorker.js CHANGED
@@ -22,7 +22,7 @@ dotenv_1.default.configDotenv();
22
22
  const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
23
23
  Environment_1.default.load('./');
24
24
  try {
25
- const { datasetName, fromLine, toLine, workerId, datasetFile, datasetDimensions, datasetDelimiter, filterData: filter } = workerData;
25
+ const { datasetName, fromLine, toLine, workerId, executionId, datasetFile, datasetDimensions, datasetDelimiter, filterData: filter } = workerData;
26
26
  Affirm_1.default.hasValue(fromLine, `Invalid from line`);
27
27
  Affirm_1.default.hasValue(toLine, `Invalid to line`);
28
28
  (0, Affirm_1.default)(datasetName, `Invalid dataset name`);
@@ -31,9 +31,9 @@ const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
31
31
  (0, Affirm_1.default)(datasetDimensions, `Invalid dataset dimensions`);
32
32
  (0, Affirm_1.default)(filter, `Invalid filter data`);
33
33
  (0, Affirm_1.default)(datasetDelimiter, `Invalid dataset delimter`);
34
- const dataset = new Dataset_1.default(datasetName, datasetFile);
34
+ const dataset = new Dataset_1.default(datasetName, datasetFile, undefined, executionId);
35
35
  dataset
36
- .setDimensinons(datasetDimensions)
36
+ .setDimensions(datasetDimensions)
37
37
  .setDelimiter(datasetDelimiter);
38
38
  const outputPath = dataset['_tempPath'] + workerId;
39
39
  const rules = filter.rules.map(x => x.rule);
package/workers/ProjectionWorker.js CHANGED
@@ -22,7 +22,7 @@ dotenv_1.default.configDotenv();
22
22
  const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
23
23
  Environment_1.default.load('./');
24
24
  try {
25
- const { datasetName, fromLine, toLine, workerId, datasetFile, datasetDimensions, datasetDelimiter, projectionData } = workerData;
25
+ const { datasetName, fromLine, toLine, workerId, datasetFile, executionId, datasetDimensions, datasetDelimiter, projectionData } = workerData;
26
26
  Affirm_1.default.hasValue(fromLine, `Invalid from line`);
27
27
  Affirm_1.default.hasValue(toLine, `Invalid to line`);
28
28
  (0, Affirm_1.default)(datasetName, `Invalid dataset name`);
@@ -33,9 +33,9 @@ const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
33
33
  (0, Affirm_1.default)(datasetDelimiter, `Invalid dataset delimter`);
34
34
  const consumer = Environment_1.default.getConsumer(projectionData.consumerName);
35
35
  (0, Affirm_1.default)(consumer, `Wrong consumer name sent to projection worker: "${projectionData.consumerName}" not found.`);
36
- const dataset = new Dataset_1.default(datasetName, datasetFile);
36
+ const dataset = new Dataset_1.default(datasetName, datasetFile, undefined, executionId);
37
37
  dataset
38
- .setDimensinons(datasetDimensions)
38
+ .setDimensions(datasetDimensions)
39
39
  .setDelimiter(datasetDelimiter);
40
40
  const outputPath = dataset['_tempPath'] + workerId;
41
41
  yield PostProcessor_1.default.doProjection(consumer, dataset, { outputPath, range: { fromLine, toLine } });
package/workers/TransformWorker.js CHANGED
@@ -22,7 +22,7 @@ dotenv_1.default.configDotenv();
22
22
  const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
23
23
  Environment_1.default.load('./');
24
24
  try {
25
- const { datasetName, fromLine, toLine, workerId, datasetFile, datasetDimensions, datasetDelimiter, transformData } = workerData;
25
+ const { datasetName, fromLine, toLine, workerId, executionId, datasetFile, datasetDimensions, datasetDelimiter, transformData } = workerData;
26
26
  Affirm_1.default.hasValue(fromLine, `Invalid from line`);
27
27
  Affirm_1.default.hasValue(toLine, `Invalid to line`);
28
28
  (0, Affirm_1.default)(datasetName, `Invalid dataset name`);
@@ -33,9 +33,9 @@ const run = (workerData) => __awaiter(void 0, void 0, void 0, function* () {
33
33
  (0, Affirm_1.default)(datasetDelimiter, `Invalid dataset delimter`);
34
34
  const consumer = Environment_1.default.getConsumer(transformData.consumerName);
35
35
  (0, Affirm_1.default)(consumer, `Wrong consumer name sent to projection worker: "${transformData.consumerName}" not found.`);
36
- const dataset = new Dataset_1.default(datasetName, datasetFile);
36
+ const dataset = new Dataset_1.default(datasetName, datasetFile, undefined, executionId);
37
37
  dataset
38
- .setDimensinons(datasetDimensions)
38
+ .setDimensions(datasetDimensions)
39
39
  .setDelimiter(datasetDelimiter);
40
40
  const outputPath = dataset['_tempPath'] + workerId;
41
41
  yield TransformationEngine_1.default.apply(consumer, dataset, { outputPath, range: { fromLine, toLine } });