@forzalabs/remora 0.0.51-nasco.3 → 0.0.52-nasco.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Constants.js +1 -1
- package/definitions/json_schemas/consumer-schema.json +1 -1
- package/definitions/json_schemas/producer-schema.json +2 -2
- package/definitions/json_schemas/project-schema.json +39 -2
- package/engines/consumer/ConsumerEngine.js +1 -1
- package/engines/consumer/ConsumerManager.js +6 -1
- package/engines/deployment/DeploymentPlanner.js +1 -1
- package/engines/execution/ExecutionEnvironment.js +2 -2
- package/engines/sql/SQLCompiler.js +1 -1
- package/engines/usage/DataframeManager.js +55 -0
- package/engines/usage/UsageManager.js +61 -0
- package/engines/validation/Validator.js +1 -1
- package/package.json +1 -1
package/Constants.js
CHANGED

@@ -28,7 +28,7 @@
      "properties": {
        "name": {
          "type": "string",
-          "description": "The name of the dimension"
+          "description": "The name of the dimension. This is the output name of this dimension."
        },
        "description": {
          "type": "string",
@@ -45,7 +45,7 @@
        },
        "alias": {
          "type": "string",
-          "description": "The SQL column or field key that corresponds to this dimension. If left empty, the column name is assumed to be the same as the dimension name"
+          "description": "The SQL column or field key that corresponds to this dimension. If left empty, the column name is assumed to be the same as the dimension name."
        },
        "pk": {
          "type": "boolean",
package/definitions/json_schemas/project-schema.json
CHANGED

@@ -1,8 +1,14 @@
 {
   "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "Project Schema",
+  "description": "Schema for defining remora project configuration",
   "type": "object",
   "required": ["name", "version", "consumers", "producers", "sources", "schemas", "settings"],
   "properties": {
+    "$schema": {
+      "type": "string",
+      "format": "uri"
+    },
     "name": {
       "type": "string",
       "description": "Name of the remora project"
@@ -12,6 +18,10 @@
       "pattern": "^\\d+\\.\\d+\\.\\d+$",
       "description": "Version of the project in semver format"
     },
+    "description": {
+      "type": "string",
+      "description": "Optional description of the project"
+    },
     "consumers": {
       "type": "array",
       "items": {
@@ -53,11 +63,38 @@
           "minimum": 1,
           "description": "Maximum number of rows for SQL queries"
         },
+        "STRING_MAX_CHARACTERS_LENGTH": {
+          "type": "integer",
+          "minimum": 1,
+          "description": "Maximum length for string fields"
+        },
+        "MAX_ITEMS_IN_MEMORY": {
+          "type": "integer",
+          "minimum": 1,
+          "description": "Maximum number of items to keep in memory"
+        },
         "DEBUG_MODE": {
           "type": "boolean",
-          "description": "Enable logging of internal steps
+          "description": "Enable logging of internal steps"
         }
       }
     }
-  }
+  },
+  "additionalProperties": false,
+  "examples": [
+    {
+      "$schema": "https://raw.githubusercontent.com/ForzaLabs/remora-public/refs/heads/main/json_schemas/project-schema.json",
+      "name": "analytics-project",
+      "version": "1.0.0",
+      "description": "Analytics data processing project",
+      "consumers": ["/consumers"],
+      "producers": ["/producers"],
+      "sources": ["/sources"],
+      "schemas": ["/schemas"],
+      "settings": {
+        "SQL_MAX_QUERY_ROWS": 10000,
+        "DEBUG_MODE": true
+      }
+    }
+  ]
 }
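With "additionalProperties": false and a documented top-level "$schema" key, a project file can now be linted against this schema with any draft-07 validator. A minimal sketch using ajv; ajv is not a dependency of this package, and the file paths here are hypothetical:

    // Validate a remora project file against the updated schema (illustrative only).
    const Ajv = require("ajv");
    const schema = require("./definitions/json_schemas/project-schema.json");
    const project = require("./remora.json"); // hypothetical project file
    const ajv = new Ajv({ allErrors: true });
    const validate = ajv.compile(schema);
    if (!validate(project))
        console.error(validate.errors); // unknown top-level keys now fail because additionalProperties is false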
package/engines/consumer/ConsumerEngine.js
CHANGED

@@ -22,7 +22,7 @@ const ExecutionEnvironment_1 = __importDefault(require("../execution/ExecutionEn
 const ProducerManager_1 = __importDefault(require("../producer/ProducerManager"));
 const SQLCompiler_1 = __importDefault(require("../sql/SQLCompiler"));
 const SQLUtils_1 = __importDefault(require("../sql/SQLUtils"));
-const UsageManager_1 = __importDefault(require("../UsageManager"));
+const UsageManager_1 = __importDefault(require("../usage/UsageManager"));
 const ConsumerManager_1 = __importDefault(require("./ConsumerManager"));
 class ConsumerEngineClass {
     constructor() {
package/engines/consumer/ConsumerManager.js
CHANGED

@@ -132,7 +132,12 @@ class ConsumerManagerClass {
         else {
             const matches = columns.filter(x => x.nameInProducer === field.key);
             (0, Affirm_1.default)(matches.length > 0, `Consumer "${consumer.name}" misconfiguration: the field "${field.key}" is not found in any of the included producers (${consumer.producers.map(x => x.name).join(', ')})`);
-
+            if (matches.length === 1) {
+                // Need to check if the producers have "union" if they do, I don't care about this check
+                const cProd = consumer.producers.find(x => x.name === matches[0].owner);
+                if (!cProd.union)
+                    (0, Affirm_1.default)(matches.length === 1, `Consumer "${consumer.name}" misconfiguration: the field "${field.key}" is ambiguos between the fields with same name from the producers: ${matches.map(x => x.owner).join(', ')}`);
+            }
             column = matches[0];
         }
         if (!column) {
package/engines/deployment/DeploymentPlanner.js
CHANGED

@@ -15,7 +15,7 @@ class DeploymentPlannerClass {
             switch (output.format) {
                 // csv, json, parquet outputs do not need to generate anything at deploy
                 case 'SQL': {
-                    if (output.
+                    if (output.accelerated && !output.direct)
                         plan.push({ type: 'create-materialized-view', output: output });
                     else if (!output.direct)
                         plan.push({ type: 'create-view', output: output });
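The restored condition makes the deploy-time mapping for SQL outputs explicit: an accelerated (and not direct) output gets a materialized view, any other non-direct output gets a plain view, and a direct output adds nothing to the plan. A condensed restatement of that branch, illustrative only; the output object is assumed to carry the boolean flags accelerated and direct:

    // Deploy-time decision for a single SQL output, mirroring the hunk above.
    function planSqlOutput(output, plan) {
        if (output.accelerated && !output.direct)
            plan.push({ type: 'create-materialized-view', output: output });
        else if (!output.direct)
            plan.push({ type: 'create-view', output: output });
        // direct outputs fall through: nothing is created at deploy time
    }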
package/engines/execution/ExecutionEnvironment.js
CHANGED

@@ -30,7 +30,7 @@ const Logger_1 = __importDefault(require("../../helper/Logger"));
 class ExecutionEnvironment {
     constructor(consumer) {
         this.run = (options) => __awaiter(this, void 0, void 0, function* () {
-            var _a;
+            var _a, _b;
             (0, Affirm_1.default)(this._consumer, 'Invalid consumer');
             const plan = ExecutionPlanner_1.default.plan(this._consumer, options);
             (0, Affirm_1.default)(plan, `Invalid execution plan`);
@@ -153,7 +153,7 @@ class ExecutionEnvironment {
                 }
             }
             catch (error) {
-                const ds = (_a = this._resultingDataset) !== null && _a !== void 0 ? _a : this.
+                const ds = (_a = this._resultingDataset) !== null && _a !== void 0 ? _a : (_b = this._producedData.at(-1)) === null || _b === void 0 ? void 0 : _b.dataset;
                 if (ds)
                     Logger_1.default.log(`Failed execution of consumer at step ${currentStep.type}:\n\tSize: ${ds.getSize()}\n\tCycles: ${ds.getCycles()}\n\tOperations: ${Logger_1.default.formatList(ds.getOperations())}`);
                 Logger_1.default.log(`\tFailed step: ${currentStep.type}->\n\t${error}`);
package/engines/sql/SQLCompiler.js
CHANGED

@@ -78,7 +78,7 @@ class SQLCompilerClass {
         };
         this.getConsumerReference = (consumer) => {
             (0, Affirm_1.default)(consumer, 'Invalid consumer');
-            if (consumer.outputs.some(x => x.format === 'SQL' && x.
+            if (consumer.outputs.some(x => x.format === 'SQL' && x.accelerated))
                 return `SELECT * FROM "av_remora_${SQLUtils_1.default.sanitizeName(consumer.name)}"`;
             if (consumer.outputs.some(x => x.format === 'SQL' && !x.direct))
                 return `SELECT * FROM "v_remora_${SQLUtils_1.default.sanitizeName(consumer.name)}"`;
package/engines/usage/DataframeManager.js
ADDED

@@ -0,0 +1,55 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+const Algo_1 = __importDefault(require("../../core/Algo"));
+const Helper_1 = __importDefault(require("../../helper/Helper"));
+class DataframeManagerClass {
+    fill(points, from, to, onlyLastValue, maintainLastValue) {
+        const min = from !== null && from !== void 0 ? from : this.getMinDate(points);
+        const max = to !== null && to !== void 0 ? to : this.getMaxDate(points);
+        const orderPoints = points.length > 0 ? Algo_1.default.orderBy(points, 'x') : [];
+        const filledPoints = [];
+        const currentDate = new Date(min);
+        while (currentDate <= max) {
+            const monthKey = Helper_1.default.formatDateToYYYYMM(currentDate);
+            filledPoints.push({ x: monthKey, y: 0 });
+            currentDate.setMonth(currentDate.getMonth() + 1);
+        }
+        for (let i = 0; i < orderPoints.length; i++) {
+            const point = orderPoints[i];
+            const date = new Date(point.x);
+            const filledPoint = filledPoints.find(x => x.x === Helper_1.default.formatDateToYYYYMM(date));
+            if (filledPoint) {
+                if (!onlyLastValue)
+                    filledPoint.y += point.y;
+                else
+                    filledPoint.y = point.y;
+                if (maintainLastValue) {
+                    const index = filledPoints.findIndex(x => x.x === Helper_1.default.formatDateToYYYYMM(date));
+                    for (let k = index; k < filledPoints.length; k++) {
+                        const nextFilledPoint = filledPoints[k];
+                        nextFilledPoint.y = filledPoint.y;
+                    }
+                }
+            }
+        }
+        return filledPoints;
+    }
+    getMinDate(points) {
+        if (!points || points.length === 0) {
+            const currentDate = new Date();
+            return new Date(currentDate.getFullYear() - 1, currentDate.getMonth(), currentDate.getDate());
+        }
+        return points.reduce((min, point) => (new Date(point.x) < min ? new Date(point === null || point === void 0 ? void 0 : point.x) : min), new Date(points[0].x));
+    }
+    getMaxDate(points) {
+        if (!points || points.length === 0) {
+            return new Date();
+        }
+        return points.reduce((max, point) => (new Date(point.x) > max ? new Date(point.x) : max), new Date(points[0].x));
+    }
+}
+const DataframeManager = new DataframeManagerClass();
+exports.default = DataframeManager;
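The new DataframeManager densifies a sparse monthly series: fill builds one zero-valued bucket per month between from/to (or the min/max dates found in the data), sums each point into its month, or overwrites it when onlyLastValue is set, and carries the value forward to later months when maintainLastValue is set. A small usage sketch; the input points are invented and the require path assumes the module is reachable from the package root:

    const DataframeManager = require("@forzalabs/remora/engines/usage/DataframeManager").default;

    // Hypothetical sparse series: only January and April have data.
    const points = [
        { x: '2024-01-15', y: 10 },
        { x: '2024-04-02', y: 5 },
    ];

    const filled = DataframeManager.fill(
        points,
        new Date('2024-01-01'),
        new Date('2024-06-30'),
        false, // onlyLastValue: accumulate into the month instead of overwriting
        true   // maintainLastValue: propagate each month's value to later months
    );
    // One bucket per month, keyed by Helper.formatDateToYYYYMM; with these flags
    // the y values end up as 10, 10, 10, 15, 15, 15 for January through June.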
package/engines/usage/UsageManager.js
ADDED

@@ -0,0 +1,61 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+const Affirm_1 = __importDefault(require("../../core/Affirm"));
+const DSTE_1 = __importDefault(require("../../core/dste/DSTE"));
+const DatabaseEngine_1 = __importDefault(require("../../database/DatabaseEngine"));
+const Helper_1 = __importDefault(require("../../helper/Helper"));
+const Settings_1 = __importDefault(require("../../helper/Settings"));
+class UsageManagerClass {
+    constructor() {
+        /**
+         * TODO: I need to group the usage stats into a bucket daily. When and how I do it is still a question...
+         */
+        this.getTodayBucketId = (consumer) => {
+            (0, Affirm_1.default)(consumer, `Invalid consumer`);
+            const now = DSTE_1.default.now();
+            return `${consumer.name}_${now.getUTCFullYear()}_${now.getUTCMonth()}_${now.getUTCDate()}`.toLowerCase();
+        };
+        this.startUsage = (consumer, user) => {
+            const newUsage = {
+                _id: Helper_1.default.uuid(),
+                consumer: consumer.name,
+                startedAt: DSTE_1.default.now(),
+                executedBy: { name: user.name, _id: user._id },
+                itemsCount: -1,
+                status: 'started',
+                _signature: ''
+            };
+            if (Helper_1.default.isDev())
+                return { usageId: newUsage._id, usage: Promise.resolve(newUsage) };
+            const updateRes = DatabaseEngine_1.default.upsert(Settings_1.default.db.collections.usage, newUsage._id, newUsage);
+            return { usageId: newUsage._id, usage: updateRes };
+        };
+        this.endUsage = (usageId, itemsCount) => {
+            const update = {
+                itemsCount: itemsCount,
+                status: 'success',
+                finishedAt: DSTE_1.default.now()
+            };
+            if (Helper_1.default.isDev())
+                return { usageId: null, usage: Promise.resolve(update) };
+            const updateRes = DatabaseEngine_1.default.upsert(Settings_1.default.db.collections.usage, usageId, update);
+            return { usageId: usageId, usage: updateRes };
+        };
+        this.failUsage = (usageId, error) => {
+            const update = {
+                status: 'failed',
+                error: error,
+                finishedAt: DSTE_1.default.now()
+            };
+            if (Helper_1.default.isDev())
+                return { usageId: null, usage: Promise.resolve(update) };
+            const updateRes = DatabaseEngine_1.default.upsert(Settings_1.default.db.collections.usage, usageId, update);
+            return { usageId: usageId, usage: updateRes };
+        };
+    }
+}
+const UsageManager = new UsageManagerClass();
+exports.default = UsageManager;
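UsageManager tracks one record per consumer execution: startUsage upserts a 'started' record keyed by a fresh uuid, endUsage and failUsage record the outcome, and every call short-circuits to a resolved promise in dev mode instead of touching the database. A hedged sketch of the intended call pattern; the consumer and user objects are placeholders, and runConsumer stands in for the real execution step:

    const UsageManager = require("@forzalabs/remora/engines/usage/UsageManager").default;

    const consumer = { name: 'analytics-consumer' };        // placeholder
    const user = { _id: 'user-123', name: 'Jane Example' }; // placeholder

    const { usageId } = UsageManager.startUsage(consumer, user);
    try {
        const rows = runConsumer(consumer);           // hypothetical execution step
        UsageManager.endUsage(usageId, rows.length);  // record ends as 'success'
    } catch (err) {
        UsageManager.failUsage(usageId, String(err)); // record ends as 'failed'
    }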
package/engines/validation/Validator.js
CHANGED

@@ -181,7 +181,7 @@ class ValidatorClass {
         }
         for (const output of consumer.outputs) {
             const format = output.format.toUpperCase();
-            if (format === 'SQL' && output.
+            if (format === 'SQL' && output.accelerated && output.direct)
                 errors.push(`An output SQL cannot be both direct and accelerated (output: ${format})`);
             if ((format === 'CSV' || format === 'JSON' || format === 'PARQUET')) {
                 if (!output.exportDestination)
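The restored check matches the DeploymentPlanner and SQLCompiler changes above: an SQL output may be accelerated or direct, but not both. For illustration, an output that would now be rejected (other output fields omitted):

    // Triggers: "An output SQL cannot be both direct and accelerated (output: SQL)"
    const badOutput = { format: 'SQL', direct: true, accelerated: true };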