@forzalabs/remora 0.0.19 → 0.0.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Constants.js +6 -2
- package/actions/automap.js +5 -1
- package/actions/deploy.js +0 -1
- package/actions/run.js +9 -3
- package/auth/JWTManager.js +1 -1
- package/database/DatabaseEngine.js +13 -3
- package/definitions/json_schemas/consumer-schema.json +392 -0
- package/definitions/json_schemas/producer-schema.json +4 -0
- package/definitions/transform/Transformations.js +2 -0
- package/drivers/DriverFactory.js +39 -1
- package/drivers/LocalDriver.js +70 -2
- package/drivers/S3Driver.js +52 -0
- package/engines/DataframeManager.js +55 -0
- package/engines/Environment.js +13 -5
- package/engines/ProducerEngine.js +2 -1
- package/engines/UsageDataManager.js +110 -0
- package/engines/UserManager.js +2 -2
- package/engines/Validator.js +1 -1
- package/engines/ai/AutoMapperEngine.js +2 -2
- package/engines/ai/LLM.js +51 -26
- package/engines/consumer/ConsumerManager.js +2 -1
- package/engines/execution/ExecutionEnvironment.js +8 -1
- package/engines/execution/ExecutionPlanner.js +8 -4
- package/engines/file/FileContentBuilder.js +34 -0
- package/engines/file/FileExporter.js +32 -49
- package/engines/transform/TransformationEngine.js +220 -0
- package/engines/transform/TypeCaster.js +33 -0
- package/engines/validation/Validator.js +22 -5
- package/helper/Helper.js +7 -0
- package/index.js +7 -0
- package/licencing/LicenceManager.js +64 -0
- package/package.json +1 -1
package/drivers/LocalDriver.js
CHANGED
|
@@ -52,6 +52,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
52
52
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
53
53
|
};
|
|
54
54
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
55
|
+
exports.LocalDriverDestination = void 0;
|
|
55
56
|
const fs = __importStar(require("fs"));
|
|
56
57
|
const path_1 = __importDefault(require("path"));
|
|
57
58
|
const readline_1 = __importDefault(require("readline"));
|
|
@@ -60,9 +61,9 @@ class LocalDriver {
|
|
|
60
61
|
constructor() {
|
|
61
62
|
this.init = (source) => __awaiter(this, void 0, void 0, function* () {
|
|
62
63
|
const fileURL = source.authentication['path'];
|
|
63
|
-
(0, Affirm_1.default)(fileURL,
|
|
64
|
+
(0, Affirm_1.default)(fileURL, `Missing file path in the authentication of source "${source.name}"`);
|
|
64
65
|
const exist = fs.existsSync(fileURL);
|
|
65
|
-
(0, Affirm_1.default)(exist,
|
|
66
|
+
(0, Affirm_1.default)(exist, `The path (${fileURL}) for source "${source.name}" does NOT exist.`);
|
|
66
67
|
this._path = source.authentication['path'];
|
|
67
68
|
return this;
|
|
68
69
|
});
|
|
@@ -119,4 +120,71 @@ class LocalDriver {
|
|
|
119
120
|
});
|
|
120
121
|
}
|
|
121
122
|
}
|
|
123
|
+
class LocalDriverDestination {
|
|
124
|
+
constructor() {
|
|
125
|
+
this.init = (source) => __awaiter(this, void 0, void 0, function* () {
|
|
126
|
+
(0, Affirm_1.default)(source, `Invalid source`);
|
|
127
|
+
const fileURL = source.authentication['path'];
|
|
128
|
+
(0, Affirm_1.default)(fileURL, `Missing file path in the authentication of source "${source.name}"`);
|
|
129
|
+
const exist = fs.existsSync(fileURL);
|
|
130
|
+
(0, Affirm_1.default)(exist, `The path (${fileURL}) for source "${source.name}" does NOT exist.`);
|
|
131
|
+
this._path = source.authentication['path'];
|
|
132
|
+
return this;
|
|
133
|
+
});
|
|
134
|
+
this.uploadFile = (options) => __awaiter(this, void 0, void 0, function* () {
|
|
135
|
+
(0, Affirm_1.default)(this._path, 'Path not initialized');
|
|
136
|
+
(0, Affirm_1.default)(options, 'Invalid upload options');
|
|
137
|
+
(0, Affirm_1.default)(options.name, 'File name is required');
|
|
138
|
+
(0, Affirm_1.default)(options.content != null, 'File content is required');
|
|
139
|
+
const folder = this._path;
|
|
140
|
+
try {
|
|
141
|
+
if (!fs.existsSync(folder))
|
|
142
|
+
fs.mkdirSync(folder, { recursive: true });
|
|
143
|
+
const filePath = path_1.default.join(folder, options.name);
|
|
144
|
+
fs.writeFileSync(filePath, options.content);
|
|
145
|
+
return { bucket: folder, key: filePath, res: true };
|
|
146
|
+
}
|
|
147
|
+
catch (error) {
|
|
148
|
+
throw new Error(`Failed to upload local file "${options.name}": ${error.message}`);
|
|
149
|
+
}
|
|
150
|
+
});
|
|
151
|
+
this.multipartUpload = (options) => __awaiter(this, void 0, void 0, function* () {
|
|
152
|
+
(0, Affirm_1.default)(this._path, 'Path not initialized');
|
|
153
|
+
(0, Affirm_1.default)(options, 'Invalid upload options');
|
|
154
|
+
(0, Affirm_1.default)(options.name, 'File name is required');
|
|
155
|
+
(0, Affirm_1.default)(options.contents && Array.isArray(options.contents), 'Contents must be an array');
|
|
156
|
+
(0, Affirm_1.default)(options.contents.length > 0, 'Contents array cannot be empty');
|
|
157
|
+
const folder = this._path;
|
|
158
|
+
try {
|
|
159
|
+
if (!fs.existsSync(folder)) {
|
|
160
|
+
fs.mkdirSync(folder, { recursive: true });
|
|
161
|
+
}
|
|
162
|
+
const filePath = path_1.default.join(folder, options.name);
|
|
163
|
+
// Create or truncate the file first
|
|
164
|
+
fs.writeFileSync(filePath, '');
|
|
165
|
+
// Append each chunk
|
|
166
|
+
for (const chunk of options.contents) {
|
|
167
|
+
(0, Affirm_1.default)(typeof chunk === 'string', 'Each chunk must be a string');
|
|
168
|
+
fs.appendFileSync(filePath, chunk);
|
|
169
|
+
}
|
|
170
|
+
return { bucket: folder, key: filePath, res: true };
|
|
171
|
+
}
|
|
172
|
+
catch (error) {
|
|
173
|
+
// Clean up the partial file if it exists
|
|
174
|
+
const filePath = path_1.default.join(folder, options.name);
|
|
175
|
+
if (fs.existsSync(filePath)) {
|
|
176
|
+
try {
|
|
177
|
+
fs.unlinkSync(filePath);
|
|
178
|
+
}
|
|
179
|
+
catch (cleanupError) {
|
|
180
|
+
console.error(`Failed to clean up partial file after error: ${cleanupError.message}`);
|
|
181
|
+
throw cleanupError;
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
throw new Error(`Failed to complete local multipart upload for "${options.name}": ${error.message}`);
|
|
185
|
+
}
|
|
186
|
+
});
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
exports.LocalDriverDestination = LocalDriverDestination;
|
|
122
190
|
exports.default = LocalDriver;
|
package/drivers/S3Driver.js
CHANGED
|
@@ -45,6 +45,58 @@ class S3Driver {
|
|
|
45
45
|
(0, Affirm_1.default)(res.$metadata.httpStatusCode === 200, `Failed to upload the file "${name}" to the bucket "${this._bucketName}": status code ${res.$metadata.httpStatusCode}`);
|
|
46
46
|
return { res: true, key: name, bucket: this._bucketName };
|
|
47
47
|
});
|
|
48
|
+
this.multipartUpload = (options) => __awaiter(this, void 0, void 0, function* () {
|
|
49
|
+
(0, Affirm_1.default)(options, `Invalid upload options`);
|
|
50
|
+
(0, Affirm_1.default)(options.contents && options.contents.length > 0, 'No contents provided for multipart upload');
|
|
51
|
+
(0, Affirm_1.default)(options.name, 'No filename provided for multipart upload');
|
|
52
|
+
try {
|
|
53
|
+
// Create the multipart upload
|
|
54
|
+
const createMultipartUploadRes = yield this._client.send(new client_s3_1.CreateMultipartUploadCommand({
|
|
55
|
+
Bucket: this._bucketName,
|
|
56
|
+
Key: options.name
|
|
57
|
+
}));
|
|
58
|
+
const uploadId = createMultipartUploadRes.UploadId;
|
|
59
|
+
(0, Affirm_1.default)(uploadId, 'Failed to initiate multipart upload');
|
|
60
|
+
// Upload each part
|
|
61
|
+
const uploadPromises = options.contents.map((content, index) => __awaiter(this, void 0, void 0, function* () {
|
|
62
|
+
const partNumber = index + 1;
|
|
63
|
+
const uploadPartRes = yield this._client.send(new client_s3_1.UploadPartCommand({
|
|
64
|
+
Bucket: this._bucketName,
|
|
65
|
+
Key: options.name,
|
|
66
|
+
UploadId: uploadId,
|
|
67
|
+
PartNumber: partNumber,
|
|
68
|
+
Body: Buffer.from(content)
|
|
69
|
+
}));
|
|
70
|
+
return {
|
|
71
|
+
PartNumber: partNumber,
|
|
72
|
+
ETag: uploadPartRes.ETag
|
|
73
|
+
};
|
|
74
|
+
}));
|
|
75
|
+
const uploadedParts = yield Promise.all(uploadPromises);
|
|
76
|
+
// Complete the multipart upload
|
|
77
|
+
const completeRes = yield this._client.send(new client_s3_1.CompleteMultipartUploadCommand({
|
|
78
|
+
Bucket: this._bucketName,
|
|
79
|
+
Key: options.name,
|
|
80
|
+
UploadId: uploadId,
|
|
81
|
+
MultipartUpload: {
|
|
82
|
+
Parts: uploadedParts
|
|
83
|
+
}
|
|
84
|
+
}));
|
|
85
|
+
(0, Affirm_1.default)(completeRes.$metadata.httpStatusCode === 200, `Failed to complete multipart upload for "${options.name}": status code ${completeRes.$metadata.httpStatusCode}`);
|
|
86
|
+
return { res: true, key: options.name, bucket: this._bucketName };
|
|
87
|
+
}
|
|
88
|
+
catch (error) {
|
|
89
|
+
// If anything fails, make sure to abort the multipart upload
|
|
90
|
+
if (error.UploadId) {
|
|
91
|
+
yield this._client.send(new client_s3_1.AbortMultipartUploadCommand({
|
|
92
|
+
Bucket: this._bucketName,
|
|
93
|
+
Key: options.name,
|
|
94
|
+
UploadId: error.UploadId
|
|
95
|
+
}));
|
|
96
|
+
}
|
|
97
|
+
throw error;
|
|
98
|
+
}
|
|
99
|
+
});
|
|
48
100
|
}
|
|
49
101
|
}
|
|
50
102
|
exports.default = S3Driver;
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
const Algo_1 = __importDefault(require("../core/Algo"));
|
|
7
|
+
const Helper_1 = __importDefault(require("../helper/Helper"));
|
|
8
|
+
class DataframeManagerClass {
|
|
9
|
+
fill(points, from, to, onlyLastValue, maintainLastValue) {
|
|
10
|
+
const min = from !== null && from !== void 0 ? from : this.getMinDate(points);
|
|
11
|
+
const max = to !== null && to !== void 0 ? to : this.getMaxDate(points);
|
|
12
|
+
const orderPoints = points.length > 0 ? Algo_1.default.orderBy(points, 'x') : [];
|
|
13
|
+
const filledPoints = [];
|
|
14
|
+
const currentDate = new Date(min);
|
|
15
|
+
while (currentDate <= max) {
|
|
16
|
+
const monthKey = Helper_1.default.formatDateToYYYYMM(currentDate);
|
|
17
|
+
filledPoints.push({ x: monthKey, y: 0 });
|
|
18
|
+
currentDate.setMonth(currentDate.getMonth() + 1);
|
|
19
|
+
}
|
|
20
|
+
for (let i = 0; i < orderPoints.length; i++) {
|
|
21
|
+
const point = orderPoints[i];
|
|
22
|
+
const date = new Date(point.x);
|
|
23
|
+
const filledPoint = filledPoints.find(x => x.x === Helper_1.default.formatDateToYYYYMM(date));
|
|
24
|
+
if (filledPoint) {
|
|
25
|
+
if (!onlyLastValue)
|
|
26
|
+
filledPoint.y += point.y;
|
|
27
|
+
else
|
|
28
|
+
filledPoint.y = point.y;
|
|
29
|
+
if (maintainLastValue) {
|
|
30
|
+
const index = filledPoints.findIndex(x => x.x === Helper_1.default.formatDateToYYYYMM(date));
|
|
31
|
+
for (let k = index; k < filledPoints.length; k++) {
|
|
32
|
+
const nextFilledPoint = filledPoints[k];
|
|
33
|
+
nextFilledPoint.y = filledPoint.y;
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
return filledPoints;
|
|
39
|
+
}
|
|
40
|
+
getMinDate(points) {
|
|
41
|
+
if (!points || points.length === 0) {
|
|
42
|
+
const currentDate = new Date();
|
|
43
|
+
return new Date(currentDate.getFullYear() - 1, currentDate.getMonth(), currentDate.getDate());
|
|
44
|
+
}
|
|
45
|
+
return points.reduce((min, point) => (new Date(point.x) < min ? new Date(point === null || point === void 0 ? void 0 : point.x) : min), new Date(points[0].x));
|
|
46
|
+
}
|
|
47
|
+
getMaxDate(points) {
|
|
48
|
+
if (!points || points.length === 0) {
|
|
49
|
+
return new Date();
|
|
50
|
+
}
|
|
51
|
+
return points.reduce((max, point) => (new Date(point.x) > max ? new Date(point.x) : max), new Date(points[0].x));
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
const DataframeManager = new DataframeManagerClass();
|
|
55
|
+
exports.default = DataframeManager;
|
package/engines/Environment.js
CHANGED
|
@@ -7,7 +7,8 @@ const fs_1 = __importDefault(require("fs"));
|
|
|
7
7
|
const path_1 = __importDefault(require("path"));
|
|
8
8
|
const Affirm_1 = __importDefault(require("../core/Affirm"));
|
|
9
9
|
const SchemaValidator_1 = __importDefault(require("./schema/SchemaValidator"));
|
|
10
|
-
const Validator_1 = __importDefault(require("./Validator"));
|
|
10
|
+
const Validator_1 = __importDefault(require("./validation/Validator"));
|
|
11
|
+
const Constants_1 = __importDefault(require("../Constants"));
|
|
11
12
|
class EnvironmentClass {
|
|
12
13
|
constructor() {
|
|
13
14
|
this._env = null;
|
|
@@ -71,9 +72,16 @@ class EnvironmentClass {
|
|
|
71
72
|
if (!SchemaValidator_1.default.validate('consumer-schema', consumer))
|
|
72
73
|
throw new Error(`Invalid consumer configuration: ${consumer.name}`);
|
|
73
74
|
});
|
|
75
|
+
// Load the project settings
|
|
76
|
+
const envSettings = new Map(Object.entries(Object.assign({}, projectConfig.settings))
|
|
77
|
+
.map(([key, value]) => [key, String(value)]));
|
|
78
|
+
if (!envSettings.has('SQL_MAX_QUERY_ROWS'))
|
|
79
|
+
envSettings.set('SQL_MAX_QUERY_ROWS', Constants_1.default.defaults.SQL_MAX_QUERY_ROWS.toString());
|
|
80
|
+
if (!envSettings.has('STRING_MAX_CHARACTERS_LENGTH'))
|
|
81
|
+
envSettings.set('STRING_MAX_CHARACTERS_LENGTH', Constants_1.default.defaults.STRING_MAX_CHARACTERS_LENGTH.toString());
|
|
74
82
|
// Initialize environment
|
|
75
83
|
this.init({
|
|
76
|
-
settings:
|
|
84
|
+
settings: envSettings,
|
|
77
85
|
sources,
|
|
78
86
|
producers,
|
|
79
87
|
consumers,
|
|
@@ -93,7 +101,7 @@ class EnvironmentClass {
|
|
|
93
101
|
*/
|
|
94
102
|
this.getFirstProducer = (producerName) => {
|
|
95
103
|
(0, Affirm_1.default)(producerName, `Invalid producer name`);
|
|
96
|
-
const prod = this._env.producers.find(x => x.name === producerName);
|
|
104
|
+
const prod = this._env.producers.find(x => x.name.toLocaleLowerCase() === producerName.toLowerCase());
|
|
97
105
|
if (!prod) {
|
|
98
106
|
const consumer = this.getConsumer(producerName);
|
|
99
107
|
(0, Affirm_1.default)(consumer, `Invalid producer name`);
|
|
@@ -103,11 +111,11 @@ class EnvironmentClass {
|
|
|
103
111
|
};
|
|
104
112
|
this.getProducer = (producerName) => {
|
|
105
113
|
(0, Affirm_1.default)(producerName, `Invalid producer name`);
|
|
106
|
-
return this._env.producers.find(x => x.name === producerName);
|
|
114
|
+
return this._env.producers.find(x => x.name.toLowerCase() === producerName.toLowerCase());
|
|
107
115
|
};
|
|
108
116
|
this.getConsumer = (consumerName) => {
|
|
109
117
|
(0, Affirm_1.default)(consumerName, `Invalid consumer name`);
|
|
110
|
-
return this._env.consumers.find(x => x.name === consumerName);
|
|
118
|
+
return this._env.consumers.find(x => x.name.toLowerCase() === consumerName.toLowerCase());
|
|
111
119
|
};
|
|
112
120
|
this.getSchema = (schemaName) => {
|
|
113
121
|
(0, Affirm_1.default)(schemaName, 'Invalid schema name');
|
|
@@ -82,6 +82,7 @@ class ProducerEngineClass {
|
|
|
82
82
|
}
|
|
83
83
|
});
|
|
84
84
|
this.readFile = (producer, options) => __awaiter(this, void 0, void 0, function* () {
|
|
85
|
+
var _a;
|
|
85
86
|
(0, Affirm_1.default)(producer, 'Invalid producer');
|
|
86
87
|
(0, Affirm_1.default)(options, 'Invalid options');
|
|
87
88
|
if (options.readmode === 'lines')
|
|
@@ -95,7 +96,7 @@ class ProducerEngineClass {
|
|
|
95
96
|
lines = yield driver.readLinesInRange({ fileKey: producer.settings.fileKey, lineFrom: options.lines.from, lineTo: options.lines.to });
|
|
96
97
|
else
|
|
97
98
|
lines = [(yield driver.download({ fileKey: producer.settings.fileKey }))];
|
|
98
|
-
switch (producer.settings.fileType.toUpperCase()) {
|
|
99
|
+
switch ((_a = producer.settings.fileType) === null || _a === void 0 ? void 0 : _a.toUpperCase()) {
|
|
99
100
|
case 'CSV': {
|
|
100
101
|
return { data: lines, dataType: 'lines-of-text' };
|
|
101
102
|
}
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
+
});
|
|
10
|
+
};
|
|
11
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
12
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
13
|
+
};
|
|
14
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
+
const DSTE_1 = __importDefault(require("../core/dste/DSTE"));
|
|
16
|
+
const DatabaseEngine_1 = __importDefault(require("../database/DatabaseEngine"));
|
|
17
|
+
const DataframeManager_1 = __importDefault(require("./DataframeManager"));
|
|
18
|
+
class UsageDataManager {
|
|
19
|
+
getUsageDetails() {
|
|
20
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
21
|
+
const now = DSTE_1.default.now();
|
|
22
|
+
const from = new Date(now.getTime() - 30 * 24 * 60 * 60 * 1000);
|
|
23
|
+
const prevMonthFrom = new Date(now.getTime() - 60 * 24 * 60 * 60 * 1000);
|
|
24
|
+
const yearAgo = new Date(now.getFullYear(), now.getMonth() - 11, 1);
|
|
25
|
+
const collection = 'usage';
|
|
26
|
+
// Aggregate status counts for current and previous month
|
|
27
|
+
const getStatusCounts = (start, end) => __awaiter(this, void 0, void 0, function* () {
|
|
28
|
+
const results = yield DatabaseEngine_1.default.aggregate(collection, [
|
|
29
|
+
{ $match: { startedAt: { $gte: start, $lte: end } } },
|
|
30
|
+
{ $group: { _id: '$status', count: { $sum: 1 } } }
|
|
31
|
+
]);
|
|
32
|
+
let success = 0, failed = 0, total = 0;
|
|
33
|
+
results.forEach((r) => {
|
|
34
|
+
total += r.count;
|
|
35
|
+
if (r._id === 'success')
|
|
36
|
+
success = r.count;
|
|
37
|
+
if (r._id === 'failed')
|
|
38
|
+
failed = r.count;
|
|
39
|
+
});
|
|
40
|
+
return { total, success, failed };
|
|
41
|
+
});
|
|
42
|
+
const statusesRequests = yield getStatusCounts(from, now);
|
|
43
|
+
const prevStatusesRequests = yield getStatusCounts(prevMonthFrom, from);
|
|
44
|
+
// Monthly success and fails for last 12 months
|
|
45
|
+
const monthlySuccessPipeline = [
|
|
46
|
+
{ $match: { status: 'success', startedAt: { $gte: yearAgo, $lte: now } } },
|
|
47
|
+
{ $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
|
|
48
|
+
{ $group: { _id: { year: '$year', month: '$month' }, count: { $sum: 1 } } },
|
|
49
|
+
{ $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$count' } },
|
|
50
|
+
{ $sort: { x: 1 } }
|
|
51
|
+
];
|
|
52
|
+
const monthlyFailsPipeline = [
|
|
53
|
+
{ $match: { status: 'failed', startedAt: { $gte: yearAgo, $lte: now } } },
|
|
54
|
+
{ $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
|
|
55
|
+
{ $group: { _id: { year: '$year', month: '$month' }, count: { $sum: 1 } } },
|
|
56
|
+
{ $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$count' } },
|
|
57
|
+
{ $sort: { x: 1 } }
|
|
58
|
+
];
|
|
59
|
+
const rawMonthlySuccess = yield DatabaseEngine_1.default.aggregate(collection, monthlySuccessPipeline);
|
|
60
|
+
const rawMonthlyFails = yield DatabaseEngine_1.default.aggregate(collection, monthlyFailsPipeline);
|
|
61
|
+
// Top lines per month for last 12 months
|
|
62
|
+
const topLinesPipeline = [
|
|
63
|
+
{ $match: { startedAt: { $gte: yearAgo, $lte: now } } },
|
|
64
|
+
{ $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
|
|
65
|
+
{ $group: { _id: { year: '$year', month: '$month' }, itemsCount: { $max: '$itemsCount' } } },
|
|
66
|
+
{ $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$itemsCount' } },
|
|
67
|
+
{ $sort: { x: 1 } }
|
|
68
|
+
];
|
|
69
|
+
const topLines = yield DatabaseEngine_1.default.aggregate(collection, topLinesPipeline);
|
|
70
|
+
// Top times per month for last 12 months
|
|
71
|
+
const topTimePipeline = [
|
|
72
|
+
{ $match: { startedAt: { $gte: yearAgo, $lte: now } } },
|
|
73
|
+
{ $addFields: { durationMs: { $subtract: ['$finishedAt', '$startedAt'] }, year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
|
|
74
|
+
{ $group: { _id: { year: '$year', month: '$month' }, maxDuration: { $max: '$durationMs' } } },
|
|
75
|
+
{ $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$maxDuration' } },
|
|
76
|
+
{ $sort: { x: 1 } }
|
|
77
|
+
];
|
|
78
|
+
const topTime = yield DatabaseEngine_1.default.aggregate(collection, topTimePipeline);
|
|
79
|
+
// Monthly consumers: for each consumer, per month count
|
|
80
|
+
const consumerPipeline = [
|
|
81
|
+
{ $match: { startedAt: { $gte: yearAgo, $lte: now } } },
|
|
82
|
+
{ $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
|
|
83
|
+
{ $group: { _id: { consumer: '$consumer', year: '$year', month: '$month' }, count: { $sum: 1 } } },
|
|
84
|
+
{ $project: { _id: 0, consumer: '$_id.consumer', x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$count' } },
|
|
85
|
+
{ $sort: { consumer: 1, x: 1 } }
|
|
86
|
+
];
|
|
87
|
+
const consumersData = yield DatabaseEngine_1.default.aggregate(collection, consumerPipeline);
|
|
88
|
+
// transform to consumer array
|
|
89
|
+
const consumerMap = {};
|
|
90
|
+
consumersData.forEach((r) => {
|
|
91
|
+
consumerMap[r.consumer] = consumerMap[r.consumer] || [];
|
|
92
|
+
consumerMap[r.consumer].push({ x: r.x, y: r.y });
|
|
93
|
+
});
|
|
94
|
+
const consumers = Object.entries(consumerMap).map(([name, data]) => ({ name, data: DataframeManager_1.default.fill(data !== null && data !== void 0 ? data : [], yearAgo, now) }));
|
|
95
|
+
// Recent executions
|
|
96
|
+
const recentExecution = yield DatabaseEngine_1.default.query(collection, { startedAt: { $gte: from, $lte: now } }, { sort: { startedAt: -1 }, limit: 10 });
|
|
97
|
+
return {
|
|
98
|
+
statusesRequests,
|
|
99
|
+
prevStatusesRequests,
|
|
100
|
+
monthlySuccess: DataframeManager_1.default.fill(rawMonthlySuccess !== null && rawMonthlySuccess !== void 0 ? rawMonthlySuccess : [], yearAgo, now),
|
|
101
|
+
monthlyFails: DataframeManager_1.default.fill(rawMonthlyFails !== null && rawMonthlyFails !== void 0 ? rawMonthlyFails : [], yearAgo, now),
|
|
102
|
+
consumers: consumers,
|
|
103
|
+
topLine: DataframeManager_1.default.fill(topLines !== null && topLines !== void 0 ? topLines : [], yearAgo, now),
|
|
104
|
+
topTime: DataframeManager_1.default.fill(topTime !== null && topTime !== void 0 ? topTime : [], yearAgo, now),
|
|
105
|
+
recentExecution
|
|
106
|
+
};
|
|
107
|
+
});
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
exports.default = new UsageDataManager();
|
package/engines/UserManager.js
CHANGED
|
@@ -46,8 +46,8 @@ const DEV_USER = {
|
|
|
46
46
|
const MOCK_USER = {
|
|
47
47
|
_id: '__mock__',
|
|
48
48
|
auth: { oid: '', provider: 'azure' },
|
|
49
|
-
email: '',
|
|
50
|
-
name: '
|
|
49
|
+
email: 'mock.user@email.com',
|
|
50
|
+
name: 'Mock User',
|
|
51
51
|
roles: ['user'],
|
|
52
52
|
_signature: '',
|
|
53
53
|
lastLogin: new Date().toJSON()
|
package/engines/Validator.js
CHANGED
|
@@ -120,7 +120,7 @@ class ValidatorClass {
|
|
|
120
120
|
const groupingFields = fields.filter(x => x.grouping);
|
|
121
121
|
if (groupingFields.length > 1)
|
|
122
122
|
errors.push(`There can't be 2 fields with grouping defined at the same level (${groupingFields.map(x => x.key).join(', ')}). Level: ${level}`);
|
|
123
|
-
groupingFields.forEach(field => {
|
|
123
|
+
groupingFields.forEach((field) => {
|
|
124
124
|
if (field.grouping)
|
|
125
125
|
errors = [...errors, ...validateGroupingLevels(field.grouping.subFields, level + 1)];
|
|
126
126
|
});
|
|
@@ -20,11 +20,11 @@ class AutoMapperEngineClass {
|
|
|
20
20
|
* input: the first ten lines of the uploaded file
|
|
21
21
|
* outputs: the selected schemas
|
|
22
22
|
*/
|
|
23
|
-
this.map = (input, outputs) => __awaiter(this, void 0, void 0, function* () {
|
|
23
|
+
this.map = (input, outputs, fileName, sources) => __awaiter(this, void 0, void 0, function* () {
|
|
24
24
|
(0, Affirm_1.default)(input, 'Invalid input');
|
|
25
25
|
(0, Affirm_1.default)(outputs, 'Invalid outputs');
|
|
26
26
|
const llm = new LLM_1.default();
|
|
27
|
-
const producersRes = yield llm.inferProducers(input, outputs);
|
|
27
|
+
const producersRes = yield llm.inferProducers(input, outputs, fileName, sources);
|
|
28
28
|
const consumersRes = yield llm.inferConsumers(producersRes.producers, outputs);
|
|
29
29
|
return {
|
|
30
30
|
consumers: consumersRes.consumers,
|
package/engines/ai/LLM.js
CHANGED
|
@@ -25,12 +25,16 @@ You are tasked with creating the PRODUCER(S) that will then be used.
|
|
|
25
25
|
A producer maps directly to a dataset and exposes its dimensions.
|
|
26
26
|
|
|
27
27
|
## FIELDS
|
|
28
|
-
- alias: the reference to the column or property name if different from the desired name property
|
|
29
28
|
- classification: make your best guess if the field falls under any of these regulations
|
|
30
29
|
|
|
31
30
|
# RULES
|
|
32
31
|
- Add only the required fields to comply with the OUTPUT DATA SPEC
|
|
33
32
|
- Add fields that you think are important
|
|
33
|
+
- The name of the producer must be the same as the name of the dataset.
|
|
34
|
+
- Avoid creating multiple providers with similar data.
|
|
35
|
+
- Try to create the least number of providers
|
|
36
|
+
- Always include this exact property as the first -> "$schema": "https://raw.githubusercontent.com/ForzaLabs/remora-public/refs/heads/main/json_schemas/producer-schema.json"
|
|
37
|
+
- Based on the producer select the source that makes the most sense to connect otherwise leave the string "<source_name>"
|
|
34
38
|
|
|
35
39
|
# FORMAT
|
|
36
40
|
The INPUT DATA SPEC will be the first 10 rows from the dataset that the producer needs to map to.
|
|
@@ -42,6 +46,12 @@ The result must be returned as a JSON object.
|
|
|
42
46
|
|
|
43
47
|
# OUTPUT DATA SPEC
|
|
44
48
|
{{output data spec}}
|
|
49
|
+
|
|
50
|
+
# File name
|
|
51
|
+
{{file name}}
|
|
52
|
+
|
|
53
|
+
# SOURCES
|
|
54
|
+
{{sources}}
|
|
45
55
|
`;
|
|
46
56
|
const baseConsumersSystemPrompt = `
|
|
47
57
|
# TASK
|
|
@@ -52,12 +62,14 @@ You are going to receive a list of PRODUCERS that expose some dimensions, and yo
|
|
|
52
62
|
A consumer takes the data from one or more producers and changes its shape to transform it into the required output schema.
|
|
53
63
|
|
|
54
64
|
## FIELDS
|
|
55
|
-
- fields.grouping: used when you need to create a nested data structure with many items under a single key. If no groupingKey is used, then don't add this field.
|
|
56
65
|
- fields.from: used to distinct between the producers imported by the consumer. The value is the name of the producer.
|
|
57
66
|
|
|
58
67
|
# RULES
|
|
59
|
-
- If a field is not needed, do not add it e.g.
|
|
68
|
+
- If a field is not needed, do not add it e.g.
|
|
60
69
|
- Only import a producer once
|
|
70
|
+
- Always include this exact property as the first -> $schema": "https://raw.githubusercontent.com/ForzaLabs/remora-public/refs/heads/main/json_schemas/consumer-schema.json",
|
|
71
|
+
- Use "API" as the only valid output format.
|
|
72
|
+
- The "from" must contain only the name of the producer
|
|
61
73
|
|
|
62
74
|
# FORMAT
|
|
63
75
|
The INPUT DATA SPEC will be the first 10 rows from the dataset that the producer needs to map to.
|
|
@@ -141,7 +153,7 @@ resulting consumer: """
|
|
|
141
153
|
],
|
|
142
154
|
"outputs": [
|
|
143
155
|
{
|
|
144
|
-
"format": "
|
|
156
|
+
"format": "API"
|
|
145
157
|
}
|
|
146
158
|
],
|
|
147
159
|
"producers": [
|
|
@@ -159,36 +171,45 @@ You are going to receive a list of CONSUMERS and you need to return in the corre
|
|
|
159
171
|
|
|
160
172
|
# CONSUMER DEFINITION
|
|
161
173
|
A consumer takes the data from one or more producers and changes its shape to transform it into the required output schema.
|
|
162
|
-
|
|
163
174
|
## FIELDS
|
|
164
|
-
- fields.grouping: used when you need to create a nested data structure with many items under a single key. If no groupingKey is used, then don't add this field.
|
|
165
175
|
- fields.from: used to distinct between the producers imported by the consumer. The value is the name of the producer.
|
|
166
176
|
|
|
167
177
|
# RULES
|
|
168
|
-
- If a field is not needed, do not add it e.g.
|
|
178
|
+
- If a field is not needed, do not add it e.g.
|
|
169
179
|
- Only import a producer once
|
|
180
|
+
- Always include this exact property as the first -> "https://raw.githubusercontent.com/ForzaLabs/remora-public/refs/heads/main/json_schemas/consumer-schema.json",
|
|
181
|
+
- Use "API" as the only valid output format.
|
|
182
|
+
- The "from" must contain only the name of the producer
|
|
170
183
|
|
|
171
184
|
# CONSUMERS
|
|
172
185
|
{{consumers}}
|
|
173
186
|
`;
|
|
174
187
|
class LLM {
|
|
175
188
|
constructor() {
|
|
176
|
-
this.inferProducers = (input, outputs) => __awaiter(this, void 0, void 0, function* () {
|
|
189
|
+
this.inferProducers = (input, outputs, fileName, sources) => __awaiter(this, void 0, void 0, function* () {
|
|
177
190
|
let systemPrompt = baseProducersSystemPrompt;
|
|
178
191
|
systemPrompt = systemPrompt.replace('{{input data spec}}', input.map(x => `- ${JSON.stringify(x)}`).join('\n'));
|
|
179
192
|
systemPrompt = systemPrompt.replace('{{output data spec}}', outputs.map(x => `- ${JSON.stringify(x)}`).join('\n'));
|
|
193
|
+
systemPrompt = systemPrompt.replace('{{file name}}', fileName);
|
|
194
|
+
systemPrompt = systemPrompt.replace('{{sources}}', sources.map(x => `- ${JSON.stringify(x)}`).join('\n'));
|
|
180
195
|
const res = yield this._client.beta.chat.completions.parse({
|
|
181
|
-
model: '
|
|
196
|
+
model: 'gpt-4o',
|
|
182
197
|
messages: [
|
|
183
198
|
{ role: 'system', content: systemPrompt }
|
|
184
199
|
],
|
|
185
200
|
response_format: (0, zod_1.zodResponseFormat)(zod_2.z.object({
|
|
186
201
|
producers: zod_2.z.array(zod_2.z.object({
|
|
202
|
+
$schema: zod_2.z.string().describe('The schema of the producer. This should always be the same.'),
|
|
187
203
|
name: zod_2.z.string(),
|
|
188
204
|
description: zod_2.z.string(),
|
|
189
205
|
dimensions: zod_2.z.array(zod_2.z.object({
|
|
190
206
|
name: zod_2.z.string(),
|
|
191
|
-
alias:
|
|
207
|
+
// alias: z.string().optional(),
|
|
208
|
+
source: zod_2.z.string().describe('The name of the source linked to this producer.'),
|
|
209
|
+
settings: zod_2.z.object({
|
|
210
|
+
fileKey: zod_2.z.string().describe('The name of the file'),
|
|
211
|
+
fileType: zod_2.z.string().describe('The file extension (CSV | JSONL | JSON)')
|
|
212
|
+
}),
|
|
192
213
|
description: zod_2.z.string().optional(),
|
|
193
214
|
type: zod_2.z.enum(['string', 'number', 'datetime']),
|
|
194
215
|
pk: zod_2.z.boolean().optional(),
|
|
@@ -205,12 +226,13 @@ class LLM {
|
|
|
205
226
|
systemPrompt = systemPrompt.replace('{{producers}}', producers.map(x => `- ${JSON.stringify(x)}`).join('\n'));
|
|
206
227
|
systemPrompt = systemPrompt.replace('{{output data spec}}', outputs.map(x => `- ${JSON.stringify(x)}`).join('\n'));
|
|
207
228
|
const item = {
|
|
208
|
-
model: '
|
|
229
|
+
model: 'gpt-4o',
|
|
209
230
|
messages: [
|
|
210
231
|
{ role: 'system', content: systemPrompt }
|
|
211
232
|
],
|
|
212
233
|
response_format: (0, zod_1.zodResponseFormat)(zod_2.z.object({
|
|
213
234
|
consumers: zod_2.z.array(zod_2.z.object({
|
|
235
|
+
$schema: zod_2.z.string().describe('The schema of the consumer. This should always be the same.'),
|
|
214
236
|
name: zod_2.z.string(),
|
|
215
237
|
description: zod_2.z.string(),
|
|
216
238
|
producers: zod_2.z.array(zod_2.z.object({
|
|
@@ -224,13 +246,14 @@ class LLM {
|
|
|
224
246
|
fields: zod_2.z.array(zod_2.z.object({
|
|
225
247
|
key: zod_2.z.string(),
|
|
226
248
|
from: zod_2.z.string().optional(),
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
})
|
|
249
|
+
alias: zod_2.z.string().optional()
|
|
250
|
+
// grouping: z.object({
|
|
251
|
+
// groupingKey: z.string(),
|
|
252
|
+
// subFields: z.array(z.lazy(() => z.object({
|
|
253
|
+
// key: z.string(),
|
|
254
|
+
// from: z.string().optional()
|
|
255
|
+
// })))
|
|
256
|
+
// }).optional()
|
|
234
257
|
})),
|
|
235
258
|
outputs: zod_2.z.array(zod_2.z.object({
|
|
236
259
|
format: zod_2.z.enum(['SQL', 'API', 'CSV', 'PARQUET', 'JSON'])
|
|
@@ -243,12 +266,13 @@ class LLM {
|
|
|
243
266
|
const firstDraft = msg.parsed;
|
|
244
267
|
const qaSystemPrompt = baseQASystemPrompt.replace('{{consumers}}', firstDraft.consumers.map(x => `- ${JSON.stringify(x)}`).join('\n'));
|
|
245
268
|
const res2 = yield this._client.beta.chat.completions.parse({
|
|
246
|
-
model: '
|
|
269
|
+
model: 'gpt-4o',
|
|
247
270
|
messages: [
|
|
248
271
|
{ role: 'system', content: qaSystemPrompt }
|
|
249
272
|
],
|
|
250
273
|
response_format: (0, zod_1.zodResponseFormat)(zod_2.z.object({
|
|
251
274
|
consumers: zod_2.z.array(zod_2.z.object({
|
|
275
|
+
$schema: zod_2.z.string().describe('The schema of the consumer. This should always be the same.'),
|
|
252
276
|
name: zod_2.z.string(),
|
|
253
277
|
description: zod_2.z.string(),
|
|
254
278
|
producers: zod_2.z.array(zod_2.z.object({
|
|
@@ -262,13 +286,14 @@ class LLM {
|
|
|
262
286
|
fields: zod_2.z.array(zod_2.z.object({
|
|
263
287
|
key: zod_2.z.string(),
|
|
264
288
|
from: zod_2.z.string().optional(),
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
}).optional()
|
|
289
|
+
alias: zod_2.z.string().optional()
|
|
290
|
+
// grouping: z.object({
|
|
291
|
+
// groupingKey: z.string().optional(),
|
|
292
|
+
// subFields: z.array(z.lazy(() => z.object({
|
|
293
|
+
// key: z.string(),
|
|
294
|
+
// from: z.string().optional()
|
|
295
|
+
// }))).optional()
|
|
296
|
+
// }).optional()
|
|
272
297
|
})),
|
|
273
298
|
outputs: zod_2.z.array(zod_2.z.object({
|
|
274
299
|
format: zod_2.z.enum(['SQL', 'API', 'CSV', 'PARQUET', 'JSON'])
|
|
@@ -113,7 +113,8 @@ class ConsumerManagerClass {
|
|
|
113
113
|
else {
|
|
114
114
|
const col = ConsumerManager.searchFieldInColumns(field.cField, availableColumns, consumer);
|
|
115
115
|
(0, Affirm_1.default)(col, `Consumer "${consumer.name}" misconfiguration: the requested field "${field.cField.key}" is not found in any of the specified producers ("${consumer.producers.map(x => x.name).join(', ')}")`);
|
|
116
|
-
|
|
116
|
+
// TODO: CHECK THIS FIX IS GOOD
|
|
117
|
+
expandedFields.push(Object.assign(Object.assign({}, field), { dimension: col.dimension, measure: col.measure }));
|
|
117
118
|
}
|
|
118
119
|
return expandedFields;
|
|
119
120
|
};
|