@forzalabs/remora 0.0.20 → 0.0.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Constants.js +1 -1
- package/actions/automap.js +5 -1
- package/actions/deploy.js +0 -1
- package/actions/run.js +1 -1
- package/auth/JWTManager.js +1 -1
- package/database/DatabaseEngine.js +13 -3
- package/definitions/json_schemas/consumer-schema.json +392 -0
- package/definitions/json_schemas/producer-schema.json +4 -0
- package/definitions/transform/Transformations.js +2 -0
- package/engines/DataframeManager.js +55 -0
- package/engines/Environment.js +1 -1
- package/engines/UsageDataManager.js +110 -0
- package/engines/UserManager.js +2 -2
- package/engines/ai/AutoMapperEngine.js +2 -2
- package/engines/ai/LLM.js +51 -26
- package/engines/consumer/ConsumerManager.js +2 -1
- package/engines/execution/ExecutionEnvironment.js +8 -1
- package/engines/execution/ExecutionPlanner.js +6 -2
- package/engines/transform/TransformationEngine.js +220 -0
- package/engines/transform/TypeCaster.js +33 -0
- package/engines/validation/Validator.js +22 -5
- package/helper/Helper.js +7 -0
- package/index.js +7 -0
- package/licencing/LicenceManager.js +64 -0
- package/package.json +1 -1
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"use strict";
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const DSTE_1 = __importDefault(require("../core/dste/DSTE"));
const DatabaseEngine_1 = __importDefault(require("../database/DatabaseEngine"));
const DataframeManager_1 = __importDefault(require("./DataframeManager"));
// Milliseconds in one day — used to build the rolling 30/60 day windows.
const DAY_MS = 24 * 60 * 60 * 1000;
// Null-safe fallback so DataframeManager.fill always receives an array.
const orEmpty = (rows) => (rows !== null && rows !== void 0 ? rows : []);
/**
 * Aggregates execution records from the "usage" collection into the usage
 * dashboard payload: status counts (current vs. previous 30-day window),
 * 12-month success/fail series, per-month maxima (lines, duration),
 * per-consumer monthly series and the ten most recent executions.
 */
class UsageDataManager {
    /**
     * Builds the shared monthly aggregation pipeline over `match`ed records:
     * groups by year/month of `startedAt` and projects { x: "YYYY-M", y: ... }.
     * @param match       the $match stage filter
     * @param extraFields extra computed fields to add before grouping (may be {})
     * @param accumulator the $group accumulator(s), e.g. { count: { $sum: 1 } }
     * @param yExpr       the projected y expression, e.g. '$count'
     */
    _monthlySeriesPipeline(match, extraFields, accumulator, yExpr) {
        return [
            { $match: match },
            { $addFields: Object.assign(Object.assign({}, extraFields), { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } }) },
            { $group: Object.assign({ _id: { year: '$year', month: '$month' } }, accumulator) },
            { $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: yExpr } },
            { $sort: { x: 1 } }
        ];
    }
    /**
     * Computes the full usage-dashboard payload. All aggregations are
     * independent of one another, so they are executed in parallel.
     */
    getUsageDetails() {
        return __awaiter(this, void 0, void 0, function* () {
            const now = DSTE_1.default.now();
            const from = new Date(now.getTime() - 30 * DAY_MS); // last 30 days
            const prevMonthFrom = new Date(now.getTime() - 60 * DAY_MS); // the 30 days before that
            const yearAgo = new Date(now.getFullYear(), now.getMonth() - 11, 1); // first day of the month, 12 months back
            const collection = 'usage';
            // Status counts (success / failed / total) over a [start, end] window.
            const getStatusCounts = (start, end) => __awaiter(this, void 0, void 0, function* () {
                const results = yield DatabaseEngine_1.default.aggregate(collection, [
                    { $match: { startedAt: { $gte: start, $lte: end } } },
                    { $group: { _id: '$status', count: { $sum: 1 } } }
                ]);
                let success = 0, failed = 0, total = 0;
                results.forEach((r) => {
                    total += r.count;
                    if (r._id === 'success')
                        success = r.count;
                    if (r._id === 'failed')
                        failed = r.count;
                });
                return { total, success, failed };
            });
            const yearMatch = { startedAt: { $gte: yearAgo, $lte: now } };
            const countAcc = { count: { $sum: 1 } };
            // Per-consumer, per-month execution counts (flat rows; pivoted below).
            const consumerPipeline = [
                { $match: yearMatch },
                { $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
                { $group: { _id: { consumer: '$consumer', year: '$year', month: '$month' }, count: { $sum: 1 } } },
                { $project: { _id: 0, consumer: '$_id.consumer', x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$count' } },
                { $sort: { consumer: 1, x: 1 } }
            ];
            // All queries are independent: run them in parallel instead of serially.
            const [statusesRequests, prevStatusesRequests, rawMonthlySuccess, rawMonthlyFails, topLines, topTime, consumersData, recentExecution] = yield Promise.all([
                getStatusCounts(from, now),
                getStatusCounts(prevMonthFrom, from),
                // Monthly successes for the last 12 months
                DatabaseEngine_1.default.aggregate(collection, this._monthlySeriesPipeline(Object.assign({ status: 'success' }, yearMatch), {}, countAcc, '$count')),
                // Monthly failures for the last 12 months
                DatabaseEngine_1.default.aggregate(collection, this._monthlySeriesPipeline(Object.assign({ status: 'failed' }, yearMatch), {}, countAcc, '$count')),
                // Per-month maximum itemsCount
                DatabaseEngine_1.default.aggregate(collection, this._monthlySeriesPipeline(yearMatch, {}, { itemsCount: { $max: '$itemsCount' } }, '$itemsCount')),
                // Per-month maximum execution duration (finishedAt - startedAt, ms)
                DatabaseEngine_1.default.aggregate(collection, this._monthlySeriesPipeline(yearMatch, { durationMs: { $subtract: ['$finishedAt', '$startedAt'] } }, { maxDuration: { $max: '$durationMs' } }, '$maxDuration')),
                DatabaseEngine_1.default.aggregate(collection, consumerPipeline),
                // Ten most recent executions of the last 30 days
                DatabaseEngine_1.default.query(collection, { startedAt: { $gte: from, $lte: now } }, { sort: { startedAt: -1 }, limit: 10 })
            ]);
            // Pivot the flat per-consumer rows into { name, data: [{x, y}] } series.
            const consumerMap = {};
            consumersData.forEach((r) => {
                consumerMap[r.consumer] = consumerMap[r.consumer] || [];
                consumerMap[r.consumer].push({ x: r.x, y: r.y });
            });
            const consumers = Object.entries(consumerMap).map(([name, data]) => ({ name, data: DataframeManager_1.default.fill(orEmpty(data), yearAgo, now) }));
            return {
                statusesRequests,
                prevStatusesRequests,
                monthlySuccess: DataframeManager_1.default.fill(orEmpty(rawMonthlySuccess), yearAgo, now),
                monthlyFails: DataframeManager_1.default.fill(orEmpty(rawMonthlyFails), yearAgo, now),
                consumers: consumers,
                topLine: DataframeManager_1.default.fill(orEmpty(topLines), yearAgo, now),
                topTime: DataframeManager_1.default.fill(orEmpty(topTime), yearAgo, now),
                recentExecution
            };
        });
    }
}
exports.default = new UsageDataManager();
|
package/engines/UserManager.js
CHANGED
|
@@ -46,8 +46,8 @@ const DEV_USER = {
|
|
|
46
46
|
const MOCK_USER = {
|
|
47
47
|
_id: '__mock__',
|
|
48
48
|
auth: { oid: '', provider: 'azure' },
|
|
49
|
-
email: '',
|
|
50
|
-
name: '
|
|
49
|
+
email: 'mock.user@email.com',
|
|
50
|
+
name: 'Mock User',
|
|
51
51
|
roles: ['user'],
|
|
52
52
|
_signature: '',
|
|
53
53
|
lastLogin: new Date().toJSON()
|
|
@@ -20,11 +20,11 @@ class AutoMapperEngineClass {
|
|
|
20
20
|
* input: the first ten lines of the uploaded file
|
|
21
21
|
* outputs: the selected schemas
|
|
22
22
|
*/
|
|
23
|
-
this.map = (input, outputs) => __awaiter(this, void 0, void 0, function* () {
|
|
23
|
+
this.map = (input, outputs, fileName, sources) => __awaiter(this, void 0, void 0, function* () {
|
|
24
24
|
(0, Affirm_1.default)(input, 'Invalid input');
|
|
25
25
|
(0, Affirm_1.default)(outputs, 'Invalid outputs');
|
|
26
26
|
const llm = new LLM_1.default();
|
|
27
|
-
const producersRes = yield llm.inferProducers(input, outputs);
|
|
27
|
+
const producersRes = yield llm.inferProducers(input, outputs, fileName, sources);
|
|
28
28
|
const consumersRes = yield llm.inferConsumers(producersRes.producers, outputs);
|
|
29
29
|
return {
|
|
30
30
|
consumers: consumersRes.consumers,
|
package/engines/ai/LLM.js
CHANGED
|
@@ -25,12 +25,16 @@ You are tasked with creating the PRODUCER(S) that will then be used.
|
|
|
25
25
|
A producer maps directly to a dataset and exposes it's dimensions.
|
|
26
26
|
|
|
27
27
|
## FIELDS
|
|
28
|
-
- alias: the reference to the column or property name if different from the desired name property
|
|
29
28
|
- classification: make your best guess if the field falls under any of these regulations
|
|
30
29
|
|
|
31
30
|
# RULES
|
|
32
31
|
- Add only the required fields to comply with the OUTPUT DATA SPEC
|
|
33
32
|
- Add fields that you think are important
|
|
33
|
+
- The name of the producer must be the same as the name of the dataset.
|
|
34
|
+
- Avoid creating multiple providers with similar data.
|
|
35
|
+
- Try to create the least number of providers
|
|
36
|
+
- Awlays include this exact property as the first -> "$schema": "https://raw.githubusercontent.com/ForzaLabs/remora-public/refs/heads/main/json_schemas/producer-schema.json"
|
|
37
|
+
- Based on the producer select the source that makes the most sense to connect otherwise leave the string "<source_name>"
|
|
34
38
|
|
|
35
39
|
# FORMAT
|
|
36
40
|
The INPUT DATA SPEC will be the first 10 rows from the dataset that the producer needs to map to.
|
|
@@ -42,6 +46,12 @@ The result must be returned as a JSON object.
|
|
|
42
46
|
|
|
43
47
|
# OUTPUT DATA SPEC
|
|
44
48
|
{{output data spec}}
|
|
49
|
+
|
|
50
|
+
# File name
|
|
51
|
+
{{file name}}
|
|
52
|
+
|
|
53
|
+
# SOURCES
|
|
54
|
+
{{sources}}
|
|
45
55
|
`;
|
|
46
56
|
const baseConsumersSystemPrompt = `
|
|
47
57
|
# TASK
|
|
@@ -52,12 +62,14 @@ You are going to receive a list of PRODUCERS that expose some dimensions, and yo
|
|
|
52
62
|
A consumer takes the data from one or more producers and changes it's shape to transform it into the required output schema.
|
|
53
63
|
|
|
54
64
|
## FIELDS
|
|
55
|
-
- fields.grouping: used when you need to create a nested data structure with many items under a single key. If no groupingKey is used, then don't add this field.
|
|
56
65
|
- fields.from: used to distinct between the producers imported by the consumer. The value is the name of the producer.
|
|
57
66
|
|
|
58
67
|
# RULES
|
|
59
|
-
- If a field is not needed, do not add it e.g.
|
|
68
|
+
- If a field is not needed, do not add it e.g.
|
|
60
69
|
- Only import a producer once
|
|
70
|
+
- Awlays include this exact property as the first -> $schema": "https://raw.githubusercontent.com/ForzaLabs/remora-public/refs/heads/main/json_schemas/consumer-schema.json",
|
|
71
|
+
- Use "API" as the only valid output format.
|
|
72
|
+
- The "from" must contain only the name of the producer
|
|
61
73
|
|
|
62
74
|
# FORMAT
|
|
63
75
|
The INPUT DATA SPEC will be the first 10 rows from the dataset that the producer needs to map to.
|
|
@@ -141,7 +153,7 @@ resulting consumer: """
|
|
|
141
153
|
],
|
|
142
154
|
"outputs": [
|
|
143
155
|
{
|
|
144
|
-
"format": "
|
|
156
|
+
"format": "API"
|
|
145
157
|
}
|
|
146
158
|
],
|
|
147
159
|
"producers": [
|
|
@@ -159,36 +171,45 @@ You are going to receive a list of CONSUMERS and you need to return in the corre
|
|
|
159
171
|
|
|
160
172
|
# CONSUMER DEFINITION
|
|
161
173
|
A consumer takes the data from one or more producers and changes it's shape to transform it into the required output schema.
|
|
162
|
-
|
|
163
174
|
## FIELDS
|
|
164
|
-
- fields.grouping: used when you need to create a nested data structure with many items under a single key. If no groupingKey is used, then don't add this field.
|
|
165
175
|
- fields.from: used to distinct between the producers imported by the consumer. The value is the name of the producer.
|
|
166
176
|
|
|
167
177
|
# RULES
|
|
168
|
-
- If a field is not needed, do not add it e.g.
|
|
178
|
+
- If a field is not needed, do not add it e.g.
|
|
169
179
|
- Only import a producer once
|
|
180
|
+
- Awlays include this exact property as the first -> "https://raw.githubusercontent.com/ForzaLabs/remora-public/refs/heads/main/json_schemas/consumer-schema.json",
|
|
181
|
+
- Use "API" as the only valid output format.
|
|
182
|
+
- The "from" must contain only the name of the producer
|
|
170
183
|
|
|
171
184
|
# CONSUMERS
|
|
172
185
|
{{consumers}}
|
|
173
186
|
`;
|
|
174
187
|
class LLM {
|
|
175
188
|
constructor() {
|
|
176
|
-
this.inferProducers = (input, outputs) => __awaiter(this, void 0, void 0, function* () {
|
|
189
|
+
this.inferProducers = (input, outputs, fileName, sources) => __awaiter(this, void 0, void 0, function* () {
|
|
177
190
|
let systemPrompt = baseProducersSystemPrompt;
|
|
178
191
|
systemPrompt = systemPrompt.replace('{{input data spec}}', input.map(x => `- ${JSON.stringify(x)}`).join('\n'));
|
|
179
192
|
systemPrompt = systemPrompt.replace('{{output data spec}}', outputs.map(x => `- ${JSON.stringify(x)}`).join('\n'));
|
|
193
|
+
systemPrompt = systemPrompt.replace('{{file name}}', fileName);
|
|
194
|
+
systemPrompt = systemPrompt.replace('{{sources}}', sources.map(x => `- ${JSON.stringify(x)}`).join('\n'));
|
|
180
195
|
const res = yield this._client.beta.chat.completions.parse({
|
|
181
|
-
model: '
|
|
196
|
+
model: 'gpt-4o',
|
|
182
197
|
messages: [
|
|
183
198
|
{ role: 'system', content: systemPrompt }
|
|
184
199
|
],
|
|
185
200
|
response_format: (0, zod_1.zodResponseFormat)(zod_2.z.object({
|
|
186
201
|
producers: zod_2.z.array(zod_2.z.object({
|
|
202
|
+
$schema: zod_2.z.string().describe('The schema of the producer. This should always be the same.'),
|
|
187
203
|
name: zod_2.z.string(),
|
|
188
204
|
description: zod_2.z.string(),
|
|
189
205
|
dimensions: zod_2.z.array(zod_2.z.object({
|
|
190
206
|
name: zod_2.z.string(),
|
|
191
|
-
alias:
|
|
207
|
+
// alias: z.string().optional(),
|
|
208
|
+
source: zod_2.z.string().describe('The name of the source linked to this producer.'),
|
|
209
|
+
settings: zod_2.z.object({
|
|
210
|
+
fileKey: zod_2.z.string().describe('The name of the file'),
|
|
211
|
+
fileType: zod_2.z.string().describe('The file extension (CSV | JSONL | JSON)')
|
|
212
|
+
}),
|
|
192
213
|
description: zod_2.z.string().optional(),
|
|
193
214
|
type: zod_2.z.enum(['string', 'number', 'datetime']),
|
|
194
215
|
pk: zod_2.z.boolean().optional(),
|
|
@@ -205,12 +226,13 @@ class LLM {
|
|
|
205
226
|
systemPrompt = systemPrompt.replace('{{producers}}', producers.map(x => `- ${JSON.stringify(x)}`).join('\n'));
|
|
206
227
|
systemPrompt = systemPrompt.replace('{{output data spec}}', outputs.map(x => `- ${JSON.stringify(x)}`).join('\n'));
|
|
207
228
|
const item = {
|
|
208
|
-
model: '
|
|
229
|
+
model: 'gpt-4o',
|
|
209
230
|
messages: [
|
|
210
231
|
{ role: 'system', content: systemPrompt }
|
|
211
232
|
],
|
|
212
233
|
response_format: (0, zod_1.zodResponseFormat)(zod_2.z.object({
|
|
213
234
|
consumers: zod_2.z.array(zod_2.z.object({
|
|
235
|
+
$schema: zod_2.z.string().describe('The schema of the consumer. This should always be the same.'),
|
|
214
236
|
name: zod_2.z.string(),
|
|
215
237
|
description: zod_2.z.string(),
|
|
216
238
|
producers: zod_2.z.array(zod_2.z.object({
|
|
@@ -224,13 +246,14 @@ class LLM {
|
|
|
224
246
|
fields: zod_2.z.array(zod_2.z.object({
|
|
225
247
|
key: zod_2.z.string(),
|
|
226
248
|
from: zod_2.z.string().optional(),
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
})
|
|
249
|
+
alias: zod_2.z.string().optional()
|
|
250
|
+
// grouping: z.object({
|
|
251
|
+
// groupingKey: z.string(),
|
|
252
|
+
// subFields: z.array(z.lazy(() => z.object({
|
|
253
|
+
// key: z.string(),
|
|
254
|
+
// from: z.string().optional()
|
|
255
|
+
// })))
|
|
256
|
+
// }).optional()
|
|
234
257
|
})),
|
|
235
258
|
outputs: zod_2.z.array(zod_2.z.object({
|
|
236
259
|
format: zod_2.z.enum(['SQL', 'API', 'CSV', 'PARQUET', 'JSON'])
|
|
@@ -243,12 +266,13 @@ class LLM {
|
|
|
243
266
|
const firstDraft = msg.parsed;
|
|
244
267
|
const qaSystemPrompt = baseQASystemPrompt.replace('{{consumers}}', firstDraft.consumers.map(x => `- ${JSON.stringify(x)}`).join('\n'));
|
|
245
268
|
const res2 = yield this._client.beta.chat.completions.parse({
|
|
246
|
-
model: '
|
|
269
|
+
model: 'gpt-4o',
|
|
247
270
|
messages: [
|
|
248
271
|
{ role: 'system', content: qaSystemPrompt }
|
|
249
272
|
],
|
|
250
273
|
response_format: (0, zod_1.zodResponseFormat)(zod_2.z.object({
|
|
251
274
|
consumers: zod_2.z.array(zod_2.z.object({
|
|
275
|
+
$schema: zod_2.z.string().describe('The schema of the consumer. This should always be the same.'),
|
|
252
276
|
name: zod_2.z.string(),
|
|
253
277
|
description: zod_2.z.string(),
|
|
254
278
|
producers: zod_2.z.array(zod_2.z.object({
|
|
@@ -262,13 +286,14 @@ class LLM {
|
|
|
262
286
|
fields: zod_2.z.array(zod_2.z.object({
|
|
263
287
|
key: zod_2.z.string(),
|
|
264
288
|
from: zod_2.z.string().optional(),
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
}).optional()
|
|
289
|
+
alias: zod_2.z.string().optional()
|
|
290
|
+
// grouping: z.object({
|
|
291
|
+
// groupingKey: z.string().optional(),
|
|
292
|
+
// subFields: z.array(z.lazy(() => z.object({
|
|
293
|
+
// key: z.string(),
|
|
294
|
+
// from: z.string().optional()
|
|
295
|
+
// }))).optional()
|
|
296
|
+
// }).optional()
|
|
272
297
|
})),
|
|
273
298
|
outputs: zod_2.z.array(zod_2.z.object({
|
|
274
299
|
format: zod_2.z.enum(['SQL', 'API', 'CSV', 'PARQUET', 'JSON'])
|
|
@@ -113,7 +113,8 @@ class ConsumerManagerClass {
|
|
|
113
113
|
else {
|
|
114
114
|
const col = ConsumerManager.searchFieldInColumns(field.cField, availableColumns, consumer);
|
|
115
115
|
(0, Affirm_1.default)(col, `Consumer "${consumer.name}" misconfiguration: the requested field "${field.cField.key}" is not found in any of the specified producers ("${consumer.producers.map(x => x.name).join(', ')}")`);
|
|
116
|
-
|
|
116
|
+
// TODO: CHECK THIS FIX IS GOOD
|
|
117
|
+
expandedFields.push(Object.assign(Object.assign({}, field), { dimension: col.dimension, measure: col.measure }));
|
|
117
118
|
}
|
|
118
119
|
return expandedFields;
|
|
119
120
|
};
|
|
@@ -23,6 +23,7 @@ const SQLBuilder_1 = __importDefault(require("../sql/SQLBuilder"));
|
|
|
23
23
|
const SQLCompiler_1 = __importDefault(require("../sql/SQLCompiler"));
|
|
24
24
|
const ExecutionPlanner_1 = __importDefault(require("./ExecutionPlanner"));
|
|
25
25
|
const RequestExecutor_1 = __importDefault(require("./RequestExecutor"));
|
|
26
|
+
const TransformationEngine_1 = __importDefault(require("../transform/TransformationEngine"));
|
|
26
27
|
class ExecutionEnvironment {
|
|
27
28
|
constructor(consumer) {
|
|
28
29
|
this.run = (options) => __awaiter(this, void 0, void 0, function* () {
|
|
@@ -30,7 +31,8 @@ class ExecutionEnvironment {
|
|
|
30
31
|
const plan = ExecutionPlanner_1.default.plan(this._consumer, options);
|
|
31
32
|
(0, Affirm_1.default)(plan, `Invalid execution plan`);
|
|
32
33
|
(0, Affirm_1.default)(plan.length > 0, `Empty execution plan`);
|
|
33
|
-
const
|
|
34
|
+
const start = performance.now();
|
|
35
|
+
const result = { shape: ConsumerEngine_1.default.getOutputShape(this._consumer), _elapsedMS: -1 };
|
|
34
36
|
for (const planStep of plan) {
|
|
35
37
|
switch (planStep.type) {
|
|
36
38
|
case 'compile-consumer-to-SQL': {
|
|
@@ -99,10 +101,15 @@ class ExecutionEnvironment {
|
|
|
99
101
|
this._fetchedData = RequestExecutor_1.default._applyFilters(this._fetchedData, this._consumer.filters.map(x => x.rule));
|
|
100
102
|
break;
|
|
101
103
|
}
|
|
104
|
+
case 'apply-transformations': {
|
|
105
|
+
this._fetchedData = TransformationEngine_1.default.apply(this._consumer, this._fetchedData);
|
|
106
|
+
break;
|
|
107
|
+
}
|
|
102
108
|
default: throw new Error(`Invalid execution plan step type "${planStep.type}"`);
|
|
103
109
|
}
|
|
104
110
|
}
|
|
105
111
|
result.data = this._fetchedData;
|
|
112
|
+
result._elapsedMS = performance.now() - start;
|
|
106
113
|
return result;
|
|
107
114
|
});
|
|
108
115
|
this._consumer = consumer;
|
|
@@ -51,16 +51,20 @@ class ExecutionPlannerClas {
|
|
|
51
51
|
}
|
|
52
52
|
default: throw new Error(`Engine "${producerEngine}" not supported`);
|
|
53
53
|
}
|
|
54
|
-
//
|
|
54
|
+
// At this point I have the data loaded in memory
|
|
55
55
|
// TODO: can I handle streaming data? (e.g. a file that is too big to fit in memory)
|
|
56
56
|
// TODO: how to handle pagination of SQL results?
|
|
57
|
+
// Apply the transformations to the fields of the consumer
|
|
58
|
+
// TODO: transformations can also be applied directly to the producer... how???
|
|
59
|
+
if (consumer.fields.some(x => Algo_1.default.hasVal(x.transform)))
|
|
60
|
+
plan.push({ type: 'apply-transformations' });
|
|
57
61
|
const engineClass = this.getEngineClass(producerEngine);
|
|
58
62
|
for (const output of consumer.outputs) {
|
|
59
63
|
switch (output.format.toUpperCase()) {
|
|
60
64
|
case 'JSON': {
|
|
61
65
|
if (engineClass === 'file' && Algo_1.default.hasVal(options))
|
|
62
66
|
plan.push({ type: 'apply-execution-request-to-result' });
|
|
63
|
-
// TODO: test if it is
|
|
67
|
+
// TODO: test if it is needed and if it doesn't break something else
|
|
64
68
|
if (engineClass === 'sql')
|
|
65
69
|
plan.push({ type: 'post-process-json' });
|
|
66
70
|
plan.push({ type: 'export-file', output });
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const Affirm_1 = __importDefault(require("../../core/Affirm"));
const Algo_1 = __importDefault(require("../../core/Algo"));
const TypeCaster_1 = __importDefault(require("./TypeCaster"));
/**
 * Applies the per-field `transform` rules of a consumer to the fetched data,
 * honouring each field's `default` value and `onError` policy
 * ('fail' | 'skip' | 'set_default'; 'fail' is the default).
 */
class TransformationEngineClass {
    constructor() {
        /**
         * Applies the transformations of every consumer field that declares one.
         * Records are mutated in place (performance: avoids copying the dataset).
         * @param consumer consumer definition whose fields may carry `transform`
         * @param data     fetched records; the same array is returned for chaining
         * @throws when a transformation fails and the field's onError policy is 'fail'
         */
        this.apply = (consumer, data) => {
            (0, Affirm_1.default)(consumer, 'Invalid consumer');
            Affirm_1.default.hasValue(data, 'Invalid data');
            const fieldsToTransform = consumer.fields.filter(field => Algo_1.default.hasVal(field.transform));
            Affirm_1.default.hasItems(fieldsToTransform, 'No fields with transformations');
            // Process the data records in place to improve performance instead of copying to a new array
            for (const record of data) {
                for (const field of fieldsToTransform) {
                    if (!field.transform)
                        continue;
                    const value = record[field.key];
                    if (!Algo_1.default.hasVal(value)) {
                        // Missing value: substitute the configured default (if any)
                        // and skip the transform chain for this field.
                        // BUG FIX: previously the default was assigned and then
                        // immediately overwritten by transforming the missing value
                        // (e.g. cast(undefined, 'number') -> NaN clobbered the default).
                        if (Algo_1.default.hasVal(field.default))
                            record[field.key] = field.default;
                        continue;
                    }
                    try {
                        record[field.key] = this.applyTransformations(value, field.transform, field.key);
                    }
                    catch (error) {
                        // Per-field error policy; unknown/absent policy falls through to 'fail'.
                        switch (field.onError) {
                            case 'set_default':
                                record[field.key] = field.default;
                                break;
                            case 'skip':
                                break;
                            case 'fail':
                            default:
                                throw error;
                        }
                    }
                }
            }
            return data;
        };
        /**
         * Applies a single transformation — or, when given an array, the whole
         * chain in order, each step feeding the next. A transformation object
         * whose shape is not recognized leaves the value unchanged.
         * @throws Error when the value's type is incompatible with the transformation
         */
        this.applyTransformations = (value, transformations, fieldName) => {
            var _a;
            if (Array.isArray(transformations)) {
                // Fold the chain left-to-right without building intermediate arrays.
                let result = value;
                for (const transform of transformations) {
                    result = this.applyTransformations(result, transform, fieldName);
                }
                return result;
            }
            // Single transformation: dispatch on the one key the object carries.
            if ('cast' in transformations) {
                return TypeCaster_1.default.cast(value, transformations.cast);
            }
            if ('multiply' in transformations) {
                const num = TypeCaster_1.default.cast(value, 'number');
                if (isNaN(num))
                    throw new Error(`Cannot multiply non-numeric value in field '${fieldName}'`);
                return num * transformations.multiply;
            }
            if ('add' in transformations) {
                const num = TypeCaster_1.default.cast(value, 'number');
                if (isNaN(num))
                    throw new Error(`Cannot add to non-numeric value in field '${fieldName}'`);
                return num + transformations.add;
            }
            if ('extract' in transformations) {
                const date = TypeCaster_1.default.cast(value, 'date');
                if (isNaN(date.getTime()))
                    throw new Error(`Invalid date for extraction in field '${fieldName}'`);
                switch (transformations.extract) {
                    case 'year': return date.getFullYear();
                    case 'month': return date.getMonth() + 1; // 1-based month
                    case 'day': return date.getDate();
                    case 'hour': return date.getHours();
                    case 'minute': return date.getMinutes();
                }
            }
            if ('concat' in transformations) {
                if (!Array.isArray(value))
                    throw new Error(`Cannot concat non-array value in field '${fieldName}'`);
                return value.join(transformations.concat.separator);
            }
            if ('split' in transformations) {
                if (typeof value !== 'string')
                    throw new Error(`Cannot split non-string value in field '${fieldName}'`);
                const parts = value.split(transformations.split.separator);
                if (transformations.split.index >= parts.length) {
                    throw new Error(`Split index ${transformations.split.index} out of bounds in field '${fieldName}'`);
                }
                return parts[transformations.split.index];
            }
            if ('regex_match' in transformations) {
                if (typeof value !== 'string')
                    throw new Error(`Cannot apply regex_match to non-string value in field '${fieldName}'`);
                try {
                    // Fresh RegExp per call: no stateful lastIndex carried across values.
                    const regex = new RegExp(transformations.regex_match.pattern, transformations.regex_match.flags);
                    return regex.test(value);
                }
                catch (error) {
                    throw new Error(`Invalid regex pattern in field '${fieldName}': ${error.message}`);
                }
            }
            if ('regex_replace' in transformations) {
                if (typeof value !== 'string')
                    throw new Error(`Cannot apply regex_replace to non-string value in field '${fieldName}'`);
                try {
                    const regex = new RegExp(transformations.regex_replace.pattern, transformations.regex_replace.flags);
                    return value.replace(regex, transformations.regex_replace.replacement);
                }
                catch (error) {
                    throw new Error(`Invalid regex pattern in field '${fieldName}': ${error.message}`);
                }
            }
            if ('regex_extract' in transformations) {
                if (typeof value !== 'string')
                    throw new Error(`Cannot apply regex_extract to non-string value in field '${fieldName}'`);
                try {
                    const regex = new RegExp(transformations.regex_extract.pattern, transformations.regex_extract.flags);
                    const matches = value.match(regex);
                    if (!matches)
                        return null;
                    const groupIndex = transformations.regex_extract.group;
                    return (_a = matches[groupIndex]) !== null && _a !== void 0 ? _a : null;
                }
                catch (error) {
                    throw new Error(`Invalid regex pattern in field '${fieldName}': ${error.message}`);
                }
            }
            if ('trim' in transformations) {
                if (typeof value !== 'string')
                    throw new Error(`Cannot trim non-string value in field '${fieldName}'`);
                return value.trim();
            }
            if ('to_lowercase' in transformations) {
                if (typeof value !== 'string')
                    throw new Error(`Cannot convert non-string value to lowercase in field '${fieldName}'`);
                return value.toLowerCase();
            }
            if ('to_uppercase' in transformations) {
                if (typeof value !== 'string')
                    throw new Error(`Cannot convert non-string value to uppercase in field '${fieldName}'`);
                return value.toUpperCase();
            }
            if ('capitalize' in transformations) {
                if (typeof value !== 'string')
                    throw new Error(`Cannot capitalize non-string value in field '${fieldName}'`);
                return value.charAt(0).toUpperCase() + value.slice(1);
            }
            if ('substring' in transformations) {
                if (typeof value !== 'string')
                    throw new Error(`Cannot take substring of non-string value in field '${fieldName}'`);
                const { start, end } = transformations.substring;
                return end !== undefined ? value.substring(start, end) : value.substring(start);
            }
            if ('pad_start' in transformations) {
                if (typeof value !== 'string')
                    throw new Error(`Cannot pad non-string value in field '${fieldName}'`);
                const { length, char } = transformations.pad_start;
                if (char.length !== 1)
                    throw new Error(`Pad character must be exactly one character in field '${fieldName}'`);
                return value.padStart(length, char);
            }
            if ('pad_end' in transformations) {
                if (typeof value !== 'string')
                    throw new Error(`Cannot pad non-string value in field '${fieldName}'`);
                const { length, char } = transformations.pad_end;
                if (char.length !== 1)
                    throw new Error(`Pad character must be exactly one character in field '${fieldName}'`);
                return value.padEnd(length, char);
            }
            if ('prepend' in transformations)
                return transformations.prepend + TypeCaster_1.default.cast(value, 'string');
            if ('append' in transformations)
                return TypeCaster_1.default.cast(value, 'string') + transformations.append;
            if ('conditional' in transformations) {
                // First clause whose condition holds wins; otherwise the `else`
                // value (when provided) or the value untouched.
                for (const clause of transformations.conditional.clauses) {
                    if (this.evaluateCondition(value, clause.if)) {
                        return clause.then;
                    }
                }
                return transformations.conditional.else !== undefined ? transformations.conditional.else : value;
            }
            return value;
        };
        /**
         * Evaluates one `conditional` clause condition against a value.
         * Numeric comparisons coerce the value through TypeCaster; an
         * unrecognized condition shape evaluates to false.
         */
        this.evaluateCondition = (value, condition) => {
            if ('greater_than' in condition) {
                return TypeCaster_1.default.cast(value, 'number') > condition.greater_than;
            }
            if ('greater_than_or_equal' in condition) {
                return TypeCaster_1.default.cast(value, 'number') >= condition.greater_than_or_equal;
            }
            if ('less_than' in condition) {
                return TypeCaster_1.default.cast(value, 'number') < condition.less_than;
            }
            if ('less_than_or_equal' in condition) {
                return TypeCaster_1.default.cast(value, 'number') <= condition.less_than_or_equal;
            }
            if ('equals' in condition) {
                return value === condition.equals;
            }
            if ('not_equals' in condition) {
                return value !== condition.not_equals;
            }
            if ('in' in condition) {
                return condition.in.includes(value);
            }
            if ('not_in' in condition) {
                return !condition.not_in.includes(value);
            }
            return false;
        };
    }
}
const TransformationEngine = new TransformationEngineClass();
exports.default = TransformationEngine;
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
/**
 * Lightweight value coercion used by the transformation engine.
 */
class TypeCasterClass {
    /**
     * Casts `value` to the requested type, skipping the conversion when the
     * value already has that type. Dates are always rebuilt via `new Date`.
     * An unsupported type name yields undefined (no matching branch).
     */
    cast(value, type) {
        if (type === 'date')
            return new Date(value);
        if (type === 'boolean')
            return typeof value === 'boolean' ? value : Boolean(value);
        if (type === 'number')
            return typeof value === 'number' ? value : Number(value);
        if (type === 'string')
            return typeof value === 'string' ? value : String(value);
    }
}
const TypeCaster = new TypeCasterClass();
exports.default = TypeCaster;
|