@forzalabs/remora 0.0.20 → 0.0.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,110 @@
1
+ "use strict";
2
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
+ return new (P || (P = Promise))(function (resolve, reject) {
5
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
9
+ });
10
+ };
11
+ var __importDefault = (this && this.__importDefault) || function (mod) {
12
+ return (mod && mod.__esModule) ? mod : { "default": mod };
13
+ };
14
+ Object.defineProperty(exports, "__esModule", { value: true });
15
+ const DSTE_1 = __importDefault(require("../core/dste/DSTE"));
16
+ const DatabaseEngine_1 = __importDefault(require("../database/DatabaseEngine"));
17
+ const DataframeManager_1 = __importDefault(require("./DataframeManager"));
18
+ class UsageDataManager {
19
+ getUsageDetails() {
20
+ return __awaiter(this, void 0, void 0, function* () {
21
+ const now = DSTE_1.default.now();
22
+ const from = new Date(now.getTime() - 30 * 24 * 60 * 60 * 1000);
23
+ const prevMonthFrom = new Date(now.getTime() - 60 * 24 * 60 * 60 * 1000);
24
+ const yearAgo = new Date(now.getFullYear(), now.getMonth() - 11, 1);
25
+ const collection = 'usage';
26
+ // Aggregate status counts for current and previous month
27
+ const getStatusCounts = (start, end) => __awaiter(this, void 0, void 0, function* () {
28
+ const results = yield DatabaseEngine_1.default.aggregate(collection, [
29
+ { $match: { startedAt: { $gte: start, $lte: end } } },
30
+ { $group: { _id: '$status', count: { $sum: 1 } } }
31
+ ]);
32
+ let success = 0, failed = 0, total = 0;
33
+ results.forEach((r) => {
34
+ total += r.count;
35
+ if (r._id === 'success')
36
+ success = r.count;
37
+ if (r._id === 'failed')
38
+ failed = r.count;
39
+ });
40
+ return { total, success, failed };
41
+ });
42
+ const statusesRequests = yield getStatusCounts(from, now);
43
+ const prevStatusesRequests = yield getStatusCounts(prevMonthFrom, from);
44
+ // Monthly success and fails for last 12 months
45
+ const monthlySuccessPipeline = [
46
+ { $match: { status: 'success', startedAt: { $gte: yearAgo, $lte: now } } },
47
+ { $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
48
+ { $group: { _id: { year: '$year', month: '$month' }, count: { $sum: 1 } } },
49
+ { $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$count' } },
50
+ { $sort: { x: 1 } }
51
+ ];
52
+ const monthlyFailsPipeline = [
53
+ { $match: { status: 'failed', startedAt: { $gte: yearAgo, $lte: now } } },
54
+ { $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
55
+ { $group: { _id: { year: '$year', month: '$month' }, count: { $sum: 1 } } },
56
+ { $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$count' } },
57
+ { $sort: { x: 1 } }
58
+ ];
59
+ const rawMonthlySuccess = yield DatabaseEngine_1.default.aggregate(collection, monthlySuccessPipeline);
60
+ const rawMonthlyFails = yield DatabaseEngine_1.default.aggregate(collection, monthlyFailsPipeline);
61
+ // Top lines per month for last 12 months
62
+ const topLinesPipeline = [
63
+ { $match: { startedAt: { $gte: yearAgo, $lte: now } } },
64
+ { $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
65
+ { $group: { _id: { year: '$year', month: '$month' }, itemsCount: { $max: '$itemsCount' } } },
66
+ { $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$itemsCount' } },
67
+ { $sort: { x: 1 } }
68
+ ];
69
+ const topLines = yield DatabaseEngine_1.default.aggregate(collection, topLinesPipeline);
70
+ // Top times per month for last 12 months
71
+ const topTimePipeline = [
72
+ { $match: { startedAt: { $gte: yearAgo, $lte: now } } },
73
+ { $addFields: { durationMs: { $subtract: ['$finishedAt', '$startedAt'] }, year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
74
+ { $group: { _id: { year: '$year', month: '$month' }, maxDuration: { $max: '$durationMs' } } },
75
+ { $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$maxDuration' } },
76
+ { $sort: { x: 1 } }
77
+ ];
78
+ const topTime = yield DatabaseEngine_1.default.aggregate(collection, topTimePipeline);
79
+ // Monthly consumers: for each consumer, per month count
80
+ const consumerPipeline = [
81
+ { $match: { startedAt: { $gte: yearAgo, $lte: now } } },
82
+ { $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
83
+ { $group: { _id: { consumer: '$consumer', year: '$year', month: '$month' }, count: { $sum: 1 } } },
84
+ { $project: { _id: 0, consumer: '$_id.consumer', x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$count' } },
85
+ { $sort: { consumer: 1, x: 1 } }
86
+ ];
87
+ const consumersData = yield DatabaseEngine_1.default.aggregate(collection, consumerPipeline);
88
+ // transform to consumer array
89
+ const consumerMap = {};
90
+ consumersData.forEach((r) => {
91
+ consumerMap[r.consumer] = consumerMap[r.consumer] || [];
92
+ consumerMap[r.consumer].push({ x: r.x, y: r.y });
93
+ });
94
+ const consumers = Object.entries(consumerMap).map(([name, data]) => ({ name, data: DataframeManager_1.default.fill(data !== null && data !== void 0 ? data : [], yearAgo, now) }));
95
+ // Recent executions
96
+ const recentExecution = yield DatabaseEngine_1.default.query(collection, { startedAt: { $gte: from, $lte: now } }, { sort: { startedAt: -1 }, limit: 10 });
97
+ return {
98
+ statusesRequests,
99
+ prevStatusesRequests,
100
+ monthlySuccess: DataframeManager_1.default.fill(rawMonthlySuccess !== null && rawMonthlySuccess !== void 0 ? rawMonthlySuccess : [], yearAgo, now),
101
+ monthlyFails: DataframeManager_1.default.fill(rawMonthlyFails !== null && rawMonthlyFails !== void 0 ? rawMonthlyFails : [], yearAgo, now),
102
+ consumers: consumers,
103
+ topLine: DataframeManager_1.default.fill(topLines !== null && topLines !== void 0 ? topLines : [], yearAgo, now),
104
+ topTime: DataframeManager_1.default.fill(topTime !== null && topTime !== void 0 ? topTime : [], yearAgo, now),
105
+ recentExecution
106
+ };
107
+ });
108
+ }
109
+ }
110
+ exports.default = new UsageDataManager();
@@ -46,8 +46,8 @@ const DEV_USER = {
46
46
  const MOCK_USER = {
47
47
  _id: '__mock__',
48
48
  auth: { oid: '', provider: 'azure' },
49
- email: '',
50
- name: 'mock',
49
+ email: 'mock.user@email.com',
50
+ name: 'Mock User',
51
51
  roles: ['user'],
52
52
  _signature: '',
53
53
  lastLogin: new Date().toJSON()
@@ -20,11 +20,11 @@ class AutoMapperEngineClass {
20
20
  * input: the first ten lines of the uploaded file
21
21
  * outputs: the selected schemas
22
22
  */
23
- this.map = (input, outputs) => __awaiter(this, void 0, void 0, function* () {
23
+ this.map = (input, outputs, fileName, sources) => __awaiter(this, void 0, void 0, function* () {
24
24
  (0, Affirm_1.default)(input, 'Invalid input');
25
25
  (0, Affirm_1.default)(outputs, 'Invalid outputs');
26
26
  const llm = new LLM_1.default();
27
- const producersRes = yield llm.inferProducers(input, outputs);
27
+ const producersRes = yield llm.inferProducers(input, outputs, fileName, sources);
28
28
  const consumersRes = yield llm.inferConsumers(producersRes.producers, outputs);
29
29
  return {
30
30
  consumers: consumersRes.consumers,
package/engines/ai/LLM.js CHANGED
@@ -25,12 +25,16 @@ You are tasked with creating the PRODUCER(S) that will then be used.
25
25
  A producer maps directly to a dataset and exposes it's dimensions.
26
26
 
27
27
  ## FIELDS
28
- - alias: the reference to the column or property name if different from the desired name property
29
28
  - classification: make your best guess if the field falls under any of these regulations
30
29
 
31
30
  # RULES
32
31
  - Add only the required fields to comply with the OUTPUT DATA SPEC
33
32
  - Add fields that you think are important
33
+ - The name of the producer must be the same as the name of the dataset.
34
+ - Avoid creating multiple providers with similar data.
35
+ - Try to create the least number of providers
36
+ - Always include this exact property as the first -> "$schema": "https://raw.githubusercontent.com/ForzaLabs/remora-public/refs/heads/main/json_schemas/producer-schema.json"
37
+ - Based on the producer select the source that makes the most sense to connect otherwise leave the string "<source_name>"
34
38
 
35
39
  # FORMAT
36
40
  The INPUT DATA SPEC will be the first 10 rows from the dataset that the producer needs to map to.
@@ -42,6 +46,12 @@ The result must be returned as a JSON object.
42
46
 
43
47
  # OUTPUT DATA SPEC
44
48
  {{output data spec}}
49
+
50
+ # File name
51
+ {{file name}}
52
+
53
+ # SOURCES
54
+ {{sources}}
45
55
  `;
46
56
  const baseConsumersSystemPrompt = `
47
57
  # TASK
@@ -52,12 +62,14 @@ You are going to receive a list of PRODUCERS that expose some dimensions, and yo
52
62
  A consumer takes the data from one or more producers and changes it's shape to transform it into the required output schema.
53
63
 
54
64
  ## FIELDS
55
- - fields.grouping: used when you need to create a nested data structure with many items under a single key. If no groupingKey is used, then don't add this field.
56
65
  - fields.from: used to distinct between the producers imported by the consumer. The value is the name of the producer.
57
66
 
58
67
  # RULES
59
- - If a field is not needed, do not add it e.g. if there is no need for the "grouping" property, then don't add it.
68
+ - If a field is not needed, do not add it e.g.
60
69
  - Only import a producer once
70
+ - Always include this exact property as the first -> $schema": "https://raw.githubusercontent.com/ForzaLabs/remora-public/refs/heads/main/json_schemas/consumer-schema.json",
71
+ - Use "API" as the only valid output format.
72
+ - The "from" must contain only the name of the producer
61
73
 
62
74
  # FORMAT
63
75
  The INPUT DATA SPEC will be the first 10 rows from the dataset that the producer needs to map to.
@@ -141,7 +153,7 @@ resulting consumer: """
141
153
  ],
142
154
  "outputs": [
143
155
  {
144
- "format": "JSON"
156
+ "format": "API"
145
157
  }
146
158
  ],
147
159
  "producers": [
@@ -159,36 +171,45 @@ You are going to receive a list of CONSUMERS and you need to return in the corre
159
171
 
160
172
  # CONSUMER DEFINITION
161
173
  A consumer takes the data from one or more producers and changes it's shape to transform it into the required output schema.
162
-
163
174
  ## FIELDS
164
- - fields.grouping: used when you need to create a nested data structure with many items under a single key. If no groupingKey is used, then don't add this field.
165
175
  - fields.from: used to distinct between the producers imported by the consumer. The value is the name of the producer.
166
176
 
167
177
  # RULES
168
- - If a field is not needed, do not add it e.g. if there is no need for the "grouping" property, then don't add it.
178
+ - If a field is not needed, do not add it e.g.
169
179
  - Only import a producer once
180
+ - Always include this exact property as the first -> "https://raw.githubusercontent.com/ForzaLabs/remora-public/refs/heads/main/json_schemas/consumer-schema.json",
181
+ - Use "API" as the only valid output format.
182
+ - The "from" must contain only the name of the producer
170
183
 
171
184
  # CONSUMERS
172
185
  {{consumers}}
173
186
  `;
174
187
  class LLM {
175
188
  constructor() {
176
- this.inferProducers = (input, outputs) => __awaiter(this, void 0, void 0, function* () {
189
+ this.inferProducers = (input, outputs, fileName, sources) => __awaiter(this, void 0, void 0, function* () {
177
190
  let systemPrompt = baseProducersSystemPrompt;
178
191
  systemPrompt = systemPrompt.replace('{{input data spec}}', input.map(x => `- ${JSON.stringify(x)}`).join('\n'));
179
192
  systemPrompt = systemPrompt.replace('{{output data spec}}', outputs.map(x => `- ${JSON.stringify(x)}`).join('\n'));
193
+ systemPrompt = systemPrompt.replace('{{file name}}', fileName);
194
+ systemPrompt = systemPrompt.replace('{{sources}}', sources.map(x => `- ${JSON.stringify(x)}`).join('\n'));
180
195
  const res = yield this._client.beta.chat.completions.parse({
181
- model: 'o3-mini',
196
+ model: 'gpt-4o',
182
197
  messages: [
183
198
  { role: 'system', content: systemPrompt }
184
199
  ],
185
200
  response_format: (0, zod_1.zodResponseFormat)(zod_2.z.object({
186
201
  producers: zod_2.z.array(zod_2.z.object({
202
+ $schema: zod_2.z.string().describe('The schema of the producer. This should always be the same.'),
187
203
  name: zod_2.z.string(),
188
204
  description: zod_2.z.string(),
189
205
  dimensions: zod_2.z.array(zod_2.z.object({
190
206
  name: zod_2.z.string(),
191
- alias: zod_2.z.string().optional(),
207
+ // alias: z.string().optional(),
208
+ source: zod_2.z.string().describe('The name of the source linked to this producer.'),
209
+ settings: zod_2.z.object({
210
+ fileKey: zod_2.z.string().describe('The name of the file'),
211
+ fileType: zod_2.z.string().describe('The file extension (CSV | JSONL | JSON)')
212
+ }),
192
213
  description: zod_2.z.string().optional(),
193
214
  type: zod_2.z.enum(['string', 'number', 'datetime']),
194
215
  pk: zod_2.z.boolean().optional(),
@@ -205,12 +226,13 @@ class LLM {
205
226
  systemPrompt = systemPrompt.replace('{{producers}}', producers.map(x => `- ${JSON.stringify(x)}`).join('\n'));
206
227
  systemPrompt = systemPrompt.replace('{{output data spec}}', outputs.map(x => `- ${JSON.stringify(x)}`).join('\n'));
207
228
  const item = {
208
- model: 'o3-mini',
229
+ model: 'gpt-4o',
209
230
  messages: [
210
231
  { role: 'system', content: systemPrompt }
211
232
  ],
212
233
  response_format: (0, zod_1.zodResponseFormat)(zod_2.z.object({
213
234
  consumers: zod_2.z.array(zod_2.z.object({
235
+ $schema: zod_2.z.string().describe('The schema of the consumer. This should always be the same.'),
214
236
  name: zod_2.z.string(),
215
237
  description: zod_2.z.string(),
216
238
  producers: zod_2.z.array(zod_2.z.object({
@@ -224,13 +246,14 @@ class LLM {
224
246
  fields: zod_2.z.array(zod_2.z.object({
225
247
  key: zod_2.z.string(),
226
248
  from: zod_2.z.string().optional(),
227
- grouping: zod_2.z.object({
228
- groupingKey: zod_2.z.string(),
229
- subFields: zod_2.z.array(zod_2.z.lazy(() => zod_2.z.object({
230
- key: zod_2.z.string(),
231
- from: zod_2.z.string().optional()
232
- })))
233
- }).optional()
249
+ alias: zod_2.z.string().optional()
250
+ // grouping: z.object({
251
+ // groupingKey: z.string(),
252
+ // subFields: z.array(z.lazy(() => z.object({
253
+ // key: z.string(),
254
+ // from: z.string().optional()
255
+ // })))
256
+ // }).optional()
234
257
  })),
235
258
  outputs: zod_2.z.array(zod_2.z.object({
236
259
  format: zod_2.z.enum(['SQL', 'API', 'CSV', 'PARQUET', 'JSON'])
@@ -243,12 +266,13 @@ class LLM {
243
266
  const firstDraft = msg.parsed;
244
267
  const qaSystemPrompt = baseQASystemPrompt.replace('{{consumers}}', firstDraft.consumers.map(x => `- ${JSON.stringify(x)}`).join('\n'));
245
268
  const res2 = yield this._client.beta.chat.completions.parse({
246
- model: 'o3-mini',
269
+ model: 'gpt-4o',
247
270
  messages: [
248
271
  { role: 'system', content: qaSystemPrompt }
249
272
  ],
250
273
  response_format: (0, zod_1.zodResponseFormat)(zod_2.z.object({
251
274
  consumers: zod_2.z.array(zod_2.z.object({
275
+ $schema: zod_2.z.string().describe('The schema of the consumer. This should always be the same.'),
252
276
  name: zod_2.z.string(),
253
277
  description: zod_2.z.string(),
254
278
  producers: zod_2.z.array(zod_2.z.object({
@@ -262,13 +286,14 @@ class LLM {
262
286
  fields: zod_2.z.array(zod_2.z.object({
263
287
  key: zod_2.z.string(),
264
288
  from: zod_2.z.string().optional(),
265
- grouping: zod_2.z.object({
266
- groupingKey: zod_2.z.string().optional(),
267
- subFields: zod_2.z.array(zod_2.z.lazy(() => zod_2.z.object({
268
- key: zod_2.z.string(),
269
- from: zod_2.z.string().optional()
270
- }))).optional()
271
- }).optional()
289
+ alias: zod_2.z.string().optional()
290
+ // grouping: z.object({
291
+ // groupingKey: z.string().optional(),
292
+ // subFields: z.array(z.lazy(() => z.object({
293
+ // key: z.string(),
294
+ // from: z.string().optional()
295
+ // }))).optional()
296
+ // }).optional()
272
297
  })),
273
298
  outputs: zod_2.z.array(zod_2.z.object({
274
299
  format: zod_2.z.enum(['SQL', 'API', 'CSV', 'PARQUET', 'JSON'])
@@ -113,7 +113,8 @@ class ConsumerManagerClass {
113
113
  else {
114
114
  const col = ConsumerManager.searchFieldInColumns(field.cField, availableColumns, consumer);
115
115
  (0, Affirm_1.default)(col, `Consumer "${consumer.name}" misconfiguration: the requested field "${field.cField.key}" is not found in any of the specified producers ("${consumer.producers.map(x => x.name).join(', ')}")`);
116
- expandedFields.push(field);
116
+ // TODO: CHECK THIS FIX IS GOOD
117
+ expandedFields.push(Object.assign(Object.assign({}, field), { dimension: col.dimension, measure: col.measure }));
117
118
  }
118
119
  return expandedFields;
119
120
  };
@@ -23,6 +23,7 @@ const SQLBuilder_1 = __importDefault(require("../sql/SQLBuilder"));
23
23
  const SQLCompiler_1 = __importDefault(require("../sql/SQLCompiler"));
24
24
  const ExecutionPlanner_1 = __importDefault(require("./ExecutionPlanner"));
25
25
  const RequestExecutor_1 = __importDefault(require("./RequestExecutor"));
26
+ const TransformationEngine_1 = __importDefault(require("../transform/TransformationEngine"));
26
27
  class ExecutionEnvironment {
27
28
  constructor(consumer) {
28
29
  this.run = (options) => __awaiter(this, void 0, void 0, function* () {
@@ -30,7 +31,8 @@ class ExecutionEnvironment {
30
31
  const plan = ExecutionPlanner_1.default.plan(this._consumer, options);
31
32
  (0, Affirm_1.default)(plan, `Invalid execution plan`);
32
33
  (0, Affirm_1.default)(plan.length > 0, `Empty execution plan`);
33
- const result = { shape: ConsumerEngine_1.default.getOutputShape(this._consumer) };
34
+ const start = performance.now();
35
+ const result = { shape: ConsumerEngine_1.default.getOutputShape(this._consumer), _elapsedMS: -1 };
34
36
  for (const planStep of plan) {
35
37
  switch (planStep.type) {
36
38
  case 'compile-consumer-to-SQL': {
@@ -99,10 +101,15 @@ class ExecutionEnvironment {
99
101
  this._fetchedData = RequestExecutor_1.default._applyFilters(this._fetchedData, this._consumer.filters.map(x => x.rule));
100
102
  break;
101
103
  }
104
+ case 'apply-transformations': {
105
+ this._fetchedData = TransformationEngine_1.default.apply(this._consumer, this._fetchedData);
106
+ break;
107
+ }
102
108
  default: throw new Error(`Invalid execution plan step type "${planStep.type}"`);
103
109
  }
104
110
  }
105
111
  result.data = this._fetchedData;
112
+ result._elapsedMS = performance.now() - start;
106
113
  return result;
107
114
  });
108
115
  this._consumer = consumer;
@@ -51,16 +51,20 @@ class ExecutionPlannerClas {
51
51
  }
52
52
  default: throw new Error(`Engine "${producerEngine}" not supported`);
53
53
  }
54
- // at this point I have the data loaded in memory
54
+ // At this point I have the data loaded in memory
55
55
  // TODO: can I handle streaming data? (e.g. a file that is too big to fit in memory)
56
56
  // TODO: how to handle pagination of SQL results?
57
+ // Apply the transormations to the fields of the consumer
58
+ // TODO: transformations can also be applied directly to the producer... how???
59
+ if (consumer.fields.some(x => Algo_1.default.hasVal(x.transform)))
60
+ plan.push({ type: 'apply-transformations' });
57
61
  const engineClass = this.getEngineClass(producerEngine);
58
62
  for (const output of consumer.outputs) {
59
63
  switch (output.format.toUpperCase()) {
60
64
  case 'JSON': {
61
65
  if (engineClass === 'file' && Algo_1.default.hasVal(options))
62
66
  plan.push({ type: 'apply-execution-request-to-result' });
63
- // TODO: test if it is need ed and if it doesn't break soething else
67
+ // TODO: test if it is needed and if it doesn't break soething else
64
68
  if (engineClass === 'sql')
65
69
  plan.push({ type: 'post-process-json' });
66
70
  plan.push({ type: 'export-file', output });
@@ -0,0 +1,220 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ const Affirm_1 = __importDefault(require("../../core/Affirm"));
7
+ const Algo_1 = __importDefault(require("../../core/Algo"));
8
+ const TypeCaster_1 = __importDefault(require("./TypeCaster"));
9
+ class TransformationEngineClass {
10
+ constructor() {
11
+ this.apply = (consumer, data) => {
12
+ (0, Affirm_1.default)(consumer, 'Invalid consumer');
13
+ Affirm_1.default.hasValue(data, 'Invalid data');
14
+ const fieldsToTransform = consumer.fields.filter(field => Algo_1.default.hasVal(field.transform));
15
+ Affirm_1.default.hasItems(fieldsToTransform, 'No fields with transformations');
16
+ // Process the data records in place to improve performance instead of copying to a new array
17
+ for (const record of data) {
18
+ for (const field of fieldsToTransform) {
19
+ if (!field.transform)
20
+ continue;
21
+ const value = record[field.key];
22
+ if (!Algo_1.default.hasVal(value) && Algo_1.default.hasVal(field.default))
23
+ record[field.key] = field.default;
24
+ else if (!Algo_1.default.hasVal(value))
25
+ continue;
26
+ try {
27
+ record[field.key] = this.applyTransformations(value, field.transform, field.key);
28
+ }
29
+ catch (error) {
30
+ switch (field.onError) {
31
+ case 'set_default':
32
+ record[field.key] = field.default;
33
+ break;
34
+ case 'skip':
35
+ break;
36
+ case 'fail':
37
+ default:
38
+ throw error;
39
+ }
40
+ }
41
+ }
42
+ }
43
+ return data;
44
+ };
45
+ this.applyTransformations = (value, transformations, fieldName) => {
46
+ var _a;
47
+ if (Array.isArray(transformations)) {
48
+ // Process array transformations without creating intermediate arrays
49
+ let result = value;
50
+ for (const transform of transformations) {
51
+ result = this.applyTransformations(result, transform, fieldName);
52
+ }
53
+ return result;
54
+ }
55
+ // Single transformation
56
+ if ('cast' in transformations) {
57
+ return TypeCaster_1.default.cast(value, transformations.cast);
58
+ }
59
+ if ('multiply' in transformations) {
60
+ const num = TypeCaster_1.default.cast(value, 'number');
61
+ if (isNaN(num))
62
+ throw new Error(`Cannot multiply non-numeric value in field '${fieldName}'`);
63
+ return num * transformations.multiply;
64
+ }
65
+ if ('add' in transformations) {
66
+ const num = TypeCaster_1.default.cast(value, 'number');
67
+ if (isNaN(num))
68
+ throw new Error(`Cannot add to non-numeric value in field '${fieldName}'`);
69
+ return num + transformations.add;
70
+ }
71
+ if ('extract' in transformations) {
72
+ const date = TypeCaster_1.default.cast(value, 'date');
73
+ if (isNaN(date.getTime()))
74
+ throw new Error(`Invalid date for extraction in field '${fieldName}'`);
75
+ switch (transformations.extract) {
76
+ case 'year': return date.getFullYear();
77
+ case 'month': return date.getMonth() + 1; // 1-based month
78
+ case 'day': return date.getDate();
79
+ case 'hour': return date.getHours();
80
+ case 'minute': return date.getMinutes();
81
+ }
82
+ }
83
+ if ('concat' in transformations) {
84
+ if (!Array.isArray(value))
85
+ throw new Error(`Cannot concat non-array value in field '${fieldName}'`);
86
+ return value.join(transformations.concat.separator);
87
+ }
88
+ if ('split' in transformations) {
89
+ if (typeof value !== 'string')
90
+ throw new Error(`Cannot split non-string value in field '${fieldName}'`);
91
+ const parts = value.split(transformations.split.separator);
92
+ if (transformations.split.index >= parts.length) {
93
+ throw new Error(`Split index ${transformations.split.index} out of bounds in field '${fieldName}'`);
94
+ }
95
+ return parts[transformations.split.index];
96
+ }
97
+ if ('regex_match' in transformations) {
98
+ if (typeof value !== 'string')
99
+ throw new Error(`Cannot apply regex_match to non-string value in field '${fieldName}'`);
100
+ try {
101
+ const regex = new RegExp(transformations.regex_match.pattern, transformations.regex_match.flags);
102
+ return regex.test(value);
103
+ }
104
+ catch (error) {
105
+ throw new Error(`Invalid regex pattern in field '${fieldName}': ${error.message}`);
106
+ }
107
+ }
108
+ if ('regex_replace' in transformations) {
109
+ if (typeof value !== 'string')
110
+ throw new Error(`Cannot apply regex_replace to non-string value in field '${fieldName}'`);
111
+ try {
112
+ const regex = new RegExp(transformations.regex_replace.pattern, transformations.regex_replace.flags);
113
+ return value.replace(regex, transformations.regex_replace.replacement);
114
+ }
115
+ catch (error) {
116
+ throw new Error(`Invalid regex pattern in field '${fieldName}': ${error.message}`);
117
+ }
118
+ }
119
+ if ('regex_extract' in transformations) {
120
+ if (typeof value !== 'string')
121
+ throw new Error(`Cannot apply regex_extract to non-string value in field '${fieldName}'`);
122
+ try {
123
+ const regex = new RegExp(transformations.regex_extract.pattern, transformations.regex_extract.flags);
124
+ const matches = value.match(regex);
125
+ if (!matches)
126
+ return null;
127
+ const groupIndex = transformations.regex_extract.group;
128
+ return (_a = matches[groupIndex]) !== null && _a !== void 0 ? _a : null;
129
+ }
130
+ catch (error) {
131
+ throw new Error(`Invalid regex pattern in field '${fieldName}': ${error.message}`);
132
+ }
133
+ }
134
+ if ('trim' in transformations) {
135
+ if (typeof value !== 'string')
136
+ throw new Error(`Cannot trim non-string value in field '${fieldName}'`);
137
+ return value.trim();
138
+ }
139
+ if ('to_lowercase' in transformations) {
140
+ if (typeof value !== 'string')
141
+ throw new Error(`Cannot convert non-string value to lowercase in field '${fieldName}'`);
142
+ return value.toLowerCase();
143
+ }
144
+ if ('to_uppercase' in transformations) {
145
+ if (typeof value !== 'string')
146
+ throw new Error(`Cannot convert non-string value to uppercase in field '${fieldName}'`);
147
+ return value.toUpperCase();
148
+ }
149
+ if ('capitalize' in transformations) {
150
+ if (typeof value !== 'string')
151
+ throw new Error(`Cannot capitalize non-string value in field '${fieldName}'`);
152
+ return value.charAt(0).toUpperCase() + value.slice(1);
153
+ }
154
+ if ('substring' in transformations) {
155
+ if (typeof value !== 'string')
156
+ throw new Error(`Cannot take substring of non-string value in field '${fieldName}'`);
157
+ const { start, end } = transformations.substring;
158
+ return end !== undefined ? value.substring(start, end) : value.substring(start);
159
+ }
160
+ if ('pad_start' in transformations) {
161
+ if (typeof value !== 'string')
162
+ throw new Error(`Cannot pad non-string value in field '${fieldName}'`);
163
+ const { length, char } = transformations.pad_start;
164
+ if (char.length !== 1)
165
+ throw new Error(`Pad character must be exactly one character in field '${fieldName}'`);
166
+ return value.padStart(length, char);
167
+ }
168
+ if ('pad_end' in transformations) {
169
+ if (typeof value !== 'string')
170
+ throw new Error(`Cannot pad non-string value in field '${fieldName}'`);
171
+ const { length, char } = transformations.pad_end;
172
+ if (char.length !== 1)
173
+ throw new Error(`Pad character must be exactly one character in field '${fieldName}'`);
174
+ return value.padEnd(length, char);
175
+ }
176
+ if ('prepend' in transformations)
177
+ return transformations.prepend + TypeCaster_1.default.cast(value, 'string');
178
+ if ('append' in transformations)
179
+ return TypeCaster_1.default.cast(value, 'string') + transformations.append;
180
+ if ('conditional' in transformations) {
181
+ for (const clause of transformations.conditional.clauses) {
182
+ if (this.evaluateCondition(value, clause.if)) {
183
+ return clause.then;
184
+ }
185
+ }
186
+ return transformations.conditional.else !== undefined ? transformations.conditional.else : value;
187
+ }
188
+ return value;
189
+ };
190
+ this.evaluateCondition = (value, condition) => {
191
+ if ('greater_than' in condition) {
192
+ return TypeCaster_1.default.cast(value, 'number') > condition.greater_than;
193
+ }
194
+ if ('greater_than_or_equal' in condition) {
195
+ return TypeCaster_1.default.cast(value, 'number') >= condition.greater_than_or_equal;
196
+ }
197
+ if ('less_than' in condition) {
198
+ return TypeCaster_1.default.cast(value, 'number') < condition.less_than;
199
+ }
200
+ if ('less_than_or_equal' in condition) {
201
+ return TypeCaster_1.default.cast(value, 'number') <= condition.less_than_or_equal;
202
+ }
203
+ if ('equals' in condition) {
204
+ return value === condition.equals;
205
+ }
206
+ if ('not_equals' in condition) {
207
+ return value !== condition.not_equals;
208
+ }
209
+ if ('in' in condition) {
210
+ return condition.in.includes(value);
211
+ }
212
+ if ('not_in' in condition) {
213
+ return !condition.not_in.includes(value);
214
+ }
215
+ return false;
216
+ };
217
+ }
218
+ }
219
+ const TransformationEngine = new TransformationEngineClass();
220
+ exports.default = TransformationEngine;
@@ -0,0 +1,33 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ class TypeCasterClass {
4
+ /**
5
+ * Casts the value to the requested type (only if needed)
6
+ */
7
+ cast(value, type) {
8
+ switch (type) {
9
+ case 'boolean': {
10
+ if (typeof value === 'boolean')
11
+ return value;
12
+ else
13
+ return Boolean(value);
14
+ }
15
+ case 'date':
16
+ return new Date(value);
17
+ case 'number': {
18
+ if (typeof value === 'number')
19
+ return value;
20
+ else
21
+ return Number(value);
22
+ }
23
+ case 'string': {
24
+ if (typeof value === 'string')
25
+ return value;
26
+ else
27
+ return String(value);
28
+ }
29
+ }
30
+ }
31
+ }
32
+ const TypeCaster = new TypeCasterClass();
33
+ exports.default = TypeCaster;