@forzalabs/remora 0.0.20 → 0.0.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Constants.js +1 -1
- package/actions/automap.js +5 -1
- package/actions/deploy.js +0 -1
- package/actions/run.js +1 -1
- package/auth/JWTManager.js +1 -1
- package/database/DatabaseEngine.js +13 -3
- package/definitions/json_schemas/consumer-schema.json +392 -0
- package/definitions/json_schemas/producer-schema.json +4 -0
- package/definitions/transform/Transformations.js +2 -0
- package/engines/DataframeManager.js +55 -0
- package/engines/Environment.js +1 -1
- package/engines/UsageDataManager.js +110 -0
- package/engines/UserManager.js +2 -2
- package/engines/ai/AutoMapperEngine.js +2 -2
- package/engines/ai/LLM.js +51 -26
- package/engines/consumer/ConsumerManager.js +2 -1
- package/engines/execution/ExecutionEnvironment.js +8 -1
- package/engines/execution/ExecutionPlanner.js +6 -2
- package/engines/transform/TransformationEngine.js +220 -0
- package/engines/transform/TypeCaster.js +33 -0
- package/engines/validation/Validator.js +22 -5
- package/helper/Helper.js +7 -0
- package/index.js +7 -0
- package/licencing/LicenceManager.js +64 -0
- package/package.json +1 -1
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"use strict";
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const DSTE_1 = __importDefault(require("../core/dste/DSTE"));
const DatabaseEngine_1 = __importDefault(require("../database/DatabaseEngine"));
const DataframeManager_1 = __importDefault(require("./DataframeManager"));
// Milliseconds in one day — used to build the rolling 30/60 day windows.
const DAY_MS = 24 * 60 * 60 * 1000;
// Null-safe fallback so DataframeManager.fill always receives an array.
const orEmpty = (rows) => (rows !== null && rows !== void 0 ? rows : []);
/**
 * Aggregates execution records from the "usage" collection into the usage
 * dashboard payload: status counts (current vs. previous 30-day window),
 * 12-month success/fail series, per-month maxima (lines, duration),
 * per-consumer monthly series and the ten most recent executions.
 */
class UsageDataManager {
    /**
     * Builds the shared monthly aggregation pipeline over `match`ed records:
     * groups by year/month of `startedAt` and projects { x: "YYYY-M", y: ... }.
     * @param match       the $match stage filter
     * @param extraFields extra computed fields to add before grouping (may be {})
     * @param accumulator the $group accumulator(s), e.g. { count: { $sum: 1 } }
     * @param yExpr       the projected y expression, e.g. '$count'
     */
    _monthlySeriesPipeline(match, extraFields, accumulator, yExpr) {
        return [
            { $match: match },
            { $addFields: Object.assign(Object.assign({}, extraFields), { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } }) },
            { $group: Object.assign({ _id: { year: '$year', month: '$month' } }, accumulator) },
            { $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: yExpr } },
            { $sort: { x: 1 } }
        ];
    }
    /**
     * Computes the full usage-dashboard payload. All aggregations are
     * independent of one another, so they are executed in parallel.
     */
    getUsageDetails() {
        return __awaiter(this, void 0, void 0, function* () {
            const now = DSTE_1.default.now();
            const from = new Date(now.getTime() - 30 * DAY_MS); // last 30 days
            const prevMonthFrom = new Date(now.getTime() - 60 * DAY_MS); // the 30 days before that
            const yearAgo = new Date(now.getFullYear(), now.getMonth() - 11, 1); // first day of the month, 12 months back
            const collection = 'usage';
            // Status counts (success / failed / total) over a [start, end] window.
            const getStatusCounts = (start, end) => __awaiter(this, void 0, void 0, function* () {
                const results = yield DatabaseEngine_1.default.aggregate(collection, [
                    { $match: { startedAt: { $gte: start, $lte: end } } },
                    { $group: { _id: '$status', count: { $sum: 1 } } }
                ]);
                let success = 0, failed = 0, total = 0;
                results.forEach((r) => {
                    total += r.count;
                    if (r._id === 'success')
                        success = r.count;
                    if (r._id === 'failed')
                        failed = r.count;
                });
                return { total, success, failed };
            });
            const yearMatch = { startedAt: { $gte: yearAgo, $lte: now } };
            const countAcc = { count: { $sum: 1 } };
            // Per-consumer, per-month execution counts (flat rows; pivoted below).
            const consumerPipeline = [
                { $match: yearMatch },
                { $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
                { $group: { _id: { consumer: '$consumer', year: '$year', month: '$month' }, count: { $sum: 1 } } },
                { $project: { _id: 0, consumer: '$_id.consumer', x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$count' } },
                { $sort: { consumer: 1, x: 1 } }
            ];
            // All queries are independent: run them in parallel instead of serially.
            const [statusesRequests, prevStatusesRequests, rawMonthlySuccess, rawMonthlyFails, topLines, topTime, consumersData, recentExecution] = yield Promise.all([
                getStatusCounts(from, now),
                getStatusCounts(prevMonthFrom, from),
                // Monthly successes for the last 12 months
                DatabaseEngine_1.default.aggregate(collection, this._monthlySeriesPipeline(Object.assign({ status: 'success' }, yearMatch), {}, countAcc, '$count')),
                // Monthly failures for the last 12 months
                DatabaseEngine_1.default.aggregate(collection, this._monthlySeriesPipeline(Object.assign({ status: 'failed' }, yearMatch), {}, countAcc, '$count')),
                // Per-month maximum itemsCount
                DatabaseEngine_1.default.aggregate(collection, this._monthlySeriesPipeline(yearMatch, {}, { itemsCount: { $max: '$itemsCount' } }, '$itemsCount')),
                // Per-month maximum execution duration (finishedAt - startedAt, ms)
                DatabaseEngine_1.default.aggregate(collection, this._monthlySeriesPipeline(yearMatch, { durationMs: { $subtract: ['$finishedAt', '$startedAt'] } }, { maxDuration: { $max: '$durationMs' } }, '$maxDuration')),
                DatabaseEngine_1.default.aggregate(collection, consumerPipeline),
                // Ten most recent executions of the last 30 days
                DatabaseEngine_1.default.query(collection, { startedAt: { $gte: from, $lte: now } }, { sort: { startedAt: -1 }, limit: 10 })
            ]);
            // Pivot the flat per-consumer rows into { name, data: [{x, y}] } series.
            const consumerMap = {};
            consumersData.forEach((r) => {
                consumerMap[r.consumer] = consumerMap[r.consumer] || [];
                consumerMap[r.consumer].push({ x: r.x, y: r.y });
            });
            const consumers = Object.entries(consumerMap).map(([name, data]) => ({ name, data: DataframeManager_1.default.fill(orEmpty(data), yearAgo, now) }));
            return {
                statusesRequests,
                prevStatusesRequests,
                monthlySuccess: DataframeManager_1.default.fill(orEmpty(rawMonthlySuccess), yearAgo, now),
                monthlyFails: DataframeManager_1.default.fill(orEmpty(rawMonthlyFails), yearAgo, now),
                consumers: consumers,
                topLine: DataframeManager_1.default.fill(orEmpty(topLines), yearAgo, now),
                topTime: DataframeManager_1.default.fill(orEmpty(topTime), yearAgo, now),
                recentExecution
            };
        });
    }
}
exports.default = new UsageDataManager();
|
package/engines/UserManager.js
CHANGED
|
@@ -46,8 +46,8 @@ const DEV_USER = {
|
|
|
46
46
|
const MOCK_USER = {
|
|
47
47
|
_id: '__mock__',
|
|
48
48
|
auth: { oid: '', provider: 'azure' },
|
|
49
|
-
email: '',
|
|
50
|
-
name: '
|
|
49
|
+
email: 'mock.user@email.com',
|
|
50
|
+
name: 'Mock User',
|
|
51
51
|
roles: ['user'],
|
|
52
52
|
_signature: '',
|
|
53
53
|
lastLogin: new Date().toJSON()
|
|
@@ -20,11 +20,11 @@ class AutoMapperEngineClass {
|
|
|
20
20
|
* input: the first ten lines of the uploaded file
|
|
21
21
|
* outputs: the selected schemas
|
|
22
22
|
*/
|
|
23
|
-
this.map = (input, outputs) => __awaiter(this, void 0, void 0, function* () {
|
|
23
|
+
this.map = (input, outputs, fileName, sources) => __awaiter(this, void 0, void 0, function* () {
|
|
24
24
|
(0, Affirm_1.default)(input, 'Invalid input');
|
|
25
25
|
(0, Affirm_1.default)(outputs, 'Invalid outputs');
|
|
26
26
|
const llm = new LLM_1.default();
|
|
27
|
-
const producersRes = yield llm.inferProducers(input, outputs);
|
|
27
|
+
const producersRes = yield llm.inferProducers(input, outputs, fileName, sources);
|
|
28
28
|
const consumersRes = yield llm.inferConsumers(producersRes.producers, outputs);
|
|
29
29
|
return {
|
|
30
30
|
consumers: consumersRes.consumers,
|
package/engines/ai/LLM.js
CHANGED
|
@@ -25,12 +25,16 @@ You are tasked with creating the PRODUCER(S) that will then be used.
|
|
|
25
25
|
A producer maps directly to a dataset and exposes it's dimensions.
|
|
26
26
|
|
|
27
27
|
## FIELDS
|
|
28
|
-
- alias: the reference to the column or property name if different from the desired name property
|
|
29
28
|
- classification: make your best guess if the field falls under any of these regulations
|
|
30
29
|
|
|
31
30
|
# RULES
|
|
32
31
|
- Add only the required fields to comply with the OUTPUT DATA SPEC
|
|
33
32
|
- Add fields that you think are important
|
|
33
|
+
- The name of the producer must be the same as the name of the dataset.
|
|
34
|
+
- Avoid creating multiple providers with similar data.
|
|
35
|
+
- Try to create the least number of providers
|
|
36
|
+
- Awlays include this exact property as the first -> "$schema": "https://raw.githubusercontent.com/ForzaLabs/remora-public/refs/heads/main/json_schemas/producer-schema.json"
|
|
37
|
+
- Based on the producer select the source that makes the most sense to connect otherwise leave the string "<source_name>"
|
|
34
38
|
|
|
35
39
|
# FORMAT
|
|
36
40
|
The INPUT DATA SPEC will be the first 10 rows from the dataset that the producer needs to map to.
|
|
@@ -42,6 +46,12 @@ The result must be returned as a JSON object.
|
|
|
42
46
|
|
|
43
47
|
# OUTPUT DATA SPEC
|
|
44
48
|
{{output data spec}}
|
|
49
|
+
|
|
50
|
+
# File name
|
|
51
|
+
{{file name}}
|
|
52
|
+
|
|
53
|
+
# SOURCES
|
|
54
|
+
{{sources}}
|
|
45
55
|
`;
|
|
46
56
|
const baseConsumersSystemPrompt = `
|
|
47
57
|
# TASK
|
|
@@ -52,12 +62,14 @@ You are going to receive a list of PRODUCERS that expose some dimensions, and yo
|
|
|
52
62
|
A consumer takes the data from one or more producers and changes it's shape to transform it into the required output schema.
|
|
53
63
|
|
|
54
64
|
## FIELDS
|
|
55
|
-
- fields.grouping: used when you need to create a nested data structure with many items under a single key. If no groupingKey is used, then don't add this field.
|
|
56
65
|
- fields.from: used to distinct between the producers imported by the consumer. The value is the name of the producer.
|
|
57
66
|
|
|
58
67
|
# RULES
|
|
59
|
-
- If a field is not needed, do not add it e.g.
|
|
68
|
+
- If a field is not needed, do not add it e.g.
|
|
60
69
|
- Only import a producer once
|
|
70
|
+
- Awlays include this exact property as the first -> $schema": "https://raw.githubusercontent.com/ForzaLabs/remora-public/refs/heads/main/json_schemas/consumer-schema.json",
|
|
71
|
+
- Use "API" as the only valid output format.
|
|
72
|
+
- The "from" must contain only the name of the producer
|
|
61
73
|
|
|
62
74
|
# FORMAT
|
|
63
75
|
The INPUT DATA SPEC will be the first 10 rows from the dataset that the producer needs to map to.
|
|
@@ -141,7 +153,7 @@ resulting consumer: """
|
|
|
141
153
|
],
|
|
142
154
|
"outputs": [
|
|
143
155
|
{
|
|
144
|
-
"format": "
|
|
156
|
+
"format": "API"
|
|
145
157
|
}
|
|
146
158
|
],
|
|
147
159
|
"producers": [
|
|
@@ -159,36 +171,45 @@ You are going to receive a list of CONSUMERS and you need to return in the corre
|
|
|
159
171
|
|
|
160
172
|
# CONSUMER DEFINITION
|
|
161
173
|
A consumer takes the data from one or more producers and changes it's shape to transform it into the required output schema.
|
|
162
|
-
|
|
163
174
|
## FIELDS
|
|
164
|
-
- fields.grouping: used when you need to create a nested data structure with many items under a single key. If no groupingKey is used, then don't add this field.
|
|
165
175
|
- fields.from: used to distinct between the producers imported by the consumer. The value is the name of the producer.
|
|
166
176
|
|
|
167
177
|
# RULES
|
|
168
|
-
- If a field is not needed, do not add it e.g.
|
|
178
|
+
- If a field is not needed, do not add it e.g.
|
|
169
179
|
- Only import a producer once
|
|
180
|
+
- Awlays include this exact property as the first -> "https://raw.githubusercontent.com/ForzaLabs/remora-public/refs/heads/main/json_schemas/consumer-schema.json",
|
|
181
|
+
- Use "API" as the only valid output format.
|
|
182
|
+
- The "from" must contain only the name of the producer
|
|
170
183
|
|
|
171
184
|
# CONSUMERS
|
|
172
185
|
{{consumers}}
|
|
173
186
|
`;
|
|
174
187
|
class LLM {
|
|
175
188
|
constructor() {
|
|
176
|
-
this.inferProducers = (input, outputs) => __awaiter(this, void 0, void 0, function* () {
|
|
189
|
+
this.inferProducers = (input, outputs, fileName, sources) => __awaiter(this, void 0, void 0, function* () {
|
|
177
190
|
let systemPrompt = baseProducersSystemPrompt;
|
|
178
191
|
systemPrompt = systemPrompt.replace('{{input data spec}}', input.map(x => `- ${JSON.stringify(x)}`).join('\n'));
|
|
179
192
|
systemPrompt = systemPrompt.replace('{{output data spec}}', outputs.map(x => `- ${JSON.stringify(x)}`).join('\n'));
|
|
193
|
+
systemPrompt = systemPrompt.replace('{{file name}}', fileName);
|
|
194
|
+
systemPrompt = systemPrompt.replace('{{sources}}', sources.map(x => `- ${JSON.stringify(x)}`).join('\n'));
|
|
180
195
|
const res = yield this._client.beta.chat.completions.parse({
|
|
181
|
-
model: '
|
|
196
|
+
model: 'gpt-4o',
|
|
182
197
|
messages: [
|
|
183
198
|
{ role: 'system', content: systemPrompt }
|
|
184
199
|
],
|
|
185
200
|
response_format: (0, zod_1.zodResponseFormat)(zod_2.z.object({
|
|
186
201
|
producers: zod_2.z.array(zod_2.z.object({
|
|
202
|
+
$schema: zod_2.z.string().describe('The schema of the producer. This should always be the same.'),
|
|
187
203
|
name: zod_2.z.string(),
|
|
188
204
|
description: zod_2.z.string(),
|
|
189
205
|
dimensions: zod_2.z.array(zod_2.z.object({
|
|
190
206
|
name: zod_2.z.string(),
|
|
191
|
-
alias:
|
|
207
|
+
// alias: z.string().optional(),
|
|
208
|
+
source: zod_2.z.string().describe('The name of the source linked to this producer.'),
|
|
209
|
+
settings: zod_2.z.object({
|
|
210
|
+
fileKey: zod_2.z.string().describe('The name of the file'),
|
|
211
|
+
fileType: zod_2.z.string().describe('The file extension (CSV | JSONL | JSON)')
|
|
212
|
+
}),
|
|
192
213
|
description: zod_2.z.string().optional(),
|
|
193
214
|
type: zod_2.z.enum(['string', 'number', 'datetime']),
|
|
194
215
|
pk: zod_2.z.boolean().optional(),
|
|
@@ -205,12 +226,13 @@ class LLM {
|
|
|
205
226
|
systemPrompt = systemPrompt.replace('{{producers}}', producers.map(x => `- ${JSON.stringify(x)}`).join('\n'));
|
|
206
227
|
systemPrompt = systemPrompt.replace('{{output data spec}}', outputs.map(x => `- ${JSON.stringify(x)}`).join('\n'));
|
|
207
228
|
const item = {
|
|
208
|
-
model: '
|
|
229
|
+
model: 'gpt-4o',
|
|
209
230
|
messages: [
|
|
210
231
|
{ role: 'system', content: systemPrompt }
|
|
211
232
|
],
|
|
212
233
|
response_format: (0, zod_1.zodResponseFormat)(zod_2.z.object({
|
|
213
234
|
consumers: zod_2.z.array(zod_2.z.object({
|
|
235
|
+
$schema: zod_2.z.string().describe('The schema of the consumer. This should always be the same.'),
|
|
214
236
|
name: zod_2.z.string(),
|
|
215
237
|
description: zod_2.z.string(),
|
|
216
238
|
producers: zod_2.z.array(zod_2.z.object({
|
|
@@ -224,13 +246,14 @@ class LLM {
|
|
|
224
246
|
fields: zod_2.z.array(zod_2.z.object({
|
|
225
247
|
key: zod_2.z.string(),
|
|
226
248
|
from: zod_2.z.string().optional(),
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
})
|
|
249
|
+
alias: zod_2.z.string().optional()
|
|
250
|
+
// grouping: z.object({
|
|
251
|
+
// groupingKey: z.string(),
|
|
252
|
+
// subFields: z.array(z.lazy(() => z.object({
|
|
253
|
+
// key: z.string(),
|
|
254
|
+
// from: z.string().optional()
|
|
255
|
+
// })))
|
|
256
|
+
// }).optional()
|
|
234
257
|
})),
|
|
235
258
|
outputs: zod_2.z.array(zod_2.z.object({
|
|
236
259
|
format: zod_2.z.enum(['SQL', 'API', 'CSV', 'PARQUET', 'JSON'])
|
|
@@ -243,12 +266,13 @@ class LLM {
|
|
|
243
266
|
const firstDraft = msg.parsed;
|
|
244
267
|
const qaSystemPrompt = baseQASystemPrompt.replace('{{consumers}}', firstDraft.consumers.map(x => `- ${JSON.stringify(x)}`).join('\n'));
|
|
245
268
|
const res2 = yield this._client.beta.chat.completions.parse({
|
|
246
|
-
model: '
|
|
269
|
+
model: 'gpt-4o',
|
|
247
270
|
messages: [
|
|
248
271
|
{ role: 'system', content: qaSystemPrompt }
|
|
249
272
|
],
|
|
250
273
|
response_format: (0, zod_1.zodResponseFormat)(zod_2.z.object({
|
|
251
274
|
consumers: zod_2.z.array(zod_2.z.object({
|
|
275
|
+
$schema: zod_2.z.string().describe('The schema of the consumer. This should always be the same.'),
|
|
252
276
|
name: zod_2.z.string(),
|
|
253
277
|
description: zod_2.z.string(),
|
|
254
278
|
producers: zod_2.z.array(zod_2.z.object({
|
|
@@ -262,13 +286,14 @@ class LLM {
|
|
|
262
286
|
fields: zod_2.z.array(zod_2.z.object({
|
|
263
287
|
key: zod_2.z.string(),
|
|
264
288
|
from: zod_2.z.string().optional(),
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
}).optional()
|
|
289
|
+
alias: zod_2.z.string().optional()
|
|
290
|
+
// grouping: z.object({
|
|
291
|
+
// groupingKey: z.string().optional(),
|
|
292
|
+
// subFields: z.array(z.lazy(() => z.object({
|
|
293
|
+
// key: z.string(),
|
|
294
|
+
// from: z.string().optional()
|
|
295
|
+
// }))).optional()
|
|
296
|
+
// }).optional()
|
|
272
297
|
})),
|
|
273
298
|
outputs: zod_2.z.array(zod_2.z.object({
|
|
274
299
|
format: zod_2.z.enum(['SQL', 'API', 'CSV', 'PARQUET', 'JSON'])
|
|
@@ -113,7 +113,8 @@ class ConsumerManagerClass {
|
|
|
113
113
|
else {
|
|
114
114
|
const col = ConsumerManager.searchFieldInColumns(field.cField, availableColumns, consumer);
|
|
115
115
|
(0, Affirm_1.default)(col, `Consumer "${consumer.name}" misconfiguration: the requested field "${field.cField.key}" is not found in any of the specified producers ("${consumer.producers.map(x => x.name).join(', ')}")`);
|
|
116
|
-
|
|
116
|
+
// TODO: CHECK THIS FIX IS GOOD
|
|
117
|
+
expandedFields.push(Object.assign(Object.assign({}, field), { dimension: col.dimension, measure: col.measure }));
|
|
117
118
|
}
|
|
118
119
|
return expandedFields;
|
|
119
120
|
};
|
|
@@ -23,6 +23,7 @@ const SQLBuilder_1 = __importDefault(require("../sql/SQLBuilder"));
|
|
|
23
23
|
const SQLCompiler_1 = __importDefault(require("../sql/SQLCompiler"));
|
|
24
24
|
const ExecutionPlanner_1 = __importDefault(require("./ExecutionPlanner"));
|
|
25
25
|
const RequestExecutor_1 = __importDefault(require("./RequestExecutor"));
|
|
26
|
+
const TransformationEngine_1 = __importDefault(require("../transform/TransformationEngine"));
|
|
26
27
|
class ExecutionEnvironment {
|
|
27
28
|
constructor(consumer) {
|
|
28
29
|
this.run = (options) => __awaiter(this, void 0, void 0, function* () {
|
|
@@ -30,7 +31,8 @@ class ExecutionEnvironment {
|
|
|
30
31
|
const plan = ExecutionPlanner_1.default.plan(this._consumer, options);
|
|
31
32
|
(0, Affirm_1.default)(plan, `Invalid execution plan`);
|
|
32
33
|
(0, Affirm_1.default)(plan.length > 0, `Empty execution plan`);
|
|
33
|
-
const
|
|
34
|
+
const start = performance.now();
|
|
35
|
+
const result = { shape: ConsumerEngine_1.default.getOutputShape(this._consumer), _elapsedMS: -1 };
|
|
34
36
|
for (const planStep of plan) {
|
|
35
37
|
switch (planStep.type) {
|
|
36
38
|
case 'compile-consumer-to-SQL': {
|
|
@@ -99,10 +101,15 @@ class ExecutionEnvironment {
|
|
|
99
101
|
this._fetchedData = RequestExecutor_1.default._applyFilters(this._fetchedData, this._consumer.filters.map(x => x.rule));
|
|
100
102
|
break;
|
|
101
103
|
}
|
|
104
|
+
case 'apply-transformations': {
|
|
105
|
+
this._fetchedData = TransformationEngine_1.default.apply(this._consumer, this._fetchedData);
|
|
106
|
+
break;
|
|
107
|
+
}
|
|
102
108
|
default: throw new Error(`Invalid execution plan step type "${planStep.type}"`);
|
|
103
109
|
}
|
|
104
110
|
}
|
|
105
111
|
result.data = this._fetchedData;
|
|
112
|
+
result._elapsedMS = performance.now() - start;
|
|
106
113
|
return result;
|
|
107
114
|
});
|
|
108
115
|
this._consumer = consumer;
|
|
@@ -51,16 +51,20 @@ class ExecutionPlannerClas {
|
|
|
51
51
|
}
|
|
52
52
|
default: throw new Error(`Engine "${producerEngine}" not supported`);
|
|
53
53
|
}
|
|
54
|
-
//
|
|
54
|
+
// At this point I have the data loaded in memory
|
|
55
55
|
// TODO: can I handle streaming data? (e.g. a file that is too big to fit in memory)
|
|
56
56
|
// TODO: how to handle pagination of SQL results?
|
|
57
|
+
// Apply the transformations to the fields of the consumer
|
|
58
|
+
// TODO: transformations can also be applied directly to the producer... how???
|
|
59
|
+
if (consumer.fields.some(x => Algo_1.default.hasVal(x.transform)))
|
|
60
|
+
plan.push({ type: 'apply-transformations' });
|
|
57
61
|
const engineClass = this.getEngineClass(producerEngine);
|
|
58
62
|
for (const output of consumer.outputs) {
|
|
59
63
|
switch (output.format.toUpperCase()) {
|
|
60
64
|
case 'JSON': {
|
|
61
65
|
if (engineClass === 'file' && Algo_1.default.hasVal(options))
|
|
62
66
|
plan.push({ type: 'apply-execution-request-to-result' });
|
|
63
|
-
// TODO: test if it is
|
|
67
|
+
// TODO: test if it is needed and if it doesn't break something else
|
|
64
68
|
if (engineClass === 'sql')
|
|
65
69
|
plan.push({ type: 'post-process-json' });
|
|
66
70
|
plan.push({ type: 'export-file', output });
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const Affirm_1 = __importDefault(require("../../core/Affirm"));
const Algo_1 = __importDefault(require("../../core/Algo"));
const TypeCaster_1 = __importDefault(require("./TypeCaster"));
/**
 * Applies the per-field `transform` rules of a consumer to the fetched data,
 * honouring each field's `default` value and `onError` policy
 * ('fail' | 'skip' | 'set_default'; 'fail' is the default).
 */
class TransformationEngineClass {
    constructor() {
        /**
         * Applies the transformations of every consumer field that declares one.
         * Records are mutated in place (performance: avoids copying the dataset).
         * @param consumer consumer definition whose fields may carry `transform`
         * @param data     fetched records; the same array is returned for chaining
         * @throws when a transformation fails and the field's onError policy is 'fail'
         */
        this.apply = (consumer, data) => {
            (0, Affirm_1.default)(consumer, 'Invalid consumer');
            Affirm_1.default.hasValue(data, 'Invalid data');
            const fieldsToTransform = consumer.fields.filter(field => Algo_1.default.hasVal(field.transform));
            Affirm_1.default.hasItems(fieldsToTransform, 'No fields with transformations');
            // Process the data records in place to improve performance instead of copying to a new array
            for (const record of data) {
                for (const field of fieldsToTransform) {
                    if (!field.transform)
                        continue;
                    const value = record[field.key];
                    if (!Algo_1.default.hasVal(value)) {
                        // Missing value: substitute the configured default (if any)
                        // and skip the transform chain for this field.
                        // BUG FIX: previously the default was assigned and then
                        // immediately overwritten by transforming the missing value
                        // (e.g. cast(undefined, 'number') -> NaN clobbered the default).
                        if (Algo_1.default.hasVal(field.default))
                            record[field.key] = field.default;
                        continue;
                    }
                    try {
                        record[field.key] = this.applyTransformations(value, field.transform, field.key);
                    }
                    catch (error) {
                        // Per-field error policy; unknown/absent policy falls through to 'fail'.
                        switch (field.onError) {
                            case 'set_default':
                                record[field.key] = field.default;
                                break;
                            case 'skip':
                                break;
                            case 'fail':
                            default:
                                throw error;
                        }
                    }
                }
            }
            return data;
        };
        /**
         * Applies a single transformation — or, when given an array, the whole
         * chain in order, each step feeding the next. A transformation object
         * whose shape is not recognized leaves the value unchanged.
         * @throws Error when the value's type is incompatible with the transformation
         */
        this.applyTransformations = (value, transformations, fieldName) => {
            var _a;
            if (Array.isArray(transformations)) {
                // Fold the chain left-to-right without building intermediate arrays.
                let result = value;
                for (const transform of transformations) {
                    result = this.applyTransformations(result, transform, fieldName);
                }
                return result;
            }
            // Single transformation: dispatch on the one key the object carries.
            if ('cast' in transformations) {
                return TypeCaster_1.default.cast(value, transformations.cast);
            }
            if ('multiply' in transformations) {
                const num = TypeCaster_1.default.cast(value, 'number');
                if (isNaN(num))
                    throw new Error(`Cannot multiply non-numeric value in field '${fieldName}'`);
                return num * transformations.multiply;
            }
            if ('add' in transformations) {
                const num = TypeCaster_1.default.cast(value, 'number');
                if (isNaN(num))
                    throw new Error(`Cannot add to non-numeric value in field '${fieldName}'`);
                return num + transformations.add;
            }
            if ('extract' in transformations) {
                const date = TypeCaster_1.default.cast(value, 'date');
                if (isNaN(date.getTime()))
                    throw new Error(`Invalid date for extraction in field '${fieldName}'`);
                switch (transformations.extract) {
                    case 'year': return date.getFullYear();
                    case 'month': return date.getMonth() + 1; // 1-based month
                    case 'day': return date.getDate();
                    case 'hour': return date.getHours();
                    case 'minute': return date.getMinutes();
                }
            }
            if ('concat' in transformations) {
                if (!Array.isArray(value))
                    throw new Error(`Cannot concat non-array value in field '${fieldName}'`);
                return value.join(transformations.concat.separator);
            }
            if ('split' in transformations) {
                if (typeof value !== 'string')
                    throw new Error(`Cannot split non-string value in field '${fieldName}'`);
                const parts = value.split(transformations.split.separator);
                if (transformations.split.index >= parts.length) {
                    throw new Error(`Split index ${transformations.split.index} out of bounds in field '${fieldName}'`);
                }
                return parts[transformations.split.index];
            }
            if ('regex_match' in transformations) {
                if (typeof value !== 'string')
                    throw new Error(`Cannot apply regex_match to non-string value in field '${fieldName}'`);
                try {
                    // Fresh RegExp per call: no stateful lastIndex carried across values.
                    const regex = new RegExp(transformations.regex_match.pattern, transformations.regex_match.flags);
                    return regex.test(value);
                }
                catch (error) {
                    throw new Error(`Invalid regex pattern in field '${fieldName}': ${error.message}`);
                }
            }
            if ('regex_replace' in transformations) {
                if (typeof value !== 'string')
                    throw new Error(`Cannot apply regex_replace to non-string value in field '${fieldName}'`);
                try {
                    const regex = new RegExp(transformations.regex_replace.pattern, transformations.regex_replace.flags);
                    return value.replace(regex, transformations.regex_replace.replacement);
                }
                catch (error) {
                    throw new Error(`Invalid regex pattern in field '${fieldName}': ${error.message}`);
                }
            }
            if ('regex_extract' in transformations) {
                if (typeof value !== 'string')
                    throw new Error(`Cannot apply regex_extract to non-string value in field '${fieldName}'`);
                try {
                    const regex = new RegExp(transformations.regex_extract.pattern, transformations.regex_extract.flags);
                    const matches = value.match(regex);
                    if (!matches)
                        return null;
                    const groupIndex = transformations.regex_extract.group;
                    return (_a = matches[groupIndex]) !== null && _a !== void 0 ? _a : null;
                }
                catch (error) {
                    throw new Error(`Invalid regex pattern in field '${fieldName}': ${error.message}`);
                }
            }
            if ('trim' in transformations) {
                if (typeof value !== 'string')
                    throw new Error(`Cannot trim non-string value in field '${fieldName}'`);
                return value.trim();
            }
            if ('to_lowercase' in transformations) {
                if (typeof value !== 'string')
                    throw new Error(`Cannot convert non-string value to lowercase in field '${fieldName}'`);
                return value.toLowerCase();
            }
            if ('to_uppercase' in transformations) {
                if (typeof value !== 'string')
                    throw new Error(`Cannot convert non-string value to uppercase in field '${fieldName}'`);
                return value.toUpperCase();
            }
            if ('capitalize' in transformations) {
                if (typeof value !== 'string')
                    throw new Error(`Cannot capitalize non-string value in field '${fieldName}'`);
                return value.charAt(0).toUpperCase() + value.slice(1);
            }
            if ('substring' in transformations) {
                if (typeof value !== 'string')
                    throw new Error(`Cannot take substring of non-string value in field '${fieldName}'`);
                const { start, end } = transformations.substring;
                return end !== undefined ? value.substring(start, end) : value.substring(start);
            }
            if ('pad_start' in transformations) {
                if (typeof value !== 'string')
                    throw new Error(`Cannot pad non-string value in field '${fieldName}'`);
                const { length, char } = transformations.pad_start;
                if (char.length !== 1)
                    throw new Error(`Pad character must be exactly one character in field '${fieldName}'`);
                return value.padStart(length, char);
            }
            if ('pad_end' in transformations) {
                if (typeof value !== 'string')
                    throw new Error(`Cannot pad non-string value in field '${fieldName}'`);
                const { length, char } = transformations.pad_end;
                if (char.length !== 1)
                    throw new Error(`Pad character must be exactly one character in field '${fieldName}'`);
                return value.padEnd(length, char);
            }
            if ('prepend' in transformations)
                return transformations.prepend + TypeCaster_1.default.cast(value, 'string');
            if ('append' in transformations)
                return TypeCaster_1.default.cast(value, 'string') + transformations.append;
            if ('conditional' in transformations) {
                // First clause whose condition holds wins; otherwise the `else`
                // value (when provided) or the value untouched.
                for (const clause of transformations.conditional.clauses) {
                    if (this.evaluateCondition(value, clause.if)) {
                        return clause.then;
                    }
                }
                return transformations.conditional.else !== undefined ? transformations.conditional.else : value;
            }
            return value;
        };
        /**
         * Evaluates one `conditional` clause condition against a value.
         * Numeric comparisons coerce the value through TypeCaster; an
         * unrecognized condition shape evaluates to false.
         */
        this.evaluateCondition = (value, condition) => {
            if ('greater_than' in condition) {
                return TypeCaster_1.default.cast(value, 'number') > condition.greater_than;
            }
            if ('greater_than_or_equal' in condition) {
                return TypeCaster_1.default.cast(value, 'number') >= condition.greater_than_or_equal;
            }
            if ('less_than' in condition) {
                return TypeCaster_1.default.cast(value, 'number') < condition.less_than;
            }
            if ('less_than_or_equal' in condition) {
                return TypeCaster_1.default.cast(value, 'number') <= condition.less_than_or_equal;
            }
            if ('equals' in condition) {
                return value === condition.equals;
            }
            if ('not_equals' in condition) {
                return value !== condition.not_equals;
            }
            if ('in' in condition) {
                return condition.in.includes(value);
            }
            if ('not_in' in condition) {
                return !condition.not_in.includes(value);
            }
            return false;
        };
    }
}
const TransformationEngine = new TransformationEngineClass();
exports.default = TransformationEngine;
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
/**
 * Lightweight value coercion used by the transformation engine.
 */
class TypeCasterClass {
    /**
     * Casts `value` to the requested type, skipping the conversion when the
     * value already has that type. Dates are always rebuilt via `new Date`.
     * An unsupported type name yields undefined (no matching branch).
     */
    cast(value, type) {
        if (type === 'date')
            return new Date(value);
        if (type === 'boolean')
            return typeof value === 'boolean' ? value : Boolean(value);
        if (type === 'number')
            return typeof value === 'number' ? value : Number(value);
        if (type === 'string')
            return typeof value === 'string' ? value : String(value);
    }
}
const TypeCaster = new TypeCasterClass();
exports.default = TypeCaster;
|