@forzalabs/remora 0.1.4-nasco.3 → 0.1.5-nasco.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Constants.js +1 -1
- package/definitions/json_schemas/consumer-schema.json +6 -2
- package/definitions/json_schemas/producer-schema.json +2 -1
- package/definitions/json_schemas/source-schema.json +14 -1
- package/documentation/README.md +1 -0
- package/documentation/default_resources/consumer.json +7 -7
- package/drivers/DeltaShareDriver.js +178 -0
- package/drivers/DriverFactory.js +6 -0
- package/drivers/DriverHelper.js +15 -0
- package/engines/ai/DeveloperEngine.js +90 -1
- package/engines/consumer/ConsumerEngine.js +1 -1
- package/engines/consumer/PostProcessor.js +22 -15
- package/engines/dataset/Dataset.js +18 -7
- package/engines/dataset/DatasetManager.js +58 -12
- package/engines/dataset/DatasetRecord.js +17 -4
- package/engines/dataset/ParallelDataset.js +16 -6
- package/engines/execution/ExecutionEnvironment.js +13 -4
- package/engines/execution/ExecutionPlanner.js +2 -1
- package/engines/file/FileCompiler.js +2 -1
- package/engines/file/FileExporter.js +12 -3
- package/engines/parsing/ParseManager.js +7 -2
- package/engines/producer/ProducerEngine.js +4 -2
- package/engines/transform/JoinEngine.js +10 -6
- package/engines/transform/TransformationEngine.js +31 -2
- package/engines/usage/UsageDataManager.js +110 -0
- package/package.json +2 -1
- package/workers/FilterWorker.js +3 -3
- package/workers/ProjectionWorker.js +3 -3
- package/workers/TransformWorker.js +3 -3
package/engines/dataset/DatasetManager.js
CHANGED
@@ -21,9 +21,15 @@ const ParseManager_1 = __importDefault(require("../parsing/ParseManager"));
 const Dataset_1 = __importDefault(require("./Dataset"));
 const promises_1 = require("stream/promises");
 const fs_1 = require("fs");
+const DeveloperEngine_1 = __importDefault(require("../ai/DeveloperEngine"));
 class DatasetManagerClass {
     constructor() {
-
+        /**
+         * Create a new Dataset for a producer. If an executionId is provided, the dataset files will
+         * be isolated inside a sub-folder specific to that execution to avoid concurrency conflicts
+         * when the same producer / consumer is executed multiple times in parallel.
+         */
+        this.create = (producer, executionId) => {
             (0, Affirm_1.default)(producer, 'Invalid producer');
             const { name, settings: { delimiter, fileKey, fileType, hasHeaderRow, sheetName } } = producer;
             const dataset = new Dataset_1.default(name, {
@@ -32,7 +38,7 @@ class DatasetManagerClass {
                 hasHeaderRow,
                 sheetName,
                 delimiter
-            });
+            }, undefined, executionId);
             return dataset;
         };
         this.buildDimensions = (dataset_1, producer_1, ...args_1) => __awaiter(this, [dataset_1, producer_1, ...args_1], void 0, function* (dataset, producer, discover = false) {
@@ -43,7 +49,7 @@
             return this.buildDimensionsFromFirstLine(firstLine, dataset.getFile(), producer, discover);
         });
         this.buildDimensionsFromFirstLine = (firstLine_1, dsFile_1, producer_1, ...args_1) => __awaiter(this, [firstLine_1, dsFile_1, producer_1, ...args_1], void 0, function* (firstLine, dsFile, producer, discover = false) {
-            var _a, _b, _c, _d, _e, _f;
+            var _a, _b, _c, _d, _e, _f, _g, _h;
             (0, Affirm_1.default)(firstLine, `Invalid first line`);
             (0, Affirm_1.default)(dsFile, `Invalid dataset file`);
             (0, Affirm_1.default)(producer, `Invalid producer`);
@@ -54,10 +60,17 @@
                     const headerLine = firstLine;
                     const rawDimensions = ParseManager_1.default._extractHeader(headerLine, delimiterChar, producer, discover);
                     return {
-                        dimensions: rawDimensions.map(x => ({
+                        dimensions: rawDimensions.map(x => ({
+                            key: x.name,
+                            name: x.saveAs,
+                            index: x.index,
+                            hidden: null,
+                            type: x.type
+                        })),
                         delimiter: delimiterChar
                     };
                 }
+                case 'PARQUET':
                 case 'JSONL':
                 case 'JSON': {
                     const source = Environment_1.default.getSource(producer.source);
@@ -67,7 +80,13 @@
                     if (discover) {
                         return {
                             delimiter: (_b = file.delimiter) !== null && _b !== void 0 ? _b : ',',
-                            dimensions: keys.map((x, i) => ({
+                            dimensions: keys.map((x, i) => ({
+                                hidden: false,
+                                index: i,
+                                key: x,
+                                name: x,
+                                type: DeveloperEngine_1.default.inferDimensionType(firstObject === null || firstObject === void 0 ? void 0 : firstObject[x])
+                            }))
                         };
                     }
                     const dimensions = [];
@@ -75,34 +94,61 @@
                         const columnKey = (_c = pColumn.aliasInProducer) !== null && _c !== void 0 ? _c : pColumn.nameInProducer;
                         const csvColumnIndex = keys.findIndex(x => x === columnKey);
                         (0, Affirm_1.default)(csvColumnIndex > -1, `The column "${pColumn.nameInProducer}" (with key "${columnKey}") of producer "${producer.name}" doesn't exist in the underlying dataset.`);
-                        dimensions.push({
+                        dimensions.push({
+                            index: csvColumnIndex,
+                            key: columnKey,
+                            name: pColumn.nameInProducer,
+                            hidden: null,
+                            type: (_e = (_d = pColumn.dimension) === null || _d === void 0 ? void 0 : _d.type) !== null && _e !== void 0 ? _e : 'string'
+                        });
                     }
-                    const delimiterChar = (
+                    const delimiterChar = (_f = file.delimiter) !== null && _f !== void 0 ? _f : ',';
                     return { dimensions, delimiter: delimiterChar };
                 }
                 case 'TXT': {
                     if (!file.hasHeaderRow) {
                         // If the file is a TXT and there isn't an header row, then I add a fake one that maps directly to the producer
-                        const delimiterChar = (
+                        const delimiterChar = (_g = file.delimiter) !== null && _g !== void 0 ? _g : ',';
                         const source = Environment_1.default.getSource(producer.source);
                         const columns = FileCompiler_1.default.compileProducer(producer, source);
                         if (discover) {
                             // Since I don't have an header, and I'm discovering, I just create placeholder dimensions based on the same number of columns of the txt
                             return {
                                 delimiter: delimiterChar,
-                                dimensions: firstLine.split(delimiterChar).map((x, i) => ({
+                                dimensions: firstLine.split(delimiterChar).map((x, i) => ({
+                                    hidden: false,
+                                    index: i,
+                                    key: `Col ${i + 1}`,
+                                    name: `Col ${i + 1}`,
+                                    type: 'string'
+                                }))
                             };
                         }
                         return {
-                            dimensions: columns.map((x, i) => {
+                            dimensions: columns.map((x, i) => {
+                                var _a, _b, _c;
+                                return ({
+                                    key: (_a = x.aliasInProducer) !== null && _a !== void 0 ? _a : x.nameInProducer,
+                                    name: x.nameInProducer,
+                                    index: i,
+                                    hidden: null,
+                                    type: (_c = (_b = x.dimension) === null || _b === void 0 ? void 0 : _b.type) !== null && _c !== void 0 ? _c : 'string'
+                                });
+                            }),
                             delimiter: delimiterChar
                         };
                     }
                     else {
-                        const delimiterChar = (
+                        const delimiterChar = (_h = producer.settings.delimiter) !== null && _h !== void 0 ? _h : ',';
                         const rawDimensions = ParseManager_1.default._extractHeader(firstLine, delimiterChar, producer, discover);
                         return {
-                            dimensions: rawDimensions.map(x => ({
+                            dimensions: rawDimensions.map(x => ({
+                                key: x.name,
+                                name: x.saveAs,
+                                index: x.index,
+                                hidden: null,
+                                type: x.type
+                            })),
                             delimiter: delimiterChar
                         };
                     }
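
Note: the new create(producer, executionId) signature above is what lets concurrent runs of the same producer write to separate locations. As a rough illustration of the idea (this helper is hypothetical, not part of the package), an executionId can simply be folded into the temp path:

const path = require('path');

// Hypothetical helper: namespace a dataset file by executionId so parallel
// executions of the same producer never collide on disk.
function datasetFilePath(baseDir, datasetName, executionId) {
    const folder = executionId ? path.join(baseDir, executionId) : baseDir;
    return path.join(folder, `${datasetName}.csv`);
}

console.log(datasetFilePath('/tmp/remora', 'orders'));            // /tmp/remora/orders.csv
console.log(datasetFilePath('/tmp/remora', 'orders', 'exec-42'));  // /tmp/remora/exec-42/orders.csv
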
package/engines/dataset/DatasetRecord.js
CHANGED
@@ -4,6 +4,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
 };
 Object.defineProperty(exports, "__esModule", { value: true });
 const Algo_1 = __importDefault(require("../../core/Algo"));
+const TypeCaster_1 = __importDefault(require("../transform/TypeCaster"));
 class DatasetRecord {
     constructor(row, dimensions, delimiter) {
         this.parse = (row, delimiter, dimensions) => {
@@ -11,7 +12,7 @@ class DatasetRecord {
                 const parts = row.split(delimiter);
                 for (let i = 0; i < dimensions.length; i++) {
                     const dim = dimensions[i];
-                    this._value[dim.name] = parts[i];
+                    this._value[dim.name] = TypeCaster_1.default.cast(parts[i], dim.type);
                 }
             }
         };
@@ -35,7 +36,7 @@ class DatasetRecord {
             this.parse(row, delimiter, this._dimensions);
         };
         this.wholeUpdateDimension = (update) => {
-            var _a;
+            var _a, _b, _c, _d, _e;
             if (update.toDelete) {
                 // To remove
                 delete this._value[update.currentDimension.name];
@@ -46,7 +47,13 @@
             }
             else if (!update.currentDimension) {
                 // To create (at the right position)
-                const newDimension = {
+                const newDimension = {
+                    index: update.newPosition,
+                    key: update.newName,
+                    name: update.newName,
+                    hidden: update.newHidden,
+                    type: (_b = (_a = update.currentDimension) === null || _a === void 0 ? void 0 : _a.type) !== null && _b !== void 0 ? _b : 'string'
+                };
                 this._value[newDimension.name] = null;
                 this._dimensions = [...this._dimensions, newDimension];
             }
@@ -56,7 +63,13 @@
                 if (index < 0)
                     index = this._dimensions.findIndex(x => x.key === update.currentDimension.key);
                 const currentDim = this._dimensions[index];
-                const updatedDim = {
+                const updatedDim = {
+                    name: update.newName,
+                    key: (_c = currentDim.key) !== null && _c !== void 0 ? _c : update.newName,
+                    hidden: update.newHidden,
+                    index: update.newPosition,
+                    type: (_e = (_d = update.currentDimension) === null || _d === void 0 ? void 0 : _d.type) !== null && _e !== void 0 ? _e : 'string'
+                };
                 this._value[updatedDim.name] = this._value[currentDim.name];
                 if (updatedDim.name !== currentDim.name)
                     delete this._value[currentDim.name];
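
Note: the switch from parts[i] to TypeCaster_1.default.cast(parts[i], dim.type) means parsed rows now carry typed values instead of raw strings. A minimal sketch of that idea, with a stand-in simpleCast in place of the package's TypeCaster:

// Stand-in for the package's TypeCaster: cast a raw string to the dimension's type.
function simpleCast(value, type) {
    switch (type) {
        case 'number': return Number(value);
        case 'datetime': return new Date(value);
        default: return value; // 'string' and anything unknown stay as-is
    }
}

const dimensions = [
    { name: 'id', type: 'number' },
    { name: 'createdAt', type: 'datetime' },
    { name: 'status', type: 'string' }
];

const value = {};
'42|2024-01-31|open'.split('|').forEach((part, i) => {
    value[dimensions[i].name] = simpleCast(part, dimensions[i].type);
});
console.log(value); // { id: 42, createdAt: 2024-01-31T00:00:00.000Z, status: 'open' }
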
package/engines/dataset/ParallelDataset.js
CHANGED
@@ -25,10 +25,17 @@ class ParallelDatasetClass {
          * I need the init to be called after all the setup has been completed because I need the .env to be loaded
          */
         if (!this._filterPool || !this._projectionPool || !this._transformPool) {
+            const options = {
+                workerThreadOpts: {
+                    resourceLimits: {
+                        maxOldGenerationSizeMb: Constants_1.default.defaults.MIN_RUNTIME_HEAP_MB
+                    }
+                }
+            };
             const workerPath = this._getWorkerPath();
-            this._filterPool = workerpool_1.default.pool(path_1.default.join(workerPath, 'FilterWorker.js'));
-            this._projectionPool = workerpool_1.default.pool(path_1.default.join(workerPath, 'ProjectionWorker.js'));
-            this._transformPool = workerpool_1.default.pool(path_1.default.join(workerPath, 'TransformWorker.js'));
+            this._filterPool = workerpool_1.default.pool(path_1.default.join(workerPath, 'FilterWorker.js'), options);
+            this._projectionPool = workerpool_1.default.pool(path_1.default.join(workerPath, 'ProjectionWorker.js'), options);
+            this._transformPool = workerpool_1.default.pool(path_1.default.join(workerPath, 'TransformWorker.js'), options);
         }
     };
     this._getWorkerPath = () => {
@@ -74,6 +81,7 @@ class ParallelDatasetClass {
         const workerData = {
             datasetDimensions: dataset.getDimensions(),
             datasetFile: dataset.getFile(),
+            executionId: dataset.getExecutionId(),
             datasetName: dataset.name,
             datasetDelimiter: dataset.getDelimiter(),
             fromLine: fromLine,
@@ -91,7 +99,7 @@
         yield DatasetManager_1.default.mergeWorkersPaths(results.map(x => x.datasetPath), dataset);
         dataset
             .setDelimiter(results[0].datasetDelimiter)
-            .
+            .setDimensions(results[0].datasetDimensions);
         dataset._finishOperation('filter-parallel');
         return dataset;
     });
@@ -111,6 +119,7 @@
         const workerData = {
             datasetDimensions: dataset.getDimensions(),
             datasetFile: dataset.getFile(),
+            executionId: dataset.getExecutionId(),
             datasetName: dataset.name,
             datasetDelimiter: dataset.getDelimiter(),
             fromLine: fromLine,
@@ -126,7 +135,7 @@
         yield DatasetManager_1.default.mergeWorkersPaths(results.map(x => x.datasetPath), dataset);
         dataset
             .setDelimiter(results[0].datasetDelimiter)
-            .
+            .setDimensions(results[0].datasetDimensions);
         dataset._finishOperation('projection-parallel');
         return dataset;
     });
@@ -146,6 +155,7 @@
         const workerData = {
             datasetDimensions: dataset.getDimensions(),
             datasetFile: dataset.getFile(),
+            executionId: dataset.getExecutionId(),
             datasetName: dataset.name,
             datasetDelimiter: dataset.getDelimiter(),
             fromLine: fromLine,
@@ -161,7 +171,7 @@
         yield DatasetManager_1.default.mergeWorkersPaths(results.map(x => x.datasetPath), dataset);
         dataset
             .setDelimiter(results[0].datasetDelimiter)
-            .
+            .setDimensions(results[0].datasetDimensions);
         dataset._finishOperation('transform-parallel');
         return dataset;
     });
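
Note: the options object above uses workerpool's workerThreadOpts, whose resourceLimits are the standard Node worker_threads limits. A sketch of the same cap with a placeholder heap size (the package takes the real value from Constants.defaults.MIN_RUNTIME_HEAP_MB; the worker path is illustrative):

const path = require('path');
const workerpool = require('workerpool');

// Placeholder: the package reads Constants.defaults.MIN_RUNTIME_HEAP_MB here.
const MAX_WORKER_HEAP_MB = 512;

const options = {
    workerThreadOpts: {
        resourceLimits: {
            // Cap the old-generation heap of each worker thread, in MB.
            maxOldGenerationSizeMb: MAX_WORKER_HEAP_MB
        }
    }
};

const pool = workerpool.pool(path.join(__dirname, 'FilterWorker.js'), options);
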
package/engines/execution/ExecutionEnvironment.js
CHANGED
@@ -28,7 +28,7 @@ const Algo_1 = __importDefault(require("../../core/Algo"));
 const Logger_1 = __importDefault(require("../../helper/Logger"));
 const ParallelDataset_1 = __importDefault(require("../dataset/ParallelDataset"));
 class ExecutionEnvironment {
-    constructor(consumer) {
+    constructor(consumer, executionId) {
         this.run = (options) => __awaiter(this, void 0, void 0, function* () {
             var _a, _b, _c, _d;
             (0, Affirm_1.default)(this._consumer, 'Invalid consumer');
@@ -64,7 +64,7 @@
                 (0, Affirm_1.default)(planStep.producer, `Invalid producer in execute-SQL step`);
                 const driver = yield DriverFactory_1.default.instantiateSource(planStep.source);
                 const queryData = (yield driver.query(this._envData.finalSQL)).rows;
-                let dataset = DatasetManager_1.default.create(planStep.producer);
+                let dataset = DatasetManager_1.default.create(planStep.producer, this._executionId);
                 dataset = yield dataset.loadFromMemory(queryData, planStep.producer);
                 this._storeIntermidiate(planStep, dataset);
                 break;
@@ -74,7 +74,7 @@
                 const { producer } = planStep;
                 const source = Environment_1.default.getSource(producer.source);
                 (0, Affirm_1.default)(source, `Source "${producer.source}" of producer "${producer.name}" not found.`);
-                let dataset = DatasetManager_1.default.create(producer);
+                let dataset = DatasetManager_1.default.create(producer, this._executionId);
                 dataset = yield dataset.load(source);
                 this._storeIntermidiate(planStep, dataset);
                 break;
@@ -102,7 +102,7 @@
             case 'export-file': {
                 (0, Affirm_1.default)(planStep.output, `Invalid output in export-file step`);
                 (0, Affirm_1.default)(this._resultingDataset, 'Invalid resulting dataset in export-file step');
-                const res = yield FileExporter_1.default.export(this._consumer, planStep.output, this._resultingDataset);
+                const res = yield FileExporter_1.default.export(this._consumer, planStep.output, this._resultingDataset, this._executionId);
                 result.fileUri = res;
                 break;
             }
@@ -156,6 +156,13 @@
             if (ds)
                 Logger_1.default.log(`Failed execution of consumer at step ${currentStep.type}:\n\tSize: ${ds.getCount()}\n\tCycles: ${ds.getCycles()}\n\tOperations: ${Logger_1.default.formatList(ds.getOperations())}`);
             Logger_1.default.log(`\tFailed step: ${currentStep.type}->\n\t${error}`);
+            // IMPORTANT: cleanup all the datasets to not leave any data around and to avoid memory leaks
+            const datasets = [
+                ...this._producedData.map(x => x.dataset),
+                this._resultingDataset
+            ].filter(Algo_1.default.hasVal);
+            const promises = datasets.map(x => x.destroy());
+            yield Promise.all(promises);
             throw error;
         }
         Logger_1.default.log(`Completed execution of consumer:\n\tSize: ${result._stats.size}\n\tCycles: ${result._stats.cycles}\n\tTime: ${result._stats.elapsedMS}\n\tOperations: ${Logger_1.default.formatList(result._stats.operations)}`);
@@ -184,6 +191,8 @@
         this._envData = { consumerSQL: null, executionRequestSQL: null, finalSQL: null };
         this._producedData = [];
         this._resultingDataset = null;
+        // A short unique id to isolate temp dataset files & output names
+        this._executionId = executionId;
     }
 }
 exports.default = ExecutionEnvironment;
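
Note: the failure path now destroys every intermediate dataset before re-throwing, so aborted runs do not leave temp files or retained memory behind. The same pattern reduced to plain async/await, with purely illustrative names:

// Illustrative only: run steps and guarantee dataset cleanup on failure.
async function runWithCleanup(steps, datasets) {
    try {
        for (const step of steps) {
            await step();
        }
    }
    catch (error) {
        // Destroy every produced/resulting dataset, then surface the original error.
        await Promise.all(datasets.filter(Boolean).map(ds => ds.destroy()));
        throw error;
    }
}
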
package/engines/execution/ExecutionPlanner.js
CHANGED
@@ -108,7 +108,8 @@ class ExecutionPlannerClas {
                 break;
             }
             case 'local':
-            case 'aws-s3':
+            case 'aws-s3':
+            case 'delta-share': {
                 plan.push({ type: 'load-dataset', producer });
                 plan.push({ type: 'prepare-dataset', producer });
                 if (producer.dimensions.some(x => { var _a, _b; return ((_a = x.alias) === null || _a === void 0 ? void 0 : _a.includes('{')) || ((_b = x.alias) === null || _b === void 0 ? void 0 : _b.includes('[')); }))
package/engines/file/FileCompiler.js
CHANGED
@@ -7,9 +7,10 @@ const Affirm_1 = __importDefault(require("../../core/Affirm"));
 class FileCompilerClass {
     constructor() {
         this.compileProducer = (producer, source) => {
+            var _a;
             (0, Affirm_1.default)(producer, `Invalid producer`);
             (0, Affirm_1.default)(source, `Invalid source`);
-            (0, Affirm_1.default)(producer.settings.fileKey, `Missing required file key in producer settings`);
+            (0, Affirm_1.default)((_a = producer.settings.fileKey) !== null && _a !== void 0 ? _a : producer.settings.sqlTable, `Missing required file key in producer settings`);
             (0, Affirm_1.default)(producer.settings.fileType, `Missing required file type in producer settings`);
             (0, Affirm_1.default)(!producer.measures || producer.measures.length === 0, `Cannot use "measure" with a producer linked to a file (only dimensions are allowed).`);
             const columns = producer.dimensions.map(x => ({
package/engines/file/FileExporter.js
CHANGED
@@ -20,7 +20,7 @@ const DriverFactory_1 = __importDefault(require("../../drivers/DriverFactory"));
 const Environment_1 = __importDefault(require("../Environment"));
 class FileExporterClass {
     constructor() {
-        this.export = (consumer, output, dataset) => __awaiter(this, void 0, void 0, function* () {
+        this.export = (consumer, output, dataset, executionId) => __awaiter(this, void 0, void 0, function* () {
             (0, Affirm_1.default)(consumer, `Invalid consumer`);
             (0, Affirm_1.default)(output, `Invalid output`);
             (0, Affirm_1.default)(dataset, `Invalid export dataset`);
@@ -32,7 +32,7 @@ class FileExporterClass {
                 : output.format === 'JSON'
                     ? 'jsonl'
                     : 'txt';
-            const name = this._composeFileName(consumer, extension);
+            const name = this._composeFileName(consumer, output, extension, executionId);
             const uploadRes = yield driver.uploadStream({
                 dataset,
                 name,
@@ -67,7 +67,16 @@
             }
             return chunks;
         };
-        this._composeFileName = (consumer, extension) =>
+        this._composeFileName = (consumer, output, extension, executionId) => {
+            if (output.exportName && output.exportName.trim().length > 0) {
+                // Ensure no extension duplication
+                const sanitized = output.exportName.replace(/\.[^.]+$/, '');
+                return `${sanitized}.${extension}`;
+            }
+            const baseTs = Algo_1.default.replaceAll(DSTE_1.default.now().toISOString().split('.')[0], ':', '-');
+            const suffix = executionId ? `_${executionId}` : '';
+            return `${consumer.name}_${baseTs}${suffix}.${extension}`;
+        };
     }
 }
 const FileExporter = new FileExporterClass();
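
Note: the new _composeFileName gives an explicit output.exportName priority (stripping any extension it already carries) and otherwise falls back to the consumer name plus a timestamp, suffixed with the executionId when one is present. The same rule restated as a standalone function with hypothetical inputs (not the package's own export):

// Standalone restatement of the naming rule shown above.
function composeFileName(consumerName, output, extension, executionId) {
    if (output.exportName && output.exportName.trim().length > 0) {
        // Drop any extension the user already supplied so it is not duplicated.
        return `${output.exportName.replace(/\.[^.]+$/, '')}.${extension}`;
    }
    const baseTs = new Date().toISOString().split('.')[0].replace(/:/g, '-');
    const suffix = executionId ? `_${executionId}` : '';
    return `${consumerName}_${baseTs}${suffix}.${extension}`;
}

console.log(composeFileName('claims', { exportName: 'claims_latest.csv' }, 'csv'));
// -> claims_latest.csv
console.log(composeFileName('claims', {}, 'csv', 'exec-42'));
// -> claims_<timestamp>_exec-42.csv
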
package/engines/parsing/ParseManager.js
CHANGED
@@ -9,7 +9,7 @@ const FileCompiler_1 = __importDefault(require("../file/FileCompiler"));
 class ParseManagerClass {
     constructor() {
         this._extractHeader = (headerLine, delimiter, producer, discover) => {
-            var _a;
+            var _a, _b, _c;
             (0, Affirm_1.default)(headerLine, `Invalid CSV header line for producer "${producer.name}"`);
             (0, Affirm_1.default)(delimiter, 'Invalid CSV delimiter');
             (0, Affirm_1.default)(producer, 'Invalid producer');
@@ -24,7 +24,12 @@ class ParseManagerClass {
                 const columnKey = (_a = pColumn.aliasInProducer) !== null && _a !== void 0 ? _a : pColumn.nameInProducer;
                 const csvColumnIndex = headerColumns.findIndex(x => x === columnKey);
                 (0, Affirm_1.default)(csvColumnIndex > -1, `The column "${pColumn.nameInProducer}" (with key "${columnKey}") of producer "${producer.name}" doesn't exist in the underlying dataset.`);
-                csvColumns.push({
+                csvColumns.push({
+                    index: csvColumnIndex,
+                    name: columnKey,
+                    saveAs: pColumn.nameInProducer,
+                    type: (_c = (_b = pColumn.dimension) === null || _b === void 0 ? void 0 : _b.type) !== null && _c !== void 0 ? _c : 'string'
+                });
             }
             return csvColumns;
         };
package/engines/producer/ProducerEngine.js
CHANGED
@@ -34,7 +34,8 @@ class ProducerEngineClass {
                 (0, Affirm_1.default)(sql, `Invalid SQL from compilation for producer "${producer.name}"`);
                 return sql;
             }
-            case 'aws-s3':
+            case 'aws-s3':
+            case 'delta-share': {
                 const columns = FileCompiler_1.default.compileProducer(producer, source);
                 (0, Affirm_1.default)(columns, `Invalid columns from compilation for producer "${producer.name}"`);
                 break;
@@ -141,7 +142,8 @@
                 break;
             }
             case 'local':
-            case 'aws-s3':
+            case 'aws-s3':
+            case 'delta-share': {
                 const fileData = yield this.readFile(producer, { readmode: 'lines', lines: { from: 0, to: sampleSize } });
                 dataset = yield dataset.loadFromMemory(fileData.data, producer, discover);
                 break;
package/engines/transform/JoinEngine.js
CHANGED
@@ -102,12 +102,16 @@ class JoinEngineClass {
             fileType: 'CSV'
         });
         // Get dimensions for the result dataset based on consumer columns
-        const resultDimensions = consumerColumns.map((col, index) =>
-
-
-
-
-
+        const resultDimensions = consumerColumns.map((col, index) => {
+            var _a, _b;
+            return ({
+                name: col.consumerAlias || col.consumerKey,
+                key: col.consumerAlias || col.consumerKey,
+                index,
+                type: (_b = (_a = col.dimension) === null || _a === void 0 ? void 0 : _a.type) !== null && _b !== void 0 ? _b : 'string',
+                hidden: null
+            });
+        });
         // Initialize the result dataset with proper dimensions
         resultDataset.getDimensions().length = 0;
         resultDataset.getDimensions().push(...resultDimensions);
package/engines/transform/TransformationEngine.js
CHANGED
@@ -22,7 +22,7 @@ class TransformationEngineClass {
             (0, Affirm_1.default)(dataset, 'Invalid data');
             const fieldsToTransform = consumer.fields.filter(field => Algo_1.default.hasVal(field.transform));
             Affirm_1.default.hasItems(fieldsToTransform, 'No fields with transformations');
-
+            yield dataset.map(record => {
                 var _a;
                 for (const field of fieldsToTransform) {
                     if (!field.transform)
@@ -54,6 +54,17 @@
                 }
                 return record;
             }, options);
+            /**
+             * Some transformations (for now only "cast") change the underlying type of the dataset dimension
+             * Here I update the dimension type of the dataset.
+             * TODO: I think that we may have a bug if you cast AND then do an operation on the number, since it reverts back to being a string in the same trnasformation chain, since the dimension type update is applied only at the end of all the transformations
+             */
+            for (const field of fieldsToTransform) {
+                if (!field.transform)
+                    continue;
+                this.applyDimensionsChanges(field.transform, field, dataset);
+            }
+            return dataset;
         });
         this.isFieldCombinationTransformation = (transformation) => {
             if (Array.isArray(transformation)) {
@@ -77,7 +88,7 @@
             const casted = TypeCaster_1.default.cast(value, cast, format);
             if (cast === 'number' && isNaN(casted))
                 throw new Error(`Cannot cast non-numeric value in field '${field.key}'`);
-            if (cast === '
+            if (cast === 'datetime' && casted instanceof Date && isNaN(casted.getTime()))
                 throw new Error(`Cannot cast value to date in field '${field.key}'`);
             return casted;
         }
@@ -263,6 +274,24 @@
             }
             return false;
         };
+        this.applyDimensionsChanges = (transformations, field, dataset) => {
+            if (Array.isArray(transformations)) {
+                for (const transform of transformations) {
+                    this.applyDimensionsChanges(transform, field, dataset);
+                }
+                return dataset;
+            }
+            // Single transformation
+            if ('cast' in transformations) {
+                const { cast } = transformations;
+                let oldDimension = dataset.getDimensions().find(x => x.name === field.key);
+                if (!oldDimension)
+                    oldDimension = dataset.getDimensions().find(x => x.key === field.key);
+                const newDimension = Object.assign(Object.assign({}, structuredClone(oldDimension)), { type: cast });
+                dataset.setSingleDimension(newDimension, oldDimension);
+            }
+            return dataset;
+        };
     }
 }
 const TransformationEngine = new TransformationEngineClass();
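
Note: applyDimensionsChanges keeps the dataset's dimension metadata in sync after a "cast" transform, so downstream readers parse the column with its new type. A reduced sketch of that bookkeeping (illustrative shapes, not the package's API):

// Illustrative: after casting a field, update the matching dimension's type.
function applyCastToDimensions(dimensions, fieldKey, cast) {
    return dimensions.map(dim =>
        (dim.name === fieldKey || dim.key === fieldKey) ? { ...dim, type: cast } : dim
    );
}

const dims = [{ key: 'amount', name: 'amount', index: 0, hidden: null, type: 'string' }];
console.log(applyCastToDimensions(dims, 'amount', 'number'));
// -> [{ key: 'amount', name: 'amount', index: 0, hidden: null, type: 'number' }]
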
package/engines/usage/UsageDataManager.js
ADDED
@@ -0,0 +1,110 @@
+"use strict";
+var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
+    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
+    return new (P || (P = Promise))(function (resolve, reject) {
+        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
+        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
+        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
+        step((generator = generator.apply(thisArg, _arguments || [])).next());
+    });
+};
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+const DSTE_1 = __importDefault(require("../../core/dste/DSTE"));
+const DatabaseEngine_1 = __importDefault(require("../../database/DatabaseEngine"));
+const DataframeManager_1 = __importDefault(require("./DataframeManager"));
+class UsageDataManager {
+    getUsageDetails() {
+        return __awaiter(this, void 0, void 0, function* () {
+            const now = DSTE_1.default.now();
+            const from = new Date(now.getTime() - 30 * 24 * 60 * 60 * 1000);
+            const prevMonthFrom = new Date(now.getTime() - 60 * 24 * 60 * 60 * 1000);
+            const yearAgo = new Date(now.getFullYear(), now.getMonth() - 11, 1);
+            const collection = 'usage';
+            // Aggregate status counts for current and previous month
+            const getStatusCounts = (start, end) => __awaiter(this, void 0, void 0, function* () {
+                const results = yield DatabaseEngine_1.default.aggregate(collection, [
+                    { $match: { startedAt: { $gte: start, $lte: end } } },
+                    { $group: { _id: '$status', count: { $sum: 1 } } }
+                ]);
+                let success = 0, failed = 0, total = 0;
+                results.forEach(r => {
+                    total += r.count;
+                    if (r._id === 'success')
+                        success = r.count;
+                    if (r._id === 'failed')
+                        failed = r.count;
+                });
+                return { total, success, failed };
+            });
+            const statusesRequests = yield getStatusCounts(from, now);
+            const prevStatusesRequests = yield getStatusCounts(prevMonthFrom, from);
+            // Monthly success and fails for last 12 months
+            const monthlySuccessPipeline = [
+                { $match: { status: 'success', startedAt: { $gte: yearAgo, $lte: now } } },
+                { $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
+                { $group: { _id: { year: '$year', month: '$month' }, count: { $sum: 1 } } },
+                { $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$count' } },
+                { $sort: { x: 1 } }
+            ];
+            const monthlyFailsPipeline = [
+                { $match: { status: 'failed', startedAt: { $gte: yearAgo, $lte: now } } },
+                { $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
+                { $group: { _id: { year: '$year', month: '$month' }, count: { $sum: 1 } } },
+                { $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$count' } },
+                { $sort: { x: 1 } }
+            ];
+            const rawMonthlySuccess = yield DatabaseEngine_1.default.aggregate(collection, monthlySuccessPipeline);
+            const rawMonthlyFails = yield DatabaseEngine_1.default.aggregate(collection, monthlyFailsPipeline);
+            // Top lines per month for last 12 months
+            const topLinesPipeline = [
+                { $match: { startedAt: { $gte: yearAgo, $lte: now } } },
+                { $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
+                { $group: { _id: { year: '$year', month: '$month' }, itemsCount: { $max: '$itemsCount' } } },
+                { $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$itemsCount' } },
+                { $sort: { x: 1 } }
+            ];
+            const topLines = yield DatabaseEngine_1.default.aggregate(collection, topLinesPipeline);
+            // Top times per month for last 12 months
+            const topTimePipeline = [
+                { $match: { startedAt: { $gte: yearAgo, $lte: now } } },
+                { $addFields: { durationMs: { $subtract: ['$finishedAt', '$startedAt'] }, year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
+                { $group: { _id: { year: '$year', month: '$month' }, maxDuration: { $max: '$durationMs' } } },
+                { $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$maxDuration' } },
+                { $sort: { x: 1 } }
+            ];
+            const topTime = yield DatabaseEngine_1.default.aggregate(collection, topTimePipeline);
+            // Monthly consumers: for each consumer, per month count
+            const consumerPipeline = [
+                { $match: { startedAt: { $gte: yearAgo, $lte: now } } },
+                { $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
+                { $group: { _id: { consumer: '$consumer', year: '$year', month: '$month' }, count: { $sum: 1 } } },
+                { $project: { _id: 0, consumer: '$_id.consumer', x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$count' } },
+                { $sort: { consumer: 1, x: 1 } }
+            ];
+            const consumersData = yield DatabaseEngine_1.default.aggregate(collection, consumerPipeline);
+            // transform to consumer array
+            const consumerMap = {};
+            consumersData.forEach(r => {
+                consumerMap[r.consumer] = consumerMap[r.consumer] || [];
+                consumerMap[r.consumer].push({ x: r.x, y: r.y });
+            });
+            const consumers = Object.entries(consumerMap).map(([name, data]) => ({ name, data: DataframeManager_1.default.fill(data !== null && data !== void 0 ? data : [], yearAgo, now) }));
+            // Recent executions
+            const recentExecution = yield DatabaseEngine_1.default.query(collection, { startedAt: { $gte: from, $lte: now } }, { sort: { startedAt: -1 }, limit: 10 });
+            return {
+                statusesRequests,
+                prevStatusesRequests,
+                monthlySuccess: DataframeManager_1.default.fill(rawMonthlySuccess !== null && rawMonthlySuccess !== void 0 ? rawMonthlySuccess : [], yearAgo, now),
+                monthlyFails: DataframeManager_1.default.fill(rawMonthlyFails !== null && rawMonthlyFails !== void 0 ? rawMonthlyFails : [], yearAgo, now),
+                consumers: consumers,
+                topLine: DataframeManager_1.default.fill(topLines !== null && topLines !== void 0 ? topLines : [], yearAgo, now),
+                topTime: DataframeManager_1.default.fill(topTime !== null && topTime !== void 0 ? topTime : [], yearAgo, now),
+                recentExecution
+            };
+        });
+    }
+}
+exports.default = new UsageDataManager();
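
Note: the pipelines above are MongoDB-style aggregation stages ($match -> $addFields -> $group -> $project -> $sort) run through the package's DatabaseEngine. For reference, the monthly-success pipeline yields the same { x: 'YYYY-M', y: count } rows when run directly with the official mongodb driver; the connection details below are placeholders, not anything the package exposes:

const { MongoClient } = require('mongodb');

// Placeholder connection details; the package goes through DatabaseEngine instead.
async function monthlySuccess(uri, dbName, from, to) {
    const client = await MongoClient.connect(uri);
    try {
        return await client.db(dbName).collection('usage').aggregate([
            { $match: { status: 'success', startedAt: { $gte: from, $lte: to } } },
            { $addFields: { year: { $year: '$startedAt' }, month: { $month: '$startedAt' } } },
            { $group: { _id: { year: '$year', month: '$month' }, count: { $sum: 1 } } },
            { $project: { _id: 0, x: { $concat: [{ $toString: '$_id.year' }, '-', { $toString: '$_id.month' }] }, y: '$count' } },
            { $sort: { x: 1 } }
        ]).toArray();
    }
    finally {
        await client.close();
    }
}
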
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@forzalabs/remora",
-  "version": "0.1.4-nasco.3",
+  "version": "0.1.5-nasco.3",
   "description": "A powerful CLI tool for seamless data translation.",
   "main": "index.js",
   "private": false,
@@ -47,6 +47,7 @@
     "dotenv": "^16.0.3",
     "fast-xml-parser": "^5.2.3",
     "fs-extra": "^11.1.0",
+    "hyparquet": "^1.17.4",
     "inquirer": "^8.2.5",
     "json-schema": "^0.4.0",
     "jsonwebtoken": "^9.0.2",