@forzalabs/remora 0.1.8-nasco.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Constants.js +10 -3
- package/actions/automap.js +77 -0
- package/actions/deploy.js +1 -1
- package/actions/run.js +9 -4
- package/actions/sample.js +176 -0
- package/database/DatabaseEngine.js +18 -3
- package/definitions/DatasetDefinitions.js +2 -0
- package/definitions/json_schemas/producer-schema.json +39 -1
- package/definitions/json_schemas/source-schema.json +76 -3
- package/drivers/DriverFactory.js +6 -0
- package/drivers/DriverHelper.js +18 -6
- package/drivers/HttpApiDriver.js +204 -0
- package/drivers/LocalDriver.js +21 -7
- package/drivers/S3Driver.js +24 -8
- package/engines/UserManager.js +12 -0
- package/engines/ai/LLM.js +4 -24
- package/engines/consumer/ConsumerEngine.js +2 -2
- package/engines/dataset/Dataset.js +1 -1
- package/engines/dataset/DatasetManager.js +68 -25
- package/engines/dataset/DatasetRecord.js +5 -3
- package/engines/execution/ExecutionPlanner.js +2 -1
- package/engines/parsing/CSVParser.js +59 -0
- package/engines/parsing/ParseManager.js +21 -4
- package/engines/producer/ProducerEngine.js +13 -4
- package/engines/scheduler/CronScheduler.js +2 -3
- package/engines/scheduler/QueueManager.js +2 -3
- package/engines/transform/TransformationEngine.js +18 -0
- package/engines/usage/UsageManager.js +4 -2
- package/engines/validation/Validator.js +17 -0
- package/index.js +20 -0
- package/package.json +3 -2
package/engines/dataset/DatasetManager.js
CHANGED
@@ -22,6 +22,7 @@ const Dataset_1 = __importDefault(require("./Dataset"));
  const promises_1 = require("stream/promises");
  const fs_1 = require("fs");
  const DeveloperEngine_1 = __importDefault(require("../ai/DeveloperEngine"));
+ const Constants_1 = __importDefault(require("../../Constants"));
  class DatasetManagerClass {
  constructor() {
  /**
@@ -30,14 +31,19 @@ class DatasetManagerClass {
  * when the same producer / consumer is executed multiple times in parallel.
  */
  this.create = (producer, executionId) => {
+ var _a, _b;
  (0, Affirm_1.default)(producer, 'Invalid producer');
- const { name, settings: { delimiter, fileKey, fileType, hasHeaderRow, sheetName } } = producer;
+ const { name, settings: { delimiter, fileKey, fileType, hasHeaderRow, sheetName, httpApi } } = producer;
+ // Check if any dimension has sourceFilename flag set to true
+ const hasSourceFilenameDimension = (_b = (_a = producer.dimensions) === null || _a === void 0 ? void 0 : _a.some(d => d.sourceFilename === true)) !== null && _b !== void 0 ? _b : false;
  const dataset = new Dataset_1.default(name, {
  fileKey,
  fileType,
  hasHeaderRow,
  sheetName,
- delimiter
+ delimiter,
+ httpApi,
+ includeSourceFilename: hasSourceFilenameDimension
  }, undefined, executionId);
  return dataset;
  };
@@ -49,7 +55,7 @@
  return this.buildDimensionsFromFirstLine(firstLine, dataset.getFile(), producer, discover);
  });
  this.buildDimensionsFromFirstLine = (firstLine_1, dsFile_1, producer_1, ...args_1) => __awaiter(this, [firstLine_1, dsFile_1, producer_1, ...args_1], void 0, function* (firstLine, dsFile, producer, discover = false) {
- var _a, _b, _c, _d, _e, _f, _g, _h;
+ var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o;
  (0, Affirm_1.default)(firstLine, `Invalid first line`);
  (0, Affirm_1.default)(dsFile, `Invalid dataset file`);
  (0, Affirm_1.default)(producer, `Invalid producer`);
@@ -77,6 +83,12 @@
  const columns = FileCompiler_1.default.compileProducer(producer, source);
  const firstObject = JSON.parse(firstLine);
  const keys = Object.keys(firstObject);
+ // If includeSourceFilename is enabled, the driver has added $source_filename column
+ // We need to add it to the keys list so dimensions can reference it
+ const includeSourceFilename = file.includeSourceFilename === true;
+ if (includeSourceFilename) {
+ keys.push(Constants_1.default.SOURCE_FILENAME_COLUMN);
+ }
  if (discover) {
  return {
  delimiter: (_b = file.delimiter) !== null && _b !== void 0 ? _b : ',',
@@ -91,7 +103,21 @@
  }
  const dimensions = [];
  for (const pColumn of columns) {
-
+ // Handle sourceFilename dimension specially - it maps to the $source_filename column added by the driver
+ if (((_c = pColumn.dimension) === null || _c === void 0 ? void 0 : _c.sourceFilename) === true) {
+ if (includeSourceFilename) {
+ const sourceFilenameIndex = keys.findIndex(x => x === Constants_1.default.SOURCE_FILENAME_COLUMN);
+ dimensions.push({
+ index: sourceFilenameIndex,
+ key: Constants_1.default.SOURCE_FILENAME_COLUMN,
+ name: pColumn.nameInProducer,
+ hidden: null,
+ type: (_e = (_d = pColumn.dimension) === null || _d === void 0 ? void 0 : _d.type) !== null && _e !== void 0 ? _e : 'string'
+ });
+ }
+ continue;
+ }
+ const columnKey = (_f = pColumn.aliasInProducer) !== null && _f !== void 0 ? _f : pColumn.nameInProducer;
  const csvColumnIndex = keys.findIndex(x => x === columnKey);
  (0, Affirm_1.default)(csvColumnIndex > -1, `The column "${pColumn.nameInProducer}" (with key "${columnKey}") of producer "${producer.name}" doesn't exist in the underlying dataset.`);
  dimensions.push({
@@ -99,47 +125,64 @@
  key: columnKey,
  name: pColumn.nameInProducer,
  hidden: null,
- type: (
+ type: (_h = (_g = pColumn.dimension) === null || _g === void 0 ? void 0 : _g.type) !== null && _h !== void 0 ? _h : 'string'
  });
  }
- const delimiterChar = (
+ const delimiterChar = (_j = file.delimiter) !== null && _j !== void 0 ? _j : ',';
  return { dimensions, delimiter: delimiterChar };
  }
  case 'TXT': {
  if (!file.hasHeaderRow) {
  // If the file is a TXT and there isn't an header row, then I add a fake one that maps directly to the producer
- const delimiterChar = (
+ const delimiterChar = (_k = file.delimiter) !== null && _k !== void 0 ? _k : ',';
  const source = Environment_1.default.getSource(producer.source);
  const columns = FileCompiler_1.default.compileProducer(producer, source);
+ const includeSourceFilename = file.includeSourceFilename === true;
  if (discover) {
  // Since I don't have an header, and I'm discovering, I just create placeholder dimensions based on the same number of columns of the txt
+ const colValues = firstLine.split(delimiterChar);
+ const dimensions = colValues.map((x, i) => ({
+ hidden: false,
+ index: i,
+ key: `Col ${i + 1}`,
+ name: `Col ${i + 1}`,
+ type: 'string'
+ }));
  return {
  delimiter: delimiterChar,
- dimensions
- hidden: false,
- index: i,
- key: `Col ${i + 1}`,
- name: `Col ${i + 1}`,
- type: 'string'
- }))
+ dimensions
  };
  }
+ // Filter out sourceFilename columns for index-based mapping, but track them for later
+ const regularColumns = columns.filter(x => { var _a; return ((_a = x.dimension) === null || _a === void 0 ? void 0 : _a.sourceFilename) !== true; });
+ const sourceFilenameColumn = columns.find(x => { var _a; return ((_a = x.dimension) === null || _a === void 0 ? void 0 : _a.sourceFilename) === true; });
+ const dimensions = regularColumns.map((x, i) => {
+ var _a, _b, _c;
+ return ({
+ key: (_a = x.aliasInProducer) !== null && _a !== void 0 ? _a : x.nameInProducer,
+ name: x.nameInProducer,
+ index: i,
+ hidden: null,
+ type: (_c = (_b = x.dimension) === null || _b === void 0 ? void 0 : _b.type) !== null && _c !== void 0 ? _c : 'string'
+ });
+ });
+ // Add sourceFilename dimension at the end if enabled
+ if (sourceFilenameColumn && includeSourceFilename) {
+ dimensions.push({
+ key: Constants_1.default.SOURCE_FILENAME_COLUMN,
+ name: sourceFilenameColumn.nameInProducer,
+ index: regularColumns.length, // Index after all regular columns
+ hidden: null,
+ type: (_m = (_l = sourceFilenameColumn.dimension) === null || _l === void 0 ? void 0 : _l.type) !== null && _m !== void 0 ? _m : 'string'
+ });
+ }
  return {
- dimensions
- var _a, _b, _c;
- return ({
- key: (_a = x.aliasInProducer) !== null && _a !== void 0 ? _a : x.nameInProducer,
- name: x.nameInProducer,
- index: i,
- hidden: null,
- type: (_c = (_b = x.dimension) === null || _b === void 0 ? void 0 : _b.type) !== null && _c !== void 0 ? _c : 'string'
- });
- }),
+ dimensions,
  delimiter: delimiterChar
  };
  }
  else {
- const delimiterChar = (
+ const delimiterChar = (_o = producer.settings.delimiter) !== null && _o !== void 0 ? _o : ',';
  const rawDimensions = ParseManager_1.default._extractHeader(firstLine, delimiterChar, producer, discover);
  return {
  dimensions: rawDimensions.map(x => ({
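Note: the sourceFilename handling above is driven by a boolean flag on a producer dimension; when set, the dataset is created with includeSourceFilename and that dimension is mapped to the driver-added $source_filename column instead of a column read from the file. A minimal sketch of a producer definition using the flag follows; the overall shape and any field not visible in this diff (the producer/source names, fileKey value) are assumptions for illustration only.

// Hypothetical producer definition (illustrative only; field names beyond name, source, settings, dimensions, type, sourceFilename are not confirmed by this diff)
const producer = {
    name: 'orders',
    source: 'my-s3-source', // per the Validator.js change below, only 'local' and 'aws-s3' sources support sourceFilename
    settings: { fileKey: 'data/orders.csv', fileType: 'CSV', hasHeaderRow: true, delimiter: ',' },
    dimensions: [
        { name: 'order_id', type: 'string' },
        { name: 'origin_file', type: 'string', sourceFilename: true } // resolved to the $source_filename column added by the driver
    ]
};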
package/engines/dataset/DatasetRecord.js
CHANGED
@@ -4,19 +4,21 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
  };
  Object.defineProperty(exports, "__esModule", { value: true });
  const Algo_1 = __importDefault(require("../../core/Algo"));
+ const CSVParser_1 = __importDefault(require("../parsing/CSVParser"));
  const TypeCaster_1 = __importDefault(require("../transform/TypeCaster"));
  class DatasetRecord {
  constructor(row, dimensions, delimiter) {
  this.parse = (row, delimiter, dimensions) => {
  if (!this.isEmpty() && dimensions.length > 0) {
- const parts =
+ const parts = CSVParser_1.default.parseRow(row, delimiter);
  for (let i = 0; i < dimensions.length; i++) {
  const dim = dimensions[i];
-
+ // Use dim.index to get the correct column from the file, not the loop index
+ this._value[dim.name] = TypeCaster_1.default.cast(parts[dim.index], dim.type, dim.format);
  }
  }
  };
- this.stringify = () => this._dimensions.map(x => this._value[x.name]).join(this._delimiter);
+ this.stringify = () => this._dimensions.map(x => `"${this._value[x.name]}"`).join(this._delimiter);
  this.isEmpty = () => { var _a; return ((_a = this._row) === null || _a === void 0 ? void 0 : _a.trim().length) === 0; };
  this.getRaw = () => this._row;
  this.getValue = (dimension) => this._value[dimension];
package/engines/execution/ExecutionPlanner.js
CHANGED
@@ -112,7 +112,8 @@ class ExecutionPlannerClas {
  }
  case 'local':
  case 'aws-s3':
- case 'delta-share':
+ case 'delta-share':
+ case 'http-api': {
  plan.push({ type: 'load-dataset', producer });
  plan.push({ type: 'prepare-dataset', producer });
  if (producer.dimensions.some(x => { var _a, _b; return ((_a = x.alias) === null || _a === void 0 ? void 0 : _a.includes('{')) || ((_b = x.alias) === null || _b === void 0 ? void 0 : _b.includes('[')); }))
package/engines/parsing/CSVParser.js
ADDED
@@ -0,0 +1,59 @@
+ "use strict";
+ var __importDefault = (this && this.__importDefault) || function (mod) {
+ return (mod && mod.__esModule) ? mod : { "default": mod };
+ };
+ Object.defineProperty(exports, "__esModule", { value: true });
+ const Affirm_1 = __importDefault(require("../../core/Affirm"));
+ class CSVParserClass {
+ constructor() {
+ this.parseRow = (row, delimiter) => {
+ (0, Affirm_1.default)(row, 'Invalid row');
+ (0, Affirm_1.default)(delimiter, 'Invalid delimiter');
+ const fields = [];
+ let currentField = '';
+ let inQuotes = false;
+ let i = 0;
+ while (i < row.length) {
+ const char = row[i];
+ const nextChar = row[i + 1];
+ if (char === '"') {
+ if (!inQuotes) {
+ // Starting a quoted field
+ inQuotes = true;
+ }
+ else if (nextChar === '"') {
+ // Escaped quote (double quotes within quoted field)
+ currentField += '"';
+ i++; // Skip the next quote
+ }
+ else {
+ // Ending a quoted field
+ inQuotes = false;
+ }
+ }
+ else if (char === delimiter && !inQuotes) {
+ // Field separator found outside quotes
+ fields.push(currentField.trim());
+ currentField = '';
+ }
+ else if (char === '\r' || char === '\n') {
+ // Handle line endings - only break if not in quotes
+ if (!inQuotes) {
+ break;
+ }
+ currentField += char;
+ }
+ else {
+ // Regular character
+ currentField += char;
+ }
+ i++;
+ }
+ // Add the last field
+ fields.push(currentField.trim());
+ return fields;
+ };
+ }
+ }
+ const CSVParser = new CSVParserClass();
+ exports.default = CSVParser;
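CSVParser.parseRow is a character-by-character splitter that respects double-quoted fields, doubled quotes as escapes, and delimiters inside quotes, then trims each field. A small usage sketch of the module added above (the require path assumes the compiled layout shown in the file list):

const CSVParser = require('./engines/parsing/CSVParser').default;

// A quoted field containing the delimiter, plus an escaped quote ("")
const row = 'ACME,"Bolts, M5","He said ""ok""",42';
console.log(CSVParser.parseRow(row, ','));
// -> [ 'ACME', 'Bolts, M5', 'He said "ok"', '42' ]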
package/engines/parsing/ParseManager.js
CHANGED
@@ -6,29 +6,46 @@ Object.defineProperty(exports, "__esModule", { value: true });
  const Affirm_1 = __importDefault(require("../../core/Affirm"));
  const Environment_1 = __importDefault(require("../Environment"));
  const FileCompiler_1 = __importDefault(require("../file/FileCompiler"));
+ const CSVParser_1 = __importDefault(require("./CSVParser"));
+ const Constants_1 = __importDefault(require("../../Constants"));
  class ParseManagerClass {
  constructor() {
  this._extractHeader = (headerLine, delimiter, producer, discover) => {
- var _a, _b, _c;
+ var _a, _b, _c, _d, _e, _f;
  (0, Affirm_1.default)(headerLine, `Invalid CSV header line for producer "${producer.name}"`);
  (0, Affirm_1.default)(delimiter, 'Invalid CSV delimiter');
  (0, Affirm_1.default)(producer, 'Invalid producer');
  const source = Environment_1.default.getSource(producer.source);
  let columns = FileCompiler_1.default.compileProducer(producer, source);
- const headerColumns =
+ const headerColumns = CSVParser_1.default.parseRow(headerLine, delimiter).map(x => x.trim());
  // If I'm discovering the file, then it means that the dimensions are not set, so I use the ones that I get from the file directly
  if (discover)
  columns = headerColumns.map(x => ({ nameInProducer: x }));
  const csvColumns = [];
  for (const pColumn of columns) {
-
+ // Skip sourceFilename dimensions - they don't exist in the source file
+ // They are added dynamically by the driver when reading the file
+ if (((_a = pColumn.dimension) === null || _a === void 0 ? void 0 : _a.sourceFilename) === true) {
+ // Find the index of $source_filename in the header (it was added by the driver)
+ const sourceFilenameIndex = headerColumns.findIndex(x => x === Constants_1.default.SOURCE_FILENAME_COLUMN);
+ if (sourceFilenameIndex > -1) {
+ csvColumns.push({
+ index: sourceFilenameIndex,
+ name: Constants_1.default.SOURCE_FILENAME_COLUMN,
+ saveAs: pColumn.nameInProducer,
+ type: (_c = (_b = pColumn.dimension) === null || _b === void 0 ? void 0 : _b.type) !== null && _c !== void 0 ? _c : 'string'
+ });
+ }
+ continue;
+ }
+ const columnKey = (_d = pColumn.aliasInProducer) !== null && _d !== void 0 ? _d : pColumn.nameInProducer;
  const csvColumnIndex = headerColumns.findIndex(x => x === columnKey);
  (0, Affirm_1.default)(csvColumnIndex > -1, `The column "${pColumn.nameInProducer}" (with key "${columnKey}") of producer "${producer.name}" doesn't exist in the underlying dataset.`);
  csvColumns.push({
  index: csvColumnIndex,
  name: columnKey,
  saveAs: pColumn.nameInProducer,
- type: (
+ type: (_f = (_e = pColumn.dimension) === null || _e === void 0 ? void 0 : _e.type) !== null && _f !== void 0 ? _f : 'string'
  });
  }
  return csvColumns;
package/engines/producer/ProducerEngine.js
CHANGED
@@ -65,7 +65,7 @@ class ProducerEngineClass {
  }
  });
  this.readFile = (producer, options) => __awaiter(this, void 0, void 0, function* () {
- var _a;
+ var _a, _b, _c;
  (0, Affirm_1.default)(producer, 'Invalid producer');
  (0, Affirm_1.default)(options, 'Invalid options');
  if (options.readmode === 'lines')
@@ -79,16 +79,25 @@
  let lines = [];
  switch (options.readmode) {
  case 'lines':
- lines = yield driver.readLinesInRange({
+ lines = yield driver.readLinesInRange({
+ fileKey,
+ fileType,
+ options: { lineFrom: options.lines.from, lineTo: options.lines.to, sheetName, hasHeaderRow },
+ httpApi: (_a = producer.settings) === null || _a === void 0 ? void 0 : _a.httpApi
+ });
  break;
  case 'all':
- lines = yield driver.readAll({
+ lines = yield driver.readAll({
+ fileKey, fileType,
+ options: { sheetName, hasHeaderRow },
+ httpApi: (_b = producer.settings) === null || _b === void 0 ? void 0 : _b.httpApi
+ });
  break;
  case 'download':
  dataset = yield driver.download(dataset);
  break;
  }
- switch ((
+ switch ((_c = producer.settings.fileType) === null || _c === void 0 ? void 0 : _c.toUpperCase()) {
  case 'CSV':
  case 'TXT':
  return { data: lines, dataset, dataType: 'lines-of-text' };
package/engines/scheduler/CronScheduler.js
CHANGED
@@ -124,9 +124,8 @@ class CronScheduler {
  return __awaiter(this, void 0, void 0, function* () {
  try {
  console.log(`Executing CRON job for consumer "${consumer.name}" output ${outputIndex}`);
- const user = UserManager_1.default.
-
- const result = yield ConsumerEngine_1.default.execute(consumer, {}, user);
+ const user = UserManager_1.default.getRemoraWorkerUser();
+ const result = yield ConsumerEngine_1.default.execute(consumer, {}, { _id: user._id, name: user.name, type: 'actor' }, { invokedBy: 'CRON' });
  console.log(`CRON job completed successfully for consumer "${consumer.name}" output ${outputIndex}`);
  // Log execution statistics
  if (result && result._stats) {
package/engines/scheduler/QueueManager.js
CHANGED
@@ -198,9 +198,8 @@ class QueueManager {
  }
  }
  console.log(`Processing queue message for consumer "${mapping.consumer.name}" output ${mapping.outputIndex}`);
- const user = UserManager_1.default.
-
- const result = yield ConsumerEngine_1.default.execute(mapping.consumer, {}, user);
+ const user = UserManager_1.default.getRemoraWorkerUser();
+ const result = yield ConsumerEngine_1.default.execute(mapping.consumer, {}, { _id: user._id, name: user.name, type: 'actor' }, { invokedBy: 'QUEUE' });
  console.log(`Queue trigger completed successfully for consumer "${mapping.consumer.name}" output ${mapping.outputIndex}`);
  // Log execution statistics
  if (result && result._stats) {
package/engines/transform/TransformationEngine.js
CHANGED
@@ -15,6 +15,8 @@ Object.defineProperty(exports, "__esModule", { value: true });
  const Affirm_1 = __importDefault(require("../../core/Affirm"));
  const Algo_1 = __importDefault(require("../../core/Algo"));
  const TypeCaster_1 = __importDefault(require("./TypeCaster"));
+ const CryptoEngine_1 = __importDefault(require("../CryptoEngine"));
+ const DeveloperEngine_1 = __importDefault(require("../ai/DeveloperEngine"));
  class TransformationEngineClass {
  constructor() {
  this.apply = (consumer, dataset, options) => __awaiter(this, void 0, void 0, function* () {
@@ -237,6 +239,9 @@ class TransformationEngineClass {
  return fieldValues.join(separator);
  }
  }
+ if ('mask' in transformations) {
+ return this.applyMasking(value, transformations.mask, field);
+ }
  if ('conditional' in transformations) {
  for (const clause of transformations.conditional.clauses) {
  if (this.evaluateCondition(value, clause.if)) {
@@ -274,6 +279,19 @@
  }
  return false;
  };
+ this.applyMasking = (value, maskType, field) => {
+ if (!Algo_1.default.hasVal(value))
+ return value;
+ if (maskType === 'none')
+ return value;
+ const valueType = DeveloperEngine_1.default.inferDimensionType(value);
+ try {
+ return CryptoEngine_1.default.hashValue(maskType, String(value), valueType);
+ }
+ catch (error) {
+ throw new Error(`Failed to apply masking transformation '${maskType}' to field '${field.key}': ${error.message}`);
+ }
+ };
  this.applyDimensionsChanges = (transformations, field, dataset) => {
  if (Array.isArray(transformations)) {
  for (const transform of transformations) {
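The new 'mask' branch means a field's transformations object can carry a mask entry; applyMasking infers the value's type via DeveloperEngine.inferDimensionType and delegates to CryptoEngine.hashValue. A hedged sketch of the configuration shape follows; the only mask value visible in this diff is 'none', so the 'sha256' identifier below is purely illustrative and may not be a supported mask type.

// Hypothetical consumer field configuration (illustrative only)
const field = {
    key: 'email',
    transformations: {
        mask: 'sha256' // any value other than 'none' is forwarded to CryptoEngine.hashValue(maskType, String(value), valueType)
    }
};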
package/engines/usage/UsageManager.js
CHANGED
@@ -18,14 +18,16 @@ class UsageManagerClass {
  const now = DSTE_1.default.now();
  return `${consumer.name}_${now.getUTCFullYear()}_${now.getUTCMonth()}_${now.getUTCDate()}`.toLowerCase();
  };
- this.startUsage = (consumer, user) => {
+ this.startUsage = (consumer, user, details) => {
+ var _a;
  const newUsage = {
  _id: Helper_1.default.uuid(),
  consumer: consumer.name,
  startedAt: DSTE_1.default.now(),
- executedBy:
+ executedBy: user,
  itemsCount: -1,
  status: 'started',
+ invokedBy: (_a = details === null || details === void 0 ? void 0 : details.invokedBy) !== null && _a !== void 0 ? _a : 'UNKNOWN',
  _signature: ''
  };
  if (Helper_1.default.isDev())
package/engines/validation/Validator.js
CHANGED
@@ -51,6 +51,23 @@ class ValidatorClass {
  errors.push(`Missing parameter "source" in producer`);
  if (producer.dimensions.some(x => x.name.includes('{') || x.name.includes('[')))
  errors.push(`Invalid dimension name found in producer "${producer.name}": can't use characters "{" or "[" in dimension names`);
+ // Validate sourceFilename dimension usage
+ const sourceFilenameDimensions = producer.dimensions.filter(x => x.sourceFilename === true);
+ if (sourceFilenameDimensions.length > 1) {
+ errors.push(`Producer "${producer.name}" has multiple dimensions with sourceFilename=true. Only one dimension can have this flag.`);
+ }
+ if (sourceFilenameDimensions.length > 0) {
+ const source = Environment_1.default.getSource(producer.source);
+ if (source) {
+ const validEngines = ['local', 'aws-s3'];
+ if (!validEngines.includes(source.engine)) {
+ errors.push(`Producer "${producer.name}" has a dimension with sourceFilename=true but the source engine "${source.engine}" doesn't support this feature. Only "local" and "aws-s3" sources support sourceFilename.`);
+ }
+ if (!producer.settings.fileKey && !producer.settings.fileType) {
+ errors.push(`Producer "${producer.name}" has a dimension with sourceFilename=true but is not a file-based producer. sourceFilename requires fileKey and fileType to be set.`);
+ }
+ }
+ }
  }
  catch (e) {
  if (errors.length === 0)
package/index.js
CHANGED
@@ -17,6 +17,8 @@ const create_consumer_1 = require("./actions/create_consumer");
  const Constants_1 = __importDefault(require("./Constants"));
  const LicenceManager_1 = __importDefault(require("./licencing/LicenceManager"));
  const Runtime_1 = __importDefault(require("./helper/Runtime"));
+ const automap_1 = require("./actions/automap");
+ const sample_1 = require("./actions/sample");
  dotenv_1.default.configDotenv();
  const program = new commander_1.Command();
  // Validate the remora licence
@@ -75,9 +77,27 @@ program
  .command('create-producer <name>')
  .description('Create a new producer configuration with default settings')
  .action(create_producer_1.create_producer);
+ program
+ .command('automap')
+ .description('Automatically map a producer to consumers using specified schemas.')
+ .argument('<producer>', 'The producer to analyze')
+ .argument('<schemas...>', 'One or more schema names to map against')
+ .action(automap_1.automap);
  program
  .command('create-consumer <name>')
  .description('Create a new consumer configuration with default settings')
  .option('-p, --producer <name>', 'Producer to create a one-to-one mapping from')
  .action((name, options) => (0, create_consumer_1.create_consumer)(name, options.producer));
+ program
+ .command('sample <name>')
+ .description('Sample data from a producer or consumer and display it in a formatted table')
+ .option('-s, --size <number>', 'Number of sample rows to display (default: 10)', '10')
+ .action((name, options) => {
+ const sampleSize = parseInt(options.size, 10);
+ if (isNaN(sampleSize) || sampleSize <= 0) {
+ console.error('Sample size must be a positive number');
+ process.exit(1);
+ }
+ (0, sample_1.sample)(name, sampleSize);
+ });
  program.parse(process.argv);
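For reference, the two commands registered above would be invoked roughly as follows (assuming the package exposes its CLI as a remora binary; the producer and schema names are placeholders):

remora automap my-producer schema-a schema-b   # map my-producer against two schemas
remora sample my-producer --size 20            # print 20 sample rows (default is 10)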
package/package.json
CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@forzalabs/remora",
- "version": "0.1.8-nasco.3",
+ "version": "0.2.0",
  "description": "A powerful CLI tool for seamless data translation.",
  "main": "index.js",
  "private": false,
@@ -22,7 +22,7 @@
  "copy-static-file": "npx tsx ./scripts/CopyStaticFile.js",
  "build": "npm i && npm run sync && tsc --outDir .build && npm run copy-static-file",
  "fast-build": "tsc --outDir .build",
- "upload": "npm run build && cd .build && npm publish --
+ "upload": "npm run build && cd .build && npm publish --access=public"
  },
  "keywords": [
  "nextjs",
@@ -56,6 +56,7 @@
  "mongodb": "^6.15.0",
  "next": "^13.4.1",
  "node-cron": "^4.2.1",
+ "openai": "^6.0.0",
  "ora": "^5.4.1",
  "react": "^18.2.0",
  "react-dom": "^18.2.0",