@forzalabs/remora 0.0.27 → 0.0.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Constants.js +1 -1
- package/definitions/json_schemas/producer-schema.json +17 -4
- package/drivers/DriverFactory.js +6 -40
- package/drivers/LocalDriver.js +80 -49
- package/drivers/S3Driver.js +160 -2
- package/engines/CryptoEngine.js +20 -1
- package/engines/ParseManager.js +20 -3
- package/engines/ProducerEngine.js +33 -17
- package/engines/RandomEngine.js +35 -0
- package/engines/consumer/PostProcessor.js +7 -5
- package/engines/execution/ExecutionPlanner.js +2 -1
- package/package.json +4 -1
package/Constants.js
CHANGED
|
@@ -68,9 +68,11 @@
|
|
|
68
68
|
"enum": [
|
|
69
69
|
"hash",
|
|
70
70
|
"mask",
|
|
71
|
-
"crypt"
|
|
71
|
+
"crypt",
|
|
72
|
+
"random",
|
|
73
|
+
"seeded-random"
|
|
72
74
|
],
|
|
73
|
-
"description": "Masking type to apply to this dimension"
|
|
75
|
+
"description": "Masking type to apply to this dimension. 'hash' replaces with a hashed value. 'mask' replaces characters with a mask character. 'crypt' encrypts the value. 'random' replaces with a random value. 'seeded-random' replaces with a random value generated from a seed."
|
|
74
76
|
}
|
|
75
77
|
},
|
|
76
78
|
"required": [
|
|
@@ -132,13 +134,24 @@
|
|
|
132
134
|
"enum": [
|
|
133
135
|
"JSON",
|
|
134
136
|
"JSONL",
|
|
135
|
-
"CSV"
|
|
137
|
+
"CSV",
|
|
138
|
+
"TXT",
|
|
139
|
+
"XLS",
|
|
140
|
+
"XLSX"
|
|
136
141
|
],
|
|
137
142
|
"description": "The type of file to read"
|
|
138
143
|
},
|
|
139
144
|
"delimiter": {
|
|
140
145
|
"type": "string",
|
|
141
|
-
"description": "The column delimiter for
|
|
146
|
+
"description": "The column delimiter for CSV or TXT files if different from the default (,)."
|
|
147
|
+
},
|
|
148
|
+
"hasHeaderRow": {
|
|
149
|
+
"type": "boolean",
|
|
150
|
+
"description": "For TXT files, specifies whether the file has a header row containing column names. Defaults to true."
|
|
151
|
+
},
|
|
152
|
+
"sheetName": {
|
|
153
|
+
"type": "string",
|
|
154
|
+
"description": "For Excel files (.xls/.xlsx), specifies the name of the sheet to read data from. If not specified, the first sheet will be used."
|
|
142
155
|
}
|
|
143
156
|
},
|
|
144
157
|
"additionalProperties": false
|
package/drivers/DriverFactory.js
CHANGED
|
@@ -1,37 +1,4 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
-
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
-
if (k2 === undefined) k2 = k;
|
|
4
|
-
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
-
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
-
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
-
}
|
|
8
|
-
Object.defineProperty(o, k2, desc);
|
|
9
|
-
}) : (function(o, m, k, k2) {
|
|
10
|
-
if (k2 === undefined) k2 = k;
|
|
11
|
-
o[k2] = m[k];
|
|
12
|
-
}));
|
|
13
|
-
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
-
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
-
}) : function(o, v) {
|
|
16
|
-
o["default"] = v;
|
|
17
|
-
});
|
|
18
|
-
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
-
var ownKeys = function(o) {
|
|
20
|
-
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
-
var ar = [];
|
|
22
|
-
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
-
return ar;
|
|
24
|
-
};
|
|
25
|
-
return ownKeys(o);
|
|
26
|
-
};
|
|
27
|
-
return function (mod) {
|
|
28
|
-
if (mod && mod.__esModule) return mod;
|
|
29
|
-
var result = {};
|
|
30
|
-
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
-
__setModuleDefault(result, mod);
|
|
32
|
-
return result;
|
|
33
|
-
};
|
|
34
|
-
})();
|
|
35
2
|
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
36
3
|
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
37
4
|
return new (P || (P = Promise))(function (resolve, reject) {
|
|
@@ -45,10 +12,9 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
45
12
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
46
13
|
};
|
|
47
14
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
48
|
-
const LocalDriver_1 =
|
|
15
|
+
const LocalDriver_1 = require("./LocalDriver");
|
|
49
16
|
const RedshiftDriver_1 = __importDefault(require("./RedshiftDriver"));
|
|
50
|
-
const S3Driver_1 =
|
|
51
|
-
const S3SourceDriver_1 = __importDefault(require("./S3SourceDriver"));
|
|
17
|
+
const S3Driver_1 = require("./S3Driver");
|
|
52
18
|
class DriverFactoryClass {
|
|
53
19
|
constructor() {
|
|
54
20
|
this.instantiateSource = (source) => __awaiter(this, void 0, void 0, function* () {
|
|
@@ -60,12 +26,12 @@ class DriverFactoryClass {
|
|
|
60
26
|
return driver;
|
|
61
27
|
}
|
|
62
28
|
case 'aws-s3': {
|
|
63
|
-
const driver = new
|
|
29
|
+
const driver = new S3Driver_1.S3SourceDriver();
|
|
64
30
|
yield driver.init(source);
|
|
65
31
|
return driver;
|
|
66
32
|
}
|
|
67
33
|
case 'local': {
|
|
68
|
-
const driver = new LocalDriver_1.
|
|
34
|
+
const driver = new LocalDriver_1.LocalSourceDriver();
|
|
69
35
|
yield driver.init(source);
|
|
70
36
|
return driver;
|
|
71
37
|
}
|
|
@@ -76,12 +42,12 @@ class DriverFactoryClass {
|
|
|
76
42
|
switch (source.engine) {
|
|
77
43
|
// TODO: implement all the other engines
|
|
78
44
|
case 'aws-s3': {
|
|
79
|
-
const driver = new S3Driver_1.
|
|
45
|
+
const driver = new S3Driver_1.S3DestinationDriver();
|
|
80
46
|
yield driver.init(source);
|
|
81
47
|
return driver;
|
|
82
48
|
}
|
|
83
49
|
case 'local': {
|
|
84
|
-
const driver = new LocalDriver_1.
|
|
50
|
+
const driver = new LocalDriver_1.LocalDestinationDriver();
|
|
85
51
|
yield driver.init(source);
|
|
86
52
|
return driver;
|
|
87
53
|
}
|
package/drivers/LocalDriver.js
CHANGED
|
@@ -52,12 +52,14 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
52
52
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
53
53
|
};
|
|
54
54
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
55
|
-
exports.
|
|
55
|
+
exports.LocalDestinationDriver = exports.LocalSourceDriver = void 0;
|
|
56
56
|
const fs = __importStar(require("fs"));
|
|
57
57
|
const path_1 = __importDefault(require("path"));
|
|
58
58
|
const readline_1 = __importDefault(require("readline"));
|
|
59
59
|
const Affirm_1 = __importDefault(require("../core/Affirm"));
|
|
60
|
-
|
|
60
|
+
const Algo_1 = __importDefault(require("../core/Algo"));
|
|
61
|
+
const xlsx_1 = __importDefault(require("xlsx"));
|
|
62
|
+
class LocalSourceDriver {
|
|
61
63
|
constructor() {
|
|
62
64
|
this.init = (source) => __awaiter(this, void 0, void 0, function* () {
|
|
63
65
|
const fileURL = source.authentication['path'];
|
|
@@ -68,79 +70,109 @@ class LocalDriver {
|
|
|
68
70
|
return this;
|
|
69
71
|
});
|
|
70
72
|
this.download = (request) => __awaiter(this, void 0, void 0, function* () {
|
|
71
|
-
var _a, e_1, _b, _c;
|
|
72
73
|
(0, Affirm_1.default)(this._path, `Invalid path`);
|
|
73
74
|
(0, Affirm_1.default)(request, `Invalid download request`);
|
|
74
75
|
(0, Affirm_1.default)(request.fileKey, `Invalid file key for download request`);
|
|
75
|
-
|
|
76
|
-
const
|
|
77
|
-
const
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
76
|
+
(0, Affirm_1.default)(request.fileType, `Invalid file type for download request`);
|
|
77
|
+
const { fileKey, options } = request;
|
|
78
|
+
const fileUrl = path_1.default.join(this._path, fileKey);
|
|
79
|
+
switch (request.fileType) {
|
|
80
|
+
case 'CSV':
|
|
81
|
+
case 'JSON':
|
|
82
|
+
case 'JSONL':
|
|
83
|
+
case 'TXT':
|
|
84
|
+
return yield this._readLines(fileUrl);
|
|
85
|
+
case 'XLS':
|
|
86
|
+
case 'XLSX':
|
|
87
|
+
return yield this._readExcelLines(fileUrl, options === null || options === void 0 ? void 0 : options.sheetName);
|
|
86
88
|
}
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
89
|
+
});
|
|
90
|
+
this.readLinesInRange = (request) => __awaiter(this, void 0, void 0, function* () {
|
|
91
|
+
(0, Affirm_1.default)(this._path, `Invalid path`);
|
|
92
|
+
(0, Affirm_1.default)(request, 'Invalid read options');
|
|
93
|
+
(0, Affirm_1.default)(request.fileKey, 'Invalid file key');
|
|
94
|
+
(0, Affirm_1.default)(request.fileType, `Invalid file type`);
|
|
95
|
+
(0, Affirm_1.default)(request.options, `Invalid request options`);
|
|
96
|
+
Affirm_1.default.hasValue(request.options.lineFrom, `Invalid request options line from`);
|
|
97
|
+
Affirm_1.default.hasValue(request.options.lineTo, `Invalid request options line to`);
|
|
98
|
+
const { fileKey, fileType, options: { lineFrom, lineTo, sheetName } } = request;
|
|
99
|
+
const fileUrl = path_1.default.join(this._path, fileKey);
|
|
100
|
+
switch (fileType) {
|
|
101
|
+
case 'CSV':
|
|
102
|
+
case 'JSON':
|
|
103
|
+
case 'JSONL':
|
|
104
|
+
case 'TXT':
|
|
105
|
+
return yield this._readLines(fileUrl, lineFrom, lineTo);
|
|
106
|
+
case 'XLS':
|
|
107
|
+
case 'XLSX':
|
|
108
|
+
return yield this._readExcelLines(fileUrl, sheetName, lineFrom, lineTo);
|
|
93
109
|
}
|
|
94
|
-
reader.close();
|
|
95
|
-
stream.close();
|
|
96
|
-
return lines;
|
|
97
110
|
});
|
|
98
|
-
this.
|
|
99
|
-
var _a, e_2, _b, _c;
|
|
111
|
+
this.exist = (producer) => __awaiter(this, void 0, void 0, function* () {
|
|
100
112
|
(0, Affirm_1.default)(this._path, `Invalid path`);
|
|
101
|
-
(0, Affirm_1.default)(
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
const
|
|
105
|
-
|
|
113
|
+
(0, Affirm_1.default)(producer, `Invalid producer`);
|
|
114
|
+
const fileKey = producer.settings.fileKey;
|
|
115
|
+
(0, Affirm_1.default)(fileKey, `Invalid file key for download request`);
|
|
116
|
+
const fileUrl = path_1.default.join(this._path, fileKey);
|
|
117
|
+
return fs.existsSync(fileUrl);
|
|
118
|
+
});
|
|
119
|
+
this._readLines = (fileUri, lineFrom, lineTo) => __awaiter(this, void 0, void 0, function* () {
|
|
120
|
+
var _a, e_1, _b, _c;
|
|
121
|
+
const stream = fs.createReadStream(fileUri);
|
|
106
122
|
const reader = readline_1.default.createInterface({ input: stream, crlfDelay: Infinity });
|
|
107
123
|
const lines = [];
|
|
108
124
|
let lineCounter = 0;
|
|
109
125
|
try {
|
|
110
|
-
for (var _d = true,
|
|
111
|
-
_c =
|
|
126
|
+
for (var _d = true, reader_1 = __asyncValues(reader), reader_1_1; reader_1_1 = yield reader_1.next(), _a = reader_1_1.done, !_a; _d = true) {
|
|
127
|
+
_c = reader_1_1.value;
|
|
112
128
|
_d = false;
|
|
113
129
|
const line = _c;
|
|
114
|
-
if (
|
|
130
|
+
if (Algo_1.default.hasVal(lineFrom) && Algo_1.default.hasVal(lineTo)) {
|
|
131
|
+
if (lineCounter >= lineFrom && lineCounter < lineTo) {
|
|
132
|
+
lines.push(line);
|
|
133
|
+
}
|
|
134
|
+
lineCounter++;
|
|
135
|
+
if (lineCounter >= lineTo)
|
|
136
|
+
break;
|
|
137
|
+
}
|
|
138
|
+
else {
|
|
115
139
|
lines.push(line);
|
|
116
140
|
}
|
|
117
|
-
lineCounter++;
|
|
118
|
-
if (lineCounter >= lineTo)
|
|
119
|
-
break;
|
|
120
141
|
}
|
|
121
142
|
}
|
|
122
|
-
catch (
|
|
143
|
+
catch (e_1_1) { e_1 = { error: e_1_1 }; }
|
|
123
144
|
finally {
|
|
124
145
|
try {
|
|
125
|
-
if (!_d && !_a && (_b =
|
|
146
|
+
if (!_d && !_a && (_b = reader_1.return)) yield _b.call(reader_1);
|
|
126
147
|
}
|
|
127
|
-
finally { if (
|
|
148
|
+
finally { if (e_1) throw e_1.error; }
|
|
128
149
|
}
|
|
129
150
|
reader.close();
|
|
130
151
|
stream.close();
|
|
131
152
|
return lines;
|
|
132
153
|
});
|
|
133
|
-
this.
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
154
|
+
this._readExcelLines = (fileUri, sheetName, lineFrom, lineTo) => __awaiter(this, void 0, void 0, function* () {
|
|
155
|
+
const excel = xlsx_1.default.readFile(fileUri);
|
|
156
|
+
let targetSheetName = sheetName;
|
|
157
|
+
if (!targetSheetName) {
|
|
158
|
+
(0, Affirm_1.default)(excel.SheetNames.length > 0, 'The Excel file has no sheets.');
|
|
159
|
+
targetSheetName = excel.SheetNames[0];
|
|
160
|
+
}
|
|
161
|
+
else {
|
|
162
|
+
(0, Affirm_1.default)(excel.SheetNames.includes(targetSheetName), `The sheet "${targetSheetName}" doesn't exist in the excel (available: ${excel.SheetNames.join(', ')})`);
|
|
163
|
+
}
|
|
164
|
+
const sheet = excel.Sheets[targetSheetName];
|
|
165
|
+
const csv = xlsx_1.default.utils.sheet_to_csv(sheet);
|
|
166
|
+
const lines = csv.split('\n');
|
|
167
|
+
if (Algo_1.default.hasVal(lineFrom) && Algo_1.default.hasVal(lineTo))
|
|
168
|
+
return lines.slice(lineFrom, lineTo + 1);
|
|
169
|
+
else
|
|
170
|
+
return lines;
|
|
140
171
|
});
|
|
141
172
|
}
|
|
142
173
|
}
|
|
143
|
-
|
|
174
|
+
exports.LocalSourceDriver = LocalSourceDriver;
|
|
175
|
+
class LocalDestinationDriver {
|
|
144
176
|
constructor() {
|
|
145
177
|
this.init = (source) => __awaiter(this, void 0, void 0, function* () {
|
|
146
178
|
(0, Affirm_1.default)(source, `Invalid source`);
|
|
@@ -206,5 +238,4 @@ class LocalDriverDestination {
|
|
|
206
238
|
});
|
|
207
239
|
}
|
|
208
240
|
}
|
|
209
|
-
exports.
|
|
210
|
-
exports.default = LocalDriver;
|
|
241
|
+
exports.LocalDestinationDriver = LocalDestinationDriver;
|
package/drivers/S3Driver.js
CHANGED
|
@@ -8,14 +8,25 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
8
8
|
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
9
|
});
|
|
10
10
|
};
|
|
11
|
+
var __asyncValues = (this && this.__asyncValues) || function (o) {
|
|
12
|
+
if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
|
|
13
|
+
var m = o[Symbol.asyncIterator], i;
|
|
14
|
+
return m ? m.call(o) : (o = typeof __values === "function" ? __values(o) : o[Symbol.iterator](), i = {}, verb("next"), verb("throw"), verb("return"), i[Symbol.asyncIterator] = function () { return this; }, i);
|
|
15
|
+
function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }
|
|
16
|
+
function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }
|
|
17
|
+
};
|
|
11
18
|
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
12
19
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
13
20
|
};
|
|
14
21
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
22
|
+
exports.S3SourceDriver = exports.S3DestinationDriver = void 0;
|
|
15
23
|
const client_s3_1 = require("@aws-sdk/client-s3");
|
|
16
24
|
const Affirm_1 = __importDefault(require("../core/Affirm"));
|
|
17
25
|
const SecretManager_1 = __importDefault(require("../engines/SecretManager"));
|
|
18
|
-
|
|
26
|
+
const readline_1 = __importDefault(require("readline"));
|
|
27
|
+
const Algo_1 = __importDefault(require("../core/Algo"));
|
|
28
|
+
const xlsx_1 = __importDefault(require("xlsx"));
|
|
29
|
+
class S3DestinationDriver {
|
|
19
30
|
constructor() {
|
|
20
31
|
this.init = (source) => __awaiter(this, void 0, void 0, function* () {
|
|
21
32
|
this._bucketName = source.authentication['bucket'];
|
|
@@ -99,4 +110,151 @@ class S3Driver {
|
|
|
99
110
|
});
|
|
100
111
|
}
|
|
101
112
|
}
|
|
102
|
-
exports.
|
|
113
|
+
exports.S3DestinationDriver = S3DestinationDriver;
|
|
114
|
+
/**
 * Source driver that reads producer files from an AWS S3 bucket.
 *
 * Text-like files (CSV, JSON, JSONL, TXT) are returned line by line; Excel
 * files (XLS, XLSX) are converted to CSV text first and then split into lines.
 */
class S3SourceDriver {
    constructor() {
        /**
         * Builds the S3 client from the source's authentication settings.
         * Credential values are passed through SecretManager.replaceSecret.
         *
         * @param source Source definition; reads `authentication.bucket`, `region`,
         *               `accessKey`, `secretKey` and the optional `sessionToken`.
         * @returns This driver instance.
         */
        this.init = (source) => __awaiter(this, void 0, void 0, function* () {
            const auth = source.authentication;
            this._bucketName = auth['bucket'];
            const sessionToken = SecretManager_1.default.replaceSecret(auth['sessionToken']);
            this._client = new client_s3_1.S3Client({
                region: auth['region'],
                credentials: {
                    accessKeyId: SecretManager_1.default.replaceSecret(auth['accessKey']),
                    secretAccessKey: SecretManager_1.default.replaceSecret(auth['secretKey']),
                    sessionToken: sessionToken ? sessionToken : undefined
                }
            });
            // TODO: is there a way to test if the connection was successful? like a query or scan that I can do?
            return this;
        });
        /**
         * Downloads a whole file and returns its lines.
         *
         * @param request `{ fileKey, fileType, options? }`; `options.sheetName` is only used for Excel files.
         * @returns Array of text lines.
         * @throws Error when the request is invalid, the file type is unsupported or the object has no body.
         */
        this.download = (request) => __awaiter(this, void 0, void 0, function* () {
            (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "connect()" first');
            (0, Affirm_1.default)(request, `Invalid download request`);
            (0, Affirm_1.default)(request.fileKey, `Invalid file key for download request`);
            const { fileKey, fileType, options } = request;
            // Resolve the reader before hitting the network so a bad file type fails fast.
            const readerKind = this._resolveReaderKind(fileType);
            const stream = yield this._fetchBody(fileKey);
            if (readerKind === 'excel')
                return yield this._readExcelLines(stream, options ? options.sheetName : undefined);
            return yield this._readLines(stream);
        });
        /**
         * Reads only a range of lines from a file.
         *
         * @param request `{ fileKey, fileType, options: { lineFrom, lineTo, sheetName? } }`.
         * @returns Array of text lines inside the requested range.
         * @throws Error when the request is invalid, the file type is unsupported or the object has no body.
         */
        this.readLinesInRange = (request) => __awaiter(this, void 0, void 0, function* () {
            (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "connect()" first');
            (0, Affirm_1.default)(request, 'Invalid read request');
            (0, Affirm_1.default)(request.options, 'Invalid read request options');
            const { fileKey, fileType, options: { sheetName, lineFrom, lineTo } } = request;
            (0, Affirm_1.default)(fileKey, 'Invalid file key');
            const readerKind = this._resolveReaderKind(fileType);
            const stream = yield this._fetchBody(fileKey);
            if (readerKind === 'excel')
                return yield this._readExcelLines(stream, sheetName, lineFrom, lineTo);
            return yield this._readLines(stream, lineFrom, lineTo);
        });
        /**
         * Checks whether the producer's file exists in the bucket (HeadObject).
         *
         * @param producer Producer definition; reads `settings.fileKey`.
         * @returns `true` when the object exists, `false` on a 404 / `NotFound` error.
         * @throws Any other error raised by the S3 client.
         */
        this.exist = (producer) => __awaiter(this, void 0, void 0, function* () {
            (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "connect()" first');
            (0, Affirm_1.default)(producer, 'Invalid read producer');
            const fileKey = producer.settings.fileKey;
            (0, Affirm_1.default)(fileKey, `Invalid file key for download request`);
            try {
                yield this._client.send(new client_s3_1.HeadObjectCommand({ Bucket: this._bucketName, Key: fileKey }));
                return true;
            }
            catch (error) {
                const status = error.$metadata ? error.$metadata.httpStatusCode : undefined;
                if (status === 404 || error.name === 'NotFound')
                    return false;
                throw error;
            }
        });
        /**
         * Maps a file type to the reader that handles it. The comparison is
         * case-insensitive.
         *
         * @returns `'text'` for CSV/JSON/JSONL/TXT, `'excel'` for XLS/XLSX.
         * @throws Error for any other value, instead of silently returning nothing.
         */
        this._resolveReaderKind = (fileType) => {
            switch (String(fileType).toUpperCase()) {
                case 'CSV':
                case 'JSON':
                case 'JSONL':
                case 'TXT':
                    return 'text';
                case 'XLS':
                case 'XLSX':
                    return 'excel';
                default:
                    throw new Error(`Unsupported file type "${fileType}" for an S3 source`);
            }
        };
        /**
         * Issues a GetObject for `fileKey` in the configured bucket and returns the response body.
         *
         * @throws Error when the response carries no body.
         */
        this._fetchBody = (fileKey) => __awaiter(this, void 0, void 0, function* () {
            const response = yield this._client.send(new client_s3_1.GetObjectCommand({
                Bucket: this._bucketName,
                Key: fileKey
            }));
            (0, Affirm_1.default)(response.Body, 'Failed to fetch object from S3');
            return response.Body;
        });
        /**
         * Reads text lines from a stream. When both `lineFrom` and `lineTo` have a
         * value, only lines with index in [lineFrom, lineTo) are kept and reading
         * stops as soon as `lineTo` is reached; otherwise every line is returned.
         */
        this._readLines = (stream, lineFrom, lineTo) => __awaiter(this, void 0, void 0, function* () {
            const reader = readline_1.default.createInterface({ input: stream, crlfDelay: Infinity });
            const bounded = Algo_1.default.hasVal(lineFrom) && Algo_1.default.hasVal(lineTo);
            const lines = [];
            const iterator = __asyncValues(reader);
            let lineCounter = 0;
            let stoppedEarly = false;
            try {
                for (let step = yield iterator.next(); !step.done; step = yield iterator.next()) {
                    if (!bounded) {
                        lines.push(step.value);
                        continue;
                    }
                    if (lineCounter >= lineFrom && lineCounter < lineTo)
                        lines.push(step.value);
                    lineCounter++;
                    if (lineCounter >= lineTo) {
                        stoppedEarly = true;
                        break;
                    }
                }
            }
            finally {
                try {
                    // Same contract as for-await: tell the iterator we left the loop before it finished.
                    if (stoppedEarly && typeof iterator.return === 'function')
                        yield iterator.return();
                }
                finally {
                    reader.close();
                    // Release the underlying body stream; after an early stop the rest of the object is not needed.
                    if (typeof stream.destroy === 'function')
                        stream.destroy();
                }
            }
            return lines;
        });
        /**
         * Buffers the whole stream, parses it as a workbook and returns the chosen
         * sheet as CSV lines. When `sheetName` is not given the first sheet is used.
         *
         * @throws Error when the workbook has no sheets or the named sheet does not exist.
         */
        this._readExcelLines = (stream, sheetName, lineFrom, lineTo) => __awaiter(this, void 0, void 0, function* () {
            const chunks = [];
            const iterator = __asyncValues(stream);
            for (let step = yield iterator.next(); !step.done; step = yield iterator.next())
                chunks.push(step.value);
            const excel = xlsx_1.default.read(Buffer.concat(chunks), { type: 'buffer' });
            let targetSheetName = sheetName;
            if (!targetSheetName) {
                (0, Affirm_1.default)(excel.SheetNames.length > 0, 'The Excel file has no sheets.');
                targetSheetName = excel.SheetNames[0];
            }
            else {
                (0, Affirm_1.default)(excel.SheetNames.includes(targetSheetName), `The sheet "${targetSheetName}" doesn't exist in the excel (available: ${excel.SheetNames.join(', ')})`);
            }
            const csv = xlsx_1.default.utils.sheet_to_csv(excel.Sheets[targetSheetName]);
            const lines = csv.split('\n');
            // NOTE(review): this range includes index `lineTo`, while _readLines treats `lineTo`
            // as exclusive. Kept as-is; confirm which bound callers expect.
            if (Algo_1.default.hasVal(lineFrom) && Algo_1.default.hasVal(lineTo))
                return lines.slice(lineFrom, lineTo + 1);
            return lines;
        });
    }
}
|
|
260
|
+
exports.S3SourceDriver = S3SourceDriver;
|
package/engines/CryptoEngine.js
CHANGED
|
@@ -5,6 +5,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
6
|
const crypto_1 = __importDefault(require("crypto"));
|
|
7
7
|
const Algo_1 = __importDefault(require("../core/Algo"));
|
|
8
|
+
const RandomEngine_1 = __importDefault(require("./RandomEngine"));
|
|
8
9
|
class CryptoEngineClass {
|
|
9
10
|
constructor() {
|
|
10
11
|
this.hashQuery = (maskType, fieldReference, fieldName) => {
|
|
@@ -24,7 +25,7 @@ class CryptoEngineClass {
|
|
|
24
25
|
this.valueToHash = (value) => {
|
|
25
26
|
return crypto_1.default.createHash('sha256').update(JSON.stringify(value)).digest('hex');
|
|
26
27
|
};
|
|
27
|
-
this.hashValue = (maskType, value) => {
|
|
28
|
+
this.hashValue = (maskType, value, valueType) => {
|
|
28
29
|
if (!Algo_1.default.hasVal(value))
|
|
29
30
|
return value;
|
|
30
31
|
if (!Algo_1.default.hasVal(maskType))
|
|
@@ -32,6 +33,24 @@ class CryptoEngineClass {
|
|
|
32
33
|
switch (maskType) {
|
|
33
34
|
case 'hash':
|
|
34
35
|
return this.valueToHash(value);
|
|
36
|
+
case 'random': {
|
|
37
|
+
switch (valueType) {
|
|
38
|
+
case 'datetime': return RandomEngine_1.default.rngDate();
|
|
39
|
+
case 'number': return RandomEngine_1.default.rng();
|
|
40
|
+
case 'string': return this.valueToHash(value);
|
|
41
|
+
default:
|
|
42
|
+
throw new Error('Not implemented yet');
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
case 'seeded-random': {
|
|
46
|
+
switch (valueType) {
|
|
47
|
+
case 'datetime': return RandomEngine_1.default.sRngDate(value);
|
|
48
|
+
case 'number': return RandomEngine_1.default.sRng(value);
|
|
49
|
+
case 'string': return this.valueToHash(value);
|
|
50
|
+
default:
|
|
51
|
+
throw new Error('Not implemented yet');
|
|
52
|
+
}
|
|
53
|
+
}
|
|
35
54
|
case 'crypt':
|
|
36
55
|
throw new Error('Not implemented yet');
|
|
37
56
|
case 'mask':
|
package/engines/ParseManager.js
CHANGED
|
@@ -18,9 +18,9 @@ class ParseManagerClass {
|
|
|
18
18
|
(0, Affirm_1.default)(lines, 'Invalid csv lines');
|
|
19
19
|
Affirm_1.default.hasValue(lines.length, 'Invalid csv lines length');
|
|
20
20
|
const delimiterChar = (_a = producer.settings.delimiter) !== null && _a !== void 0 ? _a : ',';
|
|
21
|
-
const
|
|
22
|
-
const
|
|
23
|
-
const
|
|
21
|
+
const { header, records } = this._getClassifiedRows(lines, delimiterChar, producer);
|
|
22
|
+
const headerColumns = this._extractHeader(header, delimiterChar, producer, discover);
|
|
23
|
+
const rows = records.map(x => x.split(delimiterChar).map(k => k.trim()));
|
|
24
24
|
const result = [];
|
|
25
25
|
for (const row of rows) {
|
|
26
26
|
const rowObject = {};
|
|
@@ -32,6 +32,23 @@ class ParseManagerClass {
|
|
|
32
32
|
}
|
|
33
33
|
return result;
|
|
34
34
|
};
|
|
35
|
+
/**
 * Splits raw file lines into the header line and the record lines.
 *
 * A TXT producer that explicitly sets `hasHeaderRow` to `false` has no header
 * in the file: a header is synthesised from the producer's compiled columns
 * and every input line is treated as a record. In every other case the first
 * line is the header and the remaining lines are the records.
 *
 * @param lines         Lines read from the file.
 * @param delimiterChar Column delimiter, used to join the synthesised header.
 * @param producer      Producer definition; reads `settings.fileType`,
 *                      `settings.hasHeaderRow` and `source`.
 * @returns `{ header, records }`.
 */
this._getClassifiedRows = (lines, delimiterChar, producer) => {
    const { fileType, hasHeaderRow } = producer.settings;
    // The producer schema documents `hasHeaderRow` as defaulting to true, so an
    // omitted setting must NOT be read as "no header" — only an explicit false is.
    if (fileType === 'TXT' && hasHeaderRow === false) {
        // If the file is a TXT and there isn't a header row, add a fake one that maps directly to the producer
        const source = Environment_1.default.getSource(producer.source);
        const columns = FileCompiler_1.default.compileProducer(producer, source);
        return {
            header: columns.map(x => x.nameInProducer).join(delimiterChar),
            records: lines
        };
    }
    return {
        header: lines[0],
        records: lines.slice(1)
    };
};
|
|
35
52
|
this._extractHeader = (headerLine, delimiter, producer, discover) => {
|
|
36
53
|
var _a;
|
|
37
54
|
(0, Affirm_1.default)(headerLine, `Invalid CSV header line for producer "${producer.name}"`);
|
|
@@ -91,19 +91,24 @@ class ProducerEngineClass {
|
|
|
91
91
|
(0, Affirm_1.default)(source, `No source found for producer "${producer.name}" with name "${producer.source}"`);
|
|
92
92
|
const driver = yield DriverFactory_1.default.instantiateSource(source);
|
|
93
93
|
(0, Affirm_1.default)(driver, `No driver found for producer "${producer.name}" with driver type "${source.engine}"`);
|
|
94
|
+
const { settings: { fileKey, fileType, sheetName } } = producer;
|
|
94
95
|
let lines = [];
|
|
95
96
|
if (options.readmode === 'lines')
|
|
96
|
-
lines = yield driver.readLinesInRange({ fileKey
|
|
97
|
+
lines = yield driver.readLinesInRange({ fileKey, fileType, options: { lineFrom: options.lines.from, lineTo: options.lines.to, sheetName } });
|
|
97
98
|
else
|
|
98
|
-
lines = yield driver.download({ fileKey:
|
|
99
|
+
lines = yield driver.download({ fileKey, fileType, options: { sheetName } });
|
|
99
100
|
switch ((_a = producer.settings.fileType) === null || _a === void 0 ? void 0 : _a.toUpperCase()) {
|
|
100
|
-
case 'CSV':
|
|
101
|
+
case 'CSV':
|
|
102
|
+
case 'TXT':
|
|
103
|
+
return { data: lines, dataType: 'lines-of-text' };
|
|
104
|
+
case 'XLS':
|
|
105
|
+
case 'XLSX':
|
|
101
106
|
return { data: lines, dataType: 'lines-of-text' };
|
|
102
|
-
}
|
|
103
107
|
case 'JSONL':
|
|
104
108
|
case 'JSON': {
|
|
105
|
-
if (lines.length === 1)
|
|
109
|
+
if (lines.length === 1) {
|
|
106
110
|
lines = lines[0].split('\n');
|
|
111
|
+
}
|
|
107
112
|
const json = lines.map(x => JSON.parse(x));
|
|
108
113
|
return { data: json, dataType: 'array-of-json' };
|
|
109
114
|
}
|
|
@@ -112,7 +117,7 @@ class ProducerEngineClass {
|
|
|
112
117
|
}
|
|
113
118
|
});
|
|
114
119
|
this.readSampleData = (producer_1, ...args_1) => __awaiter(this, [producer_1, ...args_1], void 0, function* (producer, sampleSize = 10, discover = false) {
|
|
115
|
-
var _a
|
|
120
|
+
var _a;
|
|
116
121
|
(0, Affirm_1.default)(producer, 'Invalid producer');
|
|
117
122
|
(0, Affirm_1.default)(sampleSize > 0, 'Sample size must be greater than 0');
|
|
118
123
|
const source = Environment_1.default.getSource(producer.source);
|
|
@@ -131,18 +136,29 @@ class ProducerEngineClass {
|
|
|
131
136
|
case 'local':
|
|
132
137
|
case 'aws-s3': {
|
|
133
138
|
const fileData = yield this.readFile(producer, { readmode: 'lines', lines: { from: 0, to: sampleSize } });
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
139
|
+
const fileType = (_a = producer.settings.fileType) === null || _a === void 0 ? void 0 : _a.toUpperCase();
|
|
140
|
+
switch (fileType) {
|
|
141
|
+
case 'CSV':
|
|
142
|
+
case 'TXT':
|
|
143
|
+
case 'XLS':
|
|
144
|
+
case 'XLSX': {
|
|
145
|
+
sampleData = ParseManager_1.default.csvLinesToJson(fileData.data, producer, discover);
|
|
146
|
+
break;
|
|
147
|
+
}
|
|
148
|
+
case 'JSON':
|
|
149
|
+
case 'JSONL': {
|
|
150
|
+
// With JSON or JSONL the readFile function already parses the strings
|
|
151
|
+
if (typeof fileData.data[0] === 'object')
|
|
152
|
+
sampleData = fileData.data;
|
|
153
|
+
else
|
|
154
|
+
sampleData = fileData.data.map(line => JSON.parse(line));
|
|
155
|
+
sampleData = sampleData.slice(0, sampleSize);
|
|
156
|
+
break;
|
|
157
|
+
}
|
|
158
|
+
default: {
|
|
140
159
|
sampleData = fileData.data;
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
}
|
|
144
|
-
else {
|
|
145
|
-
sampleData = fileData.data;
|
|
160
|
+
break;
|
|
161
|
+
}
|
|
146
162
|
}
|
|
147
163
|
break;
|
|
148
164
|
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
const seedrandom_1 = __importDefault(require("seedrandom"));
|
|
7
|
+
const Algo_1 = __importDefault(require("../core/Algo"));
|
|
8
|
+
/**
 * Random value generator used for the 'random' and 'seeded-random' masks.
 *
 * `rng` / `rngDate` draw from Math.random; `sRng` / `sRngDate` draw from a
 * seedrandom generator created from the stringified seed, so the same seed
 * always produces the same value.
 */
class RandomEngineClass {
    constructor() {
        // Fixed window for generated dates when the caller gives no bounds. It must not
        // depend on the current time, otherwise seeded dates would change between runs.
        const DEFAULT_DATE_MIN = 0;
        const DEFAULT_DATE_MAX = Date.UTC(2100, 0, 1);
        /**
         * Maps a unit draw in [0, 1) onto the requested range.
         * - no bounds: the draw is returned unchanged;
         * - `max` only: `min` defaults to 0 (used to produce NaN);
         * - `min` only: rejected, there is no sensible upper bound (used to produce NaN).
         */
        const scale = (unit, min, max) => {
            const hasMin = Algo_1.default.hasVal(min);
            const hasMax = Algo_1.default.hasVal(max);
            if (!hasMin && !hasMax)
                return unit;
            if (!hasMax)
                throw new Error('Invalid random range: "max" is required when "min" is provided');
            const lower = hasMin ? min : 0;
            return Math.floor(unit * (max - lower + 1)) + lower;
        };
        /** Applies the default date window to whichever bound is missing. */
        const dateBounds = (min, max) => [
            Algo_1.default.hasVal(min) ? min : DEFAULT_DATE_MIN,
            Algo_1.default.hasVal(max) ? max : DEFAULT_DATE_MAX
        ];
        /**
         * @returns A float in [0, 1) without bounds, otherwise an integer in [min, max].
         * @throws Error when `min` is given without `max`.
         */
        this.rng = (min, max) => scale(Math.random(), min, max);
        /**
         * Same as `rng`, but the draw comes from a generator seeded with `String(seed)`.
         *
         * @throws Error when `min` is given without `max`.
         */
        this.sRng = (seed, min, max) => {
            const seeded = (0, seedrandom_1.default)(String(seed));
            return scale(seeded(), min, max);
        };
        /**
         * @param min Lower bound as epoch milliseconds (defaults to 0).
         * @param max Upper bound as epoch milliseconds (defaults to 2100-01-01 UTC).
         * @returns A random date serialised with `Date.prototype.toJSON`.
         */
        this.rngDate = (min, max) => {
            const [from, to] = dateBounds(min, max);
            return new Date(this.rng(from, to)).toJSON();
        };
        /**
         * Seeded variant of `rngDate`: the same seed and bounds always give the same date.
         */
        this.sRngDate = (seed, min, max) => {
            const [from, to] = dateBounds(min, max);
            return new Date(this.sRng(seed, from, to)).toJSON();
        };
    }
}
|
|
34
|
+
const RandomEngine = new RandomEngineClass();
|
|
35
|
+
exports.default = RandomEngine;
|
|
@@ -33,15 +33,16 @@ class PostProcessorClass {
|
|
|
33
33
|
const groups = Algo_1.default.groupBy(items, groupingRule.groupingKey);
|
|
34
34
|
const projections = [];
|
|
35
35
|
groups.forEach(gItems => {
|
|
36
|
-
var _a;
|
|
36
|
+
var _a, _b, _c;
|
|
37
37
|
const projected = {};
|
|
38
38
|
const first = gItems[0];
|
|
39
39
|
for (const field of allFields) {
|
|
40
40
|
const { key, alias, grouping } = field.cField;
|
|
41
41
|
const fieldKey = alias !== null && alias !== void 0 ? alias : key;
|
|
42
42
|
const maskType = (_a = field.dimension) === null || _a === void 0 ? void 0 : _a.mask;
|
|
43
|
+
const fieldType = (_c = (_b = field.dimension) === null || _b === void 0 ? void 0 : _b.type) !== null && _c !== void 0 ? _c : 'string';
|
|
43
44
|
if (!field.cField.grouping) {
|
|
44
|
-
projected[fieldKey] = CryptoEngine_1.default.hashValue(maskType, first[fieldKey]);
|
|
45
|
+
projected[fieldKey] = CryptoEngine_1.default.hashValue(maskType, first[fieldKey], fieldType);
|
|
45
46
|
}
|
|
46
47
|
else {
|
|
47
48
|
const { subFields } = grouping;
|
|
@@ -64,7 +65,7 @@ class PostProcessorClass {
|
|
|
64
65
|
const fieldType = (_c = (_b = field.dimension) === null || _b === void 0 ? void 0 : _b.type) !== null && _c !== void 0 ? _c : 'string';
|
|
65
66
|
const fieldValue = this._getFieldValue(x, field);
|
|
66
67
|
if (Algo_1.default.hasVal(maskType))
|
|
67
|
-
projected[fieldKey] = CryptoEngine_1.default.hashValue(maskType, fieldValue);
|
|
68
|
+
projected[fieldKey] = CryptoEngine_1.default.hashValue(maskType, fieldValue, fieldType);
|
|
68
69
|
else
|
|
69
70
|
projected[fieldKey] = TypeCaster_1.default.cast(fieldValue, fieldType);
|
|
70
71
|
}
|
|
@@ -87,9 +88,10 @@ class PostProcessorClass {
|
|
|
87
88
|
const columns = FileCompiler_1.default.compileProducer(producer, source);
|
|
88
89
|
(0, Affirm_1.default)(columns, `Invalid columns from compilation for producer "${producer.name}"`);
|
|
89
90
|
const unpackDimension = (item, dimension) => {
|
|
90
|
-
var _a;
|
|
91
|
+
var _a, _b, _c;
|
|
91
92
|
const { nameInProducer, aliasInProducer } = dimension;
|
|
92
93
|
const maskType = (_a = dimension.dimension.mask) !== null && _a !== void 0 ? _a : undefined;
|
|
94
|
+
const fieldType = (_c = (_b = dimension.dimension) === null || _b === void 0 ? void 0 : _b.type) !== null && _c !== void 0 ? _c : 'string';
|
|
93
95
|
const keys = aliasInProducer.split('.');
|
|
94
96
|
let prevValue = item;
|
|
95
97
|
for (const key of keys) {
|
|
@@ -114,7 +116,7 @@ class PostProcessorClass {
|
|
|
114
116
|
prevValue = prevValue === null || prevValue === void 0 ? void 0 : prevValue[key];
|
|
115
117
|
}
|
|
116
118
|
}
|
|
117
|
-
prevValue = CryptoEngine_1.default.hashValue(maskType, prevValue);
|
|
119
|
+
prevValue = CryptoEngine_1.default.hashValue(maskType, prevValue, fieldType);
|
|
118
120
|
const res = { [nameInProducer]: prevValue };
|
|
119
121
|
return res;
|
|
120
122
|
};
|
|
@@ -104,7 +104,8 @@ class ExecutionPlannerClas {
|
|
|
104
104
|
plan.push({ type: 'read-file-lines', producer, lines: { from: (_a = options.offset) !== null && _a !== void 0 ? _a : 0, to: options.limit ? (options.offset + options.limit) : undefined } });
|
|
105
105
|
else
|
|
106
106
|
plan.push({ type: 'read-file-whole', producer });
|
|
107
|
-
|
|
107
|
+
const fileType = (_b = producer.settings.fileType) === null || _b === void 0 ? void 0 : _b.toUpperCase();
|
|
108
|
+
if (fileType === 'CSV' || fileType === 'TXT' || fileType === 'XLS' || fileType === 'XLSX')
|
|
108
109
|
plan.push({ type: 'csv-to-json', producer });
|
|
109
110
|
if (producer.dimensions.some(x => { var _a, _b; return ((_a = x.alias) === null || _a === void 0 ? void 0 : _a.includes('{')) || ((_b = x.alias) === null || _b === void 0 ? void 0 : _b.includes('[')); }))
|
|
110
111
|
plan.push({ type: 'nested-field-unpacking', producer });
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@forzalabs/remora",
|
|
3
|
-
"version": "0.0.27",
|
|
3
|
+
"version": "0.0.29",
|
|
4
4
|
"description": "A powerful CLI tool for seamless data translation.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"private": false,
|
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
},
|
|
10
10
|
"scripts": {
|
|
11
11
|
"sync": "cd ../dev_ops && npm run sync",
|
|
12
|
+
"tsc-check": "npx tsc --noemit",
|
|
12
13
|
"init": "npx tsx ./src/index.ts init",
|
|
13
14
|
"version": "npx tsx ./src/index.ts -v",
|
|
14
15
|
"run": "npx tsx ./src/index.ts run",
|
|
@@ -53,6 +54,8 @@
|
|
|
53
54
|
"ora": "^5.4.1",
|
|
54
55
|
"react": "^18.2.0",
|
|
55
56
|
"react-dom": "^18.2.0",
|
|
57
|
+
"seedrandom": "^3.0.5",
|
|
58
|
+
"xlsx": "^0.18.5",
|
|
56
59
|
"zod": "^3.24.2"
|
|
57
60
|
},
|
|
58
61
|
"devDependencies": {
|