@forzalabs/remora 0.0.27 → 0.0.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Constants.js CHANGED
@@ -1,7 +1,7 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  const CONSTANTS = {
4
- cliVersion: '0.0.27',
4
+ cliVersion: '0.0.29',
5
5
  lambdaVersion: 1,
6
6
  port: 5069,
7
7
  defaults: {
@@ -68,9 +68,11 @@
68
68
  "enum": [
69
69
  "hash",
70
70
  "mask",
71
- "crypt"
71
+ "crypt",
72
+ "random",
73
+ "seeded-random"
72
74
  ],
73
- "description": "Masking type to apply to this dimension"
75
+ "description": "Masking type to apply to this dimension. 'hash' replaces with a hashed value. 'mask' replaces characters with a mask character. 'crypt' encrypts the value. 'random' replaces with a random value. 'seeded-random' replaces with a random value generated from a seed."
74
76
  }
75
77
  },
76
78
  "required": [
@@ -132,13 +134,24 @@
132
134
  "enum": [
133
135
  "JSON",
134
136
  "JSONL",
135
- "CSV"
137
+ "CSV",
138
+ "TXT",
139
+ "XLS",
140
+ "XLSX"
136
141
  ],
137
142
  "description": "The type of file to read"
138
143
  },
139
144
  "delimiter": {
140
145
  "type": "string",
141
- "description": "The column delimiter for the CSV file if different from the default (,)."
146
+ "description": "The column delimiter for CSV or TXT files if different from the default (,)."
147
+ },
148
+ "hasHeaderRow": {
149
+ "type": "boolean",
150
+ "description": "For TXT files, specifies whether the file has a header row containing column names. Defaults to true."
151
+ },
152
+ "sheetName": {
153
+ "type": "string",
154
+ "description": "For Excel files (.xls/.xlsx), specifies the name of the sheet to read data from. If not specified, the first sheet will be used."
142
155
  }
143
156
  },
144
157
  "additionalProperties": false
@@ -1,37 +1,4 @@
1
1
  "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
- Object.defineProperty(o, "default", { enumerable: true, value: v });
15
- }) : function(o, v) {
16
- o["default"] = v;
17
- });
18
- var __importStar = (this && this.__importStar) || (function () {
19
- var ownKeys = function(o) {
20
- ownKeys = Object.getOwnPropertyNames || function (o) {
21
- var ar = [];
22
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
- return ar;
24
- };
25
- return ownKeys(o);
26
- };
27
- return function (mod) {
28
- if (mod && mod.__esModule) return mod;
29
- var result = {};
30
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
- __setModuleDefault(result, mod);
32
- return result;
33
- };
34
- })();
35
2
  var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
36
3
  function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
37
4
  return new (P || (P = Promise))(function (resolve, reject) {
@@ -45,10 +12,9 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
45
12
  return (mod && mod.__esModule) ? mod : { "default": mod };
46
13
  };
47
14
  Object.defineProperty(exports, "__esModule", { value: true });
48
- const LocalDriver_1 = __importStar(require("./LocalDriver"));
15
+ const LocalDriver_1 = require("./LocalDriver");
49
16
  const RedshiftDriver_1 = __importDefault(require("./RedshiftDriver"));
50
- const S3Driver_1 = __importDefault(require("./S3Driver"));
51
- const S3SourceDriver_1 = __importDefault(require("./S3SourceDriver"));
17
+ const S3Driver_1 = require("./S3Driver");
52
18
  class DriverFactoryClass {
53
19
  constructor() {
54
20
  this.instantiateSource = (source) => __awaiter(this, void 0, void 0, function* () {
@@ -60,12 +26,12 @@ class DriverFactoryClass {
60
26
  return driver;
61
27
  }
62
28
  case 'aws-s3': {
63
- const driver = new S3SourceDriver_1.default();
29
+ const driver = new S3Driver_1.S3SourceDriver();
64
30
  yield driver.init(source);
65
31
  return driver;
66
32
  }
67
33
  case 'local': {
68
- const driver = new LocalDriver_1.default();
34
+ const driver = new LocalDriver_1.LocalSourceDriver();
69
35
  yield driver.init(source);
70
36
  return driver;
71
37
  }
@@ -76,12 +42,12 @@ class DriverFactoryClass {
76
42
  switch (source.engine) {
77
43
  // TODO: implement all the other engines
78
44
  case 'aws-s3': {
79
- const driver = new S3Driver_1.default();
45
+ const driver = new S3Driver_1.S3DestinationDriver();
80
46
  yield driver.init(source);
81
47
  return driver;
82
48
  }
83
49
  case 'local': {
84
- const driver = new LocalDriver_1.LocalDriverDestination();
50
+ const driver = new LocalDriver_1.LocalDestinationDriver();
85
51
  yield driver.init(source);
86
52
  return driver;
87
53
  }
@@ -52,12 +52,14 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
52
52
  return (mod && mod.__esModule) ? mod : { "default": mod };
53
53
  };
54
54
  Object.defineProperty(exports, "__esModule", { value: true });
55
- exports.LocalDriverDestination = void 0;
55
+ exports.LocalDestinationDriver = exports.LocalSourceDriver = void 0;
56
56
  const fs = __importStar(require("fs"));
57
57
  const path_1 = __importDefault(require("path"));
58
58
  const readline_1 = __importDefault(require("readline"));
59
59
  const Affirm_1 = __importDefault(require("../core/Affirm"));
60
- class LocalDriver {
60
+ const Algo_1 = __importDefault(require("../core/Algo"));
61
+ const xlsx_1 = __importDefault(require("xlsx"));
62
+ class LocalSourceDriver {
61
63
  constructor() {
62
64
  this.init = (source) => __awaiter(this, void 0, void 0, function* () {
63
65
  const fileURL = source.authentication['path'];
@@ -68,79 +70,109 @@ class LocalDriver {
68
70
  return this;
69
71
  });
70
72
  this.download = (request) => __awaiter(this, void 0, void 0, function* () {
71
- var _a, e_1, _b, _c;
72
73
  (0, Affirm_1.default)(this._path, `Invalid path`);
73
74
  (0, Affirm_1.default)(request, `Invalid download request`);
74
75
  (0, Affirm_1.default)(request.fileKey, `Invalid file key for download request`);
75
- const fileUrl = path_1.default.join(this._path, request.fileKey);
76
- const stream = fs.createReadStream(fileUrl);
77
- const reader = readline_1.default.createInterface({ input: stream, crlfDelay: Infinity });
78
- const lines = [];
79
- try {
80
- for (var _d = true, reader_1 = __asyncValues(reader), reader_1_1; reader_1_1 = yield reader_1.next(), _a = reader_1_1.done, !_a; _d = true) {
81
- _c = reader_1_1.value;
82
- _d = false;
83
- const line = _c;
84
- lines.push(line);
85
- }
76
+ (0, Affirm_1.default)(request.fileType, `Invalid file type for download request`);
77
+ const { fileKey, options } = request;
78
+ const fileUrl = path_1.default.join(this._path, fileKey);
79
+ switch (request.fileType) {
80
+ case 'CSV':
81
+ case 'JSON':
82
+ case 'JSONL':
83
+ case 'TXT':
84
+ return yield this._readLines(fileUrl);
85
+ case 'XLS':
86
+ case 'XLSX':
87
+ return yield this._readExcelLines(fileUrl, options === null || options === void 0 ? void 0 : options.sheetName);
86
88
  }
87
- catch (e_1_1) { e_1 = { error: e_1_1 }; }
88
- finally {
89
- try {
90
- if (!_d && !_a && (_b = reader_1.return)) yield _b.call(reader_1);
91
- }
92
- finally { if (e_1) throw e_1.error; }
89
+ });
90
+ this.readLinesInRange = (request) => __awaiter(this, void 0, void 0, function* () {
91
+ (0, Affirm_1.default)(this._path, `Invalid path`);
92
+ (0, Affirm_1.default)(request, 'Invalid read options');
93
+ (0, Affirm_1.default)(request.fileKey, 'Invalid file key');
94
+ (0, Affirm_1.default)(request.fileType, `Invalid file type`);
95
+ (0, Affirm_1.default)(request.options, `Invalid request options`);
96
+ Affirm_1.default.hasValue(request.options.lineFrom, `Invalid request options line from`);
97
+ Affirm_1.default.hasValue(request.options.lineTo, `Invalid request options line to`);
98
+ const { fileKey, fileType, options: { lineFrom, lineTo, sheetName } } = request;
99
+ const fileUrl = path_1.default.join(this._path, fileKey);
100
+ switch (fileType) {
101
+ case 'CSV':
102
+ case 'JSON':
103
+ case 'JSONL':
104
+ case 'TXT':
105
+ return yield this._readLines(fileUrl, lineFrom, lineTo);
106
+ case 'XLS':
107
+ case 'XLSX':
108
+ return yield this._readExcelLines(fileUrl, sheetName, lineFrom, lineTo);
93
109
  }
94
- reader.close();
95
- stream.close();
96
- return lines;
97
110
  });
98
- this.readLinesInRange = (readOptions) => __awaiter(this, void 0, void 0, function* () {
99
- var _a, e_2, _b, _c;
111
+ this.exist = (producer) => __awaiter(this, void 0, void 0, function* () {
100
112
  (0, Affirm_1.default)(this._path, `Invalid path`);
101
- (0, Affirm_1.default)(readOptions, 'Invalid read options');
102
- (0, Affirm_1.default)(readOptions.fileKey, 'Invalid file key');
103
- const fileUrl = path_1.default.join(this._path, readOptions.fileKey);
104
- const { lineFrom, lineTo } = readOptions;
105
- const stream = fs.createReadStream(fileUrl);
113
+ (0, Affirm_1.default)(producer, `Invalid producer`);
114
+ const fileKey = producer.settings.fileKey;
115
+ (0, Affirm_1.default)(fileKey, `Invalid file key for download request`);
116
+ const fileUrl = path_1.default.join(this._path, fileKey);
117
+ return fs.existsSync(fileUrl);
118
+ });
119
+ this._readLines = (fileUri, lineFrom, lineTo) => __awaiter(this, void 0, void 0, function* () {
120
+ var _a, e_1, _b, _c;
121
+ const stream = fs.createReadStream(fileUri);
106
122
  const reader = readline_1.default.createInterface({ input: stream, crlfDelay: Infinity });
107
123
  const lines = [];
108
124
  let lineCounter = 0;
109
125
  try {
110
- for (var _d = true, reader_2 = __asyncValues(reader), reader_2_1; reader_2_1 = yield reader_2.next(), _a = reader_2_1.done, !_a; _d = true) {
111
- _c = reader_2_1.value;
126
+ for (var _d = true, reader_1 = __asyncValues(reader), reader_1_1; reader_1_1 = yield reader_1.next(), _a = reader_1_1.done, !_a; _d = true) {
127
+ _c = reader_1_1.value;
112
128
  _d = false;
113
129
  const line = _c;
114
- if (lineCounter >= lineFrom && lineCounter < lineTo) {
130
+ if (Algo_1.default.hasVal(lineFrom) && Algo_1.default.hasVal(lineTo)) {
131
+ if (lineCounter >= lineFrom && lineCounter < lineTo) {
132
+ lines.push(line);
133
+ }
134
+ lineCounter++;
135
+ if (lineCounter >= lineTo)
136
+ break;
137
+ }
138
+ else {
115
139
  lines.push(line);
116
140
  }
117
- lineCounter++;
118
- if (lineCounter >= lineTo)
119
- break;
120
141
  }
121
142
  }
122
- catch (e_2_1) { e_2 = { error: e_2_1 }; }
143
+ catch (e_1_1) { e_1 = { error: e_1_1 }; }
123
144
  finally {
124
145
  try {
125
- if (!_d && !_a && (_b = reader_2.return)) yield _b.call(reader_2);
146
+ if (!_d && !_a && (_b = reader_1.return)) yield _b.call(reader_1);
126
147
  }
127
- finally { if (e_2) throw e_2.error; }
148
+ finally { if (e_1) throw e_1.error; }
128
149
  }
129
150
  reader.close();
130
151
  stream.close();
131
152
  return lines;
132
153
  });
133
- this.exist = (producer) => __awaiter(this, void 0, void 0, function* () {
134
- (0, Affirm_1.default)(this._path, `Invalid path`);
135
- (0, Affirm_1.default)(producer, `Invalid producer`);
136
- const fileKey = producer.settings.fileKey;
137
- (0, Affirm_1.default)(fileKey, `Invalid file key for download request`);
138
- const fileUrl = path_1.default.join(this._path, fileKey);
139
- return fs.existsSync(fileUrl);
/**
 * Reads one sheet of a local Excel workbook and returns it as CSV text lines.
 *
 * @param fileUri   Path of the workbook, passed straight to `xlsx.readFile`.
 * @param sheetName Optional sheet to read; when falsy the first sheet of the workbook is used.
 * @param lineFrom  Optional index of the first line to return (0-based, inclusive).
 * @param lineTo    Optional index at which to stop (exclusive).
 * @returns The sheet converted to CSV and split on '\n'; the whole sheet unless both
 *          `lineFrom` and `lineTo` have a value.
 * @throws Through Affirm when the workbook has no sheets or the requested sheet is missing.
 */
this._readExcelLines = (fileUri, sheetName, lineFrom, lineTo) => __awaiter(this, void 0, void 0, function* () {
    const excel = xlsx_1.default.readFile(fileUri);
    let targetSheetName = sheetName;
    if (!targetSheetName) {
        (0, Affirm_1.default)(excel.SheetNames.length > 0, 'The Excel file has no sheets.');
        targetSheetName = excel.SheetNames[0];
    }
    else {
        (0, Affirm_1.default)(excel.SheetNames.includes(targetSheetName), `The sheet "${targetSheetName}" doesn't exist in the excel (available: ${excel.SheetNames.join(', ')})`);
    }
    const sheet = excel.Sheets[targetSheetName];
    const csv = xlsx_1.default.utils.sheet_to_csv(sheet);
    const lines = csv.split('\n');
    // `lineTo` is exclusive, matching `_readLines` (which keeps `lineCounter < lineTo`),
    // so the same range request yields the same number of lines for every file type.
    if (Algo_1.default.hasVal(lineFrom) && Algo_1.default.hasVal(lineTo))
        return lines.slice(lineFrom, lineTo);
    return lines;
});
141
172
  }
142
173
  }
143
- class LocalDriverDestination {
174
+ exports.LocalSourceDriver = LocalSourceDriver;
175
+ class LocalDestinationDriver {
144
176
  constructor() {
145
177
  this.init = (source) => __awaiter(this, void 0, void 0, function* () {
146
178
  (0, Affirm_1.default)(source, `Invalid source`);
@@ -206,5 +238,4 @@ class LocalDriverDestination {
206
238
  });
207
239
  }
208
240
  }
209
- exports.LocalDriverDestination = LocalDriverDestination;
210
- exports.default = LocalDriver;
241
+ exports.LocalDestinationDriver = LocalDestinationDriver;
@@ -8,14 +8,25 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
8
8
  step((generator = generator.apply(thisArg, _arguments || [])).next());
9
9
  });
10
10
  };
11
+ var __asyncValues = (this && this.__asyncValues) || function (o) {
12
+ if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
13
+ var m = o[Symbol.asyncIterator], i;
14
+ return m ? m.call(o) : (o = typeof __values === "function" ? __values(o) : o[Symbol.iterator](), i = {}, verb("next"), verb("throw"), verb("return"), i[Symbol.asyncIterator] = function () { return this; }, i);
15
+ function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }
16
+ function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }
17
+ };
11
18
  var __importDefault = (this && this.__importDefault) || function (mod) {
12
19
  return (mod && mod.__esModule) ? mod : { "default": mod };
13
20
  };
14
21
  Object.defineProperty(exports, "__esModule", { value: true });
22
+ exports.S3SourceDriver = exports.S3DestinationDriver = void 0;
15
23
  const client_s3_1 = require("@aws-sdk/client-s3");
16
24
  const Affirm_1 = __importDefault(require("../core/Affirm"));
17
25
  const SecretManager_1 = __importDefault(require("../engines/SecretManager"));
18
- class S3Driver {
26
+ const readline_1 = __importDefault(require("readline"));
27
+ const Algo_1 = __importDefault(require("../core/Algo"));
28
+ const xlsx_1 = __importDefault(require("xlsx"));
29
+ class S3DestinationDriver {
19
30
  constructor() {
20
31
  this.init = (source) => __awaiter(this, void 0, void 0, function* () {
21
32
  this._bucketName = source.authentication['bucket'];
@@ -99,4 +110,151 @@ class S3Driver {
99
110
  });
100
111
  }
101
112
  }
102
- exports.default = S3Driver;
113
+ exports.S3DestinationDriver = S3DestinationDriver;
/**
 * Source driver that reads producer files out of an AWS S3 bucket.
 *
 * Text formats (CSV, JSON, JSONL, TXT) are read line by line from the object body;
 * Excel workbooks (XLS, XLSX) are buffered in full, parsed, and returned as CSV lines.
 */
class S3SourceDriver {
    constructor() {
        /**
         * Stores the bucket name and builds the S3 client from `source.authentication`
         * (bucket, region, accessKey, secretKey, optional sessionToken). Credential values
         * are passed through `SecretManager.replaceSecret` before use.
         * @returns This driver instance.
         */
        this.init = (source) => __awaiter(this, void 0, void 0, function* () {
            this._bucketName = source.authentication['bucket'];
            const sessionToken = SecretManager_1.default.replaceSecret(source.authentication['sessionToken']);
            const config = {
                region: source.authentication['region'],
                credentials: {
                    accessKeyId: SecretManager_1.default.replaceSecret(source.authentication['accessKey']),
                    secretAccessKey: SecretManager_1.default.replaceSecret(source.authentication['secretKey']),
                    sessionToken: sessionToken ? sessionToken : undefined
                }
            };
            this._client = new client_s3_1.S3Client(config);
            // TODO: is there a way to test if the connection was successful? like a query or scan that I can do?
            return this;
        });
        /**
         * Reads the whole object as an array of text lines.
         * @param request `{ fileKey, fileType, options? }`; `options.sheetName` selects the Excel sheet.
         * @throws Through Affirm on a missing client/request/fileKey/fileType; Error on an unsupported file type.
         */
        this.download = (request) => __awaiter(this, void 0, void 0, function* () {
            (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "connect()" first');
            (0, Affirm_1.default)(request, `Invalid download request`);
            (0, Affirm_1.default)(request.fileKey, `Invalid file key for download request`);
            (0, Affirm_1.default)(request.fileType, `Invalid file type for download request`);
            const { fileKey, fileType, options } = request;
            const sheetName = options === null || options === undefined ? undefined : options.sheetName;
            return yield this._readObject(fileKey, fileType, sheetName);
        });
        /**
         * Reads the lines of the object that fall in `[lineFrom, lineTo)`.
         * @param request `{ fileKey, fileType, options: { lineFrom, lineTo, sheetName? } }`.
         *                When either bound has no value every line is returned.
         * @throws Through Affirm on a missing client/request/fileKey/fileType/options; Error on an unsupported file type.
         */
        this.readLinesInRange = (request) => __awaiter(this, void 0, void 0, function* () {
            (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "connect()" first');
            (0, Affirm_1.default)(request, 'Invalid read request');
            (0, Affirm_1.default)(request.fileKey, 'Invalid file key');
            (0, Affirm_1.default)(request.fileType, 'Invalid file type');
            (0, Affirm_1.default)(request.options, 'Invalid read request options');
            const { fileKey, fileType, options: { sheetName, lineFrom, lineTo } } = request;
            return yield this._readObject(fileKey, fileType, sheetName, lineFrom, lineTo);
        });
        /**
         * Checks whether the producer's file exists in the bucket using a HEAD request.
         * @returns `true` when the HEAD request succeeds, `false` on a 404 / 'NotFound' error.
         * @throws Any other error raised by the S3 client.
         */
        this.exist = (producer) => __awaiter(this, void 0, void 0, function* () {
            var _a;
            (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "connect()" first');
            (0, Affirm_1.default)(producer, 'Invalid read producer');
            const bucket = this._bucketName;
            const fileKey = producer.settings.fileKey;
            (0, Affirm_1.default)(fileKey, `Invalid file key for download request`);
            try {
                yield this._client.send(new client_s3_1.HeadObjectCommand({ Bucket: bucket, Key: fileKey }));
                return true;
            }
            catch (error) {
                if (((_a = error.$metadata) === null || _a === void 0 ? void 0 : _a.httpStatusCode) === 404 || error.name === 'NotFound')
                    return false;
                throw error;
            }
        });
        /**
         * Shared implementation of `download` and `readLinesInRange`: validates the file type,
         * fetches the object and dispatches to the matching reader.
         */
        this._readObject = (fileKey, fileType, sheetName, lineFrom, lineTo) => __awaiter(this, void 0, void 0, function* () {
            // Compare case-insensitively and reject unknown types BEFORE hitting S3, instead of
            // fetching the object and then silently returning undefined.
            const normalizedType = String(fileType).toUpperCase();
            const isText = ['CSV', 'JSON', 'JSONL', 'TXT'].includes(normalizedType);
            const isExcel = ['XLS', 'XLSX'].includes(normalizedType);
            if (!isText && !isExcel)
                throw new Error(`Unsupported file type "${fileType}" for the S3 source driver`);
            const response = yield this._client.send(new client_s3_1.GetObjectCommand({
                Bucket: this._bucketName,
                Key: fileKey
            }));
            (0, Affirm_1.default)(response.Body, 'Failed to fetch object from S3');
            const stream = response.Body;
            if (isExcel)
                return yield this._readExcelLines(stream, sheetName, lineFrom, lineTo);
            return yield this._readLines(stream, lineFrom, lineTo);
        });
        /**
         * Reads text lines from a stream. With both bounds set only lines whose 0-based index is in
         * `[lineFrom, lineTo)` are kept and reading stops once `lineTo` is reached.
         */
        this._readLines = (stream, lineFrom, lineTo) => __awaiter(this, void 0, void 0, function* () {
            var _a, e_1, _b, _c;
            const reader = readline_1.default.createInterface({ input: stream, crlfDelay: Infinity });
            const lines = [];
            let lineCounter = 0;
            // Down-levelled `for await (const line of reader)`.
            try {
                for (var _d = true, reader_1 = __asyncValues(reader), reader_1_1; reader_1_1 = yield reader_1.next(), _a = reader_1_1.done, !_a; _d = true) {
                    _c = reader_1_1.value;
                    _d = false;
                    const line = _c;
                    if (Algo_1.default.hasVal(lineFrom) && Algo_1.default.hasVal(lineTo)) {
                        if (lineCounter >= lineFrom && lineCounter < lineTo) {
                            lines.push(line);
                        }
                        lineCounter++;
                        if (lineCounter >= lineTo)
                            break;
                    }
                    else {
                        lines.push(line);
                    }
                }
            }
            catch (e_1_1) { e_1 = { error: e_1_1 }; }
            finally {
                try {
                    if (!_d && !_a && (_b = reader_1.return)) yield _b.call(reader_1);
                }
                finally { if (e_1) throw e_1.error; }
            }
            reader.close();
            return lines;
        });
        /**
         * Buffers the whole stream, parses it as an Excel workbook and returns one sheet as CSV lines.
         * @param sheetName Optional; when falsy the first sheet of the workbook is used.
         * @param lineFrom  Optional index of the first line to return (0-based, inclusive).
         * @param lineTo    Optional index at which to stop (exclusive, same as `_readLines`).
         * @throws Through Affirm when the workbook has no sheets or the requested sheet is missing.
         */
        this._readExcelLines = (stream, sheetName, lineFrom, lineTo) => __awaiter(this, void 0, void 0, function* () {
            var _a, stream_1, stream_1_1;
            var _b, e_2, _c, _d;
            const chunks = [];
            // Down-levelled `for await (const chunk of stream)`.
            try {
                for (_a = true, stream_1 = __asyncValues(stream); stream_1_1 = yield stream_1.next(), _b = stream_1_1.done, !_b; _a = true) {
                    _d = stream_1_1.value;
                    _a = false;
                    const chunk = _d;
                    chunks.push(chunk);
                }
            }
            catch (e_2_1) { e_2 = { error: e_2_1 }; }
            finally {
                try {
                    if (!_a && !_b && (_c = stream_1.return)) yield _c.call(stream_1);
                }
                finally { if (e_2) throw e_2.error; }
            }
            const buffer = Buffer.concat(chunks);
            const excel = xlsx_1.default.read(buffer, { type: 'buffer' });
            // The sheet name is optional for callers (`download` forwards `options?.sheetName`),
            // so fall back to the first sheet instead of rejecting the request.
            let targetSheetName = sheetName;
            if (!targetSheetName) {
                (0, Affirm_1.default)(excel.SheetNames.length > 0, 'The Excel file has no sheets.');
                targetSheetName = excel.SheetNames[0];
            }
            else {
                (0, Affirm_1.default)(excel.SheetNames.includes(targetSheetName), `The sheet "${targetSheetName}" doesn't exist in the excel (available: ${excel.SheetNames.join(', ')})`);
            }
            const sheet = excel.Sheets[targetSheetName];
            const csv = xlsx_1.default.utils.sheet_to_csv(sheet);
            const lines = csv.split('\n');
            if (Algo_1.default.hasVal(lineFrom) && Algo_1.default.hasVal(lineTo))
                return lines.slice(lineFrom, lineTo);
            return lines;
        });
    }
}
260
+ exports.S3SourceDriver = S3SourceDriver;
@@ -5,6 +5,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
6
  const crypto_1 = __importDefault(require("crypto"));
7
7
  const Algo_1 = __importDefault(require("../core/Algo"));
8
+ const RandomEngine_1 = __importDefault(require("./RandomEngine"));
8
9
  class CryptoEngineClass {
9
10
  constructor() {
10
11
  this.hashQuery = (maskType, fieldReference, fieldName) => {
@@ -24,7 +25,7 @@ class CryptoEngineClass {
24
25
  this.valueToHash = (value) => {
25
26
  return crypto_1.default.createHash('sha256').update(JSON.stringify(value)).digest('hex');
26
27
  };
27
- this.hashValue = (maskType, value) => {
28
+ this.hashValue = (maskType, value, valueType) => {
28
29
  if (!Algo_1.default.hasVal(value))
29
30
  return value;
30
31
  if (!Algo_1.default.hasVal(maskType))
@@ -32,6 +33,24 @@ class CryptoEngineClass {
32
33
  switch (maskType) {
33
34
  case 'hash':
34
35
  return this.valueToHash(value);
36
+ case 'random': {
37
+ switch (valueType) {
38
+ case 'datetime': return RandomEngine_1.default.rngDate();
39
+ case 'number': return RandomEngine_1.default.rng();
40
+ case 'string': return this.valueToHash(value);
41
+ default:
42
+ throw new Error('Not implemented yet');
43
+ }
44
+ }
45
+ case 'seeded-random': {
46
+ switch (valueType) {
47
+ case 'datetime': return RandomEngine_1.default.sRngDate(value);
48
+ case 'number': return RandomEngine_1.default.sRng(value);
49
+ case 'string': return this.valueToHash(value);
50
+ default:
51
+ throw new Error('Not implemented yet');
52
+ }
53
+ }
35
54
  case 'crypt':
36
55
  throw new Error('Not implemented yet');
37
56
  case 'mask':
@@ -18,9 +18,9 @@ class ParseManagerClass {
18
18
  (0, Affirm_1.default)(lines, 'Invalid csv lines');
19
19
  Affirm_1.default.hasValue(lines.length, 'Invalid csv lines length');
20
20
  const delimiterChar = (_a = producer.settings.delimiter) !== null && _a !== void 0 ? _a : ',';
21
- const headerRow = lines[0];
22
- const rows = lines.slice(1).map(x => x.split(delimiterChar).map(k => k.trim()));
23
- const headerColumns = this._extractHeader(headerRow, delimiterChar, producer, discover);
21
+ const { header, records } = this._getClassifiedRows(lines, delimiterChar, producer);
22
+ const headerColumns = this._extractHeader(header, delimiterChar, producer, discover);
23
+ const rows = records.map(x => x.split(delimiterChar).map(k => k.trim()));
24
24
  const result = [];
25
25
  for (const row of rows) {
26
26
  const rowObject = {};
@@ -32,6 +32,23 @@ class ParseManagerClass {
32
32
  }
33
33
  return result;
34
34
  };
/**
 * Splits raw text lines into a header line and the data records.
 *
 * For a TXT file whose producer explicitly sets `hasHeaderRow: false`, a header is synthesised
 * from the compiled producer columns (`nameInProducer` joined by the delimiter) and every line
 * is treated as a record. In all other cases the first line is the header.
 *
 * @param lines         Raw lines of the file.
 * @param delimiterChar Column delimiter used to build the synthetic header.
 * @param producer      Producer whose `settings.fileType` / `settings.hasHeaderRow` drive the split.
 * @returns `{ header, records }`.
 */
this._getClassifiedRows = (lines, delimiterChar, producer) => {
    const { fileType, hasHeaderRow } = producer.settings;
    // `hasHeaderRow` is documented as defaulting to true, so only an explicit `false`
    // means "no header"; an omitted setting must keep the first line as the header.
    // The file type is compared case-insensitively, like the other fileType checks.
    const isHeaderlessTxt = String(fileType).toUpperCase() === 'TXT' && hasHeaderRow === false;
    if (!isHeaderlessTxt) {
        return {
            header: lines[0],
            records: lines.slice(1)
        };
    }
    // No header row in the file: add a fake one that maps directly to the producer.
    const source = Environment_1.default.getSource(producer.source);
    const columns = FileCompiler_1.default.compileProducer(producer, source);
    return {
        header: columns.map(x => x.nameInProducer).join(delimiterChar),
        records: lines
    };
};
35
52
  this._extractHeader = (headerLine, delimiter, producer, discover) => {
36
53
  var _a;
37
54
  (0, Affirm_1.default)(headerLine, `Invalid CSV header line for producer "${producer.name}"`);
@@ -91,19 +91,24 @@ class ProducerEngineClass {
91
91
  (0, Affirm_1.default)(source, `No source found for producer "${producer.name}" with name "${producer.source}"`);
92
92
  const driver = yield DriverFactory_1.default.instantiateSource(source);
93
93
  (0, Affirm_1.default)(driver, `No driver found for producer "${producer.name}" with driver type "${source.engine}"`);
94
+ const { settings: { fileKey, fileType, sheetName } } = producer;
94
95
  let lines = [];
95
96
  if (options.readmode === 'lines')
96
- lines = yield driver.readLinesInRange({ fileKey: producer.settings.fileKey, lineFrom: options.lines.from, lineTo: options.lines.to });
97
+ lines = yield driver.readLinesInRange({ fileKey, fileType, options: { lineFrom: options.lines.from, lineTo: options.lines.to, sheetName } });
97
98
  else
98
- lines = yield driver.download({ fileKey: producer.settings.fileKey });
99
+ lines = yield driver.download({ fileKey, fileType, options: { sheetName } });
99
100
  switch ((_a = producer.settings.fileType) === null || _a === void 0 ? void 0 : _a.toUpperCase()) {
100
- case 'CSV': {
101
+ case 'CSV':
102
+ case 'TXT':
103
+ return { data: lines, dataType: 'lines-of-text' };
104
+ case 'XLS':
105
+ case 'XLSX':
101
106
  return { data: lines, dataType: 'lines-of-text' };
102
- }
103
107
  case 'JSONL':
104
108
  case 'JSON': {
105
- if (lines.length === 1)
109
+ if (lines.length === 1) {
106
110
  lines = lines[0].split('\n');
111
+ }
107
112
  const json = lines.map(x => JSON.parse(x));
108
113
  return { data: json, dataType: 'array-of-json' };
109
114
  }
@@ -112,7 +117,7 @@ class ProducerEngineClass {
112
117
  }
113
118
  });
114
119
  this.readSampleData = (producer_1, ...args_1) => __awaiter(this, [producer_1, ...args_1], void 0, function* (producer, sampleSize = 10, discover = false) {
115
- var _a, _b, _c;
120
+ var _a;
116
121
  (0, Affirm_1.default)(producer, 'Invalid producer');
117
122
  (0, Affirm_1.default)(sampleSize > 0, 'Sample size must be greater than 0');
118
123
  const source = Environment_1.default.getSource(producer.source);
@@ -131,18 +136,29 @@ class ProducerEngineClass {
131
136
  case 'local':
132
137
  case 'aws-s3': {
133
138
  const fileData = yield this.readFile(producer, { readmode: 'lines', lines: { from: 0, to: sampleSize } });
134
- if (((_a = producer.settings.fileType) === null || _a === void 0 ? void 0 : _a.toUpperCase()) === 'CSV') {
135
- sampleData = ParseManager_1.default.csvLinesToJson(fileData.data, producer, discover);
136
- }
137
- else if (((_b = producer.settings.fileType) === null || _b === void 0 ? void 0 : _b.toUpperCase()) === 'JSON' || ((_c = producer.settings.fileType) === null || _c === void 0 ? void 0 : _c.toUpperCase()) === 'JSONL') {
138
- // With JSON or JSONL the readFile function already parses the strings
139
- if (typeof fileData.data[0] === 'object')
139
+ const fileType = (_a = producer.settings.fileType) === null || _a === void 0 ? void 0 : _a.toUpperCase();
140
+ switch (fileType) {
141
+ case 'CSV':
142
+ case 'TXT':
143
+ case 'XLS':
144
+ case 'XLSX': {
145
+ sampleData = ParseManager_1.default.csvLinesToJson(fileData.data, producer, discover);
146
+ break;
147
+ }
148
+ case 'JSON':
149
+ case 'JSONL': {
150
+ // With JSON or JSONL the readFile function already parses the strings
151
+ if (typeof fileData.data[0] === 'object')
152
+ sampleData = fileData.data;
153
+ else
154
+ sampleData = fileData.data.map(line => JSON.parse(line));
155
+ sampleData = sampleData.slice(0, sampleSize);
156
+ break;
157
+ }
158
+ default: {
140
159
  sampleData = fileData.data;
141
- else
142
- sampleData = fileData.data.map(line => JSON.parse(line));
143
- }
144
- else {
145
- sampleData = fileData.data;
160
+ break;
161
+ }
146
162
  }
147
163
  break;
148
164
  }
@@ -0,0 +1,35 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ const seedrandom_1 = __importDefault(require("seedrandom"));
7
+ const Algo_1 = __importDefault(require("../core/Algo"));
/**
 * Random number / date generator behind the 'random' and 'seeded-random' mask types.
 *
 * All four public helpers share the same bound handling: an integer in `[min, max]`
 * (both inclusive) is produced only when BOTH bounds have a value; otherwise the raw
 * generator output (a float in `[0, 1)`) is returned.
 */
class RandomEngineClass {
    constructor() {
        /**
         * Maps a unit-interval sample onto the requested range.
         * A lone bound is ignored: computing `max - min` with one side missing yields NaN.
         */
        this._scale = (sample, min, max) => {
            if (Algo_1.default.hasVal(min) && Algo_1.default.hasVal(max))
                return Math.floor(sample * (max - min + 1)) + min;
            return sample;
        };
        /**
         * Non-deterministic random number based on `Math.random`.
         * @param min Optional inclusive lower bound.
         * @param max Optional inclusive upper bound.
         */
        this.rng = (min, max) => {
            return this._scale(Math.random(), min, max);
        };
        /**
         * Deterministic random number: a fresh generator is created from `String(seed)` on
         * every call and its first output is used, so equal seeds give equal results.
         * @param seed Any value; it is stringified before seeding.
         * @param min  Optional inclusive lower bound.
         * @param max  Optional inclusive upper bound.
         */
        this.sRng = (seed, min, max) => {
            const rng = (0, seedrandom_1.default)(String(seed));
            return this._scale(rng(), min, max);
        };
        /**
         * Random date as an ISO string; the random number is used as a millisecond timestamp.
         * NOTE(review): without both bounds the number is in [0, 1), which `Date` truncates to
         * 0 ms, so the result is always the Unix epoch. Callers that want varied dates must
         * pass a timestamp range.
         */
        this.rngDate = (min, max) => {
            const randomNumber = this.rng(min, max);
            return new Date(randomNumber).toJSON();
        };
        /**
         * Seeded counterpart of `rngDate`; the same epoch caveat applies when bounds are omitted.
         */
        this.sRngDate = (seed, min, max) => {
            const randomNumber = this.sRng(seed, min, max);
            return new Date(randomNumber).toJSON();
        };
    }
}
34
+ const RandomEngine = new RandomEngineClass();
35
+ exports.default = RandomEngine;
@@ -33,15 +33,16 @@ class PostProcessorClass {
33
33
  const groups = Algo_1.default.groupBy(items, groupingRule.groupingKey);
34
34
  const projections = [];
35
35
  groups.forEach(gItems => {
36
- var _a;
36
+ var _a, _b, _c;
37
37
  const projected = {};
38
38
  const first = gItems[0];
39
39
  for (const field of allFields) {
40
40
  const { key, alias, grouping } = field.cField;
41
41
  const fieldKey = alias !== null && alias !== void 0 ? alias : key;
42
42
  const maskType = (_a = field.dimension) === null || _a === void 0 ? void 0 : _a.mask;
43
+ const fieldType = (_c = (_b = field.dimension) === null || _b === void 0 ? void 0 : _b.type) !== null && _c !== void 0 ? _c : 'string';
43
44
  if (!field.cField.grouping) {
44
- projected[fieldKey] = CryptoEngine_1.default.hashValue(maskType, first[fieldKey]);
45
+ projected[fieldKey] = CryptoEngine_1.default.hashValue(maskType, first[fieldKey], fieldType);
45
46
  }
46
47
  else {
47
48
  const { subFields } = grouping;
@@ -64,7 +65,7 @@ class PostProcessorClass {
64
65
  const fieldType = (_c = (_b = field.dimension) === null || _b === void 0 ? void 0 : _b.type) !== null && _c !== void 0 ? _c : 'string';
65
66
  const fieldValue = this._getFieldValue(x, field);
66
67
  if (Algo_1.default.hasVal(maskType))
67
- projected[fieldKey] = CryptoEngine_1.default.hashValue(maskType, fieldValue);
68
+ projected[fieldKey] = CryptoEngine_1.default.hashValue(maskType, fieldValue, fieldType);
68
69
  else
69
70
  projected[fieldKey] = TypeCaster_1.default.cast(fieldValue, fieldType);
70
71
  }
@@ -87,9 +88,10 @@ class PostProcessorClass {
87
88
  const columns = FileCompiler_1.default.compileProducer(producer, source);
88
89
  (0, Affirm_1.default)(columns, `Invalid columns from compilation for producer "${producer.name}"`);
89
90
  const unpackDimension = (item, dimension) => {
90
- var _a;
91
+ var _a, _b, _c;
91
92
  const { nameInProducer, aliasInProducer } = dimension;
92
93
  const maskType = (_a = dimension.dimension.mask) !== null && _a !== void 0 ? _a : undefined;
94
+ const fieldType = (_c = (_b = dimension.dimension) === null || _b === void 0 ? void 0 : _b.type) !== null && _c !== void 0 ? _c : 'string';
93
95
  const keys = aliasInProducer.split('.');
94
96
  let prevValue = item;
95
97
  for (const key of keys) {
@@ -114,7 +116,7 @@ class PostProcessorClass {
114
116
  prevValue = prevValue === null || prevValue === void 0 ? void 0 : prevValue[key];
115
117
  }
116
118
  }
117
- prevValue = CryptoEngine_1.default.hashValue(maskType, prevValue);
119
+ prevValue = CryptoEngine_1.default.hashValue(maskType, prevValue, fieldType);
118
120
  const res = { [nameInProducer]: prevValue };
119
121
  return res;
120
122
  };
@@ -104,7 +104,8 @@ class ExecutionPlannerClas {
104
104
  plan.push({ type: 'read-file-lines', producer, lines: { from: (_a = options.offset) !== null && _a !== void 0 ? _a : 0, to: options.limit ? (options.offset + options.limit) : undefined } });
105
105
  else
106
106
  plan.push({ type: 'read-file-whole', producer });
107
- if (((_b = producer.settings.fileType) === null || _b === void 0 ? void 0 : _b.toUpperCase()) === 'CSV')
107
+ const fileType = (_b = producer.settings.fileType) === null || _b === void 0 ? void 0 : _b.toUpperCase();
108
+ if (fileType === 'CSV' || fileType === 'TXT' || fileType === 'XLS' || fileType === 'XLSX')
108
109
  plan.push({ type: 'csv-to-json', producer });
109
110
  if (producer.dimensions.some(x => { var _a, _b; return ((_a = x.alias) === null || _a === void 0 ? void 0 : _a.includes('{')) || ((_b = x.alias) === null || _b === void 0 ? void 0 : _b.includes('[')); }))
110
111
  plan.push({ type: 'nested-field-unpacking', producer });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forzalabs/remora",
3
- "version": "0.0.27",
3
+ "version": "0.0.29",
4
4
  "description": "A powerful CLI tool for seamless data translation.",
5
5
  "main": "index.js",
6
6
  "private": false,
@@ -9,6 +9,7 @@
9
9
  },
10
10
  "scripts": {
11
11
  "sync": "cd ../dev_ops && npm run sync",
12
+ "tsc-check": "npx tsc --noemit",
12
13
  "init": "npx tsx ./src/index.ts init",
13
14
  "version": "npx tsx ./src/index.ts -v",
14
15
  "run": "npx tsx ./src/index.ts run",
@@ -53,6 +54,8 @@
53
54
  "ora": "^5.4.1",
54
55
  "react": "^18.2.0",
55
56
  "react-dom": "^18.2.0",
57
+ "seedrandom": "^3.0.5",
58
+ "xlsx": "^0.18.5",
56
59
  "zod": "^3.24.2"
57
60
  },
58
61
  "devDependencies": {