@forzalabs/remora 0.0.26 → 0.0.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Constants.js CHANGED
@@ -1,7 +1,7 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  const CONSTANTS = {
4
- cliVersion: '0.0.26',
4
+ cliVersion: '0.0.28',
5
5
  lambdaVersion: 1,
6
6
  port: 5069,
7
7
  defaults: {
@@ -132,13 +132,24 @@
132
132
  "enum": [
133
133
  "JSON",
134
134
  "JSONL",
135
- "CSV"
135
+ "CSV",
136
+ "TXT",
137
+ "XLS",
138
+ "XLSX"
136
139
  ],
137
140
  "description": "The type of file to read"
138
141
  },
139
142
  "delimiter": {
140
143
  "type": "string",
141
- "description": "The column delimiter for the CSV file if different from the default (,)."
144
+ "description": "The column delimiter for CSV or TXT files if different from the default (,)."
145
+ },
146
+ "hasHeaderRow": {
147
+ "type": "boolean",
148
+ "description": "For TXT files, specifies whether the file has a header row containing column names. Defaults to true."
149
+ },
150
+ "sheetName": {
151
+ "type": "string",
152
+ "description": "For Excel files (.xls/.xlsx), specifies the name of the sheet to read data from. If not specified, the first sheet will be used."
142
153
  }
143
154
  },
144
155
  "additionalProperties": false
@@ -97,6 +97,10 @@
97
97
  "clusterId": {
98
98
  "type": "string",
99
99
  "description": "Redshift cluster identifier"
100
+ },
101
+ "path": {
102
+ "type": "string",
103
+ "description": "The folder path"
100
104
  }
101
105
  },
102
106
  "required": ["method"]
@@ -1,37 +1,4 @@
1
1
  "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
- Object.defineProperty(o, "default", { enumerable: true, value: v });
15
- }) : function(o, v) {
16
- o["default"] = v;
17
- });
18
- var __importStar = (this && this.__importStar) || (function () {
19
- var ownKeys = function(o) {
20
- ownKeys = Object.getOwnPropertyNames || function (o) {
21
- var ar = [];
22
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
- return ar;
24
- };
25
- return ownKeys(o);
26
- };
27
- return function (mod) {
28
- if (mod && mod.__esModule) return mod;
29
- var result = {};
30
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
- __setModuleDefault(result, mod);
32
- return result;
33
- };
34
- })();
35
2
  var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
36
3
  function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
37
4
  return new (P || (P = Promise))(function (resolve, reject) {
@@ -45,10 +12,9 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
45
12
  return (mod && mod.__esModule) ? mod : { "default": mod };
46
13
  };
47
14
  Object.defineProperty(exports, "__esModule", { value: true });
48
- const LocalDriver_1 = __importStar(require("./LocalDriver"));
15
+ const LocalDriver_1 = require("./LocalDriver");
49
16
  const RedshiftDriver_1 = __importDefault(require("./RedshiftDriver"));
50
- const S3Driver_1 = __importDefault(require("./S3Driver"));
51
- const S3SourceDriver_1 = __importDefault(require("./S3SourceDriver"));
17
+ const S3Driver_1 = require("./S3Driver");
52
18
  class DriverFactoryClass {
53
19
  constructor() {
54
20
  this.instantiateSource = (source) => __awaiter(this, void 0, void 0, function* () {
@@ -60,12 +26,12 @@ class DriverFactoryClass {
60
26
  return driver;
61
27
  }
62
28
  case 'aws-s3': {
63
- const driver = new S3SourceDriver_1.default();
29
+ const driver = new S3Driver_1.S3SourceDriver();
64
30
  yield driver.init(source);
65
31
  return driver;
66
32
  }
67
33
  case 'local': {
68
- const driver = new LocalDriver_1.default();
34
+ const driver = new LocalDriver_1.LocalSourceDriver();
69
35
  yield driver.init(source);
70
36
  return driver;
71
37
  }
@@ -76,12 +42,12 @@ class DriverFactoryClass {
76
42
  switch (source.engine) {
77
43
  // TODO: implement all the other engines
78
44
  case 'aws-s3': {
79
- const driver = new S3Driver_1.default();
45
+ const driver = new S3Driver_1.S3DestinationDriver();
80
46
  yield driver.init(source);
81
47
  return driver;
82
48
  }
83
49
  case 'local': {
84
- const driver = new LocalDriver_1.LocalDriverDestination();
50
+ const driver = new LocalDriver_1.LocalDestinationDriver();
85
51
  yield driver.init(source);
86
52
  return driver;
87
53
  }
@@ -52,12 +52,14 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
52
52
  return (mod && mod.__esModule) ? mod : { "default": mod };
53
53
  };
54
54
  Object.defineProperty(exports, "__esModule", { value: true });
55
- exports.LocalDriverDestination = void 0;
55
+ exports.LocalDestinationDriver = exports.LocalSourceDriver = void 0;
56
56
  const fs = __importStar(require("fs"));
57
57
  const path_1 = __importDefault(require("path"));
58
58
  const readline_1 = __importDefault(require("readline"));
59
59
  const Affirm_1 = __importDefault(require("../core/Affirm"));
60
- class LocalDriver {
60
+ const Algo_1 = __importDefault(require("../core/Algo"));
61
+ const xlsx_1 = __importDefault(require("xlsx"));
62
+ class LocalSourceDriver {
61
63
  constructor() {
62
64
  this.init = (source) => __awaiter(this, void 0, void 0, function* () {
63
65
  const fileURL = source.authentication['path'];
@@ -68,79 +70,109 @@ class LocalDriver {
68
70
  return this;
69
71
  });
70
72
  this.download = (request) => __awaiter(this, void 0, void 0, function* () {
71
- var _a, e_1, _b, _c;
72
73
  (0, Affirm_1.default)(this._path, `Invalid path`);
73
74
  (0, Affirm_1.default)(request, `Invalid download request`);
74
75
  (0, Affirm_1.default)(request.fileKey, `Invalid file key for download request`);
75
- const fileUrl = path_1.default.join(this._path, request.fileKey);
76
- const stream = fs.createReadStream(fileUrl);
77
- const reader = readline_1.default.createInterface({ input: stream, crlfDelay: Infinity });
78
- const lines = [];
79
- try {
80
- for (var _d = true, reader_1 = __asyncValues(reader), reader_1_1; reader_1_1 = yield reader_1.next(), _a = reader_1_1.done, !_a; _d = true) {
81
- _c = reader_1_1.value;
82
- _d = false;
83
- const line = _c;
84
- lines.push(line);
85
- }
76
+ (0, Affirm_1.default)(request.fileType, `Invalid file type for download request`);
77
+ const { fileKey, options } = request;
78
+ const fileUrl = path_1.default.join(this._path, fileKey);
79
+ switch (request.fileType) {
80
+ case 'CSV':
81
+ case 'JSON':
82
+ case 'JSONL':
83
+ case 'TXT':
84
+ return yield this._readLines(fileUrl);
85
+ case 'XLS':
86
+ case 'XLSX':
87
+ return yield this._readExcelLines(fileUrl, options === null || options === void 0 ? void 0 : options.sheetName);
86
88
  }
87
- catch (e_1_1) { e_1 = { error: e_1_1 }; }
88
- finally {
89
- try {
90
- if (!_d && !_a && (_b = reader_1.return)) yield _b.call(reader_1);
91
- }
92
- finally { if (e_1) throw e_1.error; }
89
+ });
90
+ this.readLinesInRange = (request) => __awaiter(this, void 0, void 0, function* () {
91
+ (0, Affirm_1.default)(this._path, `Invalid path`);
92
+ (0, Affirm_1.default)(request, 'Invalid read options');
93
+ (0, Affirm_1.default)(request.fileKey, 'Invalid file key');
94
+ (0, Affirm_1.default)(request.fileType, `Invalid file type`);
95
+ (0, Affirm_1.default)(request.options, `Invalid request options`);
96
+ Affirm_1.default.hasValue(request.options.lineFrom, `Invalid request options line from`);
97
+ Affirm_1.default.hasValue(request.options.lineTo, `Invalid request options line to`);
98
+ const { fileKey, fileType, options: { lineFrom, lineTo, sheetName } } = request;
99
+ const fileUrl = path_1.default.join(this._path, fileKey);
100
+ switch (fileType) {
101
+ case 'CSV':
102
+ case 'JSON':
103
+ case 'JSONL':
104
+ case 'TXT':
105
+ return yield this._readLines(fileUrl, lineFrom, lineTo);
106
+ case 'XLS':
107
+ case 'XLSX':
108
+ return yield this._readExcelLines(fileUrl, sheetName, lineFrom, lineTo);
93
109
  }
94
- reader.close();
95
- stream.close();
96
- return lines;
97
110
  });
98
- this.readLinesInRange = (readOptions) => __awaiter(this, void 0, void 0, function* () {
99
- var _a, e_2, _b, _c;
111
+ this.exist = (producer) => __awaiter(this, void 0, void 0, function* () {
100
112
  (0, Affirm_1.default)(this._path, `Invalid path`);
101
- (0, Affirm_1.default)(readOptions, 'Invalid read options');
102
- (0, Affirm_1.default)(readOptions.fileKey, 'Invalid file key');
103
- const fileUrl = path_1.default.join(this._path, readOptions.fileKey);
104
- const { lineFrom, lineTo } = readOptions;
105
- const stream = fs.createReadStream(fileUrl);
113
+ (0, Affirm_1.default)(producer, `Invalid producer`);
114
+ const fileKey = producer.settings.fileKey;
115
+ (0, Affirm_1.default)(fileKey, `Invalid file key for download request`);
116
+ const fileUrl = path_1.default.join(this._path, fileKey);
117
+ return fs.existsSync(fileUrl);
118
+ });
119
+ this._readLines = (fileUri, lineFrom, lineTo) => __awaiter(this, void 0, void 0, function* () {
120
+ var _a, e_1, _b, _c;
121
+ const stream = fs.createReadStream(fileUri);
106
122
  const reader = readline_1.default.createInterface({ input: stream, crlfDelay: Infinity });
107
123
  const lines = [];
108
124
  let lineCounter = 0;
109
125
  try {
110
- for (var _d = true, reader_2 = __asyncValues(reader), reader_2_1; reader_2_1 = yield reader_2.next(), _a = reader_2_1.done, !_a; _d = true) {
111
- _c = reader_2_1.value;
126
+ for (var _d = true, reader_1 = __asyncValues(reader), reader_1_1; reader_1_1 = yield reader_1.next(), _a = reader_1_1.done, !_a; _d = true) {
127
+ _c = reader_1_1.value;
112
128
  _d = false;
113
129
  const line = _c;
114
- if (lineCounter >= lineFrom && lineCounter < lineTo) {
130
+ if (Algo_1.default.hasVal(lineFrom) && Algo_1.default.hasVal(lineTo)) {
131
+ if (lineCounter >= lineFrom && lineCounter < lineTo) {
132
+ lines.push(line);
133
+ }
134
+ lineCounter++;
135
+ if (lineCounter >= lineTo)
136
+ break;
137
+ }
138
+ else {
115
139
  lines.push(line);
116
140
  }
117
- lineCounter++;
118
- if (lineCounter >= lineTo)
119
- break;
120
141
  }
121
142
  }
122
- catch (e_2_1) { e_2 = { error: e_2_1 }; }
143
+ catch (e_1_1) { e_1 = { error: e_1_1 }; }
123
144
  finally {
124
145
  try {
125
- if (!_d && !_a && (_b = reader_2.return)) yield _b.call(reader_2);
146
+ if (!_d && !_a && (_b = reader_1.return)) yield _b.call(reader_1);
126
147
  }
127
- finally { if (e_2) throw e_2.error; }
148
+ finally { if (e_1) throw e_1.error; }
128
149
  }
129
150
  reader.close();
130
151
  stream.close();
131
152
  return lines;
132
153
  });
133
- this.exist = (producer) => __awaiter(this, void 0, void 0, function* () {
134
- (0, Affirm_1.default)(this._path, `Invalid path`);
135
- (0, Affirm_1.default)(producer, `Invalid producer`);
136
- const fileKey = producer.settings.fileKey;
137
- (0, Affirm_1.default)(fileKey, `Invalid file key for download request`);
138
- const fileUrl = path_1.default.join(this._path, fileKey);
139
- return fs.existsSync(fileUrl);
154
+ this._readExcelLines = (fileUri, sheetName, lineFrom, lineTo) => __awaiter(this, void 0, void 0, function* () {
155
+ const excel = xlsx_1.default.readFile(fileUri);
156
+ let targetSheetName = sheetName;
157
+ if (!targetSheetName) {
158
+ (0, Affirm_1.default)(excel.SheetNames.length > 0, 'The Excel file has no sheets.');
159
+ targetSheetName = excel.SheetNames[0];
160
+ }
161
+ else {
162
+ (0, Affirm_1.default)(excel.SheetNames.includes(targetSheetName), `The sheet "${targetSheetName}" doesn't exist in the excel (available: ${excel.SheetNames.join(', ')})`);
163
+ }
164
+ const sheet = excel.Sheets[targetSheetName];
165
+ const csv = xlsx_1.default.utils.sheet_to_csv(sheet);
166
+ const lines = csv.split('\n');
167
+ if (Algo_1.default.hasVal(lineFrom) && Algo_1.default.hasVal(lineTo))
168
+ return lines.slice(lineFrom, lineTo + 1);
169
+ else
170
+ return lines;
140
171
  });
141
172
  }
142
173
  }
143
- class LocalDriverDestination {
174
+ exports.LocalSourceDriver = LocalSourceDriver;
175
+ class LocalDestinationDriver {
144
176
  constructor() {
145
177
  this.init = (source) => __awaiter(this, void 0, void 0, function* () {
146
178
  (0, Affirm_1.default)(source, `Invalid source`);
@@ -206,5 +238,4 @@ class LocalDriverDestination {
206
238
  });
207
239
  }
208
240
  }
209
- exports.LocalDriverDestination = LocalDriverDestination;
210
- exports.default = LocalDriver;
241
+ exports.LocalDestinationDriver = LocalDestinationDriver;
@@ -8,14 +8,25 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
8
8
  step((generator = generator.apply(thisArg, _arguments || [])).next());
9
9
  });
10
10
  };
11
+ var __asyncValues = (this && this.__asyncValues) || function (o) {
12
+ if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
13
+ var m = o[Symbol.asyncIterator], i;
14
+ return m ? m.call(o) : (o = typeof __values === "function" ? __values(o) : o[Symbol.iterator](), i = {}, verb("next"), verb("throw"), verb("return"), i[Symbol.asyncIterator] = function () { return this; }, i);
15
+ function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }
16
+ function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }
17
+ };
11
18
  var __importDefault = (this && this.__importDefault) || function (mod) {
12
19
  return (mod && mod.__esModule) ? mod : { "default": mod };
13
20
  };
14
21
  Object.defineProperty(exports, "__esModule", { value: true });
22
+ exports.S3SourceDriver = exports.S3DestinationDriver = void 0;
15
23
  const client_s3_1 = require("@aws-sdk/client-s3");
16
24
  const Affirm_1 = __importDefault(require("../core/Affirm"));
17
25
  const SecretManager_1 = __importDefault(require("../engines/SecretManager"));
18
- class S3Driver {
26
+ const readline_1 = __importDefault(require("readline"));
27
+ const Algo_1 = __importDefault(require("../core/Algo"));
28
+ const xlsx_1 = __importDefault(require("xlsx"));
29
+ class S3DestinationDriver {
19
30
  constructor() {
20
31
  this.init = (source) => __awaiter(this, void 0, void 0, function* () {
21
32
  this._bucketName = source.authentication['bucket'];
@@ -99,4 +110,151 @@ class S3Driver {
99
110
  });
100
111
  }
101
112
  }
102
- exports.default = S3Driver;
113
+ exports.S3DestinationDriver = S3DestinationDriver;
114
+ class S3SourceDriver {
115
+ constructor() {
116
+ this.init = (source) => __awaiter(this, void 0, void 0, function* () {
117
+ this._bucketName = source.authentication['bucket'];
118
+ const sessionToken = SecretManager_1.default.replaceSecret(source.authentication['sessionToken']);
119
+ const config = {
120
+ region: source.authentication['region'],
121
+ credentials: {
122
+ accessKeyId: SecretManager_1.default.replaceSecret(source.authentication['accessKey']),
123
+ secretAccessKey: SecretManager_1.default.replaceSecret(source.authentication['secretKey']),
124
+ sessionToken: sessionToken ? sessionToken : undefined
125
+ }
126
+ };
127
+ this._client = new client_s3_1.S3Client(config);
128
+ // TODO: is there a way to test if the connection was successful? like a query or scan that I can do?
129
+ return this;
130
+ });
131
+ this.download = (request) => __awaiter(this, void 0, void 0, function* () {
132
+ (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "connect()" first');
133
+ (0, Affirm_1.default)(request, `Invalid download request`);
134
+ (0, Affirm_1.default)(request.fileKey, `Invalid file key for download request`);
135
+ const { fileKey, fileType, options } = request;
136
+ const bucket = this._bucketName;
137
+ const response = yield this._client.send(new client_s3_1.GetObjectCommand({
138
+ Bucket: bucket,
139
+ Key: fileKey
140
+ }));
141
+ (0, Affirm_1.default)(response.Body, 'Failed to fetch object from S3');
142
+ const stream = response.Body;
143
+ switch (fileType) {
144
+ case 'CSV':
145
+ case 'JSON':
146
+ case 'JSONL':
147
+ case 'TXT':
148
+ return yield this._readLines(stream);
149
+ case 'XLS':
150
+ case 'XLSX':
151
+ return yield this._readExcelLines(stream, options === null || options === void 0 ? void 0 : options.sheetName);
152
+ }
153
+ });
154
+ this.readLinesInRange = (request) => __awaiter(this, void 0, void 0, function* () {
155
+ (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "connect()" first');
156
+ (0, Affirm_1.default)(request, 'Invalid read request');
157
+ (0, Affirm_1.default)(request.options, 'Invalid read request options');
158
+ const { fileKey, fileType, options: { sheetName, lineFrom, lineTo } } = request;
159
+ const bucket = this._bucketName;
160
+ const response = yield this._client.send(new client_s3_1.GetObjectCommand({
161
+ Bucket: bucket,
162
+ Key: fileKey
163
+ }));
164
+ (0, Affirm_1.default)(response.Body, 'Failed to fetch object from S3');
165
+ const stream = response.Body;
166
+ switch (fileType) {
167
+ case 'CSV':
168
+ case 'JSON':
169
+ case 'JSONL':
170
+ case 'TXT':
171
+ return yield this._readLines(stream, lineFrom, lineTo);
172
+ case 'XLS':
173
+ case 'XLSX':
174
+ return yield this._readExcelLines(stream, sheetName, lineFrom, lineTo);
175
+ }
176
+ });
177
+ this.exist = (producer) => __awaiter(this, void 0, void 0, function* () {
178
+ var _a;
179
+ (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "connect()" first');
180
+ (0, Affirm_1.default)(producer, 'Invalid read producer');
181
+ const bucket = this._bucketName;
182
+ const fileKey = producer.settings.fileKey;
183
+ (0, Affirm_1.default)(fileKey, `Invalid file key for download request`);
184
+ try {
185
+ yield this._client.send(new client_s3_1.HeadObjectCommand({ Bucket: bucket, Key: fileKey }));
186
+ return true;
187
+ }
188
+ catch (error) {
189
+ if (((_a = error.$metadata) === null || _a === void 0 ? void 0 : _a.httpStatusCode) === 404 || error.name === 'NotFound')
190
+ return false;
191
+ throw error;
192
+ }
193
+ });
194
+ this._readLines = (stream, lineFrom, lineTo) => __awaiter(this, void 0, void 0, function* () {
195
+ var _a, e_1, _b, _c;
196
+ const reader = readline_1.default.createInterface({ input: stream, crlfDelay: Infinity });
197
+ const lines = [];
198
+ let lineCounter = 0;
199
+ try {
200
+ for (var _d = true, reader_1 = __asyncValues(reader), reader_1_1; reader_1_1 = yield reader_1.next(), _a = reader_1_1.done, !_a; _d = true) {
201
+ _c = reader_1_1.value;
202
+ _d = false;
203
+ const line = _c;
204
+ if (Algo_1.default.hasVal(lineFrom) && Algo_1.default.hasVal(lineTo)) {
205
+ if (lineCounter >= lineFrom && lineCounter < lineTo) {
206
+ lines.push(line);
207
+ }
208
+ lineCounter++;
209
+ if (lineCounter >= lineTo)
210
+ break;
211
+ }
212
+ else {
213
+ lines.push(line);
214
+ }
215
+ }
216
+ }
217
+ catch (e_1_1) { e_1 = { error: e_1_1 }; }
218
+ finally {
219
+ try {
220
+ if (!_d && !_a && (_b = reader_1.return)) yield _b.call(reader_1);
221
+ }
222
+ finally { if (e_1) throw e_1.error; }
223
+ }
224
+ reader.close();
225
+ return lines;
226
+ });
227
+ this._readExcelLines = (stream, sheetName, lineFrom, lineTo) => __awaiter(this, void 0, void 0, function* () {
228
+ var _a, stream_1, stream_1_1;
229
+ var _b, e_2, _c, _d;
230
+ (0, Affirm_1.default)(sheetName, `Invalid sheetname`);
231
+ const chunks = [];
232
+ try {
233
+ for (_a = true, stream_1 = __asyncValues(stream); stream_1_1 = yield stream_1.next(), _b = stream_1_1.done, !_b; _a = true) {
234
+ _d = stream_1_1.value;
235
+ _a = false;
236
+ const chunk = _d;
237
+ chunks.push(chunk);
238
+ }
239
+ }
240
+ catch (e_2_1) { e_2 = { error: e_2_1 }; }
241
+ finally {
242
+ try {
243
+ if (!_a && !_b && (_c = stream_1.return)) yield _c.call(stream_1);
244
+ }
245
+ finally { if (e_2) throw e_2.error; }
246
+ }
247
+ const buffer = Buffer.concat(chunks);
248
+ const excel = xlsx_1.default.read(buffer, { type: 'buffer' });
249
+ (0, Affirm_1.default)(excel.SheetNames.includes(sheetName), `The sheet "${sheetName}" doesn't exist in the excel (available: ${excel.SheetNames.join(', ')})`);
250
+ const sheet = excel.Sheets[sheetName];
251
+ const csv = xlsx_1.default.utils.sheet_to_csv(sheet);
252
+ const lines = csv.split('\n');
253
+ if (Algo_1.default.hasVal(lineFrom) && Algo_1.default.hasVal(lineTo))
254
+ return lines.slice(lineFrom, lineTo + 1);
255
+ else
256
+ return lines;
257
+ });
258
+ }
259
+ }
260
+ exports.S3SourceDriver = S3SourceDriver;
@@ -18,9 +18,9 @@ class ParseManagerClass {
18
18
  (0, Affirm_1.default)(lines, 'Invalid csv lines');
19
19
  Affirm_1.default.hasValue(lines.length, 'Invalid csv lines length');
20
20
  const delimiterChar = (_a = producer.settings.delimiter) !== null && _a !== void 0 ? _a : ',';
21
- const headerRow = lines[0];
22
- const rows = lines.slice(1).map(x => x.split(delimiterChar).map(k => k.trim()));
23
- const headerColumns = this._extractHeader(headerRow, delimiterChar, producer, discover);
21
+ const { header, records } = this._getClassifiedRows(lines, delimiterChar, producer);
22
+ const headerColumns = this._extractHeader(header, delimiterChar, producer, discover);
23
+ const rows = records.map(x => x.split(delimiterChar).map(k => k.trim()));
24
24
  const result = [];
25
25
  for (const row of rows) {
26
26
  const rowObject = {};
@@ -32,6 +32,23 @@ class ParseManagerClass {
32
32
  }
33
33
  return result;
34
34
  };
35
+ this._getClassifiedRows = (lines, delimiterChar, producer) => {
36
+ if (producer.settings.fileType === 'TXT' && !producer.settings.hasHeaderRow) {
37
+ // If the file is a TXT and there isn't an header row, then I add a fake one that maps directly to the producer
38
+ const source = Environment_1.default.getSource(producer.source);
39
+ const columns = FileCompiler_1.default.compileProducer(producer, source);
40
+ return {
41
+ header: columns.map(x => x.nameInProducer).join(delimiterChar),
42
+ records: lines
43
+ };
44
+ }
45
+ else {
46
+ return {
47
+ header: lines[0],
48
+ records: lines.slice(1)
49
+ };
50
+ }
51
+ };
35
52
  this._extractHeader = (headerLine, delimiter, producer, discover) => {
36
53
  var _a;
37
54
  (0, Affirm_1.default)(headerLine, `Invalid CSV header line for producer "${producer.name}"`);
@@ -91,19 +91,24 @@ class ProducerEngineClass {
91
91
  (0, Affirm_1.default)(source, `No source found for producer "${producer.name}" with name "${producer.source}"`);
92
92
  const driver = yield DriverFactory_1.default.instantiateSource(source);
93
93
  (0, Affirm_1.default)(driver, `No driver found for producer "${producer.name}" with driver type "${source.engine}"`);
94
+ const { settings: { fileKey, fileType, sheetName } } = producer;
94
95
  let lines = [];
95
96
  if (options.readmode === 'lines')
96
- lines = yield driver.readLinesInRange({ fileKey: producer.settings.fileKey, lineFrom: options.lines.from, lineTo: options.lines.to });
97
+ lines = yield driver.readLinesInRange({ fileKey, fileType, options: { lineFrom: options.lines.from, lineTo: options.lines.to, sheetName } });
97
98
  else
98
- lines = yield driver.download({ fileKey: producer.settings.fileKey });
99
+ lines = yield driver.download({ fileKey, fileType, options: { sheetName } });
99
100
  switch ((_a = producer.settings.fileType) === null || _a === void 0 ? void 0 : _a.toUpperCase()) {
100
- case 'CSV': {
101
+ case 'CSV':
102
+ case 'TXT':
103
+ return { data: lines, dataType: 'lines-of-text' };
104
+ case 'XLS':
105
+ case 'XLSX':
101
106
  return { data: lines, dataType: 'lines-of-text' };
102
- }
103
107
  case 'JSONL':
104
108
  case 'JSON': {
105
- if (lines.length === 1)
109
+ if (lines.length === 1) {
106
110
  lines = lines[0].split('\n');
111
+ }
107
112
  const json = lines.map(x => JSON.parse(x));
108
113
  return { data: json, dataType: 'array-of-json' };
109
114
  }
@@ -112,7 +117,7 @@ class ProducerEngineClass {
112
117
  }
113
118
  });
114
119
  this.readSampleData = (producer_1, ...args_1) => __awaiter(this, [producer_1, ...args_1], void 0, function* (producer, sampleSize = 10, discover = false) {
115
- var _a, _b, _c;
120
+ var _a;
116
121
  (0, Affirm_1.default)(producer, 'Invalid producer');
117
122
  (0, Affirm_1.default)(sampleSize > 0, 'Sample size must be greater than 0');
118
123
  const source = Environment_1.default.getSource(producer.source);
@@ -131,18 +136,29 @@ class ProducerEngineClass {
131
136
  case 'local':
132
137
  case 'aws-s3': {
133
138
  const fileData = yield this.readFile(producer, { readmode: 'lines', lines: { from: 0, to: sampleSize } });
134
- if (((_a = producer.settings.fileType) === null || _a === void 0 ? void 0 : _a.toUpperCase()) === 'CSV') {
135
- sampleData = ParseManager_1.default.csvLinesToJson(fileData.data, producer, discover);
136
- }
137
- else if (((_b = producer.settings.fileType) === null || _b === void 0 ? void 0 : _b.toUpperCase()) === 'JSON' || ((_c = producer.settings.fileType) === null || _c === void 0 ? void 0 : _c.toUpperCase()) === 'JSONL') {
138
- // With JSON or JSONL the readFile function already parses the strings
139
- if (typeof fileData.data[0] === 'object')
139
+ const fileType = (_a = producer.settings.fileType) === null || _a === void 0 ? void 0 : _a.toUpperCase();
140
+ switch (fileType) {
141
+ case 'CSV':
142
+ case 'TXT':
143
+ case 'XLS':
144
+ case 'XLSX': {
145
+ sampleData = ParseManager_1.default.csvLinesToJson(fileData.data, producer, discover);
146
+ break;
147
+ }
148
+ case 'JSON':
149
+ case 'JSONL': {
150
+ // With JSON or JSONL the readFile function already parses the strings
151
+ if (typeof fileData.data[0] === 'object')
152
+ sampleData = fileData.data;
153
+ else
154
+ sampleData = fileData.data.map(line => JSON.parse(line));
155
+ sampleData = sampleData.slice(0, sampleSize);
156
+ break;
157
+ }
158
+ default: {
140
159
  sampleData = fileData.data;
141
- else
142
- sampleData = fileData.data.map(line => JSON.parse(line));
143
- }
144
- else {
145
- sampleData = fileData.data;
160
+ break;
161
+ }
146
162
  }
147
163
  break;
148
164
  }
package/engines/ai/LLM.js CHANGED
@@ -202,14 +202,14 @@ class LLM {
202
202
  $schema: zod_2.z.string().describe('The schema of the producer. This should always be the same.'),
203
203
  name: zod_2.z.string(),
204
204
  description: zod_2.z.string(),
205
+ source: zod_2.z.string().describe('The name of the source linked to this producer.'),
206
+ settings: zod_2.z.object({
207
+ fileKey: zod_2.z.string().describe('The name of the file'),
208
+ fileType: zod_2.z.string().describe('The file extension (CSV | JSONL | JSON)')
209
+ }),
205
210
  dimensions: zod_2.z.array(zod_2.z.object({
206
211
  name: zod_2.z.string(),
207
212
  // alias: z.string().optional(),
208
- source: zod_2.z.string().describe('The name of the source linked to this producer.'),
209
- settings: zod_2.z.object({
210
- fileKey: zod_2.z.string().describe('The name of the file'),
211
- fileType: zod_2.z.string().describe('The file extension (CSV | JSONL | JSON)')
212
- }),
213
213
  description: zod_2.z.string().optional(),
214
214
  type: zod_2.z.enum(['string', 'number', 'datetime']),
215
215
  pk: zod_2.z.boolean().optional(),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forzalabs/remora",
3
- "version": "0.0.26",
3
+ "version": "0.0.28",
4
4
  "description": "A powerful CLI tool for seamless data translation.",
5
5
  "main": "index.js",
6
6
  "private": false,
@@ -9,6 +9,7 @@
9
9
  },
10
10
  "scripts": {
11
11
  "sync": "cd ../dev_ops && npm run sync",
12
+ "tsc-check": "npx tsc --noemit",
12
13
  "init": "npx tsx ./src/index.ts init",
13
14
  "version": "npx tsx ./src/index.ts -v",
14
15
  "run": "npx tsx ./src/index.ts run",
@@ -53,6 +54,7 @@
53
54
  "ora": "^5.4.1",
54
55
  "react": "^18.2.0",
55
56
  "react-dom": "^18.2.0",
57
+ "xlsx": "^0.18.5",
56
58
  "zod": "^3.24.2"
57
59
  },
58
60
  "devDependencies": {