@forzalabs/remora 1.0.1 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/engines/scheduler/CronScheduler.js +2 -2
  2. package/engines/scheduler/QueueManager.js +2 -2
  3. package/package.json +1 -1
  4. package/settings.js +12 -0
  5. package/documentation/default_resources/schema.json +0 -36
  6. package/drivers/LocalDriver.js +0 -542
  7. package/drivers/S3Driver.js +0 -563
  8. package/drivers/S3SourceDriver.js +0 -132
  9. package/engines/DataframeManager.js +0 -55
  10. package/engines/ParseManager.js +0 -75
  11. package/engines/ProducerEngine.js +0 -160
  12. package/engines/UsageDataManager.js +0 -110
  13. package/engines/UsageManager.js +0 -61
  14. package/engines/Validator.js +0 -157
  15. package/engines/consumer/ConsumerEngine.js +0 -128
  16. package/engines/consumer/PostProcessor.js +0 -253
  17. package/engines/dataset/ParallelDataset.js +0 -184
  18. package/engines/dataset/TransformWorker.js +0 -2
  19. package/engines/dataset/definitions.js +0 -2
  20. package/engines/dataset/example-parallel-transform.js +0 -2
  21. package/engines/dataset/test-parallel.js +0 -2
  22. package/engines/deployment/DeploymentPlanner.js +0 -39
  23. package/engines/execution/ExecutionEnvironment.js +0 -209
  24. package/engines/execution/ExecutionPlanner.js +0 -131
  25. package/engines/file/FileCompiler.js +0 -29
  26. package/engines/file/FileContentBuilder.js +0 -34
  27. package/engines/schema/SchemaEngine.js +0 -33
  28. package/engines/sql/SQLBuilder.js +0 -96
  29. package/engines/sql/SQLCompiler.js +0 -141
  30. package/engines/sql/SQLUtils.js +0 -22
  31. package/workers/FilterWorker.js +0 -62
  32. package/workers/ProjectionWorker.js +0 -63
  33. package/workers/TransformWorker.js +0 -63
  34. package/workers/TsWorker.js +0 -14
package/engines/scheduler/CronScheduler.js CHANGED
@@ -47,8 +47,8 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
47
47
  Object.defineProperty(exports, "__esModule", { value: true });
48
48
  const cron = __importStar(require("node-cron"));
49
49
  const Environment_1 = __importDefault(require("../Environment"));
50
- const UserManager_1 = __importDefault(require("../UserManager"));
51
50
  const ExecutorOrchestrator_1 = __importDefault(require("../../executors/ExecutorOrchestrator"));
51
+ const settings_1 = require("../../settings");
52
52
  class CronScheduler {
53
53
  constructor() {
54
54
  this.scheduledJobs = new Map();
@@ -124,7 +124,7 @@ class CronScheduler {
124
124
  return __awaiter(this, void 0, void 0, function* () {
125
125
  try {
126
126
  console.log(`Executing CRON job for consumer "${consumer.name}" output ${outputIndex}`);
127
- const user = UserManager_1.default.getRemoraWorkerUser();
127
+ const user = settings_1.REMORA_WORKER_USER;
128
128
  const runner = { _id: user._id, name: user.name, type: 'actor' };
129
129
  const result = yield ExecutorOrchestrator_1.default.launch({
130
130
  consumer,
package/engines/scheduler/QueueManager.js CHANGED
@@ -14,9 +14,9 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
14
14
  Object.defineProperty(exports, "__esModule", { value: true });
15
15
  const client_sqs_1 = require("@aws-sdk/client-sqs");
16
16
  const Environment_1 = __importDefault(require("../Environment"));
17
- const UserManager_1 = __importDefault(require("../UserManager"));
18
17
  const SecretManager_1 = __importDefault(require("../SecretManager"));
19
18
  const ExecutorOrchestrator_1 = __importDefault(require("../../executors/ExecutorOrchestrator"));
19
+ const settings_1 = require("../../settings");
20
20
  class QueueManager {
21
21
  constructor() {
22
22
  this.queueMappings = new Map();
@@ -198,7 +198,7 @@ class QueueManager {
198
198
  }
199
199
  }
200
200
  console.log(`Processing queue message for consumer "${mapping.consumer.name}" output ${mapping.outputIndex}`);
201
- const user = UserManager_1.default.getRemoraWorkerUser();
201
+ const user = settings_1.REMORA_WORKER_USER;
202
202
  const result = yield ExecutorOrchestrator_1.default.launch({
203
203
  consumer: mapping.consumer,
204
204
  details: {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forzalabs/remora",
3
- "version": "1.0.1",
3
+ "version": "1.0.3",
4
4
  "description": "A powerful CLI tool for seamless data translation.",
5
5
  "main": "index.js",
6
6
  "private": false,
package/settings.js ADDED
@@ -0,0 +1,12 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.REMORA_WORKER_USER = void 0;
4
+ exports.REMORA_WORKER_USER = {
5
+ _id: '__remora_worker__',
6
+ auth: { oid: '', provider: 'internal' },
7
+ email: '',
8
+ name: 'Remora Worker',
9
+ roles: ['root'],
10
+ _signature: '',
11
+ lastLogin: new Date().toJSON()
12
+ };
package/documentation/default_resources/schema.json REMOVED
@@ -1,36 +0,0 @@
1
- {
2
- "$schema": "http://json-schema.org/draft-07/schema#",
3
- "title": "<example schema>",
4
- "type": "object",
5
- "version": 1,
6
- "description": "<schema description>",
7
- "properties": {
8
- "<required id field>": {
9
- "type": "<number | string | etc.>",
10
- "description": "<field description>"
11
- },
12
- "<required name field>": {
13
- "type": "<string>",
14
- "description": "<field description>"
15
- },
16
- "<status field with enum>": {
17
- "type": "<string>",
18
- "description": "<field description>",
19
- "enum": [
20
- "<example status 1>",
21
- "<example status 2>",
22
- "<example status 3>"
23
- ]
24
- },
25
- "<datetime field example>": {
26
- "type": "<string>",
27
- "format": "<date-time>",
28
- "description": "<field description>"
29
- }
30
- },
31
- "required": [
32
- "<required id field>",
33
- "<required name field>"
34
- ],
35
- "additionalProperties": false
36
- }
package/drivers/LocalDriver.js REMOVED
@@ -1,542 +0,0 @@
1
- "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
- Object.defineProperty(o, "default", { enumerable: true, value: v });
15
- }) : function(o, v) {
16
- o["default"] = v;
17
- });
18
- var __importStar = (this && this.__importStar) || (function () {
19
- var ownKeys = function(o) {
20
- ownKeys = Object.getOwnPropertyNames || function (o) {
21
- var ar = [];
22
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
- return ar;
24
- };
25
- return ownKeys(o);
26
- };
27
- return function (mod) {
28
- if (mod && mod.__esModule) return mod;
29
- var result = {};
30
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
- __setModuleDefault(result, mod);
32
- return result;
33
- };
34
- })();
35
- var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
36
- function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
37
- return new (P || (P = Promise))(function (resolve, reject) {
38
- function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
39
- function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
40
- function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
41
- step((generator = generator.apply(thisArg, _arguments || [])).next());
42
- });
43
- };
44
- var __asyncValues = (this && this.__asyncValues) || function (o) {
45
- if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
46
- var m = o[Symbol.asyncIterator], i;
47
- return m ? m.call(o) : (o = typeof __values === "function" ? __values(o) : o[Symbol.iterator](), i = {}, verb("next"), verb("throw"), verb("return"), i[Symbol.asyncIterator] = function () { return this; }, i);
48
- function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }
49
- function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }
50
- };
51
- var __importDefault = (this && this.__importDefault) || function (mod) {
52
- return (mod && mod.__esModule) ? mod : { "default": mod };
53
- };
54
- Object.defineProperty(exports, "__esModule", { value: true });
55
- exports.LocalDestinationDriver = exports.LocalSourceDriver = void 0;
56
- const fs = __importStar(require("fs"));
57
- const path_1 = __importDefault(require("path"));
58
- const readline_1 = __importDefault(require("readline"));
59
- const Affirm_1 = __importDefault(require("../core/Affirm"));
60
- const Algo_1 = __importDefault(require("../core/Algo"));
61
- const xlsx_1 = __importDefault(require("xlsx"));
62
- const XMLParser_1 = __importDefault(require("../engines/parsing/XMLParser"));
63
- const XLSParser_1 = __importDefault(require("../engines/parsing/XLSParser"));
64
- const Helper_1 = __importDefault(require("../helper/Helper"));
65
- const ParseHelper_1 = __importDefault(require("../engines/parsing/ParseHelper"));
66
- const FileExporter_1 = __importDefault(require("../engines/file/FileExporter"));
67
- const Logger_1 = __importDefault(require("../helper/Logger"));
68
- const DriverHelper_1 = __importDefault(require("./DriverHelper"));
69
- const stream_1 = require("stream");
70
- class LocalSourceDriver {
71
- constructor() {
72
- this.init = (source) => __awaiter(this, void 0, void 0, function* () {
73
- const fileURL = source.authentication['path'];
74
- (0, Affirm_1.default)(fileURL, `Missing file path in the authentication of source "${source.name}"`);
75
- const exist = fs.existsSync(fileURL);
76
- (0, Affirm_1.default)(exist, `The path (${fileURL}) for source "${source.name}" does NOT exist.`);
77
- this._path = source.authentication['path'];
78
- return this;
79
- });
80
- this.readAll = (request) => __awaiter(this, void 0, void 0, function* () {
81
- (0, Affirm_1.default)(this._path, `Invalid path`);
82
- (0, Affirm_1.default)(request, `Invalid download request`);
83
- (0, Affirm_1.default)(request.fileKey, `Invalid file key for download request`);
84
- (0, Affirm_1.default)(request.fileType, `Invalid file type for download request`);
85
- const { fileKey } = request;
86
- if (fileKey.includes('%')) {
87
- const allFileKeys = this.listFiles(fileKey);
88
- Logger_1.default.log(`Matched ${allFileKeys.length} files, copying to locally and creating unified dataset.`);
89
- const firstPath = path_1.default.join(this._path, allFileKeys[0]);
90
- const headerLine = (yield DriverHelper_1.default.quickReadFile(firstPath, 1))[0];
91
- const promises = allFileKeys.map((x, i) => this._get(Object.assign(Object.assign({}, request), { fileKey: x }), headerLine, i));
92
- const results = yield Promise.all(promises);
93
- return results.flat();
94
- }
95
- else {
96
- return yield this._get(request, '');
97
- }
98
- });
99
- this.readLinesInRange = (request) => __awaiter(this, void 0, void 0, function* () {
100
- (0, Affirm_1.default)(this._path, `Invalid path`);
101
- (0, Affirm_1.default)(request, 'Invalid read options');
102
- (0, Affirm_1.default)(request.fileKey, 'Invalid file key');
103
- (0, Affirm_1.default)(request.fileType, `Invalid file type`);
104
- (0, Affirm_1.default)(request.options, `Invalid request options`);
105
- Affirm_1.default.hasValue(request.options.lineFrom, `Invalid request options line from`);
106
- Affirm_1.default.hasValue(request.options.lineTo, `Invalid request options line to`);
107
- const { fileKey } = request;
108
- if (fileKey.includes('%')) {
109
- const allFileKeys = this.listFiles(fileKey);
110
- Logger_1.default.log(`Matched ${allFileKeys.length} files, copying to locally and creating unified dataset.`);
111
- const firstPath = path_1.default.join(this._path, allFileKeys[0]);
112
- const headerLine = (yield DriverHelper_1.default.quickReadFile(firstPath, 1))[0];
113
- const promises = allFileKeys.map((x, i) => this._get(Object.assign(Object.assign({}, request), { fileKey: x }), headerLine, i));
114
- const results = yield Promise.all(promises);
115
- return results.flat();
116
- }
117
- else {
118
- return yield this._get(request, '');
119
- }
120
- });
121
- this.download = (dataset) => __awaiter(this, void 0, void 0, function* () {
122
- (0, Affirm_1.default)(this._path, `Invalid path`);
123
- (0, Affirm_1.default)(dataset, `Invalid dataset`);
124
- const file = dataset.getFile();
125
- (0, Affirm_1.default)(file, 'Invalid dataset file');
126
- (0, Affirm_1.default)(file.fileKey, 'Invalid file key');
127
- (0, Affirm_1.default)(file.fileType, `Invalid file type`);
128
- const includeSourceFilename = file.includeSourceFilename === true;
129
- const copyLocally = (fileKey_1, headerLine_1, ...args_1) => __awaiter(this, [fileKey_1, headerLine_1, ...args_1], void 0, function* (fileKey, headerLine, appendMode = false, sourceFilename, stream) {
130
- const sourceFilePath = path_1.default.join(this._path, fileKey);
131
- (0, Affirm_1.default)(fs.existsSync(sourceFilePath), `Source file does not exist: ${sourceFilePath}`);
132
- // Copy and validate header in a single stream pass
133
- const readStream = fs.createReadStream(sourceFilePath);
134
- let streamToUse = readStream;
135
- if (['XLS', 'XLSX'].includes(file.fileType))
136
- streamToUse = stream;
137
- else
138
- streamToUse = readStream;
139
- return DriverHelper_1.default.appendToUnifiedFile({
140
- stream: streamToUse,
141
- fileKey,
142
- destinationPath: dataset.getPath(),
143
- append: appendMode,
144
- headerLine,
145
- fileType: file.fileType,
146
- hasHeaderRow: file.hasHeaderRow,
147
- delimiter: dataset.getDelimiter(),
148
- sourceFilename
149
- });
150
- });
151
- const getTotalLineCount = (fileKey, appendMode, fileType, sourceFilename) => __awaiter(this, void 0, void 0, function* () {
152
- let totalLineCount;
153
- let streamXLS;
154
- switch (fileType) {
155
- case 'XLS':
156
- case 'XLSX':
157
- streamXLS = (yield XLSParser_1.default.getStreamXls(path_1.default.join(this._path, fileKey), file.sheetName));
158
- totalLineCount = yield copyLocally(fileKey, dataset.getFirstLine(), appendMode, sourceFilename, streamXLS);
159
- break;
160
- default:
161
- totalLineCount = yield copyLocally(fileKey, dataset.getFirstLine(), false, sourceFilename);
162
- break;
163
- }
164
- return totalLineCount;
165
- });
166
- const { fileKey } = file;
167
- let totalLineCount = 0;
168
- let sourceFilename;
169
- if (fileKey.includes('%')) {
170
- const allFileKeys = this.listFiles(fileKey);
171
- yield DriverHelper_1.default.setHeaderFromFile(allFileKeys[0], file, this._path, dataset);
172
- Logger_1.default.log(`Matched ${allFileKeys.length} files, copying locally and creating unified dataset.`);
173
- Affirm_1.default.hasItems(allFileKeys, `The file key "${fileKey}" doesn't have any matches in path "${this._path}".`);
174
- totalLineCount = 0;
175
- // Copy files sequentially to avoid file conflicts
176
- for (let i = 0; i < allFileKeys.length; i++) {
177
- const currentFileKey = allFileKeys[i];
178
- // Pass the filename (just the basename) if includeSourceFilename is enabled
179
- const sourceFilename = includeSourceFilename ? path_1.default.basename(currentFileKey) : undefined;
180
- totalLineCount += yield getTotalLineCount(currentFileKey, true, file.fileType, sourceFilename); // Append mode for subsequent files
181
- }
182
- dataset.setCount(totalLineCount);
183
- return dataset;
184
- }
185
- else {
186
- sourceFilename = includeSourceFilename ? path_1.default.basename(fileKey) : undefined;
187
- yield DriverHelper_1.default.setHeaderFromFile(fileKey, file, this._path, dataset);
188
- totalLineCount = (yield getTotalLineCount(fileKey, false, file.fileType, sourceFilename));
189
- dataset.setCount(totalLineCount);
190
- return dataset;
191
- }
192
- });
193
- this.exist = (producer) => __awaiter(this, void 0, void 0, function* () {
194
- (0, Affirm_1.default)(this._path, `Invalid path`);
195
- (0, Affirm_1.default)(producer, `Invalid producer`);
196
- const fileKey = producer.settings.fileKey;
197
- (0, Affirm_1.default)(fileKey, `Invalid file key for download request`);
198
- if (fileKey.includes('%')) {
199
- const allFileKeys = this.listFiles(fileKey);
200
- return allFileKeys.length > 0;
201
- }
202
- else {
203
- const fileUrl = path_1.default.join(this._path, fileKey);
204
- return fs.existsSync(fileUrl);
205
- }
206
- });
207
- this._readLines = (fileUri, lineFrom, lineTo) => __awaiter(this, void 0, void 0, function* () {
208
- var _a, e_1, _b, _c;
209
- const stream = fs.createReadStream(fileUri);
210
- const reader = readline_1.default.createInterface({ input: stream, crlfDelay: Infinity });
211
- const lines = [];
212
- let lineCounter = 0;
213
- try {
214
- for (var _d = true, reader_1 = __asyncValues(reader), reader_1_1; reader_1_1 = yield reader_1.next(), _a = reader_1_1.done, !_a; _d = true) {
215
- _c = reader_1_1.value;
216
- _d = false;
217
- const line = _c;
218
- if (Algo_1.default.hasVal(lineFrom) && Algo_1.default.hasVal(lineTo)) {
219
- if (lineCounter >= lineFrom && lineCounter < lineTo) {
220
- if (line && line.length > 0)
221
- lines.push(line);
222
- }
223
- lineCounter++;
224
- if (lineCounter >= lineTo)
225
- break;
226
- }
227
- else {
228
- if (line && line.length > 0)
229
- lines.push(line);
230
- }
231
- }
232
- }
233
- catch (e_1_1) { e_1 = { error: e_1_1 }; }
234
- finally {
235
- try {
236
- if (!_d && !_a && (_b = reader_1.return)) yield _b.call(reader_1);
237
- }
238
- finally { if (e_1) throw e_1.error; }
239
- }
240
- reader.close();
241
- stream.close();
242
- return lines;
243
- });
244
- this._readExcelLines = (fileUri, sheetName, lineFrom, lineTo) => __awaiter(this, void 0, void 0, function* () {
245
- const excel = xlsx_1.default.readFile(fileUri);
246
- let targetSheetName = sheetName;
247
- if (!targetSheetName) {
248
- (0, Affirm_1.default)(excel.SheetNames.length > 0, 'The Excel file has no sheets.');
249
- targetSheetName = excel.SheetNames[0];
250
- }
251
- else {
252
- (0, Affirm_1.default)(excel.SheetNames.includes(targetSheetName), `The sheet "${targetSheetName}" doesn't exist in the excel (available: ${excel.SheetNames.join(', ')})`);
253
- }
254
- const sheet = excel.Sheets[targetSheetName];
255
- const csv = xlsx_1.default.utils.sheet_to_csv(sheet);
256
- const lines = csv.split('\n');
257
- if (Algo_1.default.hasVal(lineFrom) && Algo_1.default.hasVal(lineTo))
258
- return lines.slice(lineFrom, lineTo + 1);
259
- else
260
- return lines;
261
- });
262
- this._readXmlLines = (fileUri, lineFrom, lineTo) => __awaiter(this, void 0, void 0, function* () {
263
- const fileContent = fs.readFileSync(fileUri, 'utf-8');
264
- const jsonData = XMLParser_1.default.xmlToJson(fileContent);
265
- // Convert JSON data to string lines. This might need adjustment based on XML structure.
266
- // Assuming jsonData is an array of objects, where each object is a record.
267
- let lines = Array.isArray(jsonData) ? jsonData.map(item => JSON.stringify(item)) : [JSON.stringify(jsonData)];
268
- if (Algo_1.default.hasVal(lineFrom) && Algo_1.default.hasVal(lineTo)) {
269
- lines = lines.slice(lineFrom, lineTo + 1);
270
- }
271
- return lines;
272
- });
273
- this._get = (request, headerLine, index) => __awaiter(this, void 0, void 0, function* () {
274
- const { fileKey, fileType, options } = request;
275
- let lineFrom, lineTo, sheetName, hasHeaderRow;
276
- if (options) {
277
- lineFrom = options.lineFrom;
278
- lineTo = options.lineTo;
279
- sheetName = options.sheetName;
280
- hasHeaderRow = options.hasHeaderRow;
281
- }
282
- const fileUrl = path_1.default.join(this._path, fileKey);
283
- let lines = [];
284
- switch (fileType) {
285
- case 'CSV':
286
- case 'JSON':
287
- case 'JSONL':
288
- case 'TXT':
289
- if (Algo_1.default.hasVal(lineFrom) && Algo_1.default.hasVal(lineTo))
290
- lines = yield this._readLines(fileUrl, lineFrom, lineTo);
291
- else
292
- lines = yield this._readLines(fileUrl);
293
- break;
294
- case 'XLS':
295
- case 'XLSX':
296
- if (Algo_1.default.hasVal(lineFrom) && Algo_1.default.hasVal(lineTo))
297
- lines = yield this._readExcelLines(fileUrl, sheetName, lineFrom, lineTo);
298
- else
299
- lines = yield this._readExcelLines(fileUrl, sheetName);
300
- break;
301
- case 'XML':
302
- if (Algo_1.default.hasVal(lineFrom) && Algo_1.default.hasVal(lineTo))
303
- lines = yield this._readXmlLines(fileUrl, lineFrom, lineTo);
304
- else
305
- lines = yield this._readXmlLines(fileUrl);
306
- break;
307
- }
308
- const firstLine = lines[0];
309
- if (headerLine && headerLine.trim() !== '' && firstLine.trim() !== headerLine.trim()) {
310
- const msg = `Error creating unified dataset: file "${fileKey}" has a different header line than the other files in this dataset\n\t-${fileKey}: ${firstLine}\n\t-main: ${headerLine}`;
311
- Logger_1.default.log(msg);
312
- throw new Error(msg);
313
- }
314
- // If this is not the first file read in a pattern match AND the file type has an header,
315
- // then I need to remove the header from the resulting lines or the header will be duplicated
316
- if (index > 0 && ParseHelper_1.default.shouldHaveHeader(fileType, hasHeaderRow)) {
317
- lines = lines.slice(1);
318
- }
319
- return lines;
320
- });
321
- this.listFiles = (filekeyPattern) => {
322
- (0, Affirm_1.default)(this._path, 'Path not initialized');
323
- try {
324
- // Get all files in the directory (recursively if needed)
325
- const getAllFiles = (dirPath, basePath = '') => {
326
- const files = [];
327
- const items = fs.readdirSync(dirPath);
328
- for (const item of items) {
329
- const fullPath = path_1.default.join(dirPath, item);
330
- const relativePath = basePath ? path_1.default.join(basePath, item) : item;
331
- const stats = fs.statSync(fullPath);
332
- if (stats.isDirectory()) {
333
- // Recursively get files from subdirectories
334
- files.push(...getAllFiles(fullPath, relativePath));
335
- }
336
- else if (stats.isFile()) {
337
- files.push(relativePath);
338
- }
339
- }
340
- return files;
341
- };
342
- const allFiles = getAllFiles(this._path);
343
- return Helper_1.default.matchPattern(filekeyPattern, allFiles);
344
- }
345
- catch (error) {
346
- throw new Error(`Failed to list files in directory "${this._path}": ${error.message}`);
347
- }
348
- };
349
- this.readFile = (fileKey) => {
350
- (0, Affirm_1.default)(this._path, 'Path not initialized');
351
- (0, Affirm_1.default)(fileKey, 'Invalid file key');
352
- const filePath = path_1.default.join(this._path, fileKey);
353
- (0, Affirm_1.default)(fs.existsSync(filePath), `Source file does not exist: ${filePath}`);
354
- return fs.readFileSync(filePath);
355
- };
356
- this.deleteFile = (fileKey) => {
357
- (0, Affirm_1.default)(this._path, 'Path not initialized');
358
- (0, Affirm_1.default)(fileKey, 'Invalid file key');
359
- const filePath = path_1.default.join(this._path, fileKey);
360
- if (fs.existsSync(filePath)) {
361
- fs.unlinkSync(filePath);
362
- }
363
- };
364
- this.moveFile = (sourceFileKey, destinationPath, destinationFileKey) => {
365
- (0, Affirm_1.default)(this._path, 'Path not initialized');
366
- (0, Affirm_1.default)(sourceFileKey, 'Invalid source file key');
367
- (0, Affirm_1.default)(destinationPath, 'Invalid destination path');
368
- (0, Affirm_1.default)(destinationFileKey, 'Invalid destination file key');
369
- const sourceFilePath = path_1.default.join(this._path, sourceFileKey);
370
- const destinationFilePath = path_1.default.join(destinationPath, destinationFileKey);
371
- (0, Affirm_1.default)(fs.existsSync(sourceFilePath), `Source file does not exist: ${sourceFilePath}`);
372
- // Ensure destination directory exists
373
- const destinationDir = path_1.default.dirname(destinationFilePath);
374
- if (!fs.existsSync(destinationDir)) {
375
- fs.mkdirSync(destinationDir, { recursive: true });
376
- }
377
- fs.renameSync(sourceFilePath, destinationFilePath);
378
- };
379
- this.ready = (producer) => __awaiter(this, void 0, void 0, function* () {
380
- (0, Affirm_1.default)(producer, 'Invalid producer');
381
- const { fileKey } = producer.settings;
382
- if (fileKey.includes('%')) {
383
- const allFileKeys = this.listFiles(fileKey);
384
- const allFilePaths = allFileKeys.map(x => path_1.default.join(this._path, x));
385
- const readStreams = allFilePaths.map(x => fs.createReadStream(x));
386
- let pass = new stream_1.PassThrough();
387
- for (const [index, stream] of readStreams.entries())
388
- pass = stream.pipe(pass, { end: index === readStreams.length - 1 });
389
- return pass;
390
- }
391
- else {
392
- const sourceFilePath = path_1.default.join(this._path, fileKey);
393
- const readStream = fs.createReadStream(sourceFilePath);
394
- return readStream;
395
- }
396
- });
397
- }
398
- }
399
- exports.LocalSourceDriver = LocalSourceDriver;
400
- class LocalDestinationDriver {
401
- constructor() {
402
- this.init = (source) => __awaiter(this, void 0, void 0, function* () {
403
- (0, Affirm_1.default)(source, `Invalid source`);
404
- const fileURL = source.authentication['path'];
405
- (0, Affirm_1.default)(fileURL, `Missing file path in the authentication of source "${source.name}"`);
406
- const exist = fs.existsSync(fileURL);
407
- (0, Affirm_1.default)(exist, `The path (${fileURL}) for source "${source.name}" does NOT exist.`);
408
- this._path = source.authentication['path'];
409
- return this;
410
- });
411
- this.uploadFile = (options) => __awaiter(this, void 0, void 0, function* () {
412
- (0, Affirm_1.default)(this._path, 'Path not initialized');
413
- (0, Affirm_1.default)(options, 'Invalid upload options');
414
- (0, Affirm_1.default)(options.name, 'File name is required');
415
- (0, Affirm_1.default)(options.content != null, 'File content is required');
416
- const folder = this._path;
417
- try {
418
- if (!fs.existsSync(folder))
419
- fs.mkdirSync(folder, { recursive: true });
420
- const filePath = path_1.default.join(folder, options.name);
421
- fs.writeFileSync(filePath, options.content);
422
- return { bucket: folder, key: filePath, res: true };
423
- }
424
- catch (error) {
425
- throw new Error(`Failed to upload local file "${options.name}": ${error.message}`);
426
- }
427
- });
428
- this.uploadStream = (options) => __awaiter(this, void 0, void 0, function* () {
429
- (0, Affirm_1.default)(options, `Invalid upload options`);
430
- const { dataset, name, recordProjection } = options;
431
- (0, Affirm_1.default)(dataset, 'No streaming dataset');
432
- (0, Affirm_1.default)(name, 'No filename provided for upload stream');
433
- (0, Affirm_1.default)(recordProjection, 'No recordProjection for upload stream');
434
- const folder = this._path;
435
- try {
436
- if (!fs.existsSync(folder))
437
- fs.mkdirSync(folder, { recursive: true });
438
- const filePath = path_1.default.join(folder, options.name);
439
- fs.writeFileSync(filePath, '');
440
- yield dataset.streamBatches((batch) => __awaiter(this, void 0, void 0, function* () {
441
- const chunks = FileExporter_1.default.prepareBatch(batch, options);
442
- for (const chunk of chunks)
443
- fs.appendFileSync(filePath, chunk);
444
- }));
445
- return { bucket: folder, key: filePath, res: true };
446
- }
447
- catch (error) {
448
- // Clean up the partial file if it exists
449
- const filePath = path_1.default.join(folder, options.name);
450
- if (fs.existsSync(filePath)) {
451
- try {
452
- fs.unlinkSync(filePath);
453
- }
454
- catch (cleanupError) {
455
- console.error(`Failed to clean up partial file after error: ${cleanupError.message}`);
456
- throw cleanupError;
457
- }
458
- }
459
- throw new Error(`Failed to complete local multipart upload for "${options.name}": ${error.message}`);
460
- }
461
- });
462
- this.saveFile = (fileKey, content) => {
463
- (0, Affirm_1.default)(this._path, 'Path not initialized');
464
- (0, Affirm_1.default)(fileKey, 'Invalid file key');
465
- (0, Affirm_1.default)(content, 'Invalid content');
466
- const filePath = path_1.default.join(this._path, fileKey);
467
- const directory = path_1.default.dirname(filePath);
468
- // Create directory if it doesn't exist
469
- if (!fs.existsSync(directory)) {
470
- fs.mkdirSync(directory, { recursive: true });
471
- }
472
- fs.writeFileSync(filePath, content);
473
- return Promise.resolve();
474
- };
475
- this.copyFromLocal = (sourceFilePath, destinationFileKey) => {
476
- (0, Affirm_1.default)(this._path, 'Path not initialized');
477
- (0, Affirm_1.default)(sourceFilePath, 'Invalid source file path');
478
- (0, Affirm_1.default)(destinationFileKey, 'Invalid destination file key');
479
- const destinationFilePath = path_1.default.join(this._path, destinationFileKey);
480
- const destinationDir = path_1.default.dirname(destinationFilePath);
481
- // Ensure destination directory exists
482
- if (!fs.existsSync(destinationDir)) {
483
- fs.mkdirSync(destinationDir, { recursive: true });
484
- }
485
- fs.copyFileSync(sourceFilePath, destinationFilePath);
486
- };
487
- this.copyFromS3 = (s3Driver, sourceFileKey, destinationFileKey) => __awaiter(this, void 0, void 0, function* () {
488
- const fileContent = yield s3Driver.downloadFile(sourceFileKey);
489
- yield this.saveFile(destinationFileKey, fileContent);
490
- });
491
- this.ready = (destinationPath) => __awaiter(this, void 0, void 0, function* () {
492
- return fs.createWriteStream(destinationPath);
493
- });
494
- this.move = (fromPath, toName) => __awaiter(this, void 0, void 0, function* () {
495
- try {
496
- const toFilePath = path_1.default.join(this._path, toName);
497
- fs.renameSync(fromPath, toFilePath);
498
- return { bucket: '', key: toFilePath, res: true };
499
- }
500
- catch (error) {
501
- Logger_1.default.error(error);
502
- return { bucket: '', key: '', res: false };
503
- }
504
- });
505
- this.transformAndMove = (fromPath, transform, toName) => __awaiter(this, void 0, void 0, function* () {
506
- var _a, e_2, _b, _c;
507
- try {
508
- const toFilePath = path_1.default.join(this._path, toName);
509
- const decoder = new TextDecoder();
510
- const reader = fs.createReadStream(fromPath);
511
- const writer = fs.createWriteStream(toFilePath);
512
- try {
513
- for (var _d = true, reader_2 = __asyncValues(reader), reader_2_1; reader_2_1 = yield reader_2.next(), _a = reader_2_1.done, !_a; _d = true) {
514
- _c = reader_2_1.value;
515
- _d = false;
516
- const chunk = _c;
517
- const decoded = decoder.decode(chunk);
518
- const lines = decoded.split('\n');
519
- for (const line of lines) {
520
- writer.write(transform(line) + '\n');
521
- }
522
- }
523
- }
524
- catch (e_2_1) { e_2 = { error: e_2_1 }; }
525
- finally {
526
- try {
527
- if (!_d && !_a && (_b = reader_2.return)) yield _b.call(reader_2);
528
- }
529
- finally { if (e_2) throw e_2.error; }
530
- }
531
- writer.close();
532
- reader.close();
533
- return { bucket: '', key: toFilePath, res: true };
534
- }
535
- catch (error) {
536
- Logger_1.default.error(error);
537
- return { bucket: '', key: '', res: false };
538
- }
539
- });
540
- }
541
- }
542
- exports.LocalDestinationDriver = LocalDestinationDriver;