@forzalabs/remora 1.0.21 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/actions/automap.js +26 -42
- package/actions/compile.js +27 -43
- package/actions/create_consumer.js +24 -40
- package/actions/create_producer.js +16 -32
- package/actions/debug.js +18 -34
- package/actions/deploy.js +30 -46
- package/actions/discover.js +13 -29
- package/actions/init.js +29 -45
- package/actions/mock.js +16 -32
- package/actions/run.js +34 -52
- package/actions/sample.js +42 -58
- package/index.js +38 -43
- package/package.json +4 -4
- package/workers/ExecutorWorker.js +18 -32
- package/Constants.js +0 -34
- package/core/Affirm.js +0 -42
- package/core/Algo.js +0 -160
- package/core/dste/DSTE.js +0 -113
- package/core/logger/DebugLogService.js +0 -48
- package/core/logger/DevelopmentLogService.js +0 -70
- package/core/logger/LocalLogService.js +0 -70
- package/core/logger/Logger.js +0 -54
- package/database/DatabaseEngine.js +0 -149
- package/database/DatabaseStructure.js +0 -27
- package/definitions/DatasetDefinitions.js +0 -2
- package/definitions/ExecutorDefinitions.js +0 -2
- package/definitions/ProcessENV.js +0 -2
- package/definitions/agents/DestinationDriver.js +0 -2
- package/definitions/agents/SourceDriver.js +0 -2
- package/definitions/cli.js +0 -2
- package/definitions/database/ApiKeys.js +0 -2
- package/definitions/database/Stored.js +0 -7
- package/definitions/database/UsageStat.js +0 -2
- package/definitions/database/User.js +0 -2
- package/definitions/json_schemas/consumer-schema.json +0 -1226
- package/definitions/json_schemas/producer-schema.json +0 -308
- package/definitions/json_schemas/project-schema.json +0 -100
- package/definitions/json_schemas/source-schema.json +0 -249
- package/definitions/requests/ConsumerRequest.js +0 -2
- package/definitions/requests/Developer.js +0 -2
- package/definitions/requests/Mapping.js +0 -2
- package/definitions/requests/ProducerRequest.js +0 -2
- package/definitions/requests/Request.js +0 -2
- package/definitions/resources/Compiled.js +0 -2
- package/definitions/resources/Consumer.js +0 -2
- package/definitions/resources/Environment.js +0 -2
- package/definitions/resources/Library.js +0 -2
- package/definitions/resources/Producer.js +0 -2
- package/definitions/resources/Project.js +0 -2
- package/definitions/resources/Schema.js +0 -2
- package/definitions/resources/Source.js +0 -2
- package/definitions/temp.js +0 -2
- package/definitions/transform/Transformations.js +0 -2
- package/drivers/DeltaShareDriver.js +0 -186
- package/drivers/DriverFactory.js +0 -72
- package/drivers/DriverHelper.js +0 -248
- package/drivers/HttpApiDriver.js +0 -208
- package/drivers/RedshiftDriver.js +0 -184
- package/drivers/files/LocalDestinationDriver.js +0 -146
- package/drivers/files/LocalSourceDriver.js +0 -405
- package/drivers/s3/S3DestinationDriver.js +0 -197
- package/drivers/s3/S3SourceDriver.js +0 -495
- package/engines/CryptoEngine.js +0 -75
- package/engines/Environment.js +0 -170
- package/engines/ProcessENVManager.js +0 -83
- package/engines/RandomEngine.js +0 -47
- package/engines/SecretManager.js +0 -23
- package/engines/UserManager.js +0 -66
- package/engines/ai/AutoMapperEngine.js +0 -37
- package/engines/ai/DeveloperEngine.js +0 -497
- package/engines/ai/LLM.js +0 -255
- package/engines/consumer/ConsumerManager.js +0 -218
- package/engines/consumer/ConsumerOnFinishManager.js +0 -202
- package/engines/dataset/Dataset.js +0 -824
- package/engines/dataset/DatasetManager.js +0 -211
- package/engines/dataset/DatasetRecord.js +0 -120
- package/engines/dataset/DatasetRecordPool.js +0 -77
- package/engines/execution/RequestExecutor.js +0 -67
- package/engines/parsing/CSVParser.js +0 -60
- package/engines/parsing/LineParser.js +0 -71
- package/engines/parsing/ParseCompression.js +0 -101
- package/engines/parsing/ParseHelper.js +0 -18
- package/engines/parsing/ParseManager.js +0 -54
- package/engines/parsing/XLSParser.js +0 -87
- package/engines/parsing/XMLParser.js +0 -115
- package/engines/producer/ProducerEngine.js +0 -127
- package/engines/producer/ProducerManager.js +0 -43
- package/engines/scheduler/CronScheduler.js +0 -222
- package/engines/scheduler/QueueManager.js +0 -314
- package/engines/schema/SchemaValidator.js +0 -67
- package/engines/transform/JoinEngine.js +0 -232
- package/engines/transform/TransformationEngine.js +0 -277
- package/engines/transform/TypeCaster.js +0 -59
- package/engines/usage/DataframeManager.js +0 -55
- package/engines/usage/UsageDataManager.js +0 -151
- package/engines/usage/UsageManager.js +0 -65
- package/engines/validation/Validator.js +0 -216
- package/executors/ConsumerExecutor.js +0 -280
- package/executors/Executor.js +0 -177
- package/executors/ExecutorOrchestrator.js +0 -331
- package/executors/ExecutorPerformance.js +0 -17
- package/executors/ExecutorProgress.js +0 -54
- package/executors/ExecutorScope.js +0 -52
- package/executors/OutputExecutor.js +0 -118
- package/executors/ProducerExecutor.js +0 -108
- package/helper/Helper.js +0 -149
- package/helper/Logger.js +0 -84
- package/helper/Runtime.js +0 -20
- package/helper/Settings.js +0 -13
- package/licencing/LicenceManager.js +0 -64
- package/settings.js +0 -12
|
@@ -1,495 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
-
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
-
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
-
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
-
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
-
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
-
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
-
});
|
|
10
|
-
};
|
|
11
|
-
var __asyncValues = (this && this.__asyncValues) || function (o) {
|
|
12
|
-
if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
|
|
13
|
-
var m = o[Symbol.asyncIterator], i;
|
|
14
|
-
return m ? m.call(o) : (o = typeof __values === "function" ? __values(o) : o[Symbol.iterator](), i = {}, verb("next"), verb("throw"), verb("return"), i[Symbol.asyncIterator] = function () { return this; }, i);
|
|
15
|
-
function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }
|
|
16
|
-
function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }
|
|
17
|
-
};
|
|
18
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
19
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
20
|
-
};
|
|
21
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
22
|
-
const client_s3_1 = require("@aws-sdk/client-s3");
|
|
23
|
-
const Affirm_1 = __importDefault(require("../../core/Affirm"));
|
|
24
|
-
const SecretManager_1 = __importDefault(require("../../engines/SecretManager"));
|
|
25
|
-
const promises_1 = require("stream/promises");
|
|
26
|
-
const readline_1 = __importDefault(require("readline"));
|
|
27
|
-
const path_1 = __importDefault(require("path"));
|
|
28
|
-
const fs_1 = __importDefault(require("fs"));
|
|
29
|
-
const Algo_1 = __importDefault(require("../../core/Algo"));
|
|
30
|
-
const xlsx_1 = __importDefault(require("xlsx"));
|
|
31
|
-
const XMLParser_1 = __importDefault(require("../../engines/parsing/XMLParser"));
|
|
32
|
-
const Helper_1 = __importDefault(require("../../helper/Helper"));
|
|
33
|
-
const ParseHelper_1 = __importDefault(require("../../engines/parsing/ParseHelper"));
|
|
34
|
-
const DriverHelper_1 = __importDefault(require("../DriverHelper"));
|
|
35
|
-
const Logger_1 = __importDefault(require("../../helper/Logger"));
|
|
36
|
-
const Constants_1 = __importDefault(require("../../Constants"));
|
|
37
|
-
const XLSParser_1 = __importDefault(require("../../engines/parsing/XLSParser"));
|
|
38
|
-
const ExecutorScope_1 = __importDefault(require("../../executors/ExecutorScope"));
|
|
39
|
-
class S3SourceDriver {
|
|
40
|
-
constructor() {
|
|
41
|
-
this.init = (source) => __awaiter(this, void 0, void 0, function* () {
|
|
42
|
-
this._bucketName = source.authentication['bucket'];
|
|
43
|
-
const sessionToken = SecretManager_1.default.replaceSecret(source.authentication['sessionToken']);
|
|
44
|
-
const config = {
|
|
45
|
-
region: source.authentication['region'],
|
|
46
|
-
credentials: {
|
|
47
|
-
accessKeyId: SecretManager_1.default.replaceSecret(source.authentication['accessKey']),
|
|
48
|
-
secretAccessKey: SecretManager_1.default.replaceSecret(source.authentication['secretKey']),
|
|
49
|
-
sessionToken: sessionToken ? sessionToken : undefined
|
|
50
|
-
}
|
|
51
|
-
};
|
|
52
|
-
this._client = new client_s3_1.S3Client(config);
|
|
53
|
-
// TODO: is there a way to test if the connection was successful? like a query or scan that I can do?
|
|
54
|
-
return this;
|
|
55
|
-
});
|
|
56
|
-
this.readAll = (request) => __awaiter(this, void 0, void 0, function* () {
|
|
57
|
-
(0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "connect()" first');
|
|
58
|
-
(0, Affirm_1.default)(request, `Invalid download request`);
|
|
59
|
-
(0, Affirm_1.default)(request.fileKey, `Invalid file key for download request`);
|
|
60
|
-
const { fileKey } = request;
|
|
61
|
-
if (fileKey.includes('%')) {
|
|
62
|
-
const allFileKeys = yield this.listFiles(fileKey);
|
|
63
|
-
(0, Affirm_1.default)(allFileKeys.length < 50, `Pattern ${fileKey} of producer requested to S3 matches more than 50 files (${allFileKeys.length}), this is more than the S3 allowed limit. Please refine your pattern, remove some files or use a separate bucket.`);
|
|
64
|
-
const promises = allFileKeys.map((x, i) => this._get(Object.assign(Object.assign({}, request), { fileKey: x }), i));
|
|
65
|
-
const results = yield Promise.all(promises);
|
|
66
|
-
return results.flat();
|
|
67
|
-
}
|
|
68
|
-
else {
|
|
69
|
-
return yield this._get(request);
|
|
70
|
-
}
|
|
71
|
-
});
|
|
72
|
-
this.readLinesInRange = (request) => __awaiter(this, void 0, void 0, function* () {
|
|
73
|
-
(0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "connect()" first');
|
|
74
|
-
(0, Affirm_1.default)(request, 'Invalid read request');
|
|
75
|
-
(0, Affirm_1.default)(request.options, 'Invalid read request options');
|
|
76
|
-
const { fileKey } = request;
|
|
77
|
-
if (fileKey.includes('%')) {
|
|
78
|
-
const allFileKeys = yield this.listFiles(fileKey);
|
|
79
|
-
const promises = allFileKeys.map((x, i) => this._get(Object.assign(Object.assign({}, request), { fileKey: x }), i));
|
|
80
|
-
const results = yield Promise.all(promises);
|
|
81
|
-
return results.flat();
|
|
82
|
-
}
|
|
83
|
-
else {
|
|
84
|
-
return yield this._get(request);
|
|
85
|
-
}
|
|
86
|
-
});
|
|
87
|
-
this.download = (dataset) => __awaiter(this, void 0, void 0, function* () {
|
|
88
|
-
(0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "connect()" first');
|
|
89
|
-
(0, Affirm_1.default)(dataset, 'Invalid dataset');
|
|
90
|
-
const file = dataset.getFile();
|
|
91
|
-
(0, Affirm_1.default)(file, 'Invalid dataset file');
|
|
92
|
-
(0, Affirm_1.default)(file.fileKey, 'Invalid file key');
|
|
93
|
-
(0, Affirm_1.default)(file.fileType, `Invalid file type`);
|
|
94
|
-
const includeSourceFilename = file.includeSourceFilename === true;
|
|
95
|
-
const downloadLocally = (fileUrl_1, headerLine_1, ...args_1) => __awaiter(this, [fileUrl_1, headerLine_1, ...args_1], void 0, function* (fileUrl, headerLine, appendMode = false, sourceFilename) {
|
|
96
|
-
// Download and validate header in a single stream pass
|
|
97
|
-
const command = new client_s3_1.GetObjectCommand({
|
|
98
|
-
Bucket: this._bucketName,
|
|
99
|
-
Key: fileUrl
|
|
100
|
-
});
|
|
101
|
-
const response = yield this._client.send(command);
|
|
102
|
-
(0, Affirm_1.default)(response.Body, 'Failed to fetch object from S3');
|
|
103
|
-
let stream;
|
|
104
|
-
switch (file.fileType) {
|
|
105
|
-
case 'XLS':
|
|
106
|
-
case 'XLSX':
|
|
107
|
-
stream = yield XLSParser_1.default.parseXLSStream(response.Body, file.sheetName);
|
|
108
|
-
break;
|
|
109
|
-
default:
|
|
110
|
-
stream = response.Body;
|
|
111
|
-
break;
|
|
112
|
-
}
|
|
113
|
-
return DriverHelper_1.default.appendToUnifiedFile({
|
|
114
|
-
stream,
|
|
115
|
-
fileKey: fileUrl,
|
|
116
|
-
destinationPath: dataset.getPath(),
|
|
117
|
-
append: appendMode,
|
|
118
|
-
headerLine,
|
|
119
|
-
fileType: file.fileType,
|
|
120
|
-
hasHeaderRow: file.hasHeaderRow,
|
|
121
|
-
delimiter: dataset.getDelimiter(),
|
|
122
|
-
sourceFilename
|
|
123
|
-
});
|
|
124
|
-
});
|
|
125
|
-
const { fileKey } = file;
|
|
126
|
-
const setFirstLineFromStream = (stream) => __awaiter(this, void 0, void 0, function* () {
|
|
127
|
-
var _a, e_1, _b, _c;
|
|
128
|
-
const rl = readline_1.default.createInterface({ input: stream, crlfDelay: Infinity });
|
|
129
|
-
let firstLine = '';
|
|
130
|
-
switch (file.fileType) {
|
|
131
|
-
case 'XLSX':
|
|
132
|
-
case 'XLS':
|
|
133
|
-
firstLine = yield XLSParser_1.default.getHeaderXlsFromStream(stream, file.sheetName);
|
|
134
|
-
break;
|
|
135
|
-
case 'CSV':
|
|
136
|
-
case 'JSON':
|
|
137
|
-
case 'JSONL':
|
|
138
|
-
case 'TXT':
|
|
139
|
-
try {
|
|
140
|
-
for (var _d = true, rl_1 = __asyncValues(rl), rl_1_1; rl_1_1 = yield rl_1.next(), _a = rl_1_1.done, !_a; _d = true) {
|
|
141
|
-
_c = rl_1_1.value;
|
|
142
|
-
_d = false;
|
|
143
|
-
const line = _c;
|
|
144
|
-
firstLine = line;
|
|
145
|
-
break;
|
|
146
|
-
}
|
|
147
|
-
}
|
|
148
|
-
catch (e_1_1) { e_1 = { error: e_1_1 }; }
|
|
149
|
-
finally {
|
|
150
|
-
try {
|
|
151
|
-
if (!_d && !_a && (_b = rl_1.return)) yield _b.call(rl_1);
|
|
152
|
-
}
|
|
153
|
-
finally { if (e_1) throw e_1.error; }
|
|
154
|
-
}
|
|
155
|
-
rl.close();
|
|
156
|
-
break;
|
|
157
|
-
}
|
|
158
|
-
// If including source filename, append a placeholder column name to the header
|
|
159
|
-
if (file.includeSourceFilename) {
|
|
160
|
-
firstLine = firstLine + dataset.getDelimiter() + Constants_1.default.SOURCE_FILENAME_COLUMN;
|
|
161
|
-
}
|
|
162
|
-
dataset.setFirstLine(firstLine);
|
|
163
|
-
return firstLine;
|
|
164
|
-
});
|
|
165
|
-
if (fileKey.includes('%')) {
|
|
166
|
-
const allFileKeys = yield this.listFiles(fileKey);
|
|
167
|
-
Logger_1.default.log(`Matched ${allFileKeys.length} files, copying locally and creating unified dataset.`);
|
|
168
|
-
Affirm_1.default.hasItems(allFileKeys, `The file key "${fileKey}" doesn't have any matches in bucket "${this._bucketName}".`);
|
|
169
|
-
// Get header line from the first file
|
|
170
|
-
const firstFileCommand = new client_s3_1.GetObjectCommand({
|
|
171
|
-
Bucket: this._bucketName,
|
|
172
|
-
Key: allFileKeys[0]
|
|
173
|
-
});
|
|
174
|
-
const firstFileResponse = yield this._client.send(firstFileCommand);
|
|
175
|
-
(0, Affirm_1.default)(firstFileResponse.Body, 'Failed to fetch first file from S3');
|
|
176
|
-
const firstFileStream = firstFileResponse.Body;
|
|
177
|
-
const headerLine = yield setFirstLineFromStream(firstFileStream);
|
|
178
|
-
let totalLineCount = 0;
|
|
179
|
-
// Download files sequentially to avoid file conflicts
|
|
180
|
-
for (let i = 0; i < allFileKeys.length; i++) {
|
|
181
|
-
const currentFileKey = allFileKeys[i];
|
|
182
|
-
// Pass the filename (just the basename) if includeSourceFilename is enabled
|
|
183
|
-
const sourceFilename = includeSourceFilename ? path_1.default.basename(currentFileKey) : undefined;
|
|
184
|
-
totalLineCount += yield downloadLocally(currentFileKey, headerLine, i > 0, sourceFilename); // Append mode for subsequent files
|
|
185
|
-
}
|
|
186
|
-
dataset.setCount(totalLineCount);
|
|
187
|
-
return dataset;
|
|
188
|
-
}
|
|
189
|
-
else {
|
|
190
|
-
// Get header line from the single file
|
|
191
|
-
const firstFileCommand = new client_s3_1.GetObjectCommand({
|
|
192
|
-
Bucket: this._bucketName,
|
|
193
|
-
Key: fileKey
|
|
194
|
-
});
|
|
195
|
-
const firstFileResponse = yield this._client.send(firstFileCommand);
|
|
196
|
-
(0, Affirm_1.default)(firstFileResponse.Body, 'Failed to fetch first file from S3');
|
|
197
|
-
const firstFileStream = firstFileResponse.Body;
|
|
198
|
-
const headerLine = yield setFirstLineFromStream(firstFileStream);
|
|
199
|
-
// Pass the filename if includeSourceFilename is enabled
|
|
200
|
-
const sourceFilename = includeSourceFilename ? path_1.default.basename(fileKey) : undefined;
|
|
201
|
-
const totalLineCount = yield downloadLocally(fileKey, headerLine, false, sourceFilename);
|
|
202
|
-
dataset.setCount(totalLineCount);
|
|
203
|
-
return dataset;
|
|
204
|
-
}
|
|
205
|
-
});
|
|
206
|
-
this.exist = (producer) => __awaiter(this, void 0, void 0, function* () {
|
|
207
|
-
var _a;
|
|
208
|
-
(0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "connect()" first');
|
|
209
|
-
(0, Affirm_1.default)(producer, 'Invalid read producer');
|
|
210
|
-
const bucket = this._bucketName;
|
|
211
|
-
const fileKey = producer.settings.fileKey;
|
|
212
|
-
(0, Affirm_1.default)(fileKey, `Invalid file key for download request`);
|
|
213
|
-
if (fileKey.includes('%')) {
|
|
214
|
-
const allFileKeys = yield this.listFiles(fileKey);
|
|
215
|
-
return allFileKeys.length > 0;
|
|
216
|
-
}
|
|
217
|
-
else {
|
|
218
|
-
try {
|
|
219
|
-
yield this._client.send(new client_s3_1.HeadObjectCommand({ Bucket: bucket, Key: fileKey }));
|
|
220
|
-
return true;
|
|
221
|
-
}
|
|
222
|
-
catch (error) {
|
|
223
|
-
if (((_a = error.$metadata) === null || _a === void 0 ? void 0 : _a.httpStatusCode) === 404 || error.name === 'NotFound')
|
|
224
|
-
return false;
|
|
225
|
-
throw error;
|
|
226
|
-
}
|
|
227
|
-
}
|
|
228
|
-
});
|
|
229
|
-
this._readLines = (stream, lineFrom, lineTo) => __awaiter(this, void 0, void 0, function* () {
|
|
230
|
-
var _a, e_2, _b, _c;
|
|
231
|
-
const reader = readline_1.default.createInterface({ input: stream, crlfDelay: Infinity });
|
|
232
|
-
const lines = [];
|
|
233
|
-
let lineCounter = 0;
|
|
234
|
-
try {
|
|
235
|
-
for (var _d = true, reader_1 = __asyncValues(reader), reader_1_1; reader_1_1 = yield reader_1.next(), _a = reader_1_1.done, !_a; _d = true) {
|
|
236
|
-
_c = reader_1_1.value;
|
|
237
|
-
_d = false;
|
|
238
|
-
const line = _c;
|
|
239
|
-
if (Algo_1.default.hasVal(lineFrom) && Algo_1.default.hasVal(lineTo)) {
|
|
240
|
-
if (lineCounter >= lineFrom && lineCounter < lineTo) {
|
|
241
|
-
if (line && line.length > 0)
|
|
242
|
-
lines.push(line);
|
|
243
|
-
}
|
|
244
|
-
lineCounter++;
|
|
245
|
-
if (lineCounter >= lineTo)
|
|
246
|
-
break;
|
|
247
|
-
}
|
|
248
|
-
else {
|
|
249
|
-
if (line && line.length > 0)
|
|
250
|
-
lines.push(line);
|
|
251
|
-
}
|
|
252
|
-
}
|
|
253
|
-
}
|
|
254
|
-
catch (e_2_1) { e_2 = { error: e_2_1 }; }
|
|
255
|
-
finally {
|
|
256
|
-
try {
|
|
257
|
-
if (!_d && !_a && (_b = reader_1.return)) yield _b.call(reader_1);
|
|
258
|
-
}
|
|
259
|
-
finally { if (e_2) throw e_2.error; }
|
|
260
|
-
}
|
|
261
|
-
reader.close();
|
|
262
|
-
return lines;
|
|
263
|
-
});
|
|
264
|
-
this._readExcelLines = (stream, sheetName, lineFrom, lineTo) => __awaiter(this, void 0, void 0, function* () {
|
|
265
|
-
var _a, stream_1, stream_1_1;
|
|
266
|
-
var _b, e_3, _c, _d;
|
|
267
|
-
(0, Affirm_1.default)(sheetName, `Invalid sheetname`);
|
|
268
|
-
const chunks = [];
|
|
269
|
-
try {
|
|
270
|
-
for (_a = true, stream_1 = __asyncValues(stream); stream_1_1 = yield stream_1.next(), _b = stream_1_1.done, !_b; _a = true) {
|
|
271
|
-
_d = stream_1_1.value;
|
|
272
|
-
_a = false;
|
|
273
|
-
const chunk = _d;
|
|
274
|
-
chunks.push(chunk);
|
|
275
|
-
}
|
|
276
|
-
}
|
|
277
|
-
catch (e_3_1) { e_3 = { error: e_3_1 }; }
|
|
278
|
-
finally {
|
|
279
|
-
try {
|
|
280
|
-
if (!_a && !_b && (_c = stream_1.return)) yield _c.call(stream_1);
|
|
281
|
-
}
|
|
282
|
-
finally { if (e_3) throw e_3.error; }
|
|
283
|
-
}
|
|
284
|
-
const buffer = Buffer.concat(chunks);
|
|
285
|
-
const excel = xlsx_1.default.read(buffer, { type: 'buffer' });
|
|
286
|
-
(0, Affirm_1.default)(excel.SheetNames.includes(sheetName), `The sheet "${sheetName}" doesn't exist in the excel (available: ${excel.SheetNames.join(', ')})`);
|
|
287
|
-
const sheet = excel.Sheets[sheetName];
|
|
288
|
-
const csv = xlsx_1.default.utils.sheet_to_csv(sheet);
|
|
289
|
-
const lines = csv.split('\n');
|
|
290
|
-
if (Algo_1.default.hasVal(lineFrom) && Algo_1.default.hasVal(lineTo))
|
|
291
|
-
return lines.slice(lineFrom, lineTo + 1);
|
|
292
|
-
else
|
|
293
|
-
return lines;
|
|
294
|
-
});
|
|
295
|
-
this._readXmlLines = (stream, lineFrom, lineTo) => __awaiter(this, void 0, void 0, function* () {
|
|
296
|
-
var _a, stream_2, stream_2_1;
|
|
297
|
-
var _b, e_4, _c, _d;
|
|
298
|
-
const chunks = [];
|
|
299
|
-
try {
|
|
300
|
-
for (_a = true, stream_2 = __asyncValues(stream); stream_2_1 = yield stream_2.next(), _b = stream_2_1.done, !_b; _a = true) {
|
|
301
|
-
_d = stream_2_1.value;
|
|
302
|
-
_a = false;
|
|
303
|
-
const chunk = _d;
|
|
304
|
-
chunks.push(chunk);
|
|
305
|
-
}
|
|
306
|
-
}
|
|
307
|
-
catch (e_4_1) { e_4 = { error: e_4_1 }; }
|
|
308
|
-
finally {
|
|
309
|
-
try {
|
|
310
|
-
if (!_a && !_b && (_c = stream_2.return)) yield _c.call(stream_2);
|
|
311
|
-
}
|
|
312
|
-
finally { if (e_4) throw e_4.error; }
|
|
313
|
-
}
|
|
314
|
-
const buffer = Buffer.concat(chunks);
|
|
315
|
-
const jsonData = XMLParser_1.default.xmlToJson(buffer);
|
|
316
|
-
// Convert JSON data to string lines. This might need adjustment based on XML structure.
|
|
317
|
-
let lines = Array.isArray(jsonData) ? jsonData.map(item => JSON.stringify(item)) : [JSON.stringify(jsonData)];
|
|
318
|
-
if (Algo_1.default.hasVal(lineFrom) && Algo_1.default.hasVal(lineTo)) {
|
|
319
|
-
lines = lines.slice(lineFrom, lineTo + 1);
|
|
320
|
-
}
|
|
321
|
-
return lines;
|
|
322
|
-
});
|
|
323
|
-
this._get = (request, index) => __awaiter(this, void 0, void 0, function* () {
|
|
324
|
-
const { fileKey, fileType, options } = request;
|
|
325
|
-
const bucket = this._bucketName;
|
|
326
|
-
let lineFrom, lineTo, sheetName, hasHeaderRow;
|
|
327
|
-
if (options) {
|
|
328
|
-
lineFrom = options.lineFrom;
|
|
329
|
-
lineTo = options.lineTo;
|
|
330
|
-
sheetName = options.sheetName;
|
|
331
|
-
hasHeaderRow = options.hasHeaderRow;
|
|
332
|
-
}
|
|
333
|
-
const response = yield this._client.send(new client_s3_1.GetObjectCommand({
|
|
334
|
-
Bucket: bucket,
|
|
335
|
-
Key: fileKey
|
|
336
|
-
}));
|
|
337
|
-
(0, Affirm_1.default)(response.Body, 'Failed to fetch object from S3');
|
|
338
|
-
const stream = response.Body;
|
|
339
|
-
let lines = [];
|
|
340
|
-
switch (fileType) {
|
|
341
|
-
case 'CSV':
|
|
342
|
-
case 'JSON':
|
|
343
|
-
case 'JSONL':
|
|
344
|
-
case 'TXT':
|
|
345
|
-
if (Algo_1.default.hasVal(lineFrom) && Algo_1.default.hasVal(lineTo))
|
|
346
|
-
lines = yield this._readLines(stream, lineFrom, lineTo);
|
|
347
|
-
else
|
|
348
|
-
lines = yield this._readLines(stream);
|
|
349
|
-
break;
|
|
350
|
-
case 'XLS':
|
|
351
|
-
case 'XLSX':
|
|
352
|
-
if (Algo_1.default.hasVal(lineFrom) && Algo_1.default.hasVal(lineTo))
|
|
353
|
-
lines = yield this._readExcelLines(stream, sheetName, lineFrom, lineTo);
|
|
354
|
-
else
|
|
355
|
-
lines = yield this._readExcelLines(stream, sheetName);
|
|
356
|
-
break;
|
|
357
|
-
case 'XML':
|
|
358
|
-
if (Algo_1.default.hasVal(lineFrom) && Algo_1.default.hasVal(lineTo))
|
|
359
|
-
lines = yield this._readXmlLines(stream, lineFrom, lineTo);
|
|
360
|
-
else
|
|
361
|
-
lines = yield this._readXmlLines(stream);
|
|
362
|
-
break;
|
|
363
|
-
}
|
|
364
|
-
// If this is not the first file read in a pattern match AND the file type has an header,
|
|
365
|
-
// then I need to remove the header from the resulting lines or the header will be duplicated
|
|
366
|
-
if (index > 0 && ParseHelper_1.default.shouldHaveHeader(fileType, hasHeaderRow)) {
|
|
367
|
-
lines = lines.slice(1);
|
|
368
|
-
}
|
|
369
|
-
return lines;
|
|
370
|
-
});
|
|
371
|
-
this._listFiles = (fileKeyPattern, maxKeys, continuationToken) => __awaiter(this, void 0, void 0, function* () {
|
|
372
|
-
var _a;
|
|
373
|
-
(0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "connect()" first');
|
|
374
|
-
// Convert SQL-like pattern to prefix and pattern parts for filtering
|
|
375
|
-
let prefix = '';
|
|
376
|
-
if (fileKeyPattern) {
|
|
377
|
-
if (fileKeyPattern.includes('%')) {
|
|
378
|
-
const parts = fileKeyPattern.split('%').filter(part => part.length > 0);
|
|
379
|
-
// If pattern starts with text before first %, use it as prefix for S3 optimization
|
|
380
|
-
if (!fileKeyPattern.startsWith('%') && parts[0]) {
|
|
381
|
-
prefix = parts[0];
|
|
382
|
-
}
|
|
383
|
-
}
|
|
384
|
-
else {
|
|
385
|
-
// No wildcard, use the entire pattern as prefix
|
|
386
|
-
prefix = fileKeyPattern;
|
|
387
|
-
}
|
|
388
|
-
}
|
|
389
|
-
const listParams = {
|
|
390
|
-
Bucket: this._bucketName,
|
|
391
|
-
Prefix: prefix || undefined,
|
|
392
|
-
MaxKeys: maxKeys || 10000,
|
|
393
|
-
ContinuationToken: continuationToken
|
|
394
|
-
};
|
|
395
|
-
try {
|
|
396
|
-
const response = yield this._client.send(new client_s3_1.ListObjectsV2Command(listParams));
|
|
397
|
-
const files = ((_a = response.Contents) === null || _a === void 0 ? void 0 : _a.map(obj => obj.Key).filter(key => key !== undefined)) || [];
|
|
398
|
-
const matchingFiles = Helper_1.default.matchPattern(fileKeyPattern, files);
|
|
399
|
-
return {
|
|
400
|
-
files: matchingFiles,
|
|
401
|
-
nextContinuationToken: response.NextContinuationToken
|
|
402
|
-
};
|
|
403
|
-
}
|
|
404
|
-
catch (error) {
|
|
405
|
-
throw new Error(`Failed to list files in bucket "${this._bucketName}": ${error.message}`);
|
|
406
|
-
}
|
|
407
|
-
});
|
|
408
|
-
this.listFiles = (fileKeyPattern, maxKeys) => __awaiter(this, void 0, void 0, function* () {
|
|
409
|
-
const allFiles = [];
|
|
410
|
-
let continuationToken = undefined;
|
|
411
|
-
do {
|
|
412
|
-
const result = yield this._listFiles(fileKeyPattern, maxKeys, continuationToken);
|
|
413
|
-
allFiles.push(...result.files);
|
|
414
|
-
continuationToken = result.nextContinuationToken;
|
|
415
|
-
// If maxKeys is specified and we've reached the limit, break
|
|
416
|
-
if (maxKeys && allFiles.length >= maxKeys) {
|
|
417
|
-
return allFiles.slice(0, maxKeys);
|
|
418
|
-
}
|
|
419
|
-
} while (continuationToken);
|
|
420
|
-
return allFiles;
|
|
421
|
-
});
|
|
422
|
-
this.downloadFile = (fileKey) => __awaiter(this, void 0, void 0, function* () {
|
|
423
|
-
(0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "init()" first');
|
|
424
|
-
(0, Affirm_1.default)(fileKey, 'Invalid file key');
|
|
425
|
-
const response = yield this._client.send(new client_s3_1.GetObjectCommand({
|
|
426
|
-
Bucket: this._bucketName,
|
|
427
|
-
Key: fileKey
|
|
428
|
-
}));
|
|
429
|
-
(0, Affirm_1.default)(response.Body, 'Failed to fetch object from S3');
|
|
430
|
-
const content = yield response.Body.transformToByteArray();
|
|
431
|
-
return Buffer.from(content);
|
|
432
|
-
});
|
|
433
|
-
this.deleteFile = (fileKey) => __awaiter(this, void 0, void 0, function* () {
|
|
434
|
-
(0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "init()" first');
|
|
435
|
-
(0, Affirm_1.default)(fileKey, 'Invalid file key');
|
|
436
|
-
yield this._client.send(new client_s3_1.DeleteObjectCommand({
|
|
437
|
-
Bucket: this._bucketName,
|
|
438
|
-
Key: fileKey
|
|
439
|
-
}));
|
|
440
|
-
});
|
|
441
|
-
this.copyFile = (sourceFileKey, destinationBucket, destinationFileKey) => __awaiter(this, void 0, void 0, function* () {
|
|
442
|
-
(0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "init()" first');
|
|
443
|
-
(0, Affirm_1.default)(sourceFileKey, 'Invalid source file key');
|
|
444
|
-
(0, Affirm_1.default)(destinationBucket, 'Invalid destination bucket');
|
|
445
|
-
(0, Affirm_1.default)(destinationFileKey, 'Invalid destination file key');
|
|
446
|
-
yield this._client.send(new client_s3_1.CopyObjectCommand({
|
|
447
|
-
CopySource: `${this._bucketName}/${sourceFileKey}`,
|
|
448
|
-
Bucket: destinationBucket,
|
|
449
|
-
Key: destinationFileKey
|
|
450
|
-
}));
|
|
451
|
-
});
|
|
452
|
-
this.ready = (request) => __awaiter(this, void 0, void 0, function* () {
|
|
453
|
-
(0, Affirm_1.default)(request, 'Invalid producer');
|
|
454
|
-
(0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "init()" first');
|
|
455
|
-
const { producer, scope } = request;
|
|
456
|
-
const { fileKey } = producer.settings;
|
|
457
|
-
(0, Affirm_1.default)(fileKey, 'Invalid file key');
|
|
458
|
-
const streamToFile = (s3Key, localPath) => __awaiter(this, void 0, void 0, function* () {
|
|
459
|
-
const command = new client_s3_1.GetObjectCommand({
|
|
460
|
-
Bucket: this._bucketName,
|
|
461
|
-
Key: s3Key
|
|
462
|
-
});
|
|
463
|
-
const response = yield this._client.send(command);
|
|
464
|
-
(0, Affirm_1.default)(response.Body, `Failed to fetch object "${s3Key}" from S3`);
|
|
465
|
-
// Ensure the directory for the file exists
|
|
466
|
-
const fileDir = path_1.default.dirname(localPath);
|
|
467
|
-
if (!fs_1.default.existsSync(fileDir)) {
|
|
468
|
-
fs_1.default.mkdirSync(fileDir, { recursive: true });
|
|
469
|
-
}
|
|
470
|
-
const writeStream = fs_1.default.createWriteStream(localPath);
|
|
471
|
-
yield (0, promises_1.pipeline)(response.Body, writeStream);
|
|
472
|
-
});
|
|
473
|
-
if (fileKey.includes('%')) {
|
|
474
|
-
const allFileKeys = yield this.listFiles(fileKey);
|
|
475
|
-
Affirm_1.default.hasItems(allFileKeys, `The file key pattern "${fileKey}" doesn't have any matches in bucket "${this._bucketName}".`);
|
|
476
|
-
// Stream each file to local temp storage sequentially to avoid overwhelming the connection
|
|
477
|
-
const allFilePaths = [];
|
|
478
|
-
for (const s3Key of allFileKeys) {
|
|
479
|
-
const localPath = ExecutorScope_1.default.getProducerPath(scope, producer, s3Key);
|
|
480
|
-
ExecutorScope_1.default.ensurePath(localPath);
|
|
481
|
-
yield streamToFile(s3Key, localPath);
|
|
482
|
-
allFilePaths.push(localPath);
|
|
483
|
-
}
|
|
484
|
-
return { files: allFilePaths.map(x => ({ fullUri: x })) };
|
|
485
|
-
}
|
|
486
|
-
else {
|
|
487
|
-
const localPath = ExecutorScope_1.default.getProducerPath(scope, producer, fileKey);
|
|
488
|
-
ExecutorScope_1.default.ensurePath(localPath);
|
|
489
|
-
yield streamToFile(fileKey, localPath);
|
|
490
|
-
return { files: [{ fullUri: localPath }] };
|
|
491
|
-
}
|
|
492
|
-
});
|
|
493
|
-
}
|
|
494
|
-
}
|
|
495
|
-
exports.default = S3SourceDriver;
|
package/engines/CryptoEngine.js
DELETED
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
const crypto_1 = __importDefault(require("crypto"));
|
|
7
|
-
const Algo_1 = __importDefault(require("../core/Algo"));
|
|
8
|
-
const RandomEngine_1 = __importDefault(require("./RandomEngine"));
|
|
9
|
-
const ProcessENVManager_1 = __importDefault(require("./ProcessENVManager"));
|
|
10
|
-
class CryptoEngineClass {
|
|
11
|
-
constructor() {
|
|
12
|
-
var _a;
|
|
13
|
-
this._salt = '';
|
|
14
|
-
this.hashQuery = (maskType, fieldReference, fieldName) => {
|
|
15
|
-
if (!Algo_1.default.hasVal(maskType))
|
|
16
|
-
return;
|
|
17
|
-
switch (maskType) {
|
|
18
|
-
case 'hash':
|
|
19
|
-
return `SHA2(${fieldReference}, 256) AS "${fieldName}"`;
|
|
20
|
-
case 'crypt':
|
|
21
|
-
throw new Error('Not implemented yet');
|
|
22
|
-
case 'mask':
|
|
23
|
-
throw new Error('Not implemented yet');
|
|
24
|
-
case 'none':
|
|
25
|
-
return `${fieldReference} AS "${fieldName}"`;
|
|
26
|
-
default:
|
|
27
|
-
throw new Error('This type is not ');
|
|
28
|
-
}
|
|
29
|
-
};
|
|
30
|
-
this.valueToHash = (value) => {
|
|
31
|
-
const textValue = JSON.stringify(value);
|
|
32
|
-
return crypto_1.default.createHash('sha256').update(textValue).digest('hex');
|
|
33
|
-
};
|
|
34
|
-
this.hashValue = (maskType, value, valueType) => {
|
|
35
|
-
if (!Algo_1.default.hasVal(value))
|
|
36
|
-
return value;
|
|
37
|
-
if (!Algo_1.default.hasVal(maskType))
|
|
38
|
-
return value;
|
|
39
|
-
const saltedValue = this._salt + value;
|
|
40
|
-
switch (maskType) {
|
|
41
|
-
case 'hash':
|
|
42
|
-
return this.valueToHash(saltedValue);
|
|
43
|
-
case 'random': {
|
|
44
|
-
switch (valueType) {
|
|
45
|
-
case 'datetime': return RandomEngine_1.default.rngDate();
|
|
46
|
-
case 'number': return RandomEngine_1.default.rng();
|
|
47
|
-
case 'string': return this.valueToHash(saltedValue);
|
|
48
|
-
default:
|
|
49
|
-
throw new Error('Not implemented yet');
|
|
50
|
-
}
|
|
51
|
-
}
|
|
52
|
-
case 'seeded-random': {
|
|
53
|
-
switch (valueType) {
|
|
54
|
-
case 'datetime': return RandomEngine_1.default.sRngDate(saltedValue);
|
|
55
|
-
case 'number': return RandomEngine_1.default.sRng(saltedValue);
|
|
56
|
-
case 'string': return this.valueToHash(saltedValue);
|
|
57
|
-
default:
|
|
58
|
-
throw new Error('Not implemented yet');
|
|
59
|
-
}
|
|
60
|
-
}
|
|
61
|
-
case 'crypt':
|
|
62
|
-
throw new Error('Not implemented yet');
|
|
63
|
-
case 'mask':
|
|
64
|
-
throw new Error('Not implemented yet');
|
|
65
|
-
case 'none':
|
|
66
|
-
return value;
|
|
67
|
-
default:
|
|
68
|
-
throw new Error(`This type doesn't exist`);
|
|
69
|
-
}
|
|
70
|
-
};
|
|
71
|
-
this._salt = (_a = ProcessENVManager_1.default.getEnvVariable('REMORA_SALT')) !== null && _a !== void 0 ? _a : '';
|
|
72
|
-
}
|
|
73
|
-
}
|
|
74
|
-
const CryptoEngine = new CryptoEngineClass();
|
|
75
|
-
exports.default = CryptoEngine;
|