@forzalabs/remora 1.0.13 → 1.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Constants.js +2 -1
- package/database/DatabaseEngine.js +3 -2
- package/definitions/json_schemas/consumer-schema.json +39 -23
- package/drivers/DeltaShareDriver.js +2 -2
- package/drivers/HttpApiDriver.js +3 -3
- package/drivers/RedshiftDriver.js +2 -2
- package/drivers/files/LocalDestinationDriver.js +0 -55
- package/drivers/files/LocalSourceDriver.js +3 -2
- package/drivers/s3/S3DestinationDriver.js +107 -69
- package/drivers/s3/S3SourceDriver.js +44 -4
- package/engines/Environment.js +1 -1
- package/engines/parsing/LineParser.js +19 -0
- package/engines/validation/Validator.js +7 -2
- package/executors/ConsumerExecutor.js +129 -14
- package/executors/Executor.js +19 -11
- package/executors/ExecutorOrchestrator.js +37 -29
- package/executors/ExecutorScope.js +52 -0
- package/executors/OutputExecutor.js +4 -4
- package/executors/ProducerExecutor.js +2 -2
- package/package.json +2 -2
- package/workers/ExecutorWorker.js +3 -1
- package/auth/AdminManager.js +0 -48
- package/auth/ApiKeysManager.js +0 -45
- package/auth/JWTManager.js +0 -56
- package/database/DatabaseInitializer.js +0 -80
- package/engines/file/FileExporter.js +0 -58
- package/workers/definitions.js +0 -2
package/Constants.js
CHANGED

@@ -1,7 +1,7 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 const CONSTANTS = {
-    cliVersion: '1.0.
+    cliVersion: '1.0.14',
     backendVersion: 1,
     backendPort: 5088,
     workerVersion: 2,
@@ -12,6 +12,7 @@ const CONSTANTS = {
     */
    SOURCE_FILENAME_COLUMN: '$source_filename',
    defaults: {
+        REMORA_PATH: './remora',
        PRODUCER_TEMP_FOLDER: '.temp',
        SQL_MAX_QUERY_ROWS: 10000,
        STRING_MAX_CHARACTERS_LENGTH: 10000000,

package/database/DatabaseEngine.js
CHANGED

@@ -20,17 +20,18 @@ class DatabaseEngineClass {
         this.MAX_TRY_CONNECTION = 3;
         this.db = () => this._db;
         this.connect = () => __awaiter(this, void 0, void 0, function* () {
-            var _a;
             // WARNING: this was changed during the deployment to ECS...
             // I've reverted it, but maybe it needs to be changed or looked into...
+            var _a;
             this._uri = ((_a = process.env.MONGO_URI) !== null && _a !== void 0 ? _a : Helper_1.default.isDev())
                 ? 'mongodb://mongo:27017/remora'
                 : 'mongodb://localhost:27017/remora';
             this._client = new mongodb_1.MongoClient(this._uri);
             const errors = [];
+            this._client = new mongodb_1.MongoClient(this._uri);
             for (let i = 0; i < this.MAX_TRY_CONNECTION; i++) {
                 try {
-                    console.log(`Attempting to connect to mongo
+                    console.log(`Attempting to connect to mongo "${this._uri}`);
                     yield this._client.connect();
                     this._db = this._client.db(Settings_1.default.db.name);
                     this._connected = true;
package/definitions/json_schemas/consumer-schema.json
CHANGED

@@ -57,29 +57,6 @@
           "additionalProperties": false
         }
       },
-      "custom": {
-        "type": "object",
-        "description": "Custom code to extract fields from producers, transform them, and return results",
-        "properties": {
-          "language": {
-            "type": "string",
-            "enum": [
-              "js",
-              "python"
-            ],
-            "description": "The language of the custom code"
-          },
-          "code": {
-            "type": "string",
-            "description": "The custom code to execute"
-          }
-        },
-        "required": [
-          "language",
-          "code"
-        ],
-        "additionalProperties": false
-      },
       "union": {
         "type": "boolean",
         "description": "Merges the data from the various producers in a single dataset. They must have the same output dimensions. If true, then you can't set any joins on any producer, since all producers are merged in a single dataset."
@@ -392,6 +369,45 @@
       "distinct": {
         "type": "boolean",
         "description": "If true, then the result set will only contain DISTINCT values"
+      },
+      "distinctOn": {
+        "type": "object",
+        "description": "Performs a distinct operation on specific key(s) and applies collision resolution rules to determine which record to keep when duplicates are found",
+        "properties": {
+          "keys": {
+            "type": "array",
+            "items": {
+              "type": "string"
+            },
+            "minItems": 1,
+            "description": "The field(s) to use for determining uniqueness. Use the 'alias' if specified. Can be a single field or multiple fields for composite keys."
+          },
+          "resolution": {
+            "type": "object",
+            "description": "Rules to determine which record to keep when duplicates are found",
+            "properties": {
+              "strategy": {
+                "type": "string",
+                "enum": ["first", "last", "min", "max"],
+                "description": "Strategy for resolving which record to keep. 'first': Keep the first record based on orderBy field. 'last': Keep the last record based on orderBy field. 'min': Keep the record with the minimum value in orderBy field. 'max': Keep the record with the maximum value in orderBy field."
+              },
+              "orderBy": {
+                "type": "string",
+                "description": "Required for 'first', 'last', 'min', 'max' strategies. The field to use for ordering/comparison when selecting the record to keep."
+              },
+              "direction": {
+                "type": "string",
+                "enum": ["asc", "desc"],
+                "default": "asc",
+                "description": "For 'first' and 'last' strategies, the sort direction."
+              }
+            },
+            "required": ["strategy"],
+            "additionalProperties": false
+          }
+        },
+        "required": ["keys", "resolution"],
+        "additionalProperties": false
       }
     },
     "additionalProperties": false
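Read together, the new distinctOn option lets a consumer deduplicate on one or more keys and pick the surviving record by an ordering rule. A minimal sketch of an options block that would satisfy this schema (the field names customer_id and updated_at are invented for illustration, not taken from the package):

    // Keep, for each customer_id, the record with the largest updated_at value.
    const options = {
        distinctOn: {
            keys: ['customer_id'],
            resolution: { strategy: 'max', orderBy: 'updated_at' }
        }
    };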
package/drivers/DeltaShareDriver.js
CHANGED

@@ -177,8 +177,8 @@ class DeltaShareSourceDriver {
                 .map(x => JSON.parse(x));
             return deltaLines;
         });
-        this.ready = (
-            void
+        this.ready = (request) => __awaiter(this, void 0, void 0, function* () {
+            void request;
             throw new Error('DeltaShareSourceDriver.ready is not supported: Delta Sharing does not support readiness checks');
         });
     }
package/drivers/HttpApiDriver.js
CHANGED

@@ -198,10 +198,10 @@ class HttpApiSourceDriver {
             }
             return itemsData;
         };
-        this.ready = (
-            void
+        this.ready = (request) => __awaiter(this, void 0, void 0, function* () {
+            void request;
             throw new Error('Not implemented yet');
-        };
+        });
     }
 }
 exports.HttpApiSourceDriver = HttpApiSourceDriver;
package/drivers/RedshiftDriver.js
CHANGED

@@ -175,8 +175,8 @@ class RedshiftDriver {
             }
             return records;
         };
-        this.ready = (
-            void
+        this.ready = (request) => __awaiter(this, void 0, void 0, function* () {
+            void request;
             throw new Error('Not implemented yet');
         });
     }
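In the DeltaShare, HTTP API, and Redshift drivers the change is the same: ready becomes an async method that receives a single request object, and drivers that cannot prepare files simply throw. A sketch of the shared shape, in the same transpiled style the package ships (nothing assumed beyond what the hunks above show):

    // Common pattern introduced in this release for drivers without readiness support.
    this.ready = (request) => __awaiter(this, void 0, void 0, function* () {
        void request; // the request is accepted but unused here
        throw new Error('Not implemented yet');
    });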
package/drivers/files/LocalDestinationDriver.js
CHANGED

@@ -57,7 +57,6 @@ const promises_1 = __importDefault(require("fs/promises"));
 const readline_1 = __importDefault(require("readline"));
 const path_1 = __importDefault(require("path"));
 const Affirm_1 = __importDefault(require("../../core/Affirm"));
-const FileExporter_1 = __importDefault(require("../../engines/file/FileExporter"));
 const Logger_1 = __importDefault(require("../../helper/Logger"));
 class LocalDestinationDriver {
     constructor() {
@@ -70,57 +69,6 @@ class LocalDestinationDriver {
             this._path = source.authentication['path'];
             return this;
         });
-        this.uploadFile = (options) => __awaiter(this, void 0, void 0, function* () {
-            (0, Affirm_1.default)(this._path, 'Path not initialized');
-            (0, Affirm_1.default)(options, 'Invalid upload options');
-            (0, Affirm_1.default)(options.name, 'File name is required');
-            (0, Affirm_1.default)(options.content != null, 'File content is required');
-            const folder = this._path;
-            try {
-                if (!fs.existsSync(folder))
-                    fs.mkdirSync(folder, { recursive: true });
-                const filePath = path_1.default.join(folder, options.name);
-                fs.writeFileSync(filePath, options.content);
-                return { bucket: folder, key: filePath, res: true };
-            }
-            catch (error) {
-                throw new Error(`Failed to upload local file "${options.name}": ${error.message}`);
-            }
-        });
-        this.uploadStream = (options) => __awaiter(this, void 0, void 0, function* () {
-            (0, Affirm_1.default)(options, `Invalid upload options`);
-            const { dataset, name, recordProjection } = options;
-            (0, Affirm_1.default)(dataset, 'No streaming dataset');
-            (0, Affirm_1.default)(name, 'No filename provided for upload stream');
-            (0, Affirm_1.default)(recordProjection, 'No recordProjection for upload stream');
-            const folder = this._path;
-            try {
-                if (!fs.existsSync(folder))
-                    fs.mkdirSync(folder, { recursive: true });
-                const filePath = path_1.default.join(folder, options.name);
-                fs.writeFileSync(filePath, '');
-                yield dataset.streamBatches((batch) => __awaiter(this, void 0, void 0, function* () {
-                    const chunks = FileExporter_1.default.prepareBatch(batch, options);
-                    for (const chunk of chunks)
-                        fs.appendFileSync(filePath, chunk);
-                }));
-                return { bucket: folder, key: filePath, res: true };
-            }
-            catch (error) {
-                // Clean up the partial file if it exists
-                const filePath = path_1.default.join(folder, options.name);
-                if (fs.existsSync(filePath)) {
-                    try {
-                        fs.unlinkSync(filePath);
-                    }
-                    catch (cleanupError) {
-                        console.error(`Failed to clean up partial file after error: ${cleanupError.message}`);
-                        throw cleanupError;
-                    }
-                }
-                throw new Error(`Failed to complete local multipart upload for "${options.name}": ${error.message}`);
-            }
-        });
         this.saveFile = (fileKey, content) => {
             (0, Affirm_1.default)(this._path, 'Path not initialized');
             (0, Affirm_1.default)(fileKey, 'Invalid file key');
@@ -150,9 +98,6 @@ class LocalDestinationDriver {
             const fileContent = yield s3Driver.downloadFile(sourceFileKey);
             yield this.saveFile(destinationFileKey, fileContent);
         });
-        this.ready = (destinationPath) => __awaiter(this, void 0, void 0, function* () {
-            return fs.createWriteStream(destinationPath);
-        });
         this.move = (fromPath, toName) => __awaiter(this, void 0, void 0, function* () {
             try {
                 const toFilePath = path_1.default.join(this._path, toName);

package/drivers/files/LocalSourceDriver.js
CHANGED

@@ -374,8 +374,9 @@ class LocalSourceDriver {
             }
             fs.renameSync(sourceFilePath, destinationFilePath);
         };
-        this.ready = (
-            (0, Affirm_1.default)(
+        this.ready = (request) => __awaiter(this, void 0, void 0, function* () {
+            (0, Affirm_1.default)(request, 'Invalid ready request');
+            const { producer } = request;
             // TODO: extra logic for encoded files (xml, xls, ...) to be decoded and prepared locally as a plain CSV
             // then return the uri to this new temporary file
             const { fileKey } = producer.settings;
package/drivers/s3/S3DestinationDriver.js
CHANGED

@@ -8,6 +8,26 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
         step((generator = generator.apply(thisArg, _arguments || [])).next());
     });
 };
+var __asyncValues = (this && this.__asyncValues) || function (o) {
+    if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
+    var m = o[Symbol.asyncIterator], i;
+    return m ? m.call(o) : (o = typeof __values === "function" ? __values(o) : o[Symbol.iterator](), i = {}, verb("next"), verb("throw"), verb("return"), i[Symbol.asyncIterator] = function () { return this; }, i);
+    function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }
+    function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }
+};
+var __await = (this && this.__await) || function (v) { return this instanceof __await ? (this.v = v, this) : new __await(v); }
+var __asyncGenerator = (this && this.__asyncGenerator) || function (thisArg, _arguments, generator) {
+    if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
+    var g = generator.apply(thisArg, _arguments || []), i, q = [];
+    return i = Object.create((typeof AsyncIterator === "function" ? AsyncIterator : Object).prototype), verb("next"), verb("throw"), verb("return", awaitReturn), i[Symbol.asyncIterator] = function () { return this; }, i;
+    function awaitReturn(f) { return function (v) { return Promise.resolve(v).then(f, reject); }; }
+    function verb(n, f) { if (g[n]) { i[n] = function (v) { return new Promise(function (a, b) { q.push([n, v, a, b]) > 1 || resume(n, v); }); }; if (f) i[n] = f(i[n]); } }
+    function resume(n, v) { try { step(g[n](v)); } catch (e) { settle(q[0][3], e); } }
+    function step(r) { r.value instanceof __await ? Promise.resolve(r.value.v).then(fulfill, reject) : settle(q[0][2], r); }
+    function fulfill(value) { resume("next", value); }
+    function reject(value) { resume("throw", value); }
+    function settle(f, v) { if (f(v), q.shift(), q.length) resume(q[0][0], q[0][1]); }
+};
 var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
@@ -15,7 +35,9 @@ Object.defineProperty(exports, "__esModule", { value: true });
 const client_s3_1 = require("@aws-sdk/client-s3");
 const Affirm_1 = __importDefault(require("../../core/Affirm"));
 const SecretManager_1 = __importDefault(require("../../engines/SecretManager"));
-const
+const path_1 = __importDefault(require("path"));
+const fs_1 = __importDefault(require("fs"));
+const readline_1 = __importDefault(require("readline"));
 class S3DestinationDriver {
     constructor() {
         this.init = (source) => __awaiter(this, void 0, void 0, function* () {
@@ -33,32 +55,74 @@ class S3DestinationDriver {
             // TODO: is there a way to test if the connection was successful? like a query or scan that I can do?
             return this;
         });
-        this.
-            (0, Affirm_1.default)(
-
-
+        this.copyFromS3 = (sourceBucket, sourceFileKey, destinationFileKey) => __awaiter(this, void 0, void 0, function* () {
+            (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "init()" first');
+            (0, Affirm_1.default)(sourceBucket, 'Invalid source bucket');
+            (0, Affirm_1.default)(sourceFileKey, 'Invalid source file key');
+            (0, Affirm_1.default)(destinationFileKey, 'Invalid destination file key');
+            yield this._client.send(new client_s3_1.CopyObjectCommand({
+                CopySource: `${sourceBucket}/${sourceFileKey}`,
+                Bucket: this._bucketName,
+                Key: destinationFileKey
+            }));
+        });
+        this.saveFile = (fileKey, content) => __awaiter(this, void 0, void 0, function* () {
+            (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "init()" first');
+            (0, Affirm_1.default)(fileKey, 'Invalid file key');
+            (0, Affirm_1.default)(content, 'Invalid content');
+            yield this._client.send(new client_s3_1.PutObjectCommand({
                 Bucket: this._bucketName,
-                Key:
+                Key: fileKey,
                 Body: content
+            }));
+        });
+        this.move = (fromPath, toName) => __awaiter(this, void 0, void 0, function* () {
+            (0, Affirm_1.default)(fromPath, 'Invalid source path');
+            (0, Affirm_1.default)(toName, 'Invalid destination name');
+            (0, Affirm_1.default)(fs_1.default.existsSync(fromPath), `Source file does not exist: ${fromPath}`);
+            const readStream = fs_1.default.createReadStream(fromPath);
+            return this._multipartUpload(toName, readStream);
+        });
+        this.transformAndMove = (fromPath, transform, toName) => __awaiter(this, void 0, void 0, function* () {
+            (0, Affirm_1.default)(fromPath, 'Invalid source path');
+            (0, Affirm_1.default)(transform, 'Invalid transform function');
+            (0, Affirm_1.default)(toName, 'Invalid destination name');
+            (0, Affirm_1.default)(fs_1.default.existsSync(fromPath), `Source file does not exist: ${fromPath}`);
+            const reader = fs_1.default.createReadStream(fromPath);
+            const lineReader = readline_1.default.createInterface({ input: reader, crlfDelay: Infinity });
+            const transformedLines = function () {
+                return __asyncGenerator(this, arguments, function* () {
+                    var _a, e_1, _b, _c;
+                    try {
+                        for (var _d = true, lineReader_1 = __asyncValues(lineReader), lineReader_1_1; lineReader_1_1 = yield __await(lineReader_1.next()), _a = lineReader_1_1.done, !_a; _d = true) {
+                            _c = lineReader_1_1.value;
+                            _d = false;
+                            const line = _c;
+                            yield yield __await(transform(line) + '\n');
+                        }
+                    }
+                    catch (e_1_1) { e_1 = { error: e_1_1 }; }
+                    finally {
+                        try {
+                            if (!_d && !_a && (_b = lineReader_1.return)) yield __await(_b.call(lineReader_1));
+                        }
+                        finally { if (e_1) throw e_1.error; }
+                    }
+                });
            };
-
-            const res = yield this._client.send(command);
-            (0, Affirm_1.default)(res.$metadata.httpStatusCode === 200, `Failed to upload the file "${name}" to the bucket "${this._bucketName}": status code ${res.$metadata.httpStatusCode}`);
-            return { res: true, key: name, bucket: this._bucketName };
+            return this._multipartUpload(toName, transformedLines());
         });
-        this.
-
-
-
-            (0, Affirm_1.default)(name, 'No filename provided for upload stream');
-            (0, Affirm_1.default)(recordProjection, 'No recordProjection for upload stream');
+        this._multipartUpload = (toName, dataSource) => __awaiter(this, void 0, void 0, function* () {
+            var _a, dataSource_1, dataSource_1_1;
+            var _b, e_2, _c, _d;
+            let uploadId;
             try {
                 // Create the multipart upload
                 const createMultipartUploadRes = yield this._client.send(new client_s3_1.CreateMultipartUploadCommand({
                     Bucket: this._bucketName,
-                    Key:
+                    Key: toName
                 }));
-
+                uploadId = createMultipartUploadRes.UploadId;
                 (0, Affirm_1.default)(uploadId, 'Failed to initiate multipart upload');
                 const uploadedParts = [];
                 let partNumber = 1;
@@ -67,7 +131,7 @@ class S3DestinationDriver {
                 const uploadPart = (buffer) => __awaiter(this, void 0, void 0, function* () {
                     const uploadPartRes = yield this._client.send(new client_s3_1.UploadPartCommand({
                         Bucket: this._bucketName,
-                        Key:
+                        Key: toName,
                         UploadId: uploadId,
                         PartNumber: partNumber,
                         Body: buffer
@@ -78,18 +142,28 @@ class S3DestinationDriver {
                    });
                    partNumber++;
                });
-
-
-
-
+                try {
+                    for (_a = true, dataSource_1 = __asyncValues(dataSource); dataSource_1_1 = yield dataSource_1.next(), _b = dataSource_1_1.done, !_b; _a = true) {
+                        _d = dataSource_1_1.value;
+                        _a = false;
+                        const chunk = _d;
+                        const chunkBuffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
                        accumulatedBuffer = Buffer.concat([accumulatedBuffer, chunkBuffer]);
                        // If accumulated buffer is at least 5MB, upload it as a part
-
-
-                        accumulatedBuffer =
+                        while (accumulatedBuffer.length >= MIN_PART_SIZE) {
+                            const partBuffer = accumulatedBuffer.subarray(0, MIN_PART_SIZE);
+                            accumulatedBuffer = accumulatedBuffer.subarray(MIN_PART_SIZE);
+                            yield uploadPart(partBuffer);
                        }
                    }
-                }
+                }
+                catch (e_2_1) { e_2 = { error: e_2_1 }; }
+                finally {
+                    try {
+                        if (!_a && !_b && (_c = dataSource_1.return)) yield _c.call(dataSource_1);
+                    }
+                    finally { if (e_2) throw e_2.error; }
+                }
                // Upload any remaining data as the final part (even if smaller than 5MB)
                if (accumulatedBuffer.length > 0) {
                    yield uploadPart(accumulatedBuffer);
@@ -97,63 +171,27 @@ class S3DestinationDriver {
                // Complete the multipart upload
                const completeRes = yield this._client.send(new client_s3_1.CompleteMultipartUploadCommand({
                    Bucket: this._bucketName,
-                    Key:
+                    Key: toName,
                    UploadId: uploadId,
                    MultipartUpload: {
                        Parts: uploadedParts
                    }
                }));
-                (0, Affirm_1.default)(completeRes.$metadata.httpStatusCode === 200, `Failed to complete multipart upload for "${
-                return { res: true, key:
+                (0, Affirm_1.default)(completeRes.$metadata.httpStatusCode === 200, `Failed to complete multipart upload for "${toName}": status code ${completeRes.$metadata.httpStatusCode}`);
+                return { res: true, key: path_1.default.join(this._bucketName, toName), bucket: this._bucketName };
            }
            catch (error) {
                // If anything fails, make sure to abort the multipart upload
-                if (
+                if (uploadId) {
                    yield this._client.send(new client_s3_1.AbortMultipartUploadCommand({
                        Bucket: this._bucketName,
-                        Key:
-                        UploadId:
+                        Key: toName,
+                        UploadId: uploadId
                    }));
                }
                throw error;
            }
        });
-        this.copyFromS3 = (sourceBucket, sourceFileKey, destinationFileKey) => __awaiter(this, void 0, void 0, function* () {
-            (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "init()" first');
-            (0, Affirm_1.default)(sourceBucket, 'Invalid source bucket');
-            (0, Affirm_1.default)(sourceFileKey, 'Invalid source file key');
-            (0, Affirm_1.default)(destinationFileKey, 'Invalid destination file key');
-            yield this._client.send(new client_s3_1.CopyObjectCommand({
-                CopySource: `${sourceBucket}/${sourceFileKey}`,
-                Bucket: this._bucketName,
-                Key: destinationFileKey
-            }));
-        });
-        this.saveFile = (fileKey, content) => __awaiter(this, void 0, void 0, function* () {
-            (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "init()" first');
-            (0, Affirm_1.default)(fileKey, 'Invalid file key');
-            (0, Affirm_1.default)(content, 'Invalid content');
-            yield this._client.send(new client_s3_1.PutObjectCommand({
-                Bucket: this._bucketName,
-                Key: fileKey,
-                Body: content
-            }));
-        });
-        this.ready = (destinationPath) => {
-            void destinationPath;
-            throw new Error('Not implemented yet');
-        };
-        this.move = (fromPath, toName) => {
-            void fromPath;
-            void toName;
-            throw new Error('Not implemented yet');
-        };
-        this.transformAndMove = (fromPath, transform, toName) => {
-            void fromPath;
-            void toName;
-            void transform;
-            throw new Error('Not implemented yet');
-        };
     }
 }
 exports.default = S3DestinationDriver;
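The rewritten destination driver now funnels move and transformAndMove through _multipartUpload, which buffers incoming chunks until they reach the 5 MB part minimum before each UploadPart call and aborts the upload on failure. A hypothetical call site in modern syntax (the paths, the source object, and the trim transform are invented for illustration):

    // move(): stream an existing local file into the bucket as >= 5 MB parts.
    const driver = await new S3DestinationDriver().init(source);
    await driver.move('/tmp/result.csv', 'exports/result.csv');
    // transformAndMove(): rewrite each line on the way up (sketch only).
    await driver.transformAndMove('/tmp/result.csv', line => line.trim(), 'exports/result.clean.csv');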
package/drivers/s3/S3SourceDriver.js
CHANGED

@@ -22,8 +22,10 @@ Object.defineProperty(exports, "__esModule", { value: true });
 const client_s3_1 = require("@aws-sdk/client-s3");
 const Affirm_1 = __importDefault(require("../../core/Affirm"));
 const SecretManager_1 = __importDefault(require("../../engines/SecretManager"));
+const promises_1 = require("stream/promises");
 const readline_1 = __importDefault(require("readline"));
 const path_1 = __importDefault(require("path"));
+const fs_1 = __importDefault(require("fs"));
 const Algo_1 = __importDefault(require("../../core/Algo"));
 const xlsx_1 = __importDefault(require("xlsx"));
 const XMLParser_1 = __importDefault(require("../../engines/parsing/XMLParser"));
@@ -33,6 +35,7 @@ const DriverHelper_1 = __importDefault(require("../DriverHelper"));
 const Logger_1 = __importDefault(require("../../helper/Logger"));
 const Constants_1 = __importDefault(require("../../Constants"));
 const XLSParser_1 = __importDefault(require("../../engines/parsing/XLSParser"));
+const ExecutorScope_1 = __importDefault(require("../../executors/ExecutorScope"));
 class S3SourceDriver {
     constructor() {
         this.init = (source) => __awaiter(this, void 0, void 0, function* () {
@@ -446,10 +449,47 @@ class S3SourceDriver {
                 Key: destinationFileKey
             }));
         });
-        this.ready = (
-
-
-
+        this.ready = (request) => __awaiter(this, void 0, void 0, function* () {
+            (0, Affirm_1.default)(request, 'Invalid producer');
+            (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "init()" first');
+            const { producer, scope } = request;
+            const { fileKey } = producer.settings;
+            (0, Affirm_1.default)(fileKey, 'Invalid file key');
+            const streamToFile = (s3Key, localPath) => __awaiter(this, void 0, void 0, function* () {
+                const command = new client_s3_1.GetObjectCommand({
+                    Bucket: this._bucketName,
+                    Key: s3Key
+                });
+                const response = yield this._client.send(command);
+                (0, Affirm_1.default)(response.Body, `Failed to fetch object "${s3Key}" from S3`);
+                // Ensure the directory for the file exists
+                const fileDir = path_1.default.dirname(localPath);
+                if (!fs_1.default.existsSync(fileDir)) {
+                    fs_1.default.mkdirSync(fileDir, { recursive: true });
+                }
+                const writeStream = fs_1.default.createWriteStream(localPath);
+                yield (0, promises_1.pipeline)(response.Body, writeStream);
+            });
+            if (fileKey.includes('%')) {
+                const allFileKeys = yield this.listFiles(fileKey);
+                Affirm_1.default.hasItems(allFileKeys, `The file key pattern "${fileKey}" doesn't have any matches in bucket "${this._bucketName}".`);
+                // Stream each file to local temp storage sequentially to avoid overwhelming the connection
+                const allFilePaths = [];
+                for (const s3Key of allFileKeys) {
+                    const localPath = ExecutorScope_1.default.getProducerPath(scope, producer, s3Key);
+                    ExecutorScope_1.default.ensurePath(localPath);
+                    yield streamToFile(s3Key, localPath);
+                    allFilePaths.push(localPath);
+                }
+                return { files: allFilePaths.map(x => ({ fullUri: x })) };
+            }
+            else {
+                const localPath = ExecutorScope_1.default.getProducerPath(scope, producer, fileKey);
+                ExecutorScope_1.default.ensurePath(localPath);
+                yield streamToFile(fileKey, localPath);
+                return { files: [{ fullUri: localPath }] };
+            }
+        });
     }
 }
 exports.default = S3SourceDriver;
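The new ready implementation stages S3 objects into scope-local files before parsing, expanding '%' patterns through listFiles. A rough caller-side sketch (the producer and scope objects shown are simplified placeholders; the real ones come from the executor):

    // Hypothetical usage; producer.settings.fileKey may be an exact key or a '%' pattern.
    const { files } = await s3Source.ready({ producer, scope });
    for (const file of files)
        console.log('staged at', file.fullUri); // local path under the executor scope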
package/engines/Environment.js
CHANGED

@@ -45,7 +45,7 @@ class EnvironmentClass {
             }
         }
         catch (error) {
-            throw new Error(`Error loading configuration from ${configPath}: ${error.message}`);
+            throw new Error(`Error loading from ${path_1.default.resolve(remoraPath)} configuration from ${configPath}: ${error.message}`);
         }
     }
     return configs;
package/engines/parsing/LineParser.js
CHANGED

@@ -46,6 +46,25 @@ class LineParserClass {
                 throw new Error(`File type ${fileType} not implemented yet.`);
             }
         };
+        /**
+         * Used ONLY to parse internal records (inside the .dataset) since I know they are already prepared in the correct way
+         */
+        this._internalParseCSV = (line, fields, delimiter) => {
+            var _a, _b;
+            const parts = CSVParser_1.default.parseRow(line, delimiter !== null && delimiter !== void 0 ? delimiter : ',');
+            const record = {};
+            for (const [index, field] of fields.entries()) {
+                const fieldKey = field.finalKey;
+                record[fieldKey] = TypeCaster_1.default.cast(parts[index], (_b = (_a = field.dimension) === null || _a === void 0 ? void 0 : _a.type) !== null && _b !== void 0 ? _b : 'string');
+            }
+            return record;
+        };
+        /**
+         * Used ONLY to parse internal records (inside the .dataset) since I know they are already prepared in the correct way
+         */
+        this._internalParseJSON = (line) => {
+            return JSON.parse(line);
+        };
     }
 }
 const LineParser = new LineParserClass();
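As a rough illustration of how the new internal CSV parser maps positional values onto field keys (the field descriptors below are invented; only finalKey and dimension.type are read, and the exact casting behaviour of TypeCaster is assumed):

    // Hypothetical call; assumes TypeCaster.cast('42', 'number') yields 42.
    const fields = [
        { finalKey: 'id', dimension: { type: 'number' } },
        { finalKey: 'name', dimension: { type: 'string' } }
    ];
    const record = LineParser._internalParseCSV('42,Ada', fields, ',');
    // record ~ { id: 42, name: 'Ada' }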
package/engines/validation/Validator.js
CHANGED

@@ -119,11 +119,11 @@ class ValidatorClass {
             // For now we only support connecting producers of the same engine type to a consumer, so we give an error if we detect different ones
             const uniqEngines = Algo_1.default.uniqBy(sources, 'engine');
             if (uniqEngines.length !== 1)
-                errors.push(`Sources with different engines
+                errors.push(`Sources with different engines are used in the consumer "${consumer.name}" (${uniqEngines.join(', ')})`);
             // For now we also only support consumers that have producers ALL having the same exact source
             const uniqNames = Algo_1.default.uniqBy(sources, 'name');
             if (uniqNames.length !== 1)
-                errors.push(`Producers with different sources
+                errors.push(`Producers with different sources are used in the consumer "${consumer.name}" (${uniqNames.join(', ')})`);
             if (consumer.filters && consumer.filters.length > 0) {
                 if (consumer.filters.some(x => x.sql && x.rule))
                     errors.push(`A single consumer can't have both filters based on SQL and filters based on rules.`);
@@ -188,6 +188,11 @@ class ValidatorClass {
                     errors.push(`The export destination "${output.exportDestination}" was not found in the sources.`);
                 }
             }
+            // Validate distinct
+            if (consumer.options) {
+                if (Algo_1.default.hasVal(consumer.options.distinct) && Algo_1.default.hasVal(consumer.options.distinctOn))
+                    errors.push(`Can't specify a "distinct" and a "distinctOn" clause on the same consumer (${consumer.name}); use one or the other.`);
+            }
         }
         catch (e) {
             if (errors.length === 0)