@forzalabs/remora 0.1.5-nasco.3 → 0.1.7-nasco.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Constants.js +1 -1
- package/database/DatabaseEngine.js +17 -14
- package/definitions/json_schemas/consumer-schema.json +68 -3
- package/drivers/DeltaShareDriver.js +9 -5
- package/drivers/LocalDriver.js +59 -0
- package/drivers/S3Driver.js +51 -0
- package/engines/Environment.js +4 -0
- package/engines/consumer/ConsumerOnFinishManager.js +188 -0
- package/engines/dataset/Dataset.js +7 -4
- package/engines/dataset/DatasetRecord.js +1 -1
- package/engines/dataset/ParallelDataset.js +3 -0
- package/engines/execution/ExecutionEnvironment.js +11 -0
- package/engines/execution/ExecutionPlanner.js +3 -0
- package/engines/producer/ProducerEngine.js +1 -0
- package/engines/scheduler/CronScheduler.js +215 -0
- package/engines/scheduler/QueueManager.js +307 -0
- package/engines/transform/TransformationEngine.js +2 -2
- package/engines/transform/TypeCaster.js +12 -4
- package/engines/usage/UsageDataManager.js +41 -0
- package/package.json +3 -1
package/Constants.js
CHANGED
|
@@ -20,23 +20,28 @@ class DatabaseEngineClass {
|
|
|
20
20
|
this.MAX_TRY_CONNECTION = 3;
|
|
21
21
|
this.db = () => this._db;
|
|
22
22
|
/**
 * Opens the MongoDB connection, retrying up to `MAX_TRY_CONNECTION` times.
 *
 * The connection string comes from the `MONGO_URI` environment variable when it is set
 * and non-empty; otherwise it falls back to the `mongo` host when `Helper.isDev()` is true,
 * or to `localhost`. On the first successful attempt `_db` and `_connected` are set and the
 * retry loop stops. Connection errors are collected and reported on `console.error`;
 * they are not rethrown.
 *
 * @returns {Promise<void>}
 */
this.connect = () => __awaiter(this, void 0, void 0, function* () {
    // WARNING: this was changed during the deployment to ECS... I've reverted, but maybe it needs to be changed or looked into...
    // MONGO_URI must be used as the URI itself, not merely as the condition that picks a default.
    const envUri = process.env.MONGO_URI;
    this._uri = envUri
        ? envUri
        : (Helper_1.default.isDev()
            ? 'mongodb://mongo:27017/remora'
            : 'mongodb://localhost:27017/remora');
    this._client = new mongodb_1.MongoClient(this._uri);
    // A URI supplied through the environment may embed "user:password@": never log it verbatim.
    const printableUri = this._uri.replace(/\/\/[^/@]*@/, '//***@');
    const errors = [];
    for (let i = 0; i < this.MAX_TRY_CONNECTION; i++) {
        try {
            console.log(`Attempting to connect to mongo: "${printableUri}"`);
            yield this._client.connect();
            this._db = this._client.db(Settings_1.default.db.name);
            this._connected = true;
            console.log('Connected to MongoDB');
            break;
        }
        catch (error) {
            // Keep the 1-based attempt number next to the error so the final report is readable
            errors.push((i + 1) + ': connection to MongoDB throws this error:', error);
        }
    }
    if (!this._connected)
        console.error(`Despite ${this.MAX_TRY_CONNECTION} retries it was not possible to connect to mongoDb, these are the errors encountered:\n` + errors.join('\n'));
});
|
|
41
46
|
this.disconnect = () => __awaiter(this, void 0, void 0, function* () {
|
|
42
47
|
try {
|
|
@@ -121,8 +126,6 @@ class DatabaseEngineClass {
|
|
|
121
126
|
throw error;
|
|
122
127
|
}
|
|
123
128
|
});
|
|
124
|
-
const uri = Helper_1.default.isDev() ? 'mongodb://mongo:27017/remora' : 'mongodb://localhost:27017/remora';
|
|
125
|
-
this._client = new mongodb_1.MongoClient(uri);
|
|
126
129
|
}
|
|
127
130
|
}
|
|
128
131
|
const DatabaseEngine = new DatabaseEngineClass();
|
|
@@ -235,13 +235,47 @@
|
|
|
235
235
|
"type": "string",
|
|
236
236
|
"enum": [
|
|
237
237
|
"CRON",
|
|
238
|
-
"API"
|
|
238
|
+
"API",
|
|
239
|
+
"QUEUE"
|
|
239
240
|
],
|
|
240
|
-
"description": "The type of trigger schedule"
|
|
241
|
+
"description": "The type of trigger schedule. CRON: time-based scheduling. API: HTTP endpoint trigger. QUEUE: SQS queue message trigger (supports shared queues with message type filtering)."
|
|
241
242
|
},
|
|
242
243
|
"value": {
|
|
243
244
|
"type": "string",
|
|
244
|
-
"description": "The value for the trigger (e.g.,
|
|
245
|
+
"description": "The value for the trigger. For CRON: cron expression (e.g., '0 0 * * *'). For API: endpoint path. For QUEUE: SQS queue URL or queue name (will construct full URL using metadata.region and metadata.accountId if needed)."
|
|
246
|
+
},
|
|
247
|
+
"metadata": {
|
|
248
|
+
"type": "object",
|
|
249
|
+
"description": "Additional metadata for the trigger (e.g., AWS credentials, message type filter)",
|
|
250
|
+
"properties": {
|
|
251
|
+
"messageType": {
|
|
252
|
+
"type": "string",
|
|
253
|
+
"description": "Optional message type filter for QUEUE triggers. Only messages with matching 'type', 'messageType', or 'eventType' fields will be processed by this consumer. Messages without a matching type will be left in the queue for other consumers, enabling shared queue usage."
|
|
254
|
+
},
|
|
255
|
+
"region": {
|
|
256
|
+
"type": "string",
|
|
257
|
+
"description": "AWS region for the queue (for QUEUE triggers)"
|
|
258
|
+
},
|
|
259
|
+
"accountId": {
|
|
260
|
+
"type": "string",
|
|
261
|
+
"description": "AWS account ID for constructing queue URL (for QUEUE triggers)"
|
|
262
|
+
},
|
|
263
|
+
"accessKeyId": {
|
|
264
|
+
"type": "string",
|
|
265
|
+
"description": "AWS access key ID for queue authentication (for QUEUE triggers)"
|
|
266
|
+
},
|
|
267
|
+
"secretAccessKey": {
|
|
268
|
+
"type": "string",
|
|
269
|
+
"description": "AWS secret access key for queue authentication (for QUEUE triggers)"
|
|
270
|
+
},
|
|
271
|
+
"sessionToken": {
|
|
272
|
+
"type": "string",
|
|
273
|
+
"description": "AWS session token for temporary credentials (for QUEUE triggers)"
|
|
274
|
+
}
|
|
275
|
+
},
|
|
276
|
+
"additionalProperties": {
|
|
277
|
+
"type": "string"
|
|
278
|
+
}
|
|
245
279
|
}
|
|
246
280
|
},
|
|
247
281
|
"required": [
|
|
@@ -249,6 +283,20 @@
|
|
|
249
283
|
"value"
|
|
250
284
|
],
|
|
251
285
|
"additionalProperties": false
|
|
286
|
+
},
|
|
287
|
+
"onSuccess": {
|
|
288
|
+
"type": "array",
|
|
289
|
+
"description": "Actions to perform when the output operation completes successfully",
|
|
290
|
+
"items": {
|
|
291
|
+
"$ref": "#/definitions/consumerOutputOnFinish"
|
|
292
|
+
}
|
|
293
|
+
},
|
|
294
|
+
"onError": {
|
|
295
|
+
"type": "array",
|
|
296
|
+
"description": "Actions to perform when the output operation fails",
|
|
297
|
+
"items": {
|
|
298
|
+
"$ref": "#/definitions/consumerOutputOnFinish"
|
|
299
|
+
}
|
|
252
300
|
}
|
|
253
301
|
},
|
|
254
302
|
"required": [
|
|
@@ -792,6 +840,23 @@
|
|
|
792
840
|
"additionalProperties": false
|
|
793
841
|
}
|
|
794
842
|
]
|
|
843
|
+
},
|
|
844
|
+
"consumerOutputOnFinish": {
|
|
845
|
+
"type": "object",
|
|
846
|
+
"description": "Actions to perform when output operations complete",
|
|
847
|
+
"properties": {
|
|
848
|
+
"action": {
|
|
849
|
+
"type": "string",
|
|
850
|
+
"enum": ["move-file"],
|
|
851
|
+
"description": "The action to perform"
|
|
852
|
+
},
|
|
853
|
+
"moveToDestination": {
|
|
854
|
+
"type": "string",
|
|
855
|
+
"description": "If the action is 'move-file', this specifies the name of the Remora source to which the source file is moved"
|
|
856
|
+
}
|
|
857
|
+
},
|
|
858
|
+
"required": ["action"],
|
|
859
|
+
"additionalProperties": false
|
|
795
860
|
}
|
|
796
861
|
},
|
|
797
862
|
"examples": [
|
|
@@ -13,6 +13,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
13
13
|
};
|
|
14
14
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
15
|
const Affirm_1 = __importDefault(require("../core/Affirm"));
|
|
16
|
+
const SecretManager_1 = __importDefault(require("../engines/SecretManager"));
|
|
16
17
|
const DriverHelper_1 = __importDefault(require("./DriverHelper"));
|
|
17
18
|
/**
|
|
18
19
|
* Delta Share (Databricks Delta Sharing) Source Driver
|
|
@@ -31,7 +32,7 @@ class DeltaShareSourceDriver {
|
|
|
31
32
|
const { authentication } = source;
|
|
32
33
|
(0, Affirm_1.default)(authentication, 'Invalid authentication for delta-share source');
|
|
33
34
|
this._shareUrl = authentication.host;
|
|
34
|
-
this._bearerToken = authentication.bearerToken || authentication.sessionToken || authentication.password;
|
|
35
|
+
this._bearerToken = SecretManager_1.default.replaceSecret(authentication.bearerToken || authentication.sessionToken || authentication.password);
|
|
35
36
|
this._share = authentication.share;
|
|
36
37
|
this._schema = authentication.schema;
|
|
37
38
|
this._table = authentication.table;
|
|
@@ -58,7 +59,8 @@ class DeltaShareSourceDriver {
|
|
|
58
59
|
(0, Affirm_1.default)(request, `Invalid download request`);
|
|
59
60
|
(0, Affirm_1.default)(!request.fileKey.includes('%'), `On a delta-share the file key can not include "%"`);
|
|
60
61
|
const deltaFiles = yield this._getAllFilesInTables(this._table);
|
|
61
|
-
const
|
|
62
|
+
const hyparquet = yield import('hyparquet');
|
|
63
|
+
const { asyncBufferFromUrl, parquetReadObjects } = hyparquet;
|
|
62
64
|
const lines = [];
|
|
63
65
|
for (const deltaFile of deltaFiles) {
|
|
64
66
|
const byteLength = (_b = (_a = deltaFile.file.deltaSingleAction.add) === null || _a === void 0 ? void 0 : _a.size) !== null && _b !== void 0 ? _b : (_c = deltaFile.file.deltaSingleAction.remove) === null || _c === void 0 ? void 0 : _c.size;
|
|
@@ -75,7 +77,8 @@ class DeltaShareSourceDriver {
|
|
|
75
77
|
(0, Affirm_1.default)(request.options.lineFrom !== undefined && request.options.lineTo !== undefined, 'Missing read range');
|
|
76
78
|
const deltaFiles = yield this._getAllFilesInTables(this._table);
|
|
77
79
|
const { options: { lineFrom, lineTo } } = request;
|
|
78
|
-
const
|
|
80
|
+
const hyparquet = yield import('hyparquet');
|
|
81
|
+
const { asyncBufferFromUrl, parquetReadObjects } = hyparquet;
|
|
79
82
|
const lines = [];
|
|
80
83
|
let index = 0;
|
|
81
84
|
for (const deltaFile of deltaFiles) {
|
|
@@ -96,7 +99,8 @@ class DeltaShareSourceDriver {
|
|
|
96
99
|
var _a, _b, _c;
|
|
97
100
|
(0, Affirm_1.default)(dataset, 'Invalid dataset');
|
|
98
101
|
const deltaFiles = yield this._getAllFilesInTables(this._table);
|
|
99
|
-
const
|
|
102
|
+
const hyparquet = yield import('hyparquet');
|
|
103
|
+
const { asyncBufferFromUrl, parquetReadObjects } = hyparquet;
|
|
100
104
|
// For each file, download it with the hyparquet package, read lines, then save locally to create the dataset
|
|
101
105
|
let index = 0;
|
|
102
106
|
let totalLineCount = 0;
|
|
@@ -142,7 +146,7 @@ class DeltaShareSourceDriver {
|
|
|
142
146
|
Authorization: `Bearer ${this._bearerToken}`
|
|
143
147
|
}
|
|
144
148
|
});
|
|
145
|
-
(0, Affirm_1.default)(res.ok, `Error fetching version from the delta share: ${res.status} ${res.statusText}`);
|
|
149
|
+
(0, Affirm_1.default)(res.ok, `Error fetching version from the delta share: ${res.status} ${res.statusText} (${yield res.text()})`);
|
|
146
150
|
const version = res.headers['delta-table-version'];
|
|
147
151
|
return version;
|
|
148
152
|
});
|
package/drivers/LocalDriver.js
CHANGED
|
@@ -320,6 +320,36 @@ class LocalSourceDriver {
|
|
|
320
320
|
throw new Error(`Failed to list files in directory "${this._path}": ${error.message}`);
|
|
321
321
|
}
|
|
322
322
|
};
|
|
323
|
+
/**
 * Synchronously reads a file stored under this driver's base path.
 *
 * @param {string} fileKey - File location relative to the driver path.
 * @returns {Buffer} Raw content of the file (no encoding is applied).
 * Affirm is expected to throw when the path is not initialized, the key is empty
 * or the file does not exist (behavior of Affirm is defined outside this block).
 */
this.readFile = (fileKey) => {
    (0, Affirm_1.default)(this._path, 'Path not initialized');
    (0, Affirm_1.default)(fileKey, 'Invalid file key');
    const targetPath = path_1.default.join(this._path, fileKey);
    const isPresent = fs.existsSync(targetPath);
    (0, Affirm_1.default)(isPresent, `Source file does not exist: ${targetPath}`);
    const content = fs.readFileSync(targetPath);
    return content;
};
|
|
330
|
+
/**
 * Synchronously removes a file stored under this driver's base path.
 * A file that does not exist is silently ignored.
 *
 * @param {string} fileKey - File location relative to the driver path.
 * @returns {void}
 */
this.deleteFile = (fileKey) => {
    (0, Affirm_1.default)(this._path, 'Path not initialized');
    (0, Affirm_1.default)(fileKey, 'Invalid file key');
    const targetPath = path_1.default.join(this._path, fileKey);
    // Nothing to do when the file is already gone
    if (!fs.existsSync(targetPath))
        return;
    fs.unlinkSync(targetPath);
};
|
|
338
|
+
/**
 * Moves a file from this driver's base path into another directory.
 *
 * The destination directory is created when missing. The move is attempted with a rename;
 * when source and destination live on different filesystems (rename fails with `EXDEV`,
 * typical with mounted volumes) it falls back to copy + delete.
 *
 * @param {string} sourceFileKey - File location relative to the driver path.
 * @param {string} destinationPath - Directory that receives the file.
 * @param {string} destinationFileKey - File location relative to `destinationPath`.
 * @returns {void}
 */
this.moveFile = (sourceFileKey, destinationPath, destinationFileKey) => {
    (0, Affirm_1.default)(this._path, 'Path not initialized');
    (0, Affirm_1.default)(sourceFileKey, 'Invalid source file key');
    (0, Affirm_1.default)(destinationPath, 'Invalid destination path');
    (0, Affirm_1.default)(destinationFileKey, 'Invalid destination file key');
    const sourceFilePath = path_1.default.join(this._path, sourceFileKey);
    const destinationFilePath = path_1.default.join(destinationPath, destinationFileKey);
    (0, Affirm_1.default)(fs.existsSync(sourceFilePath), `Source file does not exist: ${sourceFilePath}`);
    // Ensure destination directory exists
    const destinationDir = path_1.default.dirname(destinationFilePath);
    if (!fs.existsSync(destinationDir)) {
        fs.mkdirSync(destinationDir, { recursive: true });
    }
    try {
        fs.renameSync(sourceFilePath, destinationFilePath);
    }
    catch (error) {
        // A rename cannot cross filesystem boundaries: only that case is recovered here
        if (!error || error.code !== 'EXDEV')
            throw error;
        fs.copyFileSync(sourceFilePath, destinationFilePath);
        fs.unlinkSync(sourceFilePath);
    }
};
|
|
323
353
|
}
|
|
324
354
|
}
|
|
325
355
|
exports.LocalSourceDriver = LocalSourceDriver;
|
|
@@ -385,6 +415,35 @@ class LocalDestinationDriver {
|
|
|
385
415
|
throw new Error(`Failed to complete local multipart upload for "${options.name}": ${error.message}`);
|
|
386
416
|
}
|
|
387
417
|
});
|
|
418
|
+
/**
 * Writes `content` to a file under this driver's base path, creating the parent
 * directory when needed. The write itself is synchronous; a resolved promise is
 * returned so the method can be awaited like its S3 counterpart.
 *
 * @param {string} fileKey - File location relative to the driver path.
 * @param {string|Buffer} content - Data to write (must be truthy to pass the Affirm check).
 * @returns {Promise<void>}
 */
this.saveFile = (fileKey, content) => {
    (0, Affirm_1.default)(this._path, 'Path not initialized');
    (0, Affirm_1.default)(fileKey, 'Invalid file key');
    (0, Affirm_1.default)(content, 'Invalid content');
    const targetPath = path_1.default.join(this._path, fileKey);
    const parentDir = path_1.default.dirname(targetPath);
    const parentMissing = !fs.existsSync(parentDir);
    if (parentMissing)
        fs.mkdirSync(parentDir, { recursive: true });
    fs.writeFileSync(targetPath, content);
    return Promise.resolve();
};
|
|
431
|
+
/**
 * Copies a file from anywhere on the local filesystem into this driver's base path,
 * creating the destination directory when it is missing.
 *
 * @param {string} sourceFilePath - Path of the file to copy (used as given).
 * @param {string} destinationFileKey - Target location relative to the driver path.
 * @returns {void}
 */
this.copyFromLocal = (sourceFilePath, destinationFileKey) => {
    (0, Affirm_1.default)(this._path, 'Path not initialized');
    (0, Affirm_1.default)(sourceFilePath, 'Invalid source file path');
    (0, Affirm_1.default)(destinationFileKey, 'Invalid destination file key');
    const targetPath = path_1.default.join(this._path, destinationFileKey);
    const targetDir = path_1.default.dirname(targetPath);
    const targetDirMissing = !fs.existsSync(targetDir);
    if (targetDirMissing)
        fs.mkdirSync(targetDir, { recursive: true });
    fs.copyFileSync(sourceFilePath, targetPath);
};
|
|
443
|
+
/**
 * Downloads a file through the given S3 driver and stores it under this driver's base path.
 *
 * @param {object} s3Driver - Driver exposing `downloadFile(fileKey)`.
 * @param {string} sourceFileKey - Key of the object to download.
 * @param {string} destinationFileKey - Target location relative to the driver path.
 * @returns {Promise<void>}
 */
this.copyFromS3 = (s3Driver, sourceFileKey, destinationFileKey) => __awaiter(this, void 0, void 0, function* () {
    const downloaded = yield s3Driver.downloadFile(sourceFileKey);
    yield this.saveFile(destinationFileKey, downloaded);
});
|
|
388
447
|
}
|
|
389
448
|
}
|
|
390
449
|
exports.LocalDestinationDriver = LocalDestinationDriver;
|
package/drivers/S3Driver.js
CHANGED
|
@@ -134,6 +134,27 @@ class S3DestinationDriver {
|
|
|
134
134
|
throw error;
|
|
135
135
|
}
|
|
136
136
|
});
|
|
137
|
+
/**
 * Server-side copy of an object from another bucket into this driver's bucket.
 *
 * @param {string} sourceBucket - Bucket that currently holds the object.
 * @param {string} sourceFileKey - Key of the object to copy (raw, not URL-encoded).
 * @param {string} destinationFileKey - Key the copy receives in this driver's bucket.
 * @returns {Promise<void>}
 */
this.copyFromS3 = (sourceBucket, sourceFileKey, destinationFileKey) => __awaiter(this, void 0, void 0, function* () {
    (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "init()" first');
    (0, Affirm_1.default)(sourceBucket, 'Invalid source bucket');
    (0, Affirm_1.default)(sourceFileKey, 'Invalid source file key');
    (0, Affirm_1.default)(destinationFileKey, 'Invalid destination file key');
    // CopySource has to be URL-encoded: encode every key segment but keep the "/" separators,
    // otherwise keys with spaces, "+" or non-ASCII characters cannot be copied.
    const encodedSourceKey = sourceFileKey
        .split('/')
        .map((segment) => encodeURIComponent(segment))
        .join('/');
    yield this._client.send(new client_s3_1.CopyObjectCommand({
        CopySource: `${sourceBucket}/${encodedSourceKey}`,
        Bucket: this._bucketName,
        Key: destinationFileKey
    }));
});
|
|
148
|
+
/**
 * Uploads `content` to this driver's bucket under `fileKey` with a single PutObject call.
 *
 * @param {string} fileKey - Key of the object to create or overwrite.
 * @param {string|Buffer} content - Object body (must be truthy to pass the Affirm check).
 * @returns {Promise<void>}
 */
this.saveFile = (fileKey, content) => __awaiter(this, void 0, void 0, function* () {
    (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "init()" first');
    (0, Affirm_1.default)(fileKey, 'Invalid file key');
    (0, Affirm_1.default)(content, 'Invalid content');
    const putCommand = new client_s3_1.PutObjectCommand({
        Bucket: this._bucketName,
        Key: fileKey,
        Body: content
    });
    yield this._client.send(putCommand);
});
|
|
137
158
|
}
|
|
138
159
|
}
|
|
139
160
|
exports.S3DestinationDriver = S3DestinationDriver;
|
|
@@ -491,6 +512,36 @@ class S3SourceDriver {
|
|
|
491
512
|
} while (continuationToken);
|
|
492
513
|
return allFiles;
|
|
493
514
|
});
|
|
515
|
+
/**
 * Downloads a whole object from this driver's bucket into memory.
 *
 * @param {string} fileKey - Key of the object to fetch.
 * @returns {Promise<Buffer>} The object's bytes.
 */
this.downloadFile = (fileKey) => __awaiter(this, void 0, void 0, function* () {
    (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "init()" first');
    (0, Affirm_1.default)(fileKey, 'Invalid file key');
    const getCommand = new client_s3_1.GetObjectCommand({
        Bucket: this._bucketName,
        Key: fileKey
    });
    const { Body: body } = yield this._client.send(getCommand);
    (0, Affirm_1.default)(body, 'Failed to fetch object from S3');
    // The body is collected in full before being wrapped in a Buffer
    const bytes = yield body.transformToByteArray();
    return Buffer.from(bytes);
});
|
|
526
|
+
/**
 * Deletes an object from this driver's bucket.
 *
 * @param {string} fileKey - Key of the object to remove.
 * @returns {Promise<void>}
 */
this.deleteFile = (fileKey) => __awaiter(this, void 0, void 0, function* () {
    (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "init()" first');
    (0, Affirm_1.default)(fileKey, 'Invalid file key');
    const deleteCommand = new client_s3_1.DeleteObjectCommand({
        Bucket: this._bucketName,
        Key: fileKey
    });
    yield this._client.send(deleteCommand);
});
|
|
534
|
+
/**
 * Server-side copy of an object from this driver's bucket into another bucket.
 *
 * @param {string} sourceFileKey - Key of the object in this driver's bucket (raw, not URL-encoded).
 * @param {string} destinationBucket - Bucket that receives the copy.
 * @param {string} destinationFileKey - Key the copy receives in the destination bucket.
 * @returns {Promise<void>}
 */
this.copyFile = (sourceFileKey, destinationBucket, destinationFileKey) => __awaiter(this, void 0, void 0, function* () {
    (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "init()" first');
    (0, Affirm_1.default)(sourceFileKey, 'Invalid source file key');
    (0, Affirm_1.default)(destinationBucket, 'Invalid destination bucket');
    (0, Affirm_1.default)(destinationFileKey, 'Invalid destination file key');
    // CopySource has to be URL-encoded: encode every key segment but keep the "/" separators,
    // otherwise keys with spaces, "+" or non-ASCII characters cannot be copied.
    const encodedSourceKey = sourceFileKey
        .split('/')
        .map((segment) => encodeURIComponent(segment))
        .join('/');
    yield this._client.send(new client_s3_1.CopyObjectCommand({
        CopySource: `${this._bucketName}/${encodedSourceKey}`,
        Bucket: destinationBucket,
        Key: destinationFileKey
    }));
});
|
|
494
545
|
}
|
|
495
546
|
}
|
|
496
547
|
exports.S3SourceDriver = S3SourceDriver;
|
package/engines/Environment.js
CHANGED
|
@@ -128,6 +128,10 @@ class EnvironmentClass {
|
|
|
128
128
|
(0, Affirm_1.default)(consumerName, `Invalid consumer name`);
|
|
129
129
|
return this._env.consumers.find(x => x.name.toLowerCase() === consumerName.toLowerCase());
|
|
130
130
|
};
|
|
131
|
+
/**
 * Returns every consumer declared in the loaded environment.
 *
 * @returns {Array} The environment's consumers, or an empty array when none are defined.
 */
this.getAllConsumers = () => {
    (0, Affirm_1.default)(this._env, 'Environment not initialized');
    const { consumers } = this._env;
    return consumers || [];
};
|
|
131
135
|
this.getSchema = (schemaName) => {
|
|
132
136
|
(0, Affirm_1.default)(schemaName, 'Invalid schema name');
|
|
133
137
|
return this._env.schemas.find(x => x.title === schemaName);
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
+
});
|
|
10
|
+
};
|
|
11
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
12
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
13
|
+
};
|
|
14
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
+
const Affirm_1 = __importDefault(require("../../core/Affirm"));
|
|
16
|
+
const Environment_1 = __importDefault(require("../Environment"));
|
|
17
|
+
const DriverFactory_1 = __importDefault(require("../../drivers/DriverFactory"));
|
|
18
|
+
/**
 * Runs the `onSuccess` / `onError` actions configured on a consumer output.
 *
 * The only action implemented is `move-file`: every file read by the consumer's producers
 * (a single `fileKey`, or all files matching a `%` pattern) is moved to another source.
 * Moves are supported between the `local` and `aws-s3` engines, in any combination.
 */
class ConsumerOnFinishManagerClass {
    constructor() {
        /**
         * Shared dispatcher for the success and error action lists.
         * `label` is only used to build the "not implemented" error message.
         */
        this._performActions = (consumer, actions, label) => __awaiter(this, void 0, void 0, function* () {
            if (!actions || actions.length === 0)
                return;
            for (const entry of actions) {
                switch (entry.action) {
                    case 'move-file': {
                        yield this.moveSourceFiles(consumer, entry.moveToDestination);
                        break;
                    }
                    default:
                        throw new Error(`${label} action "${entry.action}" is not implemented yet.`);
                }
            }
        });
        /** Executes, in order, the `onSuccess` actions of `output` (no-op when there are none). */
        this.performOnSuccessActions = (consumer, output) => __awaiter(this, void 0, void 0, function* () {
            (0, Affirm_1.default)(consumer, 'Invalid consumer');
            (0, Affirm_1.default)(output, 'Invalid output');
            yield this._performActions(consumer, output.onSuccess, 'On success');
        });
        /** Executes, in order, the `onError` actions of `output` (no-op when there are none). */
        this.performOnErrorActions = (consumer, output) => __awaiter(this, void 0, void 0, function* () {
            (0, Affirm_1.default)(consumer, 'Invalid consumer');
            (0, Affirm_1.default)(output, 'Invalid output');
            yield this._performActions(consumer, output.onError, 'On error');
        });
        /**
         * Collects the files used by the consumer's producers and moves them to the source
         * named `moveDestination`. Producers without a `fileKey` are ignored.
         */
        this.moveSourceFiles = (consumer, moveDestination) => __awaiter(this, void 0, void 0, function* () {
            (0, Affirm_1.default)(moveDestination, 'Move destination is required for move-file action');
            const destinationSource = Environment_1.default.getSource(moveDestination);
            (0, Affirm_1.default)(destinationSource, `Destination source "${moveDestination}" not found`);
            // Get all unique source files from all producers used by this consumer.
            // Two producers may read the same file: moving it twice would fail on the second attempt.
            const sourceFilesToMove = [];
            const alreadyQueued = new Set();
            const queueFile = (source, sourceName, fileKey) => {
                const identity = JSON.stringify([sourceName, fileKey]);
                if (alreadyQueued.has(identity))
                    return;
                alreadyQueued.add(identity);
                sourceFilesToMove.push({
                    sourceName,
                    filePath: this.getSourceFilePath(source, fileKey),
                    fileKey
                });
            };
            for (const consumerProducer of consumer.producers) {
                const producer = Environment_1.default.getProducer(consumerProducer.name);
                (0, Affirm_1.default)(producer, `Producer "${consumerProducer.name}" not found`);
                const source = Environment_1.default.getSource(producer.source);
                (0, Affirm_1.default)(source, `Source "${producer.source}" not found`);
                // Only handle file-based sources that have fileKey
                const producerFileKey = producer.settings.fileKey;
                if (!producerFileKey)
                    continue;
                if (producerFileKey.includes('%')) {
                    // Wildcard pattern: expand it to the files that currently match
                    const matchingFiles = yield this.getMatchingFiles(source, producerFileKey);
                    for (const fileKey of matchingFiles)
                        queueFile(source, producer.source, fileKey);
                }
                else {
                    // Single file
                    queueFile(source, producer.source, producerFileKey);
                }
            }
            // Move all files to destination
            yield this.moveFiles(sourceFilesToMove, destinationSource);
        });
        /** Lists the file keys of `source` matching `fileKeyPattern` (`aws-s3` and `local` only). */
        this.getMatchingFiles = (source, fileKeyPattern) => __awaiter(this, void 0, void 0, function* () {
            const sourceDriver = yield DriverFactory_1.default.instantiateSource(source);
            if (source.engine === 'aws-s3') {
                return yield sourceDriver.listFiles(fileKeyPattern);
            }
            else if (source.engine === 'local') {
                return sourceDriver.listFiles(fileKeyPattern);
            }
            throw new Error(`Unsupported source engine for file listing: ${source.engine}`);
        });
        /**
         * Builds the location of `fileKey` inside `source`: `<path>/<fileKey>` for local
         * sources, the key itself for S3 (S3 uses keys instead of file paths).
         */
        this.getSourceFilePath = (source, fileKey) => {
            if (source.engine === 'local') {
                return `${source.authentication['path']}/${fileKey}`;
            }
            else if (source.engine === 'aws-s3') {
                return fileKey;
            }
            throw new Error(`Unsupported source engine for file move: ${source.engine}`);
        };
        /** Moves every file to `destinationSource`, picking the routine that fits the engine pair. */
        this.moveFiles = (files, destinationSource) => __awaiter(this, void 0, void 0, function* () {
            for (const file of files) {
                const sourceSource = Environment_1.default.getSource(file.sourceName);
                (0, Affirm_1.default)(sourceSource, `Source "${file.sourceName}" not found`);
                if (sourceSource.engine === 'local' && destinationSource.engine === 'local') {
                    yield this.moveLocalToLocal(file.filePath, destinationSource, file.fileKey);
                }
                else if (sourceSource.engine === 'local' && destinationSource.engine === 'aws-s3') {
                    yield this.moveLocalToS3(file.filePath, destinationSource, file.fileKey);
                }
                else if (sourceSource.engine === 'aws-s3' && destinationSource.engine === 'local') {
                    yield this.moveS3ToLocal(sourceSource, file.fileKey, destinationSource);
                }
                else if (sourceSource.engine === 'aws-s3' && destinationSource.engine === 'aws-s3') {
                    yield this.moveS3ToS3(sourceSource, file.fileKey, destinationSource);
                }
                else {
                    throw new Error(`Unsupported move operation from ${sourceSource.engine} to ${destinationSource.engine}`);
                }
            }
        });
        /** Local -> local move: read from the source, save to the destination, then delete the source file. */
        this.moveLocalToLocal = (sourceFilePath, destinationSource, fileKey) => __awaiter(this, void 0, void 0, function* () {
            const sourceDriver = yield DriverFactory_1.default.instantiateSource(this.findSourceForPath(sourceFilePath, fileKey));
            const destinationDriver = yield DriverFactory_1.default.instantiateDestination(destinationSource);
            const fileContent = sourceDriver.readFile(fileKey);
            yield destinationDriver.saveFile(fileKey, fileContent);
            // Delete only after the destination write succeeded
            sourceDriver.deleteFile(fileKey);
        });
        /**
         * Builds a temporary local source whose base path is the directory `fileKey` is relative to.
         *
         * When `fileKey` is given and `filePath` ends with `/<fileKey>`, that whole suffix is
         * removed, so keys containing sub-directories resolve back to `filePath` once the driver
         * joins path and key again. Without `fileKey` the path is cut at its last "/".
         */
        this.findSourceForPath = (filePath, fileKey) => {
            const keySuffix = fileKey ? `/${fileKey}` : null;
            const directory = keySuffix && filePath.endsWith(keySuffix)
                ? filePath.slice(0, filePath.length - keySuffix.length)
                : filePath.substring(0, filePath.lastIndexOf('/'));
            return {
                name: 'temp-source',
                engine: 'local',
                authentication: { path: directory }
            };
        };
        /** Local -> S3 move: read the local file, upload it, then delete the local file. */
        this.moveLocalToS3 = (sourceFilePath, destinationSource, fileKey) => __awaiter(this, void 0, void 0, function* () {
            const sourceDriver = yield DriverFactory_1.default.instantiateSource(this.findSourceForPath(sourceFilePath, fileKey));
            const destinationDriver = yield DriverFactory_1.default.instantiateDestination(destinationSource);
            const fileContent = sourceDriver.readFile(fileKey);
            yield destinationDriver.saveFile(fileKey, fileContent);
            // Remove source file after successful upload
            sourceDriver.deleteFile(fileKey);
        });
        /** S3 -> local move: download the object, save it locally, then delete the object. */
        this.moveS3ToLocal = (sourceSource, fileKey, destinationSource) => __awaiter(this, void 0, void 0, function* () {
            const sourceDriver = yield DriverFactory_1.default.instantiateSource(sourceSource);
            const destinationDriver = yield DriverFactory_1.default.instantiateDestination(destinationSource);
            const content = yield sourceDriver.downloadFile(fileKey);
            yield destinationDriver.saveFile(fileKey, content);
            yield sourceDriver.deleteFile(fileKey);
        });
        /** S3 -> S3 move: copy the object into the destination bucket, then delete the original. */
        this.moveS3ToS3 = (sourceSource, fileKey, destinationSource) => __awaiter(this, void 0, void 0, function* () {
            const sourceDriver = yield DriverFactory_1.default.instantiateSource(sourceSource);
            const destinationDriver = yield DriverFactory_1.default.instantiateDestination(destinationSource);
            yield destinationDriver.copyFromS3(sourceSource.authentication['bucket'], fileKey, fileKey);
            yield sourceDriver.deleteFile(fileKey);
        });
    }
}
|
|
187
|
+
const ConsumerOnFinishManager = new ConsumerOnFinishManagerClass();
|
|
188
|
+
exports.default = ConsumerOnFinishManager;
|
|
@@ -34,6 +34,7 @@ const DriverFactory_1 = __importDefault(require("../../drivers/DriverFactory"));
|
|
|
34
34
|
const Helper_1 = __importDefault(require("../../helper/Helper"));
|
|
35
35
|
const Algo_1 = __importDefault(require("../../core/Algo"));
|
|
36
36
|
const Environment_1 = __importDefault(require("../Environment"));
|
|
37
|
+
const Logger_1 = __importDefault(require("../../helper/Logger"));
|
|
37
38
|
class Dataset {
|
|
38
39
|
constructor(name, file, batchSize, executionId) {
|
|
39
40
|
var _a;
|
|
@@ -171,7 +172,8 @@ class Dataset {
|
|
|
171
172
|
}
|
|
172
173
|
}
|
|
173
174
|
catch (error) {
|
|
174
|
-
|
|
175
|
+
Logger_1.default.log(`Error parsing line ${line}\n${lineCount}: ${error}`);
|
|
176
|
+
lineCount++;
|
|
175
177
|
}
|
|
176
178
|
}
|
|
177
179
|
}
|
|
@@ -306,7 +308,7 @@ class Dataset {
|
|
|
306
308
|
}
|
|
307
309
|
}
|
|
308
310
|
catch (error) {
|
|
309
|
-
|
|
311
|
+
Logger_1.default.log(`Error parsing line during sort: ${error}`);
|
|
310
312
|
}
|
|
311
313
|
}
|
|
312
314
|
}
|
|
@@ -537,7 +539,7 @@ class Dataset {
|
|
|
537
539
|
}
|
|
538
540
|
}
|
|
539
541
|
catch (error) {
|
|
540
|
-
|
|
542
|
+
Logger_1.default.log(`Error parsing line ${line}\n${lineCount}: ${error}`);
|
|
541
543
|
}
|
|
542
544
|
}
|
|
543
545
|
}
|
|
@@ -628,7 +630,8 @@ class Dataset {
|
|
|
628
630
|
}
|
|
629
631
|
}
|
|
630
632
|
catch (error) {
|
|
631
|
-
|
|
633
|
+
Logger_1.default.log(`Error parsing line ${line}\n${lineCount}: ${error}`);
|
|
634
|
+
lineCount++;
|
|
632
635
|
}
|
|
633
636
|
}
|
|
634
637
|
}
|
|
@@ -12,7 +12,7 @@ class DatasetRecord {
|
|
|
12
12
|
const parts = row.split(delimiter);
|
|
13
13
|
for (let i = 0; i < dimensions.length; i++) {
|
|
14
14
|
const dim = dimensions[i];
|
|
15
|
-
this._value[dim.name] = TypeCaster_1.default.cast(parts[i], dim.type);
|
|
15
|
+
this._value[dim.name] = TypeCaster_1.default.cast(parts[i], dim.type, dim.format);
|
|
16
16
|
}
|
|
17
17
|
}
|
|
18
18
|
};
|
|
@@ -43,6 +43,9 @@ class ParallelDatasetClass {
|
|
|
43
43
|
const currentDir = __dirname;
|
|
44
44
|
if (process.env.NODE_ENV === 'dev' || process.env.NODE_ENV === 'development')
|
|
45
45
|
return path_1.default.resolve('./.build/workers');
|
|
46
|
+
const forcedPath = process.env.REMORA_WORKERS_PATH;
|
|
47
|
+
if (forcedPath && forcedPath.length > 0)
|
|
48
|
+
return path_1.default.join(__dirname, forcedPath);
|
|
46
49
|
// Check if we're in a published npm package (no .build in path)
|
|
47
50
|
if (!currentDir.includes('.build')) {
|
|
48
51
|
// We're in the published package, workers are relative to package root
|
|
@@ -27,6 +27,7 @@ const Environment_1 = __importDefault(require("../Environment"));
|
|
|
27
27
|
const Algo_1 = __importDefault(require("../../core/Algo"));
|
|
28
28
|
const Logger_1 = __importDefault(require("../../helper/Logger"));
|
|
29
29
|
const ParallelDataset_1 = __importDefault(require("../dataset/ParallelDataset"));
|
|
30
|
+
const ConsumerOnFinishManager_1 = __importDefault(require("../consumer/ConsumerOnFinishManager"));
|
|
30
31
|
class ExecutionEnvironment {
|
|
31
32
|
constructor(consumer, executionId) {
|
|
32
33
|
this.run = (options) => __awaiter(this, void 0, void 0, function* () {
|
|
@@ -127,6 +128,10 @@ class ExecutionEnvironment {
|
|
|
127
128
|
this._resultingDataset = yield PostProcessor_1.default.distinct(this._resultingDataset);
|
|
128
129
|
break;
|
|
129
130
|
}
|
|
131
|
+
case 'perform-on-success-actions': {
|
|
132
|
+
yield ConsumerOnFinishManager_1.default.performOnSuccessActions(this._consumer, planStep.output);
|
|
133
|
+
break;
|
|
134
|
+
}
|
|
130
135
|
case 'save-execution-stats': {
|
|
131
136
|
(0, Affirm_1.default)(this._resultingDataset, `Invalid result dataset in save-execution-stats`);
|
|
132
137
|
result._stats = {
|
|
@@ -156,6 +161,12 @@ class ExecutionEnvironment {
|
|
|
156
161
|
if (ds)
|
|
157
162
|
Logger_1.default.log(`Failed execution of consumer at step ${currentStep.type}:\n\tSize: ${ds.getCount()}\n\tCycles: ${ds.getCycles()}\n\tOperations: ${Logger_1.default.formatList(ds.getOperations())}`);
|
|
158
163
|
Logger_1.default.log(`\tFailed step: ${currentStep.type}->\n\t${error}`);
|
|
164
|
+
try {
|
|
165
|
+
yield ConsumerOnFinishManager_1.default.performOnErrorActions(this._consumer, currentStep.output);
|
|
166
|
+
}
|
|
167
|
+
catch (error) {
|
|
168
|
+
Logger_1.default.log(`Error when trying to perform onError actions on failed consumer ${error}`);
|
|
169
|
+
}
|
|
159
170
|
// IMPORTANT: cleanup all the datasets to not leave any data around and to avoid memory leaks
|
|
160
171
|
const datasets = [
|
|
161
172
|
...this._producedData.map(x => x.dataset),
|