@forzalabs/remora 0.0.58-nasco.3 → 0.0.60-nasco.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Constants.js +4 -2
- package/drivers/LocalDriver.js +3 -2
- package/drivers/S3Driver.js +18 -15
- package/engines/dataset/DatasetManager.js +18 -5
- package/engines/file/FileExporter.js +20 -4
- package/helper/Runtime.js +20 -0
- package/index.js +9 -0
- package/package.json +1 -1
package/Constants.js
CHANGED

@@ -1,14 +1,16 @@
  "use strict";
  Object.defineProperty(exports, "__esModule", { value: true });
  const CONSTANTS = {
-     cliVersion: '0.0.
+     cliVersion: '0.0.59-nasco',
      lambdaVersion: 1,
      port: 5069,
      defaults: {
          PRODUCER_TEMP_FOLDER: '.temp',
          SQL_MAX_QUERY_ROWS: 10000,
          STRING_MAX_CHARACTERS_LENGTH: 10000000,
-         MAX_ITEMS_IN_MEMORY: 200000
+         MAX_ITEMS_IN_MEMORY: 200000,
+         MIN_RUNTIME_HEAP_MB: 4000,
+         RECOMMENDED_RUNTIME_HEAP_MB: 8000
      }
  };
  exports.default = CONSTANTS;
package/drivers/LocalDriver.js
CHANGED

@@ -396,8 +396,9 @@ class LocalDestinationDriver {
      const filePath = path_1.default.join(folder, options.name);
      fs.writeFileSync(filePath, '');
      yield dataset.streamBatches((batch) => __awaiter(this, void 0, void 0, function* () {
-         const
-
+         const chunks = FileExporter_1.default.prepareBatch(batch, options);
+         for (const chunk of chunks)
+             fs.appendFileSync(filePath, chunk);
      }));
      return { bucket: folder, key: filePath, res: true };
  }
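The local export callback now appends each prepared chunk to the output file instead of building the whole batch into one string first, so peak memory stays bounded by the chunk size rather than the batch size. A minimal sketch of the same pattern in plain Node.js, assuming some prepareBatch-like function that returns an array of strings (names here are illustrative, not the package's API):

    const fs = require('fs');

    // Create an empty file once, then append chunk by chunk as batches stream in.
    const exportDataset = async (filePath, streamBatches, prepareBatch) => {
        fs.writeFileSync(filePath, '');
        await streamBatches(async (batch) => {
            for (const chunk of prepareBatch(batch))
                fs.appendFileSync(filePath, chunk);
        });
    };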
package/drivers/S3Driver.js
CHANGED

@@ -129,21 +129,24 @@ class S3DestinationDriver {
      const uploadId = createMultipartUploadRes.UploadId;
      (0, Affirm_1.default)(uploadId, 'Failed to initiate multipart upload');
      const uploadedParts = [];
-
-
-     const
-     const
-
-
-
-
-
-
-
-
-
-
-
+     let partNumber = 1;
+     yield dataset.streamBatches((batch) => __awaiter(this, void 0, void 0, function* () {
+         const chunks = FileExporter_1.default.prepareBatch(batch, options);
+         for (const chunk of chunks) {
+             const body = Buffer.from(chunk);
+             const uploadPartRes = yield this._client.send(new client_s3_1.UploadPartCommand({
+                 Bucket: this._bucketName,
+                 Key: name,
+                 UploadId: uploadId,
+                 PartNumber: partNumber,
+                 Body: body
+             }));
+             uploadedParts.push({
+                 PartNumber: partNumber,
+                 ETag: uploadPartRes.ETag
+             });
+             partNumber++;
+         }
      }));
      // Complete the multipart upload
      const completeRes = yield this._client.send(new client_s3_1.CompleteMultipartUploadCommand({
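Each prepared chunk is now uploaded as one part of an S3 multipart upload, and the part numbers and ETags collected along the way are exactly what the completion call needs. For reference, a minimal sketch of completing such an upload with AWS SDK v3, where bucket, key, uploadId and uploadedParts stand in for the driver's own values; note that S3 requires every part except the last to be at least 5 MiB, so chunk sizing has to respect that lower bound:

    const { S3Client, CompleteMultipartUploadCommand } = require('@aws-sdk/client-s3');

    const client = new S3Client({});

    // uploadedParts: [{ PartNumber: 1, ETag: '"..."' }, ...] gathered while sending UploadPartCommand calls.
    const completeUpload = async (bucket, key, uploadId, uploadedParts) => {
        return client.send(new CompleteMultipartUploadCommand({
            Bucket: bucket,
            Key: key,
            UploadId: uploadId,
            MultipartUpload: { Parts: uploadedParts }
        }));
    };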
package/engines/dataset/DatasetManager.js
CHANGED

@@ -41,7 +41,7 @@ class DatasetManagerClass {
      return this.buildDimensionsFromFirstLine(firstLine, dataset.getFile(), producer, discover);
  });
  this.buildDimensionsFromFirstLine = (firstLine_1, dsFile_1, producer_1, ...args_1) => __awaiter(this, [firstLine_1, dsFile_1, producer_1, ...args_1], void 0, function* (firstLine, dsFile, producer, discover = false) {
-     var _a, _b, _c, _d, _e;
+     var _a, _b, _c, _d, _e, _f;
      (0, Affirm_1.default)(firstLine, `Invalid first line`);
      (0, Affirm_1.default)(dsFile, `Invalid dataset file`);
      (0, Affirm_1.default)(producer, `Invalid producer`);

@@ -62,29 +62,42 @@ class DatasetManagerClass {
      const columns = FileCompiler_1.default.compileProducer(producer, source);
      const firstObject = JSON.parse(firstLine);
      const keys = Object.keys(firstObject);
+     if (discover) {
+         return {
+             delimiter: (_b = file.delimiter) !== null && _b !== void 0 ? _b : ',',
+             dimensions: keys.map((x, i) => ({ hidden: false, index: i, key: x, name: x }))
+         };
+     }
      const dimensions = [];
      for (const pColumn of columns) {
-         const columnKey = (
+         const columnKey = (_c = pColumn.aliasInProducer) !== null && _c !== void 0 ? _c : pColumn.nameInProducer;
          const csvColumnIndex = keys.findIndex(x => x === columnKey);
          (0, Affirm_1.default)(csvColumnIndex > -1, `The column "${pColumn.nameInProducer}" (with key "${columnKey}") of producer "${producer.name}" doesn't exist in the underlying dataset.`);
          dimensions.push({ index: csvColumnIndex, key: columnKey, name: pColumn.nameInProducer, hidden: null });
      }
-     const delimiterChar = (
+     const delimiterChar = (_d = file.delimiter) !== null && _d !== void 0 ? _d : ',';
      return { dimensions, delimiter: delimiterChar };
  }
  case 'TXT': {
      if (!file.hasHeaderRow) {
          // If the file is a TXT and there isn't an header row, then I add a fake one that maps directly to the producer
-         const delimiterChar = (
+         const delimiterChar = (_e = file.delimiter) !== null && _e !== void 0 ? _e : ',';
          const source = Environment_1.default.getSource(producer.source);
          const columns = FileCompiler_1.default.compileProducer(producer, source);
+         if (discover) {
+             // Since I don't have an header, and I'm discovering, I just create placeholder dimensions based on the same number of columns of the txt
+             return {
+                 delimiter: delimiterChar,
+                 dimensions: firstLine.split(delimiterChar).map((x, i) => ({ hidden: false, index: i, key: `Col ${i + 1}`, name: `Col ${i + 1}` }))
+             };
+         }
          return {
              dimensions: columns.map((x, i) => { var _a; return ({ key: (_a = x.aliasInProducer) !== null && _a !== void 0 ? _a : x.nameInProducer, name: x.nameInProducer, index: i, hidden: null }); }),
              delimiter: delimiterChar
          };
      }
      else {
-         const delimiterChar = (
+         const delimiterChar = (_f = producer.settings.delimiter) !== null && _f !== void 0 ? _f : ',';
          const rawDimensions = ParseManager_1.default._extractHeader(firstLine, delimiterChar, producer, discover);
          return {
              dimensions: rawDimensions.map(x => ({ key: x.name, name: x.saveAs, index: x.index, hidden: null })),
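In discover mode the manager no longer requires the producer's columns to match the file: for JSON it derives dimensions from the first object's keys, and for headerless TXT it fabricates placeholder columns from the first line. A small illustration of the headerless TXT branch with a hypothetical comma-delimited first line, mirroring the mapping shown in the diff:

    const firstLine = 'ACME,2024-01-01,42';
    const delimiterChar = ',';
    const dimensions = firstLine.split(delimiterChar)
        .map((x, i) => ({ hidden: false, index: i, key: `Col ${i + 1}`, name: `Col ${i + 1}` }));
    // => [{ key: 'Col 1', ... }, { key: 'Col 2', ... }, { key: 'Col 3', ... }]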
package/engines/file/FileExporter.js
CHANGED

@@ -12,6 +12,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
      return (mod && mod.__esModule) ? mod : { "default": mod };
  };
  Object.defineProperty(exports, "__esModule", { value: true });
+ const Constants_1 = __importDefault(require("../../Constants"));
  const Affirm_1 = __importDefault(require("../../core/Affirm"));
  const Algo_1 = __importDefault(require("../../core/Algo"));
  const DSTE_1 = __importDefault(require("../../core/dste/DSTE"));

@@ -42,15 +43,30 @@ class FileExporterClass {
      this.prepareBatch = (batch, options) => {
          switch (options.recordProjection.format) {
              case 'JSON': {
-                 const
-                 return
+                 const jsonRecords = batch.map(x => x.toJSON());
+                 return this._splitIntoChunks(jsonRecords, '\n');
              }
              case 'CSV': {
-                 const
-                 return
+                 const csvRecords = batch.map(x => x.toCSV(options.recordProjection.delimiter));
+                 return this._splitIntoChunks(csvRecords, '\n');
              }
          }
      };
+     this._splitIntoChunks = (records, separator) => {
+         if (records.length === 0)
+             return [''];
+         const sampleRecord = records[0];
+         const sampleLength = sampleRecord.length + separator.length; // Include separator in calculation
+         const recordsPerChunk = Math.floor(Constants_1.default.defaults.STRING_MAX_CHARACTERS_LENGTH / sampleLength);
+         // Ensure at least 1 record per chunk
+         const chunkSize = Math.max(1, recordsPerChunk);
+         const chunks = [];
+         for (let i = 0; i < records.length; i += chunkSize) {
+             const chunk = records.slice(i, i + chunkSize);
+             chunks.push(chunk.join(separator));
+         }
+         return chunks;
+     };
      this._composeFileName = (consumer, extension) => `${consumer.name}_${Algo_1.default.replaceAll(DSTE_1.default.now().toISOString().split('.')[0], ':', '-')}.${extension}`;
  }
  }
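The chunk size is estimated from a single sample record: with STRING_MAX_CHARACTERS_LENGTH at 10,000,000 characters, a batch whose first record serializes to 499 characters (plus a 1-character separator) gives floor(10,000,000 / 500) = 20,000 records per chunk, so a 50,000-record batch comes back as three strings of roughly 10 million characters or less each. A self-contained illustration of the same slicing logic, using a standalone constant rather than the package's Constants import:

    const STRING_MAX_CHARACTERS_LENGTH = 10000000;

    const splitIntoChunks = (records, separator) => {
        if (records.length === 0)
            return [''];
        // Estimate how many records fit in one chunk from the first record's length.
        const sampleLength = records[0].length + separator.length;
        const chunkSize = Math.max(1, Math.floor(STRING_MAX_CHARACTERS_LENGTH / sampleLength));
        const chunks = [];
        for (let i = 0; i < records.length; i += chunkSize)
            chunks.push(records.slice(i, i + chunkSize).join(separator));
        return chunks;
    };

    // 50,000 records of ~499 characters each produce 3 chunks (20,000 + 20,000 + 10,000 records).
    const records = Array.from({ length: 50000 }, () => 'x'.repeat(499));
    console.log(splitIntoChunks(records, '\n').length); // 3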
package/helper/Runtime.js
ADDED

@@ -0,0 +1,20 @@
+ "use strict";
+ var __importDefault = (this && this.__importDefault) || function (mod) {
+     return (mod && mod.__esModule) ? mod : { "default": mod };
+ };
+ Object.defineProperty(exports, "__esModule", { value: true });
+ const node_v8_1 = __importDefault(require("node:v8"));
+ class RuntimeClass {
+     constructor() {
+         this.getHeap = () => {
+             const { heap_size_limit, used_heap_size } = node_v8_1.default.getHeapStatistics();
+             return {
+                 heapSizeMB: this._toMB(heap_size_limit),
+                 usedHeapMB: this._toMB(used_heap_size)
+             };
+         };
+         this._toMB = (bytes) => Math.round(bytes / (1024 * 1024) * 100) / 100;
+     }
+ }
+ const Runtime = new RuntimeClass();
+ exports.default = Runtime;
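The new Runtime helper reads the V8 heap ceiling and current usage from node:v8 and converts both to megabytes, rounded to two decimals. The same numbers can be inspected outside the package with plain Node.js:

    const v8 = require('node:v8');

    const { heap_size_limit, used_heap_size } = v8.getHeapStatistics();
    const toMB = (bytes) => Math.round(bytes / (1024 * 1024) * 100) / 100;

    // heap_size_limit is the ceiling V8 will allow, not the memory currently allocated.
    console.log(`limit: ${toMB(heap_size_limit)} MB, used: ${toMB(used_heap_size)} MB`);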
package/index.js
CHANGED

@@ -16,14 +16,23 @@ const create_producer_1 = require("./actions/create_producer");
  const create_consumer_1 = require("./actions/create_consumer");
  const Constants_1 = __importDefault(require("./Constants"));
  const LicenceManager_1 = __importDefault(require("./licencing/LicenceManager"));
+ const Runtime_1 = __importDefault(require("./helper/Runtime"));
  dotenv_1.default.configDotenv();
  const program = new commander_1.Command();
+ // Validate the remora licence
  const remoraLicenceKey = process.env.REMORA_LICENCE_KEY;
  const check = LicenceManager_1.default.validate(remoraLicenceKey);
  if (!check.valid) {
      console.error(`Invalid Remora licence key, the product is not active: remember to set "REMORA_LICENCE_KEY" environment variable.`);
      process.exit(1);
  }
+ // Runtime check on heap size to warn user of insufficent runtime resources
+ const { heapSizeMB } = Runtime_1.default.getHeap();
+ if (heapSizeMB < Constants_1.default.defaults.MIN_RUNTIME_HEAP_MB)
+     console.warn(`Remora is running with ${heapSizeMB}MB of runtime heap, which is below the bare minimum of ${Constants_1.default.defaults.MIN_RUNTIME_HEAP_MB}MB (Recommended: ${Constants_1.default.defaults.RECOMMENDED_RUNTIME_HEAP_MB}MB).`);
+ else if (heapSizeMB < Constants_1.default.defaults.RECOMMENDED_RUNTIME_HEAP_MB)
+     console.warn(`Remora is running with ${heapSizeMB} MB of runtime heap, which is below the recommended of ${Constants_1.default.defaults.RECOMMENDED_RUNTIME_HEAP_MB} MB.`);
+ // Initialize all commands
  program
      .version(Constants_1.default.cliVersion + '', '-v, --version', 'Display the version of the CLI')
      .description('CLI tool for setting up and managing Data-Remora');