@forzalabs/remora 0.0.61-nasco.3 → 0.0.63-nasco.3
This diff shows the changes between publicly released versions of this package as they appear in their public registry, and is provided for informational purposes only.
- package/Constants.js +1 -1
- package/drivers/DriverHelper.js +1 -0
- package/drivers/S3Driver.js +27 -13
- package/engines/CryptoEngine.js +11 -6
- package/engines/RandomEngine.js +14 -2
- package/engines/dataset/DatasetRecord.js +4 -1
- package/engines/file/FileExporter.js +1 -1
- package/package.json +2 -2
package/Constants.js
CHANGED
package/drivers/DriverHelper.js
CHANGED

@@ -29,6 +29,7 @@ const DriverHelper = {
     appendToUnifiedFile: (options) => __awaiter(void 0, void 0, void 0, function* () {
         (0, Affirm_1.default)(options, 'Invalid options');
         const { append, destinationPath, fileKey, headerLine, stream, fileType, hasHeaderRow, delimiter } = options;
+        (0, Affirm_1.default)(headerLine, `Invalid header line`);
         const keys = (fileType === 'JSON' || fileType === 'JSONL')
             ? Object.keys(JSON.parse(headerLine))
             : [];
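
The new assertion makes a missing header line fail fast with a descriptive message instead of surfacing later as an opaque JSON parse error. A minimal sketch of the difference, with a stand-in affirm helper (hypothetical; the real driver imports Affirm_1.default):

// With headerLine undefined, JSON.parse coerces it to the string
// "undefined" and throws a SyntaxError about invalid JSON.
function affirm(condition, message) {
    if (!condition)
        throw new Error(message);
}

const headerLine = undefined; // e.g. an empty source file

// Before: SyntaxError deep inside the export path
// Object.keys(JSON.parse(headerLine));

// After: immediate, descriptive failure at the top of appendToUnifiedFile
affirm(headerLine, 'Invalid header line');
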
package/drivers/S3Driver.js
CHANGED

@@ -78,24 +78,38 @@ class S3DestinationDriver {
         (0, Affirm_1.default)(uploadId, 'Failed to initiate multipart upload');
         const uploadedParts = [];
         let partNumber = 1;
+        const MIN_PART_SIZE = 5 * 1024 * 1024; // 5MB
+        let accumulatedBuffer = Buffer.alloc(0);
+        const uploadPart = (buffer) => __awaiter(this, void 0, void 0, function* () {
+            const uploadPartRes = yield this._client.send(new client_s3_1.UploadPartCommand({
+                Bucket: this._bucketName,
+                Key: name,
+                UploadId: uploadId,
+                PartNumber: partNumber,
+                Body: buffer
+            }));
+            uploadedParts.push({
+                PartNumber: partNumber,
+                ETag: uploadPartRes.ETag
+            });
+            partNumber++;
+        });
         yield dataset.streamBatches((batch) => __awaiter(this, void 0, void 0, function* () {
             const chunks = FileExporter_1.default.prepareBatch(batch, options);
             for (const chunk of chunks) {
-                const buffer = Buffer.from(chunk);
-                const uploadPartRes = yield this._client.send(new client_s3_1.UploadPartCommand({
-                    Bucket: this._bucketName,
-                    Key: name,
-                    UploadId: uploadId,
-                    PartNumber: partNumber,
-                    Body: buffer
-                }));
-                uploadedParts.push({
-                    PartNumber: partNumber,
-                    ETag: uploadPartRes.ETag
-                });
-                partNumber++;
+                const chunkBuffer = Buffer.from(chunk);
+                accumulatedBuffer = Buffer.concat([accumulatedBuffer, chunkBuffer]);
+                // If accumulated buffer is at least 5MB, upload it as a part
+                if (accumulatedBuffer.length >= MIN_PART_SIZE) {
+                    yield uploadPart(accumulatedBuffer);
+                    accumulatedBuffer = Buffer.alloc(0);
+                }
             }
         }));
+        // Upload any remaining data as the final part (even if smaller than 5MB)
+        if (accumulatedBuffer.length > 0) {
+            yield uploadPart(accumulatedBuffer);
+        }
         // Complete the multipart upload
         const completeRes = yield this._client.send(new client_s3_1.CompleteMultipartUploadCommand({
             Bucket: this._bucketName,
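
S3 rejects multipart parts smaller than 5 MiB (all parts except the last) with an EntityTooSmall error, so uploading each exported chunk as its own part failed whenever a chunk came in under that limit. The rewrite buffers chunks and flushes a part only once 5 MB have accumulated, with any remainder going out as the final part. A minimal standalone sketch of the same accumulate-and-flush pattern, with a hypothetical uploadPart callback standing in for the UploadPartCommand call:

// Accumulate chunks into a buffer; flush a part only at >= 5 MiB.
const MIN_PART_SIZE = 5 * 1024 * 1024;

async function uploadChunks(chunks, uploadPart) {
    let pending = Buffer.alloc(0);
    for (const chunk of chunks) {
        pending = Buffer.concat([pending, Buffer.from(chunk)]);
        if (pending.length >= MIN_PART_SIZE) {
            await uploadPart(pending); // every non-final part is >= 5 MiB
            pending = Buffer.alloc(0);
        }
    }
    if (pending.length > 0)
        await uploadPart(pending); // the final part may be smaller
}
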
package/engines/CryptoEngine.js
CHANGED

@@ -8,6 +8,8 @@ const Algo_1 = __importDefault(require("../core/Algo"));
 const RandomEngine_1 = __importDefault(require("./RandomEngine"));
 class CryptoEngineClass {
     constructor() {
+        var _a;
+        this._salt = '';
         this.hashQuery = (maskType, fieldReference, fieldName) => {
             if (!Algo_1.default.hasVal(maskType))
                 return;

@@ -25,30 +27,32 @@ class CryptoEngineClass {
             }
         };
         this.valueToHash = (value) => {
-            return crypto_1.default.createHash('sha256').update(JSON.stringify(value)).digest('hex');
+            const textValue = JSON.stringify(value);
+            return crypto_1.default.createHash('sha256').update(textValue).digest('hex');
         };
         this.hashValue = (maskType, value, valueType) => {
             if (!Algo_1.default.hasVal(value))
                 return value;
             if (!Algo_1.default.hasVal(maskType))
                 return value;
+            const saltedValue = this._salt + value;
             switch (maskType) {
                 case 'hash':
-                    return this.valueToHash(value);
+                    return this.valueToHash(saltedValue);
                 case 'random': {
                     switch (valueType) {
                         case 'datetime': return RandomEngine_1.default.rngDate();
                         case 'number': return RandomEngine_1.default.rng();
-                        case 'string': return this.valueToHash(value);
+                        case 'string': return this.valueToHash(saltedValue);
                         default:
                             throw new Error('Not implemented yet');
                     }
                 }
                 case 'seeded-random': {
                     switch (valueType) {
-                        case 'datetime': return RandomEngine_1.default.sRngDate(value);
-                        case 'number': return RandomEngine_1.default.sRng(value);
-                        case 'string': return this.valueToHash(value);
+                        case 'datetime': return RandomEngine_1.default.sRngDate(saltedValue);
+                        case 'number': return RandomEngine_1.default.sRng(saltedValue);
+                        case 'string': return this.valueToHash(saltedValue);
                         default:
                             throw new Error('Not implemented yet');
                     }

@@ -63,6 +67,7 @@ class CryptoEngineClass {
                 throw new Error(`This type doesn't exist`);
             }
         };
+        this._salt = (_a = process.env.REMORA_SALT) !== null && _a !== void 0 ? _a : '';
     }
 }
 const CryptoEngine = new CryptoEngineClass();
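
The engine now prepends a salt, read once from the REMORA_SALT environment variable (defaulting to '' when unset), before hashing. Masked values stay deterministic within a deployment, but digests can no longer be matched against precomputed hashes of common values; for 'seeded-random' masks the salted value serves as the RNG seed instead. A minimal sketch of the resulting behavior (the helper below is illustrative, not the class itself):

// Salted, deterministic masking: same input + same salt -> same digest.
const crypto = require('crypto');

const salt = process.env.REMORA_SALT ?? '';

function valueToHash(value) {
    const textValue = JSON.stringify(salt + value); // salt prepended, then stringified
    return crypto.createHash('sha256').update(textValue).digest('hex');
}

// A different REMORA_SALT yields unrelated digests for the same input,
// which blunts precomputed-lookup attacks on masked values.
console.log(valueToHash('jane.doe@example.com'));
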
package/engines/RandomEngine.js
CHANGED

@@ -22,11 +22,23 @@ class RandomEngineClass {
             return rng();
         };
         this.rngDate = (min, max) => {
-            const randomNumber = this.rng(min, max);
+            let myMin = min;
+            let myMax = max;
+            if (!Algo_1.default.hasVal(min) && !Algo_1.default.hasVal(max)) {
+                myMin = new Date('1900-01-01').getTime();
+                myMax = new Date().getTime();
+            }
+            const randomNumber = this.rng(myMin, myMax);
             return new Date(randomNumber).toJSON();
         };
         this.sRngDate = (seed, min, max) => {
-            const randomNumber = this.sRng(seed, min, max);
+            let myMin = min;
+            let myMax = max;
+            if (!Algo_1.default.hasVal(min) && !Algo_1.default.hasVal(max)) {
+                myMin = new Date('1900-01-01').getTime();
+                myMax = new Date().getTime();
+            }
+            const randomNumber = this.sRng(seed, myMin, myMax);
             return new Date(randomNumber).toJSON();
         };
     }
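
rngDate and sRngDate previously forwarded whatever min/max they were given straight to the RNG; when both bounds are omitted they now default to the range from 1900-01-01 to the current time. A minimal sketch of the new behavior, assuming Algo.hasVal is roughly a null/undefined check:

// Defaults kick in only when BOTH bounds are absent, mirroring the diff;
// a single supplied bound is passed through unchanged.
function rngDate(min, max) {
    let lo = min;
    let hi = max;
    if (lo == null && hi == null) {
        lo = new Date('1900-01-01').getTime(); // earliest default date
        hi = Date.now();                       // latest default date
    }
    const ts = lo + Math.random() * (hi - lo);
    return new Date(ts).toJSON();
}

console.log(rngDate()); // e.g. "1987-06-05T04:32:10.000Z" (random, 1900..now)
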
package/engines/dataset/DatasetRecord.js
CHANGED

@@ -19,7 +19,10 @@ class DatasetRecord {
         this.isEmpty = () => { var _a; return ((_a = this._row) === null || _a === void 0 ? void 0 : _a.trim().length) === 0; };
         this.getRaw = () => this._row;
         this.getValue = (dimension) => this._value[dimension];
-        this.setValue = (dimension, value) => this._value[dimension] = value;
+        this.setValue = (dimension, value) => {
+            this._value[dimension] = value;
+            return this;
+        };
         /**
          * Reinitialize the record with new data instead of creating a new instance
          * This is used for object pooling optimization
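
setValue used to be a single-expression arrow that returned the assigned value, so chained calls would have thrown; it now returns this, enabling fluent writes. A toy sketch of the pattern (the field names are hypothetical and the real DatasetRecord carries more state):

// Returning `this` from a setter enables method chaining.
class Record {
    constructor() {
        this._value = {};
    }
    setValue(dimension, value) {
        this._value[dimension] = value;
        return this; // same idea as the diff: allow chaining
    }
}

const record = new Record()
    .setValue('first_name', 'REDACTED')
    .setValue('last_name', 'REDACTED');
console.log(record._value); // { first_name: 'REDACTED', last_name: 'REDACTED' }
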
package/engines/file/FileExporter.js
CHANGED

@@ -63,7 +63,7 @@ class FileExporterClass {
             const chunks = [];
             for (let i = 0; i < records.length; i += chunkSize) {
                 const chunk = records.slice(i, i + chunkSize);
-                chunks.push(chunk.join(separator));
+                chunks.push(chunk.join(separator) + separator);
             }
             return chunks;
         };
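
Each prepared chunk now ends with its separator. That matters because chunks are later concatenated byte-for-byte (for example as S3 multipart parts, per the S3Driver change above): without a trailing separator, the last record of one chunk fuses with the first record of the next. A small worked example with a newline separator:

// Demonstrates the record-fusing bug the trailing separator fixes.
const records = ['r1', 'r2', 'r3', 'r4'];
const chunkSize = 2;
const separator = '\n';

const before = [];
const after = [];
for (let i = 0; i < records.length; i += chunkSize) {
    const chunk = records.slice(i, i + chunkSize);
    before.push(chunk.join(separator));            // old behavior
    after.push(chunk.join(separator) + separator); // new behavior
}

console.log(JSON.stringify(before.join(''))); // "r1\nr2r3\nr4"    <- "r2r3" fused
console.log(JSON.stringify(after.join('')));  // "r1\nr2\nr3\nr4\n" <- records intact
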
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@forzalabs/remora",
-  "version": "0.0.61-nasco.3",
+  "version": "0.0.63-nasco.3",
   "description": "A powerful CLI tool for seamless data translation.",
   "main": "index.js",
   "private": false,

@@ -9,7 +9,7 @@
   },
   "scripts": {
     "sync": "cd ../dev_ops && npm run sync",
-    "dev": "clear && npx tsx scripts/
+    "dev": "clear && npx tsx scripts/threaded.ts",
     "tsc-check": "npx tsc --noemit",
     "init": "npx tsx ./src/index.ts init",
     "version": "npx tsx ./src/index.ts -v",