@forzalabs/remora 0.0.58-nasco.3 → 0.0.60-nasco.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Constants.js CHANGED
@@ -1,14 +1,16 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 const CONSTANTS = {
-    cliVersion: '0.0.58-nasco',
+    cliVersion: '0.0.59-nasco',
     lambdaVersion: 1,
     port: 5069,
     defaults: {
        PRODUCER_TEMP_FOLDER: '.temp',
        SQL_MAX_QUERY_ROWS: 10000,
        STRING_MAX_CHARACTERS_LENGTH: 10000000,
-        MAX_ITEMS_IN_MEMORY: 200000
+        MAX_ITEMS_IN_MEMORY: 200000,
+        MIN_RUNTIME_HEAP_MB: 4000,
+        RECOMMENDED_RUNTIME_HEAP_MB: 8000
     }
 };
 exports.default = CONSTANTS;
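The two heap constants feed a new startup check in index.js (diffed below). STRING_MAX_CHARACTERS_LENGTH, already present, now doubles as the chunk budget for file exports; presumably it is kept far below V8's hard string-length cap (roughly 2^29 characters on 64-bit builds). A minimal sketch of that headroom arithmetic, using the values above (the V8 figure is approximate and version-dependent):

```js
// Headroom sketch: export chunk budget vs. V8's hard string-length cap.
const STRING_MAX_CHARACTERS_LENGTH = 10000000; // from Constants.js above
const V8_MAX_STRING_LENGTH = 2 ** 29 - 24;     // ~536.8M chars on 64-bit V8 (approximate)

console.log(V8_MAX_STRING_LENGTH / STRING_MAX_CHARACTERS_LENGTH); // ~53x headroom
```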
@@ -396,8 +396,9 @@ class LocalDestinationDriver {
             const filePath = path_1.default.join(folder, options.name);
             fs.writeFileSync(filePath, '');
             yield dataset.streamBatches((batch) => __awaiter(this, void 0, void 0, function* () {
-                const content = FileExporter_1.default.prepareBatch(batch, options);
-                fs.appendFileSync(filePath, content);
+                const chunks = FileExporter_1.default.prepareBatch(batch, options);
+                for (const chunk of chunks)
+                    fs.appendFileSync(filePath, chunk);
             }));
             return { bucket: folder, key: filePath, res: true };
         }
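prepareBatch (diffed further down) now returns an array of chunk strings instead of one joined string, so the local driver appends chunk by chunk and never materializes a whole batch as a single oversized string. A self-contained sketch of the same append pattern; the path and chunk contents are illustrative:

```js
// Sketch of the chunked-append pattern above: truncate once, then append
// each prepared chunk instead of one giant joined string.
const fs = require('node:fs');
const path = require('node:path');

const filePath = path.join('.temp', 'export.csv'); // illustrative path
fs.mkdirSync(path.dirname(filePath), { recursive: true });
fs.writeFileSync(filePath, '');                    // truncate, as the driver does

const chunks = ['a,b\n1,2', '3,4\n5,6'];           // stand-ins for prepareBatch output
for (const chunk of chunks)
    fs.appendFileSync(filePath, chunk);
```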
@@ -129,21 +129,24 @@ class S3DestinationDriver {
             const uploadId = createMultipartUploadRes.UploadId;
             (0, Affirm_1.default)(uploadId, 'Failed to initiate multipart upload');
             const uploadedParts = [];
-            yield dataset.streamBatches((batch, index) => __awaiter(this, void 0, void 0, function* () {
-                const content = FileExporter_1.default.prepareBatch(batch, options);
-                const partNumber = index + 1;
-                const body = Buffer.from(content);
-                const uploadPartRes = yield this._client.send(new client_s3_1.UploadPartCommand({
-                    Bucket: this._bucketName,
-                    Key: name,
-                    UploadId: uploadId,
-                    PartNumber: partNumber,
-                    Body: body
-                }));
-                uploadedParts.push({
-                    PartNumber: partNumber,
-                    ETag: uploadPartRes.ETag
-                });
+            let partNumber = 1;
+            yield dataset.streamBatches((batch) => __awaiter(this, void 0, void 0, function* () {
+                const chunks = FileExporter_1.default.prepareBatch(batch, options);
+                for (const chunk of chunks) {
+                    const body = Buffer.from(chunk);
+                    const uploadPartRes = yield this._client.send(new client_s3_1.UploadPartCommand({
+                        Bucket: this._bucketName,
+                        Key: name,
+                        UploadId: uploadId,
+                        PartNumber: partNumber,
+                        Body: body
+                    }));
+                    uploadedParts.push({
+                        PartNumber: partNumber,
+                        ETag: uploadPartRes.ETag
+                    });
+                    partNumber++;
+                }
             }));
             // Complete the multipart upload
             const completeRes = yield this._client.send(new client_s3_1.CompleteMultipartUploadCommand({
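Because a single batch can now produce several chunks, deriving PartNumber from the batch index would assign duplicate numbers; the explicit counter keeps part numbers unique and increasing across all chunks of all batches. (Standard S3 multipart rules, not specific to this package: part numbers run 1–10000 and every part except the last must be at least 5 MiB, so chunk sizing interacts with that minimum.) A no-AWS sketch of the counting fix, with illustrative chunk output:

```js
// Counting sketch: one running counter across batches yields unique
// PartNumbers, where `index + 1` per batch would have collided.
const batches = [['chunkA1', 'chunkA2'], ['chunkB1']]; // illustrative prepareBatch output

let partNumber = 1;
const uploadedParts = [];
for (const chunks of batches)
    for (const chunk of chunks)
        uploadedParts.push({ PartNumber: partNumber++, bytes: Buffer.byteLength(chunk) });

console.log(uploadedParts.map(p => p.PartNumber)); // [ 1, 2, 3 ]
```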
@@ -41,7 +41,7 @@ class DatasetManagerClass {
             return this.buildDimensionsFromFirstLine(firstLine, dataset.getFile(), producer, discover);
         });
         this.buildDimensionsFromFirstLine = (firstLine_1, dsFile_1, producer_1, ...args_1) => __awaiter(this, [firstLine_1, dsFile_1, producer_1, ...args_1], void 0, function* (firstLine, dsFile, producer, discover = false) {
-            var _a, _b, _c, _d, _e;
+            var _a, _b, _c, _d, _e, _f;
             (0, Affirm_1.default)(firstLine, `Invalid first line`);
             (0, Affirm_1.default)(dsFile, `Invalid dataset file`);
             (0, Affirm_1.default)(producer, `Invalid producer`);
@@ -62,29 +62,42 @@ class DatasetManagerClass {
                const columns = FileCompiler_1.default.compileProducer(producer, source);
                const firstObject = JSON.parse(firstLine);
                const keys = Object.keys(firstObject);
+                if (discover) {
+                    return {
+                        delimiter: (_b = file.delimiter) !== null && _b !== void 0 ? _b : ',',
+                        dimensions: keys.map((x, i) => ({ hidden: false, index: i, key: x, name: x }))
+                    };
+                }
                const dimensions = [];
                for (const pColumn of columns) {
-                    const columnKey = (_b = pColumn.aliasInProducer) !== null && _b !== void 0 ? _b : pColumn.nameInProducer;
+                    const columnKey = (_c = pColumn.aliasInProducer) !== null && _c !== void 0 ? _c : pColumn.nameInProducer;
                    const csvColumnIndex = keys.findIndex(x => x === columnKey);
                    (0, Affirm_1.default)(csvColumnIndex > -1, `The column "${pColumn.nameInProducer}" (with key "${columnKey}") of producer "${producer.name}" doesn't exist in the underlying dataset.`);
                    dimensions.push({ index: csvColumnIndex, key: columnKey, name: pColumn.nameInProducer, hidden: null });
                }
-                const delimiterChar = (_c = file.delimiter) !== null && _c !== void 0 ? _c : ',';
+                const delimiterChar = (_d = file.delimiter) !== null && _d !== void 0 ? _d : ',';
                return { dimensions, delimiter: delimiterChar };
            }
            case 'TXT': {
                if (!file.hasHeaderRow) {
                    // If the file is a TXT and there isn't a header row, then I add a fake one that maps directly to the producer
-                    const delimiterChar = (_d = file.delimiter) !== null && _d !== void 0 ? _d : ',';
+                    const delimiterChar = (_e = file.delimiter) !== null && _e !== void 0 ? _e : ',';
                    const source = Environment_1.default.getSource(producer.source);
                    const columns = FileCompiler_1.default.compileProducer(producer, source);
+                    if (discover) {
+                        // Since I don't have a header and I'm discovering, I just create placeholder dimensions based on the number of columns in the txt
+                        return {
+                            delimiter: delimiterChar,
+                            dimensions: firstLine.split(delimiterChar).map((x, i) => ({ hidden: false, index: i, key: `Col ${i + 1}`, name: `Col ${i + 1}` }))
+                        };
+                    }
                    return {
                        dimensions: columns.map((x, i) => { var _a; return ({ key: (_a = x.aliasInProducer) !== null && _a !== void 0 ? _a : x.nameInProducer, name: x.nameInProducer, index: i, hidden: null }); }),
                        delimiter: delimiterChar
                    };
                }
                else {
-                    const delimiterChar = (_e = producer.settings.delimiter) !== null && _e !== void 0 ? _e : ',';
+                    const delimiterChar = (_f = producer.settings.delimiter) !== null && _f !== void 0 ? _f : ',';
                    const rawDimensions = ParseManager_1.default._extractHeader(firstLine, delimiterChar, producer, discover);
                    return {
                        dimensions: rawDimensions.map(x => ({ key: x.name, name: x.saveAs, index: x.index, hidden: null })),
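The new discover branches short-circuit producer/column matching: discovery returns one dimension per observed column, keyed by the JSON field names in the CSV case or by `Col N` placeholders when a TXT has no header row. A standalone sketch of the headerless TXT branch; the sample line and delimiter are illustrative:

```js
// Sketch of discover-mode placeholder dimensions for a headerless TXT.
const firstLine = 'alice|42|rome'; // illustrative first line
const delimiterChar = '|';         // illustrative delimiter

const dimensions = firstLine.split(delimiterChar)
    .map((x, i) => ({ hidden: false, index: i, key: `Col ${i + 1}`, name: `Col ${i + 1}` }));

console.log(dimensions.map(d => d.key)); // [ 'Col 1', 'Col 2', 'Col 3' ]
```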
@@ -12,6 +12,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
+const Constants_1 = __importDefault(require("../../Constants"));
 const Affirm_1 = __importDefault(require("../../core/Affirm"));
 const Algo_1 = __importDefault(require("../../core/Algo"));
 const DSTE_1 = __importDefault(require("../../core/dste/DSTE"));
@@ -42,15 +43,30 @@ class FileExporterClass {
         this.prepareBatch = (batch, options) => {
             switch (options.recordProjection.format) {
                 case 'JSON': {
-                    const content = batch.map(x => x.toJSON()).join('\n');
-                    return content;
+                    const jsonRecords = batch.map(x => x.toJSON());
+                    return this._splitIntoChunks(jsonRecords, '\n');
                 }
                 case 'CSV': {
-                    const content = batch.map(x => x.toCSV(options.recordProjection.delimiter)).join('\n');
-                    return content;
+                    const csvRecords = batch.map(x => x.toCSV(options.recordProjection.delimiter));
+                    return this._splitIntoChunks(csvRecords, '\n');
                 }
             }
         };
+        this._splitIntoChunks = (records, separator) => {
+            if (records.length === 0)
+                return [''];
+            const sampleRecord = records[0];
+            const sampleLength = sampleRecord.length + separator.length; // Include separator in calculation
+            const recordsPerChunk = Math.floor(Constants_1.default.defaults.STRING_MAX_CHARACTERS_LENGTH / sampleLength);
+            // Ensure at least 1 record per chunk
+            const chunkSize = Math.max(1, recordsPerChunk);
+            const chunks = [];
+            for (let i = 0; i < records.length; i += chunkSize) {
+                const chunk = records.slice(i, i + chunkSize);
+                chunks.push(chunk.join(separator));
+            }
+            return chunks;
+        };
         this._composeFileName = (consumer, extension) => `${consumer.name}_${Algo_1.default.replaceAll(DSTE_1.default.now().toISOString().split('.')[0], ':', '-')}.${extension}`;
     }
 }
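_splitIntoChunks sizes chunks from the first record alone: with STRING_MAX_CHARACTERS_LENGTH = 10,000,000 and a 250-character record (251 counting the separator), chunkSize = floor(10,000,000 / 251) = 39,840, so a 200,000-record batch joins into 6 strings rather than one ~50-million-character string. The estimate assumes records are roughly uniform in length, since only records[0] is sampled. A standalone sketch of that arithmetic:

```js
// Standalone sketch of the _splitIntoChunks sizing math.
const STRING_MAX_CHARACTERS_LENGTH = 10000000; // from Constants.js

const records = Array.from({ length: 200000 }, () => 'x'.repeat(250)); // illustrative
const separator = '\n';

const sampleLength = records[0].length + separator.length;                               // 251
const chunkSize = Math.max(1, Math.floor(STRING_MAX_CHARACTERS_LENGTH / sampleLength));  // 39840

const chunks = [];
for (let i = 0; i < records.length; i += chunkSize)
    chunks.push(records.slice(i, i + chunkSize).join(separator));

console.log(chunks.length); // 6 (five full chunks plus an 800-record remainder)
```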
package/helper/Runtime.js ADDED
@@ -0,0 +1,20 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+const node_v8_1 = __importDefault(require("node:v8"));
+class RuntimeClass {
+    constructor() {
+        this.getHeap = () => {
+            const { heap_size_limit, used_heap_size } = node_v8_1.default.getHeapStatistics();
+            return {
+                heapSizeMB: this._toMB(heap_size_limit),
+                usedHeapMB: this._toMB(used_heap_size)
+            };
+        };
+        this._toMB = (bytes) => Math.round(bytes / (1024 * 1024) * 100) / 100;
+    }
+}
+const Runtime = new RuntimeClass();
+exports.default = Runtime;
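The helper wraps Node's v8.getHeapStatistics(): heap_size_limit is the ceiling the heap may grow to (the value --max-old-space-size raises) and used_heap_size is current usage, both converted to MB with two-decimal rounding. A usage sketch; reported numbers depend on the machine and Node flags:

```js
// Usage sketch for the new Runtime helper (values vary by environment).
const Runtime = require('./helper/Runtime').default;

const { heapSizeMB, usedHeapMB } = Runtime.getHeap();
console.log(`heap limit: ${heapSizeMB} MB, in use: ${usedHeapMB} MB`);
// Started as `node --max-old-space-size=8192 index.js`, heapSizeMB lands
// close to the flag value (the flag sets old space, which dominates the heap).
```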
package/index.js CHANGED
@@ -16,14 +16,23 @@ const create_producer_1 = require("./actions/create_producer");
 const create_consumer_1 = require("./actions/create_consumer");
 const Constants_1 = __importDefault(require("./Constants"));
 const LicenceManager_1 = __importDefault(require("./licencing/LicenceManager"));
+const Runtime_1 = __importDefault(require("./helper/Runtime"));
 dotenv_1.default.configDotenv();
 const program = new commander_1.Command();
+// Validate the remora licence
 const remoraLicenceKey = process.env.REMORA_LICENCE_KEY;
 const check = LicenceManager_1.default.validate(remoraLicenceKey);
 if (!check.valid) {
     console.error(`Invalid Remora licence key, the product is not active: remember to set the "REMORA_LICENCE_KEY" environment variable.`);
     process.exit(1);
 }
+// Runtime check on heap size to warn the user of insufficient runtime resources
+const { heapSizeMB } = Runtime_1.default.getHeap();
+if (heapSizeMB < Constants_1.default.defaults.MIN_RUNTIME_HEAP_MB)
+    console.warn(`Remora is running with ${heapSizeMB} MB of runtime heap, which is below the bare minimum of ${Constants_1.default.defaults.MIN_RUNTIME_HEAP_MB} MB (recommended: ${Constants_1.default.defaults.RECOMMENDED_RUNTIME_HEAP_MB} MB).`);
+else if (heapSizeMB < Constants_1.default.defaults.RECOMMENDED_RUNTIME_HEAP_MB)
+    console.warn(`Remora is running with ${heapSizeMB} MB of runtime heap, which is below the recommended ${Constants_1.default.defaults.RECOMMENDED_RUNTIME_HEAP_MB} MB.`);
+// Initialize all commands
 program
     .version(Constants_1.default.cliVersion + '', '-v, --version', 'Display the version of the CLI')
     .description('CLI tool for setting up and managing Data-Remora');
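The check reads the heap ceiling once at startup, so silencing the warning means launching Node with a larger heap. A sketch of the usual options; the bin name in the NODE_OPTIONS line is an assumed example, and 8192 gives the recommended 8000 MB a little slack:

```js
// Sketch: give the CLI enough heap for the startup check above.
// Direct invocation:
//   node --max-old-space-size=8192 index.js
// Or via environment, for a globally installed bin (name assumed):
//   NODE_OPTIONS=--max-old-space-size=8192 remora <command>
// Equivalent programmatic check, mirroring the code above:
const v8 = require('node:v8');
const heapSizeMB = v8.getHeapStatistics().heap_size_limit / (1024 * 1024);
console.log(heapSizeMB >= 8000 ? 'recommended heap available' : `only ${Math.round(heapSizeMB)} MB`);
```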
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@forzalabs/remora",
-  "version": "0.0.58-nasco.3",
+  "version": "0.0.60-nasco.3",
   "description": "A powerful CLI tool for seamless data translation.",
   "main": "index.js",
   "private": false,