@forzalabs/remora 0.0.57-nasco.3 → 0.0.59-nasco.3

This diff compares the contents of two publicly released versions of the package as they appear in their public registry; it is provided for informational purposes only.
package/Constants.js CHANGED
@@ -1,14 +1,16 @@
  "use strict";
  Object.defineProperty(exports, "__esModule", { value: true });
  const CONSTANTS = {
- cliVersion: '0.0.57-nasco',
+ cliVersion: '0.0.59-nasco',
  lambdaVersion: 1,
  port: 5069,
  defaults: {
  PRODUCER_TEMP_FOLDER: '.temp',
  SQL_MAX_QUERY_ROWS: 10000,
  STRING_MAX_CHARACTERS_LENGTH: 10000000,
- MAX_ITEMS_IN_MEMORY: 200000
+ MAX_ITEMS_IN_MEMORY: 200000,
+ MIN_RUNTIME_HEAP_MB: 4000,
+ RECOMMENDED_RUNTIME_HEAP_MB: 8000
  }
  };
  exports.default = CONSTANTS;
@@ -28,29 +28,84 @@ const Affirm_1 = __importDefault(require("../core/Affirm"));
  const DriverHelper = {
  appendToUnifiedFile: (options) => __awaiter(void 0, void 0, void 0, function* () {
  (0, Affirm_1.default)(options, 'Invalid options');
- const { append, destinationPath, fileKey, headerLine, stream, fileType, hasHeaderRow } = options;
+ const { append, destinationPath, fileKey, headerLine, stream, fileType, hasHeaderRow, delimiter } = options;
+ const keys = (fileType === 'JSON' || fileType === 'JSONL') ? Object.keys(headerLine) : [];
  const shouldValidateHeader = fileType === 'CSV' || (fileType === 'TXT' && hasHeaderRow === true);
  let isFirstLine = true;
  let hasValidatedHeader = shouldValidateHeader ? false : true;
+ let leftoverData = '';
+ let globalIndex = 0;
  const headerValidationTransform = new stream_1.Transform({
  transform(chunk, encoding, callback) {
- if (!hasValidatedHeader) {
- const chunkStr = chunk.toString();
- const lines = chunkStr.split('\n');
- if (isFirstLine && lines.length > 0) {
- const firstLine = lines[0];
- if (shouldValidateHeader && headerLine && headerLine.trim() !== '' && firstLine.trim() !== headerLine.trim()) {
- const msg = `Error creating unified dataset: file "${fileKey}" has a different header line than the other files in this dataset\n\t-${fileKey}: ${firstLine}\n\t-main: ${headerLine}`;
+ const chunkStr = leftoverData + chunk.toString();
+ const lines = chunkStr.split('\n');
+ // Keep the last line as leftover if it doesn't end with newline
+ leftoverData = lines.pop() || '';
+ const filteredLines = [];
+ for (let i = 0; i < lines.length; i++) {
+ const line = lines[i];
+ // Header validation for first line
+ if (!hasValidatedHeader && isFirstLine && i === 0) {
+ if (shouldValidateHeader && headerLine && headerLine.trim() !== '' && line.trim() !== headerLine.trim()) {
+ const msg = `Error creating unified dataset: file "${fileKey}" has a different header line than the other files in this dataset\n\t-${fileKey}: ${line}\n\t-main: ${headerLine}`;
  Logger_1.default.log(msg);
  return callback(new Error(msg));
  }
  hasValidatedHeader = true;
  isFirstLine = false;
  }
+ // Apply your filtering logic here
+ if (shouldIncludeLine(line, globalIndex)) {
+ filteredLines.push(processLine(line));
+ }
+ globalIndex++;
+ }
+ // Output filtered lines
+ if (filteredLines.length > 0) {
+ const output = filteredLines.join('\n') + '\n';
+ callback(null, Buffer.from(output));
+ }
+ else {
+ callback(null, null); // No data to output
  }
- callback(null, chunk);
+ },
+ flush(callback) {
+ // Process any remaining data
+ if (leftoverData.trim()) {
+ if (shouldIncludeLine(leftoverData, -1)) {
+ callback(null, Buffer.from(processLine(leftoverData)));
+ }
+ else {
+ callback(null, null);
+ }
+ }
+ else {
+ callback(null, null);
+ }
+ globalIndex++;
  }
  });
+ // Helper function to determine if a line should be included
+ const shouldIncludeLine = (line, lineIndex) => {
+ // For flat files (csv, txt) ignore the first line of the header (I already saved that line)
+ if (lineIndex === 0 && shouldValidateHeader)
+ return false;
+ // Skip empty lines
+ if (line.trim() === '')
+ return false;
+ return true;
+ };
+ const processLine = (line) => {
+ switch (fileType) {
+ case 'JSON':
+ case 'JSONL': {
+ const parsed = JSON.parse(line);
+ return keys.map(k => parsed[k]).join(delimiter);
+ }
+ default:
+ return line;
+ }
+ };
  const writeOptions = append ? { flags: 'a' } : {};
  const writeStream = (0, fs_1.createWriteStream)(destinationPath, writeOptions);
  yield (0, promises_1.pipeline)(stream, headerValidationTransform, writeStream);
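The rewritten appendToUnifiedFile transform buffers the unterminated tail of each chunk in leftoverData so that a line split across chunk boundaries is never validated or parsed in halves, and flush() drains whatever remains once the source stream ends. A minimal, standalone sketch of that buffering pattern (the file paths and the pass-through filter are illustrative, not taken from the package):

    const { Transform } = require('stream');
    const { pipeline } = require('stream/promises');
    const { createReadStream, createWriteStream } = require('fs');

    // Emit only complete lines per chunk; carry the partial last line forward.
    const lineSplitter = () => {
        let leftover = '';
        return new Transform({
            transform(chunk, _encoding, callback) {
                const lines = (leftover + chunk.toString()).split('\n');
                leftover = lines.pop() || ''; // unterminated tail, if any
                const complete = lines.filter(line => line.trim() !== '');
                callback(null, complete.length ? complete.join('\n') + '\n' : null);
            },
            flush(callback) {
                // Drain the remainder when the upstream ends.
                callback(null, leftover.trim() ? leftover + '\n' : null);
            }
        });
    };

    // Usage (hypothetical paths):
    // await pipeline(createReadStream('part.csv'), lineSplitter(), createWriteStream('unified.csv', { flags: 'a' }));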
@@ -135,7 +135,8 @@ class LocalSourceDriver {
  append: appendMode,
  headerLine,
  fileType: file.fileType,
- hasHeaderRow: file.hasHeaderRow
+ hasHeaderRow: file.hasHeaderRow,
+ delimiter: dataset.getDelimiter()
  });
  });
  const { fileKey } = file;
@@ -144,6 +145,7 @@ class LocalSourceDriver {
  Logger_1.default.log(`Matched ${allFileKeys.length} files, copying locally and creating unified dataset.`);
  // Get header line from the first file
  const headerLine = (yield DriverHelper_1.default.quickReadFile(path_1.default.join(this._path, allFileKeys[0]), 1))[0];
+ dataset.setFirstLine(headerLine);
  // Copy files sequentially to avoid file conflicts
  for (let i = 0; i < allFileKeys.length; i++) {
  yield copyLocally(allFileKeys[i], headerLine, i > 0); // Append mode for subsequent files
@@ -152,6 +154,8 @@ class LocalSourceDriver {
  }
  else {
  // For single file, no header validation needed
+ const headerLine = (yield DriverHelper_1.default.quickReadFile(path_1.default.join(this._path, fileKey), 1))[0];
+ dataset.setFirstLine(headerLine);
  yield copyLocally(fileKey, '', false);
  return dataset;
  }
@@ -392,8 +396,9 @@ class LocalDestinationDriver {
  const filePath = path_1.default.join(folder, options.name);
  fs.writeFileSync(filePath, '');
  yield dataset.streamBatches((batch) => __awaiter(this, void 0, void 0, function* () {
- const content = FileExporter_1.default.prepareBatch(batch, options);
- fs.appendFileSync(filePath, content);
+ const chunks = FileExporter_1.default.prepareBatch(batch, options);
+ for (const chunk of chunks)
+ fs.appendFileSync(filePath, chunk);
  }));
  return { bucket: folder, key: filePath, res: true };
  }
@@ -129,21 +129,24 @@ class S3DestinationDriver {
  const uploadId = createMultipartUploadRes.UploadId;
  (0, Affirm_1.default)(uploadId, 'Failed to initiate multipart upload');
  const uploadedParts = [];
- yield dataset.streamBatches((batch, index) => __awaiter(this, void 0, void 0, function* () {
- const content = FileExporter_1.default.prepareBatch(batch, options);
- const partNumber = index + 1;
- const body = Buffer.from(content);
- const uploadPartRes = yield this._client.send(new client_s3_1.UploadPartCommand({
- Bucket: this._bucketName,
- Key: name,
- UploadId: uploadId,
- PartNumber: partNumber,
- Body: body
- }));
- uploadedParts.push({
- PartNumber: partNumber,
- ETag: uploadPartRes.ETag
- });
+ let partNumber = 1;
+ yield dataset.streamBatches((batch) => __awaiter(this, void 0, void 0, function* () {
+ const chunks = FileExporter_1.default.prepareBatch(batch, options);
+ for (const chunk of chunks) {
+ const body = Buffer.from(chunk);
+ const uploadPartRes = yield this._client.send(new client_s3_1.UploadPartCommand({
+ Bucket: this._bucketName,
+ Key: name,
+ UploadId: uploadId,
+ PartNumber: partNumber,
+ Body: body
+ }));
+ uploadedParts.push({
+ PartNumber: partNumber,
+ ETag: uploadPartRes.ETag
+ });
+ partNumber++;
+ }
  }));
  // Complete the multipart upload
  const completeRes = yield this._client.send(new client_s3_1.CompleteMultipartUploadCommand({
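Because prepareBatch now returns an array of chunks rather than a single string, the S3 driver can no longer derive the part number from the batch index; it keeps one running partNumber across every chunk of every batch, since S3 multipart uploads require unique, increasing part numbers. A minimal sketch of that counter pattern with the AWS SDK v3 (client, bucket, key and uploadId are assumed to already exist):

    const { UploadPartCommand } = require('@aws-sdk/client-s3');

    // Upload each chunk as its own part, numbering parts 1..N across all batches.
    async function uploadChunks(client, bucket, key, uploadId, batches) {
        const parts = [];
        let partNumber = 1;
        for (const batch of batches) {
            for (const chunk of batch) {
                const res = await client.send(new UploadPartCommand({
                    Bucket: bucket,
                    Key: key,
                    UploadId: uploadId,
                    PartNumber: partNumber,
                    Body: Buffer.from(chunk)
                }));
                parts.push({ PartNumber: partNumber, ETag: res.ETag });
                partNumber++;
            }
        }
        return parts; // feed into CompleteMultipartUploadCommand
    }

Note that S3 rejects multipart parts smaller than 5 MiB (except the final part), so chunk sizing still matters with this approach.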
@@ -243,7 +246,8 @@ class S3SourceDriver {
  append: appendMode,
  headerLine,
  fileType: file.fileType,
- hasHeaderRow: file.hasHeaderRow
+ hasHeaderRow: file.hasHeaderRow,
+ delimiter: dataset.getDelimiter()
  });
  });
  const { fileKey } = file;
@@ -259,6 +263,7 @@ class S3SourceDriver {
  (0, Affirm_1.default)(firstFileResponse.Body, 'Failed to fetch first file from S3');
  const firstFileStream = firstFileResponse.Body;
  const headerLine = yield this.getFirstLineFromStream(firstFileStream);
+ dataset.setFirstLine(headerLine);
  // Download files sequentially to avoid file conflicts
  for (let i = 0; i < allFileKeys.length; i++) {
  yield downloadLocally(allFileKeys[i], headerLine, i > 0); // Append mode for subsequent files
@@ -266,6 +271,16 @@ class S3SourceDriver {
  return dataset;
  }
  else {
+ // Get header line from the first file
+ const firstFileCommand = new client_s3_1.GetObjectCommand({
+ Bucket: this._bucketName,
+ Key: fileKey
+ });
+ const firstFileResponse = yield this._client.send(firstFileCommand);
+ (0, Affirm_1.default)(firstFileResponse.Body, 'Failed to fetch first file from S3');
+ const firstFileStream = firstFileResponse.Body;
+ const headerLine = yield this.getFirstLineFromStream(firstFileStream);
+ dataset.setFirstLine(headerLine);
  // For single file, no header validation needed
  yield downloadLocally(fileKey, '');
  return dataset;
@@ -15,6 +15,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
  const Affirm_1 = __importDefault(require("../../core/Affirm"));
  const Algo_1 = __importDefault(require("../../core/Algo"));
  const CryptoEngine_1 = __importDefault(require("../CryptoEngine"));
+ const DatasetManager_1 = __importDefault(require("../dataset/DatasetManager"));
  const DatasetRecord_1 = __importDefault(require("../dataset/DatasetRecord"));
  const Environment_1 = __importDefault(require("../Environment"));
  const FileCompiler_1 = __importDefault(require("../file/FileCompiler"));
@@ -25,6 +26,7 @@ class PostProcessorClass {
  constructor() {
  /**
  * Maps an array of objects and projects it to another array of objects but with different shape:
+ * - updates the dimensions of the dataset (drop, rename, reorder, hide)
  * - type casting
  * - default field values
  * - masking/hashing of data
@@ -33,9 +35,20 @@ class PostProcessorClass {
  (0, Affirm_1.default)(consumer, 'Invalid consumer');
  (0, Affirm_1.default)(dataset, 'Invalid dataset');
  const fields = ConsumerManager_1.default.getExpandedFields(consumer);
- let newDataset = yield dataset.wholeUpdateDimensions(fields);
- newDataset = yield newDataset.map(record => {
+ const dimensionsUpdates = DatasetManager_1.default.computeDimensionsUpdates(dataset, consumer);
+ let updatedDimensions = null;
+ const newDataset = yield dataset.map(record => {
  var _a, _b;
+ // First apply the updates to the dimensions of this record
+ if (dimensionsUpdates.length > 0) {
+ for (const update of dimensionsUpdates) {
+ record.wholeUpdateDimension(update);
+ }
+ record.sortDimensions();
+ }
+ if (!updatedDimensions)
+ updatedDimensions = record._dimensions;
+ // Finally apply the rules and changes of the consumer fields to the record
  for (const field of fields) {
  const { key, alias } = field.cField;
  const fieldKey = alias !== null && alias !== void 0 ? alias : key;
@@ -49,6 +62,7 @@ class PostProcessorClass {
  }
  return record;
  });
+ newDataset.setDimensinons(updatedDimensions);
  return newDataset;
  });
  /**
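The wholeUpdateDimensions pass that previously lived on Dataset is gone: PostProcessor now asks DatasetManager.computeDimensionsUpdates for the list of dimension changes once, applies them to each record inside the same map that evaluates the consumer fields, and writes the resulting dimensions back with setDimensinons, so the dataset is traversed once instead of twice.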
@@ -37,7 +37,6 @@ const Environment_1 = __importDefault(require("../Environment"));
  class Dataset {
  constructor(name, file, batchSize) {
  var _a;
- this._pipeline = [];
  this.getPath = () => this._path;
  this.setPath = (path) => {
  this._path = path;
@@ -45,11 +44,11 @@ class Dataset {
  };
  this.getFile = () => this._file;
  this.getBatchSize = () => this._batchSize;
- this.setBatchSize = (size) => {
- this._batchSize = size;
- this._recordPool.resize(size);
+ this.setFirstLine = (firstLine) => {
+ this._firstLine = firstLine;
  return this;
  };
+ this.getFirstLine = () => this._firstLine;
  this.getSize = () => this._size;
  this.getCycles = () => this._iterations;
  this.getDelimiter = () => this._delimiter;
@@ -633,25 +632,11 @@ class Dataset {
  this._delimiter = delimiter;
  this._dimensions = dimensions;
  switch (this._file.fileType) {
- case 'TXT': {
- if (this._file.hasHeaderRow)
- yield this.filter((x, i) => i > 0 && !x.isEmpty());
- break;
- }
- case 'CSV': {
- yield this.filter((x, i) => i > 0 && !x.isEmpty());
- break;
- }
+ case 'TXT':
+ case 'CSV':
  case 'JSON':
- case 'JSONL': {
- // Convert the JSON to the internal CSV format
- yield this.map(record => {
- const parsed = JSON.parse(record.getRaw());
- const preparedRow = this._dimensions.map(d => parsed[d.key]).join(this._delimiter);
- return new DatasetRecord_1.default(preparedRow, this._dimensions, this._delimiter);
- });
+ case 'JSONL':
  break;
- }
  case 'XLS':
  case 'XLSX': {
  const excel = xlsx_1.default.readFile(this._path);
@@ -689,6 +674,10 @@ class Dataset {
  return this;
  });
  this.getDimensions = () => this._dimensions;
+ this.setDimensinons = (dimensions) => {
+ this._dimensions = dimensions;
+ return this;
+ };
  /**
  * Update the record pool when dimensions change
  */
@@ -696,55 +685,6 @@ class Dataset {
  // Update all pooled records with current dimensions
  this._recordPool.updateDimensions(this._dimensions, this._delimiter);
  };
- /**
- * - remove dimension
- * - rename a dimension
- * - change hidden flag
- * - move a dimension
- */
- this.wholeUpdateDimensions = (fields) => __awaiter(this, void 0, void 0, function* () {
- var _a;
- let updates = [];
- // Add all the updates
- for (let i = 0; i < fields.length; i++) {
- const { cField } = fields[i];
- const currentMatch = structuredClone(this._dimensions.find(x => x.name === cField.key));
- if (!currentMatch && !cField.fixed)
- throw new Error(`The requested field "${cField.key}" from the consumer is not present in the underlying dataset "${this._name}" (${this._dimensions.map(x => x.name).join(', ')})`);
- updates.push({
- currentDimension: currentMatch,
- newName: (_a = cField.alias) !== null && _a !== void 0 ? _a : cField.key,
- newHidden: cField.hidden,
- newPosition: i,
- toDelete: false
- });
- }
- // Add all the updates to remove dimensions
- for (const dim of this._dimensions) {
- if (!updates.find(x => { var _a; return ((_a = x.currentDimension) === null || _a === void 0 ? void 0 : _a.name) === dim.name; }))
- updates.push({ currentDimension: dim, toDelete: true });
- }
- // Now keep only the updates that actually change something
- updates = updates.filter(x => x.toDelete
- || !x.currentDimension
- || (x.currentDimension && (x.currentDimension.name !== x.newName
- || (Algo_1.default.hasVal(x.newHidden) && x.newHidden !== x.currentDimension.hidden)
- || x.newPosition !== x.currentDimension.index)));
- if (updates.length === 0)
- return this;
- let updatedDimensions = null;
- const newDataset = yield this.map(record => {
- for (const update of updates) {
- record.wholeUpdateDimension(update);
- }
- record._dimensions.sort((a, b) => a.index - b.index);
- if (!updatedDimensions)
- updatedDimensions = record._dimensions;
- return record;
- });
- this._dimensions = updatedDimensions;
- return newDataset;
- });
  this.print = (...args_1) => __awaiter(this, [...args_1], void 0, function* (count = 3, full = false) {
  console.log(`DS ${this._name} (${this._size} | ${this._iterations})`);
  console.log(this._dimensions.map(x => x.name).join(this._delimiter));
@@ -861,11 +801,11 @@ class Dataset {
  this._file = file;
  this._batchSize = (_a = batchSize !== null && batchSize !== void 0 ? batchSize : parseInt(Environment_1.default.get('MAX_ITEMS_IN_MEMORY'))) !== null && _a !== void 0 ? _a : Constants_1.default.defaults.MAX_ITEMS_IN_MEMORY;
  this._dimensions = [];
+ this._firstLine = '';
  this._delimiter = ',';
  this._size = 0;
  this._iterations = 0;
  this._operations = [];
- this._pipeline = [];
  // Initialize record pool for optimization
  this._recordPool = new DatasetRecordPool_1.default(this._batchSize);
  const datasetName = this._name
@@ -13,6 +13,8 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
  };
  Object.defineProperty(exports, "__esModule", { value: true });
  const Affirm_1 = __importDefault(require("../../core/Affirm"));
+ const Algo_1 = __importDefault(require("../../core/Algo"));
+ const ConsumerManager_1 = __importDefault(require("../consumer/ConsumerManager"));
  const Environment_1 = __importDefault(require("../Environment"));
  const FileCompiler_1 = __importDefault(require("../file/FileCompiler"));
  const ParseManager_1 = __importDefault(require("../parsing/ParseManager"));
@@ -34,7 +36,8 @@ class DatasetManagerClass {
  this.buildDimensions = (dataset_1, producer_1, ...args_1) => __awaiter(this, [dataset_1, producer_1, ...args_1], void 0, function* (dataset, producer, discover = false) {
  (0, Affirm_1.default)(dataset, `Invalid dataset`);
  (0, Affirm_1.default)(producer, `Invalid producer`);
- const firstLine = (yield dataset.readLines(1))[0].getRaw();
+ const firstLine = dataset.getFirstLine();
+ (0, Affirm_1.default)(firstLine, `The first line of the dataset was not set.`);
  return this.buildDimensionsFromFirstLine(firstLine, dataset.getFile(), producer, discover);
  });
  this.buildDimensionsFromFirstLine = (firstLine_1, dsFile_1, producer_1, ...args_1) => __awaiter(this, [firstLine_1, dsFile_1, producer_1, ...args_1], void 0, function* (firstLine, dsFile, producer, discover = false) {
@@ -97,6 +100,40 @@ class DatasetManagerClass {
  break;
  }
  });
+ this.computeDimensionsUpdates = (dataset, consumer) => {
+ var _a;
+ (0, Affirm_1.default)(dataset, 'Invalid dataset');
+ (0, Affirm_1.default)(consumer, 'Invalid consumer');
+ const fields = ConsumerManager_1.default.getExpandedFields(consumer);
+ const dimensions = dataset.getDimensions();
+ let updates = [];
+ // Add all the updates
+ for (let i = 0; i < fields.length; i++) {
+ const { cField } = fields[i];
+ const currentMatch = structuredClone(dimensions.find(x => x.name === cField.key));
+ if (!currentMatch && !cField.fixed)
+ throw new Error(`The requested field "${cField.key}" from the consumer is not present in the underlying dataset "${dataset['_name']}" (${dimensions.map(x => x.name).join(', ')})`);
+ updates.push({
+ currentDimension: currentMatch,
+ newName: (_a = cField.alias) !== null && _a !== void 0 ? _a : cField.key,
+ newHidden: cField.hidden,
+ newPosition: i,
+ toDelete: false
+ });
+ }
+ // Add all the updates to remove dimensions
+ for (const dim of dimensions) {
+ if (!updates.find(x => { var _a; return ((_a = x.currentDimension) === null || _a === void 0 ? void 0 : _a.name) === dim.name; }))
+ updates.push({ currentDimension: dim, toDelete: true });
+ }
+ // Now keep only the updates that actually change something
+ updates = updates.filter(x => x.toDelete
+ || !x.currentDimension
+ || (x.currentDimension && (x.currentDimension.name !== x.newName
+ || (Algo_1.default.hasVal(x.newHidden) && x.newHidden !== x.currentDimension.hidden)
+ || x.newPosition !== x.currentDimension.index)));
+ return updates;
+ };
  }
  }
  const DatasetManager = new DatasetManagerClass();
@@ -63,6 +63,12 @@ class DatasetRecord {
  }
  return this;
  };
+ this.sortDimensions = () => {
+ const isOutOfOrder = this._dimensions.some((dim, index) => dim.index !== index);
+ if (isOutOfOrder) {
+ this._dimensions.sort((a, b) => a.index - b.index);
+ }
+ };
  this.toJSON = () => {
  if (this._dimensions.some(x => x.hidden)) {
  // remove the not wanted dimension
@@ -0,0 +1,2 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -0,0 +1,2 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -0,0 +1,2 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -12,6 +12,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
  return (mod && mod.__esModule) ? mod : { "default": mod };
  };
  Object.defineProperty(exports, "__esModule", { value: true });
+ const Constants_1 = __importDefault(require("../../Constants"));
  const Affirm_1 = __importDefault(require("../../core/Affirm"));
  const Algo_1 = __importDefault(require("../../core/Algo"));
  const DSTE_1 = __importDefault(require("../../core/dste/DSTE"));
@@ -42,15 +43,30 @@ class FileExporterClass {
  this.prepareBatch = (batch, options) => {
  switch (options.recordProjection.format) {
  case 'JSON': {
- const content = batch.map(x => x.toJSON()).join('\n');
- return content;
+ const jsonRecords = batch.map(x => x.toJSON());
+ return this._splitIntoChunks(jsonRecords, '\n');
  }
  case 'CSV': {
- const content = batch.map(x => x.toCSV(options.recordProjection.delimiter)).join('\n');
- return content;
+ const csvRecords = batch.map(x => x.toCSV(options.recordProjection.delimiter));
+ return this._splitIntoChunks(csvRecords, '\n');
  }
  }
  };
+ this._splitIntoChunks = (records, separator) => {
+ if (records.length === 0)
+ return [''];
+ const sampleRecord = records[0];
+ const sampleLength = sampleRecord.length + separator.length; // Include separator in calculation
+ const recordsPerChunk = Math.floor(Constants_1.default.defaults.STRING_MAX_CHARACTERS_LENGTH / sampleLength);
+ // Ensure at least 1 record per chunk
+ const chunkSize = Math.max(1, recordsPerChunk);
+ const chunks = [];
+ for (let i = 0; i < records.length; i += chunkSize) {
+ const chunk = records.slice(i, i + chunkSize);
+ chunks.push(chunk.join(separator));
+ }
+ return chunks;
+ };
  this._composeFileName = (consumer, extension) => `${consumer.name}_${Algo_1.default.replaceAll(DSTE_1.default.now().toISOString().split('.')[0], ':', '-')}.${extension}`;
  }
  }
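prepareBatch now returns an array of string chunks instead of one joined string: _splitIntoChunks sizes each chunk from the first record's length so that no joined string approaches STRING_MAX_CHARACTERS_LENGTH (10,000,000 characters by default), comfortably below V8's hard cap on string length (roughly 2^29 characters on 64-bit builds), which a very large batch joined in one go could otherwise hit. A small worked example of the same arithmetic (record sizes and counts are illustrative):

    // With the 10,000,000-character limit and ~250-character records:
    const records = Array.from({ length: 100000 }, () => 'x'.repeat(250));
    const limit = 10000000;
    const chunkSize = Math.max(1, Math.floor(limit / (records[0].length + 1))); // 39,840 records per chunk
    const chunks = [];
    for (let i = 0; i < records.length; i += chunkSize)
        chunks.push(records.slice(i, i + chunkSize).join('\n'));
    console.log(chunks.length); // 3 chunks instead of one ~25-million-character string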
@@ -0,0 +1,20 @@
+ "use strict";
+ var __importDefault = (this && this.__importDefault) || function (mod) {
+ return (mod && mod.__esModule) ? mod : { "default": mod };
+ };
+ Object.defineProperty(exports, "__esModule", { value: true });
+ const node_v8_1 = __importDefault(require("node:v8"));
+ class RuntimeClass {
+ constructor() {
+ this.getHeap = () => {
+ const { heap_size_limit, used_heap_size } = node_v8_1.default.getHeapStatistics();
+ return {
+ heapSizeMB: this._toMB(heap_size_limit),
+ usedHeapMB: this._toMB(used_heap_size)
+ };
+ };
+ this._toMB = (bytes) => Math.round(bytes / (1024 * 1024) * 100) / 100;
+ }
+ }
+ const Runtime = new RuntimeClass();
+ exports.default = Runtime;
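The new Runtime helper wraps v8.getHeapStatistics(), whose heap_size_limit and used_heap_size fields are reported in bytes. A minimal usage sketch (the 4000 MB threshold mirrors MIN_RUNTIME_HEAP_MB; the rest is illustrative):

    const v8 = require('node:v8');

    const { heap_size_limit, used_heap_size } = v8.getHeapStatistics();
    const toMB = (bytes) => Math.round(bytes / (1024 * 1024) * 100) / 100;

    console.log(`heap limit: ${toMB(heap_size_limit)} MB, used: ${toMB(used_heap_size)} MB`);
    if (toMB(heap_size_limit) < 4000)
        console.warn('Heap limit below 4000 MB; large datasets may exhaust memory.');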
package/index.js CHANGED
@@ -16,14 +16,23 @@ const create_producer_1 = require("./actions/create_producer");
  const create_consumer_1 = require("./actions/create_consumer");
  const Constants_1 = __importDefault(require("./Constants"));
  const LicenceManager_1 = __importDefault(require("./licencing/LicenceManager"));
+ const Runtime_1 = __importDefault(require("./helper/Runtime"));
  dotenv_1.default.configDotenv();
  const program = new commander_1.Command();
+ // Validate the remora licence
  const remoraLicenceKey = process.env.REMORA_LICENCE_KEY;
  const check = LicenceManager_1.default.validate(remoraLicenceKey);
  if (!check.valid) {
  console.error(`Invalid Remora licence key, the product is not active: remember to set "REMORA_LICENCE_KEY" environment variable.`);
  process.exit(1);
  }
+ // Runtime check on heap size to warn user of insufficent runtime resources
+ const { heapSizeMB } = Runtime_1.default.getHeap();
+ if (heapSizeMB < Constants_1.default.defaults.MIN_RUNTIME_HEAP_MB)
+ console.warn(`Remora is running with ${heapSizeMB}MB of runtime heap, which is below the bare minimum of ${Constants_1.default.defaults.MIN_RUNTIME_HEAP_MB}MB (Recommended: ${Constants_1.default.defaults.RECOMMENDED_RUNTIME_HEAP_MB}MB).`);
+ else if (heapSizeMB < Constants_1.default.defaults.RECOMMENDED_RUNTIME_HEAP_MB)
+ console.warn(`Remora is running with ${heapSizeMB} MB of runtime heap, which is below the recommended of ${Constants_1.default.defaults.RECOMMENDED_RUNTIME_HEAP_MB} MB.`);
+ // Initialize all commands
  program
  .version(Constants_1.default.cliVersion + '', '-v, --version', 'Display the version of the CLI')
  .description('CLI tool for setting up and managing Data-Remora');
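The heap check runs right after licence validation and only warns; it does not change the limit. The V8 heap ceiling can usually be raised with Node's --max-old-space-size flag (for example NODE_OPTIONS="--max-old-space-size=8192" to clear the 8000 MB recommendation), though that is a general Node.js option rather than something this package configures itself.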
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@forzalabs/remora",
- "version": "0.0.57-nasco.3",
+ "version": "0.0.59-nasco.3",
  "description": "A powerful CLI tool for seamless data translation.",
  "main": "index.js",
  "private": false,