@forzalabs/remora 0.0.54-nasco.3 → 0.0.56-nasco.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Constants.js CHANGED
@@ -1,7 +1,7 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  const CONSTANTS = {
4
- cliVersion: '0.0.54-nasco',
4
+ cliVersion: '0.0.56-nasco',
5
5
  lambdaVersion: 1,
6
6
  port: 5069,
7
7
  defaults: {
@@ -26,14 +26,17 @@ const readline_1 = require("readline");
26
26
  const Constants_1 = __importDefault(require("../../Constants"));
27
27
  const DatasetManager_1 = __importDefault(require("./DatasetManager"));
28
28
  const DatasetRecord_1 = __importDefault(require("./DatasetRecord"));
29
+ const DatasetRecordPool_1 = __importDefault(require("./DatasetRecordPool"));
29
30
  const xlsx_1 = __importDefault(require("xlsx"));
30
31
  const Affirm_1 = __importDefault(require("../../core/Affirm"));
31
32
  const XMLParser_1 = __importDefault(require("../parsing/XMLParser"));
32
33
  const DriverFactory_1 = __importDefault(require("../../drivers/DriverFactory"));
33
34
  const Helper_1 = __importDefault(require("../../helper/Helper"));
34
35
  const Algo_1 = __importDefault(require("../../core/Algo"));
36
+ const Environment_1 = __importDefault(require("../Environment"));
35
37
  class Dataset {
36
- constructor(name, file, batchSize = Constants_1.default.defaults.MAX_ITEMS_IN_MEMORY) {
38
+ constructor(name, file, batchSize) {
39
+ var _a;
37
40
  this._pipeline = [];
38
41
  this.getPath = () => this._path;
39
42
  this.setPath = (path) => {
@@ -44,6 +47,7 @@ class Dataset {
44
47
  this.getBatchSize = () => this._batchSize;
45
48
  this.setBatchSize = (size) => {
46
49
  this._batchSize = size;
50
+ this._recordPool.resize(size);
47
51
  return this;
48
52
  };
49
53
  this.getSize = () => this._size;
@@ -80,6 +84,7 @@ class Dataset {
80
84
  const firstLine = typeof firstItem === 'object' ? JSON.stringify(firstItem) : String(firstItem);
81
85
  const buildRes = yield DatasetManager_1.default.buildDimensionsFromFirstLine(firstLine, this._file, producer, discover);
82
86
  this._dimensions = buildRes.dimensions;
87
+ this._updateRecordPoolDimensions();
83
88
  // Clear existing file content
84
89
  this.clear();
85
90
  // Convert objects to DatasetRecord format and write to file
@@ -121,16 +126,17 @@ class Dataset {
121
126
  const readStream = (0, fs_2.createReadStream)(inputPath);
122
127
  const writeStream = (0, fs_2.createWriteStream)(outputPath);
123
128
  const rl = (0, readline_1.createInterface)({ input: readStream, crlfDelay: Infinity });
129
+ const dimensions = Algo_1.default.deepClone(this._dimensions);
124
130
  let batch = [];
125
131
  let lineCount = 0;
126
- const dimensions = Algo_1.default.deepClone(this._dimensions);
127
132
  try {
128
133
  for (var _d = true, rl_1 = __asyncValues(rl), rl_1_1; rl_1_1 = yield rl_1.next(), _a = rl_1_1.done, !_a; _d = true) {
129
134
  _c = rl_1_1.value;
130
135
  _d = false;
131
136
  const line = _c;
132
137
  try {
133
- const record = new DatasetRecord_1.default(line, dimensions, this._delimiter);
138
+ // Reuse record from pool and reinitialize it with new line data
139
+ const record = this._recordPool.getNext(line, dimensions, this._delimiter);
134
140
  batch.push(record);
135
141
  lineCount++;
136
142
  if (batch.length >= this._batchSize) {
@@ -139,6 +145,7 @@ class Dataset {
139
145
  writeStream.write(transformedRecord.stringify() + '\n');
140
146
  }
141
147
  batch = [];
148
+ this._recordPool.reset(); // Reset pool index for next batch
142
149
  }
143
150
  }
144
151
  catch (error) {
@@ -682,6 +689,13 @@ class Dataset {
682
689
  return this;
683
690
  });
684
691
  this.getDimensions = () => this._dimensions;
692
+ /**
693
+ * Update the record pool when dimensions change.
+ * Takes no arguments and returns nothing: it forwards the dataset's current
+ * `_dimensions` and `_delimiter` to `this._recordPool.updateDimensions(...)`.
+ * The only caller visible in this diff invokes it right after `_dimensions`
+ * is reassigned from the first-line build result.
694
+ */
695
+ this._updateRecordPoolDimensions = () => {
696
+ // Update all pooled records with current dimensions
697
+ this._recordPool.updateDimensions(this._dimensions, this._delimiter);
698
+ };
685
699
  /**
686
700
  * - remove dimension
687
701
  * - rename a dimension
@@ -845,13 +859,15 @@ class Dataset {
845
859
  };
846
860
  this._name = name;
847
861
  this._file = file;
848
- this._batchSize = batchSize;
862
+ this._batchSize = (_a = batchSize !== null && batchSize !== void 0 ? batchSize : parseInt(Environment_1.default.get('MAX_ITEMS_IN_MEMORY'))) !== null && _a !== void 0 ? _a : Constants_1.default.defaults.MAX_ITEMS_IN_MEMORY;
849
863
  this._dimensions = [];
850
864
  this._delimiter = ',';
851
865
  this._size = 0;
852
866
  this._iterations = 0;
853
867
  this._operations = [];
854
868
  this._pipeline = [];
869
+ // Initialize record pool for optimization
870
+ this._recordPool = new DatasetRecordPool_1.default(this._batchSize);
855
871
  const datasetName = this._name
856
872
  .replace(/[^a-zA-Z0-9_-]/g, '_')
857
873
  .replace(/_{2,}/g, '_')
@@ -20,6 +20,17 @@ class DatasetRecord {
20
20
  this.getRaw = () => this._row;
21
21
  this.getValue = (dimension) => this._value[dimension];
22
22
  this.setValue = (dimension, value) => this._value[dimension] = value;
23
+ /**
24
+ * Reinitialize the record with new data instead of creating a new instance.
25
+ * This is used for object pooling optimization.
+ *
+ * Overwrites the stored row, dimensions and delimiter, replaces `_value`
+ * with a fresh empty object (values from the previous row are dropped),
+ * and then calls `this.parse(...)` on the new row.
+ * The `dimensions` array is stored by reference, not copied.
+ *
+ * @param row        The raw line to load into this record
+ * @param dimensions The dimensions passed on to `parse`
+ * @param delimiter  The delimiter passed on to `parse`
26
+ */
27
+ this.reinitialize = (row, dimensions, delimiter) => {
28
+ this._row = row;
29
+ this._dimensions = dimensions;
30
+ this._delimiter = delimiter;
31
+ this._value = {};
32
+ this.parse(row, delimiter, this._dimensions);
33
+ };
23
34
  this.wholeUpdateDimension = (update) => {
24
35
  var _a;
25
36
  if (update.toDelete) {
@@ -35,7 +46,9 @@ class DatasetRecord {
35
46
  }
36
47
  else {
37
48
  // Change: name, hidden, position
38
- const index = this._dimensions.findIndex(x => x.key === update.currentDimension.name);
49
+ let index = this._dimensions.findIndex(x => x.key === update.currentDimension.name);
50
+ if (index < 0)
51
+ index = this._dimensions.findIndex(x => x.key === update.currentDimension.key);
39
52
  const currentDim = this._dimensions[index];
40
53
  const updatedDim = { name: update.newName, key: (_a = currentDim.key) !== null && _a !== void 0 ? _a : update.newName, hidden: update.newHidden, index: update.newPosition };
41
54
  this._value[updatedDim.name] = this._value[currentDim.name];
@@ -0,0 +1,77 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ const DatasetRecord_1 = __importDefault(require("./DatasetRecord"));
7
/**
 * A pool of DatasetRecord objects to optimize memory allocation during batch processing.
 *
 * Records are pre-allocated once and handed out round-robin by `getNext`, which
 * re-populates an existing instance instead of constructing a new one.
 *
 * Caveats:
 * - A record returned by `getNext` is overwritten once the pool wraps around, so
 *   callers must finish with (or copy) a batch before requesting more than
 *   `getSize()` records. Call `reset()` between batches.
 * - If the requested size is not a usable positive number (e.g. `NaN` coming
 *   from `parseInt` of a missing setting, `0`, or a negative value) the pool is
 *   empty and `getNext` falls back to returning a brand-new record on every
 *   call, so the caller never receives `undefined`.
 */
class DatasetRecordPool {
    /**
     * Convert an arbitrary requested size into a safe pool length.
     * @param {*} size The requested size
     * @returns {number} A non-negative integer; 0 means "pooling disabled"
     */
    static _normalizeSize(size) {
        const numeric = Number(size);
        // NaN, +/-Infinity and anything below 1 cannot back a pool.
        if (!Number.isFinite(numeric) || numeric < 1) {
            return 0;
        }
        // A fractional size would make the modulo index land between slots.
        return Math.floor(numeric);
    }

    /**
     * @param {number} poolSize Number of records to pre-allocate
     */
    constructor(poolSize) {
        /**
         * Initialize the pool with empty DatasetRecord objects
         */
        this._initializePool = () => {
            this._pool = [];
            for (let i = 0; i < this._poolSize; i++) {
                this._pool.push(new DatasetRecord_1.default('', [], ','));
            }
            this._poolIndex = 0;
        };
        /**
         * Get the next available record from the pool and reinitialize it with new data
         * @param line The raw line data
         * @param dimensions The dataset dimensions
         * @param delimiter The delimiter to use
         * @returns A reinitialized DatasetRecord from the pool, or a freshly
         *          constructed one when the pool is empty
         */
        this.getNext = (line, dimensions, delimiter) => {
            const capacity = this._pool.length;
            if (capacity === 0) {
                // Pooling disabled: behave exactly like direct construction.
                return new DatasetRecord_1.default(line, dimensions, delimiter);
            }
            const record = this._pool[this._poolIndex];
            record.reinitialize(line, dimensions, delimiter);
            // Wrap on the real pool length so the index always names a live slot.
            this._poolIndex = (this._poolIndex + 1) % capacity;
            return record;
        };
        /**
         * Reset the pool index to start from the beginning
         * This should be called when starting a new batch
         */
        this.reset = () => {
            this._poolIndex = 0;
        };
        /**
         * Update the pool size and reinitialize if necessary.
         * Resizing discards the current records and allocates new ones.
         * @param newSize The new pool size
         */
        this.resize = (newSize) => {
            const normalized = DatasetRecordPool._normalizeSize(newSize);
            if (normalized !== this._poolSize) {
                this._poolSize = normalized;
                this._initializePool();
            }
        };
        /**
         * Update all pooled records with new dimensions and delimiter
         * This should be called when dataset dimensions change
         * @param dimensions The new dimensions
         * @param delimiter The new delimiter
         */
        this.updateDimensions = (dimensions, delimiter) => {
            for (const record of this._pool) {
                // Each record is re-parsed from an empty row under the new layout.
                record.reinitialize('', dimensions, delimiter);
            }
        };
        /**
         * Get the current pool size (0 when pooling is disabled)
         */
        this.getSize = () => this._poolSize;
        /**
         * Get the current pool index
         */
        this.getCurrentIndex = () => this._poolIndex;
        this._poolSize = DatasetRecordPool._normalizeSize(poolSize);
        this._poolIndex = 0;
        this._pool = [];
        this._initializePool();
    }
}
77
+ exports.default = DatasetRecordPool;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forzalabs/remora",
3
- "version": "0.0.54-nasco.3",
3
+ "version": "0.0.56-nasco.3",
4
4
  "description": "A powerful CLI tool for seamless data translation.",
5
5
  "main": "index.js",
6
6
  "private": false,