@forzalabs/remora 0.0.55-nasco.3 → 0.0.56-nasco.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Constants.js CHANGED
@@ -1,7 +1,7 @@
  "use strict";
  Object.defineProperty(exports, "__esModule", { value: true });
  const CONSTANTS = {
- cliVersion: '0.0.55-nasco',
+ cliVersion: '0.0.56-nasco',
  lambdaVersion: 1,
  port: 5069,
  defaults: {
@@ -26,6 +26,7 @@ const readline_1 = require("readline");
  const Constants_1 = __importDefault(require("../../Constants"));
  const DatasetManager_1 = __importDefault(require("./DatasetManager"));
  const DatasetRecord_1 = __importDefault(require("./DatasetRecord"));
+ const DatasetRecordPool_1 = __importDefault(require("./DatasetRecordPool"));
  const xlsx_1 = __importDefault(require("xlsx"));
  const Affirm_1 = __importDefault(require("../../core/Affirm"));
  const XMLParser_1 = __importDefault(require("../parsing/XMLParser"));
@@ -46,6 +47,7 @@ class Dataset {
  this.getBatchSize = () => this._batchSize;
  this.setBatchSize = (size) => {
  this._batchSize = size;
+ this._recordPool.resize(size);
  return this;
  };
  this.getSize = () => this._size;
@@ -82,6 +84,7 @@ class Dataset {
  const firstLine = typeof firstItem === 'object' ? JSON.stringify(firstItem) : String(firstItem);
  const buildRes = yield DatasetManager_1.default.buildDimensionsFromFirstLine(firstLine, this._file, producer, discover);
  this._dimensions = buildRes.dimensions;
+ this._updateRecordPoolDimensions();
  // Clear existing file content
  this.clear();
  // Convert objects to DatasetRecord format and write to file
@@ -123,16 +126,17 @@ class Dataset {
  const readStream = (0, fs_2.createReadStream)(inputPath);
  const writeStream = (0, fs_2.createWriteStream)(outputPath);
  const rl = (0, readline_1.createInterface)({ input: readStream, crlfDelay: Infinity });
+ const dimensions = Algo_1.default.deepClone(this._dimensions);
  let batch = [];
  let lineCount = 0;
- const dimensions = Algo_1.default.deepClone(this._dimensions);
  try {
  for (var _d = true, rl_1 = __asyncValues(rl), rl_1_1; rl_1_1 = yield rl_1.next(), _a = rl_1_1.done, !_a; _d = true) {
  _c = rl_1_1.value;
  _d = false;
  const line = _c;
  try {
- const record = new DatasetRecord_1.default(line, dimensions, this._delimiter);
+ // Reuse record from pool and reinitialize it with new line data
+ const record = this._recordPool.getNext(line, dimensions, this._delimiter);
  batch.push(record);
  lineCount++;
  if (batch.length >= this._batchSize) {
@@ -141,6 +145,7 @@ class Dataset {
  writeStream.write(transformedRecord.stringify() + '\n');
  }
  batch = [];
+ this._recordPool.reset(); // Reset pool index for next batch
  }
  }
  catch (error) {
@@ -684,6 +689,13 @@ class Dataset {
  return this;
  });
  this.getDimensions = () => this._dimensions;
+ /**
+ * Update the record pool when dimensions change
+ */
+ this._updateRecordPoolDimensions = () => {
+ // Update all pooled records with current dimensions
+ this._recordPool.updateDimensions(this._dimensions, this._delimiter);
+ };
  /**
  * - remove dimension
  * - rename a dimension
@@ -854,6 +866,8 @@ class Dataset {
  this._iterations = 0;
  this._operations = [];
  this._pipeline = [];
+ // Initialize record pool for optimization
+ this._recordPool = new DatasetRecordPool_1.default(this._batchSize);
  const datasetName = this._name
  .replace(/[^a-zA-Z0-9_-]/g, '_')
  .replace(/_{2,}/g, '_')
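
The hunks above wire an object pool into the Dataset batch loop: the pool is created with the current batch size, resized whenever setBatchSize is called, refreshed when dimensions are rebuilt, and rewound with reset() after each batch is flushed, so one fixed set of DatasetRecord objects is recycled instead of allocating a new record per input line. Below is a minimal sketch of that pattern, assuming the module paths shown in the diff; transformLines and flush are hypothetical names, not part of the package.

// Minimal sketch (not the actual Dataset internals): borrow records from the
// pool for one batch, flush it, then rewind the pool so the same objects are reused.
const DatasetRecordPool = require('./DatasetRecordPool').default;

async function transformLines(lines, dimensions, delimiter, batchSize, flush) {
    const pool = new DatasetRecordPool(batchSize);
    let batch = [];
    for (const line of lines) {
        // Reuses a pooled DatasetRecord instead of `new DatasetRecord(...)` per line
        batch.push(pool.getNext(line, dimensions, delimiter));
        if (batch.length >= batchSize) {
            await flush(batch); // consumers must finish with the records here,
            batch = [];         // because the next batch overwrites the same objects
            pool.reset();
        }
    }
    if (batch.length > 0)
        await flush(batch);
}
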
@@ -20,6 +20,17 @@ class DatasetRecord {
  this.getRaw = () => this._row;
  this.getValue = (dimension) => this._value[dimension];
  this.setValue = (dimension, value) => this._value[dimension] = value;
+ /**
+ * Reinitialize the record with new data instead of creating a new instance
+ * This is used for object pooling optimization
+ */
+ this.reinitialize = (row, dimensions, delimiter) => {
+ this._row = row;
+ this._dimensions = dimensions;
+ this._delimiter = delimiter;
+ this._value = {};
+ this.parse(row, delimiter, this._dimensions);
+ };
  this.wholeUpdateDimension = (update) => {
  var _a;
  if (update.toDelete) {
@@ -35,7 +46,9 @@ class DatasetRecord {
  }
  else {
  // Change: name, hidden, position
- const index = this._dimensions.findIndex(x => x.key === update.currentDimension.name);
+ let index = this._dimensions.findIndex(x => x.key === update.currentDimension.name);
+ if (index < 0)
+ index = this._dimensions.findIndex(x => x.key === update.currentDimension.key);
  const currentDim = this._dimensions[index];
  const updatedDim = { name: update.newName, key: (_a = currentDim.key) !== null && _a !== void 0 ? _a : update.newName, hidden: update.newHidden, index: update.newPosition };
  this._value[updatedDim.name] = this._value[currentDim.name];
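
The second DatasetRecord hunk makes wholeUpdateDimension tolerant of updates whose display name does not match the stored dimension key: the lookup first compares keys against update.currentDimension.name and, if that misses, falls back to update.currentDimension.key, so the following this._dimensions[index] read no longer receives undefined. A hedged, standalone illustration of just that lookup; the dimension and update object shapes are assumptions inferred from the hunk.

// Standalone illustration of the fallback lookup (object shapes are assumed, not the real types)
const storedDimensions = [{ name: 'Total Amount', key: 'total_amount', hidden: false, index: 1 }];

function findDimensionIndex(dims, currentDimension) {
    let index = dims.findIndex(x => x.key === currentDimension.name);
    if (index < 0)
        index = dims.findIndex(x => x.key === currentDimension.key); // fallback added in this release
    return index;
}

// The name-based comparison misses ('total_amount' !== 'Total Amount'),
// so the key-based fallback is what resolves the dimension.
console.log(findDimensionIndex(storedDimensions, { name: 'Total Amount', key: 'total_amount' })); // 0
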
@@ -0,0 +1,77 @@
+ "use strict";
+ var __importDefault = (this && this.__importDefault) || function (mod) {
+ return (mod && mod.__esModule) ? mod : { "default": mod };
+ };
+ Object.defineProperty(exports, "__esModule", { value: true });
+ const DatasetRecord_1 = __importDefault(require("./DatasetRecord"));
+ /**
+ * A pool of DatasetRecord objects to optimize memory allocation during batch processing
+ */
+ class DatasetRecordPool {
+ constructor(poolSize) {
+ /**
+ * Initialize the pool with empty DatasetRecord objects
+ */
+ this._initializePool = () => {
+ this._pool = [];
+ for (let i = 0; i < this._poolSize; i++) {
+ this._pool.push(new DatasetRecord_1.default('', [], ','));
+ }
+ this._poolIndex = 0;
+ };
+ /**
+ * Get the next available record from the pool and reinitialize it with new data
+ * @param line The raw line data
+ * @param dimensions The dataset dimensions
+ * @param delimiter The delimiter to use
+ * @returns A reinitialized DatasetRecord from the pool
+ */
+ this.getNext = (line, dimensions, delimiter) => {
+ const record = this._pool[this._poolIndex];
+ record.reinitialize(line, dimensions, delimiter);
+ this._poolIndex = (this._poolIndex + 1) % this._poolSize;
+ return record;
+ };
+ /**
+ * Reset the pool index to start from the beginning
+ * This should be called when starting a new batch
+ */
+ this.reset = () => {
+ this._poolIndex = 0;
+ };
+ /**
+ * Update the pool size and reinitialize if necessary
+ * @param newSize The new pool size
+ */
+ this.resize = (newSize) => {
+ if (newSize !== this._poolSize) {
+ this._poolSize = newSize;
+ this._initializePool();
+ }
+ };
+ /**
+ * Update all pooled records with new dimensions and delimiter
+ * This should be called when dataset dimensions change
+ * @param dimensions The new dimensions
+ * @param delimiter The new delimiter
+ */
+ this.updateDimensions = (dimensions, delimiter) => {
+ for (const record of this._pool) {
+ record.reinitialize('', dimensions, delimiter);
+ }
+ };
+ /**
+ * Get the current pool size
+ */
+ this.getSize = () => this._poolSize;
+ /**
+ * Get the current pool index
+ */
+ this.getCurrentIndex = () => this._poolIndex;
+ this._poolSize = poolSize;
+ this._poolIndex = 0;
+ this._pool = [];
+ this._initializePool();
+ }
+ }
+ exports.default = DatasetRecordPool;
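
A short, hedged demo of the DatasetRecordPool API added above. Only methods visible in the diff are used; the dimension objects and CSV-style lines are illustrative assumptions. Note that getNext wraps around once poolSize records have been handed out, so callers are expected to consume a batch and call reset() before reusing the pool.

// Hedged usage sketch of the new pool (module path as required by Dataset in this diff)
const DatasetRecordPool = require('./DatasetRecordPool').default;

const dims = [{ name: 'id', key: 'id', index: 0 }, { name: 'label', key: 'label', index: 1 }];
const pool = new DatasetRecordPool(2);          // two reusable DatasetRecord instances

const a = pool.getNext('1,alpha', dims, ',');   // slot 0
const b = pool.getNext('2,beta', dims, ',');    // slot 1
const c = pool.getNext('3,gamma', dims, ',');   // index wraps back to slot 0
console.log(a === c);                           // true: `a` now holds the data from '3,gamma'

pool.reset();                                   // rewind the index before the next batch
pool.resize(512);                               // mirrors Dataset.setBatchSize(); rebuilds the pool
pool.updateDimensions(dims, '|');               // re-seeds every pooled record after a schema change
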
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@forzalabs/remora",
- "version": "0.0.55-nasco.3",
+ "version": "0.0.56-nasco.3",
  "description": "A powerful CLI tool for seamless data translation.",
  "main": "index.js",
  "private": false,
@@ -9,7 +9,7 @@
  },
  "scripts": {
  "sync": "cd ../dev_ops && npm run sync",
- "dev": "npx tsx scripts/dev.ts",
+ "dev": "clear && npx tsx scripts/dev.ts",
  "tsc-check": "npx tsc --noemit",
  "init": "npx tsx ./src/index.ts init",
  "version": "npx tsx ./src/index.ts -v",