@forzalabs/remora 0.0.55-nasco.3 → 0.0.56-nasco.3
This diff shows the publicly available contents of the two package versions as published to their registry. It is provided for informational purposes only.
package/Constants.js
CHANGED
@@ -26,6 +26,7 @@ const readline_1 = require("readline");
 const Constants_1 = __importDefault(require("../../Constants"));
 const DatasetManager_1 = __importDefault(require("./DatasetManager"));
 const DatasetRecord_1 = __importDefault(require("./DatasetRecord"));
+const DatasetRecordPool_1 = __importDefault(require("./DatasetRecordPool"));
 const xlsx_1 = __importDefault(require("xlsx"));
 const Affirm_1 = __importDefault(require("../../core/Affirm"));
 const XMLParser_1 = __importDefault(require("../parsing/XMLParser"));
@@ -46,6 +47,7 @@ class Dataset {
         this.getBatchSize = () => this._batchSize;
         this.setBatchSize = (size) => {
             this._batchSize = size;
+            this._recordPool.resize(size);
             return this;
         };
         this.getSize = () => this._size;
@@ -82,6 +84,7 @@ class Dataset {
             const firstLine = typeof firstItem === 'object' ? JSON.stringify(firstItem) : String(firstItem);
             const buildRes = yield DatasetManager_1.default.buildDimensionsFromFirstLine(firstLine, this._file, producer, discover);
             this._dimensions = buildRes.dimensions;
+            this._updateRecordPoolDimensions();
             // Clear existing file content
             this.clear();
             // Convert objects to DatasetRecord format and write to file
@@ -123,16 +126,17 @@ class Dataset {
             const readStream = (0, fs_2.createReadStream)(inputPath);
             const writeStream = (0, fs_2.createWriteStream)(outputPath);
             const rl = (0, readline_1.createInterface)({ input: readStream, crlfDelay: Infinity });
+            const dimensions = Algo_1.default.deepClone(this._dimensions);
             let batch = [];
             let lineCount = 0;
-            const dimensions = Algo_1.default.deepClone(this._dimensions);
             try {
                 for (var _d = true, rl_1 = __asyncValues(rl), rl_1_1; rl_1_1 = yield rl_1.next(), _a = rl_1_1.done, !_a; _d = true) {
                     _c = rl_1_1.value;
                     _d = false;
                     const line = _c;
                     try {
-
+                        // Reuse record from pool and reinitialize it with new line data
+                        const record = this._recordPool.getNext(line, dimensions, this._delimiter);
                         batch.push(record);
                         lineCount++;
                         if (batch.length >= this._batchSize) {
@@ -141,6 +145,7 @@ class Dataset {
                                 writeStream.write(transformedRecord.stringify() + '\n');
                             }
                             batch = [];
+                            this._recordPool.reset(); // Reset pool index for next batch
                         }
                     }
                     catch (error) {
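
The loop above now draws each record from this._recordPool instead of allocating a new DatasetRecord per line, and resets the pool index after every flush. The sketch below is a minimal TypeScript illustration of that round-robin reuse pattern with simplified, hypothetical stand-ins (PooledRecord and RecordPool are not the package's actual classes); it shows why reset() is only safe once the whole batch has been written out, since the next cycle overwrites the same instances in place.

// Hypothetical, simplified stand-ins for DatasetRecord / DatasetRecordPool.
class PooledRecord {
    row = '';
    reinitialize(row: string): void {
        this.row = row; // overwrite in place instead of allocating a new object
    }
}

class RecordPool {
    private index = 0;
    private pool: PooledRecord[];
    constructor(size: number) {
        this.pool = Array.from({ length: size }, () => new PooledRecord());
    }
    getNext(row: string): PooledRecord {
        const record = this.pool[this.index];
        record.reinitialize(row);
        this.index = (this.index + 1) % this.pool.length; // round-robin slot
        return record;
    }
    reset(): void { this.index = 0; }
}

function processLines(lines: string[], batchSize: number): void {
    const pool = new RecordPool(batchSize); // pool size tracks batch size
    let batch: PooledRecord[] = [];
    for (const line of lines) {
        batch.push(pool.getNext(line));
        if (batch.length >= batchSize) {
            // Flush first: the next batch reuses these same instances.
            for (const record of batch) console.log(record.row);
            batch = [];
            pool.reset();
        }
    }
    for (const record of batch) console.log(record.row); // leftover partial batch
}

processLines(['a,1', 'b,2', 'c,3'], 2);

In the diff itself the pool is created with this._batchSize and resized in setBatchSize, so a single batch never wraps past the pool and no record is overwritten before it reaches the write stream.
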
@@ -684,6 +689,13 @@ class Dataset {
             return this;
         });
         this.getDimensions = () => this._dimensions;
+        /**
+         * Update the record pool when dimensions change
+         */
+        this._updateRecordPoolDimensions = () => {
+            // Update all pooled records with current dimensions
+            this._recordPool.updateDimensions(this._dimensions, this._delimiter);
+        };
         /**
          * - remove dimension
          * - rename a dimension
@@ -854,6 +866,8 @@ class Dataset {
         this._iterations = 0;
         this._operations = [];
         this._pipeline = [];
+        // Initialize record pool for optimization
+        this._recordPool = new DatasetRecordPool_1.default(this._batchSize);
         const datasetName = this._name
             .replace(/[^a-zA-Z0-9_-]/g, '_')
             .replace(/_{2,}/g, '_')
@@ -20,6 +20,17 @@ class DatasetRecord {
         this.getRaw = () => this._row;
         this.getValue = (dimension) => this._value[dimension];
         this.setValue = (dimension, value) => this._value[dimension] = value;
+        /**
+         * Reinitialize the record with new data instead of creating a new instance
+         * This is used for object pooling optimization
+         */
+        this.reinitialize = (row, dimensions, delimiter) => {
+            this._row = row;
+            this._dimensions = dimensions;
+            this._delimiter = delimiter;
+            this._value = {};
+            this.parse(row, delimiter, this._dimensions);
+        };
         this.wholeUpdateDimension = (update) => {
             var _a;
             if (update.toDelete) {
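
reinitialize is what makes pooling possible at the record level: it swaps in a new raw row, resets the parsed value map, and re-parses against the current dimensions. One side effect is aliasing: any reference still held to a pooled record now reflects the most recently loaded row. A small TypeScript sketch with a simplified, hypothetical record class (field names and parse logic are assumptions, not the package's actual implementation):

// Simplified model of a record that can be rebuilt in place.
interface Dimension { name: string; index: number; }

class ReusableRecord {
    private value: Record<string, string> = {};
    constructor(private row: string, private dimensions: Dimension[], private delimiter: string) {
        this.parse();
    }
    // Re-point the instance at a new row and re-derive its parsed values.
    reinitialize(row: string, dimensions: Dimension[], delimiter: string): void {
        this.row = row;
        this.dimensions = dimensions;
        this.delimiter = delimiter;
        this.value = {}; // drop values parsed from the previous row
        this.parse();
    }
    private parse(): void {
        const cells = this.row.split(this.delimiter);
        for (const dim of this.dimensions) this.value[dim.name] = cells[dim.index] ?? '';
    }
    getValue(name: string): string | undefined {
        return this.value[name];
    }
}

const dims = [{ name: 'id', index: 0 }, { name: 'city', index: 1 }];
const rec = new ReusableRecord('1,Rome', dims, ',');
const alias = rec;
rec.reinitialize('2,Milan', dims, ',');
console.log(alias.getValue('city')); // "Milan" — the alias sees the new row

The real DatasetRecord.reinitialize above does the same in-place rebuild via this._value = {} followed by this.parse(row, delimiter, this._dimensions).
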
@@ -35,7 +46,9 @@ class DatasetRecord {
             }
             else {
                 // Change: name, hidden, position
-
+                let index = this._dimensions.findIndex(x => x.key === update.currentDimension.name);
+                if (index < 0)
+                    index = this._dimensions.findIndex(x => x.key === update.currentDimension.key);
                 const currentDim = this._dimensions[index];
                 const updatedDim = { name: update.newName, key: (_a = currentDim.key) !== null && _a !== void 0 ? _a : update.newName, hidden: update.newHidden, index: update.newPosition };
                 this._value[updatedDim.name] = this._value[currentDim.name];
@@ -0,0 +1,77 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+const DatasetRecord_1 = __importDefault(require("./DatasetRecord"));
+/**
+ * A pool of DatasetRecord objects to optimize memory allocation during batch processing
+ */
+class DatasetRecordPool {
+    constructor(poolSize) {
+        /**
+         * Initialize the pool with empty DatasetRecord objects
+         */
+        this._initializePool = () => {
+            this._pool = [];
+            for (let i = 0; i < this._poolSize; i++) {
+                this._pool.push(new DatasetRecord_1.default('', [], ','));
+            }
+            this._poolIndex = 0;
+        };
+        /**
+         * Get the next available record from the pool and reinitialize it with new data
+         * @param line The raw line data
+         * @param dimensions The dataset dimensions
+         * @param delimiter The delimiter to use
+         * @returns A reinitialized DatasetRecord from the pool
+         */
+        this.getNext = (line, dimensions, delimiter) => {
+            const record = this._pool[this._poolIndex];
+            record.reinitialize(line, dimensions, delimiter);
+            this._poolIndex = (this._poolIndex + 1) % this._poolSize;
+            return record;
+        };
+        /**
+         * Reset the pool index to start from the beginning
+         * This should be called when starting a new batch
+         */
+        this.reset = () => {
+            this._poolIndex = 0;
+        };
+        /**
+         * Update the pool size and reinitialize if necessary
+         * @param newSize The new pool size
+         */
+        this.resize = (newSize) => {
+            if (newSize !== this._poolSize) {
+                this._poolSize = newSize;
+                this._initializePool();
+            }
+        };
+        /**
+         * Update all pooled records with new dimensions and delimiter
+         * This should be called when dataset dimensions change
+         * @param dimensions The new dimensions
+         * @param delimiter The new delimiter
+         */
+        this.updateDimensions = (dimensions, delimiter) => {
+            for (const record of this._pool) {
+                record.reinitialize('', dimensions, delimiter);
+            }
+        };
+        /**
+         * Get the current pool size
+         */
+        this.getSize = () => this._poolSize;
+        /**
+         * Get the current pool index
+         */
+        this.getCurrentIndex = () => this._poolIndex;
+        this._poolSize = poolSize;
+        this._poolIndex = 0;
+        this._pool = [];
+        this._initializePool();
+    }
+}
+exports.default = DatasetRecordPool;
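
Putting the pieces together, here is roughly how the new pool is meant to be driven, based only on the methods visible in this diff (the constructor, getNext, reset, resize, updateDimensions). The TypeScript-style import, the dimension objects, delimiter, and sample lines are illustrative assumptions; the dimension shape is inferred from the DatasetRecord hunks above.

// Sketch of driving DatasetRecordPool (the class added above, default export).
import DatasetRecordPool from './DatasetRecordPool';

const batchSize = 2;
const delimiter = ',';
// Assumed dimension shape, inferred from the DatasetRecord changes (name/key/hidden/index).
const dimensions = [{ name: 'id', key: 'id', hidden: false, index: 0 }];

const pool = new DatasetRecordPool(batchSize); // pre-allocates `batchSize` empty records
pool.updateDimensions(dimensions, delimiter);  // refresh pooled records when dimensions change

let batch: any[] = [];
for (const line of ['1,Rome', '2,Milan', '3,Turin']) {
    batch.push(pool.getNext(line, dimensions, delimiter)); // reuse a pooled record
    if (batch.length >= batchSize) {
        // ...write the batch out here...
        batch = [];
        pool.reset(); // next batch starts again at slot 0
    }
}
// A later setBatchSize(n) on the Dataset maps to pool.resize(n), rebuilding the pool.
pool.resize(4);
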
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@forzalabs/remora",
-  "version": "0.0.55-nasco.3",
+  "version": "0.0.56-nasco.3",
   "description": "A powerful CLI tool for seamless data translation.",
   "main": "index.js",
   "private": false,
@@ -9,7 +9,7 @@
   },
   "scripts": {
     "sync": "cd ../dev_ops && npm run sync",
-    "dev": "npx tsx scripts/dev.ts",
+    "dev": "clear && npx tsx scripts/dev.ts",
     "tsc-check": "npx tsc --noemit",
     "init": "npx tsx ./src/index.ts init",
     "version": "npx tsx ./src/index.ts -v",