xy-scale 1.4.43 → 1.4.45
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/index.js +2 -2
- package/package.json +1 -1
- package/src/datasets.js +125 -27
- package/src/utilities.js +1 -25
- package/src/validators.js +22 -0
- package/src/zscore2.js +145 -0
- package/test/test.js +6 -1
- package/src/balancing.js +0 -187
package/README.md
CHANGED
|
@@ -29,7 +29,8 @@ Builds supervised-learning datasets and splits them into training and testing ar
|
|
|
29
29
|
#### Parameters
|
|
30
30
|
|
|
31
31
|
- `arrObj` (Array<Object>): Source dataset.
|
|
32
|
-
- `
|
|
32
|
+
- `trainSize` (Number, required).
|
|
33
|
+
- `testSize` (Number, required).
|
|
33
34
|
- `yCallbackFunc` (Function, optional): Builds the output object for each row. Returning `null` or `undefined` skips the row.
|
|
34
35
|
- `xCallbackFunc` (Function, optional): Builds the feature object for each row. Returning `null` or `undefined` skips the row.
|
|
35
36
|
- `validateRows` (Function, optional): Extra row filter executed before the callbacks.
|
package/index.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { parseTrainingXY, parseProductionX } from "./src/datasets.js"
|
|
2
2
|
import {arrayToTimesteps } from "./src/timeSteps.js"
|
|
3
|
-
import { arrayShuffle
|
|
3
|
+
import { arrayShuffle } from "./src/utilities.js"
|
|
4
4
|
|
|
5
|
-
export { parseTrainingXY, parseProductionX, arrayToTimesteps, arrayShuffle
|
|
5
|
+
export { parseTrainingXY, parseProductionX, arrayToTimesteps, arrayShuffle }
|
package/package.json
CHANGED
package/src/datasets.js
CHANGED
|
@@ -1,22 +1,34 @@
|
|
|
1
1
|
import { arrayShuffle } from "./utilities.js";
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
2
|
+
import { validateFirstRow, validateArray, hasInvalidNumbers, validateSizes } from "./validators.js";
|
|
3
|
+
import {zscore2d} from './zscore2.js'
|
|
4
4
|
|
|
5
5
|
export const parseTrainingXY = ({
|
|
6
6
|
arrObj = [],
|
|
7
|
-
|
|
7
|
+
trainSize = null,
|
|
8
|
+
testSize = null,
|
|
8
9
|
yCallbackFunc = row => row,
|
|
9
10
|
xCallbackFunc = row => row,
|
|
10
11
|
validateRows = () => true,
|
|
11
12
|
shuffle = false,
|
|
12
|
-
balancing = '',
|
|
13
13
|
state = {},
|
|
14
|
+
showSource = false,
|
|
15
|
+
scaling = null
|
|
14
16
|
}) => {
|
|
15
17
|
validateArray(arrObj, { min: 2 }, 'parseTrainingXY');
|
|
16
18
|
validateFirstRow(arrObj[0]);
|
|
17
19
|
|
|
20
|
+
const arrObjSize = arrObj.length;
|
|
21
|
+
|
|
22
|
+
validateSizes({arrObjSize, trainSize, testSize});
|
|
23
|
+
|
|
24
|
+
if(![null, 'zscore'].includes(scaling)) {
|
|
25
|
+
throw new Error(`Invalid "scaling" property. Accepting null or "zscore".`)
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
const totalSize = trainSize + testSize;
|
|
18
29
|
let flatX = [];
|
|
19
30
|
let flatY = [];
|
|
31
|
+
let source = [];
|
|
20
32
|
|
|
21
33
|
let keyNamesX = null;
|
|
22
34
|
let keyNamesY = null;
|
|
@@ -27,6 +39,7 @@ export const parseTrainingXY = ({
|
|
|
27
39
|
try {
|
|
28
40
|
if (!validateRows({ objRow: arrObj, index: x, state })) continue;
|
|
29
41
|
|
|
42
|
+
|
|
30
43
|
const parsedX = xCallbackFunc({ objRow: arrObj, index: x, state });
|
|
31
44
|
const parsedY = yCallbackFunc({ objRow: arrObj, index: x, state });
|
|
32
45
|
|
|
@@ -75,6 +88,11 @@ export const parseTrainingXY = ({
|
|
|
75
88
|
flatX.push(rowX);
|
|
76
89
|
flatY.push(rowY);
|
|
77
90
|
|
|
91
|
+
if(showSource) {
|
|
92
|
+
source.push(arrObj[x])
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
|
|
78
96
|
} catch(err) {
|
|
79
97
|
throw new Error(`[BUG] - Skipped row index=${x}: ${err.message}`);
|
|
80
98
|
}
|
|
@@ -88,6 +106,10 @@ export const parseTrainingXY = ({
|
|
|
88
106
|
x: flatX[i],
|
|
89
107
|
y: flatY[i]
|
|
90
108
|
};
|
|
109
|
+
|
|
110
|
+
if(showSource) {
|
|
111
|
+
merged[i].source = source[i]
|
|
112
|
+
}
|
|
91
113
|
}
|
|
92
114
|
|
|
93
115
|
const shuffled = arrayShuffle(merged);
|
|
@@ -95,9 +117,18 @@ export const parseTrainingXY = ({
|
|
|
95
117
|
flatX = new Array(shuffled.length);
|
|
96
118
|
flatY = new Array(shuffled.length);
|
|
97
119
|
|
|
120
|
+
if(showSource) {
|
|
121
|
+
source = new Array(shuffled.length)
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
|
|
98
125
|
for (let i = 0; i < shuffled.length; i++) {
|
|
99
126
|
flatX[i] = shuffled[i].x;
|
|
100
127
|
flatY[i] = shuffled[i].y;
|
|
128
|
+
|
|
129
|
+
if(showSource) {
|
|
130
|
+
source[i] = shuffled[i].source;
|
|
131
|
+
}
|
|
101
132
|
}
|
|
102
133
|
}
|
|
103
134
|
|
|
@@ -110,27 +141,43 @@ export const parseTrainingXY = ({
|
|
|
110
141
|
labelCounts,
|
|
111
142
|
};
|
|
112
143
|
|
|
113
|
-
const
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
let
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
144
|
+
const startSize = arrObjSize - totalSize
|
|
145
|
+
|
|
146
|
+
flatX.splice(0, startSize) //keeps the last items
|
|
147
|
+
flatY.splice(0, startSize) //keeps the last items
|
|
148
|
+
|
|
149
|
+
let trainX = flatX.slice(0, trainSize);
|
|
150
|
+
let trainY = flatY.slice(0, trainSize);
|
|
151
|
+
|
|
152
|
+
let stats = null
|
|
153
|
+
|
|
154
|
+
if(scaling === 'zscore') {
|
|
155
|
+
let trainNormalized = zscore2d(trainX)
|
|
156
|
+
stats = trainNormalized.stats
|
|
157
|
+
trainX = trainNormalized.data
|
|
158
|
+
trainNormalized = null
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
let testX = flatX.slice(-testSize);
|
|
162
|
+
|
|
163
|
+
if(scaling === 'zscore') {
|
|
164
|
+
testX = zscore2d(testX, stats).data
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
let testY = flatY.slice(-testSize);
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
flatX = null
|
|
171
|
+
flatY = null
|
|
172
|
+
|
|
173
|
+
let trainSource
|
|
174
|
+
let testSource
|
|
175
|
+
|
|
176
|
+
if(showSource) {
|
|
177
|
+
source.splice(0, startSize) //keeps the last items
|
|
178
|
+
trainSource = source.slice(0, trainSize);
|
|
179
|
+
testSource = source.slice(-testSize);
|
|
180
|
+
source = null
|
|
134
181
|
}
|
|
135
182
|
|
|
136
183
|
return {
|
|
@@ -140,7 +187,10 @@ export const parseTrainingXY = ({
|
|
|
140
187
|
testY,
|
|
141
188
|
configX,
|
|
142
189
|
configY,
|
|
143
|
-
|
|
190
|
+
trainSource,
|
|
191
|
+
testSource,
|
|
192
|
+
stats
|
|
193
|
+
}
|
|
144
194
|
};
|
|
145
195
|
export const parseProductionX = ({
|
|
146
196
|
arrObj = [],
|
|
@@ -149,13 +199,23 @@ export const parseProductionX = ({
|
|
|
149
199
|
validateRows = () => true,
|
|
150
200
|
shuffle = false,
|
|
151
201
|
state = {},
|
|
202
|
+
showSource = false,
|
|
203
|
+
scaling = null,
|
|
204
|
+
stats
|
|
152
205
|
}) => {
|
|
153
206
|
let flatX = [];
|
|
207
|
+
let source = [];
|
|
154
208
|
let keyNamesX = null;
|
|
155
209
|
|
|
156
210
|
validateArray(arrObj, { min: 1 }, 'parseProductionX');
|
|
157
211
|
validateFirstRow(arrObj[0]);
|
|
158
212
|
|
|
213
|
+
if(![null, 'zscore'].includes(scaling)) {
|
|
214
|
+
throw new Error(`Invalid "scaling" property. Accepting null or "zscore".`)
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
const arrObjSize = arrObj.length
|
|
218
|
+
|
|
159
219
|
if (yCallbackFunc != null) {
|
|
160
220
|
throw new Error('The property "yCallbackFunc" must not be set in "parseProductionX".');
|
|
161
221
|
}
|
|
@@ -185,21 +245,59 @@ export const parseProductionX = ({
|
|
|
185
245
|
}
|
|
186
246
|
|
|
187
247
|
flatX.push(rowX);
|
|
248
|
+
|
|
249
|
+
if(showSource) {
|
|
250
|
+
source.push(arrObj[x])
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
|
|
188
254
|
} catch(err) {
|
|
189
255
|
throw new Error(`[BUG] - Skipped row index=${x}: ${err.message}`);
|
|
190
256
|
}
|
|
191
257
|
}
|
|
192
258
|
|
|
193
259
|
if (shuffle) {
|
|
194
|
-
|
|
260
|
+
const merged = new Array(flatX.length);
|
|
261
|
+
|
|
262
|
+
for (let i = 0; i < flatX.length; i++) {
|
|
263
|
+
merged[i] = {
|
|
264
|
+
x: flatX[i]
|
|
265
|
+
};
|
|
266
|
+
|
|
267
|
+
if(showSource) {
|
|
268
|
+
merged[i].source = source[i]
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
const shuffled = arrayShuffle(merged);
|
|
273
|
+
|
|
274
|
+
flatX = new Array(shuffled.length);
|
|
275
|
+
|
|
276
|
+
if(showSource) {
|
|
277
|
+
source = new Array(shuffled.length)
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
for (let i = 0; i < shuffled.length; i++) {
|
|
282
|
+
flatX[i] = shuffled[i].x;
|
|
283
|
+
|
|
284
|
+
if(showSource) {
|
|
285
|
+
source[i] = shuffled[i].source
|
|
286
|
+
}
|
|
287
|
+
}
|
|
195
288
|
}
|
|
196
289
|
|
|
197
290
|
const configX = {
|
|
198
291
|
keyNames: keyNamesX ?? [],
|
|
199
292
|
};
|
|
200
293
|
|
|
294
|
+
if(scaling === 'zscore') {
|
|
295
|
+
flatX = zscore2d(flatX, stats).data
|
|
296
|
+
}
|
|
297
|
+
|
|
201
298
|
return {
|
|
202
299
|
X: flatX,
|
|
300
|
+
source,
|
|
203
301
|
configX,
|
|
204
302
|
};
|
|
205
303
|
};
|
package/src/utilities.js
CHANGED
|
@@ -10,28 +10,4 @@ export const arrayShuffle = X => {
|
|
|
10
10
|
}
|
|
11
11
|
|
|
12
12
|
return shuffledX;
|
|
13
|
-
}
|
|
14
|
-
|
|
15
|
-
export const arrayShuffleXY = (X, Y) => {
|
|
16
|
-
if (!Array.isArray(X) || !Array.isArray(Y)) {
|
|
17
|
-
throw new TypeError('Both X and Y must be arrays');
|
|
18
|
-
}
|
|
19
|
-
|
|
20
|
-
if (X.length !== Y.length) {
|
|
21
|
-
throw new Error('X and Y must have the same length');
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
// Copy to avoid mutating the originals
|
|
25
|
-
const shuffledX = [...X];
|
|
26
|
-
const shuffledY = [...Y];
|
|
27
|
-
|
|
28
|
-
// Shuffle both arrays using the same swaps
|
|
29
|
-
for (let i = shuffledX.length - 1; i > 0; i--) {
|
|
30
|
-
const j = Math.floor(Math.random() * (i + 1));
|
|
31
|
-
|
|
32
|
-
[shuffledX[i], shuffledX[j]] = [shuffledX[j], shuffledX[i]];
|
|
33
|
-
[shuffledY[i], shuffledY[j]] = [shuffledY[j], shuffledY[i]];
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
return { X: shuffledX, Y: shuffledY };
|
|
37
|
-
};
|
|
13
|
+
}
|
package/src/validators.js
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
|
|
2
2
|
/**
 * Tells whether a value is a usable finite number.
 *
 * @param {*} candidate - Value to inspect.
 * @returns {boolean} `true` only for finite values of type number; `null`,
 *   `undefined`, `NaN`, infinities and non-number types all yield `false`.
 */
export const isNumber = (candidate) => {
    if (candidate === null || candidate === undefined) {
        return false;
    }

    return Number.isFinite(candidate);
};
|
|
3
3
|
|
|
4
|
+
/**
 * Tells whether a value is an integer strictly greater than zero.
 *
 * @param {*} value - Value to inspect.
 * @returns {boolean} `true` for 1, 2, 3, ...; `false` for 0, negatives,
 *   fractions, non-finite numbers and non-number types.
 */
export const isPositiveInteger = (value) => {
    if (!Number.isInteger(value)) {
        return false;
    }

    return value > 0;
};
|
|
5
|
+
|
|
4
6
|
export const isKeyPairObject = param => {
|
|
5
7
|
return (
|
|
6
8
|
param !== null &&
|
|
@@ -132,4 +134,24 @@ export const arraysAreNotEqualSize = (list, callerName) => {
|
|
|
132
134
|
}
|
|
133
135
|
|
|
134
136
|
return false
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
/**
 * Validates the train/test split sizes against the size of the source dataset.
 *
 * @param {Object} params
 * @param {number} params.arrObjSize - Number of rows in the source dataset.
 * @param {number} params.trainSize - Requested number of training rows (must be >= 1).
 * @param {number} params.testSize - Requested number of testing rows (must be >= 1).
 * @returns {true} Always `true` when every check passes.
 * @throws {Error} When `trainSize` or `testSize` is not a positive integer, when
 *   `arrObjSize` is not a non-negative integer, or when `trainSize + testSize`
 *   exceeds `arrObjSize`.
 */
export const validateSizes = ({arrObjSize, trainSize, testSize}) => {

    // Zero is rejected for both split sizes, so the messages say "positive"
    // (they previously claimed "non-negative", which wrongly implied 0 was valid).
    if (!isPositiveInteger(trainSize)) {
        throw new Error(`Invalid property: "trainSize" (${trainSize}) must be a positive integer.`);
    }

    if (!isPositiveInteger(testSize)) {
        throw new Error(`Invalid property: "testSize" (${testSize}) must be a positive integer.`);
    }

    // The dataset size itself may legitimately be 0 as far as this check is concerned.
    if (!Number.isInteger(arrObjSize) || arrObjSize < 0) {
        throw new Error(`Invalid property: "arrObjSize" (${arrObjSize}) must be a non-negative integer.`);
    }

    if (arrObjSize < (trainSize + testSize)) {
        throw new Error(`Invalid property: The sum of "trainSize" + "testSize" (${trainSize + testSize}) must not be larger than "arrObj.length" (${arrObjSize}).`);
    }

    return true;
}
|
package/src/zscore2.js
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
/**
 * Copies a typed array (or any iterable / array-like) into a plain Array.
 *
 * @param {ArrayLike<*>|Iterable<*>} typedOrArray - Source sequence.
 * @returns {Array<*>} A new plain array holding the same elements.
 */
const toArray = (typedOrArray) => {
    return Array.from(typedOrArray);
};
|
|
2
|
+
|
|
3
|
+
/**
 * Coerces caller-supplied z-score statistics into fixed-width typed arrays.
 *
 * Each of `mean`, `std`, `scale` and `count` may be a plain array or a typed
 * array; anything else (including a missing property) is treated as empty.
 * Per column, invalid or missing entries fall back to neutral values:
 * mean 0, std 0, scale 0, count 0. When only one of `std` / `scale` is usable,
 * the other is derived as its reciprocal.
 *
 * @param {?{mean?: ArrayLike<number>, std?: ArrayLike<number>, scale?: ArrayLike<number>, count?: ArrayLike<number>}} stats
 *   Previously computed statistics, or `null`/`undefined`.
 * @param {number} cols - Number of feature columns to produce.
 * @returns {{mean: Float64Array, std: Float64Array, scale: Float64Array, count: Uint32Array}}
 */
const normalizeStats = (stats, cols) => {
    const mean = new Float64Array(cols);
    const std = new Float64Array(cols);
    const scale = new Float64Array(cols);
    const count = new Uint32Array(cols);

    // Accept typed arrays as well as plain arrays. Checking only Array.isArray
    // would discard e.g. Float64Array stats and silently zero every column.
    const asList = value => (
        (Array.isArray(value) || ArrayBuffer.isView(value)) ? value : []
    );

    const meanIn = asList(stats?.mean);
    const stdIn = asList(stats?.std);
    const scaleIn = asList(stats?.scale);
    const countIn = asList(stats?.count);

    for (let j = 0; j < cols; j++) {
        const m = Number(meanIn[j]);
        const s = Number(stdIn[j]);
        const sc = Number(scaleIn[j]);
        const c = Number(countIn[j]);

        mean[j] = Number.isFinite(m) ? m : 0;
        std[j] = Number.isFinite(s) && s > 0 ? s : 0;
        count[j] = Number.isFinite(c) && c > 0 ? c : 0;

        if (Number.isFinite(sc) && sc > 0) {
            // An explicit scale wins; back-fill std from it when std is unusable.
            scale[j] = sc;
            if (!(std[j] > 0)) { std[j] = 1 / sc; }
        } else if (std[j] > 0) {
            scale[j] = 1 / std[j];
        } else {
            // Neither value is usable: a scale of 0 marks the column as unscalable.
            scale[j] = 0;
        }
    }

    return { mean, std, scale, count };
};
|
|
36
|
+
|
|
37
|
+
/**
 * Computes per-column mean, population standard deviation, scale (1 / std)
 * and observation count for a 2D array in a single pass (Welford update).
 *
 * Cells that are missing (`null`, `undefined`, blank strings) or that do not
 * coerce to a finite number are skipped and do not contribute to the column's
 * statistics. Columns with fewer than two observations, or with zero variance,
 * get std 0 and scale 0.
 *
 * @param {Array<Array<*>>} arr - Rows of feature values.
 * @param {number} cols - Expected number of columns in every row.
 * @returns {{mean: Float64Array, std: Float64Array, scale: Float64Array, count: Uint32Array}}
 * @throws {TypeError} When a row is not an array.
 * @throws {Error} When a row's length differs from `cols`.
 */
const fitStats = (arr, cols) => {
    const rows = arr.length;
    const mean = new Float64Array(cols);
    const m2 = new Float64Array(cols);
    const count = new Uint32Array(cols);

    // Number(null) and Number('') are 0, so without this guard absent cells
    // would be counted as genuine zeros and bias the mean and deviation.
    const isMissing = value => (
        value == null || (typeof value === 'string' && value.trim() === '')
    );

    for (let i = 0; i < rows; i++) {
        const row = arr[i];

        if (!Array.isArray(row)) {
            throw new TypeError(`[zscore2d] Invalid row at index=${i}. Expected an array.`);
        }
        if (row.length !== cols) {
            throw new Error(`[zscore2d] Inconsistent row size at index=${i}. Expected ${cols}, got ${row.length}.`);
        }

        for (let j = 0; j < cols; j++) {
            const cell = row[j];
            if (isMissing(cell)) { continue; }

            const x = Number(cell);
            if (!Number.isFinite(x)) { continue; }

            count[j]++;

            // Welford's running update: numerically stable, no second pass needed.
            const delta = x - mean[j];
            mean[j] += delta / count[j];
            m2[j] += delta * (x - mean[j]);
        }
    }

    const std = new Float64Array(cols);
    const scale = new Float64Array(cols);

    for (let j = 0; j < cols; j++) {
        // Population variance (divide by n, not n - 1).
        const variance = count[j] > 1 ? (m2[j] / count[j]) : 0;
        const s = variance > 0 ? Math.sqrt(variance) : 0;
        std[j] = s;
        scale[j] = s > 0 ? (1 / s) : 0;
    }

    return { mean, std, scale, count };
};
|
|
77
|
+
|
|
78
|
+
/**
 * Applies z-score scaling to every row using precomputed statistics.
 *
 * For each cell the result is `(x - mean[j]) * scale[j]`. A cell is emitted as
 * 0 when it is missing (`null`, `undefined`, blank string), does not coerce to
 * a finite number, or when the column's scale is not positive.
 *
 * @param {Array<Array<*>>} arr - Rows of feature values.
 * @param {{mean: ArrayLike<number>, scale: ArrayLike<number>}} normalizedStats
 *   Statistics to apply; the column count is taken from `mean.length`.
 * @returns {Array<Array<number>>} New rows of scaled values; `arr` is not modified.
 * @throws {TypeError} When a row is not an array.
 * @throws {Error} When a row's length differs from the number of columns in the stats.
 */
const scaleFromStats = (arr, normalizedStats) => {
    const rows = arr.length;
    const cols = normalizedStats.mean.length;
    const out = new Array(rows);

    // Number(null) and Number('') are 0, so without this guard absent cells
    // would be scaled as a real 0 instead of being neutralised like NaN cells.
    const isMissing = value => (
        value == null || (typeof value === 'string' && value.trim() === '')
    );

    for (let i = 0; i < rows; i++) {
        const row = arr[i];

        if (!Array.isArray(row)) {
            throw new TypeError(`[zscore2d] Invalid row at index=${i}. Expected an array.`);
        }
        if (row.length !== cols) {
            throw new Error(`[zscore2d] Inconsistent row size at index=${i}. Expected ${cols}, got ${row.length}.`);
        }

        const normalized = new Array(cols);
        for (let j = 0; j < cols; j++) {
            const cell = row[j];
            const x = isMissing(cell) ? NaN : Number(cell);
            const sc = normalizedStats.scale[j];

            normalized[j] = (Number.isFinite(x) && sc > 0)
                ? (x - normalizedStats.mean[j]) * sc
                : 0;
        }

        out[i] = normalized;
    }

    return out;
};
|
|
108
|
+
|
|
109
|
+
/**
 * Z-score scales a 2D array, either fitting statistics from the data or
 * reusing statistics supplied by the caller.
 *
 * @param {Array<Array<*>>} arr - Rows of feature values.
 * @param {?Object} [stats=null] - Statistics to reuse. When falsy, statistics
 *   are fitted from `arr`; when provided, they are normalized to the column
 *   count of the first row and applied as-is.
 * @returns {{stats: {mean: number[], std: number[], scale: number[], count: number[]}, data: Array<Array<number>>}}
 *   The statistics that were used (as plain arrays) and the scaled rows. For an
 *   empty `arr`, `data` is `[]` and every stats array is empty.
 * @throws {TypeError} When `arr` is not an array or its first row is not an array.
 */
export const zscore2d = (arr, stats = null) => {
    if (!Array.isArray(arr)) {
        throw new TypeError('[zscore2d] "arr" must be a 2D array.');
    }

    // Converts the internal typed-array statistics into plain arrays for callers.
    const exportStats = (typedStats) => ({
        mean: toArray(typedStats.mean),
        std: toArray(typedStats.std),
        scale: toArray(typedStats.scale),
        count: toArray(typedStats.count)
    });

    // No rows: the column count is unknown, so statistics are reported with zero columns.
    if (arr.length === 0) {
        return {
            stats: exportStats(normalizeStats(stats ?? {}, 0)),
            data: []
        };
    }

    const head = arr[0];
    if (!Array.isArray(head)) {
        throw new TypeError('[zscore2d] "arr" must be a 2D array of rows.');
    }

    // The first row fixes the expected width of every other row.
    const width = head.length;

    let resolvedStats;
    if (stats) {
        resolvedStats = normalizeStats(stats, width);
    } else {
        resolvedStats = fitStats(arr, width);
    }

    const data = scaleFromStats(arr, resolvedStats);

    return {
        stats: exportStats(resolvedStats),
        data
    };
};
|
package/test/test.js
CHANGED
|
@@ -31,6 +31,9 @@ const test = async () => {
|
|
|
31
31
|
|
|
32
32
|
const arrObj = indicators.getData()
|
|
33
33
|
|
|
34
|
+
const trainSize = Math.round(arrObj.length * 0.8)
|
|
35
|
+
const testSize = arrObj.length - trainSize
|
|
36
|
+
|
|
34
37
|
const {
|
|
35
38
|
trainX,
|
|
36
39
|
trainY,
|
|
@@ -39,8 +42,10 @@ const test = async () => {
|
|
|
39
42
|
configX,
|
|
40
43
|
configY
|
|
41
44
|
} = parseTrainingXY({
|
|
45
|
+
scaling: 'zscore',
|
|
42
46
|
arrObj,
|
|
43
|
-
|
|
47
|
+
trainSize,
|
|
48
|
+
testSize,
|
|
44
49
|
yCallbackFunc,
|
|
45
50
|
xCallbackFunc,
|
|
46
51
|
validateRows: ({objRow, index}) => {
|
package/src/balancing.js
DELETED
|
@@ -1,187 +0,0 @@
|
|
|
1
|
-
const isValidLabelValue = (value) => {
|
|
2
|
-
if (typeof value === "string" || typeof value === "boolean") return true;
|
|
3
|
-
if (typeof value === "number") return Number.isFinite(value);
|
|
4
|
-
|
|
5
|
-
if (Array.isArray(value)) {
|
|
6
|
-
return value.every(isValidLabelValue);
|
|
7
|
-
}
|
|
8
|
-
|
|
9
|
-
return false;
|
|
10
|
-
};
|
|
11
|
-
|
|
12
|
-
const makeLabelKey = (label) => {
|
|
13
|
-
if (!isValidLabelValue(label)) {
|
|
14
|
-
throw new Error(
|
|
15
|
-
"Invalid Y label. Allowed types: finite numbers, strings, booleans, or nested arrays of those."
|
|
16
|
-
);
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
return JSON.stringify(label);
|
|
20
|
-
};
|
|
21
|
-
|
|
22
|
-
const validateXY = (X, Y) => {
|
|
23
|
-
if (!Array.isArray(X) || !Array.isArray(Y)) {
|
|
24
|
-
throw new Error("X and Y must be arrays.");
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
if (X.length !== Y.length) {
|
|
28
|
-
throw new Error("X and Y must have the same length.");
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
if (X.length === 0) {
|
|
32
|
-
throw new Error("X and Y cannot be empty.");
|
|
33
|
-
}
|
|
34
|
-
};
|
|
35
|
-
|
|
36
|
-
const mulberry32 = (seed) => {
|
|
37
|
-
let t = seed >>> 0;
|
|
38
|
-
|
|
39
|
-
return () => {
|
|
40
|
-
t += 0x6D2B79F5;
|
|
41
|
-
let r = Math.imul(t ^ (t >>> 15), t | 1);
|
|
42
|
-
r ^= r + Math.imul(r ^ (r >>> 7), r | 61);
|
|
43
|
-
return ((r ^ (r >>> 14)) >>> 0) / 4294967296;
|
|
44
|
-
};
|
|
45
|
-
};
|
|
46
|
-
|
|
47
|
-
const resolveRandom = ({ random, seed } = {}) => {
|
|
48
|
-
if (typeof random === "function") return random;
|
|
49
|
-
if (Number.isInteger(seed)) return mulberry32(seed);
|
|
50
|
-
return Math.random;
|
|
51
|
-
};
|
|
52
|
-
|
|
53
|
-
const shuffleInPlace = (arr, random = Math.random) => {
|
|
54
|
-
for (let i = arr.length - 1; i > 0; i--) {
|
|
55
|
-
const j = Math.floor(random() * (i + 1));
|
|
56
|
-
[arr[i], arr[j]] = [arr[j], arr[i]];
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
return arr;
|
|
60
|
-
};
|
|
61
|
-
|
|
62
|
-
const sampleWithoutReplacement = (samples, size, random = Math.random) => {
|
|
63
|
-
if (size > samples.length) {
|
|
64
|
-
throw new Error("Cannot sample more items than available without replacement.");
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
const copy = [...samples];
|
|
68
|
-
shuffleInPlace(copy, random);
|
|
69
|
-
return copy.slice(0, size);
|
|
70
|
-
};
|
|
71
|
-
|
|
72
|
-
const sampleWithReplacement = (samples, size, random = Math.random) => {
|
|
73
|
-
if (samples.length === 0) {
|
|
74
|
-
throw new Error("Cannot sample from an empty array.");
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
const out = [];
|
|
78
|
-
|
|
79
|
-
for (let i = 0; i < size; i++) {
|
|
80
|
-
const idx = Math.floor(random() * samples.length);
|
|
81
|
-
out.push(samples[idx]);
|
|
82
|
-
}
|
|
83
|
-
|
|
84
|
-
return out;
|
|
85
|
-
};
|
|
86
|
-
|
|
87
|
-
const maybeClone = (value, clone) => {
|
|
88
|
-
if (!clone) return value;
|
|
89
|
-
return structuredClone(value);
|
|
90
|
-
};
|
|
91
|
-
|
|
92
|
-
const groupXYByLabel = (X, Y, { cloneX = false } = {}) => {
|
|
93
|
-
validateXY(X, Y);
|
|
94
|
-
|
|
95
|
-
const groups = new Map();
|
|
96
|
-
|
|
97
|
-
Y.forEach((label, i) => {
|
|
98
|
-
const key = makeLabelKey(label);
|
|
99
|
-
|
|
100
|
-
if (!groups.has(key)) {
|
|
101
|
-
groups.set(key, []);
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
groups.get(key).push({
|
|
105
|
-
x: maybeClone(X[i], cloneX),
|
|
106
|
-
y: label,
|
|
107
|
-
});
|
|
108
|
-
});
|
|
109
|
-
|
|
110
|
-
return groups;
|
|
111
|
-
};
|
|
112
|
-
|
|
113
|
-
export const oversampleXY = (X, Y, options = {}) => {
|
|
114
|
-
const {
|
|
115
|
-
random,
|
|
116
|
-
seed,
|
|
117
|
-
shuffleResult = true,
|
|
118
|
-
cloneX = false,
|
|
119
|
-
} = options;
|
|
120
|
-
|
|
121
|
-
const rng = resolveRandom({ random, seed });
|
|
122
|
-
const groups = groupXYByLabel(X, Y, { cloneX });
|
|
123
|
-
|
|
124
|
-
const counts = [...groups.values()].map((samples) => samples.length);
|
|
125
|
-
const maxCount = Math.max(...counts);
|
|
126
|
-
|
|
127
|
-
const combined = [];
|
|
128
|
-
|
|
129
|
-
for (const samples of groups.values()) {
|
|
130
|
-
const originals = [...samples];
|
|
131
|
-
const extrasNeeded = maxCount - originals.length;
|
|
132
|
-
|
|
133
|
-
const extras =
|
|
134
|
-
extrasNeeded > 0
|
|
135
|
-
? sampleWithReplacement(samples, extrasNeeded, rng).map((sample) => ({
|
|
136
|
-
x: maybeClone(sample.x, cloneX),
|
|
137
|
-
y: sample.y,
|
|
138
|
-
}))
|
|
139
|
-
: [];
|
|
140
|
-
|
|
141
|
-
combined.push(...originals, ...extras);
|
|
142
|
-
}
|
|
143
|
-
|
|
144
|
-
if (shuffleResult) {
|
|
145
|
-
shuffleInPlace(combined, rng);
|
|
146
|
-
}
|
|
147
|
-
|
|
148
|
-
return {
|
|
149
|
-
X: combined.map(({ x }) => x),
|
|
150
|
-
Y: combined.map(({ y }) => y),
|
|
151
|
-
};
|
|
152
|
-
};
|
|
153
|
-
|
|
154
|
-
export const undersampleXY = (X, Y, options = {}) => {
|
|
155
|
-
const {
|
|
156
|
-
random,
|
|
157
|
-
seed,
|
|
158
|
-
shuffleResult = true,
|
|
159
|
-
cloneX = false,
|
|
160
|
-
} = options;
|
|
161
|
-
|
|
162
|
-
const rng = resolveRandom({ random, seed });
|
|
163
|
-
const groups = groupXYByLabel(X, Y, { cloneX });
|
|
164
|
-
|
|
165
|
-
const counts = [...groups.values()].map((samples) => samples.length);
|
|
166
|
-
const minCount = Math.min(...counts);
|
|
167
|
-
|
|
168
|
-
const combined = [];
|
|
169
|
-
|
|
170
|
-
for (const samples of groups.values()) {
|
|
171
|
-
const selected = sampleWithoutReplacement(samples, minCount, rng).map((sample) => ({
|
|
172
|
-
x: maybeClone(sample.x, cloneX),
|
|
173
|
-
y: sample.y,
|
|
174
|
-
}));
|
|
175
|
-
|
|
176
|
-
combined.push(...selected);
|
|
177
|
-
}
|
|
178
|
-
|
|
179
|
-
if (shuffleResult) {
|
|
180
|
-
shuffleInPlace(combined, rng);
|
|
181
|
-
}
|
|
182
|
-
|
|
183
|
-
return {
|
|
184
|
-
X: combined.map(({ x }) => x),
|
|
185
|
-
Y: combined.map(({ y }) => y),
|
|
186
|
-
};
|
|
187
|
-
};
|