xy-scale 1.4.42 → 1.4.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +2 -2
- package/package.json +1 -1
- package/src/datasets.js +34 -20
- package/src/utilities.js +1 -25
- package/src/balancing.js +0 -187
package/index.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { parseTrainingXY, parseProductionX } from "./src/datasets.js"
|
|
2
2
|
import {arrayToTimesteps } from "./src/timeSteps.js"
|
|
3
|
-
import { arrayShuffle
|
|
3
|
+
import { arrayShuffle } from "./src/utilities.js"
|
|
4
4
|
|
|
5
|
-
export { parseTrainingXY, parseProductionX, arrayToTimesteps, arrayShuffle
|
|
5
|
+
export { parseTrainingXY, parseProductionX, arrayToTimesteps, arrayShuffle }
|
package/package.json
CHANGED
package/src/datasets.js
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import { arrayShuffle } from "./utilities.js";
|
|
2
|
-
import { oversampleXY, undersampleXY } from "./balancing.js";
|
|
3
2
|
import { validateFirstRow, validateArray, hasInvalidNumbers } from "./validators.js";
|
|
4
3
|
|
|
5
4
|
export const parseTrainingXY = ({
|
|
@@ -9,7 +8,6 @@ export const parseTrainingXY = ({
|
|
|
9
8
|
xCallbackFunc = row => row,
|
|
10
9
|
validateRows = () => true,
|
|
11
10
|
shuffle = false,
|
|
12
|
-
balancing = '',
|
|
13
11
|
state = {},
|
|
14
12
|
}) => {
|
|
15
13
|
validateArray(arrObj, { min: 2 }, 'parseTrainingXY');
|
|
@@ -17,6 +15,8 @@ export const parseTrainingXY = ({
|
|
|
17
15
|
|
|
18
16
|
let flatX = [];
|
|
19
17
|
let flatY = [];
|
|
18
|
+
let source = [];
|
|
19
|
+
|
|
20
20
|
|
|
21
21
|
let keyNamesX = null;
|
|
22
22
|
let keyNamesY = null;
|
|
@@ -27,6 +27,7 @@ export const parseTrainingXY = ({
|
|
|
27
27
|
try {
|
|
28
28
|
if (!validateRows({ objRow: arrObj, index: x, state })) continue;
|
|
29
29
|
|
|
30
|
+
|
|
30
31
|
const parsedX = xCallbackFunc({ objRow: arrObj, index: x, state });
|
|
31
32
|
const parsedY = yCallbackFunc({ objRow: arrObj, index: x, state });
|
|
32
33
|
|
|
@@ -74,6 +75,7 @@ export const parseTrainingXY = ({
|
|
|
74
75
|
|
|
75
76
|
flatX.push(rowX);
|
|
76
77
|
flatY.push(rowY);
|
|
78
|
+
source.push(arrObj[x])
|
|
77
79
|
|
|
78
80
|
} catch(err) {
|
|
79
81
|
throw new Error(`[BUG] - Skipped row index=${x}: ${err.message}`);
|
|
@@ -86,7 +88,8 @@ export const parseTrainingXY = ({
|
|
|
86
88
|
for (let i = 0; i < flatX.length; i++) {
|
|
87
89
|
merged[i] = {
|
|
88
90
|
x: flatX[i],
|
|
89
|
-
y: flatY[i]
|
|
91
|
+
y: flatY[i],
|
|
92
|
+
source: source[i]
|
|
90
93
|
};
|
|
91
94
|
}
|
|
92
95
|
|
|
@@ -94,10 +97,12 @@ export const parseTrainingXY = ({
|
|
|
94
97
|
|
|
95
98
|
flatX = new Array(shuffled.length);
|
|
96
99
|
flatY = new Array(shuffled.length);
|
|
100
|
+
source = new Array(shuffled.length)
|
|
97
101
|
|
|
98
102
|
for (let i = 0; i < shuffled.length; i++) {
|
|
99
103
|
flatX[i] = shuffled[i].x;
|
|
100
104
|
flatY[i] = shuffled[i].y;
|
|
105
|
+
source[i] = shuffled[i].source
|
|
101
106
|
}
|
|
102
107
|
}
|
|
103
108
|
|
|
@@ -116,22 +121,8 @@ export const parseTrainingXY = ({
|
|
|
116
121
|
let trainY = flatY.slice(0, splitIndex);
|
|
117
122
|
let testX = flatX.slice(splitIndex);
|
|
118
123
|
let testY = flatY.slice(splitIndex);
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
let balance;
|
|
122
|
-
|
|
123
|
-
if (balancing === 'oversample') {
|
|
124
|
-
balance = oversampleXY(trainX, trainY);
|
|
125
|
-
trainX = balance.X;
|
|
126
|
-
trainY = balance.Y;
|
|
127
|
-
} else if (balancing === 'undersample') {
|
|
128
|
-
balance = undersampleXY(trainX, trainY);
|
|
129
|
-
trainX = balance.X;
|
|
130
|
-
trainY = balance.Y;
|
|
131
|
-
} else {
|
|
132
|
-
throw Error('balancing argument only accepts "", "oversample" and "undersample". Defaults to "".');
|
|
133
|
-
}
|
|
134
|
-
}
|
|
124
|
+
let trainSource = source.slice(0, splitIndex);
|
|
125
|
+
let testSource = source.slice(splitIndex);
|
|
135
126
|
|
|
136
127
|
return {
|
|
137
128
|
trainX,
|
|
@@ -140,6 +131,8 @@ export const parseTrainingXY = ({
|
|
|
140
131
|
testY,
|
|
141
132
|
configX,
|
|
142
133
|
configY,
|
|
134
|
+
trainSource,
|
|
135
|
+
testSource
|
|
143
136
|
};
|
|
144
137
|
};
|
|
145
138
|
export const parseProductionX = ({
|
|
@@ -151,6 +144,7 @@ export const parseProductionX = ({
|
|
|
151
144
|
state = {},
|
|
152
145
|
}) => {
|
|
153
146
|
let flatX = [];
|
|
147
|
+
let source = [];
|
|
154
148
|
let keyNamesX = null;
|
|
155
149
|
|
|
156
150
|
validateArray(arrObj, { min: 1 }, 'parseProductionX');
|
|
@@ -185,13 +179,32 @@ export const parseProductionX = ({
|
|
|
185
179
|
}
|
|
186
180
|
|
|
187
181
|
flatX.push(rowX);
|
|
182
|
+
source.push(arrObj[x])
|
|
183
|
+
|
|
188
184
|
} catch(err) {
|
|
189
185
|
throw new Error(`[BUG] - Skipped row index=${x}: ${err.message}`);
|
|
190
186
|
}
|
|
191
187
|
}
|
|
192
188
|
|
|
193
189
|
if (shuffle) {
|
|
194
|
-
|
|
190
|
+
const merged = new Array(flatX.length);
|
|
191
|
+
|
|
192
|
+
for (let i = 0; i < flatX.length; i++) {
|
|
193
|
+
merged[i] = {
|
|
194
|
+
x: flatX[i],
|
|
195
|
+
source: source[i]
|
|
196
|
+
};
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
const shuffled = arrayShuffle(merged);
|
|
200
|
+
|
|
201
|
+
flatX = new Array(shuffled.length);
|
|
202
|
+
source = new Array(shuffled.length)
|
|
203
|
+
|
|
204
|
+
for (let i = 0; i < shuffled.length; i++) {
|
|
205
|
+
flatX[i] = shuffled[i].x;
|
|
206
|
+
source[i] = shuffled[i].source
|
|
207
|
+
}
|
|
195
208
|
}
|
|
196
209
|
|
|
197
210
|
const configX = {
|
|
@@ -200,6 +213,7 @@ export const parseProductionX = ({
|
|
|
200
213
|
|
|
201
214
|
return {
|
|
202
215
|
X: flatX,
|
|
216
|
+
source,
|
|
203
217
|
configX,
|
|
204
218
|
};
|
|
205
219
|
};
|
package/src/utilities.js
CHANGED
|
@@ -10,28 +10,4 @@ export const arrayShuffle = X => {
|
|
|
10
10
|
}
|
|
11
11
|
|
|
12
12
|
return shuffledX;
|
|
13
|
-
}
|
|
14
|
-
|
|
15
|
-
export const arrayShuffleXY = (X, Y) => {
|
|
16
|
-
if (!Array.isArray(X) || !Array.isArray(Y)) {
|
|
17
|
-
throw new TypeError('Both X and Y must be arrays');
|
|
18
|
-
}
|
|
19
|
-
|
|
20
|
-
if (X.length !== Y.length) {
|
|
21
|
-
throw new Error('X and Y must have the same length');
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
// Copy to avoid mutating the originals
|
|
25
|
-
const shuffledX = [...X];
|
|
26
|
-
const shuffledY = [...Y];
|
|
27
|
-
|
|
28
|
-
// Shuffle both arrays using the same swaps
|
|
29
|
-
for (let i = shuffledX.length - 1; i > 0; i--) {
|
|
30
|
-
const j = Math.floor(Math.random() * (i + 1));
|
|
31
|
-
|
|
32
|
-
[shuffledX[i], shuffledX[j]] = [shuffledX[j], shuffledX[i]];
|
|
33
|
-
[shuffledY[i], shuffledY[j]] = [shuffledY[j], shuffledY[i]];
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
return { X: shuffledX, Y: shuffledY };
|
|
37
|
-
};
|
|
13
|
+
}
|
package/src/balancing.js
DELETED
|
@@ -1,187 +0,0 @@
|
|
|
1
|
-
const isValidLabelValue = (value) => {
|
|
2
|
-
if (typeof value === "string" || typeof value === "boolean") return true;
|
|
3
|
-
if (typeof value === "number") return Number.isFinite(value);
|
|
4
|
-
|
|
5
|
-
if (Array.isArray(value)) {
|
|
6
|
-
return value.every(isValidLabelValue);
|
|
7
|
-
}
|
|
8
|
-
|
|
9
|
-
return false;
|
|
10
|
-
};
|
|
11
|
-
|
|
12
|
-
const makeLabelKey = (label) => {
|
|
13
|
-
if (!isValidLabelValue(label)) {
|
|
14
|
-
throw new Error(
|
|
15
|
-
"Invalid Y label. Allowed types: finite numbers, strings, booleans, or nested arrays of those."
|
|
16
|
-
);
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
return JSON.stringify(label);
|
|
20
|
-
};
|
|
21
|
-
|
|
22
|
-
const validateXY = (X, Y) => {
|
|
23
|
-
if (!Array.isArray(X) || !Array.isArray(Y)) {
|
|
24
|
-
throw new Error("X and Y must be arrays.");
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
if (X.length !== Y.length) {
|
|
28
|
-
throw new Error("X and Y must have the same length.");
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
if (X.length === 0) {
|
|
32
|
-
throw new Error("X and Y cannot be empty.");
|
|
33
|
-
}
|
|
34
|
-
};
|
|
35
|
-
|
|
36
|
-
const mulberry32 = (seed) => {
|
|
37
|
-
let t = seed >>> 0;
|
|
38
|
-
|
|
39
|
-
return () => {
|
|
40
|
-
t += 0x6D2B79F5;
|
|
41
|
-
let r = Math.imul(t ^ (t >>> 15), t | 1);
|
|
42
|
-
r ^= r + Math.imul(r ^ (r >>> 7), r | 61);
|
|
43
|
-
return ((r ^ (r >>> 14)) >>> 0) / 4294967296;
|
|
44
|
-
};
|
|
45
|
-
};
|
|
46
|
-
|
|
47
|
-
const resolveRandom = ({ random, seed } = {}) => {
|
|
48
|
-
if (typeof random === "function") return random;
|
|
49
|
-
if (Number.isInteger(seed)) return mulberry32(seed);
|
|
50
|
-
return Math.random;
|
|
51
|
-
};
|
|
52
|
-
|
|
53
|
-
const shuffleInPlace = (arr, random = Math.random) => {
|
|
54
|
-
for (let i = arr.length - 1; i > 0; i--) {
|
|
55
|
-
const j = Math.floor(random() * (i + 1));
|
|
56
|
-
[arr[i], arr[j]] = [arr[j], arr[i]];
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
return arr;
|
|
60
|
-
};
|
|
61
|
-
|
|
62
|
-
const sampleWithoutReplacement = (samples, size, random = Math.random) => {
|
|
63
|
-
if (size > samples.length) {
|
|
64
|
-
throw new Error("Cannot sample more items than available without replacement.");
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
const copy = [...samples];
|
|
68
|
-
shuffleInPlace(copy, random);
|
|
69
|
-
return copy.slice(0, size);
|
|
70
|
-
};
|
|
71
|
-
|
|
72
|
-
const sampleWithReplacement = (samples, size, random = Math.random) => {
|
|
73
|
-
if (samples.length === 0) {
|
|
74
|
-
throw new Error("Cannot sample from an empty array.");
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
const out = [];
|
|
78
|
-
|
|
79
|
-
for (let i = 0; i < size; i++) {
|
|
80
|
-
const idx = Math.floor(random() * samples.length);
|
|
81
|
-
out.push(samples[idx]);
|
|
82
|
-
}
|
|
83
|
-
|
|
84
|
-
return out;
|
|
85
|
-
};
|
|
86
|
-
|
|
87
|
-
const maybeClone = (value, clone) => {
|
|
88
|
-
if (!clone) return value;
|
|
89
|
-
return structuredClone(value);
|
|
90
|
-
};
|
|
91
|
-
|
|
92
|
-
const groupXYByLabel = (X, Y, { cloneX = false } = {}) => {
|
|
93
|
-
validateXY(X, Y);
|
|
94
|
-
|
|
95
|
-
const groups = new Map();
|
|
96
|
-
|
|
97
|
-
Y.forEach((label, i) => {
|
|
98
|
-
const key = makeLabelKey(label);
|
|
99
|
-
|
|
100
|
-
if (!groups.has(key)) {
|
|
101
|
-
groups.set(key, []);
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
groups.get(key).push({
|
|
105
|
-
x: maybeClone(X[i], cloneX),
|
|
106
|
-
y: label,
|
|
107
|
-
});
|
|
108
|
-
});
|
|
109
|
-
|
|
110
|
-
return groups;
|
|
111
|
-
};
|
|
112
|
-
|
|
113
|
-
export const oversampleXY = (X, Y, options = {}) => {
|
|
114
|
-
const {
|
|
115
|
-
random,
|
|
116
|
-
seed,
|
|
117
|
-
shuffleResult = true,
|
|
118
|
-
cloneX = false,
|
|
119
|
-
} = options;
|
|
120
|
-
|
|
121
|
-
const rng = resolveRandom({ random, seed });
|
|
122
|
-
const groups = groupXYByLabel(X, Y, { cloneX });
|
|
123
|
-
|
|
124
|
-
const counts = [...groups.values()].map((samples) => samples.length);
|
|
125
|
-
const maxCount = Math.max(...counts);
|
|
126
|
-
|
|
127
|
-
const combined = [];
|
|
128
|
-
|
|
129
|
-
for (const samples of groups.values()) {
|
|
130
|
-
const originals = [...samples];
|
|
131
|
-
const extrasNeeded = maxCount - originals.length;
|
|
132
|
-
|
|
133
|
-
const extras =
|
|
134
|
-
extrasNeeded > 0
|
|
135
|
-
? sampleWithReplacement(samples, extrasNeeded, rng).map((sample) => ({
|
|
136
|
-
x: maybeClone(sample.x, cloneX),
|
|
137
|
-
y: sample.y,
|
|
138
|
-
}))
|
|
139
|
-
: [];
|
|
140
|
-
|
|
141
|
-
combined.push(...originals, ...extras);
|
|
142
|
-
}
|
|
143
|
-
|
|
144
|
-
if (shuffleResult) {
|
|
145
|
-
shuffleInPlace(combined, rng);
|
|
146
|
-
}
|
|
147
|
-
|
|
148
|
-
return {
|
|
149
|
-
X: combined.map(({ x }) => x),
|
|
150
|
-
Y: combined.map(({ y }) => y),
|
|
151
|
-
};
|
|
152
|
-
};
|
|
153
|
-
|
|
154
|
-
export const undersampleXY = (X, Y, options = {}) => {
|
|
155
|
-
const {
|
|
156
|
-
random,
|
|
157
|
-
seed,
|
|
158
|
-
shuffleResult = true,
|
|
159
|
-
cloneX = false,
|
|
160
|
-
} = options;
|
|
161
|
-
|
|
162
|
-
const rng = resolveRandom({ random, seed });
|
|
163
|
-
const groups = groupXYByLabel(X, Y, { cloneX });
|
|
164
|
-
|
|
165
|
-
const counts = [...groups.values()].map((samples) => samples.length);
|
|
166
|
-
const minCount = Math.min(...counts);
|
|
167
|
-
|
|
168
|
-
const combined = [];
|
|
169
|
-
|
|
170
|
-
for (const samples of groups.values()) {
|
|
171
|
-
const selected = sampleWithoutReplacement(samples, minCount, rng).map((sample) => ({
|
|
172
|
-
x: maybeClone(sample.x, cloneX),
|
|
173
|
-
y: sample.y,
|
|
174
|
-
}));
|
|
175
|
-
|
|
176
|
-
combined.push(...selected);
|
|
177
|
-
}
|
|
178
|
-
|
|
179
|
-
if (shuffleResult) {
|
|
180
|
-
shuffleInPlace(combined, rng);
|
|
181
|
-
}
|
|
182
|
-
|
|
183
|
-
return {
|
|
184
|
-
X: combined.map(({ x }) => x),
|
|
185
|
-
Y: combined.map(({ y }) => y),
|
|
186
|
-
};
|
|
187
|
-
};
|