xy-scale 1.4.43 → 1.4.44

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import { parseTrainingXY, parseProductionX } from "./src/datasets.js"
2
2
  import {arrayToTimesteps } from "./src/timeSteps.js"
3
- import { arrayShuffle, arrayShuffleXY } from "./src/utilities.js"
3
+ import { arrayShuffle } from "./src/utilities.js"
4
4
 
5
- export { parseTrainingXY, parseProductionX, arrayToTimesteps, arrayShuffle, arrayShuffleXY }
5
+ export { parseTrainingXY, parseProductionX, arrayToTimesteps, arrayShuffle }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "xy-scale",
3
- "version": "1.4.43",
3
+ "version": "1.4.44",
4
4
  "main": "./index.js",
5
5
  "type": "module",
6
6
  "scripts": {
package/src/datasets.js CHANGED
@@ -1,5 +1,4 @@
1
1
  import { arrayShuffle } from "./utilities.js";
2
- import { oversampleXY, undersampleXY } from "./balancing.js";
3
2
  import { validateFirstRow, validateArray, hasInvalidNumbers } from "./validators.js";
4
3
 
5
4
  export const parseTrainingXY = ({
@@ -9,7 +8,6 @@ export const parseTrainingXY = ({
9
8
  xCallbackFunc = row => row,
10
9
  validateRows = () => true,
11
10
  shuffle = false,
12
- balancing = '',
13
11
  state = {},
14
12
  }) => {
15
13
  validateArray(arrObj, { min: 2 }, 'parseTrainingXY');
@@ -17,6 +15,8 @@ export const parseTrainingXY = ({
17
15
 
18
16
  let flatX = [];
19
17
  let flatY = [];
18
+ let source = [];
19
+
20
20
 
21
21
  let keyNamesX = null;
22
22
  let keyNamesY = null;
@@ -27,6 +27,7 @@ export const parseTrainingXY = ({
27
27
  try {
28
28
  if (!validateRows({ objRow: arrObj, index: x, state })) continue;
29
29
 
30
+
30
31
  const parsedX = xCallbackFunc({ objRow: arrObj, index: x, state });
31
32
  const parsedY = yCallbackFunc({ objRow: arrObj, index: x, state });
32
33
 
@@ -74,6 +75,7 @@ export const parseTrainingXY = ({
74
75
 
75
76
  flatX.push(rowX);
76
77
  flatY.push(rowY);
78
+ source.push(arrObj[x])
77
79
 
78
80
  } catch(err) {
79
81
  throw new Error(`[BUG] - Skipped row index=${x}: ${err.message}`);
@@ -86,7 +88,8 @@ export const parseTrainingXY = ({
86
88
  for (let i = 0; i < flatX.length; i++) {
87
89
  merged[i] = {
88
90
  x: flatX[i],
89
- y: flatY[i]
91
+ y: flatY[i],
92
+ source: source[i]
90
93
  };
91
94
  }
92
95
 
@@ -94,10 +97,12 @@ export const parseTrainingXY = ({
94
97
 
95
98
  flatX = new Array(shuffled.length);
96
99
  flatY = new Array(shuffled.length);
100
+ source = new Array(shuffled.length)
97
101
 
98
102
  for (let i = 0; i < shuffled.length; i++) {
99
103
  flatX[i] = shuffled[i].x;
100
104
  flatY[i] = shuffled[i].y;
105
+ source[i] = shuffled[i].source
101
106
  }
102
107
  }
103
108
 
@@ -116,22 +121,8 @@ export const parseTrainingXY = ({
116
121
  let trainY = flatY.slice(0, splitIndex);
117
122
  let testX = flatX.slice(splitIndex);
118
123
  let testY = flatY.slice(splitIndex);
119
-
120
- if (balancing) {
121
- let balance;
122
-
123
- if (balancing === 'oversample') {
124
- balance = oversampleXY(trainX, trainY);
125
- trainX = balance.X;
126
- trainY = balance.Y;
127
- } else if (balancing === 'undersample') {
128
- balance = undersampleXY(trainX, trainY);
129
- trainX = balance.X;
130
- trainY = balance.Y;
131
- } else {
132
- throw Error('balancing argument only accepts "", "oversample" and "undersample". Defaults to "".');
133
- }
134
- }
124
+ let trainSource = source.slice(0, splitIndex);
125
+ let testSource = source.slice(splitIndex);
135
126
 
136
127
  return {
137
128
  trainX,
@@ -140,6 +131,8 @@ export const parseTrainingXY = ({
140
131
  testY,
141
132
  configX,
142
133
  configY,
134
+ trainSource,
135
+ testSource
143
136
  };
144
137
  };
145
138
  export const parseProductionX = ({
@@ -151,6 +144,7 @@ export const parseProductionX = ({
151
144
  state = {},
152
145
  }) => {
153
146
  let flatX = [];
147
+ let source = [];
154
148
  let keyNamesX = null;
155
149
 
156
150
  validateArray(arrObj, { min: 1 }, 'parseProductionX');
@@ -185,13 +179,32 @@ export const parseProductionX = ({
185
179
  }
186
180
 
187
181
  flatX.push(rowX);
182
+ source.push(arrObj[x])
183
+
188
184
  } catch(err) {
189
185
  throw new Error(`[BUG] - Skipped row index=${x}: ${err.message}`);
190
186
  }
191
187
  }
192
188
 
193
189
  if (shuffle) {
194
- flatX = arrayShuffle(flatX);
190
+ const merged = new Array(flatX.length);
191
+
192
+ for (let i = 0; i < flatX.length; i++) {
193
+ merged[i] = {
194
+ x: flatX[i],
195
+ source: source[i]
196
+ };
197
+ }
198
+
199
+ const shuffled = arrayShuffle(merged);
200
+
201
+ flatX = new Array(shuffled.length);
202
+ source = new Array(shuffled.length)
203
+
204
+ for (let i = 0; i < shuffled.length; i++) {
205
+ flatX[i] = shuffled[i].x;
206
+ source[i] = shuffled[i].source
207
+ }
195
208
  }
196
209
 
197
210
  const configX = {
@@ -200,6 +213,7 @@ export const parseProductionX = ({
200
213
 
201
214
  return {
202
215
  X: flatX,
216
+ source,
203
217
  configX,
204
218
  };
205
219
  };
package/src/utilities.js CHANGED
@@ -10,28 +10,4 @@ export const arrayShuffle = X => {
10
10
  }
11
11
 
12
12
  return shuffledX;
13
- }
14
-
15
- export const arrayShuffleXY = (X, Y) => {
16
- if (!Array.isArray(X) || !Array.isArray(Y)) {
17
- throw new TypeError('Both X and Y must be arrays');
18
- }
19
-
20
- if (X.length !== Y.length) {
21
- throw new Error('X and Y must have the same length');
22
- }
23
-
24
- // Copy to avoid mutating the originals
25
- const shuffledX = [...X];
26
- const shuffledY = [...Y];
27
-
28
- // Shuffle both arrays using the same swaps
29
- for (let i = shuffledX.length - 1; i > 0; i--) {
30
- const j = Math.floor(Math.random() * (i + 1));
31
-
32
- [shuffledX[i], shuffledX[j]] = [shuffledX[j], shuffledX[i]];
33
- [shuffledY[i], shuffledY[j]] = [shuffledY[j], shuffledY[i]];
34
- }
35
-
36
- return { X: shuffledX, Y: shuffledY };
37
- };
13
+ }
package/src/balancing.js DELETED
@@ -1,187 +0,0 @@
1
- const isValidLabelValue = (value) => {
2
- if (typeof value === "string" || typeof value === "boolean") return true;
3
- if (typeof value === "number") return Number.isFinite(value);
4
-
5
- if (Array.isArray(value)) {
6
- return value.every(isValidLabelValue);
7
- }
8
-
9
- return false;
10
- };
11
-
12
- const makeLabelKey = (label) => {
13
- if (!isValidLabelValue(label)) {
14
- throw new Error(
15
- "Invalid Y label. Allowed types: finite numbers, strings, booleans, or nested arrays of those."
16
- );
17
- }
18
-
19
- return JSON.stringify(label);
20
- };
21
-
22
- const validateXY = (X, Y) => {
23
- if (!Array.isArray(X) || !Array.isArray(Y)) {
24
- throw new Error("X and Y must be arrays.");
25
- }
26
-
27
- if (X.length !== Y.length) {
28
- throw new Error("X and Y must have the same length.");
29
- }
30
-
31
- if (X.length === 0) {
32
- throw new Error("X and Y cannot be empty.");
33
- }
34
- };
35
-
36
- const mulberry32 = (seed) => {
37
- let t = seed >>> 0;
38
-
39
- return () => {
40
- t += 0x6D2B79F5;
41
- let r = Math.imul(t ^ (t >>> 15), t | 1);
42
- r ^= r + Math.imul(r ^ (r >>> 7), r | 61);
43
- return ((r ^ (r >>> 14)) >>> 0) / 4294967296;
44
- };
45
- };
46
-
47
- const resolveRandom = ({ random, seed } = {}) => {
48
- if (typeof random === "function") return random;
49
- if (Number.isInteger(seed)) return mulberry32(seed);
50
- return Math.random;
51
- };
52
-
53
- const shuffleInPlace = (arr, random = Math.random) => {
54
- for (let i = arr.length - 1; i > 0; i--) {
55
- const j = Math.floor(random() * (i + 1));
56
- [arr[i], arr[j]] = [arr[j], arr[i]];
57
- }
58
-
59
- return arr;
60
- };
61
-
62
- const sampleWithoutReplacement = (samples, size, random = Math.random) => {
63
- if (size > samples.length) {
64
- throw new Error("Cannot sample more items than available without replacement.");
65
- }
66
-
67
- const copy = [...samples];
68
- shuffleInPlace(copy, random);
69
- return copy.slice(0, size);
70
- };
71
-
72
- const sampleWithReplacement = (samples, size, random = Math.random) => {
73
- if (samples.length === 0) {
74
- throw new Error("Cannot sample from an empty array.");
75
- }
76
-
77
- const out = [];
78
-
79
- for (let i = 0; i < size; i++) {
80
- const idx = Math.floor(random() * samples.length);
81
- out.push(samples[idx]);
82
- }
83
-
84
- return out;
85
- };
86
-
87
- const maybeClone = (value, clone) => {
88
- if (!clone) return value;
89
- return structuredClone(value);
90
- };
91
-
92
- const groupXYByLabel = (X, Y, { cloneX = false } = {}) => {
93
- validateXY(X, Y);
94
-
95
- const groups = new Map();
96
-
97
- Y.forEach((label, i) => {
98
- const key = makeLabelKey(label);
99
-
100
- if (!groups.has(key)) {
101
- groups.set(key, []);
102
- }
103
-
104
- groups.get(key).push({
105
- x: maybeClone(X[i], cloneX),
106
- y: label,
107
- });
108
- });
109
-
110
- return groups;
111
- };
112
-
113
- export const oversampleXY = (X, Y, options = {}) => {
114
- const {
115
- random,
116
- seed,
117
- shuffleResult = true,
118
- cloneX = false,
119
- } = options;
120
-
121
- const rng = resolveRandom({ random, seed });
122
- const groups = groupXYByLabel(X, Y, { cloneX });
123
-
124
- const counts = [...groups.values()].map((samples) => samples.length);
125
- const maxCount = Math.max(...counts);
126
-
127
- const combined = [];
128
-
129
- for (const samples of groups.values()) {
130
- const originals = [...samples];
131
- const extrasNeeded = maxCount - originals.length;
132
-
133
- const extras =
134
- extrasNeeded > 0
135
- ? sampleWithReplacement(samples, extrasNeeded, rng).map((sample) => ({
136
- x: maybeClone(sample.x, cloneX),
137
- y: sample.y,
138
- }))
139
- : [];
140
-
141
- combined.push(...originals, ...extras);
142
- }
143
-
144
- if (shuffleResult) {
145
- shuffleInPlace(combined, rng);
146
- }
147
-
148
- return {
149
- X: combined.map(({ x }) => x),
150
- Y: combined.map(({ y }) => y),
151
- };
152
- };
153
-
154
- export const undersampleXY = (X, Y, options = {}) => {
155
- const {
156
- random,
157
- seed,
158
- shuffleResult = true,
159
- cloneX = false,
160
- } = options;
161
-
162
- const rng = resolveRandom({ random, seed });
163
- const groups = groupXYByLabel(X, Y, { cloneX });
164
-
165
- const counts = [...groups.values()].map((samples) => samples.length);
166
- const minCount = Math.min(...counts);
167
-
168
- const combined = [];
169
-
170
- for (const samples of groups.values()) {
171
- const selected = sampleWithoutReplacement(samples, minCount, rng).map((sample) => ({
172
- x: maybeClone(sample.x, cloneX),
173
- y: sample.y,
174
- }));
175
-
176
- combined.push(...selected);
177
- }
178
-
179
- if (shuffleResult) {
180
- shuffleInPlace(combined, rng);
181
- }
182
-
183
- return {
184
- X: combined.map(({ x }) => x),
185
- Y: combined.map(({ y }) => y),
186
- };
187
- };