xy-scale 1.4.44 → 1.4.45

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -29,7 +29,8 @@ Builds supervised-learning datasets and splits them into training and testing ar
29
29
  #### Parameters
30
30
 
31
31
  - `arrObj` (Array<Object>): Source dataset.
32
- - `trainingSplit` (Number, optional): Fraction of rows used for training. Default: `0.8`.
32
+ - `trainSize` (Number, required): Number of rows placed in the training set. Must be a positive integer.
33
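+ - `testSize` (Number, required): Number of rows placed in the testing set. Must be a positive integer, and `trainSize + testSize` must not exceed `arrObj.length`.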
33
34
  - `yCallbackFunc` (Function, optional): Builds the output object for each row. Returning `null` or `undefined` skips the row.
34
35
  - `xCallbackFunc` (Function, optional): Builds the feature object for each row. Returning `null` or `undefined` skips the row.
35
36
  - `validateRows` (Function, optional): Extra row filter executed before the callbacks.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "xy-scale",
3
- "version": "1.4.44",
3
+ "version": "1.4.45",
4
4
  "main": "./index.js",
5
5
  "type": "module",
6
6
  "scripts": {
package/src/datasets.js CHANGED
@@ -1,23 +1,35 @@
1
1
  import { arrayShuffle } from "./utilities.js";
2
- import { validateFirstRow, validateArray, hasInvalidNumbers } from "./validators.js";
2
+ import { validateFirstRow, validateArray, hasInvalidNumbers, validateSizes } from "./validators.js";
3
+ import {zscore2d} from './zscore2.js'
3
4
 
4
5
  export const parseTrainingXY = ({
5
6
  arrObj = [],
6
- trainingSplit = 0.8,
7
+ trainSize = null,
8
+ testSize = null,
7
9
  yCallbackFunc = row => row,
8
10
  xCallbackFunc = row => row,
9
11
  validateRows = () => true,
10
12
  shuffle = false,
11
13
  state = {},
14
+ showSource = false,
15
+ scaling = null
12
16
  }) => {
13
17
  validateArray(arrObj, { min: 2 }, 'parseTrainingXY');
14
18
  validateFirstRow(arrObj[0]);
15
19
 
20
+ const arrObjSize = arrObj.length;
21
+
22
+ validateSizes({arrObjSize, trainSize, testSize});
23
+
24
+ if(![null, 'zscore'].includes(scaling)) {
25
+ throw new Error(`Invalid "scaling" property. Accepting null or "zscore".`)
26
+ }
27
+
28
+ const totalSize = trainSize + testSize;
16
29
  let flatX = [];
17
30
  let flatY = [];
18
31
  let source = [];
19
32
 
20
-
21
33
  let keyNamesX = null;
22
34
  let keyNamesY = null;
23
35
 
@@ -75,7 +87,11 @@ export const parseTrainingXY = ({
75
87
 
76
88
  flatX.push(rowX);
77
89
  flatY.push(rowY);
78
- source.push(arrObj[x])
90
+
91
+ if(showSource) {
92
+ source.push(arrObj[x])
93
+ }
94
+
79
95
 
80
96
  } catch(err) {
81
97
  throw new Error(`[BUG] - Skipped row index=${x}: ${err.message}`);
@@ -88,21 +104,31 @@ export const parseTrainingXY = ({
88
104
  for (let i = 0; i < flatX.length; i++) {
89
105
  merged[i] = {
90
106
  x: flatX[i],
91
- y: flatY[i],
92
- source: source[i]
107
+ y: flatY[i]
93
108
  };
109
+
110
+ if(showSource) {
111
+ merged[i].source = source[i]
112
+ }
94
113
  }
95
114
 
96
115
  const shuffled = arrayShuffle(merged);
97
116
 
98
117
  flatX = new Array(shuffled.length);
99
118
  flatY = new Array(shuffled.length);
100
- source = new Array(shuffled.length)
119
+
120
+ if(showSource) {
121
+ source = new Array(shuffled.length)
122
+ }
123
+
101
124
 
102
125
  for (let i = 0; i < shuffled.length; i++) {
103
126
  flatX[i] = shuffled[i].x;
104
127
  flatY[i] = shuffled[i].y;
105
- source[i] = shuffled[i].source
128
+
129
+ if(showSource) {
130
+ source[i] = shuffled[i].source;
131
+ }
106
132
  }
107
133
  }
108
134
 
@@ -115,14 +141,44 @@ export const parseTrainingXY = ({
115
141
  labelCounts,
116
142
  };
117
143
 
118
- const splitIndex = Math.floor(flatX.length * trainingSplit);
144
+ const startSize = arrObjSize - totalSize
145
+
146
+ flatX.splice(0, startSize) //keeps the last items
147
+ flatY.splice(0, startSize) //keeps the last items
148
+
149
+ let trainX = flatX.slice(0, trainSize);
150
+ let trainY = flatY.slice(0, trainSize);
151
+
152
+ let stats = null
153
+
154
+ if(scaling === 'zscore') {
155
+ let trainNormalized = zscore2d(trainX)
156
+ stats = trainNormalized.stats
157
+ trainX = trainNormalized.data
158
+ trainNormalized = null
159
+ }
160
+
161
+ let testX = flatX.slice(-testSize);
162
+
163
+ if(scaling === 'zscore') {
164
+ testX = zscore2d(testX, stats).data
165
+ }
166
+
167
+ let testY = flatY.slice(-testSize);
119
168
 
120
- let trainX = flatX.slice(0, splitIndex);
121
- let trainY = flatY.slice(0, splitIndex);
122
- let testX = flatX.slice(splitIndex);
123
- let testY = flatY.slice(splitIndex);
124
- let trainSource = source.slice(0, splitIndex);
125
- let testSource = source.slice(splitIndex);
169
+
170
+ flatX = null
171
+ flatY = null
172
+
173
+ let trainSource
174
+ let testSource
175
+
176
+ if(showSource) {
177
+ source.splice(0, startSize) //keeps the last items
178
+ trainSource = source.slice(0, trainSize);
179
+ testSource = source.slice(-testSize);
180
+ source = null
181
+ }
126
182
 
127
183
  return {
128
184
  trainX,
@@ -132,8 +188,9 @@ export const parseTrainingXY = ({
132
188
  configX,
133
189
  configY,
134
190
  trainSource,
135
- testSource
136
- };
191
+ testSource,
192
+ stats
193
+ }
137
194
  };
138
195
  export const parseProductionX = ({
139
196
  arrObj = [],
@@ -142,6 +199,9 @@ export const parseProductionX = ({
142
199
  validateRows = () => true,
143
200
  shuffle = false,
144
201
  state = {},
202
+ showSource = false,
203
+ scaling = null,
204
+ stats
145
205
  }) => {
146
206
  let flatX = [];
147
207
  let source = [];
@@ -150,6 +210,12 @@ export const parseProductionX = ({
150
210
  validateArray(arrObj, { min: 1 }, 'parseProductionX');
151
211
  validateFirstRow(arrObj[0]);
152
212
 
213
+ if(![null, 'zscore'].includes(scaling)) {
214
+ throw new Error(`Invalid "scaling" property. Accepting null or "zscore".`)
215
+ }
216
+
217
+ const arrObjSize = arrObj.length
218
+
153
219
  if (yCallbackFunc != null) {
154
220
  throw new Error('The property "yCallbackFunc" must not be set in "parseProductionX".');
155
221
  }
@@ -179,7 +245,11 @@ export const parseProductionX = ({
179
245
  }
180
246
 
181
247
  flatX.push(rowX);
182
- source.push(arrObj[x])
248
+
249
+ if(showSource) {
250
+ source.push(arrObj[x])
251
+ }
252
+
183
253
 
184
254
  } catch(err) {
185
255
  throw new Error(`[BUG] - Skipped row index=${x}: ${err.message}`);
@@ -191,19 +261,29 @@ export const parseProductionX = ({
191
261
 
192
262
  for (let i = 0; i < flatX.length; i++) {
193
263
  merged[i] = {
194
- x: flatX[i],
195
- source: source[i]
264
+ x: flatX[i]
196
265
  };
266
+
267
+ if(showSource) {
268
+ merged[i].source = source[i]
269
+ }
197
270
  }
198
271
 
199
272
  const shuffled = arrayShuffle(merged);
200
273
 
201
274
  flatX = new Array(shuffled.length);
202
- source = new Array(shuffled.length)
275
+
276
+ if(showSource) {
277
+ source = new Array(shuffled.length)
278
+ }
279
+
203
280
 
204
281
  for (let i = 0; i < shuffled.length; i++) {
205
282
  flatX[i] = shuffled[i].x;
206
- source[i] = shuffled[i].source
283
+
284
+ if(showSource) {
285
+ source[i] = shuffled[i].source
286
+ }
207
287
  }
208
288
  }
209
289
 
@@ -211,6 +291,10 @@ export const parseProductionX = ({
211
291
  keyNames: keyNamesX ?? [],
212
292
  };
213
293
 
294
+ if(scaling === 'zscore') {
295
+ flatX = zscore2d(flatX, stats).data
296
+ }
297
+
214
298
  return {
215
299
  X: flatX,
216
300
  source,
package/src/validators.js CHANGED
@@ -1,6 +1,8 @@
1
1
 
2
2
  export const isNumber = v => v != null && Number.isFinite(v)
3
3
 
4
+ export const isPositiveInteger = value => Number.isInteger(value) && value > 0 // true only for integers strictly greater than zero; 0, negatives, fractions and non-number values are rejected
5
+
4
6
  export const isKeyPairObject = param => {
5
7
  return (
6
8
  param !== null &&
@@ -132,4 +134,24 @@ export const arraysAreNotEqualSize = (list, callerName) => {
132
134
  }
133
135
 
134
136
  return false
137
+ }
138
+
139
/**
 * Validates the requested train/test split sizes against the dataset size.
 *
 * @param {Object} params
 * @param {number} params.arrObjSize - Number of rows in the source dataset (non-negative integer).
 * @param {number} params.trainSize - Requested number of training rows (integer > 0).
 * @param {number} params.testSize - Requested number of testing rows (integer > 0).
 * @returns {boolean} Always `true` when every check passes.
 * @throws {Error} When a size has the wrong type/range, or when
 *   `trainSize + testSize` is larger than `arrObjSize`.
 */
export const validateSizes = ({arrObjSize, trainSize, testSize}) => {

    // Zero is rejected by isPositiveInteger, so the messages must say
    // "positive" (not "non-negative") to match what is actually enforced.
    if(!isPositiveInteger(trainSize)) {
        throw new Error(`Invalid property: "trainSize" (${trainSize}) must be a positive integer.`)
    }
    if(!isPositiveInteger(testSize)) {
        throw new Error(`Invalid property: "testSize" (${testSize}) must be a positive integer.`)
    }

    // The dataset size itself may legitimately be 0; it only has to be an integer count.
    if (!Number.isInteger(arrObjSize) || arrObjSize < 0) {
        throw new Error(`Invalid property: "arrObjSize" (${arrObjSize}) must be a non-negative integer.`)
    }

    // Both partitions are carved out of the same dataset, so together they cannot exceed it.
    if(arrObjSize < (trainSize + testSize)) {
        throw new Error(`Invalid property: The sum of "trainSize" + "testSize" (${trainSize + testSize}) must not be larger than "arrObj.length" (${arrObjSize}).`)
    }

    return true
}
package/src/zscore2.js ADDED
@@ -0,0 +1,145 @@
1
/**
 * Column-wise z-score scaling for 2D arrays (rows of equal length).
 *
 * The only export is `zscore2d`. It either fits per-column statistics on the
 * given rows or re-applies previously fitted statistics, so that training and
 * test/production data can share one scale.
 */

/** Copies a typed array (or any iterable) into a plain Array. */
const toArray = typedOrArray => Array.from(typedOrArray);

/**
 * True for plain arrays and typed arrays (Float64Array, Uint32Array, ...).
 * DataView is excluded because it has no indexed elements.
 */
const isNumericList = value =>
    Array.isArray(value) || (ArrayBuffer.isView(value) && !(value instanceof DataView));

/** Returns `value` when it is an indexable numeric list, otherwise an empty array. */
const listOrEmpty = value => (isNumericList(value) ? value : []);

/**
 * Ensures a row is an array with exactly `cols` entries.
 *
 * @throws {TypeError} When the row is not an array.
 * @throws {Error} When the row length differs from `cols`.
 */
const assertRow = (row, index, cols) => {
    if (!Array.isArray(row)) {
        throw new TypeError(`[zscore2d] Invalid row at index=${index}. Expected an array.`);
    }
    if (row.length !== cols) {
        throw new Error(`[zscore2d] Inconsistent row size at index=${index}. Expected ${cols}, got ${row.length}.`);
    }
};

/** Converts the internal typed-array stats into plain arrays for the caller. */
const exportStats = ({ mean, std, scale, count }) => ({
    mean: toArray(mean),
    std: toArray(std),
    scale: toArray(scale),
    count: toArray(count)
});

/**
 * Sanitises caller-supplied stats into typed arrays of length `cols`.
 *
 * Missing or non-finite entries become 0. When only one of `std` / `scale`
 * is usable for a column the other is derived from it (scale = 1 / std).
 * A column whose scale ends up 0 is later emitted as all zeros.
 * Plain arrays and typed arrays are both accepted as stat lists.
 */
const normalizeStats = (stats, cols) => {
    const mean = new Float64Array(cols);
    const std = new Float64Array(cols);
    const scale = new Float64Array(cols);
    const count = new Uint32Array(cols);

    const meanIn = listOrEmpty(stats?.mean);
    const stdIn = listOrEmpty(stats?.std);
    const scaleIn = listOrEmpty(stats?.scale);
    const countIn = listOrEmpty(stats?.count);

    for (let j = 0; j < cols; j++) {
        const m = Number(meanIn[j]);
        const s = Number(stdIn[j]);
        const sc = Number(scaleIn[j]);
        const c = Number(countIn[j]);

        mean[j] = Number.isFinite(m) ? m : 0;
        std[j] = Number.isFinite(s) && s > 0 ? s : 0;
        count[j] = Number.isFinite(c) && c > 0 ? c : 0;

        if (Number.isFinite(sc) && sc > 0) {
            // An explicit scale wins; back-fill std from it if std was unusable.
            scale[j] = sc;
            if (!(std[j] > 0)) { std[j] = 1 / sc; }
        } else if (std[j] > 0) {
            scale[j] = 1 / std[j];
        } else {
            scale[j] = 0;
        }
    }

    return { mean, std, scale, count };
};

/**
 * Fits per-column mean and population standard deviation in a single pass
 * using a running (Welford-style) update. Values that are not finite after
 * `Number(...)` coercion are skipped and do not contribute to `count`.
 */
const fitStats = (arr, cols) => {
    const rows = arr.length;
    const mean = new Float64Array(cols);
    const m2 = new Float64Array(cols);
    const count = new Uint32Array(cols);

    for (let i = 0; i < rows; i++) {
        const row = arr[i];
        assertRow(row, i, cols);

        for (let j = 0; j < cols; j++) {
            const x = Number(row[j]);
            if (!Number.isFinite(x)) { continue; }

            count[j]++;

            const delta = x - mean[j];
            mean[j] += delta / count[j];
            m2[j] += delta * (x - mean[j]);
        }
    }

    const std = new Float64Array(cols);
    const scale = new Float64Array(cols);

    for (let j = 0; j < cols; j++) {
        // Population variance (divide by n); a single sample has no spread.
        const variance = count[j] > 1 ? (m2[j] / count[j]) : 0;
        const s = variance > 0 ? Math.sqrt(variance) : 0;
        std[j] = s;
        scale[j] = s > 0 ? (1 / s) : 0;
    }

    return { mean, std, scale, count };
};

/**
 * Applies `(x - mean) * scale` to every cell. Cells that are not finite, and
 * columns whose scale is 0 (constant or unknown columns), are emitted as 0.
 */
const scaleFromStats = (arr, normalizedStats) => {
    const rows = arr.length;
    const cols = normalizedStats.mean.length;
    const out = new Array(rows);

    for (let i = 0; i < rows; i++) {
        const row = arr[i];
        assertRow(row, i, cols);

        const normalized = new Array(cols);
        for (let j = 0; j < cols; j++) {
            const x = Number(row[j]);
            const sc = normalizedStats.scale[j];

            normalized[j] = (Number.isFinite(x) && sc > 0)
                ? (x - normalizedStats.mean[j]) * sc
                : 0;
        }

        out[i] = normalized;
    }

    return out;
};

/**
 * Z-score scales a 2D array column by column.
 *
 * @param {Array<Array<number>>} arr - Rows of equal length.
 * @param {?{mean?: ArrayLike<number>, std?: ArrayLike<number>, scale?: ArrayLike<number>, count?: ArrayLike<number>}} [stats=null]
 *   Previously fitted statistics. When falsy, statistics are fitted on `arr`.
 * @returns {{stats: {mean: number[], std: number[], scale: number[], count: number[]}, data: number[][]}}
 *   The statistics that were used (as plain arrays) and the scaled rows.
 * @throws {TypeError} When `arr` or one of its rows is not an array.
 * @throws {Error} When a row length differs from the first row's length.
 */
export const zscore2d = (arr, stats = null) => {
    if (!Array.isArray(arr)) {
        throw new TypeError('[zscore2d] "arr" must be a 2D array.');
    }

    if (arr.length === 0) {
        // There are no rows to infer the column count from, so take it from the
        // supplied stats; this passes them through instead of discarding them.
        const cols = Math.max(
            listOrEmpty(stats?.mean).length,
            listOrEmpty(stats?.std).length,
            listOrEmpty(stats?.scale).length
        );
        return {
            stats: exportStats(normalizeStats(stats ?? {}, cols)),
            data: []
        };
    }

    const firstRow = arr[0];
    if (!Array.isArray(firstRow)) {
        throw new TypeError('[zscore2d] "arr" must be a 2D array of rows.');
    }

    const cols = firstRow.length;
    const normalizedStats = stats ? normalizeStats(stats, cols) : fitStats(arr, cols);
    const data = scaleFromStats(arr, normalizedStats);

    return {
        stats: exportStats(normalizedStats),
        data
    };
};
package/test/test.js CHANGED
@@ -31,6 +31,9 @@ const test = async () => {
31
31
 
32
32
  const arrObj = indicators.getData()
33
33
 
34
+ const trainSize = Math.round(arrObj.length * 0.8)
35
+ const testSize = arrObj.length - trainSize
36
+
34
37
  const {
35
38
  trainX,
36
39
  trainY,
@@ -39,8 +42,10 @@ const test = async () => {
39
42
  configX,
40
43
  configY
41
44
  } = parseTrainingXY({
45
+ scaling: 'zscore',
42
46
  arrObj,
43
- trainingSplit: 0.50,
47
+ trainSize,
48
+ testSize,
44
49
  yCallbackFunc,
45
50
  xCallbackFunc,
46
51
  validateRows: ({objRow, index}) => {