xy-scale 1.4.32 → 1.4.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- var XY_Scale;(()=>{"use strict";var e={d:(t,r)=>{for(var n in r)e.o(r,n)&&!e.o(t,n)&&Object.defineProperty(t,n,{enumerable:!0,get:r[n]})},o:(e,t)=>Object.prototype.hasOwnProperty.call(e,t),r:e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})}},t={};e.r(t),e.d(t,{arrayToTimesteps:()=>i,parseProductionX:()=>s,parseTrainingXY:()=>a});const r=e=>null==e||!Number.isFinite(e),n=e=>{const t=[...e];for(let e=t.length-1;e>0;e--){const r=Math.floor(Math.random()*(e+1));[t[e],t[r]]=[t[r],t[e]]}return t},o=(e,{min:t=-1/0,max:r=1/0},n)=>{if(!Array.isArray(e))throw new Error(`Invalid property. "${n}" expected an array.`);if(e.length<t)throw new Error(`Invalid property value. Array "${n}" expected at least ${r} items.`);if(e.length>r)throw new Error(`Invalid property value. Array "${n}" expected at max ${r} items.`);return!0},l=e=>{for(const[t,r]of Object.entries(e)){if("number"==typeof r&&Number.isNaN(r))throw new Error(`Invalid value at index 0 property "${t}": value is "${r}". 
Expected a numeric value.`);if(null===r)throw new Error(`Invalid value at index 0 property "${t}": value is "${r}".`)}return!0},a=({arrObj:e=[],trainingSplit:t=.8,yCallbackFunc:a=e=>e,xCallbackFunc:s=e=>e,validateRows:i=()=>!0,shuffle:c=!1,balancing:u="",state:h={}})=>{let f=[],p=[];const y=[];o(e,{min:5},"parseTrainingXY"),l(e[0]);for(let t=0;t<e.length;t++){if(!i({objRow:e,index:t,state:h}))continue;const r=s({objRow:e,index:t,state:h}),n=a({objRow:e,index:t,state:h});null!=r&&null!=n&&(f.push(r),p.push(n),y.push(t))}if(c){const e=new Array(f.length);for(let t=0;t<f.length;t++)e[t]={x:f[t],y:p[t],sourceIndex:y[t]};const t=n(e);f=new Array(t.length),p=new Array(t.length);for(let e=0;e<t.length;e++)f[e]=t[e].x,p[e]=t[e].y,y[e]=t[e].sourceIndex}const d=f.length,g=p.length,w=d?Object.keys(f[0]).filter((e=>"tempIdx"!==e)):[],b=g?Object.keys(p[0]).filter((e=>"tempIdx"!==e)):[],m=new Array(d),x=new Array(g),v={keyNames:w},j={keyNames:b};for(let e=0;e<d;e++){const t=f[e],n=y[e],o=new Array(w.length);for(let e=0;e<w.length;e++){const l=w[e],a=t[l];if(r(a))throw new Error(`Invalid property value (${a}) returned from "xCallbackFunc" on index "${n}" property "${l}".`);o[e]=a}m[e]=o}for(let e=0;e<g;e++){const t=p[e],r=new Array(b.length);for(let e=0;e<b.length;e++)r[e]=t[b[e]];x[e]=r}const A=Math.floor(m.length*t);let O=m.slice(0,A),X=x.slice(0,A),$=m.slice(A),E=x.slice(A);if(u){let e;if("oversample"===u)e=((e,t)=>{const r={},n={};t.forEach(((o,l)=>{r[o]||(r[o]=0,n[o]=[]),r[o]++,n[o].push([e[l],t[l]])}));const o=Math.max(...Object.values(r)),l=[],a=[];return Object.keys(n).forEach((e=>{const t=n[e],r=t.length;for(let e=0;e<o;e++){const n=t[e%r];l.push(n[0]),a.push(n[1])}})),{X:l,Y:a}})(O,X),O=e.X,X=e.Y;else{if("undersample"!==u)throw Error('balancing argument only accepts "", "oversample" and "undersample". 
Defaults to "".');e=((e,t)=>{const r={},n={};t.forEach(((o,l)=>{r[o]||(r[o]=0,n[o]=[]),r[o]++,n[o].push([e[l],t[l]])}));const o=Math.min(...Object.values(r)),l=[],a=[];return Object.keys(n).forEach((e=>{const t=n[e];for(let e=0;e<o;e++){const r=t[e];l.push(r[0]),a.push(r[1])}})),{X:l,Y:a}})(O,X),O=e.X,X=e.Y}}return{trainX:O,trainY:X,testX:$,testY:E,configX:v,configY:j}},s=({arrObj:e=[],xCallbackFunc:t=e=>e,validateRows:a=()=>!0,shuffle:s=!1,state:i={}})=>{let c=[],u=[];o(e,{min:5},"parseProductionX"),l(e[0]);for(let r=0;r<e.length;r++){if(!a({objRow:e,index:r,state:i}))continue;const n=t({objRow:e,index:r,state:i});null!=n&&!1!==n&&(c.push(n),u.push(r))}if(s){const e=new Array(c.length);for(let t=0;t<c.length;t++)e[t]={x:c[t],sourceIndex:u[t]};const t=n(e);c=new Array(t.length),u=new Array(t.length);for(let e=0;e<t.length;e++)c[e]=t[e].x,u[e]=t[e].sourceIndex}const h=c.length,f=h?Object.keys(c[0]).filter((e=>"tempIdx"!==e)):[],p=new Array(h),y={keyNames:f};for(let e=0;e<h;e++){const t=c[e],n=u[e],o=new Array(f.length);for(let e=0;e<f.length;e++){const l=f[e],a=t[l];if(r(a))throw new Error(`Invalid property value (${a}) returned from "xCallbackFunc" on index "${n}" property "${l}".`);o[e]=a}p[e]=o}return{X:p,configX:y}},i=(e,t)=>{if(0===t)return e;if(t<0)throw new Error("timeSteps must be greater than 0");const r=[];for(let n=0;n<=e.length-t;n++)r.push(e.slice(n,n+t));return r};XY_Scale=t})();
1
+ var XY_Scale;(()=>{"use strict";var e={d:(r,t)=>{for(var n in t)e.o(t,n)&&!e.o(r,n)&&Object.defineProperty(r,n,{enumerable:!0,get:t[n]})},o:(e,r)=>Object.prototype.hasOwnProperty.call(e,r),r:e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})}},r={};e.r(r),e.d(r,{arrayToTimesteps:()=>A,parseProductionX:()=>v,parseTrainingXY:()=>$});const t=e=>null==e||!Number.isFinite(e),n=e=>{const r=[...e];for(let e=r.length-1;e>0;e--){const t=Math.floor(Math.random()*(e+1));[r[e],r[t]]=[r[t],r[e]]}return r},a=e=>"string"==typeof e||"boolean"==typeof e||("number"==typeof e?Number.isFinite(e):!!Array.isArray(e)&&e.every(a)),o=({random:e,seed:r}={})=>"function"==typeof e?e:Number.isInteger(r)?(e=>{let r=e>>>0;return()=>{r+=1831565813;let e=Math.imul(r^r>>>15,1|r);return e^=e+Math.imul(e^e>>>7,61|e),((e^e>>>14)>>>0)/4294967296}})(r):Math.random,s=(e,r=Math.random)=>{for(let t=e.length-1;t>0;t--){const n=Math.floor(r()*(t+1));[e[t],e[n]]=[e[n],e[t]]}return e},l=(e,r,t=Math.random)=>{if(r>e.length)throw new Error("Cannot sample more items than available without replacement.");const n=[...e];return s(n,t),n.slice(0,r)},i=(e,r,t=Math.random)=>{if(0===e.length)throw new Error("Cannot sample from an empty array.");const n=[];for(let a=0;a<r;a++){const r=Math.floor(t()*e.length);n.push(e[r])}return n},c=(e,r)=>r?structuredClone(e):e,h=(e,r,{cloneX:t=!1}={})=>{((e,r)=>{if(!Array.isArray(e)||!Array.isArray(r))throw new Error("X and Y must be arrays.");if(e.length!==r.length)throw new Error("X and Y must have the same length.");if(0===e.length)throw new Error("X and Y cannot be empty.")})(e,r);const n=new Map;return r.forEach(((r,o)=>{const s=(e=>{if(!a(e))throw new Error("Invalid Y label. 
Allowed types: finite numbers, strings, booleans, or nested arrays of those.");return JSON.stringify(e)})(r);n.has(s)||n.set(s,[]),n.get(s).push({x:c(e[o],t),y:r})})),n},u=(e,{min:r=-1/0,max:t=1/0},n)=>{if(!Array.isArray(e))throw new Error(`Invalid property. "${n}" expected an array.`);if(e.length<r)throw new Error(`Invalid property value. Array "${n}" expected at least ${t} items.`);if(e.length>t)throw new Error(`Invalid property value. Array "${n}" expected at max ${t} items.`);return!0},d=e=>{for(const[r,t]of Object.entries(e)){if("number"==typeof t&&Number.isNaN(t))throw new Error(`Invalid value at index 0 property "${r}": value is "${t}". Expected a numeric value.`);if(null===t)throw new Error(`Invalid value at index 0 property "${r}": value is "${t}".`)}return!0},f=e=>{return Array.isArray(e)?"array":(r=e,"[object Object]"===Object.prototype.toString.call(r)?"object":typeof e);var r},p=e=>Object.keys(e).filter((e=>"tempIdx"!==e)),y=({parentPath:e="",key:r,isArrayParent:t=!1})=>`${e}${t?`[${r}]`:e?`.${r}`:`${r}`}`,g=({rowLabel:e,sourceIndex:r,path:t="",detail:n,strictFlagName:a})=>new Error(`${e} schema error at index "${r}"${t?` path "${t}"`:""}. 
${n} Set "${a}" to false to disable this validation.`),m=({referenceRow:e,currentRow:r,rowLabel:t,currentIndex:n,strictFlagName:a})=>{const o=f(e),s=f(r);if(o!==s)throw g({rowLabel:t,sourceIndex:n,detail:`Expected row type "${o}" based on the first parsed ${t} row, but got "${s}".`,strictFlagName:a});if("array"!==o&&"object"!==o)throw g({rowLabel:t,sourceIndex:n,detail:`Parsed ${t} rows must be arrays or plain objects.`,strictFlagName:a});const l=p(e),i=p(r),c=new Set(l),h=new Set(i),u=Array.isArray(e);for(let e=0;e<l.length;e++){const r=l[e];if(!h.has(r))throw g({rowLabel:t,sourceIndex:n,path:y({key:r,isArrayParent:u}),detail:`Missing required ${u?"index":"property"} "${r}" found in the first parsed ${t} row.`,strictFlagName:a})}for(let e=0;e<i.length;e++){const r=i[e];if(!c.has(r))throw g({rowLabel:t,sourceIndex:n,path:y({key:r,isArrayParent:u}),detail:`Unexpected ${u?"index":"property"} "${r}" not present in the first parsed ${t} row.`,strictFlagName:a})}return l},w=e=>Array.isArray(e)?"array":typeof e,b=({referenceValue:e,currentValue:r,sourceIndex:t,path:n,strictFlagName:a})=>{const o=w(e),s=w(r);if(o!==s)throw g({rowLabel:"Y",sourceIndex:t,path:n,detail:`Expected type "${o}" based on the first parsed Y row, but got "${s}".`,strictFlagName:a});if("array"!==o){if("number"!==o&&"boolean"!==o&&"string"!==o)throw g({rowLabel:"Y",sourceIndex:t,path:n,detail:`Unsupported Y value type "${o}". 
Y values must be numbers, booleans, strings, or nested arrays of those types.`,strictFlagName:a})}else{if(e.length!==r.length)throw g({rowLabel:"Y",sourceIndex:t,path:n,detail:`Expected array length "${e.length}" based on the first parsed Y row, but got "${r.length}".`,strictFlagName:a});for(let o=0;o<e.length;o++)b({referenceValue:e[o],currentValue:r[o],sourceIndex:t,path:y({parentPath:n,key:o,isArrayParent:!0}),strictFlagName:a})}},x=({rows:e,sourceIndexes:r,strictFlagName:t})=>{if(!e.length)return;const n=e[0],a=(e,r)=>{m({referenceRow:n,currentRow:e,rowLabel:"X",currentIndex:r,strictFlagName:t})};e.length>1&&a(e[e.length-1],r[e.length-1]);for(let t=0;t<e.length;t++)a(e[t],r[t])},$=({arrObj:e=[],trainingSplit:r=.8,yCallbackFunc:a=e=>e,xCallbackFunc:f=e=>e,validateRows:g=()=>!0,shuffle:w=!1,balancing:$="",strictXSchema:v=!0,strictYSchema:A=!0,state:I={}})=>{let N=[],X=[];const F=[];u(e,{min:5},"parseTrainingXY"),d(e[0]);for(let r=0;r<e.length;r++){if(!g({objRow:e,index:r,state:I}))continue;const t=f({objRow:e,index:r,state:I}),n=a({objRow:e,index:r,state:I});null!=t&&null!=n&&(N.push(t),X.push(n),F.push(r))}if(v&&x({rows:N,sourceIndexes:F,strictFlagName:"strictXSchema"}),A&&(({rows:e,sourceIndexes:r,strictFlagName:t})=>{if(!e.length)return;const n=e[0],a=(e,r)=>{const a=m({referenceRow:n,currentRow:e,rowLabel:"Y",currentIndex:r,strictFlagName:t}),o=Array.isArray(n);for(let s=0;s<a.length;s++){const l=a[s],i=y({key:l,isArrayParent:o});b({referenceValue:n[l],currentValue:e[l],sourceIndex:r,path:i,strictFlagName:t})}};e.length>1&&a(e[e.length-1],r[e.length-1]);for(let t=0;t<e.length;t++)a(e[t],r[t])})({rows:X,sourceIndexes:F,strictFlagName:"strictYSchema"}),w){const e=new Array(N.length);for(let r=0;r<N.length;r++)e[r]={x:N[r],y:X[r],sourceIndex:F[r]};const r=n(e);N=new Array(r.length),X=new Array(r.length);for(let e=0;e<r.length;e++)N[e]=r[e].x,X[e]=r[e].y,F[e]=r[e].sourceIndex}const Y=N.length,E=X.length,S=Y?p(N[0]):[],j=E?p(X[0]):[],M=new Array(Y),R=new 
Array(E),k={keyNames:S},P={keyNames:j};for(let e=0;e<Y;e++){const r=N[e],n=F[e],a=new Array(S.length);for(let e=0;e<S.length;e++){const o=S[e],s=r[o];if(t(s))throw new Error(`Invalid property value (${s}) returned from "xCallbackFunc" on index "${n}" property "${o}".`);a[e]=s}M[e]=a}for(let e=0;e<E;e++){const r=X[e],t=new Array(j.length);for(let e=0;e<j.length;e++)t[e]=r[j[e]];R[e]=t}const O=Math.floor(M.length*r);let L=M.slice(0,O),C=R.slice(0,O),T=M.slice(O),V=R.slice(O);if($){let e;if("oversample"===$)e=((e,r,t={})=>{const{random:n,seed:a,shuffleResult:l=!0,cloneX:u=!1}=t,d=o({random:n,seed:a}),f=h(e,r,{cloneX:u}),p=[...f.values()].map((e=>e.length)),y=Math.max(...p),g=[];for(const e of f.values()){const r=[...e],t=y-r.length,n=t>0?i(e,t,d).map((e=>({x:c(e.x,u),y:e.y}))):[];g.push(...r,...n)}return l&&s(g,d),{X:g.map((({x:e})=>e)),Y:g.map((({y:e})=>e))}})(L,C),L=e.X,C=e.Y;else{if("undersample"!==$)throw Error('balancing argument only accepts "", "oversample" and "undersample". Defaults to "".');e=((e,r,t={})=>{const{random:n,seed:a,shuffleResult:i=!0,cloneX:u=!1}=t,d=o({random:n,seed:a}),f=h(e,r,{cloneX:u}),p=[...f.values()].map((e=>e.length)),y=Math.min(...p),g=[];for(const e of f.values()){const r=l(e,y,d).map((e=>({x:c(e.x,u),y:e.y})));g.push(...r)}return i&&s(g,d),{X:g.map((({x:e})=>e)),Y:g.map((({y:e})=>e))}})(L,C),L=e.X,C=e.Y}}return{trainX:L,trainY:C,testX:T,testY:V,configX:k,configY:P}},v=({arrObj:e=[],xCallbackFunc:r=e=>e,validateRows:a=()=>!0,shuffle:o=!1,strictXSchema:s=!0,state:l={}})=>{let i=[],c=[];u(e,{min:5},"parseProductionX"),d(e[0]);for(let t=0;t<e.length;t++){if(!a({objRow:e,index:t,state:l}))continue;const n=r({objRow:e,index:t,state:l});null!=n&&!1!==n&&(i.push(n),c.push(t))}if(s&&x({rows:i,sourceIndexes:c,strictFlagName:"strictXSchema"}),o){const e=new Array(i.length);for(let r=0;r<i.length;r++)e[r]={x:i[r],sourceIndex:c[r]};const r=n(e);i=new Array(r.length),c=new Array(r.length);for(let 
e=0;e<r.length;e++)i[e]=r[e].x,c[e]=r[e].sourceIndex}const h=i.length,f=h?p(i[0]):[],y=new Array(h),g={keyNames:f};for(let e=0;e<h;e++){const r=i[e],n=c[e],a=new Array(f.length);for(let e=0;e<f.length;e++){const o=f[e],s=r[o];if(t(s))throw new Error(`Invalid property value (${s}) returned from "xCallbackFunc" on index "${n}" property "${o}".`);a[e]=s}y[e]=a}return{X:y,configX:g}},A=(e,r)=>{if(0===r)return e;if(r<0)throw new Error("timeSteps must be greater than 0");const t=[];for(let n=0;n<=e.length-r;n++)t.push(e.slice(n,n+r));return t};XY_Scale=r})();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "xy-scale",
3
- "version": "1.4.32",
3
+ "version": "1.4.34",
4
4
  "main": "./index.js",
5
5
  "type": "module",
6
6
  "scripts": {
package/src/balancing.js CHANGED
@@ -1,71 +1,187 @@
1
- export const oversampleXY = (X, Y) => {
2
-
3
- const labelCounts = {};
4
- const data = {};
5
-
6
- // Count occurrences of each label and group by label
7
- Y.forEach((label, i) => {
8
- if (!labelCounts[label]) {
9
- labelCounts[label] = 0;
10
- data[label] = [];
11
- }
12
- labelCounts[label]++;
13
- data[label].push([X[i], Y[i]]);
14
- });
15
-
16
- // Find the max label count
17
- const maxCount = Math.max(...Object.values(labelCounts));
18
-
19
- const oversampledX = [];
20
- const oversampledY = [];
21
-
22
- // Oversample each label to match the max count
23
- Object.keys(data).forEach(label => {
24
- const samples = data[label];
25
- const numSamples = samples.length;
26
-
27
- for (let i = 0; i < maxCount; i++) {
28
- const sample = samples[i % numSamples]; // Cycle through samples
29
- oversampledX.push(sample[0]);
30
- oversampledY.push(sample[1]);
31
- }
32
- });
33
-
34
- return { X: oversampledX, Y: oversampledY };
1
+ const isValidLabelValue = (value) => {
2
+ if (typeof value === "string" || typeof value === "boolean") return true;
3
+ if (typeof value === "number") return Number.isFinite(value);
4
+
5
+ if (Array.isArray(value)) {
6
+ return value.every(isValidLabelValue);
35
7
  }
36
-
37
-
38
- export const undersampleXY = (X, Y) => {
39
-
40
- const labelCounts = {};
41
- const data = {};
42
-
43
- // Count occurrences of each label and group by label
44
- Y.forEach((label, i) => {
45
- if (!labelCounts[label]) {
46
- labelCounts[label] = 0;
47
- data[label] = [];
48
- }
49
- labelCounts[label]++;
50
- data[label].push([X[i], Y[i]]);
51
- });
52
-
53
- // Find the minimum label count
54
- const minCount = Math.min(...Object.values(labelCounts));
55
-
56
- const undersampledX = [];
57
- const undersampledY = [];
58
-
59
- // Undersample each label to match the minimum count
60
- Object.keys(data).forEach(label => {
61
- const samples = data[label];
62
- for (let i = 0; i < minCount; i++) {
63
- const sample = samples[i]; // Use first minCount samples
64
- undersampledX.push(sample[0]);
65
- undersampledY.push(sample[1]);
66
- }
8
+
9
+ return false;
10
+ };
11
+
12
/**
 * Produces the string key under which samples sharing a Y label are grouped.
 *
 * The key is the JSON serialization of the label, so labels of different
 * types serialize differently (the number 1 becomes `1`, the string "1"
 * becomes `"1"`).
 *
 * @param {*} label - Candidate Y label.
 * @returns {string} JSON text of the label.
 * @throws {Error} When `isValidLabelValue` rejects the label.
 */
const makeLabelKey = (label) => {
  if (isValidLabelValue(label)) {
    return JSON.stringify(label);
  }

  throw new Error(
    "Invalid Y label. Allowed types: finite numbers, strings, booleans, or nested arrays of those."
  );
};
21
+
22
/**
 * Guards the X/Y pair handed to the balancing helpers.
 *
 * @param {*} X - Expected to be an array of feature rows.
 * @param {*} Y - Expected to be an array of labels, one per X row.
 * @returns {void}
 * @throws {Error} If either argument is not an array, if the lengths differ,
 *   or if both arrays are empty (checked in that order).
 */
const validateXY = (X, Y) => {
  const bothAreArrays = Array.isArray(X) && Array.isArray(Y);
  if (!bothAreArrays) throw new Error("X and Y must be arrays.");

  const sameLength = X.length === Y.length;
  if (!sameLength) throw new Error("X and Y must have the same length.");

  if (X.length === 0) throw new Error("X and Y cannot be empty.");
};
35
+
36
/**
 * Creates a seeded pseudo-random generator (mulberry32 mixing steps).
 *
 * The internal state is wrapped to an unsigned 32-bit integer on every step.
 * Letting it grow as a plain double would eventually push it past 2^53,
 * where additions lose their low bits and the sequence degrades; wrapping
 * keeps every draw exact while producing the same values as unwrapped
 * arithmetic does for as long as that arithmetic is still exact.
 *
 * @param {number} seed - Seed value; coerced with `>>> 0` to an unsigned 32-bit integer.
 * @returns {() => number} Generator returning a float in [0, 1) on each call.
 */
const mulberry32 = (seed) => {
  let state = seed >>> 0;

  return () => {
    // Keep the counter inside uint32 so the increment never loses precision.
    state = (state + 0x6D2B79F5) >>> 0;

    let mixed = Math.imul(state ^ (state >>> 15), state | 1);
    mixed ^= mixed + Math.imul(mixed ^ (mixed >>> 7), mixed | 61);

    // `>>> 0` makes the value unsigned before scaling by 2^32.
    return ((mixed ^ (mixed >>> 14)) >>> 0) / 4294967296;
  };
};
46
+
47
/**
 * Picks the random source used by the balancing helpers.
 *
 * Precedence: a caller-supplied `random` function wins; otherwise an integer
 * `seed` yields a seeded `mulberry32` generator; otherwise `Math.random`.
 * A non-integer seed is ignored and falls through to `Math.random`.
 *
 * @param {{ random?: Function, seed?: number }} [options]
 * @returns {Function} Zero-argument generator.
 */
const resolveRandom = ({ random, seed } = {}) => {
  const hasCustomGenerator = typeof random === "function";
  if (hasCustomGenerator) {
    return random;
  }

  return Number.isInteger(seed) ? mulberry32(seed) : Math.random;
};
52
+
53
/**
 * Shuffles `arr` in place, walking from the last index down to 1 and swapping
 * each position with a randomly chosen index at or below it.
 *
 * @param {Array} arr - Array to reorder; it is mutated.
 * @param {Function} [random=Math.random] - Source of floats used to pick the swap index.
 * @returns {Array} The same array instance that was passed in.
 */
const shuffleInPlace = (arr, random = Math.random) => {
  let upper = arr.length - 1;

  while (upper > 0) {
    const pick = Math.floor(random() * (upper + 1));

    const held = arr[upper];
    arr[upper] = arr[pick];
    arr[pick] = held;

    upper -= 1;
  }

  return arr;
};
61
+
62
/**
 * Draws `size` distinct entries from `samples` by shuffling a shallow copy
 * and keeping its first `size` elements. The input array is not mutated.
 *
 * @param {Array} samples - Pool to draw from.
 * @param {number} size - Number of entries to keep.
 * @param {Function} [random=Math.random] - Random source forwarded to `shuffleInPlace`.
 * @returns {Array} New array holding the selected entries.
 * @throws {Error} When `size` exceeds `samples.length`.
 */
const sampleWithoutReplacement = (samples, size, random = Math.random) => {
  const poolTooSmall = size > samples.length;
  if (poolTooSmall) {
    throw new Error("Cannot sample more items than available without replacement.");
  }

  // shuffleInPlace returns the array it was given, so the copy can be sliced directly.
  return shuffleInPlace([...samples], random).slice(0, size);
};
71
+
72
/**
 * Draws `size` entries from `samples`, allowing the same entry to be picked
 * more than once. Each draw consumes exactly one value from `random`.
 *
 * @param {Array} samples - Pool to draw from.
 * @param {number} size - Number of draws.
 * @param {Function} [random=Math.random] - Source of floats used to pick an index.
 * @returns {Array} New array of drawn entries (references, not copies).
 * @throws {Error} When `samples` is empty, regardless of `size`.
 */
const sampleWithReplacement = (samples, size, random = Math.random) => {
  if (samples.length === 0) {
    throw new Error("Cannot sample from an empty array.");
  }

  const drawn = [];
  let draws = 0;

  while (draws < size) {
    const position = Math.floor(random() * samples.length);
    drawn.push(samples[position]);
    draws += 1;
  }

  return drawn;
};
86
+
87
/**
 * Returns `value` untouched, or a `structuredClone` deep copy of it when
 * `clone` is truthy.
 *
 * @param {*} value - Value to pass through or copy.
 * @param {boolean} clone - Whether to deep-copy.
 * @returns {*} The original reference or its deep copy.
 */
const maybeClone = (value, clone) => (clone ? structuredClone(value) : value);
91
+
92
+ const groupXYByLabel = (X, Y, { cloneX = false } = {}) => {
93
+ validateXY(X, Y);
94
+
95
+ const groups = new Map();
96
+
97
+ Y.forEach((label, i) => {
98
+ const key = makeLabelKey(label);
99
+
100
+ if (!groups.has(key)) {
101
+ groups.set(key, []);
102
+ }
103
+
104
+ groups.get(key).push({
105
+ x: maybeClone(X[i], cloneX),
106
+ y: label,
67
107
  });
68
-
69
- return { X: undersampledX, Y: undersampledY };
108
+ });
109
+
110
+ return groups;
111
+ };
112
+
113
/**
 * Balances a labelled dataset by growing every label group to the size of
 * the largest one. Each group keeps all of its original samples and is
 * topped up with extra samples drawn with replacement from that same group.
 *
 * @param {Array} X - Feature rows.
 * @param {Array} Y - Labels, one per X row; validated and grouped by `groupXYByLabel`.
 * @param {Object} [options]
 * @param {Function} [options.random] - Custom random source; takes precedence over `seed`.
 * @param {number} [options.seed] - Integer seed for a reproducible generator.
 * @param {boolean} [options.shuffleResult=true] - Shuffle the combined output.
 * @param {boolean} [options.cloneX=false] - Deep-copy X rows so no output row shares a reference.
 * @returns {{ X: Array, Y: Array }} Balanced rows and their labels, index-aligned.
 * @throws {Error} Propagates validation errors raised while grouping X and Y.
 */
export const oversampleXY = (X, Y, options = {}) => {
  const {
    random,
    seed,
    shuffleResult = true,
    cloneX = false,
  } = options;

  const rng = resolveRandom({ random, seed });
  const groups = groupXYByLabel(X, Y, { cloneX });

  // Plain loops instead of Math.max(...counts) / push(...items): spreading a
  // data-sized array into call arguments can exceed the engine's argument
  // limit and throw a RangeError on large datasets.
  let maxCount = 0;
  for (const samples of groups.values()) {
    if (samples.length > maxCount) maxCount = samples.length;
  }

  const combined = [];

  for (const samples of groups.values()) {
    for (const sample of samples) {
      combined.push(sample);
    }

    const extrasNeeded = maxCount - samples.length;
    if (extrasNeeded <= 0) continue;

    // Extras get their own copy of x (when cloneX is set) so that a duplicated
    // row never shares a reference with the sample it was drawn from.
    for (const sample of sampleWithReplacement(samples, extrasNeeded, rng)) {
      combined.push({
        x: maybeClone(sample.x, cloneX),
        y: sample.y,
      });
    }
  }

  if (shuffleResult) {
    shuffleInPlace(combined, rng);
  }

  return {
    X: combined.map(({ x }) => x),
    Y: combined.map(({ y }) => y),
  };
};
153
+
154
/**
 * Balances a labelled dataset by shrinking every label group to the size of
 * the smallest one. Samples are chosen without replacement inside each group.
 *
 * @param {Array} X - Feature rows.
 * @param {Array} Y - Labels, one per X row; validated and grouped by `groupXYByLabel`.
 * @param {Object} [options]
 * @param {Function} [options.random] - Custom random source; takes precedence over `seed`.
 * @param {number} [options.seed] - Integer seed for a reproducible generator.
 * @param {boolean} [options.shuffleResult=true] - Shuffle the combined output.
 * @param {boolean} [options.cloneX=false] - Deep-copy X rows instead of returning input references.
 * @returns {{ X: Array, Y: Array }} Balanced rows and their labels, index-aligned.
 * @throws {Error} Propagates validation errors raised while grouping X and Y.
 */
export const undersampleXY = (X, Y, options = {}) => {
  const {
    random,
    seed,
    shuffleResult = true,
    cloneX = false,
  } = options;

  const rng = resolveRandom({ random, seed });
  const groups = groupXYByLabel(X, Y, { cloneX });

  // Plain loops instead of Math.min(...counts) / push(...items): spreading a
  // data-sized array into call arguments can exceed the engine's argument
  // limit and throw a RangeError on large datasets.
  let minCount = Infinity;
  for (const samples of groups.values()) {
    if (samples.length < minCount) minCount = samples.length;
  }

  const combined = [];

  for (const samples of groups.values()) {
    // Grouping already applied cloneX to every sample, and sampling without
    // replacement uses each one at most once, so no second copy is needed.
    for (const sample of sampleWithoutReplacement(samples, minCount, rng)) {
      combined.push(sample);
    }
  }

  if (shuffleResult) {
    shuffleInPlace(combined, rng);
  }

  return {
    X: combined.map(({ x }) => x),
    Y: combined.map(({ y }) => y),
  };
};
package/src/datasets.js CHANGED
@@ -4,6 +4,229 @@ import { validateFirstRow, validateArray } from "./validators.js";
4
4
 
5
5
  // ADD A PARAM max correlation that will measure the correlation between variables if defined
6
6
 
7
/**
 * Reports whether `value` has the default object tag, i.e.
 * `Object.prototype.toString` yields "[object Object]" for it.
 * Arrays, null, and built-ins with their own tag (Date, Map, ...) return false.
 *
 * @param {*} value
 * @returns {boolean}
 */
const isPlainObject = value => {
  const tag = Object.prototype.toString.call(value);
  return tag === '[object Object]';
};
9
+
10
/**
 * Classifies a parsed row for schema comparison.
 *
 * @param {*} row
 * @returns {string} 'array' for arrays, 'object' for plain objects, otherwise
 *   the `typeof` result (which is also 'object' for null and for non-plain
 *   objects).
 */
const getRowKind = row => {
  if (Array.isArray(row)) {
    return 'array';
  }

  return isPlainObject(row) ? 'object' : typeof row;
};
15
+
16
/**
 * Lists a row's own enumerable keys, leaving out the 'tempIdx' key.
 * For arrays the keys are the index strings.
 *
 * @param {Object|Array} row
 * @returns {string[]} Keys in `Object.keys` order.
 */
const getComparableKeys = row => {
  const keys = [];

  for (const key of Object.keys(row)) {
    if (key !== 'tempIdx') {
      keys.push(key);
    }
  }

  return keys;
};
18
+
19
/**
 * Appends one segment to a path string used in schema error messages.
 *
 * Segments under an array parent are written as `[key]`; other segments are
 * joined with a dot, except the first segment, which has no separator.
 *
 * @param {Object} args
 * @param {string} [args.parentPath=''] - Path built so far.
 * @param {string|number} args.key - Key or index to append.
 * @param {boolean} [args.isArrayParent=false] - Whether `key` indexes into an array.
 * @returns {string} The extended path.
 */
const buildPath = ({ parentPath = '', key, isArrayParent = false }) => {
  if (isArrayParent) {
    return `${parentPath}[${key}]`;
  }

  if (parentPath) {
    return `${parentPath}.${key}`;
  }

  return `${parentPath}${key}`;
};
23
+
24
/**
 * Builds (does not throw) the Error reported by the strict schema checks.
 *
 * @param {Object} args
 * @param {string} args.rowLabel - Label placed at the start of the message.
 * @param {number} args.sourceIndex - Index quoted in the message.
 * @param {string} [args.path=''] - Location inside the row; omitted from the message when empty.
 * @param {string} args.detail - Sentence describing the mismatch.
 * @param {string} args.strictFlagName - Option name the message tells the user to set to false.
 * @returns {Error}
 */
const createStrictSchemaError = ({
  rowLabel,
  sourceIndex,
  path = '',
  detail,
  strictFlagName,
}) => {
  const location = path
    ? `index "${sourceIndex}" path "${path}"`
    : `index "${sourceIndex}"`;
  const hint = `Set "${strictFlagName}" to false to disable this validation.`;

  return new Error(`${rowLabel} schema error at ${location}. ${detail} ${hint}`);
};
36
+
37
/**
 * Checks that `currentRow` has the same top-level shape as `referenceRow`.
 *
 * Checks run in this order, and the first failure throws:
 *   1. both rows are of the same kind (per `getRowKind`);
 *   2. that kind is 'array' or 'object';
 *   3. every comparable key of the reference row exists on the current row;
 *   4. the current row has no comparable key missing from the reference row.
 *
 * @param {Object} args
 * @param {Object|Array} args.referenceRow - Row that defines the expected shape.
 * @param {*} args.currentRow - Row being checked.
 * @param {string} args.rowLabel - Label used in error messages.
 * @param {number} args.currentIndex - Index reported in error messages.
 * @param {string} args.strictFlagName - Option name mentioned in error messages.
 * @returns {string[]} Comparable keys of the reference row.
 * @throws {Error} Error built by `createStrictSchemaError` on the first mismatch.
 */
const assertSameTopLevelSchema = ({
  referenceRow,
  currentRow,
  rowLabel,
  currentIndex,
  strictFlagName,
}) => {
  // Every failure shares the same label, index and flag name.
  const fail = (detail, path) =>
    createStrictSchemaError({
      rowLabel,
      sourceIndex: currentIndex,
      path,
      detail,
      strictFlagName,
    });

  const expectedKind = getRowKind(referenceRow);
  const actualKind = getRowKind(currentRow);

  if (expectedKind !== actualKind) {
    throw fail(
      `Expected row type "${expectedKind}" based on the first parsed ${rowLabel} row, but got "${actualKind}".`
    );
  }

  const isContainer = expectedKind === 'array' || expectedKind === 'object';
  if (!isContainer) {
    throw fail(`Parsed ${rowLabel} rows must be arrays or plain objects.`);
  }

  const referenceKeys = getComparableKeys(referenceRow);
  const currentKeys = getComparableKeys(currentRow);

  const isArrayRow = Array.isArray(referenceRow);
  const slotNoun = isArrayRow ? 'index' : 'property';

  const presentKeys = new Set(currentKeys);
  const missingKey = referenceKeys.find(key => !presentKeys.has(key));

  if (missingKey !== undefined) {
    throw fail(
      `Missing required ${slotNoun} "${missingKey}" found in the first parsed ${rowLabel} row.`,
      buildPath({ key: missingKey, isArrayParent: isArrayRow })
    );
  }

  const expectedKeys = new Set(referenceKeys);
  const extraKey = currentKeys.find(key => !expectedKeys.has(key));

  if (extraKey !== undefined) {
    throw fail(
      `Unexpected ${slotNoun} "${extraKey}" not present in the first parsed ${rowLabel} row.`,
      buildPath({ key: extraKey, isArrayParent: isArrayRow })
    );
  }

  return referenceKeys;
};
102
+
103
/**
 * Classifies a Y value for schema comparison: 'array' for arrays, otherwise
 * the `typeof` result.
 *
 * @param {*} value
 * @returns {string}
 */
const getYValueKind = value => (Array.isArray(value) ? 'array' : typeof value);
107
+
108
/**
 * Recursively checks that a Y value matches the shape of the corresponding
 * value from the reference Y row.
 *
 * The two values must be of the same kind (per `getYValueKind`). Arrays must
 * also have equal length and are compared element by element; any other value
 * must be a number, boolean or string.
 *
 * @param {Object} args
 * @param {*} args.referenceValue - Value from the reference row.
 * @param {*} args.currentValue - Value from the row being checked.
 * @param {number} args.sourceIndex - Index reported in error messages.
 * @param {string} args.path - Location of the value inside the row.
 * @param {string} args.strictFlagName - Option name mentioned in error messages.
 * @returns {void}
 * @throws {Error} Error built by `createStrictSchemaError` on the first mismatch.
 */
const assertSameYValueSchema = ({
  referenceValue,
  currentValue,
  sourceIndex,
  path,
  strictFlagName,
}) => {
  // Every failure raised here is a 'Y' error at the same index and path.
  const fail = detail =>
    createStrictSchemaError({
      rowLabel: 'Y',
      sourceIndex,
      path,
      detail,
      strictFlagName,
    });

  const expectedKind = getYValueKind(referenceValue);
  const actualKind = getYValueKind(currentValue);

  if (expectedKind !== actualKind) {
    throw fail(
      `Expected type "${expectedKind}" based on the first parsed Y row, but got "${actualKind}".`
    );
  }

  if (expectedKind !== 'array') {
    const isSupportedScalar =
      expectedKind === 'number' ||
      expectedKind === 'boolean' ||
      expectedKind === 'string';

    if (!isSupportedScalar) {
      throw fail(
        `Unsupported Y value type "${expectedKind}". Y values must be numbers, booleans, strings, or nested arrays of those types.`
      );
    }

    return;
  }

  if (referenceValue.length !== currentValue.length) {
    throw fail(
      `Expected array length "${referenceValue.length}" based on the first parsed Y row, but got "${currentValue.length}".`
    );
  }

  // Lengths match, so walk both arrays in lockstep.
  referenceValue.forEach((expectedItem, position) => {
    assertSameYValueSchema({
      referenceValue: expectedItem,
      currentValue: currentValue[position],
      sourceIndex,
      path: buildPath({ parentPath: path, key: position, isArrayParent: true }),
      strictFlagName,
    });
  });
};
166
+
167
/**
 * Runs the top-level schema check on every parsed X row, using the first row
 * as the reference.
 *
 * When there is more than one row, the last row is checked before the full
 * pass; the full pass then visits every row, including the first and last.
 *
 * @param {Object} args
 * @param {Array} args.rows - Parsed X rows.
 * @param {number[]} args.sourceIndexes - Index reported for each row, aligned with `rows`.
 * @param {string} args.strictFlagName - Option name mentioned in error messages.
 * @returns {void}
 * @throws {Error} Whatever `assertSameTopLevelSchema` throws for the first failing row.
 */
const validateStrictXRows = ({ rows, sourceIndexes, strictFlagName }) => {
  const total = rows.length;
  if (!total) return;

  const referenceRow = rows[0];

  const checkAt = position => {
    assertSameTopLevelSchema({
      referenceRow,
      currentRow: rows[position],
      rowLabel: 'X',
      currentIndex: sourceIndexes[position],
      strictFlagName,
    });
  };

  if (total > 1) {
    checkAt(total - 1);
  }

  for (let position = 0; position < total; position += 1) {
    checkAt(position);
  }
};
190
+
191
/**
 * Runs the schema checks on every parsed Y row, using the first row as the
 * reference: first the top-level shape, then the type and nested-array shape
 * of each value under the reference row's comparable keys.
 *
 * When there is more than one row, the last row is checked before the full
 * pass; the full pass then visits every row, including the first and last.
 *
 * @param {Object} args
 * @param {Array} args.rows - Parsed Y rows.
 * @param {number[]} args.sourceIndexes - Index reported for each row, aligned with `rows`.
 * @param {string} args.strictFlagName - Option name mentioned in error messages.
 * @returns {void}
 * @throws {Error} Whatever the schema assertions throw for the first failing row.
 */
const validateStrictYRows = ({ rows, sourceIndexes, strictFlagName }) => {
  const total = rows.length;
  if (!total) return;

  const referenceRow = rows[0];

  const checkAt = position => {
    const currentRow = rows[position];
    const currentIndex = sourceIndexes[position];

    const referenceKeys = assertSameTopLevelSchema({
      referenceRow,
      currentRow,
      rowLabel: 'Y',
      currentIndex,
      strictFlagName,
    });

    const isArrayRow = Array.isArray(referenceRow);

    referenceKeys.forEach(key => {
      assertSameYValueSchema({
        referenceValue: referenceRow[key],
        currentValue: currentRow[key],
        sourceIndex: currentIndex,
        path: buildPath({ key, isArrayParent: isArrayRow }),
        strictFlagName,
      });
    });
  };

  if (total > 1) {
    checkAt(total - 1);
  }

  for (let position = 0; position < total; position += 1) {
    checkAt(position);
  }
};
229
+
7
230
  export const parseTrainingXY = ({
8
231
  arrObj = [], // array of objects
9
232
  trainingSplit = 0.8, // numeric float between 0.01 and 0.99
@@ -12,6 +235,8 @@ export const parseTrainingXY = ({
12
235
  validateRows = () => true, // accepted callback functions
13
236
  shuffle = false, // only booleans
14
237
  balancing = '', // accepted '', 'oversample' or 'undersample'
238
+ strictXSchema = true,
239
+ strictYSchema = true,
15
240
  state = {}, // accepted object or classes
16
241
  }) => {
17
242
  let X = [];
@@ -21,7 +246,6 @@ export const parseTrainingXY = ({
21
246
  validateArray(arrObj, { min: 5 }, 'parseTrainingXY');
22
247
  validateFirstRow(arrObj[0]);
23
248
 
24
- // if parsedX or parsedY is undefined/null the current row will be excluded from training
25
249
  for (let x = 0; x < arrObj.length; x++) {
26
250
  if (!validateRows({ objRow: arrObj, index: x, state })) continue;
27
251
 
@@ -40,6 +264,22 @@ export const parseTrainingXY = ({
40
264
  }
41
265
  }
42
266
 
267
+ if (strictXSchema) {
268
+ validateStrictXRows({
269
+ rows: X,
270
+ sourceIndexes,
271
+ strictFlagName: 'strictXSchema',
272
+ });
273
+ }
274
+
275
+ if (strictYSchema) {
276
+ validateStrictYRows({
277
+ rows: Y,
278
+ sourceIndexes,
279
+ strictFlagName: 'strictYSchema',
280
+ });
281
+ }
282
+
43
283
  if (shuffle) {
44
284
  const merged = new Array(X.length);
45
285
 
@@ -66,8 +306,8 @@ export const parseTrainingXY = ({
66
306
  const xLen = X.length;
67
307
  const yLen = Y.length;
68
308
 
69
- const xKeys = xLen ? Object.keys(X[0]).filter(key => key !== 'tempIdx') : [];
70
- const yKeys = yLen ? Object.keys(Y[0]).filter(key => key !== 'tempIdx') : [];
309
+ const xKeys = xLen ? getComparableKeys(X[0]) : [];
310
+ const yKeys = yLen ? getComparableKeys(Y[0]) : [];
71
311
 
72
312
  const flatX = new Array(xLen);
73
313
  const flatY = new Array(yLen);
@@ -150,6 +390,7 @@ export const parseProductionX = ({
150
390
  xCallbackFunc = row => row,
151
391
  validateRows = () => true,
152
392
  shuffle = false,
393
+ strictXSchema = true,
153
394
  state = {},
154
395
  }) => {
155
396
  let X = [];
@@ -169,6 +410,14 @@ export const parseProductionX = ({
169
410
  }
170
411
  }
171
412
 
413
+ if (strictXSchema) {
414
+ validateStrictXRows({
415
+ rows: X,
416
+ sourceIndexes,
417
+ strictFlagName: 'strictXSchema',
418
+ });
419
+ }
420
+
172
421
  if (shuffle) {
173
422
  const merged = new Array(X.length);
174
423
 
@@ -191,7 +440,7 @@ export const parseProductionX = ({
191
440
  }
192
441
 
193
442
  const xLen = X.length;
194
- const xKeys = xLen ? Object.keys(X[0]).filter(key => key !== 'tempIdx') : [];
443
+ const xKeys = xLen ? getComparableKeys(X[0]) : [];
195
444
  const flatX = new Array(xLen);
196
445
 
197
446
  const configX = {