xy-scale 1.4.33 → 1.4.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- var XY_Scale;(()=>{"use strict";var e={d:(r,t)=>{for(var n in t)e.o(t,n)&&!e.o(r,n)&&Object.defineProperty(r,n,{enumerable:!0,get:t[n]})},o:(e,r)=>Object.prototype.hasOwnProperty.call(e,r),r:e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})}},r={};e.r(r),e.d(r,{arrayToTimesteps:()=>y,parseProductionX:()=>g,parseTrainingXY:()=>f});const t=e=>null==e||!Number.isFinite(e),n=e=>{const r=[...e];for(let e=r.length-1;e>0;e--){const t=Math.floor(Math.random()*(e+1));[r[e],r[t]]=[r[t],r[e]]}return r},a=(e,{min:r=-1/0,max:t=1/0},n)=>{if(!Array.isArray(e))throw new Error(`Invalid property. "${n}" expected an array.`);if(e.length<r)throw new Error(`Invalid property value. Array "${n}" expected at least ${t} items.`);if(e.length>t)throw new Error(`Invalid property value. Array "${n}" expected at max ${t} items.`);return!0},o=e=>{for(const[r,t]of Object.entries(e)){if("number"==typeof t&&Number.isNaN(t))throw new Error(`Invalid value at index 0 property "${r}": value is "${t}". Expected a numeric value.`);if(null===t)throw new Error(`Invalid value at index 0 property "${r}": value is "${t}".`)}return!0},s=e=>{return Array.isArray(e)?"array":(r=e,"[object Object]"===Object.prototype.toString.call(r)?"object":typeof e);var r},l=e=>Object.keys(e).filter((e=>"tempIdx"!==e)),c=({parentPath:e="",key:r,isArrayParent:t=!1})=>`${e}${t?`[${r}]`:e?`.${r}`:`${r}`}`,i=({rowLabel:e,sourceIndex:r,path:t="",detail:n,strictFlagName:a})=>new Error(`${e} schema error at index "${r}"${t?` path "${t}"`:""}. 
${n} Set "${a}" to false to disable this validation.`),u=({referenceRow:e,currentRow:r,rowLabel:t,currentIndex:n,strictFlagName:a})=>{const o=s(e),u=s(r);if(o!==u)throw i({rowLabel:t,sourceIndex:n,detail:`Expected row type "${o}" based on the first parsed ${t} row, but got "${u}".`,strictFlagName:a});if("array"!==o&&"object"!==o)throw i({rowLabel:t,sourceIndex:n,detail:`Parsed ${t} rows must be arrays or plain objects.`,strictFlagName:a});const h=l(e),d=l(r),p=new Set(h),f=new Set(d),g=Array.isArray(e);for(let e=0;e<h.length;e++){const r=h[e];if(!f.has(r))throw i({rowLabel:t,sourceIndex:n,path:c({key:r,isArrayParent:g}),detail:`Missing required ${g?"index":"property"} "${r}" found in the first parsed ${t} row.`,strictFlagName:a})}for(let e=0;e<d.length;e++){const r=d[e];if(!p.has(r))throw i({rowLabel:t,sourceIndex:n,path:c({key:r,isArrayParent:g}),detail:`Unexpected ${g?"index":"property"} "${r}" not present in the first parsed ${t} row.`,strictFlagName:a})}return h},h=e=>Array.isArray(e)?"array":typeof e,d=({referenceValue:e,currentValue:r,sourceIndex:t,path:n,strictFlagName:a})=>{const o=h(e),s=h(r);if(o!==s)throw i({rowLabel:"Y",sourceIndex:t,path:n,detail:`Expected type "${o}" based on the first parsed Y row, but got "${s}".`,strictFlagName:a});if("array"!==o){if("number"!==o&&"boolean"!==o&&"string"!==o)throw i({rowLabel:"Y",sourceIndex:t,path:n,detail:`Unsupported Y value type "${o}". 
Y values must be numbers, booleans, strings, or nested arrays of those types.`,strictFlagName:a})}else{if(e.length!==r.length)throw i({rowLabel:"Y",sourceIndex:t,path:n,detail:`Expected array length "${e.length}" based on the first parsed Y row, but got "${r.length}".`,strictFlagName:a});for(let o=0;o<e.length;o++)d({referenceValue:e[o],currentValue:r[o],sourceIndex:t,path:c({parentPath:n,key:o,isArrayParent:!0}),strictFlagName:a})}},p=({rows:e,sourceIndexes:r,strictFlagName:t})=>{if(!e.length)return;const n=e[0],a=(e,r)=>{u({referenceRow:n,currentRow:e,rowLabel:"X",currentIndex:r,strictFlagName:t})};e.length>1&&a(e[e.length-1],r[e.length-1]);for(let t=0;t<e.length;t++)a(e[t],r[t])},f=({arrObj:e=[],trainingSplit:r=.8,yCallbackFunc:s=e=>e,xCallbackFunc:i=e=>e,validateRows:h=()=>!0,shuffle:f=!1,balancing:g="",strictXSchema:y=!0,strictYSchema:w=!0,state:b={}})=>{let m=[],x=[];const $=[];a(e,{min:5},"parseTrainingXY"),o(e[0]);for(let r=0;r<e.length;r++){if(!h({objRow:e,index:r,state:b}))continue;const t=i({objRow:e,index:r,state:b}),n=s({objRow:e,index:r,state:b});null!=t&&null!=n&&(m.push(t),x.push(n),$.push(r))}if(y&&p({rows:m,sourceIndexes:$,strictFlagName:"strictXSchema"}),w&&(({rows:e,sourceIndexes:r,strictFlagName:t})=>{if(!e.length)return;const n=e[0],a=(e,r)=>{const a=u({referenceRow:n,currentRow:e,rowLabel:"Y",currentIndex:r,strictFlagName:t}),o=Array.isArray(n);for(let s=0;s<a.length;s++){const l=a[s],i=c({key:l,isArrayParent:o});d({referenceValue:n[l],currentValue:e[l],sourceIndex:r,path:i,strictFlagName:t})}};e.length>1&&a(e[e.length-1],r[e.length-1]);for(let t=0;t<e.length;t++)a(e[t],r[t])})({rows:x,sourceIndexes:$,strictFlagName:"strictYSchema"}),f){const e=new Array(m.length);for(let r=0;r<m.length;r++)e[r]={x:m[r],y:x[r],sourceIndex:$[r]};const r=n(e);m=new Array(r.length),x=new Array(r.length);for(let e=0;e<r.length;e++)m[e]=r[e].x,x[e]=r[e].y,$[e]=r[e].sourceIndex}const v=m.length,I=x.length,A=v?l(m[0]):[],N=I?l(x[0]):[],F=new Array(v),j=new 
Array(I),Y={keyNames:A},X={keyNames:N};for(let e=0;e<v;e++){const r=m[e],n=$[e],a=new Array(A.length);for(let e=0;e<A.length;e++){const o=A[e],s=r[o];if(t(s))throw new Error(`Invalid property value (${s}) returned from "xCallbackFunc" on index "${n}" property "${o}".`);a[e]=s}F[e]=a}for(let e=0;e<I;e++){const r=x[e],t=new Array(N.length);for(let e=0;e<N.length;e++)t[e]=r[N[e]];j[e]=t}const S=Math.floor(F.length*r);let E=F.slice(0,S),k=j.slice(0,S),O=F.slice(S),P=j.slice(S);if(g){let e;if("oversample"===g)e=((e,r)=>{const t={},n={};r.forEach(((a,o)=>{t[a]||(t[a]=0,n[a]=[]),t[a]++,n[a].push([e[o],r[o]])}));const a=Math.max(...Object.values(t)),o=[],s=[];return Object.keys(n).forEach((e=>{const r=n[e],t=r.length;for(let e=0;e<a;e++){const n=r[e%t];o.push(n[0]),s.push(n[1])}})),{X:o,Y:s}})(E,k),E=e.X,k=e.Y;else{if("undersample"!==g)throw Error('balancing argument only accepts "", "oversample" and "undersample". Defaults to "".');e=((e,r)=>{const t={},n={};r.forEach(((a,o)=>{t[a]||(t[a]=0,n[a]=[]),t[a]++,n[a].push([e[o],r[o]])}));const a=Math.min(...Object.values(t)),o=[],s=[];return Object.keys(n).forEach((e=>{const r=n[e];for(let e=0;e<a;e++){const t=r[e];o.push(t[0]),s.push(t[1])}})),{X:o,Y:s}})(E,k),E=e.X,k=e.Y}}return{trainX:E,trainY:k,testX:O,testY:P,configX:Y,configY:X}},g=({arrObj:e=[],xCallbackFunc:r=e=>e,validateRows:s=()=>!0,shuffle:c=!1,strictXSchema:i=!0,state:u={}})=>{let h=[],d=[];a(e,{min:5},"parseProductionX"),o(e[0]);for(let t=0;t<e.length;t++){if(!s({objRow:e,index:t,state:u}))continue;const n=r({objRow:e,index:t,state:u});null!=n&&!1!==n&&(h.push(n),d.push(t))}if(i&&p({rows:h,sourceIndexes:d,strictFlagName:"strictXSchema"}),c){const e=new Array(h.length);for(let r=0;r<h.length;r++)e[r]={x:h[r],sourceIndex:d[r]};const r=n(e);h=new Array(r.length),d=new Array(r.length);for(let e=0;e<r.length;e++)h[e]=r[e].x,d[e]=r[e].sourceIndex}const f=h.length,g=f?l(h[0]):[],y=new Array(f),w={keyNames:g};for(let e=0;e<f;e++){const r=h[e],n=d[e],a=new 
Array(g.length);for(let e=0;e<g.length;e++){const o=g[e],s=r[o];if(t(s))throw new Error(`Invalid property value (${s}) returned from "xCallbackFunc" on index "${n}" property "${o}".`);a[e]=s}y[e]=a}return{X:y,configX:w}},y=(e,r)=>{if(0===r)return e;if(r<0)throw new Error("timeSteps must be greater than 0");const t=[];for(let n=0;n<=e.length-r;n++)t.push(e.slice(n,n+r));return t};XY_Scale=r})();
1
+ var XY_Scale;(()=>{"use strict";var e={d:(r,t)=>{for(var n in t)e.o(t,n)&&!e.o(r,n)&&Object.defineProperty(r,n,{enumerable:!0,get:t[n]})},o:(e,r)=>Object.prototype.hasOwnProperty.call(e,r),r:e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})}},r={};e.r(r),e.d(r,{arrayToTimesteps:()=>A,parseProductionX:()=>v,parseTrainingXY:()=>$});const t=e=>null==e||!Number.isFinite(e),n=e=>{const r=[...e];for(let e=r.length-1;e>0;e--){const t=Math.floor(Math.random()*(e+1));[r[e],r[t]]=[r[t],r[e]]}return r},a=e=>"string"==typeof e||"boolean"==typeof e||("number"==typeof e?Number.isFinite(e):!!Array.isArray(e)&&e.every(a)),o=({random:e,seed:r}={})=>"function"==typeof e?e:Number.isInteger(r)?(e=>{let r=e>>>0;return()=>{r+=1831565813;let e=Math.imul(r^r>>>15,1|r);return e^=e+Math.imul(e^e>>>7,61|e),((e^e>>>14)>>>0)/4294967296}})(r):Math.random,s=(e,r=Math.random)=>{for(let t=e.length-1;t>0;t--){const n=Math.floor(r()*(t+1));[e[t],e[n]]=[e[n],e[t]]}return e},l=(e,r,t=Math.random)=>{if(r>e.length)throw new Error("Cannot sample more items than available without replacement.");const n=[...e];return s(n,t),n.slice(0,r)},i=(e,r,t=Math.random)=>{if(0===e.length)throw new Error("Cannot sample from an empty array.");const n=[];for(let a=0;a<r;a++){const r=Math.floor(t()*e.length);n.push(e[r])}return n},c=(e,r)=>r?structuredClone(e):e,h=(e,r,{cloneX:t=!1}={})=>{((e,r)=>{if(!Array.isArray(e)||!Array.isArray(r))throw new Error("X and Y must be arrays.");if(e.length!==r.length)throw new Error("X and Y must have the same length.");if(0===e.length)throw new Error("X and Y cannot be empty.")})(e,r);const n=new Map;return r.forEach(((r,o)=>{const s=(e=>{if(!a(e))throw new Error("Invalid Y label. 
Allowed types: finite numbers, strings, booleans, or nested arrays of those.");return JSON.stringify(e)})(r);n.has(s)||n.set(s,[]),n.get(s).push({x:c(e[o],t),y:r})})),n},u=(e,{min:r=-1/0,max:t=1/0},n)=>{if(!Array.isArray(e))throw new Error(`Invalid property. "${n}" expected an array.`);if(e.length<r)throw new Error(`Invalid property value. Array "${n}" expected at least ${t} items.`);if(e.length>t)throw new Error(`Invalid property value. Array "${n}" expected at max ${t} items.`);return!0},d=e=>{for(const[r,t]of Object.entries(e)){if("number"==typeof t&&Number.isNaN(t))throw new Error(`Invalid value at index 0 property "${r}": value is "${t}". Expected a numeric value.`);if(null===t)throw new Error(`Invalid value at index 0 property "${r}": value is "${t}".`)}return!0},f=e=>{return Array.isArray(e)?"array":(r=e,"[object Object]"===Object.prototype.toString.call(r)?"object":typeof e);var r},p=e=>Object.keys(e).filter((e=>"tempIdx"!==e)),y=({parentPath:e="",key:r,isArrayParent:t=!1})=>`${e}${t?`[${r}]`:e?`.${r}`:`${r}`}`,g=({rowLabel:e,sourceIndex:r,path:t="",detail:n,strictFlagName:a})=>new Error(`${e} schema error at index "${r}"${t?` path "${t}"`:""}. 
${n} Set "${a}" to false to disable this validation.`),m=({referenceRow:e,currentRow:r,rowLabel:t,currentIndex:n,strictFlagName:a})=>{const o=f(e),s=f(r);if(o!==s)throw g({rowLabel:t,sourceIndex:n,detail:`Expected row type "${o}" based on the first parsed ${t} row, but got "${s}".`,strictFlagName:a});if("array"!==o&&"object"!==o)throw g({rowLabel:t,sourceIndex:n,detail:`Parsed ${t} rows must be arrays or plain objects.`,strictFlagName:a});const l=p(e),i=p(r),c=new Set(l),h=new Set(i),u=Array.isArray(e);for(let e=0;e<l.length;e++){const r=l[e];if(!h.has(r))throw g({rowLabel:t,sourceIndex:n,path:y({key:r,isArrayParent:u}),detail:`Missing required ${u?"index":"property"} "${r}" found in the first parsed ${t} row.`,strictFlagName:a})}for(let e=0;e<i.length;e++){const r=i[e];if(!c.has(r))throw g({rowLabel:t,sourceIndex:n,path:y({key:r,isArrayParent:u}),detail:`Unexpected ${u?"index":"property"} "${r}" not present in the first parsed ${t} row.`,strictFlagName:a})}return l},w=e=>Array.isArray(e)?"array":typeof e,b=({referenceValue:e,currentValue:r,sourceIndex:t,path:n,strictFlagName:a})=>{const o=w(e),s=w(r);if(o!==s)throw g({rowLabel:"Y",sourceIndex:t,path:n,detail:`Expected type "${o}" based on the first parsed Y row, but got "${s}".`,strictFlagName:a});if("array"!==o){if("number"!==o&&"boolean"!==o&&"string"!==o)throw g({rowLabel:"Y",sourceIndex:t,path:n,detail:`Unsupported Y value type "${o}". 
Y values must be numbers, booleans, strings, or nested arrays of those types.`,strictFlagName:a})}else{if(e.length!==r.length)throw g({rowLabel:"Y",sourceIndex:t,path:n,detail:`Expected array length "${e.length}" based on the first parsed Y row, but got "${r.length}".`,strictFlagName:a});for(let o=0;o<e.length;o++)b({referenceValue:e[o],currentValue:r[o],sourceIndex:t,path:y({parentPath:n,key:o,isArrayParent:!0}),strictFlagName:a})}},x=({rows:e,sourceIndexes:r,strictFlagName:t})=>{if(!e.length)return;const n=e[0],a=(e,r)=>{m({referenceRow:n,currentRow:e,rowLabel:"X",currentIndex:r,strictFlagName:t})};e.length>1&&a(e[e.length-1],r[e.length-1]);for(let t=0;t<e.length;t++)a(e[t],r[t])},$=({arrObj:e=[],trainingSplit:r=.8,yCallbackFunc:a=e=>e,xCallbackFunc:f=e=>e,validateRows:g=()=>!0,shuffle:w=!1,balancing:$="",strictXSchema:v=!0,strictYSchema:A=!0,state:I={}})=>{let N=[],X=[];const F=[];u(e,{min:5},"parseTrainingXY"),d(e[0]);for(let r=0;r<e.length;r++){if(!g({objRow:e,index:r,state:I}))continue;const t=f({objRow:e,index:r,state:I}),n=a({objRow:e,index:r,state:I});null!=t&&null!=n&&(N.push(t),X.push(n),F.push(r))}if(v&&x({rows:N,sourceIndexes:F,strictFlagName:"strictXSchema"}),A&&(({rows:e,sourceIndexes:r,strictFlagName:t})=>{if(!e.length)return;const n=e[0],a=(e,r)=>{const a=m({referenceRow:n,currentRow:e,rowLabel:"Y",currentIndex:r,strictFlagName:t}),o=Array.isArray(n);for(let s=0;s<a.length;s++){const l=a[s],i=y({key:l,isArrayParent:o});b({referenceValue:n[l],currentValue:e[l],sourceIndex:r,path:i,strictFlagName:t})}};e.length>1&&a(e[e.length-1],r[e.length-1]);for(let t=0;t<e.length;t++)a(e[t],r[t])})({rows:X,sourceIndexes:F,strictFlagName:"strictYSchema"}),w){const e=new Array(N.length);for(let r=0;r<N.length;r++)e[r]={x:N[r],y:X[r],sourceIndex:F[r]};const r=n(e);N=new Array(r.length),X=new Array(r.length);for(let e=0;e<r.length;e++)N[e]=r[e].x,X[e]=r[e].y,F[e]=r[e].sourceIndex}const Y=N.length,E=X.length,S=Y?p(N[0]):[],j=E?p(X[0]):[],M=new Array(Y),R=new 
Array(E),k={keyNames:S},P={keyNames:j};for(let e=0;e<Y;e++){const r=N[e],n=F[e],a=new Array(S.length);for(let e=0;e<S.length;e++){const o=S[e],s=r[o];if(t(s))throw new Error(`Invalid property value (${s}) returned from "xCallbackFunc" on index "${n}" property "${o}".`);a[e]=s}M[e]=a}for(let e=0;e<E;e++){const r=X[e],t=new Array(j.length);for(let e=0;e<j.length;e++)t[e]=r[j[e]];R[e]=t}const O=Math.floor(M.length*r);let L=M.slice(0,O),C=R.slice(0,O),T=M.slice(O),V=R.slice(O);if($){let e;if("oversample"===$)e=((e,r,t={})=>{const{random:n,seed:a,shuffleResult:l=!0,cloneX:u=!1}=t,d=o({random:n,seed:a}),f=h(e,r,{cloneX:u}),p=[...f.values()].map((e=>e.length)),y=Math.max(...p),g=[];for(const e of f.values()){const r=[...e],t=y-r.length,n=t>0?i(e,t,d).map((e=>({x:c(e.x,u),y:e.y}))):[];g.push(...r,...n)}return l&&s(g,d),{X:g.map((({x:e})=>e)),Y:g.map((({y:e})=>e))}})(L,C),L=e.X,C=e.Y;else{if("undersample"!==$)throw Error('balancing argument only accepts "", "oversample" and "undersample". Defaults to "".');e=((e,r,t={})=>{const{random:n,seed:a,shuffleResult:i=!0,cloneX:u=!1}=t,d=o({random:n,seed:a}),f=h(e,r,{cloneX:u}),p=[...f.values()].map((e=>e.length)),y=Math.min(...p),g=[];for(const e of f.values()){const r=l(e,y,d).map((e=>({x:c(e.x,u),y:e.y})));g.push(...r)}return i&&s(g,d),{X:g.map((({x:e})=>e)),Y:g.map((({y:e})=>e))}})(L,C),L=e.X,C=e.Y}}return{trainX:L,trainY:C,testX:T,testY:V,configX:k,configY:P}},v=({arrObj:e=[],xCallbackFunc:r=e=>e,validateRows:a=()=>!0,shuffle:o=!1,strictXSchema:s=!0,state:l={}})=>{let i=[],c=[];u(e,{min:5},"parseProductionX"),d(e[0]);for(let t=0;t<e.length;t++){if(!a({objRow:e,index:t,state:l}))continue;const n=r({objRow:e,index:t,state:l});null!=n&&!1!==n&&(i.push(n),c.push(t))}if(s&&x({rows:i,sourceIndexes:c,strictFlagName:"strictXSchema"}),o){const e=new Array(i.length);for(let r=0;r<i.length;r++)e[r]={x:i[r],sourceIndex:c[r]};const r=n(e);i=new Array(r.length),c=new Array(r.length);for(let 
e=0;e<r.length;e++)i[e]=r[e].x,c[e]=r[e].sourceIndex}const h=i.length,f=h?p(i[0]):[],y=new Array(h),g={keyNames:f};for(let e=0;e<h;e++){const r=i[e],n=c[e],a=new Array(f.length);for(let e=0;e<f.length;e++){const o=f[e],s=r[o];if(t(s))throw new Error(`Invalid property value (${s}) returned from "xCallbackFunc" on index "${n}" property "${o}".`);a[e]=s}y[e]=a}return{X:y,configX:g}},A=(e,r)=>{if(0===r)return e;if(r<0)throw new Error("timeSteps must be greater than 0");const t=[];for(let n=0;n<=e.length-r;n++)t.push(e.slice(n,n+r));return t};XY_Scale=r})();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "xy-scale",
3
- "version": "1.4.33",
3
+ "version": "1.4.34",
4
4
  "main": "./index.js",
5
5
  "type": "module",
6
6
  "scripts": {
package/src/balancing.js CHANGED
@@ -1,71 +1,187 @@
1
- export const oversampleXY = (X, Y) => {
2
-
3
- const labelCounts = {};
4
- const data = {};
5
-
6
- // Count occurrences of each label and group by label
7
- Y.forEach((label, i) => {
8
- if (!labelCounts[label]) {
9
- labelCounts[label] = 0;
10
- data[label] = [];
11
- }
12
- labelCounts[label]++;
13
- data[label].push([X[i], Y[i]]);
14
- });
15
-
16
- // Find the max label count
17
- const maxCount = Math.max(...Object.values(labelCounts));
18
-
19
- const oversampledX = [];
20
- const oversampledY = [];
21
-
22
- // Oversample each label to match the max count
23
- Object.keys(data).forEach(label => {
24
- const samples = data[label];
25
- const numSamples = samples.length;
26
-
27
- for (let i = 0; i < maxCount; i++) {
28
- const sample = samples[i % numSamples]; // Cycle through samples
29
- oversampledX.push(sample[0]);
30
- oversampledY.push(sample[1]);
31
- }
32
- });
33
-
34
- return { X: oversampledX, Y: oversampledY };
1
/**
 * Reports whether a value may be used as a Y label.
 *
 * Strings and booleans are always accepted, numbers only when finite, and
 * arrays only when every element is itself an acceptable label (checked
 * recursively). Everything else is rejected.
 *
 * @param {*} value - Candidate label.
 * @returns {boolean} `true` when the value is an allowed label.
 */
const isValidLabelValue = (value) => {
  switch (typeof value) {
    case "string":
    case "boolean":
      return true;
    case "number":
      return Number.isFinite(value);
    default:
      // Only arrays remain eligible; null, objects, undefined, etc. fail here.
      return Array.isArray(value) && value.every(isValidLabelValue);
  }
};
11
+
12
/**
 * Builds the grouping key for a Y label.
 *
 * @param {*} label - Label to convert; must pass `isValidLabelValue`.
 * @returns {string} The JSON serialization of the label.
 * @throws {Error} When the label is not an allowed label value.
 */
const makeLabelKey = (label) => {
  if (isValidLabelValue(label)) {
    return JSON.stringify(label);
  }

  throw new Error(
    "Invalid Y label. Allowed types: finite numbers, strings, booleans, or nested arrays of those."
  );
};
21
+
22
/**
 * Checks that X and Y are non-empty arrays of equal length.
 *
 * @param {*} X - Feature rows.
 * @param {*} Y - Labels, one per feature row.
 * @returns {void}
 * @throws {Error} When either argument is not an array, when the lengths
 *   differ, or when the arrays are empty.
 */
const validateXY = (X, Y) => {
  const bothAreArrays = Array.isArray(X) && Array.isArray(Y);
  if (!bothAreArrays) {
    throw new Error("X and Y must be arrays.");
  }

  const rowCount = X.length;
  if (rowCount !== Y.length) {
    throw new Error("X and Y must have the same length.");
  }

  if (rowCount === 0) {
    throw new Error("X and Y cannot be empty.");
  }
};
35
+
36
/**
 * Creates a deterministic pseudo-random generator from a seed.
 *
 * The seed is coerced to an unsigned 32-bit integer. Each call of the returned
 * function advances the internal state and yields a number in [0, 1).
 *
 * @param {number} seed - Seed value; only its low 32 bits are used.
 * @returns {() => number} Generator returning values in [0, 1).
 */
const mulberry32 = (seed) => {
  let state = seed >>> 0;

  return () => {
    // Keep the state wrapped to 32 bits. Without the wrap the counter grows as
    // a double, passes 2^53 after a few million draws and starts losing its low
    // bits, which changes (and eventually freezes) the generated sequence.
    state = (state + 0x6d2b79f5) >>> 0;
    let r = Math.imul(state ^ (state >>> 15), state | 1);
    r ^= r + Math.imul(r ^ (r >>> 7), r | 61);
    return ((r ^ (r >>> 14)) >>> 0) / 4294967296;
  };
};
46
+
47
/**
 * Picks the random source to use for sampling and shuffling.
 *
 * Precedence: an explicit `random` function, then a generator seeded from an
 * integer `seed`, then `Math.random`. A non-integer seed is ignored.
 *
 * @param {{ random?: () => number, seed?: number }} [options] - Random options.
 * @returns {() => number} Function producing pseudo-random numbers.
 */
const resolveRandom = (options = {}) => {
  const { random, seed } = options;

  if (typeof random !== "function") {
    return Number.isInteger(seed) ? mulberry32(seed) : Math.random;
  }

  return random;
};
52
+
53
/**
 * Shuffles an array in place by walking from the last index down to 1 and
 * swapping each position with a randomly chosen index at or below it.
 *
 * @param {Array} arr - Array to reorder; it is mutated.
 * @param {() => number} [random=Math.random] - Random source, called once per
 *   visited position.
 * @returns {Array} The same array instance that was passed in.
 */
const shuffleInPlace = (arr, random = Math.random) => {
  let last = arr.length - 1;

  while (last > 0) {
    const pick = Math.floor(random() * (last + 1));
    const held = arr[last];
    arr[last] = arr[pick];
    arr[pick] = held;
    last -= 1;
  }

  return arr;
};
61
+
62
/**
 * Draws `size` distinct items from `samples` without modifying the input.
 *
 * A copy of the input is shuffled and its first `size` entries are returned.
 *
 * @param {Array} samples - Pool to draw from.
 * @param {number} size - Number of items to draw.
 * @param {() => number} [random=Math.random] - Random source for the shuffle.
 * @returns {Array} The drawn items.
 * @throws {Error} When `size` exceeds the number of available items.
 */
const sampleWithoutReplacement = (samples, size, random = Math.random) => {
  const available = samples.length;
  if (size > available) {
    throw new Error("Cannot sample more items than available without replacement.");
  }

  const shuffled = shuffleInPlace([...samples], random);
  return shuffled.slice(0, size);
};
71
+
72
/**
 * Draws `size` items from `samples`, allowing the same item to be picked
 * more than once.
 *
 * @param {Array} samples - Pool to draw from; must not be empty.
 * @param {number} size - Number of draws to make.
 * @param {() => number} [random=Math.random] - Random source, called once per
 *   draw.
 * @returns {Array} The drawn items, in draw order.
 * @throws {Error} When `samples` is empty.
 */
const sampleWithReplacement = (samples, size, random = Math.random) => {
  if (samples.length === 0) {
    throw new Error("Cannot sample from an empty array.");
  }

  const drawn = [];
  let draws = 0;

  while (draws < size) {
    const position = Math.floor(random() * samples.length);
    drawn.push(samples[position]);
    draws += 1;
  }

  return drawn;
};
86
+
87
/**
 * Returns either the value itself or a deep copy of it.
 *
 * @param {*} value - Value to pass through or copy.
 * @param {boolean} clone - When truthy, a `structuredClone` copy is returned;
 *   otherwise the original reference is returned untouched.
 * @returns {*} The original value or its deep copy.
 */
const maybeClone = (value, clone) => (clone ? structuredClone(value) : value);
91
+
92
/**
 * Groups paired X/Y rows by label.
 *
 * Inputs are validated first. Each label is turned into a string key, and the
 * rows sharing a key are collected, in input order, as `{ x, y }` entries. Map
 * insertion order follows the first appearance of each label.
 *
 * @param {Array} X - Feature rows.
 * @param {Array} Y - Labels, one per feature row.
 * @param {{ cloneX?: boolean }} [options] - When `cloneX` is true each stored
 *   `x` is a deep copy of the input row instead of the same reference.
 * @returns {Map<string, Array<{ x: *, y: * }>>} Entries grouped by label key.
 * @throws {Error} When validation fails or a label is not an allowed value.
 */
const groupXYByLabel = (X, Y, { cloneX = false } = {}) => {
  validateXY(X, Y);

  const groups = new Map();

  Y.forEach((label, i) => {
    const key = makeLabelKey(label);

    let bucket = groups.get(key);
    if (bucket === undefined) {
      bucket = [];
      groups.set(key, bucket);
    }

    bucket.push({ x: maybeClone(X[i], cloneX), y: label });
  });

  return groups;
};
112
+
113
/**
 * Balances a labelled data set by oversampling: every label group is grown to
 * the size of the largest group by drawing extra rows from it with replacement.
 *
 * Within each group the original rows come first, followed by the extra draws;
 * groups are emitted in order of first label appearance. The combined result is
 * then shuffled unless `shuffleResult` is false.
 *
 * @param {Array} X - Feature rows.
 * @param {Array} Y - Labels, one per feature row.
 * @param {object} [options]
 * @param {() => number} [options.random] - Custom random source; takes
 *   precedence over `seed`.
 * @param {number} [options.seed] - Integer seed for a deterministic generator.
 * @param {boolean} [options.shuffleResult=true] - Shuffle the combined rows.
 * @param {boolean} [options.cloneX=false] - Deep-copy X rows so that no two
 *   output rows (and no input row) share a reference.
 * @returns {{ X: Array, Y: Array }} Balanced rows and their labels.
 * @throws {Error} When X/Y fail validation or a label is not an allowed value.
 */
export const oversampleXY = (X, Y, options = {}) => {
  const { random, seed, shuffleResult = true, cloneX = false } = options;

  const rng = resolveRandom({ random, seed });
  const groups = groupXYByLabel(X, Y, { cloneX });

  // Plain loop rather than Math.max(...counts): spreading one argument per
  // label can exceed the engine's argument limit when there are many labels.
  let maxCount = 0;
  for (const samples of groups.values()) {
    if (samples.length > maxCount) {
      maxCount = samples.length;
    }
  }

  const combined = [];

  for (const samples of groups.values()) {
    // Rows are appended one at a time; push(...rows) throws a RangeError once
    // a group holds more rows than the engine accepts as call arguments.
    for (const sample of samples) {
      combined.push(sample);
    }

    const extrasNeeded = maxCount - samples.length;
    if (extrasNeeded > 0) {
      for (const sample of sampleWithReplacement(samples, extrasNeeded, rng)) {
        combined.push({ x: maybeClone(sample.x, cloneX), y: sample.y });
      }
    }
  }

  if (shuffleResult) {
    shuffleInPlace(combined, rng);
  }

  return {
    X: combined.map(({ x }) => x),
    Y: combined.map(({ y }) => y),
  };
};
153
+
154
/**
 * Balances a labelled data set by undersampling: every label group is reduced
 * to the size of the smallest group by drawing rows from it without
 * replacement.
 *
 * Groups are emitted in order of first label appearance. The combined result is
 * then shuffled unless `shuffleResult` is false.
 *
 * @param {Array} X - Feature rows.
 * @param {Array} Y - Labels, one per feature row.
 * @param {object} [options]
 * @param {() => number} [options.random] - Custom random source; takes
 *   precedence over `seed`.
 * @param {number} [options.seed] - Integer seed for a deterministic generator.
 * @param {boolean} [options.shuffleResult=true] - Shuffle the combined rows.
 * @param {boolean} [options.cloneX=false] - Return deep copies of the selected
 *   X rows instead of the input references.
 * @returns {{ X: Array, Y: Array }} Balanced rows and their labels.
 * @throws {Error} When X/Y fail validation or a label is not an allowed value.
 */
export const undersampleXY = (X, Y, options = {}) => {
  const { random, seed, shuffleResult = true, cloneX = false } = options;

  const rng = resolveRandom({ random, seed });

  // Group by reference: only the rows that survive selection are cloned below,
  // so discarded rows are never copied and kept rows are copied exactly once.
  const groups = groupXYByLabel(X, Y);

  // Plain loop rather than Math.min(...counts): spreading one argument per
  // label can exceed the engine's argument limit when there are many labels.
  let minCount = Infinity;
  for (const samples of groups.values()) {
    if (samples.length < minCount) {
      minCount = samples.length;
    }
  }

  const combined = [];

  for (const samples of groups.values()) {
    // Rows are appended one at a time; push(...rows) throws a RangeError once
    // the selection holds more rows than the engine accepts as call arguments.
    for (const sample of sampleWithoutReplacement(samples, minCount, rng)) {
      combined.push({ x: maybeClone(sample.x, cloneX), y: sample.y });
    }
  }

  if (shuffleResult) {
    shuffleInPlace(combined, rng);
  }

  return {
    X: combined.map(({ x }) => x),
    Y: combined.map(({ y }) => y),
  };
};