xy-scale 1.4.33 → 1.4.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- var XY_Scale;(()=>{"use strict";var e={d:(r,t)=>{for(var n in t)e.o(t,n)&&!e.o(r,n)&&Object.defineProperty(r,n,{enumerable:!0,get:t[n]})},o:(e,r)=>Object.prototype.hasOwnProperty.call(e,r),r:e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})}},r={};e.r(r),e.d(r,{arrayToTimesteps:()=>y,parseProductionX:()=>g,parseTrainingXY:()=>f});const t=e=>null==e||!Number.isFinite(e),n=e=>{const r=[...e];for(let e=r.length-1;e>0;e--){const t=Math.floor(Math.random()*(e+1));[r[e],r[t]]=[r[t],r[e]]}return r},a=(e,{min:r=-1/0,max:t=1/0},n)=>{if(!Array.isArray(e))throw new Error(`Invalid property. "${n}" expected an array.`);if(e.length<r)throw new Error(`Invalid property value. Array "${n}" expected at least ${t} items.`);if(e.length>t)throw new Error(`Invalid property value. Array "${n}" expected at max ${t} items.`);return!0},o=e=>{for(const[r,t]of Object.entries(e)){if("number"==typeof t&&Number.isNaN(t))throw new Error(`Invalid value at index 0 property "${r}": value is "${t}". Expected a numeric value.`);if(null===t)throw new Error(`Invalid value at index 0 property "${r}": value is "${t}".`)}return!0},s=e=>{return Array.isArray(e)?"array":(r=e,"[object Object]"===Object.prototype.toString.call(r)?"object":typeof e);var r},l=e=>Object.keys(e).filter((e=>"tempIdx"!==e)),c=({parentPath:e="",key:r,isArrayParent:t=!1})=>`${e}${t?`[${r}]`:e?`.${r}`:`${r}`}`,i=({rowLabel:e,sourceIndex:r,path:t="",detail:n,strictFlagName:a})=>new Error(`${e} schema error at index "${r}"${t?` path "${t}"`:""}. 
${n} Set "${a}" to false to disable this validation.`),u=({referenceRow:e,currentRow:r,rowLabel:t,currentIndex:n,strictFlagName:a})=>{const o=s(e),u=s(r);if(o!==u)throw i({rowLabel:t,sourceIndex:n,detail:`Expected row type "${o}" based on the first parsed ${t} row, but got "${u}".`,strictFlagName:a});if("array"!==o&&"object"!==o)throw i({rowLabel:t,sourceIndex:n,detail:`Parsed ${t} rows must be arrays or plain objects.`,strictFlagName:a});const h=l(e),d=l(r),p=new Set(h),f=new Set(d),g=Array.isArray(e);for(let e=0;e<h.length;e++){const r=h[e];if(!f.has(r))throw i({rowLabel:t,sourceIndex:n,path:c({key:r,isArrayParent:g}),detail:`Missing required ${g?"index":"property"} "${r}" found in the first parsed ${t} row.`,strictFlagName:a})}for(let e=0;e<d.length;e++){const r=d[e];if(!p.has(r))throw i({rowLabel:t,sourceIndex:n,path:c({key:r,isArrayParent:g}),detail:`Unexpected ${g?"index":"property"} "${r}" not present in the first parsed ${t} row.`,strictFlagName:a})}return h},h=e=>Array.isArray(e)?"array":typeof e,d=({referenceValue:e,currentValue:r,sourceIndex:t,path:n,strictFlagName:a})=>{const o=h(e),s=h(r);if(o!==s)throw i({rowLabel:"Y",sourceIndex:t,path:n,detail:`Expected type "${o}" based on the first parsed Y row, but got "${s}".`,strictFlagName:a});if("array"!==o){if("number"!==o&&"boolean"!==o&&"string"!==o)throw i({rowLabel:"Y",sourceIndex:t,path:n,detail:`Unsupported Y value type "${o}". 
Y values must be numbers, booleans, strings, or nested arrays of those types.`,strictFlagName:a})}else{if(e.length!==r.length)throw i({rowLabel:"Y",sourceIndex:t,path:n,detail:`Expected array length "${e.length}" based on the first parsed Y row, but got "${r.length}".`,strictFlagName:a});for(let o=0;o<e.length;o++)d({referenceValue:e[o],currentValue:r[o],sourceIndex:t,path:c({parentPath:n,key:o,isArrayParent:!0}),strictFlagName:a})}},p=({rows:e,sourceIndexes:r,strictFlagName:t})=>{if(!e.length)return;const n=e[0],a=(e,r)=>{u({referenceRow:n,currentRow:e,rowLabel:"X",currentIndex:r,strictFlagName:t})};e.length>1&&a(e[e.length-1],r[e.length-1]);for(let t=0;t<e.length;t++)a(e[t],r[t])},f=({arrObj:e=[],trainingSplit:r=.8,yCallbackFunc:s=e=>e,xCallbackFunc:i=e=>e,validateRows:h=()=>!0,shuffle:f=!1,balancing:g="",strictXSchema:y=!0,strictYSchema:w=!0,state:b={}})=>{let m=[],x=[];const $=[];a(e,{min:5},"parseTrainingXY"),o(e[0]);for(let r=0;r<e.length;r++){if(!h({objRow:e,index:r,state:b}))continue;const t=i({objRow:e,index:r,state:b}),n=s({objRow:e,index:r,state:b});null!=t&&null!=n&&(m.push(t),x.push(n),$.push(r))}if(y&&p({rows:m,sourceIndexes:$,strictFlagName:"strictXSchema"}),w&&(({rows:e,sourceIndexes:r,strictFlagName:t})=>{if(!e.length)return;const n=e[0],a=(e,r)=>{const a=u({referenceRow:n,currentRow:e,rowLabel:"Y",currentIndex:r,strictFlagName:t}),o=Array.isArray(n);for(let s=0;s<a.length;s++){const l=a[s],i=c({key:l,isArrayParent:o});d({referenceValue:n[l],currentValue:e[l],sourceIndex:r,path:i,strictFlagName:t})}};e.length>1&&a(e[e.length-1],r[e.length-1]);for(let t=0;t<e.length;t++)a(e[t],r[t])})({rows:x,sourceIndexes:$,strictFlagName:"strictYSchema"}),f){const e=new Array(m.length);for(let r=0;r<m.length;r++)e[r]={x:m[r],y:x[r],sourceIndex:$[r]};const r=n(e);m=new Array(r.length),x=new Array(r.length);for(let e=0;e<r.length;e++)m[e]=r[e].x,x[e]=r[e].y,$[e]=r[e].sourceIndex}const v=m.length,I=x.length,A=v?l(m[0]):[],N=I?l(x[0]):[],F=new Array(v),j=new 
Array(I),Y={keyNames:A},X={keyNames:N};for(let e=0;e<v;e++){const r=m[e],n=$[e],a=new Array(A.length);for(let e=0;e<A.length;e++){const o=A[e],s=r[o];if(t(s))throw new Error(`Invalid property value (${s}) returned from "xCallbackFunc" on index "${n}" property "${o}".`);a[e]=s}F[e]=a}for(let e=0;e<I;e++){const r=x[e],t=new Array(N.length);for(let e=0;e<N.length;e++)t[e]=r[N[e]];j[e]=t}const S=Math.floor(F.length*r);let E=F.slice(0,S),k=j.slice(0,S),O=F.slice(S),P=j.slice(S);if(g){let e;if("oversample"===g)e=((e,r)=>{const t={},n={};r.forEach(((a,o)=>{t[a]||(t[a]=0,n[a]=[]),t[a]++,n[a].push([e[o],r[o]])}));const a=Math.max(...Object.values(t)),o=[],s=[];return Object.keys(n).forEach((e=>{const r=n[e],t=r.length;for(let e=0;e<a;e++){const n=r[e%t];o.push(n[0]),s.push(n[1])}})),{X:o,Y:s}})(E,k),E=e.X,k=e.Y;else{if("undersample"!==g)throw Error('balancing argument only accepts "", "oversample" and "undersample". Defaults to "".');e=((e,r)=>{const t={},n={};r.forEach(((a,o)=>{t[a]||(t[a]=0,n[a]=[]),t[a]++,n[a].push([e[o],r[o]])}));const a=Math.min(...Object.values(t)),o=[],s=[];return Object.keys(n).forEach((e=>{const r=n[e];for(let e=0;e<a;e++){const t=r[e];o.push(t[0]),s.push(t[1])}})),{X:o,Y:s}})(E,k),E=e.X,k=e.Y}}return{trainX:E,trainY:k,testX:O,testY:P,configX:Y,configY:X}},g=({arrObj:e=[],xCallbackFunc:r=e=>e,validateRows:s=()=>!0,shuffle:c=!1,strictXSchema:i=!0,state:u={}})=>{let h=[],d=[];a(e,{min:5},"parseProductionX"),o(e[0]);for(let t=0;t<e.length;t++){if(!s({objRow:e,index:t,state:u}))continue;const n=r({objRow:e,index:t,state:u});null!=n&&!1!==n&&(h.push(n),d.push(t))}if(i&&p({rows:h,sourceIndexes:d,strictFlagName:"strictXSchema"}),c){const e=new Array(h.length);for(let r=0;r<h.length;r++)e[r]={x:h[r],sourceIndex:d[r]};const r=n(e);h=new Array(r.length),d=new Array(r.length);for(let e=0;e<r.length;e++)h[e]=r[e].x,d[e]=r[e].sourceIndex}const f=h.length,g=f?l(h[0]):[],y=new Array(f),w={keyNames:g};for(let e=0;e<f;e++){const r=h[e],n=d[e],a=new 
Array(g.length);for(let e=0;e<g.length;e++){const o=g[e],s=r[o];if(t(s))throw new Error(`Invalid property value (${s}) returned from "xCallbackFunc" on index "${n}" property "${o}".`);a[e]=s}y[e]=a}return{X:y,configX:w}},y=(e,r)=>{if(0===r)return e;if(r<0)throw new Error("timeSteps must be greater than 0");const t=[];for(let n=0;n<=e.length-r;n++)t.push(e.slice(n,n+r));return t};XY_Scale=r})();
1
+ var XY_Scale;(()=>{"use strict";var e={d:(r,t)=>{for(var n in t)e.o(t,n)&&!e.o(r,n)&&Object.defineProperty(r,n,{enumerable:!0,get:t[n]})},o:(e,r)=>Object.prototype.hasOwnProperty.call(e,r),r:e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})}},r={};e.r(r),e.d(r,{arrayToTimesteps:()=>A,parseProductionX:()=>v,parseTrainingXY:()=>$});const t=e=>null==e||!Number.isFinite(e),n=e=>{const r=[...e];for(let e=r.length-1;e>0;e--){const t=Math.floor(Math.random()*(e+1));[r[e],r[t]]=[r[t],r[e]]}return r},a=e=>"string"==typeof e||"boolean"==typeof e||("number"==typeof e?Number.isFinite(e):!!Array.isArray(e)&&e.every(a)),o=({random:e,seed:r}={})=>"function"==typeof e?e:Number.isInteger(r)?(e=>{let r=e>>>0;return()=>{r+=1831565813;let e=Math.imul(r^r>>>15,1|r);return e^=e+Math.imul(e^e>>>7,61|e),((e^e>>>14)>>>0)/4294967296}})(r):Math.random,s=(e,r=Math.random)=>{for(let t=e.length-1;t>0;t--){const n=Math.floor(r()*(t+1));[e[t],e[n]]=[e[n],e[t]]}return e},l=(e,r,t=Math.random)=>{if(r>e.length)throw new Error("Cannot sample more items than available without replacement.");const n=[...e];return s(n,t),n.slice(0,r)},i=(e,r,t=Math.random)=>{if(0===e.length)throw new Error("Cannot sample from an empty array.");const n=[];for(let a=0;a<r;a++){const r=Math.floor(t()*e.length);n.push(e[r])}return n},c=(e,r)=>r?structuredClone(e):e,h=(e,r,{cloneX:t=!1}={})=>{((e,r)=>{if(!Array.isArray(e)||!Array.isArray(r))throw new Error("X and Y must be arrays.");if(e.length!==r.length)throw new Error("X and Y must have the same length.");if(0===e.length)throw new Error("X and Y cannot be empty.")})(e,r);const n=new Map;return r.forEach(((r,o)=>{const s=(e=>{if(!a(e))throw new Error("Invalid Y label. 
Allowed types: finite numbers, strings, booleans, or nested arrays of those.");return JSON.stringify(e)})(r);n.has(s)||n.set(s,[]),n.get(s).push({x:c(e[o],t),y:r})})),n},u=(e,{min:r=-1/0,max:t=1/0},n)=>{if(!Array.isArray(e))throw new Error(`Invalid property. "${n}" expected an array.`);if(e.length<r)throw new Error(`Invalid property value. Array "${n}" expected at least ${t} items.`);if(e.length>t)throw new Error(`Invalid property value. Array "${n}" expected at max ${t} items.`);return!0},d=e=>{for(const[r,t]of Object.entries(e)){if("number"==typeof t&&Number.isNaN(t))throw new Error(`Invalid value at index 0 property "${r}": value is "${t}". Expected a numeric value.`);if(null===t)throw new Error(`Invalid value at index 0 property "${r}": value is "${t}".`)}return!0},f=e=>{return Array.isArray(e)?"array":(r=e,"[object Object]"===Object.prototype.toString.call(r)?"object":typeof e);var r},p=e=>Object.keys(e).filter((e=>"tempIdx"!==e)),y=({parentPath:e="",key:r,isArrayParent:t=!1})=>`${e}${t?`[${r}]`:e?`.${r}`:`${r}`}`,g=({rowLabel:e,sourceIndex:r,path:t="",detail:n,strictFlagName:a})=>new Error(`${e} schema error at index "${r}"${t?` path "${t}"`:""}. 
${n} Set "${a}" to false to disable this validation.`),m=({referenceRow:e,currentRow:r,rowLabel:t,currentIndex:n,strictFlagName:a})=>{const o=f(e),s=f(r);if(o!==s)throw g({rowLabel:t,sourceIndex:n,detail:`Expected row type "${o}" based on the first parsed ${t} row, but got "${s}".`,strictFlagName:a});if("array"!==o&&"object"!==o)throw g({rowLabel:t,sourceIndex:n,detail:`Parsed ${t} rows must be arrays or plain objects.`,strictFlagName:a});const l=p(e),i=p(r),c=new Set(l),h=new Set(i),u=Array.isArray(e);for(let e=0;e<l.length;e++){const r=l[e];if(!h.has(r))throw g({rowLabel:t,sourceIndex:n,path:y({key:r,isArrayParent:u}),detail:`Missing required ${u?"index":"property"} "${r}" found in the first parsed ${t} row.`,strictFlagName:a})}for(let e=0;e<i.length;e++){const r=i[e];if(!c.has(r))throw g({rowLabel:t,sourceIndex:n,path:y({key:r,isArrayParent:u}),detail:`Unexpected ${u?"index":"property"} "${r}" not present in the first parsed ${t} row.`,strictFlagName:a})}return l},w=e=>Array.isArray(e)?"array":typeof e,b=({referenceValue:e,currentValue:r,sourceIndex:t,path:n,strictFlagName:a})=>{const o=w(e),s=w(r);if(o!==s)throw g({rowLabel:"Y",sourceIndex:t,path:n,detail:`Expected type "${o}" based on the first parsed Y row, but got "${s}".`,strictFlagName:a});if("array"!==o){if("number"!==o&&"boolean"!==o&&"string"!==o)throw g({rowLabel:"Y",sourceIndex:t,path:n,detail:`Unsupported Y value type "${o}". 
Y values must be numbers, booleans, strings, or nested arrays of those types.`,strictFlagName:a})}else{if(e.length!==r.length)throw g({rowLabel:"Y",sourceIndex:t,path:n,detail:`Expected array length "${e.length}" based on the first parsed Y row, but got "${r.length}".`,strictFlagName:a});for(let o=0;o<e.length;o++)b({referenceValue:e[o],currentValue:r[o],sourceIndex:t,path:y({parentPath:n,key:o,isArrayParent:!0}),strictFlagName:a})}},x=({rows:e,sourceIndexes:r,strictFlagName:t})=>{if(!e.length)return;const n=e[0],a=(e,r)=>{m({referenceRow:n,currentRow:e,rowLabel:"X",currentIndex:r,strictFlagName:t})};e.length>1&&a(e[e.length-1],r[e.length-1]);for(let t=0;t<e.length;t++)a(e[t],r[t])},$=({arrObj:e=[],trainingSplit:r=.8,yCallbackFunc:a=e=>e,xCallbackFunc:f=e=>e,validateRows:g=()=>!0,shuffle:w=!1,balancing:$="",strictXSchema:v=!0,strictYSchema:A=!0,state:I={}})=>{let N=[],X=[];const F=[];u(e,{min:5},"parseTrainingXY"),d(e[0]);for(let r=0;r<e.length;r++){if(!g({objRow:e,index:r,state:I}))continue;const t=f({objRow:e,index:r,state:I}),n=a({objRow:e,index:r,state:I});null!=t&&null!=n&&(N.push(t),X.push(n),F.push(r))}if(v&&x({rows:N,sourceIndexes:F,strictFlagName:"strictXSchema"}),A&&(({rows:e,sourceIndexes:r,strictFlagName:t})=>{if(!e.length)return;const n=e[0],a=(e,r)=>{const a=m({referenceRow:n,currentRow:e,rowLabel:"Y",currentIndex:r,strictFlagName:t}),o=Array.isArray(n);for(let s=0;s<a.length;s++){const l=a[s],i=y({key:l,isArrayParent:o});b({referenceValue:n[l],currentValue:e[l],sourceIndex:r,path:i,strictFlagName:t})}};e.length>1&&a(e[e.length-1],r[e.length-1]);for(let t=0;t<e.length;t++)a(e[t],r[t])})({rows:X,sourceIndexes:F,strictFlagName:"strictYSchema"}),w){const e=new Array(N.length);for(let r=0;r<N.length;r++)e[r]={x:N[r],y:X[r],sourceIndex:F[r]};const r=n(e);N=new Array(r.length),X=new Array(r.length);for(let e=0;e<r.length;e++)N[e]=r[e].x,X[e]=r[e].y,F[e]=r[e].sourceIndex}const Y=N.length,E=X.length,S=Y?p(N[0]):[],j=E?p(X[0]):[],M=new Array(Y),R=new 
Array(E),k={keyNames:S},P={keyNames:j};for(let e=0;e<Y;e++){const r=N[e],n=F[e],a=new Array(S.length);for(let e=0;e<S.length;e++){const o=S[e],s=r[o];if(t(s))throw new Error(`Invalid property value (${s}) returned from "xCallbackFunc" on index "${n}" property "${o}".`);a[e]=s}M[e]=a}for(let e=0;e<E;e++){const r=X[e],t=new Array(j.length);for(let e=0;e<j.length;e++)t[e]=r[j[e]];R[e]=t}const O=Math.floor(M.length*r);let L=M.slice(0,O),C=R.slice(0,O),T=M.slice(O),V=R.slice(O);if($){let e;if("oversample"===$)e=((e,r,t={})=>{const{random:n,seed:a,shuffleResult:l=!0,cloneX:u=!1}=t,d=o({random:n,seed:a}),f=h(e,r,{cloneX:u}),p=[...f.values()].map((e=>e.length)),y=Math.max(...p),g=[];for(const e of f.values()){const r=[...e],t=y-r.length,n=t>0?i(e,t,d).map((e=>({x:c(e.x,u),y:e.y}))):[];g.push(...r,...n)}return l&&s(g,d),{X:g.map((({x:e})=>e)),Y:g.map((({y:e})=>e))}})(L,C),L=e.X,C=e.Y;else{if("undersample"!==$)throw Error('balancing argument only accepts "", "oversample" and "undersample". Defaults to "".');e=((e,r,t={})=>{const{random:n,seed:a,shuffleResult:i=!0,cloneX:u=!1}=t,d=o({random:n,seed:a}),f=h(e,r,{cloneX:u}),p=[...f.values()].map((e=>e.length)),y=Math.min(...p),g=[];for(const e of f.values()){const r=l(e,y,d).map((e=>({x:c(e.x,u),y:e.y})));g.push(...r)}return i&&s(g,d),{X:g.map((({x:e})=>e)),Y:g.map((({y:e})=>e))}})(L,C),L=e.X,C=e.Y}}return{trainX:L,trainY:C,testX:T,testY:V,configX:k,configY:P}},v=({arrObj:e=[],xCallbackFunc:r=e=>e,validateRows:a=()=>!0,shuffle:o=!1,strictXSchema:s=!0,state:l={}})=>{let i=[],c=[];u(e,{min:5},"parseProductionX"),d(e[0]);for(let t=0;t<e.length;t++){if(!a({objRow:e,index:t,state:l}))continue;const n=r({objRow:e,index:t,state:l});null!=n&&!1!==n&&(i.push(n),c.push(t))}if(s&&x({rows:i,sourceIndexes:c,strictFlagName:"strictXSchema"}),o){const e=new Array(i.length);for(let r=0;r<i.length;r++)e[r]={x:i[r],sourceIndex:c[r]};const r=n(e);i=new Array(r.length),c=new Array(r.length);for(let 
e=0;e<r.length;e++)i[e]=r[e].x,c[e]=r[e].sourceIndex}const h=i.length,f=h?p(i[0]):[],y=new Array(h),g={keyNames:f};for(let e=0;e<h;e++){const r=i[e],n=c[e],a=new Array(f.length);for(let e=0;e<f.length;e++){const o=f[e],s=r[o];if(t(s))throw new Error(`Invalid property value (${s}) returned from "xCallbackFunc" on index "${n}" property "${o}".`);a[e]=s}y[e]=a}return{X:y,configX:g}},A=(e,r,t=1)=>{if(!Array.isArray(e))throw new Error("arr must be an array");if(!Number.isInteger(r)||r<=0)throw new Error("timeSteps must be a positive integer");if(!Number.isInteger(t)||t<=0)throw new Error("step must be a positive integer");if(r>e.length)return[];const n=[];for(let a=0;a<=e.length-r;a+=t)n.push(e.slice(a,a+r));return n};XY_Scale=r})();
package/index.js CHANGED
@@ -1,4 +1,5 @@
1
1
  import { parseTrainingXY, parseProductionX } from "./src/datasets.js"
2
2
  import {arrayToTimesteps } from "./src/timeSteps.js"
3
+ import { arrayShuffle, arrayShuffleXY } from "./src/utilities.js"
3
4
 
4
5
  export { parseTrainingXY, parseProductionX, arrayToTimesteps }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "xy-scale",
3
- "version": "1.4.33",
3
+ "version": "1.4.35",
4
4
  "main": "./index.js",
5
5
  "type": "module",
6
6
  "scripts": {
package/src/balancing.js CHANGED
@@ -1,71 +1,187 @@
1
- export const oversampleXY = (X, Y) => {
2
-
3
- const labelCounts = {};
4
- const data = {};
5
-
6
- // Count occurrences of each label and group by label
7
- Y.forEach((label, i) => {
8
- if (!labelCounts[label]) {
9
- labelCounts[label] = 0;
10
- data[label] = [];
11
- }
12
- labelCounts[label]++;
13
- data[label].push([X[i], Y[i]]);
14
- });
15
-
16
- // Find the max label count
17
- const maxCount = Math.max(...Object.values(labelCounts));
18
-
19
- const oversampledX = [];
20
- const oversampledY = [];
21
-
22
- // Oversample each label to match the max count
23
- Object.keys(data).forEach(label => {
24
- const samples = data[label];
25
- const numSamples = samples.length;
26
-
27
- for (let i = 0; i < maxCount; i++) {
28
- const sample = samples[i % numSamples]; // Cycle through samples
29
- oversampledX.push(sample[0]);
30
- oversampledY.push(sample[1]);
31
- }
32
- });
33
-
34
- return { X: oversampledX, Y: oversampledY };
1
+ const isValidLabelValue = (value) => {
2
+ if (typeof value === "string" || typeof value === "boolean") return true;
3
+ if (typeof value === "number") return Number.isFinite(value);
4
+
5
+ if (Array.isArray(value)) {
6
+ return value.every(isValidLabelValue);
35
7
  }
36
-
37
-
38
- export const undersampleXY = (X, Y) => {
39
-
40
- const labelCounts = {};
41
- const data = {};
42
-
43
- // Count occurrences of each label and group by label
44
- Y.forEach((label, i) => {
45
- if (!labelCounts[label]) {
46
- labelCounts[label] = 0;
47
- data[label] = [];
48
- }
49
- labelCounts[label]++;
50
- data[label].push([X[i], Y[i]]);
51
- });
52
-
53
- // Find the minimum label count
54
- const minCount = Math.min(...Object.values(labelCounts));
55
-
56
- const undersampledX = [];
57
- const undersampledY = [];
58
-
59
- // Undersample each label to match the minimum count
60
- Object.keys(data).forEach(label => {
61
- const samples = data[label];
62
- for (let i = 0; i < minCount; i++) {
63
- const sample = samples[i]; // Use first minCount samples
64
- undersampledX.push(sample[0]);
65
- undersampledY.push(sample[1]);
66
- }
8
+
9
+ return false;
10
+ };
11
+
12
/**
 * Builds the string key under which a Y label is grouped.
 *
 * The label is first checked with `isValidLabelValue`; a rejected label raises
 * an error. An accepted label is serialised with `JSON.stringify`, so the
 * string "1" and the number 1 produce different keys.
 *
 * @param {*} label - Candidate Y label.
 * @returns {string} JSON text of the label.
 * @throws {Error} When `isValidLabelValue(label)` is falsy.
 */
const makeLabelKey = (label) => {
  if (isValidLabelValue(label)) {
    return JSON.stringify(label);
  }

  throw new Error(
    "Invalid Y label. Allowed types: finite numbers, strings, booleans, or nested arrays of those."
  );
};
21
+
22
/**
 * Checks that X and Y form a usable pair of sample arrays.
 *
 * The checks run in this order: both values are arrays, both have the same
 * length, and that length is not zero.
 *
 * @param {*} X - Feature rows.
 * @param {*} Y - Labels, one per feature row.
 * @returns {undefined} Nothing; the function only throws on invalid input.
 * @throws {Error} With a message naming the first check that failed.
 */
const validateXY = (X, Y) => {
  const bothAreArrays = Array.isArray(X) && Array.isArray(Y);
  if (!bothAreArrays) {
    throw new Error("X and Y must be arrays.");
  }

  const sampleCount = X.length;
  if (sampleCount !== Y.length) {
    throw new Error("X and Y must have the same length.");
  }
  if (sampleCount === 0) {
    throw new Error("X and Y cannot be empty.");
  }
};
35
+
36
/**
 * Creates a seeded mulberry32 pseudo-random generator.
 *
 * The internal state is kept as an unsigned 32-bit integer. Letting it grow as
 * an ordinary Number (adding 0x6D2B79F5 on every draw without wrapping) pushes
 * it past 2^53 after roughly 4.9 million draws, at which point the low bits
 * consumed by the mixing step are lost to floating-point rounding. Wrapping
 * after each increment keeps every draw exact and yields the same sequence as
 * the unwrapped form for as long as that form is still precise.
 *
 * @param {number} seed - Seed value; coerced to an unsigned 32-bit integer.
 * @returns {() => number} Function returning the next value in [0, 1).
 */
const mulberry32 = (seed) => {
  let state = seed >>> 0;

  return () => {
    // Wrap to 32 bits on every step so the state never leaves the safe-integer range.
    state = (state + 0x6D2B79F5) >>> 0;

    let mixed = Math.imul(state ^ (state >>> 15), state | 1);
    mixed ^= mixed + Math.imul(mixed ^ (mixed >>> 7), mixed | 61);

    // Map the 32-bit result onto [0, 1).
    return ((mixed ^ (mixed >>> 14)) >>> 0) / 4294967296;
  };
};
46
+
47
/**
 * Chooses the random source for a sampling call.
 *
 * Precedence: a `random` function is returned as-is; otherwise an integer
 * `seed` produces a `mulberry32` generator; otherwise `Math.random` is used.
 * A `seed` that is not an integer is ignored.
 *
 * @param {{ random?: *, seed?: * }} [source] - Optional random source settings.
 * @returns {() => number} The function to call for random numbers.
 */
const resolveRandom = ({ random, seed } = {}) => {
  if (typeof random === "function") {
    return random;
  }

  return Number.isInteger(seed) ? mulberry32(seed) : Math.random;
};
52
+
53
/**
 * Shuffles an array in place by walking from the last index down to 1 and
 * swapping each position with a randomly chosen index at or below it.
 *
 * @param {Array} arr - Array to reorder; it is mutated.
 * @param {() => number} [random=Math.random] - Source of numbers in [0, 1).
 * @returns {Array} The same array instance that was passed in.
 */
const shuffleInPlace = (arr, random = Math.random) => {
  let last = arr.length - 1;

  while (last > 0) {
    const pick = Math.floor(random() * (last + 1));

    const held = arr[last];
    arr[last] = arr[pick];
    arr[pick] = held;

    last -= 1;
  }

  return arr;
};
61
+
62
/**
 * Picks `size` distinct entries from `samples`.
 *
 * A shallow copy of `samples` is shuffled with `shuffleInPlace` and its first
 * `size` entries are returned, so the input array is left untouched.
 *
 * @param {Array} samples - Pool to draw from.
 * @param {number} size - Number of entries to return.
 * @param {() => number} [random=Math.random] - Source of numbers in [0, 1).
 * @returns {Array} New array holding the selected entries.
 * @throws {Error} When `size` is greater than `samples.length`.
 */
const sampleWithoutReplacement = (samples, size, random = Math.random) => {
  const available = samples.length;

  if (size > available) {
    throw new Error("Cannot sample more items than available without replacement.");
  }

  return shuffleInPlace([...samples], random).slice(0, size);
};
71
+
72
/**
 * Draws `size` entries from `samples`, allowing the same entry to be picked
 * more than once. One random number is consumed per draw.
 *
 * @param {Array} samples - Pool to draw from.
 * @param {number} size - Number of draws.
 * @param {() => number} [random=Math.random] - Source of numbers in [0, 1).
 * @returns {Array} New array with the drawn entries, in draw order.
 * @throws {Error} When `samples` is empty.
 */
const sampleWithReplacement = (samples, size, random = Math.random) => {
  if (samples.length === 0) {
    throw new Error("Cannot sample from an empty array.");
  }

  const picks = [];
  let drawn = 0;

  while (drawn < size) {
    const position = Math.floor(random() * samples.length);
    picks.push(samples[position]);
    drawn += 1;
  }

  return picks;
};
86
+
87
/**
 * Returns `value` itself, or a `structuredClone` of it when `clone` is truthy.
 *
 * @param {*} value - Value to pass through or copy.
 * @param {*} clone - Truthy to request a deep copy.
 * @returns {*} The original value or its structured clone.
 */
const maybeClone = (value, clone) => (clone ? structuredClone(value) : value);
91
+
92
/**
 * Buckets paired samples by label.
 *
 * The pair is first checked with `validateXY`. Each label is turned into a key
 * with `makeLabelKey`, and the sample is appended to that key's bucket as
 * `{ x, y }`, where `x` goes through `maybeClone` and `y` is the label itself.
 * Buckets appear in the Map in the order their label is first seen.
 *
 * @param {Array} X - Feature rows.
 * @param {Array} Y - Labels, one per feature row.
 * @param {{ cloneX?: boolean }} [settings] - `cloneX` (default false) is
 *   forwarded to `maybeClone` for every feature row.
 * @returns {Map<string, Array<{ x: *, y: * }>>} Samples grouped by label key.
 */
const groupXYByLabel = (X, Y, { cloneX = false } = {}) => {
  validateXY(X, Y);

  const groups = new Map();

  Y.forEach((label, i) => {
    const key = makeLabelKey(label);
    const entry = { x: maybeClone(X[i], cloneX), y: label };
    const bucket = groups.get(key);

    if (bucket === undefined) {
      groups.set(key, [entry]);
    } else {
      bucket.push(entry);
    }
  });

  return groups;
};
112
+
113
/**
 * Balances a labelled dataset by growing every class to the size of the
 * largest one.
 *
 * Samples are grouped with `groupXYByLabel`. Every original sample is kept,
 * and each smaller class is topped up with entries drawn with replacement
 * from that same class. Classes are processed in first-seen order.
 *
 * Group contents are appended with plain loops instead of being spread into
 * `push(...)` / `Math.max(...)`: spreading passes every element as a call
 * argument, which raises a RangeError once a class holds more elements than
 * the engine's argument limit.
 *
 * @param {Array} X - Feature rows.
 * @param {Array} Y - Labels, one per feature row.
 * @param {object} [options]
 * @param {() => number} [options.random] - Custom random source; takes
 *   precedence over `seed` (see `resolveRandom`).
 * @param {number} [options.seed] - Integer seed for a reproducible run.
 * @param {boolean} [options.shuffleResult=true] - Shuffle the combined samples
 *   before returning; when false, each class's originals are followed by its
 *   extras.
 * @param {boolean} [options.cloneX=false] - Pass feature rows through
 *   `maybeClone` with cloning enabled.
 * @returns {{ X: Array, Y: Array }} Balanced feature rows and labels, aligned
 *   by index.
 */
export const oversampleXY = (X, Y, options = {}) => {
  const {
    random,
    seed,
    shuffleResult = true,
    cloneX = false,
  } = options;

  const rng = resolveRandom({ random, seed });
  const groups = groupXYByLabel(X, Y, { cloneX });

  // Size of the largest class, found without spreading into Math.max.
  let maxCount = 0;
  for (const samples of groups.values()) {
    if (samples.length > maxCount) {
      maxCount = samples.length;
    }
  }

  const combined = [];

  for (const samples of groups.values()) {
    for (const sample of samples) {
      combined.push(sample);
    }

    const extrasNeeded = maxCount - samples.length;
    if (extrasNeeded > 0) {
      // Each extra gets its own wrapper object (and its own x copy when
      // cloneX is set) so duplicates do not alias one another.
      for (const sample of sampleWithReplacement(samples, extrasNeeded, rng)) {
        combined.push({ x: maybeClone(sample.x, cloneX), y: sample.y });
      }
    }
  }

  if (shuffleResult) {
    shuffleInPlace(combined, rng);
  }

  return {
    X: combined.map(({ x }) => x),
    Y: combined.map(({ y }) => y),
  };
};
153
+
154
/**
 * Balances a labelled dataset by shrinking every class to the size of the
 * smallest one.
 *
 * Samples are grouped with `groupXYByLabel`, then each class contributes a
 * random selection (without replacement) of as many samples as the smallest
 * class holds. Classes are processed in first-seen order.
 *
 * Grouping is done without cloning and only the selected samples go through
 * `maybeClone`, so with `cloneX` enabled each returned row is copied exactly
 * once and discarded rows are never copied. Selected samples are appended with
 * a plain loop rather than `push(...selected)` / `Math.min(...counts)`, since
 * spreading a large class into call arguments raises a RangeError.
 *
 * @param {Array} X - Feature rows.
 * @param {Array} Y - Labels, one per feature row.
 * @param {object} [options]
 * @param {() => number} [options.random] - Custom random source; takes
 *   precedence over `seed` (see `resolveRandom`).
 * @param {number} [options.seed] - Integer seed for a reproducible run.
 * @param {boolean} [options.shuffleResult=true] - Shuffle the combined samples
 *   before returning.
 * @param {boolean} [options.cloneX=false] - Return copies of the selected
 *   feature rows instead of the caller's objects.
 * @returns {{ X: Array, Y: Array }} Balanced feature rows and labels, aligned
 *   by index.
 */
export const undersampleXY = (X, Y, options = {}) => {
  const {
    random,
    seed,
    shuffleResult = true,
    cloneX = false,
  } = options;

  const rng = resolveRandom({ random, seed });
  const groups = groupXYByLabel(X, Y);

  // Size of the smallest class, found without spreading into Math.min.
  let minCount = Infinity;
  for (const samples of groups.values()) {
    if (samples.length < minCount) {
      minCount = samples.length;
    }
  }

  const combined = [];

  for (const samples of groups.values()) {
    for (const sample of sampleWithoutReplacement(samples, minCount, rng)) {
      combined.push({ x: maybeClone(sample.x, cloneX), y: sample.y });
    }
  }

  if (shuffleResult) {
    shuffleInPlace(combined, rng);
  }

  return {
    X: combined.map(({ x }) => x),
    Y: combined.map(({ y }) => y),
  };
};
package/src/timeSteps.js CHANGED
@@ -1,12 +1,23 @@
1
- export const arrayToTimesteps = (arr, timeSteps) => {
2
- if (timeSteps === 0) return arr;
3
- if (timeSteps < 0) throw new Error("timeSteps must be greater than 0");
4
-
5
- const timestepsArray = [];
6
-
7
- for (let i = 0; i <= arr.length - timeSteps; i++) {
8
- timestepsArray.push(arr.slice(i, i + timeSteps));
9
- }
10
-
11
- return timestepsArray;
12
- }
1
/**
 * Cuts an array into sliding windows of `timeSteps` consecutive items.
 *
 * Windows start at index 0 and advance by `step`; only full-length windows
 * are produced. When the array is shorter than one window the result is an
 * empty array.
 *
 * @param {Array} arr - Source sequence.
 * @param {number} timeSteps - Window length; must be a positive integer.
 * @param {number} [step=1] - Distance between window starts; must be a
 *   positive integer.
 * @returns {Array<Array>} List of windows, each a slice of `arr`.
 * @throws {Error} When `arr` is not an array, or `timeSteps` / `step` is not a
 *   positive integer.
 */
export const arrayToTimesteps = (arr, timeSteps, step = 1) => {
  const isPositiveInteger = (value) => Number.isInteger(value) && value > 0;

  if (!Array.isArray(arr)) {
    throw new Error('arr must be an array');
  }
  if (!isPositiveInteger(timeSteps)) {
    throw new Error('timeSteps must be a positive integer');
  }
  if (!isPositiveInteger(step)) {
    throw new Error('step must be a positive integer');
  }

  // Highest index at which a full window still fits.
  const lastStart = arr.length - timeSteps;
  if (lastStart < 0) {
    return [];
  }

  const windowCount = Math.floor(lastStart / step) + 1;

  return Array.from({ length: windowCount }, (_, windowIndex) => {
    const start = windowIndex * step;
    return arr.slice(start, start + timeSteps);
  });
};
package/src/utilities.js CHANGED
@@ -11,4 +11,28 @@ export const arrayShuffle = X => {
11
11
  }
12
12
 
13
13
  return shuffledX;
14
- }
14
+ }
15
+
16
/**
 * Shuffles X and Y with one shared random permutation so that each X row stays
 * paired with its Y entry. The input arrays are not modified.
 *
 * The permutation is built by shuffling a list of indices (walking from the
 * last position down to 1 and swapping with a `Math.random`-chosen position at
 * or below it) and is then applied to both arrays.
 *
 * @param {Array} X - Feature rows.
 * @param {Array} Y - Labels, one per feature row.
 * @returns {{ X: Array, Y: Array }} New arrays in the shuffled order.
 * @throws {TypeError} When X or Y is not an array.
 * @throws {Error} When X and Y differ in length.
 */
export const arrayShuffleXY = (X, Y) => {
  if (!(Array.isArray(X) && Array.isArray(Y))) {
    throw new TypeError('Both X and Y must be arrays');
  }

  if (X.length !== Y.length) {
    throw new Error('X and Y must have the same length');
  }

  // order[k] is the source index that ends up at output position k.
  const order = Array.from(X, (_, index) => index);
  let tail = order.length - 1;

  while (tail > 0) {
    const pick = Math.floor(Math.random() * (tail + 1));

    const moved = order[tail];
    order[tail] = order[pick];
    order[pick] = moved;

    tail -= 1;
  }

  return {
    X: order.map((index) => X[index]),
    Y: order.map((index) => Y[index]),
  };
};
package/test/test.js CHANGED
@@ -1,5 +1,6 @@
1
1
  import OHLCV_INDICATORS from 'ohlcv-indicators'
2
2
  import { parseTrainingXY } from "../src/datasets.js"
3
+ import { arrayToTimesteps } from '../src/timeSteps.js'
3
4
  import { loadFile } from "./fs.js"
4
5
 
5
6
  const test = async () => {
@@ -60,6 +61,12 @@ const test = async () => {
60
61
  console.log('row_1', {features: trainX[0], labels: trainY[0]})
61
62
 
62
63
  console.log(trainY.length, trainX.length)
64
+
65
+ const timeSteps = arrayToTimesteps(trainX, 10)
66
+
67
+ const typeArr = (a) => Array.isArray(a) ? 'array' : typeof a
68
+
69
+ console.log(`timeSteps: ${typeArr(timeSteps)} => ${typeArr(timeSteps[0])} => ${typeArr(timeSteps[0][0][0])}`)
63
70
  }
64
71
 
65
72
  //callback function used to prepare X before flattening