xy-scale 1.4.39 → 1.4.42

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- var XY_Scale;(()=>{"use strict";var e={d:(r,t)=>{for(var n in t)e.o(t,n)&&!e.o(r,n)&&Object.defineProperty(r,n,{enumerable:!0,get:t[n]})},o:(e,r)=>Object.prototype.hasOwnProperty.call(e,r),r:e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})}},r={};e.r(r),e.d(r,{arrayShuffle:()=>n,arrayShuffleXY:()=>a,arrayToTimesteps:()=>I,parseProductionX:()=>v,parseTrainingXY:()=>$});const t=e=>null==e||!Number.isFinite(e),n=e=>{const r=[...e];for(let e=r.length-1;e>0;e--){const t=Math.floor(Math.random()*(e+1));[r[e],r[t]]=[r[t],r[e]]}return r},a=(e,r)=>{if(!Array.isArray(e)||!Array.isArray(r))throw new TypeError("Both X and Y must be arrays");if(e.length!==r.length)throw new Error("X and Y must have the same length");const t=[...e],n=[...r];for(let e=t.length-1;e>0;e--){const r=Math.floor(Math.random()*(e+1));[t[e],t[r]]=[t[r],t[e]],[n[e],n[r]]=[n[r],n[e]]}return{X:t,Y:n}},o=e=>"string"==typeof e||"boolean"==typeof e||("number"==typeof e?Number.isFinite(e):!!Array.isArray(e)&&e.every(o)),s=({random:e,seed:r}={})=>"function"==typeof e?e:Number.isInteger(r)?(e=>{let r=e>>>0;return()=>{r+=1831565813;let e=Math.imul(r^r>>>15,1|r);return e^=e+Math.imul(e^e>>>7,61|e),((e^e>>>14)>>>0)/4294967296}})(r):Math.random,l=(e,r=Math.random)=>{for(let t=e.length-1;t>0;t--){const n=Math.floor(r()*(t+1));[e[t],e[n]]=[e[n],e[t]]}return e},i=(e,r,t=Math.random)=>{if(r>e.length)throw new Error("Cannot sample more items than available without replacement.");const n=[...e];return l(n,t),n.slice(0,r)},c=(e,r,t=Math.random)=>{if(0===e.length)throw new Error("Cannot sample from an empty array.");const n=[];for(let a=0;a<r;a++){const r=Math.floor(t()*e.length);n.push(e[r])}return n},h=(e,r)=>r?structuredClone(e):e,u=(e,r,{cloneX:t=!1}={})=>{((e,r)=>{if(!Array.isArray(e)||!Array.isArray(r))throw new Error("X and Y must be arrays.");if(e.length!==r.length)throw new Error("X and Y must have the same length.");if(0===e.length)throw new Error("X and Y cannot be empty.")})(e,r);const n=new Map;return r.forEach(((r,a)=>{const s=(e=>{if(!o(e))throw new Error("Invalid Y label. Allowed types: finite numbers, strings, booleans, or nested arrays of those.");return JSON.stringify(e)})(r);n.has(s)||n.set(s,[]),n.get(s).push({x:h(e[a],t),y:r})})),n},f=(e,{min:r=-1/0,max:t=1/0},n)=>{if(!Array.isArray(e))throw new Error(`Invalid property. "${n}" expected an array.`);if(e.length<r)throw new Error(`Invalid property value. Array "${n}" expected at least ${t} items.`);if(e.length>t)throw new Error(`Invalid property value. Array "${n}" expected at max ${t} items.`);return!0},d=e=>{for(const[r,t]of Object.entries(e)){if("number"==typeof t&&Number.isNaN(t))throw new Error(`Invalid value at index 0 property "${r}": value is "${t}". Expected a numeric value.`);if(null===t)throw new Error(`Invalid value at index 0 property "${r}": value is "${t}".`)}return!0},y=e=>{return Array.isArray(e)?"array":(r=e,"[object Object]"===Object.prototype.toString.call(r)?"object":typeof e);var r},p=e=>Object.keys(e).filter((e=>"tempIdx"!==e)),g=({parentPath:e="",key:r,isArrayParent:t=!1})=>`${e}${t?`[${r}]`:e?`.${r}`:`${r}`}`,m=({rowLabel:e,sourceIndex:r,path:t="",detail:n,strictFlagName:a})=>new Error(`${e} schema error at index "${r}"${t?` path "${t}"`:""}. ${n} Set "${a}" to false to disable this validation.`),w=({referenceRow:e,currentRow:r,rowLabel:t,currentIndex:n,strictFlagName:a})=>{const o=y(e),s=y(r);if(o!==s)throw m({rowLabel:t,sourceIndex:n,detail:`Expected row type "${o}" based on the first parsed ${t} row, but got "${s}".`,strictFlagName:a});if("array"!==o&&"object"!==o)throw m({rowLabel:t,sourceIndex:n,detail:`Parsed ${t} rows must be arrays or plain objects.`,strictFlagName:a});const l=p(e),i=p(r),c=new Set(l),h=new Set(i),u=Array.isArray(e);for(let e=0;e<l.length;e++){const r=l[e];if(!h.has(r))throw m({rowLabel:t,sourceIndex:n,path:g({key:r,isArrayParent:u}),detail:`Missing required ${u?"index":"property"} "${r}" found in the first parsed ${t} row.`,strictFlagName:a})}for(let e=0;e<i.length;e++){const r=i[e];if(!c.has(r))throw m({rowLabel:t,sourceIndex:n,path:g({key:r,isArrayParent:u}),detail:`Unexpected ${u?"index":"property"} "${r}" not present in the first parsed ${t} row.`,strictFlagName:a})}return l},b=e=>Array.isArray(e)?"array":typeof e,x=({referenceValue:e,currentValue:r,sourceIndex:t,path:n,strictFlagName:a})=>{const o=b(e),s=b(r);if(o!==s)throw m({rowLabel:"Y",sourceIndex:t,path:n,detail:`Expected type "${o}" based on the first parsed Y row, but got "${s}".`,strictFlagName:a});if("array"!==o){if("number"!==o&&"boolean"!==o&&"string"!==o)throw m({rowLabel:"Y",sourceIndex:t,path:n,detail:`Unsupported Y value type "${o}". Y values must be numbers, booleans, strings, or nested arrays of those types.`,strictFlagName:a})}else{if(e.length!==r.length)throw m({rowLabel:"Y",sourceIndex:t,path:n,detail:`Expected array length "${e.length}" based on the first parsed Y row, but got "${r.length}".`,strictFlagName:a});for(let o=0;o<e.length;o++)x({referenceValue:e[o],currentValue:r[o],sourceIndex:t,path:g({parentPath:n,key:o,isArrayParent:!0}),strictFlagName:a})}},A=({rows:e,sourceIndexes:r,strictFlagName:t})=>{if(!e.length)return;const n=e[0],a=(e,r)=>{w({referenceRow:n,currentRow:e,rowLabel:"X",currentIndex:r,strictFlagName:t})};e.length>1&&a(e[e.length-1],r[e.length-1]);for(let t=0;t<e.length;t++)a(e[t],r[t])},$=({arrObj:e=[],trainingSplit:r=.8,yCallbackFunc:a=e=>e,xCallbackFunc:o=e=>e,validateRows:y=()=>!0,shuffle:m=!1,balancing:b="",strictXSchema:$=!0,strictYSchema:v=!0,state:I={}})=>{let X=[],N=[];const Y=[];f(e,{min:5},"parseTrainingXY"),d(e[0]);for(let r=0;r<e.length;r++){if(!y({objRow:e,index:r,state:I}))continue;const t=o({objRow:e,index:r,state:I}),n=a({objRow:e,index:r,state:I});null!=t&&null!=n&&(X.push(t),N.push(n),Y.push(r))}if($&&A({rows:X,sourceIndexes:Y,strictFlagName:"strictXSchema"}),v&&(({rows:e,sourceIndexes:r,strictFlagName:t})=>{if(!e.length)return;const n=e[0],a=(e,r)=>{const a=w({referenceRow:n,currentRow:e,rowLabel:"Y",currentIndex:r,strictFlagName:t}),o=Array.isArray(n);for(let s=0;s<a.length;s++){const l=a[s],i=g({key:l,isArrayParent:o});x({referenceValue:n[l],currentValue:e[l],sourceIndex:r,path:i,strictFlagName:t})}};e.length>1&&a(e[e.length-1],r[e.length-1]);for(let t=0;t<e.length;t++)a(e[t],r[t])})({rows:N,sourceIndexes:Y,strictFlagName:"strictYSchema"}),m){const e=new Array(X.length);for(let r=0;r<X.length;r++)e[r]={x:X[r],y:N[r],sourceIndex:Y[r]};const r=n(e);X=new Array(r.length),N=new Array(r.length);for(let e=0;e<r.length;e++)X[e]=r[e].x,N[e]=r[e].y,Y[e]=r[e].sourceIndex}const F=X.length,E=N.length,S=F?p(X[0]):[],j=E?p(N[0]):[],M=new Array(F),k=new Array(E),P={keyNames:S},R={keyNames:j};for(let e=0;e<F;e++){const r=X[e],n=Y[e],a=new Array(S.length);for(let e=0;e<S.length;e++){const o=S[e],s=r[o];if(t(s))throw new Error(`Invalid property value (${s}) returned from "xCallbackFunc" on index "${n}" property "${o}".`);a[e]=s}M[e]=a}for(let e=0;e<E;e++){const r=N[e],t=new Array(j.length);for(let e=0;e<j.length;e++)t[e]=r[j[e]];k[e]=t}const O=Math.floor(M.length*r);let L=M.slice(0,O),C=k.slice(0,O),T=M.slice(O),V=k.slice(O);if(b){let e;if("oversample"===b)e=((e,r,t={})=>{const{random:n,seed:a,shuffleResult:o=!0,cloneX:i=!1}=t,f=s({random:n,seed:a}),d=u(e,r,{cloneX:i}),y=[...d.values()].map((e=>e.length)),p=Math.max(...y),g=[];for(const e of d.values()){const r=[...e],t=p-r.length,n=t>0?c(e,t,f).map((e=>({x:h(e.x,i),y:e.y}))):[];g.push(...r,...n)}return o&&l(g,f),{X:g.map((({x:e})=>e)),Y:g.map((({y:e})=>e))}})(L,C),L=e.X,C=e.Y;else{if("undersample"!==b)throw Error('balancing argument only accepts "", "oversample" and "undersample". Defaults to "".');e=((e,r,t={})=>{const{random:n,seed:a,shuffleResult:o=!0,cloneX:c=!1}=t,f=s({random:n,seed:a}),d=u(e,r,{cloneX:c}),y=[...d.values()].map((e=>e.length)),p=Math.min(...y),g=[];for(const e of d.values()){const r=i(e,p,f).map((e=>({x:h(e.x,c),y:e.y})));g.push(...r)}return o&&l(g,f),{X:g.map((({x:e})=>e)),Y:g.map((({y:e})=>e))}})(L,C),L=e.X,C=e.Y}}return{trainX:L,trainY:C,testX:T,testY:V,configX:P,configY:R}},v=({arrObj:e=[],xCallbackFunc:r=e=>e,yCallbackFunc:a=null,validateRows:o=()=>!0,shuffle:s=!1,strictXSchema:l=!0,state:i={}})=>{let c=[],h=[];if(f(e,{min:5},"parseProductionX"),d(e[0]),null!=a)throw new Error('The property "yCallbackFunc" must not be set in "parseProductionX".');for(let t=0;t<e.length;t++){if(!o({objRow:e,index:t,state:i}))continue;const n=r({objRow:e,index:t,state:i});null!=n&&!1!==n&&(c.push(n),h.push(t))}if(l&&A({rows:c,sourceIndexes:h,strictFlagName:"strictXSchema"}),s){const e=new Array(c.length);for(let r=0;r<c.length;r++)e[r]={x:c[r],sourceIndex:h[r]};const r=n(e);c=new Array(r.length),h=new Array(r.length);for(let e=0;e<r.length;e++)c[e]=r[e].x,h[e]=r[e].sourceIndex}const u=c.length,y=u?p(c[0]):[],g=new Array(u),m={keyNames:y};for(let e=0;e<u;e++){const r=c[e],n=h[e],a=new Array(y.length);for(let e=0;e<y.length;e++){const o=y[e],s=r[o];if(t(s))throw new Error(`Invalid property value (${s}) returned from "xCallbackFunc" on index "${n}" property "${o}".`);a[e]=s}g[e]=a}return{X:g,configX:m}},I=(e,r,t=1)=>{if(!Array.isArray(e))throw new Error("arr must be an array");if(!Number.isInteger(r)||r<=0)throw new Error("timeSteps must be a positive integer");if(!Number.isInteger(t)||t<=0)throw new Error("step must be a positive integer");if(r>e.length)return[];const n=[];for(let a=0;a<=e.length-r;a+=t)n.push(e.slice(a,a+r));return n};XY_Scale=r})();
1
+ var XY_Scale;(()=>{"use strict";var e={d:(r,t)=>{for(var n in t)e.o(t,n)&&!e.o(r,n)&&Object.defineProperty(r,n,{enumerable:!0,get:t[n]})},o:(e,r)=>Object.prototype.hasOwnProperty.call(e,r),r:e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})}},r={};e.r(r),e.d(r,{arrayShuffle:()=>t,arrayShuffleXY:()=>n,arrayToTimesteps:()=>b,parseProductionX:()=>g,parseTrainingXY:()=>w});const t=e=>{const r=[...e];for(let e=r.length-1;e>0;e--){const t=Math.floor(Math.random()*(e+1));[r[e],r[t]]=[r[t],r[e]]}return r},n=(e,r)=>{if(!Array.isArray(e)||!Array.isArray(r))throw new TypeError("Both X and Y must be arrays");if(e.length!==r.length)throw new Error("X and Y must have the same length");const t=[...e],n=[...r];for(let e=t.length-1;e>0;e--){const r=Math.floor(Math.random()*(e+1));[t[e],t[r]]=[t[r],t[e]],[n[e],n[r]]=[n[r],n[e]]}return{X:t,Y:n}},o=e=>"string"==typeof e||"boolean"==typeof e||("number"==typeof e?Number.isFinite(e):!!Array.isArray(e)&&e.every(o)),a=({random:e,seed:r}={})=>"function"==typeof e?e:Number.isInteger(r)?(e=>{let r=e>>>0;return()=>{r+=1831565813;let e=Math.imul(r^r>>>15,1|r);return e^=e+Math.imul(e^e>>>7,61|e),((e^e>>>14)>>>0)/4294967296}})(r):Math.random,l=(e,r=Math.random)=>{for(let t=e.length-1;t>0;t--){const n=Math.floor(r()*(t+1));[e[t],e[n]]=[e[n],e[t]]}return e},i=(e,r,t=Math.random)=>{if(r>e.length)throw new Error("Cannot sample more items than available without replacement.");const n=[...e];return l(n,t),n.slice(0,r)},s=(e,r,t=Math.random)=>{if(0===e.length)throw new Error("Cannot sample from an empty array.");const n=[];for(let o=0;o<r;o++){const r=Math.floor(t()*e.length);n.push(e[r])}return n},u=(e,r)=>r?structuredClone(e):e,c=(e,r,{cloneX:t=!1}={})=>{((e,r)=>{if(!Array.isArray(e)||!Array.isArray(r))throw new Error("X and Y must be arrays.");if(e.length!==r.length)throw new Error("X and Y must have the same length.");if(0===e.length)throw new Error("X and Y cannot be empty.")})(e,r);const n=new Map;return r.forEach(((r,a)=>{const l=(e=>{if(!o(e))throw new Error("Invalid Y label. Allowed types: finite numbers, strings, booleans, or nested arrays of those.");return JSON.stringify(e)})(r);n.has(l)||n.set(l,[]),n.get(l).push({x:u(e[a],t),y:r})})),n},f=e=>null!=e&&Number.isFinite(e),h=e=>null!==e&&"object"==typeof e&&!Array.isArray(e)&&Object.keys(e).length>0,d=(e,{min:r=-1/0,max:t=1/0},n)=>{if(!Array.isArray(e))throw new Error(`Invalid property. "${n}" expected an array.`);if(e.length<r)throw new Error(`Invalid property value. Array "${n}" expected at least ${r} items.`);if(e.length>t)throw new Error(`Invalid property value. Array "${n}" expected at max ${t} items.`);return!0},y=e=>{if(!h(e))throw new Error("The first item in arrObj is expeted to be a key par object.");const r={},t={};for(const[n,o]of Object.entries(e))"number"==typeof o?r[n]=o:t[n]=o;if(Object.keys(r).length>0&&m(r,"validateFirstRow"))throw new Error("Invalid numeric value at index 0.");if(Object.keys(t).length>0&&p(t,"validateFirstRow"))throw new Error("Invalid non-numeric value at index 0.");return!0},m=(e,r)=>{if(null==r)throw new Error('[hasInvalidNumbers] Missing required param "callerName".');if(!h(e))throw new Error(`[${r}:hasInvalidNumbers] Received an invalid "list" param expected a non-empty key-pair object, got: ${JSON.stringify(e)}.`);for(const[t,n]of Object.entries(e))if(!f(n))return console.error(`[${r}:hasInvalidNumbers] property "${t}" only accept numbers. Invalid value is "${n}" and invalid type is "${typeof n}".`),!0;return!1},p=(e,r)=>{if(null==r)throw new Error('[hasNullOrUndefined] Missing required param "callerName".');if(!h(e))throw new Error(`[${r}:hasNullOrUndefined] Received an invalid "list" param expected a non-empty key-pair object, got: ${JSON.stringify(e)}.`);for(const[t,n]of Object.entries(e))if(h(n)){for(const[e,o]of Object.entries(n))if(null==o)return console.error(`[${r}:hasNullOrUndefined] Null or undefined value detected for key "${t}.${e}".\n${String(o)}`),!0}else if(null==n)return console.error(`[${r}:hasNullOrUndefined] Null or undefined value detected for key "${t}".\n${String(n)}`),!0;return!1},w=({arrObj:e=[],trainingSplit:r=.8,yCallbackFunc:n=e=>e,xCallbackFunc:o=e=>e,validateRows:f=()=>!0,shuffle:h=!1,balancing:p="",state:w={}})=>{d(e,{min:2},"parseTrainingXY"),y(e[0]);let g=[],b=[],v=null,x=null;const X={};for(let r=0;r<e.length;r++)try{if(!f({objRow:e,index:r,state:w}))continue;const t=o({objRow:e,index:r,state:w}),a=n({objRow:e,index:r,state:w});if(null==t||null==a)continue;if(m(t,"parseTrainingXY"))throw new Error('Invalid numeric value returned from "xCallbackFunc".');if(null===v&&(v=Object.keys(t)),null===x){x=Object.keys(a);for(let e=0;e<x.length;e++)X[x[e]]={}}const l=v.length,i=x.length,s=new Array(l),u=new Array(i);for(let e=0;e<l;e++){const r=v[e];s[e]=t[r]}for(let e=0;e<i;e++){const r=x[e],t=a[r];u[e]=t;const n=Array.isArray(t)?JSON.stringify(t):String(t);X[r][n]=(X[r][n]??0)+1}g.push(s),b.push(u)}catch(e){throw new Error(`[BUG] - Skipped row index=${r}: ${e.message}`)}if(h){const e=new Array(g.length);for(let r=0;r<g.length;r++)e[r]={x:g[r],y:b[r]};const r=t(e);g=new Array(r.length),b=new Array(r.length);for(let e=0;e<r.length;e++)g[e]=r[e].x,b[e]=r[e].y}const E={keyNames:v??[]},A={keyNames:x??[],labelCounts:X},j=Math.floor(g.length*r);let O=g.slice(0,j),$=b.slice(0,j),k=g.slice(j),N=b.slice(j);if(p){let e;if("oversample"===p)e=((e,r,t={})=>{const{random:n,seed:o,shuffleResult:i=!0,cloneX:f=!1}=t,h=a({random:n,seed:o}),d=c(e,r,{cloneX:f}),y=[...d.values()].map((e=>e.length)),m=Math.max(...y),p=[];for(const e of d.values()){const r=[...e],t=m-r.length,n=t>0?s(e,t,h).map((e=>({x:u(e.x,f),y:e.y}))):[];p.push(...r,...n)}return i&&l(p,h),{X:p.map((({x:e})=>e)),Y:p.map((({y:e})=>e))}})(O,$),O=e.X,$=e.Y;else{if("undersample"!==p)throw Error('balancing argument only accepts "", "oversample" and "undersample". Defaults to "".');e=((e,r,t={})=>{const{random:n,seed:o,shuffleResult:s=!0,cloneX:f=!1}=t,h=a({random:n,seed:o}),d=c(e,r,{cloneX:f}),y=[...d.values()].map((e=>e.length)),m=Math.min(...y),p=[];for(const e of d.values()){const r=i(e,m,h).map((e=>({x:u(e.x,f),y:e.y})));p.push(...r)}return s&&l(p,h),{X:p.map((({x:e})=>e)),Y:p.map((({y:e})=>e))}})(O,$),O=e.X,$=e.Y}}return{trainX:O,trainY:$,testX:k,testY:N,configX:E,configY:A}},g=({arrObj:e=[],xCallbackFunc:r=e=>e,yCallbackFunc:n=null,validateRows:o=()=>!0,shuffle:a=!1,state:l={}})=>{let i=[],s=null;if(d(e,{min:1},"parseProductionX"),y(e[0]),null!=n)throw new Error('The property "yCallbackFunc" must not be set in "parseProductionX".');for(let t=0;t<e.length;t++)try{if(!o({objRow:e,index:t,state:l}))continue;const n=r({objRow:e,index:t,state:l});if(null==n)continue;if(m(n,"parseProductionX"))throw new Error('Invalid numeric value returned from "xCallbackFunc".');null===s&&(s=Object.keys(n));const a=s.length,u=new Array(a);for(let e=0;e<a;e++){const r=s[e];u[e]=n[r]}i.push(u)}catch(e){throw new Error(`[BUG] - Skipped row index=${t}: ${e.message}`)}return a&&(i=t(i)),{X:i,configX:{keyNames:s??[]}}},b=(e,r,t=1)=>{if(!Array.isArray(e))throw new Error("arr must be an array");if(!Number.isInteger(r)||r<=0)throw new Error("timeSteps must be a positive integer");if(!Number.isInteger(t)||t<=0)throw new Error("step must be a positive integer");if(r>e.length)return[];const n=[];for(let o=0;o<=e.length-r;o+=t)n.push(e.slice(o,o+r));return n};XY_Scale=r})();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "xy-scale",
3
- "version": "1.4.39",
3
+ "version": "1.4.42",
4
4
  "main": "./index.js",
5
5
  "type": "module",
6
6
  "scripts": {
package/src/datasets.js CHANGED
@@ -1,376 +1,115 @@
1
- import { arrayShuffle, isBadNumber } from "./utilities.js";
1
+ import { arrayShuffle } from "./utilities.js";
2
2
  import { oversampleXY, undersampleXY } from "./balancing.js";
3
- import { validateFirstRow, validateArray } from "./validators.js";
3
+ import { validateFirstRow, validateArray, hasInvalidNumbers } from "./validators.js";
4
4
 
5
- // ADD A PARAM max correlation that will measure the correlation between variables if defined
6
-
7
- const isPlainObject = value =>
8
- Object.prototype.toString.call(value) === '[object Object]';
9
-
10
- const getRowKind = row => {
11
- if (Array.isArray(row)) return 'array';
12
- if (isPlainObject(row)) return 'object';
13
- return typeof row;
14
- };
15
-
16
- const getComparableKeys = row =>
17
- Object.keys(row).filter(key => key !== 'tempIdx');
18
-
19
- const buildPath = ({ parentPath = '', key, isArrayParent = false }) => {
20
- const nextPart = isArrayParent ? `[${key}]` : (parentPath ? `.${key}` : `${key}`);
21
- return `${parentPath}${nextPart}`;
22
- };
23
-
24
- const createStrictSchemaError = ({
25
- rowLabel,
26
- sourceIndex,
27
- path = '',
28
- detail,
29
- strictFlagName,
30
- }) => {
31
- const pathText = path ? ` path "${path}"` : '';
32
- return new Error(
33
- `${rowLabel} schema error at index "${sourceIndex}"${pathText}. ${detail} Set "${strictFlagName}" to false to disable this validation.`
34
- );
35
- };
36
-
37
- const assertSameTopLevelSchema = ({
38
- referenceRow,
39
- currentRow,
40
- rowLabel,
41
- currentIndex,
42
- strictFlagName,
5
+ export const parseTrainingXY = ({
6
+ arrObj = [],
7
+ trainingSplit = 0.8,
8
+ yCallbackFunc = row => row,
9
+ xCallbackFunc = row => row,
10
+ validateRows = () => true,
11
+ shuffle = false,
12
+ balancing = '',
13
+ state = {},
43
14
  }) => {
44
- const referenceKind = getRowKind(referenceRow);
45
- const currentKind = getRowKind(currentRow);
46
-
47
- if (referenceKind !== currentKind) {
48
- throw createStrictSchemaError({
49
- rowLabel,
50
- sourceIndex: currentIndex,
51
- detail: `Expected row type "${referenceKind}" based on the first parsed ${rowLabel} row, but got "${currentKind}".`,
52
- strictFlagName,
53
- });
54
- }
55
-
56
- if (referenceKind !== 'array' && referenceKind !== 'object') {
57
- throw createStrictSchemaError({
58
- rowLabel,
59
- sourceIndex: currentIndex,
60
- detail: `Parsed ${rowLabel} rows must be arrays or plain objects.`,
61
- strictFlagName,
62
- });
63
- }
64
-
65
- const referenceKeys = getComparableKeys(referenceRow);
66
- const currentKeys = getComparableKeys(currentRow);
67
-
68
- const referenceKeySet = new Set(referenceKeys);
69
- const currentKeySet = new Set(currentKeys);
70
- const isArrayRow = Array.isArray(referenceRow);
71
-
72
- for (let i = 0; i < referenceKeys.length; i++) {
73
- const key = referenceKeys[i];
74
-
75
- if (!currentKeySet.has(key)) {
76
- throw createStrictSchemaError({
77
- rowLabel,
78
- sourceIndex: currentIndex,
79
- path: buildPath({ key, isArrayParent: isArrayRow }),
80
- detail: `Missing required ${isArrayRow ? 'index' : 'property'} "${key}" found in the first parsed ${rowLabel} row.`,
81
- strictFlagName,
82
- });
83
- }
84
- }
15
+ validateArray(arrObj, { min: 2 }, 'parseTrainingXY');
16
+ validateFirstRow(arrObj[0]);
85
17
 
86
- for (let i = 0; i < currentKeys.length; i++) {
87
- const key = currentKeys[i];
88
-
89
- if (!referenceKeySet.has(key)) {
90
- throw createStrictSchemaError({
91
- rowLabel,
92
- sourceIndex: currentIndex,
93
- path: buildPath({ key, isArrayParent: isArrayRow }),
94
- detail: `Unexpected ${isArrayRow ? 'index' : 'property'} "${key}" not present in the first parsed ${rowLabel} row.`,
95
- strictFlagName,
96
- });
97
- }
98
- }
18
+ let flatX = [];
19
+ let flatY = [];
99
20
 
100
- return referenceKeys;
101
- };
21
+ let keyNamesX = null;
22
+ let keyNamesY = null;
102
23
 
103
- const getYValueKind = value => {
104
- if (Array.isArray(value)) return 'array';
105
- return typeof value;
106
- };
24
+ const labelCounts = {};
107
25
 
108
- const assertSameYValueSchema = ({
109
- referenceValue,
110
- currentValue,
111
- sourceIndex,
112
- path,
113
- strictFlagName,
114
- }) => {
115
- const referenceKind = getYValueKind(referenceValue);
116
- const currentKind = getYValueKind(currentValue);
117
-
118
- if (referenceKind !== currentKind) {
119
- throw createStrictSchemaError({
120
- rowLabel: 'Y',
121
- sourceIndex,
122
- path,
123
- detail: `Expected type "${referenceKind}" based on the first parsed Y row, but got "${currentKind}".`,
124
- strictFlagName,
125
- });
126
- }
26
+ for (let x = 0; x < arrObj.length; x++) {
27
+ try {
28
+ if (!validateRows({ objRow: arrObj, index: x, state })) continue;
127
29
 
128
- if (referenceKind === 'array') {
129
- if (referenceValue.length !== currentValue.length) {
130
- throw createStrictSchemaError({
131
- rowLabel: 'Y',
132
- sourceIndex,
133
- path,
134
- detail: `Expected array length "${referenceValue.length}" based on the first parsed Y row, but got "${currentValue.length}".`,
135
- strictFlagName,
136
- });
137
- }
30
+ const parsedX = xCallbackFunc({ objRow: arrObj, index: x, state });
31
+ const parsedY = yCallbackFunc({ objRow: arrObj, index: x, state });
138
32
 
139
- for (let i = 0; i < referenceValue.length; i++) {
140
- assertSameYValueSchema({
141
- referenceValue: referenceValue[i],
142
- currentValue: currentValue[i],
143
- sourceIndex,
144
- path: buildPath({ parentPath: path, key: i, isArrayParent: true }),
145
- strictFlagName,
146
- });
147
- }
33
+ if (parsedX == null || parsedY == null) continue;
148
34
 
149
- return;
150
- }
35
+ if (hasInvalidNumbers(parsedX, 'parseTrainingXY')) {
36
+ throw new Error(`Invalid numeric value returned from "xCallbackFunc".`);
37
+ }
151
38
 
152
- if (
153
- referenceKind !== 'number' &&
154
- referenceKind !== 'boolean' &&
155
- referenceKind !== 'string'
156
- ) {
157
- throw createStrictSchemaError({
158
- rowLabel: 'Y',
159
- sourceIndex,
160
- path,
161
- detail: `Unsupported Y value type "${referenceKind}". Y values must be numbers, booleans, strings, or nested arrays of those types.`,
162
- strictFlagName,
163
- });
164
- }
165
- };
39
+ if (keyNamesX === null) {
40
+ keyNamesX = Object.keys(parsedX);
41
+ }
166
42
 
167
- const validateStrictXRows = ({ rows, sourceIndexes, strictFlagName }) => {
168
- if (!rows.length) return;
43
+ if (keyNamesY === null) {
44
+ keyNamesY = Object.keys(parsedY);
169
45
 
170
- const referenceRow = rows[0];
46
+ for (let i = 0; i < keyNamesY.length; i++) {
47
+ labelCounts[keyNamesY[i]] = {};
48
+ }
49
+ }
171
50
 
172
- const compareRow = (currentRow, currentIndex) => {
173
- assertSameTopLevelSchema({
174
- referenceRow,
175
- currentRow,
176
- rowLabel: 'X',
177
- currentIndex,
178
- strictFlagName,
179
- });
180
- };
51
+ const xLen = keyNamesX.length;
52
+ const yLen = keyNamesY.length;
181
53
 
182
- if (rows.length > 1) {
183
- compareRow(rows[rows.length - 1], sourceIndexes[rows.length - 1]);
184
- }
54
+ const rowX = new Array(xLen);
55
+ const rowY = new Array(yLen);
185
56
 
186
- for (let i = 0; i < rows.length; i++) {
187
- compareRow(rows[i], sourceIndexes[i]);
188
- }
189
- };
57
+ for (let i = 0; i < xLen; i++) {
58
+ const key = keyNamesX[i];
59
+ rowX[i] = parsedX[key];
60
+ }
190
61
 
191
- const validateStrictYRows = ({ rows, sourceIndexes, strictFlagName }) => {
192
- if (!rows.length) return;
193
-
194
- const referenceRow = rows[0];
195
-
196
- const compareRow = (currentRow, currentIndex) => {
197
- const referenceKeys = assertSameTopLevelSchema({
198
- referenceRow,
199
- currentRow,
200
- rowLabel: 'Y',
201
- currentIndex,
202
- strictFlagName,
203
- });
204
-
205
- const isArrayRow = Array.isArray(referenceRow);
206
-
207
- for (let i = 0; i < referenceKeys.length; i++) {
208
- const key = referenceKeys[i];
209
- const path = buildPath({ key, isArrayParent: isArrayRow });
210
-
211
- assertSameYValueSchema({
212
- referenceValue: referenceRow[key],
213
- currentValue: currentRow[key],
214
- sourceIndex: currentIndex,
215
- path,
216
- strictFlagName,
217
- });
218
- }
219
- };
62
+ for (let i = 0; i < yLen; i++) {
63
+ const key = keyNamesY[i];
64
+ const value = parsedY[key];
220
65
 
221
- if (rows.length > 1) {
222
- compareRow(rows[rows.length - 1], sourceIndexes[rows.length - 1]);
223
- }
66
+ rowY[i] = value;
224
67
 
225
- for (let i = 0; i < rows.length; i++) {
226
- compareRow(rows[i], sourceIndexes[i]);
227
- }
228
- };
68
+ const labelKey = Array.isArray(value)
69
+ ? JSON.stringify(value)
70
+ : String(value);
229
71
 
230
- export const parseTrainingXY = ({
231
- arrObj = [], // array of objects
232
- trainingSplit = 0.8, // numeric float between 0.01 and 0.99
233
- yCallbackFunc = row => row, // accepted callback functions
234
- xCallbackFunc = row => row, // accepted callback functions
235
- validateRows = () => true, // accepted callback functions
236
- shuffle = false, // only booleans
237
- balancing = '', // accepted '', 'oversample' or 'undersample'
238
- strictXSchema = true,
239
- strictYSchema = true,
240
- state = {}, // accepted object or classes
241
- }) => {
242
- let X = [];
243
- let Y = [];
244
- const sourceIndexes = [];
72
+ labelCounts[key][labelKey] = (labelCounts[key][labelKey] ?? 0) + 1;
73
+ }
245
74
 
246
- validateArray(arrObj, { min: 2 }, 'parseTrainingXY');
247
- validateFirstRow(arrObj[0]);
75
+ flatX.push(rowX);
76
+ flatY.push(rowY);
248
77
 
249
- for (let x = 0; x < arrObj.length; x++) {
250
- if (!validateRows({ objRow: arrObj, index: x, state })) continue;
251
-
252
- const parsedX = xCallbackFunc({ objRow: arrObj, index: x, state });
253
- const parsedY = yCallbackFunc({ objRow: arrObj, index: x, state });
254
-
255
- if (
256
- typeof parsedX !== 'undefined' &&
257
- parsedX !== null &&
258
- typeof parsedY !== 'undefined' &&
259
- parsedY !== null
260
- ) {
261
- X.push(parsedX);
262
- Y.push(parsedY);
263
- sourceIndexes.push(x);
78
+ } catch(err) {
79
+ throw new Error(`[BUG] - Skipped row index=${x}: ${err.message}`);
264
80
  }
265
81
  }
266
82
 
267
- if (strictXSchema) {
268
- validateStrictXRows({
269
- rows: X,
270
- sourceIndexes,
271
- strictFlagName: 'strictXSchema',
272
- });
273
- }
274
-
275
- if (strictYSchema) {
276
- validateStrictYRows({
277
- rows: Y,
278
- sourceIndexes,
279
- strictFlagName: 'strictYSchema',
280
- });
281
- }
282
-
283
83
  if (shuffle) {
284
- const merged = new Array(X.length);
84
+ const merged = new Array(flatX.length);
285
85
 
286
- for (let i = 0; i < X.length; i++) {
86
+ for (let i = 0; i < flatX.length; i++) {
287
87
  merged[i] = {
288
- x: X[i],
289
- y: Y[i],
290
- sourceIndex: sourceIndexes[i],
88
+ x: flatX[i],
89
+ y: flatY[i]
291
90
  };
292
91
  }
293
92
 
294
93
  const shuffled = arrayShuffle(merged);
295
94
 
296
- X = new Array(shuffled.length);
297
- Y = new Array(shuffled.length);
95
+ flatX = new Array(shuffled.length);
96
+ flatY = new Array(shuffled.length);
298
97
 
299
98
  for (let i = 0; i < shuffled.length; i++) {
300
- X[i] = shuffled[i].x;
301
- Y[i] = shuffled[i].y;
302
- sourceIndexes[i] = shuffled[i].sourceIndex;
99
+ flatX[i] = shuffled[i].x;
100
+ flatY[i] = shuffled[i].y;
303
101
  }
304
102
  }
305
103
 
306
- const xLen = X.length;
307
- const yLen = Y.length;
308
-
309
- const xKeys = xLen ? getComparableKeys(X[0]) : [];
310
- const yKeys = yLen ? getComparableKeys(Y[0]) : [];
311
-
312
- const flatX = new Array(xLen);
313
- const flatY = new Array(yLen);
314
-
315
104
  const configX = {
316
- keyNames: xKeys,
105
+ keyNames: keyNamesX ?? [],
317
106
  };
318
107
 
319
-
320
-
321
- for (let idx = 0; idx < xLen; idx++) {
322
- const rowObj = X[idx];
323
- const sourceIndex = sourceIndexes[idx];
324
- const flatRow = new Array(xKeys.length);
325
-
326
- for (let j = 0; j < xKeys.length; j++) {
327
- const key = xKeys[j];
328
- const value = rowObj[key];
329
-
330
- if (isBadNumber(value)) {
331
- throw new Error(
332
- `Invalid property value (${value}) returned from "xCallbackFunc" on index "${sourceIndex}" property "${key}".`
333
- );
334
- }
335
-
336
- flatRow[j] = value;
337
- }
338
-
339
- flatX[idx] = flatRow;
340
- }
341
-
342
- const toLabelKey = value => Array.isArray(value)
343
- ? JSON.stringify(value)
344
- : String(value);
345
-
346
- const initLabelCounts = keyNames =>
347
- Object.fromEntries(
348
- keyNames.map(keyName => [keyName, ({})])
349
- );
350
-
351
108
  const configY = {
352
- keyNames: yKeys,
353
- labelCounts: initLabelCounts(yKeys),
109
+ keyNames: keyNamesY ?? [],
110
+ labelCounts,
354
111
  };
355
112
 
356
- for (let idx = 0; idx < yLen; idx++) {
357
- const rowObj = Y[idx];
358
- const flatRow = new Array(yKeys.length);
359
-
360
- for (let j = 0; j < yKeys.length; j++) {
361
- const keyName = yKeys[j];
362
- const value = rowObj[keyName];
363
-
364
- flatRow[j] = value;
365
-
366
- const labelKey = toLabelKey(value);
367
- configY.labelCounts[keyName][labelKey] =
368
- (configY.labelCounts[keyName][labelKey] ?? 0) + 1;
369
- }
370
-
371
- flatY[idx] = flatRow;
372
- }
373
-
374
113
  const splitIndex = Math.floor(flatX.length * trainingSplit);
375
114
 
376
115
  let trainX = flatX.slice(0, splitIndex);
@@ -403,95 +142,62 @@ export const parseTrainingXY = ({
403
142
  configY,
404
143
  };
405
144
  };
406
-
407
145
  export const parseProductionX = ({
408
146
  arrObj = [],
409
147
  xCallbackFunc = row => row,
410
148
  yCallbackFunc = null,
411
149
  validateRows = () => true,
412
150
  shuffle = false,
413
- strictXSchema = true,
414
151
  state = {},
415
152
  }) => {
416
- let X = [];
417
- let sourceIndexes = [];
153
+ let flatX = [];
154
+ let keyNamesX = null;
418
155
 
419
156
  validateArray(arrObj, { min: 1 }, 'parseProductionX');
420
157
  validateFirstRow(arrObj[0]);
421
158
 
422
- if(yCallbackFunc != null) {
423
- throw new Error('The property "yCallbackFunc" must not be set in "parseProductionX".')
159
+ if (yCallbackFunc != null) {
160
+ throw new Error('The property "yCallbackFunc" must not be set in "parseProductionX".');
424
161
  }
425
162
 
426
163
  for (let x = 0; x < arrObj.length; x++) {
427
- if (!validateRows({ objRow: arrObj, index: x, state })) continue;
164
+ try {
165
+ if (!validateRows({ objRow: arrObj, index: x, state })) continue;
428
166
 
429
- const parsedX = xCallbackFunc({ objRow: arrObj, index: x, state });
167
+ const parsedX = xCallbackFunc({ objRow: arrObj, index: x, state });
430
168
 
431
- if (typeof parsedX !== 'undefined' && parsedX !== null && parsedX !== false) {
432
- X.push(parsedX);
433
- sourceIndexes.push(x);
434
- }
435
- }
169
+ if (parsedX == null) continue;
436
170
 
437
- if (strictXSchema) {
438
- validateStrictXRows({
439
- rows: X,
440
- sourceIndexes,
441
- strictFlagName: 'strictXSchema',
442
- });
443
- }
171
+ if (hasInvalidNumbers(parsedX, 'parseProductionX')) {
172
+ throw new Error(`Invalid numeric value returned from "xCallbackFunc".`);
173
+ }
444
174
 
445
- if (shuffle) {
446
- const merged = new Array(X.length);
175
+ if (keyNamesX === null) {
176
+ keyNamesX = Object.keys(parsedX);
177
+ }
447
178
 
448
- for (let i = 0; i < X.length; i++) {
449
- merged[i] = {
450
- x: X[i],
451
- sourceIndex: sourceIndexes[i],
452
- };
453
- }
179
+ const xLen = keyNamesX.length;
180
+ const rowX = new Array(xLen);
454
181
 
455
- const shuffled = arrayShuffle(merged);
456
-
457
- X = new Array(shuffled.length);
458
- sourceIndexes = new Array(shuffled.length);
182
+ for (let i = 0; i < xLen; i++) {
183
+ const key = keyNamesX[i];
184
+ rowX[i] = parsedX[key];
185
+ }
459
186
 
460
- for (let i = 0; i < shuffled.length; i++) {
461
- X[i] = shuffled[i].x;
462
- sourceIndexes[i] = shuffled[i].sourceIndex;
187
+ flatX.push(rowX);
188
+ } catch(err) {
189
+ throw new Error(`[BUG] - Skipped row index=${x}: ${err.message}`);
463
190
  }
464
191
  }
465
192
 
466
- const xLen = X.length;
467
- const xKeys = xLen ? getComparableKeys(X[0]) : [];
468
- const flatX = new Array(xLen);
193
+ if (shuffle) {
194
+ flatX = arrayShuffle(flatX);
195
+ }
469
196
 
470
197
  const configX = {
471
- keyNames: xKeys,
198
+ keyNames: keyNamesX ?? [],
472
199
  };
473
200
 
474
- for (let idx = 0; idx < xLen; idx++) {
475
- const rowObj = X[idx];
476
- const sourceIndex = sourceIndexes[idx];
477
- const flatRow = new Array(xKeys.length);
478
-
479
- for (let j = 0; j < xKeys.length; j++) {
480
- const key = xKeys[j];
481
- const value = rowObj[key];
482
-
483
- if (isBadNumber(value)) {
484
- throw new Error(
485
- `Invalid property value (${value}) returned from "xCallbackFunc" on index "${sourceIndex}" property "${key}".`
486
- );
487
- }
488
-
489
- flatRow[j] = value;
490
- }
491
-
492
- flatX[idx] = flatRow;
493
- }
494
-
495
201
  return {
496
202
  X: flatX,
497
203
  configX,
package/src/utilities.js CHANGED
@@ -1,4 +1,3 @@
1
- export const isBadNumber = (v) => v == null || !Number.isFinite(v)
2
1
 
3
2
  export const arrayShuffle = X => {
4
3
  // Make a copy of the array to avoid mutating the original
package/src/validators.js CHANGED
@@ -1,17 +1,13 @@
1
- export const validateExcludes = (row, excludes) => {
2
1
 
3
- const keyNames = Object.keys(row)
4
-
5
- if(!Array.isArray(excludes))
6
- {
7
- throw new Error(`Property "excludes" must be an array.`)
8
- }
9
-
10
- for(const k of excludes)
11
- {
12
- if(!keyNames.includes(k)) throw new Error(`An item in "excludes" property was not found in "arrObj".\n\nexcludes: ${JSON.stringify(excludes)}\n\narrObj: ${JSON.stringify(keyNames)}`)
13
- }
2
+ export const isNumber = v => v != null && Number.isFinite(v)
14
3
 
4
+ export const isKeyPairObject = param => {
5
+ return (
6
+ param !== null &&
7
+ typeof param === "object" &&
8
+ !Array.isArray(param) &&
9
+ Object.keys(param).length > 0
10
+ );
15
11
  }
16
12
 
17
13
  export const validateArray = (arr, {min = -Infinity, max = Infinity}, paramName) => {
@@ -35,15 +31,105 @@ export const validateArray = (arr, {min = -Infinity, max = Infinity}, paramName)
35
31
 
36
32
  export const validateFirstRow = row => {
37
33
 
34
+ if(!isKeyPairObject(row)) {
35
+ throw new Error(`The first item in arrObj is expeted to be a key par object.`)
36
+ }
37
+
38
+ const numeric = {}
39
+ const nonNumeric = {}
40
+
38
41
  for(const [k, v] of Object.entries(row))
39
42
  {
40
- if (typeof v === 'number' && Number.isNaN(v)) {
41
- throw new Error(`Invalid value at index 0 property "${k}": value is "${v}". Expected a numeric value.`);
43
+ if (typeof v === 'number') {
44
+ numeric[k] = v
42
45
  }
43
- if (v === null) {
44
- throw new Error(`Invalid value at index 0 property "${k}": value is "${v}".`);
46
+ else {
47
+ nonNumeric[k] = v
45
48
  }
46
49
  }
50
+
51
+ if (Object.keys(numeric).length > 0 && hasInvalidNumbers(numeric, 'validateFirstRow')) {
52
+ throw new Error(`Invalid numeric value at index 0.`)
53
+ }
54
+ if (Object.keys(nonNumeric).length > 0 && hasNullOrUndefined(nonNumeric, 'validateFirstRow')) {
55
+ throw new Error(`Invalid non-numeric value at index 0.`)
56
+ }
47
57
 
48
58
  return true
59
+ }
60
+
61
+ export const hasInvalidNumbers = (list, callerName) => {
62
+ if (callerName == null) {
63
+ throw new Error('[hasInvalidNumbers] Missing required param "callerName".')
64
+ }
65
+
66
+ if (!isKeyPairObject(list)) {
67
+ throw new Error(`[${callerName}:hasInvalidNumbers] Received an invalid "list" param — expected a non-empty key-pair object, got: ${JSON.stringify(list)}.`)
68
+ }
69
+
70
+ for (const [k, v] of Object.entries(list)) {
71
+ if(!isNumber(v)) {
72
+ console.error(`[${callerName}:hasInvalidNumbers] property "${k}" only accept numbers. Invalid value is "${v}" and invalid type is "${typeof v}".`)
73
+ return true
74
+ }
75
+ }
76
+
77
+ return false
78
+ }
79
+
80
+ export const hasNullOrUndefined = (list, callerName) => {
81
+ if (callerName == null) {
82
+ throw new Error('[hasNullOrUndefined] Missing required param "callerName".')
83
+ }
84
+
85
+ if (!isKeyPairObject(list)) {
86
+ throw new Error(`[${callerName}:hasNullOrUndefined] Received an invalid "list" param — expected a non-empty key-pair object, got: ${JSON.stringify(list)}.`)
87
+ }
88
+
89
+ for (const [k, v] of Object.entries(list)) {
90
+
91
+ if(isKeyPairObject(v)) {
92
+ for(const [k2, v2] of Object.entries(v)) {
93
+ if(v2 == null) {
94
+ console.error(`[${callerName}:hasNullOrUndefined] Null or undefined value detected for key "${k}.${k2}".\n${String(v2)}`)
95
+ return true
96
+ }
97
+ }
98
+ }
99
+ else if (v == null) {
100
+ console.error(`[${callerName}:hasNullOrUndefined] Null or undefined value detected for key "${k}".\n${String(v)}`)
101
+ return true
102
+ }
103
+ }
104
+
105
+ return false
106
+ }
107
+
108
+ export const arraysAreNotEqualSize = (list, callerName) => {
109
+ if (callerName == null) {
110
+ throw new Error('[arraysArentTheSameSize] Missing required param "callerName".')
111
+ }
112
+
113
+ if (!isKeyPairObject(list)) {
114
+ throw new Error(`[${callerName}:arraysArentTheSameSize] Received an invalid "list" param — expected a non-empty key-pair object, got: ${JSON.stringify(list)}.`)
115
+ }
116
+
117
+ let preArr = null
118
+
119
+ for (const [k, v] of Object.entries(list)) {
120
+ if(!Array.isArray(v)) {
121
+ console.error(`[${callerName}:arraysArentTheSameSize] Invalid array detected in key "${k}".\n${String(v)}`)
122
+ return true
123
+ }
124
+
125
+ if(preArr === null) {
126
+ preArr = v.length
127
+ } else {
128
+ if(preArr !== v.length) {
129
+ return true
130
+ }
131
+ }
132
+ }
133
+
134
+ return false
49
135
  }