xy-scale 1.4.32 → 1.4.34
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/xy-scale.min.js +1 -1
- package/package.json +1 -1
- package/src/balancing.js +184 -68
- package/src/datasets.js +253 -4
package/dist/xy-scale.min.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
var XY_Scale;(()=>{"use strict";var e={d:(t
|
|
1
|
+
var XY_Scale;(()=>{"use strict";var e={d:(r,t)=>{for(var n in t)e.o(t,n)&&!e.o(r,n)&&Object.defineProperty(r,n,{enumerable:!0,get:t[n]})},o:(e,r)=>Object.prototype.hasOwnProperty.call(e,r),r:e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})}},r={};e.r(r),e.d(r,{arrayToTimesteps:()=>A,parseProductionX:()=>v,parseTrainingXY:()=>$});const t=e=>null==e||!Number.isFinite(e),n=e=>{const r=[...e];for(let e=r.length-1;e>0;e--){const t=Math.floor(Math.random()*(e+1));[r[e],r[t]]=[r[t],r[e]]}return r},a=e=>"string"==typeof e||"boolean"==typeof e||("number"==typeof e?Number.isFinite(e):!!Array.isArray(e)&&e.every(a)),o=({random:e,seed:r}={})=>"function"==typeof e?e:Number.isInteger(r)?(e=>{let r=e>>>0;return()=>{r+=1831565813;let e=Math.imul(r^r>>>15,1|r);return e^=e+Math.imul(e^e>>>7,61|e),((e^e>>>14)>>>0)/4294967296}})(r):Math.random,s=(e,r=Math.random)=>{for(let t=e.length-1;t>0;t--){const n=Math.floor(r()*(t+1));[e[t],e[n]]=[e[n],e[t]]}return e},l=(e,r,t=Math.random)=>{if(r>e.length)throw new Error("Cannot sample more items than available without replacement.");const n=[...e];return s(n,t),n.slice(0,r)},i=(e,r,t=Math.random)=>{if(0===e.length)throw new Error("Cannot sample from an empty array.");const n=[];for(let a=0;a<r;a++){const r=Math.floor(t()*e.length);n.push(e[r])}return n},c=(e,r)=>r?structuredClone(e):e,h=(e,r,{cloneX:t=!1}={})=>{((e,r)=>{if(!Array.isArray(e)||!Array.isArray(r))throw new Error("X and Y must be arrays.");if(e.length!==r.length)throw new Error("X and Y must have the same length.");if(0===e.length)throw new Error("X and Y cannot be empty.")})(e,r);const n=new Map;return r.forEach(((r,o)=>{const s=(e=>{if(!a(e))throw new Error("Invalid Y label. 
Allowed types: finite numbers, strings, booleans, or nested arrays of those.");return JSON.stringify(e)})(r);n.has(s)||n.set(s,[]),n.get(s).push({x:c(e[o],t),y:r})})),n},u=(e,{min:r=-1/0,max:t=1/0},n)=>{if(!Array.isArray(e))throw new Error(`Invalid property. "${n}" expected an array.`);if(e.length<r)throw new Error(`Invalid property value. Array "${n}" expected at least ${t} items.`);if(e.length>t)throw new Error(`Invalid property value. Array "${n}" expected at max ${t} items.`);return!0},d=e=>{for(const[r,t]of Object.entries(e)){if("number"==typeof t&&Number.isNaN(t))throw new Error(`Invalid value at index 0 property "${r}": value is "${t}". Expected a numeric value.`);if(null===t)throw new Error(`Invalid value at index 0 property "${r}": value is "${t}".`)}return!0},f=e=>{return Array.isArray(e)?"array":(r=e,"[object Object]"===Object.prototype.toString.call(r)?"object":typeof e);var r},p=e=>Object.keys(e).filter((e=>"tempIdx"!==e)),y=({parentPath:e="",key:r,isArrayParent:t=!1})=>`${e}${t?`[${r}]`:e?`.${r}`:`${r}`}`,g=({rowLabel:e,sourceIndex:r,path:t="",detail:n,strictFlagName:a})=>new Error(`${e} schema error at index "${r}"${t?` path "${t}"`:""}. 
${n} Set "${a}" to false to disable this validation.`),m=({referenceRow:e,currentRow:r,rowLabel:t,currentIndex:n,strictFlagName:a})=>{const o=f(e),s=f(r);if(o!==s)throw g({rowLabel:t,sourceIndex:n,detail:`Expected row type "${o}" based on the first parsed ${t} row, but got "${s}".`,strictFlagName:a});if("array"!==o&&"object"!==o)throw g({rowLabel:t,sourceIndex:n,detail:`Parsed ${t} rows must be arrays or plain objects.`,strictFlagName:a});const l=p(e),i=p(r),c=new Set(l),h=new Set(i),u=Array.isArray(e);for(let e=0;e<l.length;e++){const r=l[e];if(!h.has(r))throw g({rowLabel:t,sourceIndex:n,path:y({key:r,isArrayParent:u}),detail:`Missing required ${u?"index":"property"} "${r}" found in the first parsed ${t} row.`,strictFlagName:a})}for(let e=0;e<i.length;e++){const r=i[e];if(!c.has(r))throw g({rowLabel:t,sourceIndex:n,path:y({key:r,isArrayParent:u}),detail:`Unexpected ${u?"index":"property"} "${r}" not present in the first parsed ${t} row.`,strictFlagName:a})}return l},w=e=>Array.isArray(e)?"array":typeof e,b=({referenceValue:e,currentValue:r,sourceIndex:t,path:n,strictFlagName:a})=>{const o=w(e),s=w(r);if(o!==s)throw g({rowLabel:"Y",sourceIndex:t,path:n,detail:`Expected type "${o}" based on the first parsed Y row, but got "${s}".`,strictFlagName:a});if("array"!==o){if("number"!==o&&"boolean"!==o&&"string"!==o)throw g({rowLabel:"Y",sourceIndex:t,path:n,detail:`Unsupported Y value type "${o}". 
Y values must be numbers, booleans, strings, or nested arrays of those types.`,strictFlagName:a})}else{if(e.length!==r.length)throw g({rowLabel:"Y",sourceIndex:t,path:n,detail:`Expected array length "${e.length}" based on the first parsed Y row, but got "${r.length}".`,strictFlagName:a});for(let o=0;o<e.length;o++)b({referenceValue:e[o],currentValue:r[o],sourceIndex:t,path:y({parentPath:n,key:o,isArrayParent:!0}),strictFlagName:a})}},x=({rows:e,sourceIndexes:r,strictFlagName:t})=>{if(!e.length)return;const n=e[0],a=(e,r)=>{m({referenceRow:n,currentRow:e,rowLabel:"X",currentIndex:r,strictFlagName:t})};e.length>1&&a(e[e.length-1],r[e.length-1]);for(let t=0;t<e.length;t++)a(e[t],r[t])},$=({arrObj:e=[],trainingSplit:r=.8,yCallbackFunc:a=e=>e,xCallbackFunc:f=e=>e,validateRows:g=()=>!0,shuffle:w=!1,balancing:$="",strictXSchema:v=!0,strictYSchema:A=!0,state:I={}})=>{let N=[],X=[];const F=[];u(e,{min:5},"parseTrainingXY"),d(e[0]);for(let r=0;r<e.length;r++){if(!g({objRow:e,index:r,state:I}))continue;const t=f({objRow:e,index:r,state:I}),n=a({objRow:e,index:r,state:I});null!=t&&null!=n&&(N.push(t),X.push(n),F.push(r))}if(v&&x({rows:N,sourceIndexes:F,strictFlagName:"strictXSchema"}),A&&(({rows:e,sourceIndexes:r,strictFlagName:t})=>{if(!e.length)return;const n=e[0],a=(e,r)=>{const a=m({referenceRow:n,currentRow:e,rowLabel:"Y",currentIndex:r,strictFlagName:t}),o=Array.isArray(n);for(let s=0;s<a.length;s++){const l=a[s],i=y({key:l,isArrayParent:o});b({referenceValue:n[l],currentValue:e[l],sourceIndex:r,path:i,strictFlagName:t})}};e.length>1&&a(e[e.length-1],r[e.length-1]);for(let t=0;t<e.length;t++)a(e[t],r[t])})({rows:X,sourceIndexes:F,strictFlagName:"strictYSchema"}),w){const e=new Array(N.length);for(let r=0;r<N.length;r++)e[r]={x:N[r],y:X[r],sourceIndex:F[r]};const r=n(e);N=new Array(r.length),X=new Array(r.length);for(let e=0;e<r.length;e++)N[e]=r[e].x,X[e]=r[e].y,F[e]=r[e].sourceIndex}const Y=N.length,E=X.length,S=Y?p(N[0]):[],j=E?p(X[0]):[],M=new Array(Y),R=new 
Array(E),k={keyNames:S},P={keyNames:j};for(let e=0;e<Y;e++){const r=N[e],n=F[e],a=new Array(S.length);for(let e=0;e<S.length;e++){const o=S[e],s=r[o];if(t(s))throw new Error(`Invalid property value (${s}) returned from "xCallbackFunc" on index "${n}" property "${o}".`);a[e]=s}M[e]=a}for(let e=0;e<E;e++){const r=X[e],t=new Array(j.length);for(let e=0;e<j.length;e++)t[e]=r[j[e]];R[e]=t}const O=Math.floor(M.length*r);let L=M.slice(0,O),C=R.slice(0,O),T=M.slice(O),V=R.slice(O);if($){let e;if("oversample"===$)e=((e,r,t={})=>{const{random:n,seed:a,shuffleResult:l=!0,cloneX:u=!1}=t,d=o({random:n,seed:a}),f=h(e,r,{cloneX:u}),p=[...f.values()].map((e=>e.length)),y=Math.max(...p),g=[];for(const e of f.values()){const r=[...e],t=y-r.length,n=t>0?i(e,t,d).map((e=>({x:c(e.x,u),y:e.y}))):[];g.push(...r,...n)}return l&&s(g,d),{X:g.map((({x:e})=>e)),Y:g.map((({y:e})=>e))}})(L,C),L=e.X,C=e.Y;else{if("undersample"!==$)throw Error('balancing argument only accepts "", "oversample" and "undersample". Defaults to "".');e=((e,r,t={})=>{const{random:n,seed:a,shuffleResult:i=!0,cloneX:u=!1}=t,d=o({random:n,seed:a}),f=h(e,r,{cloneX:u}),p=[...f.values()].map((e=>e.length)),y=Math.min(...p),g=[];for(const e of f.values()){const r=l(e,y,d).map((e=>({x:c(e.x,u),y:e.y})));g.push(...r)}return i&&s(g,d),{X:g.map((({x:e})=>e)),Y:g.map((({y:e})=>e))}})(L,C),L=e.X,C=e.Y}}return{trainX:L,trainY:C,testX:T,testY:V,configX:k,configY:P}},v=({arrObj:e=[],xCallbackFunc:r=e=>e,validateRows:a=()=>!0,shuffle:o=!1,strictXSchema:s=!0,state:l={}})=>{let i=[],c=[];u(e,{min:5},"parseProductionX"),d(e[0]);for(let t=0;t<e.length;t++){if(!a({objRow:e,index:t,state:l}))continue;const n=r({objRow:e,index:t,state:l});null!=n&&!1!==n&&(i.push(n),c.push(t))}if(s&&x({rows:i,sourceIndexes:c,strictFlagName:"strictXSchema"}),o){const e=new Array(i.length);for(let r=0;r<i.length;r++)e[r]={x:i[r],sourceIndex:c[r]};const r=n(e);i=new Array(r.length),c=new Array(r.length);for(let 
e=0;e<r.length;e++)i[e]=r[e].x,c[e]=r[e].sourceIndex}const h=i.length,f=h?p(i[0]):[],y=new Array(h),g={keyNames:f};for(let e=0;e<h;e++){const r=i[e],n=c[e],a=new Array(f.length);for(let e=0;e<f.length;e++){const o=f[e],s=r[o];if(t(s))throw new Error(`Invalid property value (${s}) returned from "xCallbackFunc" on index "${n}" property "${o}".`);a[e]=s}y[e]=a}return{X:y,configX:g}},A=(e,r)=>{if(0===r)return e;if(r<0)throw new Error("timeSteps must be greater than 0");const t=[];for(let n=0;n<=e.length-r;n++)t.push(e.slice(n,n+r));return t};XY_Scale=r})();
|
package/package.json
CHANGED
package/src/balancing.js
CHANGED
|
@@ -1,71 +1,187 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
Y.forEach((label, i) => {
|
|
8
|
-
if (!labelCounts[label]) {
|
|
9
|
-
labelCounts[label] = 0;
|
|
10
|
-
data[label] = [];
|
|
11
|
-
}
|
|
12
|
-
labelCounts[label]++;
|
|
13
|
-
data[label].push([X[i], Y[i]]);
|
|
14
|
-
});
|
|
15
|
-
|
|
16
|
-
// Find the max label count
|
|
17
|
-
const maxCount = Math.max(...Object.values(labelCounts));
|
|
18
|
-
|
|
19
|
-
const oversampledX = [];
|
|
20
|
-
const oversampledY = [];
|
|
21
|
-
|
|
22
|
-
// Oversample each label to match the max count
|
|
23
|
-
Object.keys(data).forEach(label => {
|
|
24
|
-
const samples = data[label];
|
|
25
|
-
const numSamples = samples.length;
|
|
26
|
-
|
|
27
|
-
for (let i = 0; i < maxCount; i++) {
|
|
28
|
-
const sample = samples[i % numSamples]; // Cycle through samples
|
|
29
|
-
oversampledX.push(sample[0]);
|
|
30
|
-
oversampledY.push(sample[1]);
|
|
31
|
-
}
|
|
32
|
-
});
|
|
33
|
-
|
|
34
|
-
return { X: oversampledX, Y: oversampledY };
|
|
1
|
+
// Tells whether a value may be used as a Y label: a string, a boolean,
// a finite number, or an array (nested to any depth) of such values.
const isValidLabelValue = (value) => {
  switch (typeof value) {
    case "string":
    case "boolean":
      return true;
    case "number":
      // NaN and +/-Infinity are rejected.
      return Number.isFinite(value);
    default:
      // Anything that is not an array (objects, null, undefined, ...) is invalid.
      return Array.isArray(value) && value.every((item) => isValidLabelValue(item));
  }
};
|
|
11
|
+
|
|
12
|
+
// Turns a Y label into a string key (its JSON serialization) so labels can be
// used as Map keys. Throws when the label is not an accepted label value.
const makeLabelKey = (label) => {
  if (isValidLabelValue(label)) {
    return JSON.stringify(label);
  }

  throw new Error(
    "Invalid Y label. Allowed types: finite numbers, strings, booleans, or nested arrays of those."
  );
};
|
|
21
|
+
|
|
22
|
+
// Guards the X/Y pair handed to the balancing helpers: both must be arrays,
// of equal length, and non-empty. Throws an Error otherwise; returns nothing.
const validateXY = (X, Y) => {
  const bothAreArrays = Array.isArray(X) && Array.isArray(Y);
  if (!bothAreArrays) {
    throw new Error("X and Y must be arrays.");
  }

  const { length: rowCount } = X;

  if (rowCount !== Y.length) {
    throw new Error("X and Y must have the same length.");
  }

  if (rowCount === 0) {
    throw new Error("X and Y cannot be empty.");
  }
};
|
|
35
|
+
|
|
36
|
+
/**
 * Creates a seeded Mulberry32 pseudo-random number generator.
 *
 * @param {number} seed - Seed value; it is coerced to an unsigned 32-bit integer.
 * @returns {() => number} Function returning the next float in [0, 1).
 */
const mulberry32 = (seed) => {
  let state = seed >>> 0;

  return () => {
    // Wrap the state to 32 bits on every step. Left unbounded, the counter
    // grows as a double and, after a few million draws, exceeds 2^53 where
    // additions are rounded and the stream drifts from the Mulberry32 sequence.
    state = (state + 0x6D2B79F5) | 0;

    let mixed = Math.imul(state ^ (state >>> 15), state | 1);
    mixed ^= mixed + Math.imul(mixed ^ (mixed >>> 7), mixed | 61);

    // Unsigned 32-bit result scaled by 2^32.
    return ((mixed ^ (mixed >>> 14)) >>> 0) / 4294967296;
  };
};
|
|
46
|
+
|
|
47
|
+
// Picks the random source to use: an explicit `random` function wins, then a
// seeded generator when `seed` is an integer, otherwise Math.random.
const resolveRandom = (options = {}) => {
  const { random, seed } = options;

  const hasCustomGenerator = typeof random === "function";
  if (hasCustomGenerator) {
    return random;
  }

  return Number.isInteger(seed) ? mulberry32(seed) : Math.random;
};
|
|
52
|
+
|
|
53
|
+
// Fisher-Yates shuffle that reorders `arr` itself and returns it.
// `random` must return a float in [0, 1); it is called once per swap,
// walking from the last position down to index 1.
const shuffleInPlace = (arr, random = Math.random) => {
  for (let remaining = arr.length; remaining > 1; remaining--) {
    const last = remaining - 1;
    const pick = Math.floor(random() * remaining);

    const held = arr[last];
    arr[last] = arr[pick];
    arr[pick] = held;
  }

  return arr;
};
|
|
61
|
+
|
|
62
|
+
// Returns `size` distinct entries of `samples` in random order. The input is
// left untouched: a copy is shuffled and its first `size` entries returned.
// Throws when more items are requested than are available.
const sampleWithoutReplacement = (samples, size, random = Math.random) => {
  const available = samples.length;

  if (size > available) {
    throw new Error("Cannot sample more items than available without replacement.");
  }

  return shuffleInPlace([...samples], random).slice(0, size);
};
|
|
71
|
+
|
|
72
|
+
// Draws `size` entries from `samples`, each chosen independently (the same
// entry may be returned more than once). Throws when `samples` is empty.
const sampleWithReplacement = (samples, size, random = Math.random) => {
  const poolSize = samples.length;

  if (poolSize === 0) {
    throw new Error("Cannot sample from an empty array.");
  }

  const picks = [];
  let drawn = 0;

  while (drawn < size) {
    picks.push(samples[Math.floor(random() * poolSize)]);
    drawn += 1;
  }

  return picks;
};
|
|
86
|
+
|
|
87
|
+
// Returns a structuredClone of `value` when `clone` is truthy; otherwise hands
// back the very same reference.
const maybeClone = (value, clone) => (clone ? structuredClone(value) : value);
|
|
91
|
+
|
|
92
|
+
// Buckets the (x, y) pairs by label. Returns a Map keyed by the label's string
// key (see makeLabelKey) whose values are arrays of `{ x, y }` entries in input
// order. When `cloneX` is true each stored x is a clone of the input row.
// Throws on invalid X/Y input or on an invalid label.
const groupXYByLabel = (X, Y, { cloneX = false } = {}) => {
  validateXY(X, Y);

  const groups = new Map();

  Y.forEach((label, position) => {
    const key = makeLabelKey(label);

    let bucket = groups.get(key);
    if (bucket === undefined) {
      bucket = [];
      groups.set(key, bucket);
    }

    bucket.push({ x: maybeClone(X[position], cloneX), y: label });
  });

  return groups;
};
|
|
112
|
+
|
|
113
|
+
/**
 * Balances a dataset by oversampling: every label group is padded with
 * randomly re-drawn members (with replacement) until it is as large as the
 * largest group.
 *
 * @param {Array} X - Feature rows.
 * @param {Array} Y - Labels, one per row of X.
 * @param {object} [options]
 * @param {Function} [options.random] - Custom random source returning floats in [0, 1).
 * @param {number} [options.seed] - Integer seed used when `random` is not a function.
 * @param {boolean} [options.shuffleResult=true] - Shuffle the combined rows before returning.
 * @param {boolean} [options.cloneX=false] - Clone feature rows instead of sharing references.
 * @returns {{X: Array, Y: Array}} Balanced features and labels, index-aligned.
 * @throws {Error} When X/Y are not equally sized non-empty arrays or a label is invalid.
 */
export const oversampleXY = (X, Y, options = {}) => {
  const {
    random,
    seed,
    shuffleResult = true,
    cloneX = false,
  } = options;

  const rng = resolveRandom({ random, seed });
  const groups = groupXYByLabel(X, Y, { cloneX });

  // Plain loops instead of Math.max(...counts) / push(...items): spreading a
  // large array into call arguments can exceed the engine's argument limit
  // and throw a RangeError on big datasets.
  let maxCount = 0;
  for (const samples of groups.values()) {
    if (samples.length > maxCount) {
      maxCount = samples.length;
    }
  }

  const combined = [];

  for (const samples of groups.values()) {
    for (const sample of samples) {
      combined.push(sample);
    }

    const extrasNeeded = maxCount - samples.length;

    if (extrasNeeded > 0) {
      for (const sample of sampleWithReplacement(samples, extrasNeeded, rng)) {
        // Each duplicate gets its own entry (and its own x clone when requested)
        // so repeated rows never alias each other.
        combined.push({
          x: maybeClone(sample.x, cloneX),
          y: sample.y,
        });
      }
    }
  }

  if (shuffleResult) {
    shuffleInPlace(combined, rng);
  }

  return {
    X: combined.map(({ x }) => x),
    Y: combined.map(({ y }) => y),
  };
};
|
|
153
|
+
|
|
154
|
+
/**
 * Balances a dataset by undersampling: every label group is reduced to the
 * size of the smallest group by drawing members without replacement.
 *
 * @param {Array} X - Feature rows.
 * @param {Array} Y - Labels, one per row of X.
 * @param {object} [options]
 * @param {Function} [options.random] - Custom random source returning floats in [0, 1).
 * @param {number} [options.seed] - Integer seed used when `random` is not a function.
 * @param {boolean} [options.shuffleResult=true] - Shuffle the combined rows before returning.
 * @param {boolean} [options.cloneX=false] - Clone the kept feature rows instead of sharing references.
 * @returns {{X: Array, Y: Array}} Balanced features and labels, index-aligned.
 * @throws {Error} When X/Y are not equally sized non-empty arrays or a label is invalid.
 */
export const undersampleXY = (X, Y, options = {}) => {
  const {
    random,
    seed,
    shuffleResult = true,
    cloneX = false,
  } = options;

  const rng = resolveRandom({ random, seed });

  // Group by reference only; cloning happens once, below, for the rows that
  // are actually kept. Cloning here as well would deep-copy every input row
  // (including the discarded ones) and then copy the survivors a second time.
  const groups = groupXYByLabel(X, Y, { cloneX: false });

  // Plain loops instead of Math.min(...counts) / push(...items): spreading a
  // large array into call arguments can exceed the engine's argument limit
  // and throw a RangeError on big datasets.
  let minCount = Infinity;
  for (const samples of groups.values()) {
    if (samples.length < minCount) {
      minCount = samples.length;
    }
  }

  const combined = [];

  for (const samples of groups.values()) {
    for (const sample of sampleWithoutReplacement(samples, minCount, rng)) {
      combined.push({
        x: maybeClone(sample.x, cloneX),
        y: sample.y,
      });
    }
  }

  if (shuffleResult) {
    shuffleInPlace(combined, rng);
  }

  return {
    X: combined.map(({ x }) => x),
    Y: combined.map(({ y }) => y),
  };
};
|
package/src/datasets.js
CHANGED
|
@@ -4,6 +4,229 @@ import { validateFirstRow, validateArray } from "./validators.js";
|
|
|
4
4
|
|
|
5
5
|
// TODO: add a "max correlation" parameter that, when defined, measures the correlation between variables.
|
|
6
6
|
|
|
7
|
+
// True when the value's Object.prototype.toString tag is "[object Object]"
// (arrays, null, dates, maps, etc. report a different tag).
const isPlainObject = value => {
  const tag = Object.prototype.toString.call(value);
  return tag === '[object Object]';
};
|
|
9
|
+
|
|
10
|
+
// Classifies a parsed row as 'array', 'object' (plain object), or otherwise
// its `typeof` result.
const getRowKind = row => {
  if (Array.isArray(row)) {
    return 'array';
  }

  return isPlainObject(row) ? 'object' : typeof row;
};
|
|
15
|
+
|
|
16
|
+
// Lists the row's own enumerable keys, leaving out the bookkeeping key
// 'tempIdx' so it never takes part in schema comparison.
const getComparableKeys = row => {
  const comparable = [];

  for (const key of Object.keys(row)) {
    if (key !== 'tempIdx') {
      comparable.push(key);
    }
  }

  return comparable;
};
|
|
18
|
+
|
|
19
|
+
// Appends one segment to a value path used in error messages: `[key]` under an
// array parent, `.key` under a non-empty object path, or just `key` at the root.
const buildPath = ({ parentPath = '', key, isArrayParent = false }) => {
  if (isArrayParent) {
    return `${parentPath}[${key}]`;
  }

  const separator = parentPath ? '.' : '';
  return `${parentPath}${separator}${key}`;
};
|
|
23
|
+
|
|
24
|
+
// Builds (does not throw) the Error reported by strict schema validation.
// The message names the row label, the source index, the optional value path,
// the failure detail, and the flag that switches the validation off.
const createStrictSchemaError = ({
  rowLabel,
  sourceIndex,
  path = '',
  detail,
  strictFlagName,
}) => {
  let location = `${rowLabel} schema error at index "${sourceIndex}"`;

  if (path) {
    location += ` path "${path}"`;
  }

  return new Error(
    `${location}. ${detail} Set "${strictFlagName}" to false to disable this validation.`
  );
};
|
|
36
|
+
|
|
37
|
+
// Verifies that `currentRow` has the same top-level shape as `referenceRow`:
// same row kind (both arrays or both plain objects) and exactly the same set
// of comparable keys. Throws a strict-schema Error on the first difference.
// Returns the reference row's comparable keys on success.
const assertSameTopLevelSchema = ({
  referenceRow,
  currentRow,
  rowLabel,
  currentIndex,
  strictFlagName,
}) => {
  // Every error raised here shares the same row label, index and flag name.
  const schemaError = fields =>
    createStrictSchemaError({
      rowLabel,
      sourceIndex: currentIndex,
      ...fields,
      strictFlagName,
    });

  const referenceKind = getRowKind(referenceRow);
  const currentKind = getRowKind(currentRow);

  if (referenceKind !== currentKind) {
    throw schemaError({
      detail: `Expected row type "${referenceKind}" based on the first parsed ${rowLabel} row, but got "${currentKind}".`,
    });
  }

  if (!['array', 'object'].includes(referenceKind)) {
    throw schemaError({
      detail: `Parsed ${rowLabel} rows must be arrays or plain objects.`,
    });
  }

  const referenceKeys = getComparableKeys(referenceRow);
  const currentKeys = getComparableKeys(currentRow);
  const isArrayRow = Array.isArray(referenceRow);
  const keyNoun = isArrayRow ? 'index' : 'property';

  // Keys the reference row has but the current row lacks.
  const currentKeySet = new Set(currentKeys);
  const missingKey = referenceKeys.find(key => !currentKeySet.has(key));

  if (missingKey !== undefined) {
    throw schemaError({
      path: buildPath({ key: missingKey, isArrayParent: isArrayRow }),
      detail: `Missing required ${keyNoun} "${missingKey}" found in the first parsed ${rowLabel} row.`,
    });
  }

  // Keys the current row has but the reference row lacks.
  const referenceKeySet = new Set(referenceKeys);
  const unexpectedKey = currentKeys.find(key => !referenceKeySet.has(key));

  if (unexpectedKey !== undefined) {
    throw schemaError({
      path: buildPath({ key: unexpectedKey, isArrayParent: isArrayRow }),
      detail: `Unexpected ${keyNoun} "${unexpectedKey}" not present in the first parsed ${rowLabel} row.`,
    });
  }

  return referenceKeys;
};
|
|
102
|
+
|
|
103
|
+
// Classifies a Y value as 'array' or, for anything else, its `typeof` result.
const getYValueKind = value => (Array.isArray(value) ? 'array' : typeof value);
|
|
107
|
+
|
|
108
|
+
// Recursively checks that `currentValue` matches the shape of `referenceValue`:
// same kind, and for arrays the same length with every element matching in
// turn. Non-array values must be numbers, booleans or strings.
// Throws a strict-schema Error (row label 'Y') on the first difference.
const assertSameYValueSchema = ({
  referenceValue,
  currentValue,
  sourceIndex,
  path,
  strictFlagName,
}) => {
  const fail = detail =>
    createStrictSchemaError({
      rowLabel: 'Y',
      sourceIndex,
      path,
      detail,
      strictFlagName,
    });

  const referenceKind = getYValueKind(referenceValue);
  const currentKind = getYValueKind(currentValue);

  if (referenceKind !== currentKind) {
    throw fail(
      `Expected type "${referenceKind}" based on the first parsed Y row, but got "${currentKind}".`
    );
  }

  if (referenceKind !== 'array') {
    const isSupportedScalar = ['number', 'boolean', 'string'].includes(referenceKind);

    if (!isSupportedScalar) {
      throw fail(
        `Unsupported Y value type "${referenceKind}". Y values must be numbers, booleans, strings, or nested arrays of those types.`
      );
    }

    return;
  }

  const expectedLength = referenceValue.length;

  if (expectedLength !== currentValue.length) {
    throw fail(
      `Expected array length "${expectedLength}" based on the first parsed Y row, but got "${currentValue.length}".`
    );
  }

  // keys() visits every index from 0 to length - 1.
  for (const index of referenceValue.keys()) {
    assertSameYValueSchema({
      referenceValue: referenceValue[index],
      currentValue: currentValue[index],
      sourceIndex,
      path: buildPath({ parentPath: path, key: index, isArrayParent: true }),
      strictFlagName,
    });
  }
};
|
|
166
|
+
|
|
167
|
+
// Strict schema validation for parsed X rows: every row must share the
// top-level shape of the first row. `sourceIndexes[i]` is the index reported
// in errors for `rows[i]`. Does nothing when there are no rows.
const validateStrictXRows = ({ rows, sourceIndexes, strictFlagName }) => {
  const total = rows.length;
  if (!total) return;

  const referenceRow = rows[0];

  const checkAt = position => {
    assertSameTopLevelSchema({
      referenceRow,
      currentRow: rows[position],
      rowLabel: 'X',
      currentIndex: sourceIndexes[position],
      strictFlagName,
    });
  };

  // The last row is compared first, ahead of the full pass over all rows.
  if (total > 1) {
    checkAt(total - 1);
  }

  for (let position = 0; position < total; position += 1) {
    checkAt(position);
  }
};
|
|
190
|
+
|
|
191
|
+
// Strict schema validation for parsed Y rows: every row must share the
// top-level shape of the first row, and each of its values must match the
// corresponding value of the first row (kind, and array length/elements).
// `sourceIndexes[i]` is the index reported in errors for `rows[i]`.
// Does nothing when there are no rows.
const validateStrictYRows = ({ rows, sourceIndexes, strictFlagName }) => {
  const total = rows.length;
  if (!total) return;

  const referenceRow = rows[0];
  const isArrayRow = Array.isArray(referenceRow);

  const checkAt = position => {
    const currentRow = rows[position];
    const currentIndex = sourceIndexes[position];

    const keys = assertSameTopLevelSchema({
      referenceRow,
      currentRow,
      rowLabel: 'Y',
      currentIndex,
      strictFlagName,
    });

    for (const key of keys) {
      assertSameYValueSchema({
        referenceValue: referenceRow[key],
        currentValue: currentRow[key],
        sourceIndex: currentIndex,
        path: buildPath({ key, isArrayParent: isArrayRow }),
        strictFlagName,
      });
    }
  };

  // The last row is compared first, ahead of the full pass over all rows.
  if (total > 1) {
    checkAt(total - 1);
  }

  for (let position = 0; position < total; position += 1) {
    checkAt(position);
  }
};
|
|
229
|
+
|
|
7
230
|
export const parseTrainingXY = ({
|
|
8
231
|
arrObj = [], // array of objects
|
|
9
232
|
trainingSplit = 0.8, // numeric float between 0.01 and 0.99
|
|
@@ -12,6 +235,8 @@ export const parseTrainingXY = ({
|
|
|
12
235
|
validateRows = () => true, // accepted callback functions
|
|
13
236
|
shuffle = false, // only booleans
|
|
14
237
|
balancing = '', // accepted '', 'oversample' or 'undersample'
|
|
238
|
+
strictXSchema = true,
|
|
239
|
+
strictYSchema = true,
|
|
15
240
|
state = {}, // accepted object or classes
|
|
16
241
|
}) => {
|
|
17
242
|
let X = [];
|
|
@@ -21,7 +246,6 @@ export const parseTrainingXY = ({
|
|
|
21
246
|
validateArray(arrObj, { min: 5 }, 'parseTrainingXY');
|
|
22
247
|
validateFirstRow(arrObj[0]);
|
|
23
248
|
|
|
24
|
-
// if parsedX or parsedY is undefined/null the current row will be excluded from training
|
|
25
249
|
for (let x = 0; x < arrObj.length; x++) {
|
|
26
250
|
if (!validateRows({ objRow: arrObj, index: x, state })) continue;
|
|
27
251
|
|
|
@@ -40,6 +264,22 @@ export const parseTrainingXY = ({
|
|
|
40
264
|
}
|
|
41
265
|
}
|
|
42
266
|
|
|
267
|
+
if (strictXSchema) {
|
|
268
|
+
validateStrictXRows({
|
|
269
|
+
rows: X,
|
|
270
|
+
sourceIndexes,
|
|
271
|
+
strictFlagName: 'strictXSchema',
|
|
272
|
+
});
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
if (strictYSchema) {
|
|
276
|
+
validateStrictYRows({
|
|
277
|
+
rows: Y,
|
|
278
|
+
sourceIndexes,
|
|
279
|
+
strictFlagName: 'strictYSchema',
|
|
280
|
+
});
|
|
281
|
+
}
|
|
282
|
+
|
|
43
283
|
if (shuffle) {
|
|
44
284
|
const merged = new Array(X.length);
|
|
45
285
|
|
|
@@ -66,8 +306,8 @@ export const parseTrainingXY = ({
|
|
|
66
306
|
const xLen = X.length;
|
|
67
307
|
const yLen = Y.length;
|
|
68
308
|
|
|
69
|
-
const xKeys = xLen ?
|
|
70
|
-
const yKeys = yLen ?
|
|
309
|
+
const xKeys = xLen ? getComparableKeys(X[0]) : [];
|
|
310
|
+
const yKeys = yLen ? getComparableKeys(Y[0]) : [];
|
|
71
311
|
|
|
72
312
|
const flatX = new Array(xLen);
|
|
73
313
|
const flatY = new Array(yLen);
|
|
@@ -150,6 +390,7 @@ export const parseProductionX = ({
|
|
|
150
390
|
xCallbackFunc = row => row,
|
|
151
391
|
validateRows = () => true,
|
|
152
392
|
shuffle = false,
|
|
393
|
+
strictXSchema = true,
|
|
153
394
|
state = {},
|
|
154
395
|
}) => {
|
|
155
396
|
let X = [];
|
|
@@ -169,6 +410,14 @@ export const parseProductionX = ({
|
|
|
169
410
|
}
|
|
170
411
|
}
|
|
171
412
|
|
|
413
|
+
if (strictXSchema) {
|
|
414
|
+
validateStrictXRows({
|
|
415
|
+
rows: X,
|
|
416
|
+
sourceIndexes,
|
|
417
|
+
strictFlagName: 'strictXSchema',
|
|
418
|
+
});
|
|
419
|
+
}
|
|
420
|
+
|
|
172
421
|
if (shuffle) {
|
|
173
422
|
const merged = new Array(X.length);
|
|
174
423
|
|
|
@@ -191,7 +440,7 @@ export const parseProductionX = ({
|
|
|
191
440
|
}
|
|
192
441
|
|
|
193
442
|
const xLen = X.length;
|
|
194
|
-
const xKeys = xLen ?
|
|
443
|
+
const xKeys = xLen ? getComparableKeys(X[0]) : [];
|
|
195
444
|
const flatX = new Array(xLen);
|
|
196
445
|
|
|
197
446
|
const configX = {
|