@datagrok/eda 1.1.3 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/README.md +2 -0
- package/dist/package.js +2 -2
- package/package.json +2 -1
- package/src/eda-ui.ts +8 -1
- package/src/package.ts +26 -11
- package/src/stat-tools.ts +266 -0
- package/src/utils.ts +1 -1
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@datagrok/eda",
|
|
3
3
|
"friendlyName": "EDA",
|
|
4
|
-
"version": "1.1.3",
|
|
4
|
+
"version": "1.1.4",
|
|
5
5
|
"description": "Exploratory Data Analysis Tools",
|
|
6
6
|
"dependencies": {
|
|
7
7
|
"@datagrok-libraries/ml": "^6.3.39",
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
"cash-dom": "^8.1.1",
|
|
12
12
|
"datagrok-api": "^1.16.0",
|
|
13
13
|
"dayjs": "^1.11.9",
|
|
14
|
+
"jstat": "^1.9.6",
|
|
14
15
|
"umap-js": "^1.3.3"
|
|
15
16
|
},
|
|
16
17
|
"author": {
|
package/src/eda-ui.ts
CHANGED
|
@@ -112,7 +112,7 @@ export function loadingScatterPlot(features: DG.ColumnList, xLoadings: Array<DG.
|
|
|
112
112
|
// Add PLS visualization
|
|
113
113
|
export function addPLSvisualization(table: DG.DataFrame, samplesNames: DG.Column, features: DG.ColumnList, predict: DG.Column, plsOutput: any): void {
|
|
114
114
|
|
|
115
|
-
|
|
115
|
+
const view = grok.shell.getTableView(table.name);
|
|
116
116
|
|
|
117
117
|
// 1. Predicted vs Reference scatter plot
|
|
118
118
|
view.addViewer(predictedVersusReferenceScatterPlot(samplesNames, predict, plsOutput[0]));
|
|
@@ -126,3 +126,10 @@ export function addPLSvisualization(table: DG.DataFrame, samplesNames: DG.Column
|
|
|
126
126
|
// 4. Scores Scatter Plot
|
|
127
127
|
view.addViewer(scoresScatterPlot(samplesNames, plsOutput[2], plsOutput[3]));
|
|
128
128
|
}
|
|
129
|
+
|
|
130
|
+
/**
 * Add one-way ANOVA results to the table view of the analyzed dataframe.
 *
 * The view is looked up by the name of `table`. Two viewers are added to it, in this order:
 * a box plot built on a dataframe made of the `factors` and `values` columns,
 * and a grid showing the `anova` dataframe.
 */
export function addOneWayAnovaVizualization(table: DG.DataFrame, factors: DG.Column, values: DG.Column, anova: DG.DataFrame) {
  const tableView = grok.shell.getTableView(table.name);

  // Box plot of the studied values with respect to the factor
  const boxPlotSource = DG.DataFrame.fromColumns([factors, values]);
  const boxPlot = DG.Viewer.boxPlot(boxPlotSource);
  tableView.addViewer(boxPlot);

  // Grid with the ANOVA table
  const anovaGrid = DG.Viewer.grid(anova);
  tableView.addViewer(anovaGrid);
}
|
package/src/package.ts
CHANGED
|
@@ -8,11 +8,13 @@ import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
|
8
8
|
import {_initEDAAPI} from '../wasm/EDAAPI';
|
|
9
9
|
import {computePCA, computePLS, computeUMAP, computeTSNE, computeSPE} from './eda-tools';
|
|
10
10
|
import {addPrefixToEachColumnName, addPLSvisualization, regressionCoefficientsBarChart,
|
|
11
|
-
scoresScatterPlot, predictedVersusReferenceScatterPlot} from './eda-ui';
|
|
11
|
+
scoresScatterPlot, predictedVersusReferenceScatterPlot, addOneWayAnovaVizualization} from './eda-ui';
|
|
12
12
|
import {carsDataframe, testDataForBinaryClassification} from './data-generators';
|
|
13
13
|
import {LINEAR, RBF, POLYNOMIAL, SIGMOID,
|
|
14
14
|
getTrainedModel, getPrediction, showTrainReport, getPackedModel} from './svm';
|
|
15
15
|
|
|
16
|
+
import {oneWayAnova} from './stat-tools';
|
|
17
|
+
|
|
16
18
|
export const _package = new DG.Package();
|
|
17
19
|
|
|
18
20
|
//name: info
|
|
@@ -25,14 +27,14 @@ export async function init(): Promise<void> {
|
|
|
25
27
|
await _initEDAAPI();
|
|
26
28
|
}
|
|
27
29
|
|
|
28
|
-
//top-menu: ML |
|
|
30
|
+
//top-menu: ML | Dimensionality Reduction | PCA...
|
|
29
31
|
//name: PCA
|
|
30
32
|
//description: Principal component analysis (PCA).
|
|
31
|
-
//input: dataframe table
|
|
32
|
-
//input: column_list features {type: numerical
|
|
33
|
-
//input: int components = 2 {caption: Components
|
|
34
|
-
//input: bool center = false
|
|
35
|
-
//input: bool scale = false
|
|
33
|
+
//input: dataframe table
|
|
34
|
+
//input: column_list features {type: numerical}
|
|
35
|
+
//input: int components = 2 {caption: Components} [Number of components.]
|
|
36
|
+
//input: bool center = false [Indicating whether the variables should be shifted to be zero centered.]
|
|
37
|
+
//input: bool scale = false [Indicating whether the variables should be scaled to have unit variance.]
|
|
36
38
|
//output: dataframe result {action:join(table)}
|
|
37
39
|
export async function PCA(table: DG.DataFrame, features: DG.ColumnList, components: number,
|
|
38
40
|
center: boolean, scale: boolean): Promise<DG.DataFrame>
|
|
@@ -42,7 +44,7 @@ export async function PCA(table: DG.DataFrame, features: DG.ColumnList, componen
|
|
|
42
44
|
return pcaTable;
|
|
43
45
|
}
|
|
44
46
|
|
|
45
|
-
//top-menu: ML |
|
|
47
|
+
//top-menu: ML | Dimensionality Reduction | UMAP...
|
|
46
48
|
//name: UMAP
|
|
47
49
|
//description: Uniform Manifold Approximation and Projection (UMAP).
|
|
48
50
|
//input: dataframe table {category: Data}
|
|
@@ -59,7 +61,7 @@ export async function UMAP(table: DG.DataFrame, features: DG.ColumnList, compone
|
|
|
59
61
|
return await computeUMAP(features, components, epochs, neighbors, minDist, spread);
|
|
60
62
|
}
|
|
61
63
|
|
|
62
|
-
//top-menu: ML |
|
|
64
|
+
//top-menu: ML | Dimensionality Reduction | t-SNE...
|
|
63
65
|
//name: t-SNE
|
|
64
66
|
//description: t-distributed stochastic neighbor embedding (t-SNE).
|
|
65
67
|
//input: dataframe table {category: Data}
|
|
@@ -75,7 +77,7 @@ export async function tSNE(table: DG.DataFrame, features: DG.ColumnList, compone
|
|
|
75
77
|
return await computeTSNE(features, components, learningRate, perplexity, iterations);
|
|
76
78
|
}
|
|
77
79
|
|
|
78
|
-
//top-menu: ML |
|
|
80
|
+
//top-menu: ML | Dimensionality Reduction | SPE...
|
|
79
81
|
//name: SPE
|
|
80
82
|
//description: Stochastic proximity embedding (SPE).
|
|
81
83
|
//input: dataframe table {category: Data}
|
|
@@ -92,7 +94,7 @@ export async function SPE(table: DG.DataFrame, features: DG.ColumnList, dimensio
|
|
|
92
94
|
return await computeSPE(features, dimension, steps, cycles, cutoff, lambda);
|
|
93
95
|
}
|
|
94
96
|
|
|
95
|
-
//top-menu: ML | Multivariate Analysis
|
|
97
|
+
//top-menu: ML | Analyze | Multivariate Analysis...
|
|
96
98
|
//name: Multivariate Analysis (PLS)
|
|
97
99
|
//description: Multidimensional data analysis using partial least squares (PLS) regression. It reduces the predictors to a smaller set of uncorrelated components and performs least squares regression on them.
|
|
98
100
|
//input: dataframe table
|
|
@@ -318,3 +320,16 @@ export async function trainSigmoidKernelSVM(df: DG.DataFrame, predict_column: st
|
|
|
318
320
|
export async function applySigmoidKernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
|
|
319
321
|
return await getPrediction(df, model);
|
|
320
322
|
}
|
|
323
|
+
|
|
324
|
+
//top-menu: ML | Analysis of Variances (ANOVA)...
//name: One-way ANOVA
//description: One-way analysis of variances (ANOVA) determines whether the examined factor has a significant impact on the studied feature.
//input: dataframe table
//input: column factor {type: categorical}
//input: column feature {type: numerical}
//input: double significance = 0.05 [The significance level is a value from the interval (0, 1) specifying the criterion used for rejecting the null hypothesis.]
//input: bool validate = false [Indicates whether the normality of distribution and the equality of variances should be checked.]
export function anova(table: DG.DataFrame, factor: DG.Column, feature: DG.Column, significance: number, validate: boolean): void {
  // Compute the ANOVA table; the call throws if the inputs are rejected
  // or, when `validate` is set, if a validation check fails.
  const res = oneWayAnova(factor, feature, significance, validate);

  // Show the results in the table view of the source dataframe.
  addOneWayAnovaVizualization(table, factor, feature, res);
}
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
// Statistic tools
|
|
2
|
+
|
|
3
|
+
/* REFERENCES
|
|
4
|
+
|
|
5
|
+
[1] One-way analysis of variance, https://en.wikipedia.org/wiki/One-way_analysis_of_variance
|
|
6
|
+
|
|
7
|
+
[2] G.W. Heiman. Basic Statistics for the Behavioral Sciences, 6th ed. Wadsworth Publishing, 2010
|
|
8
|
+
|
|
9
|
+
[3] F-test of equality of variances, https://en.wikipedia.org/wiki/F-test_of_equality_of_variances
|
|
10
|
+
|
|
11
|
+
[4] S. McKillup. Statistics Explained, Cambridge University Press, 2005
|
|
12
|
+
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import * as grok from 'datagrok-api/grok';
|
|
16
|
+
import * as ui from 'datagrok-api/ui';
|
|
17
|
+
import * as DG from 'datagrok-api/dg';
|
|
18
|
+
|
|
19
|
+
//@ts-ignore: no types
|
|
20
|
+
import * as jStat from 'jstat';
|
|
21
|
+
|
|
22
|
+
/** Messages of the errors thrown by the statistic tools. */
enum ERROR_MSG {
  NON_EQUAL_FACTORS_VALUES_SIZE = 'non-equal sizes of factor and values arrays. INPUT ERROR.',
  INCORRECT_SIGNIFICANCE_LEVEL = 'incorrect significance level. It must be from the interval (0, 1). INPUT ERROR.',
  INCORRECT_SAMPLE_SIZE = 'incorrect size of sample. DATA FACTORIZING ERROR.',
  NON_EQUAL_VARIANCES = 'variances are not equal.',
  NON_NORMAL_DISTRIB = 'non-normal distribution.',
  UNSUPPORTED_COLUMN_TYPE = 'unsupported column type.',
  INCORRECT_CATEGORIES_COL_TYPE = 'incorrect categories column type.',
  ANOVA_FAILED_JUST_ONE_CAT = 'ANOVA failed: there should be at least 2 categories.',
}
|
|
32
|
+
|
|
33
|
+
/** Aggregates of a sample that are used for computing its variance. */
type SampleData = {
  /** sum of the sample values */
  sum: number;
  /** sum of the squared sample values */
  sumOfSquares: number;
  /** number of values in the sample */
  size: number;
};
|
|
38
|
+
|
|
39
|
+
/** Results of one-way ANOVA computations. The fields follow the classic notations (see [2], p. 290). */
type OneWayAnova = {
  /** SSbn: sum of squares between groups */
  ssBn: number;
  /** SSwn: sum of squares within groups */
  ssWn: number;
  /** SStot: total sum of squares */
  ssTot: number;
  /** DFbn: degrees of freedom between groups */
  dfBn: number;
  /** DFwn: degrees of freedom within groups */
  dfWn: number;
  /** DFtot: total degrees of freedom */
  dfTot: number;
  /** MSbn: mean square between groups */
  msBn: number;
  /** MSwn: mean square within groups */
  msWn: number;
  /** Fobt: obtained value of the F-statistics */
  fStat: number;
  /** p-value corresponding to the F-statistics */
  pValue: number;
};
|
|
62
|
+
|
|
63
|
+
/** Column holding the factor levels (categories); declared as a string column. */
type CatCol = DG.Column<DG.COLUMN_TYPE.STRING>;
|
|
65
|
+
|
|
66
|
+
/** Column holding the studied numerical values; declared as either an int or a float column. */
type NumCol = DG.Column<DG.COLUMN_TYPE.INT> | DG.Column<DG.COLUMN_TYPE.FLOAT>;
|
|
68
|
+
|
|
69
|
+
/**
 * Create dataframe with one-way ANOVA results.
 *
 * The dataframe has 7 rows. The first three rows describe the sources of variance
 * ('Between groups', 'Within groups', 'Total'); the 5th and the 7th rows carry the
 * hypothesis and the test result texts in the first column; the remaining cells are nulls.
 *
 * @param anova computed ANOVA values
 * @param alpha significance level, used in the name of the critical value column
 * @param fCritical critical value of the F-statistics
 * @param hypothesis text of the null hypothesis
 * @param testResult text of the test result
 */
export function getOneWayAnovaDF(anova: OneWayAnova, alpha: number, fCritical: number, hypothesis: string, testResult: string): DG.DataFrame {
  const ROWS_COUNT = 7;

  // Extend the leading values with nulls up to the rows count
  const padWithNulls = (head: number[]): Array<number | null> => {
    const cells: Array<number | null> = [...head];

    while (cells.length < ROWS_COUNT)
      cells.push(null);

    return cells;
  };

  const floatCol = (name: string, head: number[]) => DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, name, padWithNulls(head));
  const intCol = (name: string, head: number[]) => DG.Column.fromList(DG.COLUMN_TYPE.INT, name, padWithNulls(head));

  const sources = ['Between groups', 'Within groups', 'Total', '', hypothesis, '', testResult];

  return DG.DataFrame.fromColumns([
    DG.Column.fromStrings('Source of variance', sources),
    floatCol('Sum of squares', [anova.ssBn, anova.ssWn, anova.ssTot]),
    intCol('Degrees of freedom', [anova.dfBn, anova.dfWn, anova.dfTot]),
    floatCol('Mean square', [anova.msBn, anova.msWn]),
    floatCol('F-statistics', [anova.fStat]),
    floatCol('p-value', [anova.pValue]),
    floatCol(`${alpha}-critical value`, [fCritical]),
  ]);
} // getOneWayAnovaDF
|
|
81
|
+
|
|
82
|
+
/**
 * Check correctness of significance level.
 *
 * @param alpha significance level; it must lie strictly inside the interval (0, 1)
 * @throws Error with the INCORRECT_SIGNIFICANCE_LEVEL message if `alpha` is NaN or is outside (0, 1)
 */
export function checkSignificanceLevel(alpha: number): void {
  // The check is written in the positive form on purpose: for NaN both `alpha <= 0`
  // and `alpha >= 1` are false, so the negative form would silently accept it.
  const isInsideUnitInterval = (alpha > 0) && (alpha < 1);

  if (!isInsideUnitInterval)
    throw new Error(ERROR_MSG.INCORRECT_SIGNIFICANCE_LEVEL);
}
|
|
87
|
+
|
|
88
|
+
/**
 * Compute unbiased variance.
 *
 * The applied formulas can be found in [4] (see p. 63).
 *
 * @param data sum, sum of squares and size of the sample
 * @returns the unbiased variance; 0 for a sample of size 1
 * @throws Error with the INCORRECT_SAMPLE_SIZE message if the sample size is not positive
 */
export function getVariance(data: SampleData): number {
  const size = data.size;

  if (size <= 0)
    throw new Error(ERROR_MSG.INCORRECT_SAMPLE_SIZE);

  if (size === 1)
    return 0;

  const sumOfSquaredDeviations = data.sumOfSquares - (data.sum ** 2) / size;

  // The subtraction above may produce a tiny negative number due to floating-point
  // cancellation (e.g. for a sample of equal values), while variance is never negative.
  return Math.max(0, sumOfSquaredDeviations) / (size - 1);
} // getVariance
|
|
101
|
+
|
|
102
|
+
/**
 * Check equality of variances of 2 samples. The two-tailed F-test is performed (see [3]).
 *
 * The larger variance is always placed in the numerator of the F-statistics, so the result
 * does not depend on the order of the samples, and the statistics is compared with the
 * (1 - alpha / 2)-quantile of the F-distribution.
 *
 * @param xData aggregates of the first sample
 * @param yData aggregates of the second sample
 * @param alpha significance level from the interval (0, 1)
 * @returns true if the hypothesis of equal variances is not rejected
 * @throws Error if `alpha` is incorrect or a sample size is not positive
 */
function areVarsEqual(xData: SampleData, yData: SampleData, alpha: number = 0.05): boolean {
  checkSignificanceLevel(alpha);

  const xVar = getVariance(xData);
  const yVar = getVariance(yData);

  // With a zero variance the ratio degenerates (0 or Infinity):
  // the variances are treated as equal only if both of them are zero.
  if ((xVar === 0) || (yVar === 0))
    return (xVar === yVar);

  // Put the larger variance to the numerator; the degrees of freedom follow the same order.
  const isXLarger = (xVar >= yVar);
  const fStat = isXLarger ? (xVar / yVar) : (yVar / xVar);
  const dfNumerator = (isXLarger ? xData.size : yData.size) - 1;
  const dfDenominator = (isXLarger ? yData.size : xData.size) - 1;

  // Since fStat >= 1, only the upper tail must be checked; alpha is split between two tails.
  const fCrit = jStat.centralF.inv(1 - alpha / 2, dfNumerator, dfDenominator);

  return (fStat < fCrit);
} // areVarsEqual
|
|
118
|
+
|
|
119
|
+
/**
 * Numerical values factorized by the levels of a categorical column.
 *
 * For each category, the sum, the sum of squares and the count of the corresponding values
 * are stored. These aggregates are used for checking equality of variances and for
 * one-way ANOVA computations.
 */
export class FactorizedData {
  /** Reserved for the result of the normality check; it is not assigned by the current implementation. */
  private isNormDistrib: boolean | undefined = undefined;
  /** Categories of the factor column. */
  private categories: string[] = [];
  /** Sums of values, one per category. */
  private sums!: Float64Array;
  /** Sums of squared values, one per category. */
  private sumsOfSquares!: Float64Array;
  /** Counts of values, one per category. */
  private subSampleSizes!: Int32Array;
  /** Total number of values. */
  private size!: number;
  /** Number of categories. */
  private catCount!: number;

  /**
   * @param categories column with factor levels; its type must be string
   * @param values column with numerical values; it must have the same length as `categories`
   * @param checkNormality passed to the statistics computation; currently it is not used there
   * @param alpha passed to the statistics computation; currently it is not used there
   * @throws Error if the categories column is not a string column, if the columns differ in length,
   * or if the type of the values column is unsupported
   */
  constructor(categories: CatCol, values: NumCol, checkNormality: boolean = false, alpha: number = 0.05) {
    if (categories.type !== DG.COLUMN_TYPE.STRING)
      throw new Error(ERROR_MSG.INCORRECT_CATEGORIES_COL_TYPE);

    if (categories.length !== values.length)
      throw new Error(ERROR_MSG.NON_EQUAL_FACTORS_VALUES_SIZE);

    this.setStats(categories, values, checkNormality, alpha);
  }

  /** Placeholder of the normality check: it always returns true (see the TODO in setStats). */
  public isNormal(): boolean | undefined {
    return true;
  }

  /**
   * Check equality of variances of factorized data.
   *
   * The sample of the first category is compared with the sample of each other category
   * using the F-test; a single category is reported as having equal variances.
   */
  public areVarsEqual(alpha: number = 0.05): boolean {
    const K = this.catCount;

    if (K === 1)
      return true;

    const first: SampleData = {sum: this.sums[0], sumOfSquares: this.sumsOfSquares[0], size: this.subSampleSizes[0]};

    for (let i = 1; i < K; ++i) {
      const current: SampleData = {sum: this.sums[i], sumOfSquares: this.sumsOfSquares[i], size: this.subSampleSizes[i]};

      // here, the module-level function is called, not this method
      if (!areVarsEqual(first, current, alpha))
        return false;
    }

    return true;
  } // areVarsEqual

  /**
   * Perform one-way ANOVA computations.
   *
   * @throws Error with the ANOVA_FAILED_JUST_ONE_CAT message if there are fewer than 2 categories
   */
  public getOneWayAnova(): OneWayAnova {
    // Further, notations and formulas from (see [2], p. 290) are used.

    const K = this.catCount;

    // fewer than 2 categories (including no categories at all) leave nothing to compare
    if (K < 2)
      throw new Error(ERROR_MSG.ANOVA_FAILED_JUST_ONE_CAT);

    const N = this.size;
    let sum = 0;
    let sumOfSquares = 0;
    let buf = 0; // sum of (sum of group values)^2 / (group size) over all groups

    for (let i = 0; i < K; ++i) {
      sum += this.sums[i];
      sumOfSquares += this.sumsOfSquares[i];
      buf += this.sums[i] ** 2 / this.subSampleSizes[i];
    }

    const ssTot = sumOfSquares - sum ** 2 / N;
    const ssBn = buf - sum ** 2 / N;
    const ssWn = ssTot - ssBn;

    const dfBn = K - 1;
    const dfWn = N - K;
    const dfTot = N - 1;

    const msBn = ssBn / dfBn;
    const msWn = ssWn / dfWn;

    const fStat = msBn / msWn;

    return {
      ssBn: ssBn,
      ssWn: ssWn,
      ssTot: ssTot,
      dfBn: dfBn,
      dfWn: dfWn,
      dfTot: dfTot,
      msBn: msBn,
      msWn: msWn,
      fStat: fStat,
      // upper tail probability of the F-distribution
      pValue: 1 - jStat.centralF.cdf(fStat, dfBn, dfWn),
    };
  } // getOneWayAnova

  /**
   * Compute sum & sums of squares with respect to factor levels.
   *
   * @throws Error with the UNSUPPORTED_COLUMN_TYPE message if `values` is neither an int nor a float column
   */
  private setStats(categories: CatCol, values: NumCol, checkNormality: boolean = false, alpha: number = 0.05): void {
    // TODO: provide check normality feature
    const type = values.type;

    if ((type !== DG.COLUMN_TYPE.INT) && (type !== DG.COLUMN_TYPE.FLOAT))
      throw new Error(ERROR_MSG.UNSUPPORTED_COLUMN_TYPE);

    const size = values.length;

    this.categories = categories.categories;
    const catCount = this.categories.length;
    this.catCount = catCount;
    this.size = size;

    const vals = values.getRawData();
    const cats = categories.getRawData();

    // typed arrays are created filled with zeros
    const sums = new Float64Array(catCount);
    const sumsOfSquares = new Float64Array(catCount);
    const subSampleSizes = new Int32Array(catCount);

    // NOTE(review): every row is accumulated, rows with missing values are not skipped -
    // confirm how missing values are represented in the raw data.
    for (let i = 0; i < size; ++i) {
      // the raw value of the categories column is used as the index of the row's category
      const c = cats[i];
      sums[c] += vals[i];
      sumsOfSquares[c] += vals[i] ** 2;
      ++subSampleSizes[c];
    }

    this.sums = sums;
    this.sumsOfSquares = sumsOfSquares;
    this.subSampleSizes = subSampleSizes;
  } // setStats
} // FactorizedData
|
|
244
|
+
|
|
245
|
+
/**
 * Perform one-way analysis of variances.
 *
 * @param categores column with factor levels
 * @param values column with the studied numerical values
 * @param alpha significance level from the interval (0, 1)
 * @param validate if true, equality of variances and normality are checked before the analysis
 * @returns dataframe with the ANOVA table, the null hypothesis and the test result
 * @throws Error if `alpha` is incorrect, if the data cannot be factorized or analyzed,
 * or if `validate` is set and a validation check fails
 */
export function oneWayAnova(categores: CatCol, values: NumCol, alpha: number = 0.05, validate: boolean = false): DG.DataFrame {
  checkSignificanceLevel(alpha);

  const grouped = new FactorizedData(categores, values, validate, alpha);

  // Validation: variances are checked first, then normality
  if (validate && !grouped.areVarsEqual(alpha))
    throw new Error(ERROR_MSG.NON_EQUAL_VARIANCES);

  if (validate && !grouped.isNormal())
    throw new Error(ERROR_MSG.NON_NORMAL_DISTRIB);

  const results = grouped.getOneWayAnova();
  const criticalValue = jStat.centralF.inv(1 - alpha, results.dfBn, results.dfWn);

  const nullHypothesis = `THE NULL HYPOTHESIS: the "${categores.name}" factor does not produce a significant difference in the "${values.name}" feature.`;

  // The null hypothesis is rejected if the F-statistics exceeds the critical value
  let verdict = 'FAILED TO REJECT.';
  if (results.fStat > criticalValue)
    verdict = 'REJECTED.';

  return getOneWayAnovaDF(results, alpha, criticalValue, nullHypothesis, `Test result: ${verdict}`);
} // oneWayAnova
|
package/src/utils.ts
CHANGED
|
@@ -14,7 +14,7 @@ const MAX_ELEMENTS_COUNT = 100000000;
|
|
|
14
14
|
|
|
15
15
|
// Error messages
|
|
16
16
|
const COMP_POSITVE_MES = 'components must be positive.';
|
|
17
|
-
const COMP_EXCESS = 'components must not be greater than
|
|
17
|
+
const COMP_EXCESS = 'components must not be greater than features count.';
|
|
18
18
|
const INCORERRECT_MIN_MAX_MES = 'min must be less than max.';
|
|
19
19
|
const INCORERRECT_FEATURES_MES = 'features must be positive.';
|
|
20
20
|
const INCORERRECT_SAMPLES_MES = 'samples must be positive.';
|