@datagrok/eda 1.2.1 → 1.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -4
- package/dist/111.js +1 -1
- package/dist/111.js.map +1 -1
- package/dist/153.js +1 -1
- package/dist/153.js.map +1 -1
- package/dist/234.js +1 -1
- package/dist/234.js.map +1 -1
- package/dist/260.js +1 -1
- package/dist/260.js.map +1 -1
- package/dist/348.js +1 -1
- package/dist/348.js.map +1 -1
- package/dist/377.js +1 -1
- package/dist/377.js.map +1 -1
- package/dist/412.js +1 -1
- package/dist/412.js.map +1 -1
- package/dist/531.js +1 -1
- package/dist/531.js.map +1 -1
- package/dist/583.js +1 -1
- package/dist/583.js.map +1 -1
- package/dist/603.js +1 -1
- package/dist/603.js.map +1 -1
- package/dist/656.js +1 -1
- package/dist/656.js.map +1 -1
- package/dist/682.js +1 -1
- package/dist/682.js.map +1 -1
- package/dist/705.js +1 -1
- package/dist/705.js.map +1 -1
- package/dist/727.js +1 -1
- package/dist/727.js.map +1 -1
- package/dist/763.js +1 -1
- package/dist/763.js.map +1 -1
- package/dist/778.js +1 -1
- package/dist/778.js.map +1 -1
- package/dist/783.js +1 -1
- package/dist/783.js.map +1 -1
- package/dist/793.js +1 -1
- package/dist/793.js.map +1 -1
- package/dist/91.js +1 -1
- package/dist/91.js.map +1 -1
- package/dist/950.js +1 -1
- package/dist/950.js.map +1 -1
- package/dist/980.js +1 -1
- package/dist/980.js.map +1 -1
- package/dist/990.js +1 -1
- package/dist/990.js.map +1 -1
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/package.json +11 -10
- package/src/anova/anova-tools.ts +308 -0
- package/src/anova/anova-ui.ts +258 -0
- package/src/eda-ui.ts +0 -9
- package/src/global.d.ts +13 -0
- package/src/missing-values-imputation/ui-constants.ts +2 -0
- package/src/missing-values-imputation/ui.ts +7 -7
- package/src/package-test.ts +7 -1
- package/src/package.ts +6 -12
- package/src/tests/anova-tests.ts +87 -0
- package/src/tests/linear-methods-tests.ts +1 -1
package/src/package-test.ts
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
|
-
import {runTests, tests, TestContext} from '@datagrok-libraries/utils/src/test';
|
|
2
|
+
import {runTests, tests, TestContext, initAutoTests as initTests} from '@datagrok-libraries/utils/src/test';
|
|
3
3
|
import './tests/dim-reduction-tests';
|
|
4
4
|
import './tests/linear-methods-tests';
|
|
5
5
|
import './tests/classifiers-tests';
|
|
6
6
|
import './tests/mis-vals-imputation-tests';
|
|
7
|
+
import './tests/anova-tests';
|
|
7
8
|
export const _package = new DG.Package();
|
|
8
9
|
export {tests};
|
|
9
10
|
|
|
@@ -16,3 +17,8 @@ export async function test(category: string, test: string, testContext: TestCont
|
|
|
16
17
|
const data = await runTests({category, test, testContext});
|
|
17
18
|
return DG.DataFrame.fromObjects(data)!;
|
|
18
19
|
}
|
|
20
|
+
|
|
21
|
+
//name: initAutoTests
|
|
22
|
+
export async function initAutoTests() {
|
|
23
|
+
await initTests(_package, _package.getModule('package-test.js'));
|
|
24
|
+
}
|
package/src/package.ts
CHANGED
|
@@ -7,14 +7,14 @@ import * as DG from 'datagrok-api/dg';
|
|
|
7
7
|
|
|
8
8
|
import {_initEDAAPI} from '../wasm/EDAAPI';
|
|
9
9
|
import {computePCA} from './eda-tools';
|
|
10
|
-
import {addPrefixToEachColumnName, addOneWayAnovaVizualization} from './eda-ui';
|
|
10
|
+
import {addPrefixToEachColumnName} from './eda-ui';
|
|
11
11
|
import {LINEAR, RBF, POLYNOMIAL, SIGMOID,
|
|
12
12
|
getTrainedModel, getPrediction, isApplicableSVM, isInteractiveSVM, showTrainReport, getPackedModel} from './svm';
|
|
13
13
|
|
|
14
14
|
import {PLS_ANALYSIS} from './pls/pls-constants';
|
|
15
15
|
import {runMVA, runDemoMVA, getPlsAnalysis, PlsOutput} from './pls/pls-tools';
|
|
16
|
+
import {runOneWayAnova} from './anova/anova-ui';
|
|
16
17
|
|
|
17
|
-
import {oneWayAnova} from './stat-tools';
|
|
18
18
|
import {getDbscanWorker} from '@datagrok-libraries/math';
|
|
19
19
|
|
|
20
20
|
import {DistanceAggregationMethod, DistanceAggregationMethods} from '@datagrok-libraries/ml/src/distance-matrix/types';
|
|
@@ -304,7 +304,7 @@ export async function MVA(): Promise<void> {
|
|
|
304
304
|
|
|
305
305
|
//name: MVA demo
|
|
306
306
|
//description: Multidimensional data analysis using partial least squares (PLS) regression. It identifies latent factors and constructs a linear model based on them.
|
|
307
|
-
//meta.demoPath: Compute | Multivariate
|
|
307
|
+
//meta.demoPath: Compute | Multivariate Analysis
|
|
308
308
|
export async function demoMultivariateAnalysis(): Promise<any> {
|
|
309
309
|
await runDemoMVA();
|
|
310
310
|
}
|
|
@@ -547,15 +547,9 @@ export async function visualizeSigmoidKernelSVM(df: DG.DataFrame, targetColumn:
|
|
|
547
547
|
|
|
548
548
|
//top-menu: ML | Analyze | ANOVA...
|
|
549
549
|
//name: ANOVA
|
|
550
|
-
//description: One-way analysis of variances (ANOVA) determines whether the examined factor has a significant impact on the
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
//input: column feature {type: numerical}
|
|
554
|
-
//input: double significance = 0.05 [The significance level is a value from the interval (0, 1) specifying the criterion used for rejecting the null hypothesis.]
|
|
555
|
-
//input: bool validate = false [Indicates whether the normality of distribution and an eqaulity of varainces should be checked.]
|
|
556
|
-
export function anova(table: DG.DataFrame, factor: DG.Column, feature: DG.Column, significance: number, validate: boolean) {
|
|
557
|
-
const res = oneWayAnova(factor, feature, significance, validate);
|
|
558
|
-
addOneWayAnovaVizualization(table, factor, feature, res);
|
|
550
|
+
//description: One-way analysis of variances (ANOVA) determines whether the examined factor has a significant impact on the explored feature.
|
|
551
|
+
export function anova(): void {
|
|
552
|
+
runOneWayAnova();
|
|
559
553
|
}
|
|
560
554
|
|
|
561
555
|
//top-menu: ML | Missing Values Imputation ...
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
// Tests for ANOVA
|
|
2
|
+
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
import * as ui from 'datagrok-api/ui';
|
|
5
|
+
import * as DG from 'datagrok-api/dg';
|
|
6
|
+
import {_package} from '../package-test';
|
|
7
|
+
|
|
8
|
+
import {category, expect, test} from '@datagrok-libraries/utils/src/test';
|
|
9
|
+
|
|
10
|
+
import {oneWayAnova, FactorizedData} from '../anova/anova-tools';
|
|
11
|
+
|
|
12
|
+
const ROWS_M = 1;
|
|
13
|
+
const M = 1000000;
|
|
14
|
+
const TIMEOUT = 4000;
|
|
15
|
+
const ALPHA = 0.05;
|
|
16
|
+
const CATEGORIES = 'race';
|
|
17
|
+
const FEATURES = 'height';
|
|
18
|
+
const TO_VALIDATE = false;
|
|
19
|
+
const ERR = 0.01;
|
|
20
|
+
|
|
21
|
+
/** Validation features */
|
|
22
|
+
const FEATURES_COL = DG.Column.fromList(DG.COLUMN_TYPE.INT, 'features', [
|
|
23
|
+
9, 12, 4, 8, 7, 4, 6, 8, 2, 10, 1, 3, 4, 5, 2,
|
|
24
|
+
]);
|
|
25
|
+
|
|
26
|
+
/** Validation categories */
|
|
27
|
+
const CATEGORIES_COL = DG.Column.fromStrings('features', [
|
|
28
|
+
'A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B', 'C', 'C', 'C', 'C', 'C',
|
|
29
|
+
]);
|
|
30
|
+
|
|
31
|
+
/** Expected ANOVA results for the validation data */
|
|
32
|
+
enum EXPECTED {
|
|
33
|
+
DF_BN = 2,
|
|
34
|
+
DF_TOT = 14,
|
|
35
|
+
DF_WN = 12,
|
|
36
|
+
SS_BN = 63.333,
|
|
37
|
+
SS_TOT = 147.333,
|
|
38
|
+
SS_WN = 84,
|
|
39
|
+
MS_BN = 31.666,
|
|
40
|
+
MS_WN = 7,
|
|
41
|
+
F_STAT = 4.523,
|
|
42
|
+
F_CRIT = 3.885,
|
|
43
|
+
P_VAL = 0.034,
|
|
44
|
+
};
|
|
45
|
+
|
|
46
|
+
category('ANOVA', () => {
|
|
47
|
+
test(`Performance: ${ROWS_M}M rows demog`, async () => {
|
|
48
|
+
const df = grok.data.demo.demog(ROWS_M * M);
|
|
49
|
+
const categories = df.col(CATEGORIES);
|
|
50
|
+
const features = df.col(FEATURES);
|
|
51
|
+
|
|
52
|
+
const factorized = new FactorizedData(categories!, features!, categories!.stats.uniqueCount);
|
|
53
|
+
factorized.areVarsEqual(ALPHA);
|
|
54
|
+
|
|
55
|
+
oneWayAnova(categories!, features!, ALPHA, TO_VALIDATE);
|
|
56
|
+
}, {timeout: TIMEOUT, benchmark: true});
|
|
57
|
+
|
|
58
|
+
test(`Correctness`, async () => {
|
|
59
|
+
const analysis = oneWayAnova(CATEGORIES_COL, FEATURES_COL, ALPHA, TO_VALIDATE);
|
|
60
|
+
const anova = analysis.anovaTable;
|
|
61
|
+
|
|
62
|
+
// check degrees of freedom (df-s)
|
|
63
|
+
expect(anova.dfBn, EXPECTED.DF_BN, 'Incorrect degrees of freedom: dfBn');
|
|
64
|
+
expect(anova.dfTot, EXPECTED.DF_TOT, 'Incorrect degrees of freedom: dfTot');
|
|
65
|
+
expect(anova.dfWn, EXPECTED.DF_WN, 'Incorrect degrees of freedom: dfWn');
|
|
66
|
+
|
|
67
|
+
const eq = (x: number, y: number) => Math.abs(x - y) < ERR;
|
|
68
|
+
|
|
69
|
+
// check sum of squares (ss-s)
|
|
70
|
+
expect(eq(anova.ssBn, EXPECTED.SS_BN), true, 'Incorrect sum of squares: ssBn');
|
|
71
|
+
expect(eq(anova.ssTot, EXPECTED.SS_TOT), true, 'Incorrect sum of squares: ssTot');
|
|
72
|
+
expect(eq(anova.ssWn, EXPECTED.SS_WN), true, 'Incorrect sum of squares: ssWn');
|
|
73
|
+
|
|
74
|
+
// check mean squares (ms-s)
|
|
75
|
+
expect(eq(anova.msBn, EXPECTED.MS_BN), true, 'Incorrect mean squares: msBn');
|
|
76
|
+
expect(eq(anova.msWn, EXPECTED.MS_WN), true, 'Incorrect mean squares: msWn');
|
|
77
|
+
|
|
78
|
+
// check F-statistics
|
|
79
|
+
expect(eq(anova.fStat, EXPECTED.F_STAT), true, 'Incorrect F-statistics value');
|
|
80
|
+
|
|
81
|
+
// check p-value
|
|
82
|
+
expect(eq(anova.pValue, EXPECTED.P_VAL), true, 'Incorrect p-value');
|
|
83
|
+
|
|
84
|
+
// check F-critical
|
|
85
|
+
expect(eq(analysis.fCritical, EXPECTED.F_CRIT), true, 'Incorrect F-critical');
|
|
86
|
+
}, {timeout: TIMEOUT, benchmark: true});
|
|
87
|
+
});
|