@datagrok/eda 1.2.1 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,87 @@
1
+ // Tests for ANOVA
2
+
3
+ import * as grok from 'datagrok-api/grok';
4
+ import * as ui from 'datagrok-api/ui';
5
+ import * as DG from 'datagrok-api/dg';
6
+ import {_package} from '../package-test';
7
+
8
+ import {category, expect, test} from '@datagrok-libraries/utils/src/test';
9
+
10
+ import {oneWayAnova, FactorizedData} from '../anova/anova-tools';
11
+
12
/** Size of the performance dataset, expressed in millions of rows */
const ROWS_M = 1;

/** One million: multiplied by ROWS_M to get the actual row count */
const M = 1e6;

/** Value of the `timeout` option given to every test (presumably milliseconds - confirm) */
const TIMEOUT = 4000;

/** Significance level handed to the ANOVA routines */
const ALPHA = 0.05;

/** Name of the demog column used as the categorical factor */
const CATEGORIES = 'race';

/** Name of the demog column used as the analyzed feature */
const FEATURES = 'height';

/** Fourth argument of `oneWayAnova` (presumably toggles input validation - confirm) */
const TO_VALIDATE = false;

/** Absolute tolerance for comparing computed floating-point values with the expected ones */
const ERR = 0.01;
20
+
21
/** Validation features: 15 integer observations, written one group per row
 *  in the same order as the labels of CATEGORIES_COL */
const FEATURES_COL = DG.Column.fromList(
  DG.COLUMN_TYPE.INT,
  'features',
  [
    9, 12, 4, 8, 7, // group A
    4, 6, 8, 2, 10, // group B
    1, 3, 4, 5, 2, // group C
  ],
);
25
+
26
/** Validation categories: group labels (A, B, C - five observations each),
 *  positionally matching the values of FEATURES_COL.
 *
 *  The column is named 'categories' so that it does not share the name 'features'
 *  with FEATURES_COL. */
const CATEGORIES_COL = DG.Column.fromStrings('categories', [
  'A', 'A', 'A', 'A', 'A',
  'B', 'B', 'B', 'B', 'B',
  'C', 'C', 'C', 'C', 'C',
]);
30
+
31
/** Expected ANOVA results for the validation data (FEATURES_COL grouped by CATEGORIES_COL),
 *  given to three decimal places.
 *
 *  Declared as a constant object rather than a numeric enum: a numeric enum also emits
 *  a reverse mapping (value -> name), which is meaningless for fractional values
 *  such as 63.333. Member access (`EXPECTED.SS_BN`, ...) is unchanged. */
const EXPECTED = {
  /** Degrees of freedom: between groups */
  DF_BN: 2,
  /** Degrees of freedom: total */
  DF_TOT: 14,
  /** Degrees of freedom: within groups */
  DF_WN: 12,
  /** Sum of squares: between groups */
  SS_BN: 63.333,
  /** Sum of squares: total */
  SS_TOT: 147.333,
  /** Sum of squares: within groups */
  SS_WN: 84,
  /** Mean square: between groups */
  MS_BN: 31.666,
  /** Mean square: within groups */
  MS_WN: 7,
  /** F-statistic */
  F_STAT: 4.523,
  /** Critical value of F */
  F_CRIT: 3.885,
  /** p-value */
  P_VAL: 0.034,
} as const;
45
+
46
/** ANOVA tests: a performance run on the demog demo table and a correctness check
 *  of `oneWayAnova` against the pre-computed values stored in EXPECTED. */
category('ANOVA', () => {
  test(`Performance: ${ROWS_M}M rows demog`, async () => {
    const df = grok.data.demo.demog(ROWS_M * M);
    const categories = df.col(CATEGORIES);
    const features = df.col(FEATURES);

    // Fail with a descriptive error instead of relying on non-null assertions
    if ((categories == null) || (features == null))
      throw new Error(`The demog table has no '${CATEGORIES}' or '${FEATURES}' column`);

    // Return values are not inspected: the test only has to complete within TIMEOUT
    const factorized = new FactorizedData(categories, features, categories.stats.uniqueCount);
    factorized.areVarsEqual(ALPHA);

    oneWayAnova(categories, features, ALPHA, TO_VALIDATE);
  }, {timeout: TIMEOUT, benchmark: true});

  test(`Correctness`, async () => {
    const analysis = oneWayAnova(CATEGORIES_COL, FEATURES_COL, ALPHA, TO_VALIDATE);
    const anova = analysis.anovaTable;

    /** Checks that `actual` differs from `expected` by less than ERR;
     *  both values are appended to the message so that a failure is diagnosable */
    const expectClose = (actual: number, expected: number, error: string): void =>
      expect(Math.abs(actual - expected) < ERR, true, `${error} (expected ${expected}, got ${actual})`);

    // check degrees of freedom (df-s): integers, compared exactly
    expect(anova.dfBn, EXPECTED.DF_BN, 'Incorrect degrees of freedom: dfBn');
    expect(anova.dfTot, EXPECTED.DF_TOT, 'Incorrect degrees of freedom: dfTot');
    expect(anova.dfWn, EXPECTED.DF_WN, 'Incorrect degrees of freedom: dfWn');

    // check sum of squares (ss-s)
    expectClose(anova.ssBn, EXPECTED.SS_BN, 'Incorrect sum of squares: ssBn');
    expectClose(anova.ssTot, EXPECTED.SS_TOT, 'Incorrect sum of squares: ssTot');
    expectClose(anova.ssWn, EXPECTED.SS_WN, 'Incorrect sum of squares: ssWn');

    // check mean squares (ms-s)
    expectClose(anova.msBn, EXPECTED.MS_BN, 'Incorrect mean squares: msBn');
    expectClose(anova.msWn, EXPECTED.MS_WN, 'Incorrect mean squares: msWn');

    // check F-statistics
    expectClose(anova.fStat, EXPECTED.F_STAT, 'Incorrect F-statistics value');

    // check p-value
    expectClose(anova.pValue, EXPECTED.P_VAL, 'Incorrect p-value');

    // check F-critical
    expectClose(analysis.fCritical, EXPECTED.F_CRIT, 'Incorrect F-critical');
  }, {timeout: TIMEOUT, benchmark: true});
});