@datagrok/eda 1.2.1 → 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/CHANGELOG.md +14 -4
  2. package/dist/111.js +1 -1
  3. package/dist/111.js.map +1 -1
  4. package/dist/153.js +1 -1
  5. package/dist/153.js.map +1 -1
  6. package/dist/234.js +1 -1
  7. package/dist/234.js.map +1 -1
  8. package/dist/260.js +1 -1
  9. package/dist/260.js.map +1 -1
  10. package/dist/348.js +1 -1
  11. package/dist/348.js.map +1 -1
  12. package/dist/377.js +1 -1
  13. package/dist/377.js.map +1 -1
  14. package/dist/412.js +1 -1
  15. package/dist/412.js.map +1 -1
  16. package/dist/531.js +1 -1
  17. package/dist/531.js.map +1 -1
  18. package/dist/583.js +1 -1
  19. package/dist/583.js.map +1 -1
  20. package/dist/603.js +1 -1
  21. package/dist/603.js.map +1 -1
  22. package/dist/656.js +1 -1
  23. package/dist/656.js.map +1 -1
  24. package/dist/682.js +1 -1
  25. package/dist/682.js.map +1 -1
  26. package/dist/705.js +1 -1
  27. package/dist/705.js.map +1 -1
  28. package/dist/727.js +1 -1
  29. package/dist/727.js.map +1 -1
  30. package/dist/763.js +1 -1
  31. package/dist/763.js.map +1 -1
  32. package/dist/778.js +1 -1
  33. package/dist/778.js.map +1 -1
  34. package/dist/783.js +1 -1
  35. package/dist/783.js.map +1 -1
  36. package/dist/793.js +1 -1
  37. package/dist/793.js.map +1 -1
  38. package/dist/91.js +1 -1
  39. package/dist/91.js.map +1 -1
  40. package/dist/950.js +1 -1
  41. package/dist/950.js.map +1 -1
  42. package/dist/980.js +1 -1
  43. package/dist/980.js.map +1 -1
  44. package/dist/990.js +1 -1
  45. package/dist/990.js.map +1 -1
  46. package/dist/package-test.js +1 -1
  47. package/dist/package-test.js.map +1 -1
  48. package/dist/package.js +1 -1
  49. package/dist/package.js.map +1 -1
  50. package/package.json +11 -10
  51. package/src/anova/anova-tools.ts +308 -0
  52. package/src/anova/anova-ui.ts +258 -0
  53. package/src/eda-ui.ts +0 -9
  54. package/src/global.d.ts +13 -0
  55. package/src/missing-values-imputation/ui-constants.ts +2 -0
  56. package/src/missing-values-imputation/ui.ts +7 -7
  57. package/src/package-test.ts +7 -1
  58. package/src/package.ts +6 -12
  59. package/src/tests/anova-tests.ts +87 -0
  60. package/src/tests/linear-methods-tests.ts +1 -1
package/src/package-test.ts CHANGED
@@ -1,9 +1,10 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
- import {runTests, tests, TestContext} from '@datagrok-libraries/utils/src/test';
2
+ import {runTests, tests, TestContext, initAutoTests as initTests} from '@datagrok-libraries/utils/src/test';
3
3
  import './tests/dim-reduction-tests';
4
4
  import './tests/linear-methods-tests';
5
5
  import './tests/classifiers-tests';
6
6
  import './tests/mis-vals-imputation-tests';
7
+ import './tests/anova-tests';
7
8
  export const _package = new DG.Package();
8
9
  export {tests};
9
10
 
@@ -16,3 +17,8 @@ export async function test(category: string, test: string, testContext: TestCont
16
17
  const data = await runTests({category, test, testContext});
17
18
  return DG.DataFrame.fromObjects(data)!;
18
19
  }
20
+
21
+ //name: initAutoTests
22
+ export async function initAutoTests() {
23
+ await initTests(_package, _package.getModule('package-test.js'));
24
+ }
package/src/package.ts CHANGED
@@ -7,14 +7,14 @@ import * as DG from 'datagrok-api/dg';
7
7
 
8
8
  import {_initEDAAPI} from '../wasm/EDAAPI';
9
9
  import {computePCA} from './eda-tools';
10
- import {addPrefixToEachColumnName, addOneWayAnovaVizualization} from './eda-ui';
10
+ import {addPrefixToEachColumnName} from './eda-ui';
11
11
  import {LINEAR, RBF, POLYNOMIAL, SIGMOID,
12
12
  getTrainedModel, getPrediction, isApplicableSVM, isInteractiveSVM, showTrainReport, getPackedModel} from './svm';
13
13
 
14
14
  import {PLS_ANALYSIS} from './pls/pls-constants';
15
15
  import {runMVA, runDemoMVA, getPlsAnalysis, PlsOutput} from './pls/pls-tools';
16
+ import {runOneWayAnova} from './anova/anova-ui';
16
17
 
17
- import {oneWayAnova} from './stat-tools';
18
18
  import {getDbscanWorker} from '@datagrok-libraries/math';
19
19
 
20
20
  import {DistanceAggregationMethod, DistanceAggregationMethods} from '@datagrok-libraries/ml/src/distance-matrix/types';
@@ -304,7 +304,7 @@ export async function MVA(): Promise<void> {
304
304
 
305
305
  //name: MVA demo
306
306
  //description: Multidimensional data analysis using partial least squares (PLS) regression. It identifies latent factors and constructs a linear model based on them.
307
- //meta.demoPath: Compute | Multivariate analysis
307
+ //meta.demoPath: Compute | Multivariate Analysis
308
308
  export async function demoMultivariateAnalysis(): Promise<any> {
309
309
  await runDemoMVA();
310
310
  }
@@ -547,15 +547,9 @@ export async function visualizeSigmoidKernelSVM(df: DG.DataFrame, targetColumn:
547
547
 
548
548
  //top-menu: ML | Analyze | ANOVA...
549
549
  //name: ANOVA
550
- //description: One-way analysis of variances (ANOVA) determines whether the examined factor has a significant impact on the studied feature.
551
- //input: dataframe table
552
- //input: column factor {type: categorical}
553
- //input: column feature {type: numerical}
554
- //input: double significance = 0.05 [The significance level is a value from the interval (0, 1) specifying the criterion used for rejecting the null hypothesis.]
555
- //input: bool validate = false [Indicates whether the normality of distribution and an eqaulity of varainces should be checked.]
556
- export function anova(table: DG.DataFrame, factor: DG.Column, feature: DG.Column, significance: number, validate: boolean) {
557
- const res = oneWayAnova(factor, feature, significance, validate);
558
- addOneWayAnovaVizualization(table, factor, feature, res);
550
+ //description: One-way analysis of variances (ANOVA) determines whether the examined factor has a significant impact on the explored feature.
551
+ export function anova(): void {
552
+ runOneWayAnova();
559
553
  }
560
554
 
561
555
  //top-menu: ML | Missing Values Imputation ...
package/src/tests/anova-tests.ts ADDED
@@ -0,0 +1,87 @@
1
+ // Tests for ANOVA
2
+
3
+ import * as grok from 'datagrok-api/grok';
4
+ import * as ui from 'datagrok-api/ui';
5
+ import * as DG from 'datagrok-api/dg';
6
+ import {_package} from '../package-test';
7
+
8
+ import {category, expect, test} from '@datagrok-libraries/utils/src/test';
9
+
10
+ import {oneWayAnova, FactorizedData} from '../anova/anova-tools';
11
+
12
+ const ROWS_M = 1;
13
+ const M = 1000000;
14
+ const TIMEOUT = 4000;
15
+ const ALPHA = 0.05;
16
+ const CATEGORIES = 'race';
17
+ const FEATURES = 'height';
18
+ const TO_VALIDATE = false;
19
+ const ERR = 0.01;
20
+
21
+ /** Validation features*/
22
+ const FEATURES_COL = DG.Column.fromList(DG.COLUMN_TYPE.INT, 'features', [
23
+ 9, 12, 4, 8, 7, 4, 6, 8, 2, 10, 1, 3, 4, 5, 2,
24
+ ]);
25
+
26
+ /** Validation categories */
27
+ const CATEGORIES_COL = DG.Column.fromStrings('features', [
28
+ 'A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B', 'C', 'C', 'C', 'C', 'C',
29
+ ]);
30
+
31
+ /** Expected ANOVA results for the validation data */
32
+ enum EXPECTED {
33
+ DF_BN = 2,
34
+ DF_TOT = 14,
35
+ DF_WN = 12,
36
+ SS_BN = 63.333,
37
+ SS_TOT = 147.333,
38
+ SS_WN = 84,
39
+ MS_BN = 31.666,
40
+ MS_WN = 7,
41
+ F_STAT = 4.523,
42
+ F_CRIT = 3.885,
43
+ P_VAL = 0.034,
44
+ };
45
+
46
+ category('ANOVA', () => {
47
+ test(`Performance: ${ROWS_M}M rows demog`, async () => {
48
+ const df = grok.data.demo.demog(ROWS_M * M);
49
+ const categories = df.col(CATEGORIES);
50
+ const features = df.col(FEATURES);
51
+
52
+ const factorized = new FactorizedData(categories!, features!, categories!.stats.uniqueCount);
53
+ factorized.areVarsEqual(ALPHA);
54
+
55
+ oneWayAnova(categories!, features!, ALPHA, TO_VALIDATE);
56
+ }, {timeout: TIMEOUT, benchmark: true});
57
+
58
+ test(`Correctness`, async () => {
59
+ const analysis = oneWayAnova(CATEGORIES_COL, FEATURES_COL, ALPHA, TO_VALIDATE);
60
+ const anova = analysis.anovaTable;
61
+
62
+ // check degrees of freedom (df-s)
63
+ expect(anova.dfBn, EXPECTED.DF_BN, 'Incorrect degrees of freedom: dfBn');
64
+ expect(anova.dfTot, EXPECTED.DF_TOT, 'Incorrect degrees of freedom: dfTot');
65
+ expect(anova.dfWn, EXPECTED.DF_WN, 'Incorrect degrees of freedom: dfWn');
66
+
67
+ const eq = (x: number, y: number) => Math.abs(x - y) < ERR;
68
+
69
+ // check sum of squares (ss-s)
70
+ expect(eq(anova.ssBn, EXPECTED.SS_BN), true, 'Incorrect sum of squares: ssBn');
71
+ expect(eq(anova.ssTot, EXPECTED.SS_TOT), true, 'Incorrect sum of squares: ssTot');
72
+ expect(eq(anova.ssWn, EXPECTED.SS_WN), true, 'Incorrect sum of squares: ssWn');
73
+
74
+ // check mean squares (ms-s)
75
+ expect(eq(anova.msBn, EXPECTED.MS_BN), true, 'Incorrect mean squares: msBn');
76
+ expect(eq(anova.msWn, EXPECTED.MS_WN), true, 'Incorrect mean squares: msWn');
77
+
78
+ // check F-statistics
79
+ expect(eq(anova.fStat, EXPECTED.F_STAT), true, 'Incorrect F-statistics value');
80
+
81
+ // check p-value
82
+ expect(eq(anova.pValue, EXPECTED.P_VAL), true, 'Incorrect p-value');
83
+
84
+ // check F-critical
85
+ expect(eq(analysis.fCritical, EXPECTED.F_CRIT), true, 'Incorrect F-critical');
86
+ }, {timeout: TIMEOUT, benchmark: true});
87
+ });
package/src/tests/linear-methods-tests.ts CHANGED
@@ -16,7 +16,7 @@ const ROWS = 100;
16
16
  const ROWS_K = 100;
17
17
  const COLS = 100;
18
18
  const COMPONENTS = 3;
19
- const TIMEOUT = 4000;
19
+ const TIMEOUT = 8000;
20
20
  const INDEP_COLS = 2;
21
21
  const DEP_COLS = 5;
22
22
  const ERROR = 0.1;