@datagrok/eda 1.2.1 → 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/CHANGELOG.md +14 -4
  2. package/dist/111.js +1 -1
  3. package/dist/111.js.map +1 -1
  4. package/dist/153.js +1 -1
  5. package/dist/153.js.map +1 -1
  6. package/dist/234.js +1 -1
  7. package/dist/234.js.map +1 -1
  8. package/dist/260.js +1 -1
  9. package/dist/260.js.map +1 -1
  10. package/dist/348.js +1 -1
  11. package/dist/348.js.map +1 -1
  12. package/dist/377.js +1 -1
  13. package/dist/377.js.map +1 -1
  14. package/dist/412.js +1 -1
  15. package/dist/412.js.map +1 -1
  16. package/dist/531.js +1 -1
  17. package/dist/531.js.map +1 -1
  18. package/dist/583.js +1 -1
  19. package/dist/583.js.map +1 -1
  20. package/dist/603.js +1 -1
  21. package/dist/603.js.map +1 -1
  22. package/dist/656.js +1 -1
  23. package/dist/656.js.map +1 -1
  24. package/dist/682.js +1 -1
  25. package/dist/682.js.map +1 -1
  26. package/dist/705.js +1 -1
  27. package/dist/705.js.map +1 -1
  28. package/dist/727.js +1 -1
  29. package/dist/727.js.map +1 -1
  30. package/dist/763.js +1 -1
  31. package/dist/763.js.map +1 -1
  32. package/dist/778.js +1 -1
  33. package/dist/778.js.map +1 -1
  34. package/dist/783.js +1 -1
  35. package/dist/783.js.map +1 -1
  36. package/dist/793.js +1 -1
  37. package/dist/793.js.map +1 -1
  38. package/dist/91.js +1 -1
  39. package/dist/91.js.map +1 -1
  40. package/dist/950.js +1 -1
  41. package/dist/950.js.map +1 -1
  42. package/dist/980.js +1 -1
  43. package/dist/980.js.map +1 -1
  44. package/dist/990.js +1 -1
  45. package/dist/990.js.map +1 -1
  46. package/dist/package-test.js +1 -1
  47. package/dist/package-test.js.map +1 -1
  48. package/dist/package.js +1 -1
  49. package/dist/package.js.map +1 -1
  50. package/package.json +11 -10
  51. package/src/anova/anova-tools.ts +308 -0
  52. package/src/anova/anova-ui.ts +258 -0
  53. package/src/eda-ui.ts +0 -9
  54. package/src/global.d.ts +13 -0
  55. package/src/missing-values-imputation/ui-constants.ts +2 -0
  56. package/src/missing-values-imputation/ui.ts +7 -7
  57. package/src/package-test.ts +7 -1
  58. package/src/package.ts +6 -12
  59. package/src/tests/anova-tests.ts +87 -0
  60. package/src/tests/linear-methods-tests.ts +1 -1
package/package.json CHANGED
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "@datagrok/eda",
3
3
  "friendlyName": "EDA",
4
- "version": "1.2.1",
4
+ "version": "1.2.3",
5
5
  "description": "Exploratory Data Analysis Tools",
6
6
  "dependencies": {
7
7
  "@datagrok-libraries/math": "^1.2.0",
8
8
  "@datagrok-libraries/ml": "^6.7.0",
9
- "@datagrok-libraries/tutorials": "^1.4.0",
9
+ "@datagrok-libraries/tutorials": "^1.4.2",
10
10
  "@datagrok-libraries/utils": "^4.3.0",
11
11
  "@keckelt/tsne": "^1.0.2",
12
12
  "@webgpu/types": "^0.1.40",
@@ -16,7 +16,7 @@
16
16
  "jstat": "^1.9.6",
17
17
  "source-map-loader": "^4.0.1",
18
18
  "umap-js": "^1.3.3",
19
- "worker-loader": "latest"
19
+ "worker-loader": "^3.0.8"
20
20
  },
21
21
  "author": {
22
22
  "name": "Viktor Makarichev",
@@ -25,14 +25,14 @@
25
25
  "devDependencies": {
26
26
  "@typescript-eslint/eslint-plugin": "^5.32.0",
27
27
  "@typescript-eslint/parser": "^5.32.0",
28
- "css-loader": "latest",
28
+ "css-loader": "^7.1.2",
29
29
  "eslint": "^8.21.0",
30
30
  "eslint-config-google": "^0.14.0",
31
- "style-loader": "latest",
32
- "ts-loader": "latest",
33
- "typescript": "latest",
34
- "webpack": "latest",
35
- "webpack-cli": "latest"
31
+ "style-loader": "^4.0.0",
32
+ "ts-loader": "^9.5.1",
33
+ "typescript": "^5.6.3",
34
+ "webpack": "^5.95.0",
35
+ "webpack-cli": "^5.1.4"
36
36
  },
37
37
  "scripts": {
38
38
  "link-all": "npm link datagrok-api @datagrok-libraries/utils @datagrok-libraries/tutorials",
@@ -40,6 +40,7 @@
40
40
  "release-eda": "webpack && grok publish --release",
41
41
  "build-eda": "webpack",
42
42
  "build": "webpack",
43
+ "test": "grok test",
43
44
  "debug-eda-dev": "webpack && grok publish dev",
44
45
  "release-eda-dev": "webpack && grok publish dev --release",
45
46
  "debug-eda-local": "webpack && grok publish local",
@@ -92,4 +93,4 @@
92
93
  }
93
94
  }
94
95
  }
95
- }
96
+ }
@@ -0,0 +1,308 @@
1
+ // Analysis of Variance (ANOVA): computations
2
+
3
+ /* REFERENCES
4
+
5
+ [1] One-way analysis of variance, https://en.wikipedia.org/wiki/One-way_analysis_of_variance
6
+
7
+ [2] G.W. Heiman. Basic Statistics for the Behavioral Sciences, 6th ed. Wadsworth Publishing, 2010
8
+
9
+ [3] F-test of equality of variances, https://en.wikipedia.org/wiki/F-test_of_equality_of_variances
10
+
11
+ [4] S. McKillup. Statistics Explained, Cambridge University Press, 2005
12
+
13
+ */
14
+
15
+ import * as grok from 'datagrok-api/grok';
16
+ import * as ui from 'datagrok-api/ui';
17
+ import * as DG from 'datagrok-api/dg';
18
+
19
+ //@ts-ignore: no types
20
+ import * as jStat from 'jstat';
21
+
22
+ import {getNullValue} from '../missing-values-imputation/knn-imputer';
23
+
24
+ enum ERROR_MSG {
25
+ NON_EQUAL_FACTORS_VALUES_SIZE = 'non-equal sizes of factor and values arrays',
26
+ INCORRECT_SIGNIFICANCE_LEVEL = 'incorrect significance level',
27
+ NON_EQUAL_VARIANCES = 'non-equal variances',
28
+ NON_NORMAL_DISTRIB = 'non-normal distribution',
29
+ UNSUPPORTED_COLUMN_TYPE = 'unsupported feature column type',
30
+ INCORRECT_CATEGORIES_COL_TYPE = 'incorrect categories column type',
31
+ SINGLE_FACTOR = 'single category features',
32
+ CATS_EQUAL_SIZE = 'single value in each category',
33
+ NO_FEATURE_VARIATION = 'no feature variation',
34
+ NO_FEATURE_VARIATION_WITHIN_GROUPS = 'no feature variation within groups',
35
+ };
36
+
37
+ type SampleData = {
38
+ sum: number,
39
+ sumOfSquares: number,
40
+ size: number,
41
+ };
42
+
43
+ /** One-way ANOVA computation results. The classic notations are used (see [2], p. 290). */
44
+ type OneWayAnova = {
45
+ /** sum of squares between groups, SSbn */
46
+ ssBn: number,
47
+ /** sum of squares within groups, SSwn */
48
+ ssWn: number,
49
+ /** total sum of squares, SStot */
50
+ ssTot: number,
51
+ /** degrees of freedom between groups, DFbn */
52
+ dfBn: number,
53
+ /** degrees of freedom within groups, DFwn */
54
+ dfWn: number,
55
+ /** total degrees of freedom, DFtot */
56
+ dfTot: number,
57
+ /** mean square between groups, MSbn */
58
+ msBn: number,
59
+ /** mean square within groups, MSwn */
60
+ msWn: number,
61
+ /** Fobt, value of F-statistics, Fstat */
62
+ fStat: number,
63
+ /** p-value corresponding to F-statistics, pValue */
64
+ pValue: number,
65
+ };
66
+
67
+ /** One-way ANOVA report */
68
+ export type OneWayAnovaReport = {
69
+ anovaTable: OneWayAnova,
70
+ fCritical: number,
71
+ significance: number,
72
+ };
73
+
74
+ /** Categorical column */
75
+ type CatCol = DG.Column<DG.COLUMN_TYPE.STRING | DG.COLUMN_TYPE.BOOL>;
76
+
77
+ /** Numerical column */
78
+ type NumCol = DG.Column<DG.COLUMN_TYPE.FLOAT> | DG.Column<DG.COLUMN_TYPE.INT>;
79
+
80
+ /** Check correctness of significance level. */
81
+ export function checkSignificanceLevel(alpha: number) {
82
+ if ((alpha <= 0) || (alpha >= 1))
83
+ throw new Error(ERROR_MSG.INCORRECT_SIGNIFICANCE_LEVEL);
84
+ }
85
+
86
+ /** Compute unbiased variance.*/
87
+ export function getVariance(data: SampleData): number {
88
+ // The applied formulas can be found in [4] (see p. 63)
89
+ const size = data.size;
90
+
91
+ if (size <= 1)
92
+ return 0;
93
+
94
+ return (data.sumOfSquares - (data.sum) ** 2 / size) / (size - 1);
95
+ } // getVariance
96
+
97
+ /** Check equality of variances of 2 samples. F-test is performed.*/
98
+ function areVarsEqual(xData: SampleData, yData: SampleData, alpha: number): boolean {
99
+ // The applied approach can be found in [3]
100
+ checkSignificanceLevel(alpha);
101
+
102
+ const xVar = getVariance(xData);
103
+ const yVar = getVariance(yData);
104
+
105
+ if ((xVar === 0) || (yVar === 0))
106
+ return (xVar === yVar);
107
+
108
+ const fStat = xVar / yVar;
109
+ const fCrit = jStat.centralF.inv(1 - alpha, xData.size - 1, yData.size - 1);
110
+
111
+ return (fStat < fCrit);
112
+ } // areVarsEqual
113
+
114
+ export class FactorizedData {
115
+ private sums!: Float64Array;
116
+ private sumsOfSquares!: Float64Array;
117
+ private subSampleSizes!: Int32Array;
118
+ private size!: number;
119
+ private catCount!: number;
120
+ private nullsCount = 0;
121
+
122
+ constructor(categories: CatCol, values: NumCol, uniqueCount: number) {
123
+ if (categories.length !== values.length)
124
+ throw new Error(ERROR_MSG.NON_EQUAL_FACTORS_VALUES_SIZE);
125
+
126
+ if (values.stats.stdev > 0)
127
+ this.setStats(categories, values, uniqueCount);
128
+ else
129
+ throw new Error(ERROR_MSG.NO_FEATURE_VARIATION);
130
+ }
131
+
132
+ /** Check equality of variances of factorized data. */
133
+ public areVarsEqual(alpha: number): boolean {
134
+ const K = this.catCount;
135
+
136
+ if (K === 1)
137
+ return true;
138
+
139
+ const first: SampleData = {sum: this.sums[0], sumOfSquares: this.sumsOfSquares[0], size: this.subSampleSizes[0]};
140
+
141
+ for (let i = 1; i < K; ++i) {
142
+ if (!areVarsEqual(first, {sum: this.sums[i], sumOfSquares: this.sumsOfSquares[i],
143
+ size: this.subSampleSizes[i]}, alpha))
144
+ return false;
145
+ }
146
+
147
+ return true;
148
+ } // areVarsEqual
149
+
150
+ /** Perform one-way ANOVA computations. */
151
+ public getOneWayAnova(): OneWayAnova {
152
+ // The notations and formulas used below follow [2] (see p. 290).
153
+
154
+ let sum = 0;
155
+ let sumOfSquares = 0;
156
+ let buf = 0;
157
+ let K = this.catCount;
158
+ let nonEmptyCategories = K;
159
+
160
+ for (let i = 0; i < K; ++i) {
161
+ if (this.subSampleSizes[i] !== 0) {
162
+ sum += this.sums[i];
163
+ sumOfSquares += this.sumsOfSquares[i];
164
+ buf += this.sums[i] ** 2 / this.subSampleSizes[i];
165
+ } else
166
+ --nonEmptyCategories;
167
+ }
168
+
169
+ K = nonEmptyCategories;
170
+
171
+ if (K === 1)
172
+ throw new Error(ERROR_MSG.SINGLE_FACTOR);
173
+
174
+ const N = this.size - this.nullsCount;
175
+ if (N === K)
176
+ throw new Error(ERROR_MSG.CATS_EQUAL_SIZE);
177
+
178
+ const ssTot = sumOfSquares - sum ** 2 / N;
179
+ const ssBn = buf - sum ** 2 / N;
180
+ const ssWn = ssTot - ssBn;
181
+
182
+ if (ssWn === 0)
183
+ throw new Error(ERROR_MSG.NO_FEATURE_VARIATION_WITHIN_GROUPS);
184
+
185
+ const dfBn = K - 1;
186
+ const dfWn = N - K;
187
+ const dfTot = N - 1;
188
+
189
+ const msBn = ssBn / dfBn;
190
+ const msWn = ssWn / dfWn;
191
+
192
+ const fStat = msBn / msWn;
193
+
194
+ return {
195
+ ssBn: ssBn,
196
+ ssWn: ssWn,
197
+ ssTot: ssTot,
198
+ dfBn: dfBn,
199
+ dfWn: dfWn,
200
+ dfTot: dfTot,
201
+ msBn: msBn,
202
+ msWn: msWn,
203
+ fStat: fStat,
204
+ pValue: 1 - jStat.centralF.cdf(fStat, dfBn, dfWn),
205
+ };
206
+ } // getOneWayAnova
207
+
208
+ /** Compute sum & sums of squares with respect to factor levels. */
209
+ private setStats(categories: CatCol, features: NumCol, uniqueCount: number): void {
210
+ const type = features.type;
211
+ const size = features.length;
212
+ const featuresNull = getNullValue(features);
213
+
214
+ switch (type) {
215
+ case DG.COLUMN_TYPE.INT:
216
+ case DG.COLUMN_TYPE.FLOAT:
217
+ const catCount = uniqueCount;
218
+ this.catCount = catCount;
219
+ this.size = size;
220
+
221
+ const vals = features.getRawData();
222
+ const cats = categories.getRawData();
223
+
224
+ const sums = new Float64Array(catCount).fill(0);
225
+ const sumsOfSquares = new Float64Array(catCount).fill(0);
226
+ const subSampleSizes = new Int32Array(catCount).fill(0);
227
+
228
+ let cat: number;
229
+
230
+ if (categories.type == DG.COLUMN_TYPE.BOOL) {
231
+ let catIdx = 0;
232
+ let shift = 0;
233
+ let packed = cats[0];
234
+ const MAX_SHIFT = 8 * cats.BYTES_PER_ELEMENT - 1;
235
+
236
+ for (let i = 0; i < size; ++i) {
237
+ cat = 1 & (packed >> shift);
238
+
239
+ if (vals[i] !== featuresNull) {
240
+ sums[cat] += vals[i];
241
+ sumsOfSquares[cat] += vals[i] ** 2;
242
+ ++subSampleSizes[cat];
243
+ } else
244
+ ++this.nullsCount;
245
+
246
+
247
+ ++shift;
248
+
249
+ if (shift > MAX_SHIFT) {
250
+ shift = 0;
251
+ ++catIdx;
252
+ packed = cats[catIdx];
253
+ }
254
+ }
255
+ } else {
256
+ const categoriesNull = categories.stats.missingValueCount > 0 ? getNullValue(categories) : -1;
257
+
258
+ for (let i = 0; i < size; ++i) {
259
+ cat = cats[i];
260
+
261
+ if ((cat === categoriesNull) || (vals[i] === featuresNull)) {
262
+ ++this.nullsCount;
263
+ continue;
264
+ }
265
+
266
+ sums[cat] += vals[i];
267
+ sumsOfSquares[cat] += vals[i] ** 2;
268
+ ++subSampleSizes[cat];
269
+ }
270
+ }
271
+
272
+ this.sums = sums;
273
+ this.sumsOfSquares = sumsOfSquares;
274
+ this.subSampleSizes = subSampleSizes;
275
+
276
+ break;
277
+
278
+ default:
279
+ throw new Error(ERROR_MSG.UNSUPPORTED_COLUMN_TYPE);
280
+ }
281
+ } // setStats
282
+ } // FactorizedData
283
+
284
+ /** Perform one-way analysis of variance. */
285
+ export function oneWayAnova(categores: CatCol, values: NumCol, alpha: number,
286
+ toValidate: boolean = true): OneWayAnovaReport {
287
+ checkSignificanceLevel(alpha);
288
+
289
+ const uniqueCount = categores.stats.uniqueCount;
290
+
291
+ if (uniqueCount < 2)
292
+ throw new Error(ERROR_MSG.SINGLE_FACTOR);
293
+
294
+ const factorized = new FactorizedData(categores, values, uniqueCount);
295
+
296
+ if (toValidate) {
297
+ if (!factorized.areVarsEqual(alpha))
298
+ throw new Error(ERROR_MSG.NON_EQUAL_VARIANCES);
299
+ }
300
+
301
+ const anova = factorized.getOneWayAnova();
302
+
303
+ return {
304
+ anovaTable: anova,
305
+ fCritical: jStat.centralF.inv(1 - alpha, anova.dfBn, anova.dfWn),
306
+ significance: alpha,
307
+ };
308
+ } // oneWayAnova
@@ -0,0 +1,258 @@
1
+ // Analysis of Variance (ANOVA): UI
2
+
3
+ import * as grok from 'datagrok-api/grok';
4
+ import * as ui from 'datagrok-api/ui';
5
+ import * as DG from 'datagrok-api/dg';
6
+
7
+ import {oneWayAnova, OneWayAnovaReport} from './anova-tools';
8
+
9
+ const FEATURE_TYPES = [DG.COLUMN_TYPE.INT, DG.COLUMN_TYPE.FLOAT] as string[];
10
+ const FACTOR_TYPES = [DG.COLUMN_TYPE.STRING, DG.COLUMN_TYPE.BOOL] as string[];
11
+
12
+ const ANOVA_HELP_URL = '/help/explore/anova';
13
+
14
+ /** Significance const */
15
+ enum SIGNIFICANCE {
16
+ DEFAULT = 0.05,
17
+ MIN = 0.01,
18
+ MAX = 0.99,
19
+ INFIMUM = 0,
20
+ SUPREMUM = 1,
21
+ };
22
+
23
+ /** Default names */
24
+ enum DEFAULT {
25
+ FACTOR = 'race',
26
+ FEATURE = 'age',
27
+ };
28
+
29
+ /** Add one-way ANOVA results */
30
+ function addVizualization(df: DG.DataFrame, factorsName: string, featuresName: string, report: OneWayAnovaReport) {
31
+ const test = report.anovaTable.fStat > report.fCritical;
32
+
33
+ const shortConclusion = test ?
34
+ `"${factorsName}" affects the "${featuresName}"` :
35
+ `"${factorsName}" doesn't affect the "${featuresName}"`;
36
+
37
+ const view = grok.shell.getTableView(df.name);
38
+ const boxPlot = DG.Viewer.boxPlot(df, {
39
+ categoryColumnNames: [factorsName],
40
+ valueColumnName: featuresName,
41
+ showPValue: false,
42
+ showStatistics: false,
43
+ description: shortConclusion,
44
+ showColorSelector: false,
45
+ });
46
+ const boxPlotNode = view.dockManager.dock(boxPlot.root, DG.DOCK_TYPE.RIGHT, null, 'ANOVA');
47
+
48
+ const hypoMd = ui.markdown(`**H0:** the "${factorsName}"
49
+ factor does not produce a significant difference in the "${featuresName}" feature.`);
50
+ ui.tooltip.bind(hypoMd, 'Null hypothesis');
51
+
52
+ const testMd = ui.markdown(`**Test result:** ${test ?
53
+ 'means differ significantly.' :
54
+ 'means do not differ significantly.'}`,
55
+ );
56
+
57
+ const tooltipDiv = test ?
58
+ ui.divV([
59
+ ui.p(`Reject the null hypothesis, since F > F-critical:
60
+ ${report.anovaTable.fStat.toFixed(2)} > ${report.fCritical.toFixed(2)}.`),
61
+ ui.h2('There is a significant difference among sample averages.'),
62
+ ]) :
63
+ ui.divV([
64
+ ui.p(`Fail to reject the null hypothesis, since F < F-critical:
65
+ ${report.anovaTable.fStat.toFixed(2)} < ${report.fCritical.toFixed(2)}.`),
66
+ ui.h2('There is no significant difference among sample averages.'),
67
+ ]);
68
+
69
+ ui.tooltip.bind(testMd, () => tooltipDiv);
70
+
71
+ const divResult = ui.divV([
72
+ hypoMd,
73
+ testMd,
74
+ ui.link('Learn more',
75
+ () => window.open('https://en.wikipedia.org/wiki/F-test', '_blank'),
76
+ 'Click to open in a new tab',
77
+ ),
78
+ ]);
79
+ divResult.style.marginLeft = '20px';
80
+
81
+ const hypoNode = grok.shell.dockManager.dock(divResult, DG.DOCK_TYPE.DOWN, boxPlotNode, 'F-test', 0.3);
82
+
83
+ const reportViewer = getAnovaGrid(report);
84
+ grok.shell.dockManager.dock(reportViewer.root, DG.DOCK_TYPE.FILL, hypoNode, 'Analysis');
85
+ }
86
+
87
+ /** Create a grid viewer with one-way ANOVA results. */
88
+ function getAnovaGrid(report: OneWayAnovaReport): DG.Grid {
89
+ const anova = report.anovaTable;
90
+
91
+ const grid = DG.Viewer.grid(DG.DataFrame.fromColumns([
92
+ DG.Column.fromStrings('Source of variance', ['Between groups', 'Within groups', 'Total']),
93
+ DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, 'SS', [anova.ssBn, anova.ssWn, anova.ssTot]),
94
+ DG.Column.fromList(DG.COLUMN_TYPE.INT, 'DF', [anova.dfBn, anova.dfWn, anova.dfTot]),
95
+ DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, 'MS', [anova.msBn, anova.msWn, null]),
96
+ DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, 'F', [anova.fStat, null, null]),
97
+ DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, 'F-critical', [report.fCritical, null, null]),
98
+ DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, 'p-value', [anova.pValue, null, null]),
99
+ ]));
100
+
101
+ const tooltip = new Map([
102
+ ['Source of variance', 'List of the explored variation sources'],
103
+ ['SS', 'Sum of squares (SS)'],
104
+ ['DF', 'Degrees of freedom (DF)'],
105
+ ['MS', 'Mean square (MS)'],
106
+ ['F', 'F-statistics (F)'],
107
+ ['F-critical', `${report.significance}-critical value of F-statistics (F)`],
108
+ ['p-value', `Probability to obtain F-statistics (F) greater than the actual observation.`],
109
+ ]);
110
+
111
+ grid.onCellTooltip(function(cell, x, y) {
112
+ if (cell.isColHeader) {
113
+ ui.tooltip.show(ui.divV([ui.p(tooltip.get(cell.tableColumn!.name)!)]), x, y);
114
+ return true;
115
+ }
116
+ });
117
+
118
+ grid.helpUrl = ANOVA_HELP_URL;
119
+
120
+ return grid;
121
+ } // getAnovaGrid
122
+
123
+ /** Return warning div */
124
+ function getWarning(msg: string): HTMLElement {
125
+ return ui.divV([
126
+ ui.markdown(`ANOVA cannot be performed:
127
+
128
+ ${msg}`),
129
+ ui.link('Learn more',
130
+ () => window.open('https://en.wikipedia.org/wiki/Analysis_of_variance#Assumptions', '_blank'),
131
+ 'Click to open in a new tab',
132
+ ),
133
+ ]);
134
+ }
135
+
136
+ /** Run one-way analysis of variance */
137
+ export function runOneWayAnova(): void {
138
+ /** current dataframe */
139
+ const df: DG.DataFrame | null = grok.shell.t;
140
+
141
+ if (df === null) {
142
+ grok.shell.warning('No dataframe is opened');
143
+ return;
144
+ }
145
+
146
+ const columns = df.columns;
147
+ const factorColNames = [] as string[];
148
+ const featureColNames = [] as string[];
149
+
150
+ for (const col of columns) {
151
+ if (FEATURE_TYPES.includes(col.type))
152
+ featureColNames.push(col.name);
153
+ else if (FACTOR_TYPES.includes(col.type))
154
+ factorColNames.push(col.name);
155
+ }
156
+
157
+ const factorColsCount = factorColNames.length;
158
+ if (factorColsCount < 1) {
159
+ grok.shell.warning(ui.markdown(`No acceptable factor columns:
160
+
161
+ - type: ${FACTOR_TYPES.join(', ')}
162
+ - at least two categories`,
163
+ ));
164
+ return;
165
+ };
166
+
167
+ let factor = df.col(DEFAULT.FACTOR);
168
+
169
+ if (factor === null) {
170
+ let minIdx = 0;
171
+ let minCount = columns.byName(factorColNames[0]).categories.length;
172
+ let current: number;
173
+
174
+ for (let i = 1; i < factorColsCount; ++i) {
175
+ current = columns.byName(factorColNames[i]).categories.length;
176
+ if (current < minCount) {
177
+ minCount = current;
178
+ minIdx = i;
179
+ }
180
+ }
181
+
182
+ factor = columns.byName(factorColNames[minIdx]);
183
+ }
184
+
185
+ if (featureColNames.length < 1) {
186
+ grok.shell.warning(ui.markdown(`No acceptable feature columns:
187
+
188
+ - type: ${FEATURE_TYPES.join(', ')}`,
189
+ ));
190
+ return;
191
+ }
192
+
193
+ const factorInput = ui.input.column('Category', {
194
+ table: df,
195
+ value: factor,
196
+ tooltipText: 'Column with factor values',
197
+ onValueChanged: (col) => factor = col,
198
+ filter: (col: DG.Column) => factorColNames.includes(col.name),
199
+ nullable: false,
200
+ });
201
+
202
+ let feature = df.col(DEFAULT.FEATURE);
203
+ if (feature === null)
204
+ feature = columns.byName(featureColNames[0]);
205
+
206
+ const featureInput = ui.input.column('Feature', {
207
+ table: df,
208
+ value: feature,
209
+ tooltipText: 'Column with feature values',
210
+ onValueChanged: (col) => feature = col,
211
+ filter: (col: DG.Column) => featureColNames.includes(col.name),
212
+ nullable: false,
213
+ });
214
+
215
+ let significance = SIGNIFICANCE.DEFAULT;
216
+ const signInput = ui.input.float('Alpha', {
217
+ min: SIGNIFICANCE.MIN,
218
+ max: SIGNIFICANCE.MAX,
219
+ value: significance,
220
+ nullable: false,
221
+ tooltipText: 'Significance level',
222
+ onValueChanged: (value) => {
223
+ significance = value;
224
+ runBtn.disabled = (significance <= SIGNIFICANCE.INFIMUM) || (significance >= SIGNIFICANCE.SUPREMUM);
225
+ },
226
+ });
227
+
228
+ const dlg = ui.dialog({title: 'ANOVA', helpUrl: ANOVA_HELP_URL});
229
+ const view = grok.shell.getTableView(df.name);
230
+ view.root.appendChild(dlg.root);
231
+ dlg.addButton('Run', () => {
232
+ dlg.close();
233
+
234
+ try {
235
+ const res = oneWayAnova(factor!, feature!, significance);
236
+ addVizualization(df, factor!.name, feature!.name, res);
237
+ } catch (error) {
238
+ if (error instanceof Error) {
239
+ grok.shell.warning(getWarning(error.message));
240
+
241
+ view.addViewer(DG.VIEWER.BOX_PLOT, {
242
+ categoryColumnNames: [factor!.name],
243
+ valueColumnName: feature!.name,
244
+ showStatistics: false,
245
+ showPValue: false,
246
+ });
247
+ } else
248
+ grok.shell.error('ANOVA fails: the platform issue');
249
+ }
250
+ }, undefined, 'Perform analysis of variances');
251
+
252
+ const runBtn = dlg.getButton('Run');
253
+
254
+ dlg.add(factorInput)
255
+ .add(featureInput)
256
+ .add(signInput)
257
+ .show();
258
+ } // runOneWayAnova
package/src/eda-ui.ts CHANGED
@@ -131,12 +131,3 @@ export function addPLSvisualization(
131
131
  // 4. Scores Scatter Plot
132
132
  view.addViewer(scoresScatterPlot(samplesNames, plsOutput[2], plsOutput[3]));
133
133
  }
134
-
135
- // Add one-way ANOVA results
136
- export function addOneWayAnovaVizualization(
137
- table: DG.DataFrame, factors: DG.Column, values: DG.Column, anova: DG.DataFrame,
138
- ) {
139
- const view = grok.shell.getTableView(table.name);
140
- view.addViewer(DG.Viewer.boxPlot(DG.DataFrame.fromColumns([factors, values])));
141
- view.addViewer(DG.Viewer.grid(anova));
142
- }
@@ -0,0 +1,13 @@
1
+ import * as grokNamespace from 'datagrok-api/grok';
2
+ import * as uiNamespace from 'datagrok-api/ui';
3
+ import * as DGNamespace from 'datagrok-api/dg';
4
+ import * as rxjsNamespace from 'rxjs';
5
+ import $Namespace from 'cash-dom';
6
+
7
+ declare global {
8
+ const grok: typeof grokNamespace;
9
+ const ui: typeof uiNamespace;
10
+ const DG: typeof DGNamespace;
11
+ const rjxs: typeof rxjsNamespace;
12
+ const $: typeof $Namespace;
13
+ }
@@ -62,3 +62,5 @@ export enum HINT {
62
62
  IMPUTATION_SETTINGS = 'Simple imputation settings',
63
63
  KEEP_EMPTY = 'Defines whether to keep empty missing values failed to be imputed OR fill them using simple imputation',
64
64
  };
65
+
66
+ export const MAX_INPUT_NAME_LENGTH = 15;
@@ -2,7 +2,7 @@ import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
- import {TITLE, KNN_IMPUTER, ERROR_MSG, HINT} from './ui-constants';
5
+ import {TITLE, KNN_IMPUTER, ERROR_MSG, HINT, MAX_INPUT_NAME_LENGTH} from './ui-constants';
6
6
  import {SUPPORTED_COLUMN_TYPES, METRIC_TYPE, DISTANCE_TYPE, MetricInfo, DEFAULT, MIN_NEIGHBORS,
7
7
  impute, getMissingValsIndices, areThereFails, imputeFailed} from './knn-imputer';
8
8
 
@@ -190,7 +190,7 @@ export async function runKNNImputer(df?: DG.DataFrame): Promise<void> {
190
190
 
191
191
  // Metrics components
192
192
  const featuresMetrics = new Map<string, MetricInfo>();
193
- const metricInfoInputs = new Map<string, HTMLDivElement>();
193
+ const metricInfoInputs = new Map<string, HTMLElement>();
194
194
  const metricsDiv = ui.divV([]);
195
195
  metricsDiv.style.overflow = 'auto';
196
196
 
@@ -214,7 +214,7 @@ export async function runKNNImputer(df?: DG.DataFrame): Promise<void> {
214
214
 
215
215
  // The following should provide a slider (see the bug https://reddata.atlassian.net/browse/GROK-14431)
216
216
  const prop = DG.Property.fromOptions({
217
- 'name': name,
217
+ 'name': name.length < MAX_INPUT_NAME_LENGTH ? name : name.slice(0, MAX_INPUT_NAME_LENGTH).concat('...'),
218
218
  'inputType': 'Float',
219
219
  'min': 0,
220
220
  'max': 10,
@@ -229,11 +229,11 @@ export async function runKNNImputer(df?: DG.DataFrame): Promise<void> {
229
229
  distInfo.weight = value ?? settings.defaultWeight;
230
230
  featuresMetrics.set(name, distInfo);
231
231
  });
232
- weightInput.setTooltip(HINT.WEIGHT);
232
+ ui.tooltip.bind(weightInput.captionLabel, name);
233
+ ui.tooltip.bind(weightInput.input, HINT.WEIGHT);
233
234
 
234
- const div = ui.divH([distTypeInput.root, weightInput.root]);
235
- metricInfoInputs.set(name, div);
236
- metricsDiv.append(div);
235
+ metricInfoInputs.set(name, weightInput.root);
236
+ metricsDiv.append(weightInput.root);
237
237
  });
238
238
 
239
239
  // The main dialog