@datagrok/eda 1.1.8 → 1.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintignore +1 -0
- package/.eslintrc.json +45 -0
- package/dist/100.js +2 -2
- package/dist/118.js +2 -2
- package/dist/{645.js → 208.js} +2 -2
- package/dist/221.js +1 -1
- package/dist/261.js +2 -0
- package/dist/334.js +2 -0
- package/dist/352.js +2 -0
- package/dist/361.js +2 -0
- package/dist/{604.js → 367.js} +2 -2
- package/dist/{584.js → 374.js} +2 -2
- package/dist/42.js +2 -0
- package/dist/{111.js → 467.js} +2 -2
- package/dist/471.js +2 -2
- package/dist/483.js +2 -0
- package/dist/{632.js → 533.js} +2 -2
- package/dist/664.js +2 -2
- package/dist/694.js +2 -2
- package/dist/729.js +1 -1
- package/dist/{146.js → 902.js} +2 -2
- package/dist/910.js +2 -0
- package/dist/943.js +3 -0
- package/dist/package-test.js +2 -2
- package/dist/package.js +2 -2
- package/package.json +10 -4
- package/src/data-generators.ts +13 -13
- package/src/eda-tools.ts +42 -83
- package/src/eda-ui.ts +65 -58
- package/src/package-test.ts +2 -2
- package/src/package.ts +53 -61
- package/src/stat-tools.ts +72 -61
- package/src/svm.ts +144 -151
- package/src/utils.ts +13 -17
- package/src/workers/tsne-worker.ts +6 -6
- package/src/workers/umap-worker.ts +3 -3
- package/webpack.config.js +3 -2
- package/dist/155.js +0 -2
- package/dist/313.js +0 -2
- package/dist/355.js +0 -2
- package/dist/44.js +0 -2
- package/dist/489.js +0 -3
- package/dist/656.js +0 -2
- package/dist/727.js +0 -2
- package/dist/861.js +0 -2
- package/dist/93.js +0 -2
- package/dist/972.js +0 -2
- /package/dist/{489.js.LICENSE.txt → 943.js.LICENSE.txt} +0 -0
package/src/package.ts
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
/* eslint-disable camelcase */
|
|
2
|
+
/* eslint-disable max-len */
|
|
1
3
|
/* Do not change these import lines to match external modules in webpack configuration */
|
|
2
4
|
import * as grok from 'datagrok-api/grok';
|
|
3
5
|
import * as ui from 'datagrok-api/ui';
|
|
@@ -6,23 +8,23 @@ import * as DG from 'datagrok-api/dg';
|
|
|
6
8
|
import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
7
9
|
|
|
8
10
|
import {_initEDAAPI} from '../wasm/EDAAPI';
|
|
9
|
-
import {computePCA, computePLS
|
|
10
|
-
import {addPrefixToEachColumnName, addPLSvisualization, regressionCoefficientsBarChart,
|
|
11
|
+
import {computePCA, computePLS} from './eda-tools';
|
|
12
|
+
import {addPrefixToEachColumnName, addPLSvisualization, regressionCoefficientsBarChart,
|
|
11
13
|
scoresScatterPlot, predictedVersusReferenceScatterPlot, addOneWayAnovaVizualization} from './eda-ui';
|
|
12
14
|
import {carsDataframe, testDataForBinaryClassification} from './data-generators';
|
|
13
|
-
import {LINEAR, RBF, POLYNOMIAL, SIGMOID,
|
|
15
|
+
import {LINEAR, RBF, POLYNOMIAL, SIGMOID,
|
|
14
16
|
getTrainedModel, getPrediction, showTrainReport, getPackedModel} from './svm';
|
|
15
17
|
|
|
16
18
|
import {oneWayAnova} from './stat-tools';
|
|
17
|
-
import {
|
|
19
|
+
import {getDbscanWorker} from '@datagrok-libraries/math';
|
|
18
20
|
|
|
19
21
|
import {DistanceAggregationMethods} from '@datagrok-libraries/ml/src/distance-matrix/types';
|
|
20
22
|
import {MultiColumnDimReductionEditor} from
|
|
21
23
|
'@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/multi-column-dim-reduction-editor';
|
|
22
24
|
import {multiColReduceDimensionality} from
|
|
23
25
|
'@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/reduce-dimensionality';
|
|
24
|
-
import {
|
|
25
|
-
import {
|
|
26
|
+
import {KnownMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
27
|
+
import {DimReductionMethods} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/types';
|
|
26
28
|
|
|
27
29
|
export const _package = new DG.Package();
|
|
28
30
|
|
|
@@ -63,8 +65,7 @@ export async function dbScan(df: DG.DataFrame, xCol: DG.Column, yCol: DG.Column,
|
|
|
63
65
|
//input: bool scale = false [Indicating whether the variables should be scaled to have unit variance.]
|
|
64
66
|
//output: dataframe result {action:join(table)}
|
|
65
67
|
export async function PCA(table: DG.DataFrame, features: DG.ColumnList, components: number,
|
|
66
|
-
center: boolean, scale: boolean): Promise<DG.DataFrame>
|
|
67
|
-
{
|
|
68
|
+
center: boolean, scale: boolean): Promise<DG.DataFrame> {
|
|
68
69
|
const pcaTable = await computePCA(table, features, components, center, scale);
|
|
69
70
|
addPrefixToEachColumnName('PCA', pcaTable.columns);
|
|
70
71
|
return pcaTable;
|
|
@@ -120,9 +121,8 @@ export async function reduceDimensionality(): Promise<void> {
|
|
|
120
121
|
//input: column_list features {type: numerical}
|
|
121
122
|
//input: column predict {type: numerical}
|
|
122
123
|
//input: int components = 3
|
|
123
|
-
export async function PLS(table: DG.DataFrame, names: DG.Column, features: DG.ColumnList,
|
|
124
|
-
predict: DG.Column, components: number): Promise<void>
|
|
125
|
-
{
|
|
124
|
+
export async function PLS(table: DG.DataFrame, names: DG.Column, features: DG.ColumnList,
|
|
125
|
+
predict: DG.Column, components: number): Promise<void> {
|
|
126
126
|
const plsResults = await computePLS(table, features, predict, components);
|
|
127
127
|
addPLSvisualization(table, names, features, predict, plsResults);
|
|
128
128
|
}
|
|
@@ -131,17 +131,17 @@ export async function PLS(table: DG.DataFrame, names: DG.Column, features: DG.Co
|
|
|
131
131
|
//description: Multidimensional data analysis using partial least squares (PLS) regression. It reduces the predictors to a smaller set of uncorrelated components and performs least squares regression on them.
|
|
132
132
|
//meta.demoPath: Compute | Multivariate analysis
|
|
133
133
|
//meta.isDemoScript: True
|
|
134
|
-
export async function demoMultivariateAnalysis(): Promise<any>
|
|
135
|
-
const demoScript = new DemoScript('Partial least squares regression',
|
|
136
|
-
'Analysis of multidimensional data.');
|
|
137
|
-
|
|
134
|
+
export async function demoMultivariateAnalysis(): Promise<any> {
|
|
135
|
+
const demoScript = new DemoScript('Partial least squares regression',
|
|
136
|
+
'Analysis of multidimensional data.');
|
|
137
|
+
|
|
138
138
|
const cars = carsDataframe();
|
|
139
139
|
|
|
140
140
|
const components = 3;
|
|
141
141
|
const names = cars.columns.byName('model');
|
|
142
142
|
const predict = cars.columns.byName('price');
|
|
143
143
|
const features = cars.columns.remove('price').remove('model');
|
|
144
|
-
const plsOutput = await computePLS(cars, features, predict, components);
|
|
144
|
+
const plsOutput = await computePLS(cars, features, predict, components);
|
|
145
145
|
|
|
146
146
|
const sourceCars = carsDataframe();
|
|
147
147
|
sourceCars.name = 'Cars';
|
|
@@ -154,7 +154,7 @@ export async function demoMultivariateAnalysis(): Promise<any> {
|
|
|
154
154
|
view = grok.shell.getTableView(sourceCars.name);
|
|
155
155
|
}, {description: 'Each car has many features - patterns extraction is complicated.', delay: 0})
|
|
156
156
|
.step('Model', async () => {
|
|
157
|
-
dialog = ui.dialog({title:'Multivariate Analysis (PLS)'})
|
|
157
|
+
dialog = ui.dialog({title: 'Multivariate Analysis (PLS)'})
|
|
158
158
|
.add(ui.tableInput('Table', sourceCars))
|
|
159
159
|
.add(ui.columnsInput('Features', cars, features.toList, {available: undefined, checked: features.names()}))
|
|
160
160
|
.add(ui.columnInput('Names', cars, names, undefined))
|
|
@@ -165,16 +165,14 @@ export async function demoMultivariateAnalysis(): Promise<any> {
|
|
|
165
165
|
})
|
|
166
166
|
.show({x: 400, y: 140});
|
|
167
167
|
}, {description: 'Predict car price by its other features.', delay: 0})
|
|
168
|
-
.step('Regression coeffcicients', async () =>
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
.step('Scores', async () =>
|
|
174
|
-
{view.addViewer(scoresScatterPlot(names, plsOutput[2], plsOutput[3]))},
|
|
168
|
+
.step('Regression coeffcicients', async () => {
|
|
169
|
+
dialog.close();
|
|
170
|
+
view.addViewer(regressionCoefficientsBarChart(features, plsOutput[1]));
|
|
171
|
+
},
|
|
172
|
+
{description: 'The feature "diesel" affects the price the most.', delay: 0})
|
|
173
|
+
.step('Scores', async () => {view.addViewer(scoresScatterPlot(names, plsOutput[2], plsOutput[3]));},
|
|
175
174
|
{description: 'Similarities & dissimilarities: alfaromeo and mercedes are different.', delay: 0})
|
|
176
|
-
.step('Prediction', async () =>
|
|
177
|
-
{view.addViewer(predictedVersusReferenceScatterPlot(names, predict, plsOutput[0]))},
|
|
175
|
+
.step('Prediction', async () => {view.addViewer(predictedVersusReferenceScatterPlot(names, predict, plsOutput[0]));},
|
|
178
176
|
{description: 'Closer to the line means better price prediction.', delay: 0})
|
|
179
177
|
.start();
|
|
180
178
|
}
|
|
@@ -188,9 +186,8 @@ export async function demoMultivariateAnalysis(): Promise<any> {
|
|
|
188
186
|
//input: double max = 173 {caption: max; category: Range}
|
|
189
187
|
//input: double violatorsPercentage = 5 {caption: violators; units: %; category: Dataset}
|
|
190
188
|
//output: dataframe df
|
|
191
|
-
export async function testDataLinearSeparable(name: string, samplesCount: number, featuresCount: number,
|
|
192
|
-
min: number, max: number, violatorsPercentage: number): Promise<DG.DataFrame>
|
|
193
|
-
{
|
|
189
|
+
export async function testDataLinearSeparable(name: string, samplesCount: number, featuresCount: number,
|
|
190
|
+
min: number, max: number, violatorsPercentage: number): Promise<DG.DataFrame> {
|
|
194
191
|
return await testDataForBinaryClassification(LINEAR, [0, 0], name, samplesCount, featuresCount,
|
|
195
192
|
min, max, violatorsPercentage);
|
|
196
193
|
}
|
|
@@ -205,9 +202,8 @@ export async function testDataLinearSeparable(name: string, samplesCount: number
|
|
|
205
202
|
//input: double max = 173 {caption: max; category: Range}
|
|
206
203
|
//input: double violatorsPercentage = 5 {caption: violators; units: %; category: Dataset}
|
|
207
204
|
//output: dataframe df
|
|
208
|
-
export async function testDataLinearNonSeparable(name: string, sigma: number, samplesCount: number,
|
|
209
|
-
featuresCount: number, min: number, max: number, violatorsPercentage: number): Promise<DG.DataFrame>
|
|
210
|
-
{
|
|
205
|
+
export async function testDataLinearNonSeparable(name: string, sigma: number, samplesCount: number,
|
|
206
|
+
featuresCount: number, min: number, max: number, violatorsPercentage: number): Promise<DG.DataFrame> {
|
|
211
207
|
return await testDataForBinaryClassification(RBF, [sigma, 0], name, samplesCount, featuresCount,
|
|
212
208
|
min, max, violatorsPercentage);
|
|
213
209
|
}
|
|
@@ -220,10 +216,9 @@ export async function testDataLinearNonSeparable(name: string, sigma: number, sa
|
|
|
220
216
|
//input: double gamma = 1.0 {category: Hyperparameters}
|
|
221
217
|
//input: bool toShowReport = false {caption: to show report; category: Report}
|
|
222
218
|
//output: dynamic model
|
|
223
|
-
export async function trainLinearKernelSVM(df: DG.DataFrame, predict_column: string,
|
|
224
|
-
gamma: number, toShowReport: boolean): Promise<any>
|
|
225
|
-
{
|
|
226
|
-
const trainedModel = await getTrainedModel({gamma: gamma, kernel: LINEAR}, df, predict_column);
|
|
219
|
+
export async function trainLinearKernelSVM(df: DG.DataFrame, predict_column: string,
|
|
220
|
+
gamma: number, toShowReport: boolean): Promise<any> {
|
|
221
|
+
const trainedModel = await getTrainedModel({gamma: gamma, kernel: LINEAR}, df, predict_column);
|
|
227
222
|
|
|
228
223
|
if (toShowReport)
|
|
229
224
|
showTrainReport(df, trainedModel);
|
|
@@ -237,8 +232,8 @@ export async function trainLinearKernelSVM(df: DG.DataFrame, predict_column: str
|
|
|
237
232
|
//input: dataframe df
|
|
238
233
|
//input: dynamic model
|
|
239
234
|
//output: dataframe table
|
|
240
|
-
export async function applyLinearKernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
|
|
241
|
-
return await getPrediction(df, model);
|
|
235
|
+
export async function applyLinearKernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
|
|
236
|
+
return await getPrediction(df, model);
|
|
242
237
|
}
|
|
243
238
|
|
|
244
239
|
//name: trainRBFkernelSVM
|
|
@@ -250,12 +245,11 @@ export async function applyLinearKernelSVM(df: DG.DataFrame, model: any): Promis
|
|
|
250
245
|
//input: double sigma = 1.5 {category: Hyperparameters}
|
|
251
246
|
//input: bool toShowReport = false {caption: to show report; category: Report}
|
|
252
247
|
//output: dynamic model
|
|
253
|
-
export async function trainRBFkernelSVM(df: DG.DataFrame, predict_column: string,
|
|
254
|
-
gamma: number, sigma: number, toShowReport: boolean): Promise<any>
|
|
255
|
-
{
|
|
248
|
+
export async function trainRBFkernelSVM(df: DG.DataFrame, predict_column: string,
|
|
249
|
+
gamma: number, sigma: number, toShowReport: boolean): Promise<any> {
|
|
256
250
|
const trainedModel = await getTrainedModel(
|
|
257
|
-
{gamma: gamma, kernel: RBF, sigma: sigma},
|
|
258
|
-
df, predict_column);
|
|
251
|
+
{gamma: gamma, kernel: RBF, sigma: sigma},
|
|
252
|
+
df, predict_column);
|
|
259
253
|
|
|
260
254
|
if (toShowReport)
|
|
261
255
|
showTrainReport(df, trainedModel);
|
|
@@ -269,9 +263,9 @@ export async function trainRBFkernelSVM(df: DG.DataFrame, predict_column: string
|
|
|
269
263
|
//input: dataframe df
|
|
270
264
|
//input: dynamic model
|
|
271
265
|
//output: dataframe table
|
|
272
|
-
export async function applyRBFkernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
|
|
273
|
-
return await getPrediction(df, model);
|
|
274
|
-
}
|
|
266
|
+
export async function applyRBFkernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
|
|
267
|
+
return await getPrediction(df, model);
|
|
268
|
+
}
|
|
275
269
|
|
|
276
270
|
//name: trainPolynomialKernelSVM
|
|
277
271
|
//meta.mlname: polynomial kernel LS-SVM
|
|
@@ -283,12 +277,11 @@ export async function applyRBFkernelSVM(df: DG.DataFrame, model: any): Promise<D
|
|
|
283
277
|
//input: double d = 2 {category: Hyperparameters}
|
|
284
278
|
//input: bool toShowReport = false {caption: to show report; category: Report}
|
|
285
279
|
//output: dynamic model
|
|
286
|
-
export async function trainPolynomialKernelSVM(df: DG.DataFrame, predict_column: string,
|
|
287
|
-
gamma: number, c: number, d: number, toShowReport: boolean): Promise<any>
|
|
288
|
-
{
|
|
280
|
+
export async function trainPolynomialKernelSVM(df: DG.DataFrame, predict_column: string,
|
|
281
|
+
gamma: number, c: number, d: number, toShowReport: boolean): Promise<any> {
|
|
289
282
|
const trainedModel = await getTrainedModel(
|
|
290
|
-
{gamma: gamma, kernel: POLYNOMIAL, cParam: c, dParam: d},
|
|
291
|
-
df, predict_column);
|
|
283
|
+
{gamma: gamma, kernel: POLYNOMIAL, cParam: c, dParam: d},
|
|
284
|
+
df, predict_column);
|
|
292
285
|
|
|
293
286
|
if (toShowReport)
|
|
294
287
|
showTrainReport(df, trainedModel);
|
|
@@ -302,8 +295,8 @@ export async function trainPolynomialKernelSVM(df: DG.DataFrame, predict_column:
|
|
|
302
295
|
//input: dataframe df
|
|
303
296
|
//input: dynamic model
|
|
304
297
|
//output: dataframe table
|
|
305
|
-
export async function applyPolynomialKernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
|
|
306
|
-
return await getPrediction(df, model);
|
|
298
|
+
export async function applyPolynomialKernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
|
|
299
|
+
return await getPrediction(df, model);
|
|
307
300
|
}
|
|
308
301
|
|
|
309
302
|
//name: trainSigmoidKernelSVM
|
|
@@ -316,12 +309,11 @@ export async function applyPolynomialKernelSVM(df: DG.DataFrame, model: any): Pr
|
|
|
316
309
|
//input: double theta = 1 {category: Hyperparameters}
|
|
317
310
|
//input: bool toShowReport = false {caption: to show report; category: Report}
|
|
318
311
|
//output: dynamic model
|
|
319
|
-
export async function trainSigmoidKernelSVM(df: DG.DataFrame, predict_column: string,
|
|
320
|
-
gamma: number, kappa: number, theta: number, toShowReport: boolean): Promise<any>
|
|
321
|
-
{
|
|
312
|
+
export async function trainSigmoidKernelSVM(df: DG.DataFrame, predict_column: string,
|
|
313
|
+
gamma: number, kappa: number, theta: number, toShowReport: boolean): Promise<any> {
|
|
322
314
|
const trainedModel = await getTrainedModel(
|
|
323
|
-
{gamma: gamma, kernel: SIGMOID, kappa: kappa, theta: theta},
|
|
324
|
-
df, predict_column);
|
|
315
|
+
{gamma: gamma, kernel: SIGMOID, kappa: kappa, theta: theta},
|
|
316
|
+
df, predict_column);
|
|
325
317
|
|
|
326
318
|
if (toShowReport)
|
|
327
319
|
showTrainReport(df, trainedModel);
|
|
@@ -335,8 +327,8 @@ export async function trainSigmoidKernelSVM(df: DG.DataFrame, predict_column: st
|
|
|
335
327
|
//input: dataframe df
|
|
336
328
|
//input: dynamic model
|
|
337
329
|
//output: dataframe table
|
|
338
|
-
export async function applySigmoidKernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
|
|
339
|
-
return await getPrediction(df, model);
|
|
330
|
+
export async function applySigmoidKernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
|
|
331
|
+
return await getPrediction(df, model);
|
|
340
332
|
}
|
|
341
333
|
|
|
342
334
|
//top-menu: ML | Analyze | ANOVA...
|
|
@@ -349,5 +341,5 @@ export async function applySigmoidKernelSVM(df: DG.DataFrame, model: any): Promi
|
|
|
349
341
|
//input: bool validate = false [Indicates whether the normality of distribution and the equality of variances should be checked.]
|
|
350
342
|
export function anova(table: DG.DataFrame, factor: DG.Column, feature: DG.Column, significance: number, validate: boolean) {
|
|
351
343
|
const res = oneWayAnova(factor, feature, significance, validate);
|
|
352
|
-
addOneWayAnovaVizualization(table, factor, feature, res);
|
|
344
|
+
addOneWayAnovaVizualization(table, factor, feature, res);
|
|
353
345
|
}
|
package/src/stat-tools.ts
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
// Statistic tools
|
|
2
2
|
|
|
3
3
|
/* REFERENCES
|
|
4
|
-
|
|
4
|
+
|
|
5
5
|
[1] One-way analysis of variance, https://en.wikipedia.org/wiki/One-way_analysis_of_variance
|
|
6
6
|
|
|
7
7
|
[2] G.W. Heiman. Basic Statistics for the Behavioral Sciences, 6th ed. Wadsworth Publishing, 2010
|
|
8
|
-
|
|
8
|
+
|
|
9
9
|
[3] F-test of equality of variances, https://en.wikipedia.org/wiki/F-test_of_equality_of_variances
|
|
10
10
|
|
|
11
11
|
[4] S. McKillup. Statistics Explained, Cambridge University Press, 2005
|
|
@@ -40,9 +40,9 @@ type SampleData = {
|
|
|
40
40
|
type OneWayAnova = {
|
|
41
41
|
/** sum of squares between groups, SSbn */
|
|
42
42
|
ssBn: number,
|
|
43
|
-
/** sum of squares within groups, SSnn */
|
|
43
|
+
/** sum of squares within groups, SSwn */
|
|
44
44
|
ssWn: number,
|
|
45
|
-
/** total sum of squares, SStot */
|
|
45
|
+
/** total sum of squares, SStot */
|
|
46
46
|
ssTot: number,
|
|
47
47
|
/** degrees of freedom between groups, DFbn */
|
|
48
48
|
dfBn: number,
|
|
@@ -67,15 +67,21 @@ type CatCol = DG.Column<DG.COLUMN_TYPE.STRING>;
|
|
|
67
67
|
type NumCol = DG.Column<DG.COLUMN_TYPE.FLOAT> | DG.Column<DG.COLUMN_TYPE.INT>;
|
|
68
68
|
|
|
69
69
|
/** Create dataframe with one-way ANOVA results. */
|
|
70
|
-
export function getOneWayAnovaDF(
|
|
70
|
+
export function getOneWayAnovaDF(
|
|
71
|
+
anova: OneWayAnova, alpha: number, fCritical: number, hypothesis: string, testResult: string,
|
|
72
|
+
): DG.DataFrame {
|
|
71
73
|
return DG.DataFrame.fromColumns([
|
|
72
|
-
DG.Column.fromStrings('Source of variance',
|
|
73
|
-
|
|
74
|
-
DG.Column.fromList(DG.COLUMN_TYPE.
|
|
74
|
+
DG.Column.fromStrings('Source of variance',
|
|
75
|
+
['Between groups', 'Within groups', 'Total', '', hypothesis, '', testResult]),
|
|
76
|
+
DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, 'Sum of squares',
|
|
77
|
+
[anova.ssBn, anova.ssWn, anova.ssTot, null, null, null, null]),
|
|
78
|
+
DG.Column.fromList(DG.COLUMN_TYPE.INT, 'Degrees of freedom',
|
|
79
|
+
[anova.dfBn, anova.dfWn, anova.dfTot, null, null, null, null]),
|
|
75
80
|
DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, 'Mean square', [anova.msBn, anova.msWn, null, null, null, null, null]),
|
|
76
81
|
DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, 'F-statistics', [anova.fStat, null, null, null, null, null, null]),
|
|
77
82
|
DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, 'p-value', [anova.pValue, null, null, null, null, null, null]),
|
|
78
|
-
DG.Column.fromList(DG.COLUMN_TYPE.FLOAT,
|
|
83
|
+
DG.Column.fromList(DG.COLUMN_TYPE.FLOAT,
|
|
84
|
+
`${alpha}-critical value`, [fCritical, null, null, null, null, null, null]),
|
|
79
85
|
]);
|
|
80
86
|
} // getOneWayAnovaDF
|
|
81
87
|
|
|
@@ -95,7 +101,7 @@ export function getVariance(data: SampleData): number {
|
|
|
95
101
|
|
|
96
102
|
if (size === 1)
|
|
97
103
|
return 0;
|
|
98
|
-
|
|
104
|
+
|
|
99
105
|
return (data.sumOfSquares - (data.sum) ** 2 / size) / (size - 1);
|
|
100
106
|
} // getVariance
|
|
101
107
|
|
|
@@ -103,7 +109,7 @@ export function getVariance(data: SampleData): number {
|
|
|
103
109
|
function areVarsEqual(xData: SampleData, yData: SampleData, alpha: number = 0.05): boolean {
|
|
104
110
|
// The applied approach can be found in [3]
|
|
105
111
|
checkSignificanceLevel(alpha);
|
|
106
|
-
|
|
112
|
+
|
|
107
113
|
const xVar = getVariance(xData);
|
|
108
114
|
const yVar = getVariance(yData);
|
|
109
115
|
|
|
@@ -118,7 +124,7 @@ function areVarsEqual(xData: SampleData, yData: SampleData, alpha: number = 0.05
|
|
|
118
124
|
|
|
119
125
|
export class FactorizedData {
|
|
120
126
|
private isNormDistrib: boolean | undefined = undefined;
|
|
121
|
-
private categories: string[] = [];
|
|
127
|
+
private categories: string[] = [];
|
|
122
128
|
private sums!: Float64Array;
|
|
123
129
|
private sumsOfSquares!: Float64Array;
|
|
124
130
|
private subSampleSizes!: Int32Array;
|
|
@@ -130,7 +136,7 @@ export class FactorizedData {
|
|
|
130
136
|
throw new Error();
|
|
131
137
|
|
|
132
138
|
if (categories.length !== values.length)
|
|
133
|
-
throw new Error(ERROR_MSG.NON_EQUAL_FACTORS_VALUES_SIZE);
|
|
139
|
+
throw new Error(ERROR_MSG.NON_EQUAL_FACTORS_VALUES_SIZE);
|
|
134
140
|
|
|
135
141
|
this.setStats(categories, values, checkNormality, alpha);
|
|
136
142
|
}
|
|
@@ -148,11 +154,13 @@ export class FactorizedData {
|
|
|
148
154
|
|
|
149
155
|
const first: SampleData = {sum: this.sums[0], sumOfSquares: this.sumsOfSquares[0], size: this.subSampleSizes[0]};
|
|
150
156
|
|
|
151
|
-
for (let i = 1; i < K; ++i)
|
|
152
|
-
if(!areVarsEqual(first, {sum: this.sums[i], sumOfSquares: this.sumsOfSquares[i],
|
|
157
|
+
for (let i = 1; i < K; ++i) {
|
|
158
|
+
if (!areVarsEqual(first, {sum: this.sums[i], sumOfSquares: this.sumsOfSquares[i],
|
|
159
|
+
size: this.subSampleSizes[i]}, alpha))
|
|
153
160
|
return false;
|
|
161
|
+
}
|
|
154
162
|
|
|
155
|
-
return true;
|
|
163
|
+
return true;
|
|
156
164
|
} // areVarsEqual
|
|
157
165
|
|
|
158
166
|
/** Perform one-way ANOVA computations. */
|
|
@@ -163,18 +171,18 @@ export class FactorizedData {
|
|
|
163
171
|
|
|
164
172
|
if (K === 1)
|
|
165
173
|
throw new Error(ERROR_MSG.ANOVA_FAILED_JUST_ONE_CAT);
|
|
166
|
-
|
|
174
|
+
|
|
167
175
|
let sum = 0;
|
|
168
176
|
let sumOfSquares = 0;
|
|
169
|
-
|
|
177
|
+
const N = this.size;
|
|
170
178
|
let buf = 0;
|
|
171
179
|
|
|
172
180
|
for (let i = 0; i < K; ++i) {
|
|
173
181
|
sum += this.sums[i];
|
|
174
182
|
sumOfSquares += this.sumsOfSquares[i];
|
|
175
|
-
buf += this.sums[i] ** 2 / this.subSampleSizes[i];
|
|
183
|
+
buf += this.sums[i] ** 2 / this.subSampleSizes[i];
|
|
176
184
|
}
|
|
177
|
-
|
|
185
|
+
|
|
178
186
|
const ssTot = sumOfSquares - sum ** 2 / N;
|
|
179
187
|
const ssBn = buf - sum ** 2 / N;
|
|
180
188
|
const ssWn = ssTot - ssBn;
|
|
@@ -182,12 +190,12 @@ export class FactorizedData {
|
|
|
182
190
|
const dfBn = K - 1;
|
|
183
191
|
const dfWn = N - K;
|
|
184
192
|
const dfTot = N - 1;
|
|
185
|
-
|
|
193
|
+
|
|
186
194
|
const msBn = ssBn / dfBn;
|
|
187
195
|
const msWn = ssWn / dfWn;
|
|
188
196
|
|
|
189
197
|
const fStat = msBn / msWn;
|
|
190
|
-
|
|
198
|
+
|
|
191
199
|
return {
|
|
192
200
|
ssBn: ssBn,
|
|
193
201
|
ssWn: ssWn,
|
|
@@ -197,61 +205,63 @@ export class FactorizedData {
|
|
|
197
205
|
dfTot: dfTot,
|
|
198
206
|
msBn: msBn,
|
|
199
207
|
msWn: msWn,
|
|
200
|
-
|
|
201
|
-
pValue: 1 - jStat.centralF.cdf(fStat, dfBn, dfWn)
|
|
208
|
+
fStat: fStat,
|
|
209
|
+
pValue: 1 - jStat.centralF.cdf(fStat, dfBn, dfWn),
|
|
202
210
|
};
|
|
203
211
|
} // getOneWayAnova
|
|
204
212
|
|
|
205
213
|
/** Compute sum & sums of squares with respect to factor levels. */
|
|
206
|
-
private setStats(categories: CatCol, values: NumCol,
|
|
207
|
-
// TODO: provide check normality feature
|
|
214
|
+
private setStats(categories: CatCol, values: NumCol, _checkNormality: boolean = false, _alpha: number = 0.05): void {
|
|
215
|
+
// TODO: provide check normality feature
|
|
208
216
|
const type = values.type;
|
|
209
217
|
const size = values.length;
|
|
210
218
|
|
|
211
219
|
switch (type) {
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
}
|
|
220
|
+
case DG.COLUMN_TYPE.INT:
|
|
221
|
+
case DG.COLUMN_TYPE.FLOAT:
|
|
222
|
+
this.categories = categories.categories;
|
|
223
|
+
const catCount = this.categories.length;
|
|
224
|
+
this.catCount = catCount;
|
|
225
|
+
this.size = size;
|
|
226
|
+
|
|
227
|
+
const vals = values.getRawData();
|
|
228
|
+
const cats = categories.getRawData();
|
|
229
|
+
|
|
230
|
+
const sums = new Float64Array(catCount).fill(0);
|
|
231
|
+
const sumsOfSquares = new Float64Array(catCount).fill(0);
|
|
232
|
+
const subSampleSizes = new Int32Array(catCount).fill(0);
|
|
233
|
+
|
|
234
|
+
for (let i = 0; i < size; ++i) {
|
|
235
|
+
const c = cats[i];
|
|
236
|
+
sums[c] += vals[i];
|
|
237
|
+
sumsOfSquares[c] += vals[i] ** 2;
|
|
238
|
+
++subSampleSizes[c];
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
this.sums = sums;
|
|
242
|
+
this.sumsOfSquares = sumsOfSquares;
|
|
243
|
+
this.subSampleSizes = subSampleSizes;
|
|
244
|
+
|
|
245
|
+
break;
|
|
246
|
+
|
|
247
|
+
default:
|
|
248
|
+
throw new Error(ERROR_MSG.UNSUPPORTED_COLUMN_TYPE);
|
|
249
|
+
}
|
|
242
250
|
} // setStats
|
|
243
251
|
} // FactorizedData
|
|
244
252
|
|
|
245
253
|
/** Perform one-way analysis of variances. */
|
|
246
|
-
export function oneWayAnova(
|
|
247
|
-
|
|
254
|
+
export function oneWayAnova(
|
|
255
|
+
categores: CatCol, values: NumCol, alpha: number = 0.05, validate: boolean = false,
|
|
256
|
+
): DG.DataFrame {
|
|
257
|
+
checkSignificanceLevel(alpha);
|
|
248
258
|
|
|
249
259
|
const factorized = new FactorizedData(categores, values, validate, alpha);
|
|
250
260
|
|
|
251
261
|
if (validate) {
|
|
252
|
-
if(!factorized.areVarsEqual(alpha))
|
|
262
|
+
if (!factorized.areVarsEqual(alpha))
|
|
253
263
|
throw new Error(ERROR_MSG.NON_EQUAL_VARIANCES);
|
|
254
|
-
|
|
264
|
+
|
|
255
265
|
if (!factorized.isNormal())
|
|
256
266
|
throw new Error(ERROR_MSG.NON_NORMAL_DISTRIB);
|
|
257
267
|
}
|
|
@@ -259,7 +269,8 @@ export function oneWayAnova(categores: CatCol, values: NumCol, alpha: number = 0
|
|
|
259
269
|
const anova = factorized.getOneWayAnova();
|
|
260
270
|
const fCrit = jStat.centralF.inv(1 - alpha, anova.dfBn, anova.dfWn);
|
|
261
271
|
|
|
262
|
-
const hypothesis = `THE NULL HYPOTHESIS: the "${categores.name}"
|
|
272
|
+
const hypothesis = `THE NULL HYPOTHESIS: the "${categores.name}"
|
|
273
|
+
factor does not produce a significant difference in the "${values.name}" feature.`;
|
|
263
274
|
const testResult = `Test result: ${(anova.fStat > fCrit) ? 'REJECTED.' : 'FAILED TO REJECT.'}`;
|
|
264
275
|
|
|
265
276
|
return getOneWayAnovaDF(anova, alpha, fCrit, hypothesis, testResult);
|