@datagrok/eda 1.1.8 → 1.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/package.ts CHANGED
@@ -1,3 +1,5 @@
1
+ /* eslint-disable camelcase */
2
+ /* eslint-disable max-len */
1
3
  /* Do not change these import lines to match external modules in webpack configuration */
2
4
  import * as grok from 'datagrok-api/grok';
3
5
  import * as ui from 'datagrok-api/ui';
@@ -6,23 +8,23 @@ import * as DG from 'datagrok-api/dg';
6
8
  import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
7
9
 
8
10
  import {_initEDAAPI} from '../wasm/EDAAPI';
9
- import {computePCA, computePLS, computeUMAP, computeTSNE, computeSPE} from './eda-tools';
10
- import {addPrefixToEachColumnName, addPLSvisualization, regressionCoefficientsBarChart,
11
+ import {computePCA, computePLS} from './eda-tools';
12
+ import {addPrefixToEachColumnName, addPLSvisualization, regressionCoefficientsBarChart,
11
13
  scoresScatterPlot, predictedVersusReferenceScatterPlot, addOneWayAnovaVizualization} from './eda-ui';
12
14
  import {carsDataframe, testDataForBinaryClassification} from './data-generators';
13
- import {LINEAR, RBF, POLYNOMIAL, SIGMOID,
15
+ import {LINEAR, RBF, POLYNOMIAL, SIGMOID,
14
16
  getTrainedModel, getPrediction, showTrainReport, getPackedModel} from './svm';
15
17
 
16
18
  import {oneWayAnova} from './stat-tools';
17
- import { getDbscanWorker } from '@datagrok-libraries/math';
19
+ import {getDbscanWorker} from '@datagrok-libraries/math';
18
20
 
19
21
  import {DistanceAggregationMethods} from '@datagrok-libraries/ml/src/distance-matrix/types';
20
22
  import {MultiColumnDimReductionEditor} from
21
23
  '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/multi-column-dim-reduction-editor';
22
24
  import {multiColReduceDimensionality} from
23
25
  '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/reduce-dimensionality';
24
- import { DimReductionMethods } from '@datagrok-libraries/ml/src/reduce-dimensionality';
25
- import { KnownMetrics } from '@datagrok-libraries/ml/src/typed-metrics';
26
+ import {KnownMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
27
+ import {DimReductionMethods} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/types';
26
28
 
27
29
  export const _package = new DG.Package();
28
30
 
@@ -63,8 +65,7 @@ export async function dbScan(df: DG.DataFrame, xCol: DG.Column, yCol: DG.Column,
63
65
  //input: bool scale = false [Indicating whether the variables should be scaled to have unit variance.]
64
66
  //output: dataframe result {action:join(table)}
65
67
  export async function PCA(table: DG.DataFrame, features: DG.ColumnList, components: number,
66
- center: boolean, scale: boolean): Promise<DG.DataFrame>
67
- {
68
+ center: boolean, scale: boolean): Promise<DG.DataFrame> {
68
69
  const pcaTable = await computePCA(table, features, components, center, scale);
69
70
  addPrefixToEachColumnName('PCA', pcaTable.columns);
70
71
  return pcaTable;
@@ -120,9 +121,8 @@ export async function reduceDimensionality(): Promise<void> {
120
121
  //input: column_list features {type: numerical}
121
122
  //input: column predict {type: numerical}
122
123
  //input: int components = 3
123
- export async function PLS(table: DG.DataFrame, names: DG.Column, features: DG.ColumnList,
124
- predict: DG.Column, components: number): Promise<void>
125
- {
124
+ export async function PLS(table: DG.DataFrame, names: DG.Column, features: DG.ColumnList,
125
+ predict: DG.Column, components: number): Promise<void> {
126
126
  const plsResults = await computePLS(table, features, predict, components);
127
127
  addPLSvisualization(table, names, features, predict, plsResults);
128
128
  }
@@ -131,17 +131,17 @@ export async function PLS(table: DG.DataFrame, names: DG.Column, features: DG.Co
131
131
  //description: Multidimensional data analysis using partial least squares (PLS) regression. It reduces the predictors to a smaller set of uncorrelated components and performs least squares regression on them.
132
132
  //meta.demoPath: Compute | Multivariate analysis
133
133
  //meta.isDemoScript: True
134
- export async function demoMultivariateAnalysis(): Promise<any> {
135
- const demoScript = new DemoScript('Partial least squares regression',
136
- 'Analysis of multidimensional data.');
137
-
134
+ export async function demoMultivariateAnalysis(): Promise<any> {
135
+ const demoScript = new DemoScript('Partial least squares regression',
136
+ 'Analysis of multidimensional data.');
137
+
138
138
  const cars = carsDataframe();
139
139
 
140
140
  const components = 3;
141
141
  const names = cars.columns.byName('model');
142
142
  const predict = cars.columns.byName('price');
143
143
  const features = cars.columns.remove('price').remove('model');
144
- const plsOutput = await computePLS(cars, features, predict, components);
144
+ const plsOutput = await computePLS(cars, features, predict, components);
145
145
 
146
146
  const sourceCars = carsDataframe();
147
147
  sourceCars.name = 'Cars';
@@ -154,7 +154,7 @@ export async function demoMultivariateAnalysis(): Promise<any> {
154
154
  view = grok.shell.getTableView(sourceCars.name);
155
155
  }, {description: 'Each car has many features - patterns extraction is complicated.', delay: 0})
156
156
  .step('Model', async () => {
157
- dialog = ui.dialog({title:'Multivariate Analysis (PLS)'})
157
+ dialog = ui.dialog({title: 'Multivariate Analysis (PLS)'})
158
158
  .add(ui.tableInput('Table', sourceCars))
159
159
  .add(ui.columnsInput('Features', cars, features.toList, {available: undefined, checked: features.names()}))
160
160
  .add(ui.columnInput('Names', cars, names, undefined))
@@ -165,16 +165,14 @@ export async function demoMultivariateAnalysis(): Promise<any> {
165
165
  })
166
166
  .show({x: 400, y: 140});
167
167
  }, {description: 'Predict car price by its other features.', delay: 0})
168
- .step('Regression coeffcicients', async () =>
169
- {
170
- dialog.close();
171
- view.addViewer(regressionCoefficientsBarChart(features, plsOutput[1]))},
172
- {description: 'The feature "diesel" affects the price the most.', delay: 0})
173
- .step('Scores', async () =>
174
- {view.addViewer(scoresScatterPlot(names, plsOutput[2], plsOutput[3]))},
168
+ .step('Regression coeffcicients', async () => {
169
+ dialog.close();
170
+ view.addViewer(regressionCoefficientsBarChart(features, plsOutput[1]));
171
+ },
172
+ {description: 'The feature "diesel" affects the price the most.', delay: 0})
173
+ .step('Scores', async () => {view.addViewer(scoresScatterPlot(names, plsOutput[2], plsOutput[3]));},
175
174
  {description: 'Similarities & dissimilarities: alfaromeo and mercedes are different.', delay: 0})
176
- .step('Prediction', async () =>
177
- {view.addViewer(predictedVersusReferenceScatterPlot(names, predict, plsOutput[0]))},
175
+ .step('Prediction', async () => {view.addViewer(predictedVersusReferenceScatterPlot(names, predict, plsOutput[0]));},
178
176
  {description: 'Closer to the line means better price prediction.', delay: 0})
179
177
  .start();
180
178
  }
@@ -188,9 +186,8 @@ export async function demoMultivariateAnalysis(): Promise<any> {
188
186
  //input: double max = 173 {caption: max; category: Range}
189
187
  //input: double violatorsPercentage = 5 {caption: violators; units: %; category: Dataset}
190
188
  //output: dataframe df
191
- export async function testDataLinearSeparable(name: string, samplesCount: number, featuresCount: number,
192
- min: number, max: number, violatorsPercentage: number): Promise<DG.DataFrame>
193
- {
189
+ export async function testDataLinearSeparable(name: string, samplesCount: number, featuresCount: number,
190
+ min: number, max: number, violatorsPercentage: number): Promise<DG.DataFrame> {
194
191
  return await testDataForBinaryClassification(LINEAR, [0, 0], name, samplesCount, featuresCount,
195
192
  min, max, violatorsPercentage);
196
193
  }
@@ -205,9 +202,8 @@ export async function testDataLinearSeparable(name: string, samplesCount: number
205
202
  //input: double max = 173 {caption: max; category: Range}
206
203
  //input: double violatorsPercentage = 5 {caption: violators; units: %; category: Dataset}
207
204
  //output: dataframe df
208
- export async function testDataLinearNonSeparable(name: string, sigma: number, samplesCount: number,
209
- featuresCount: number, min: number, max: number, violatorsPercentage: number): Promise<DG.DataFrame>
210
- {
205
+ export async function testDataLinearNonSeparable(name: string, sigma: number, samplesCount: number,
206
+ featuresCount: number, min: number, max: number, violatorsPercentage: number): Promise<DG.DataFrame> {
211
207
  return await testDataForBinaryClassification(RBF, [sigma, 0], name, samplesCount, featuresCount,
212
208
  min, max, violatorsPercentage);
213
209
  }
@@ -220,10 +216,9 @@ export async function testDataLinearNonSeparable(name: string, sigma: number, sa
220
216
  //input: double gamma = 1.0 {category: Hyperparameters}
221
217
  //input: bool toShowReport = false {caption: to show report; category: Report}
222
218
  //output: dynamic model
223
- export async function trainLinearKernelSVM(df: DG.DataFrame, predict_column: string,
224
- gamma: number, toShowReport: boolean): Promise<any>
225
- {
226
- const trainedModel = await getTrainedModel({gamma: gamma, kernel: LINEAR}, df, predict_column);
219
+ export async function trainLinearKernelSVM(df: DG.DataFrame, predict_column: string,
220
+ gamma: number, toShowReport: boolean): Promise<any> {
221
+ const trainedModel = await getTrainedModel({gamma: gamma, kernel: LINEAR}, df, predict_column);
227
222
 
228
223
  if (toShowReport)
229
224
  showTrainReport(df, trainedModel);
@@ -237,8 +232,8 @@ export async function trainLinearKernelSVM(df: DG.DataFrame, predict_column: str
237
232
  //input: dataframe df
238
233
  //input: dynamic model
239
234
  //output: dataframe table
240
- export async function applyLinearKernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
241
- return await getPrediction(df, model);
235
+ export async function applyLinearKernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
236
+ return await getPrediction(df, model);
242
237
  }
243
238
 
244
239
  //name: trainRBFkernelSVM
@@ -250,12 +245,11 @@ export async function applyLinearKernelSVM(df: DG.DataFrame, model: any): Promis
250
245
  //input: double sigma = 1.5 {category: Hyperparameters}
251
246
  //input: bool toShowReport = false {caption: to show report; category: Report}
252
247
  //output: dynamic model
253
- export async function trainRBFkernelSVM(df: DG.DataFrame, predict_column: string,
254
- gamma: number, sigma: number, toShowReport: boolean): Promise<any>
255
- {
248
+ export async function trainRBFkernelSVM(df: DG.DataFrame, predict_column: string,
249
+ gamma: number, sigma: number, toShowReport: boolean): Promise<any> {
256
250
  const trainedModel = await getTrainedModel(
257
- {gamma: gamma, kernel: RBF, sigma: sigma},
258
- df, predict_column);
251
+ {gamma: gamma, kernel: RBF, sigma: sigma},
252
+ df, predict_column);
259
253
 
260
254
  if (toShowReport)
261
255
  showTrainReport(df, trainedModel);
@@ -269,9 +263,9 @@ export async function trainRBFkernelSVM(df: DG.DataFrame, predict_column: string
269
263
  //input: dataframe df
270
264
  //input: dynamic model
271
265
  //output: dataframe table
272
- export async function applyRBFkernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
273
- return await getPrediction(df, model);
274
- }
266
+ export async function applyRBFkernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
267
+ return await getPrediction(df, model);
268
+ }
275
269
 
276
270
  //name: trainPolynomialKernelSVM
277
271
  //meta.mlname: polynomial kernel LS-SVM
@@ -283,12 +277,11 @@ export async function applyRBFkernelSVM(df: DG.DataFrame, model: any): Promise<D
283
277
  //input: double d = 2 {category: Hyperparameters}
284
278
  //input: bool toShowReport = false {caption: to show report; category: Report}
285
279
  //output: dynamic model
286
- export async function trainPolynomialKernelSVM(df: DG.DataFrame, predict_column: string,
287
- gamma: number, c: number, d: number, toShowReport: boolean): Promise<any>
288
- {
280
+ export async function trainPolynomialKernelSVM(df: DG.DataFrame, predict_column: string,
281
+ gamma: number, c: number, d: number, toShowReport: boolean): Promise<any> {
289
282
  const trainedModel = await getTrainedModel(
290
- {gamma: gamma, kernel: POLYNOMIAL, cParam: c, dParam: d},
291
- df, predict_column);
283
+ {gamma: gamma, kernel: POLYNOMIAL, cParam: c, dParam: d},
284
+ df, predict_column);
292
285
 
293
286
  if (toShowReport)
294
287
  showTrainReport(df, trainedModel);
@@ -302,8 +295,8 @@ export async function trainPolynomialKernelSVM(df: DG.DataFrame, predict_column:
302
295
  //input: dataframe df
303
296
  //input: dynamic model
304
297
  //output: dataframe table
305
- export async function applyPolynomialKernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
306
- return await getPrediction(df, model);
298
+ export async function applyPolynomialKernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
299
+ return await getPrediction(df, model);
307
300
  }
308
301
 
309
302
  //name: trainSigmoidKernelSVM
@@ -316,12 +309,11 @@ export async function applyPolynomialKernelSVM(df: DG.DataFrame, model: any): Pr
316
309
  //input: double theta = 1 {category: Hyperparameters}
317
310
  //input: bool toShowReport = false {caption: to show report; category: Report}
318
311
  //output: dynamic model
319
- export async function trainSigmoidKernelSVM(df: DG.DataFrame, predict_column: string,
320
- gamma: number, kappa: number, theta: number, toShowReport: boolean): Promise<any>
321
- {
312
+ export async function trainSigmoidKernelSVM(df: DG.DataFrame, predict_column: string,
313
+ gamma: number, kappa: number, theta: number, toShowReport: boolean): Promise<any> {
322
314
  const trainedModel = await getTrainedModel(
323
- {gamma: gamma, kernel: SIGMOID, kappa: kappa, theta: theta},
324
- df, predict_column);
315
+ {gamma: gamma, kernel: SIGMOID, kappa: kappa, theta: theta},
316
+ df, predict_column);
325
317
 
326
318
  if (toShowReport)
327
319
  showTrainReport(df, trainedModel);
@@ -335,8 +327,8 @@ export async function trainSigmoidKernelSVM(df: DG.DataFrame, predict_column: st
335
327
  //input: dataframe df
336
328
  //input: dynamic model
337
329
  //output: dataframe table
338
- export async function applySigmoidKernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
339
- return await getPrediction(df, model);
330
+ export async function applySigmoidKernelSVM(df: DG.DataFrame, model: any): Promise<DG.DataFrame> {
331
+ return await getPrediction(df, model);
340
332
  }
341
333
 
342
334
  //top-menu: ML | Analyze | ANOVA...
@@ -349,5 +341,5 @@ export async function applySigmoidKernelSVM(df: DG.DataFrame, model: any): Promi
349
341
  //input: bool validate = false [Indicates whether the normality of distribution and an equality of variances should be checked.]
350
342
  export function anova(table: DG.DataFrame, factor: DG.Column, feature: DG.Column, significance: number, validate: boolean) {
351
343
  const res = oneWayAnova(factor, feature, significance, validate);
352
- addOneWayAnovaVizualization(table, factor, feature, res);
344
+ addOneWayAnovaVizualization(table, factor, feature, res);
353
345
  }
package/src/stat-tools.ts CHANGED
@@ -1,11 +1,11 @@
1
1
  // Statistic tools
2
2
 
3
3
  /* REFERENCES
4
-
4
+
5
5
  [1] One-way analysis of variance, https://en.wikipedia.org/wiki/One-way_analysis_of_variance
6
6
 
7
7
  [2] G.W. Heiman. Basic Statistics for the Behavioral Sciences, 6th ed. Wadsworth Publishing, 2010
8
-
8
+
9
9
  [3] F-test of equality of variances, https://en.wikipedia.org/wiki/F-test_of_equality_of_variances
10
10
 
11
11
  [4] S. McKillup. Statistics Explained, Cambridge University Press, 2005
@@ -40,9 +40,9 @@ type SampleData = {
40
40
  type OneWayAnova = {
41
41
  /** sum of squares between groups, SSbn */
42
42
  ssBn: number,
43
- /** sum of squares within groups, SSwn */
43
+ /** sum of squares within groups, SSwn */
44
44
  ssWn: number,
45
- /** total sum of squares, SStot */
45
+ /** total sum of squares, SStot */
46
46
  ssTot: number,
47
47
  /** degrees of freedom between groups, DFbn */
48
48
  dfBn: number,
@@ -67,15 +67,21 @@ type CatCol = DG.Column<DG.COLUMN_TYPE.STRING>;
67
67
  type NumCol = DG.Column<DG.COLUMN_TYPE.FLOAT> | DG.Column<DG.COLUMN_TYPE.INT>;
68
68
 
69
69
  /** Create dataframe with one-way ANOVA results. */
70
- export function getOneWayAnovaDF(anova: OneWayAnova, alpha: number, fCritical: number, hypothesis: string, testResult: string): DG.DataFrame {
70
+ export function getOneWayAnovaDF(
71
+ anova: OneWayAnova, alpha: number, fCritical: number, hypothesis: string, testResult: string,
72
+ ): DG.DataFrame {
71
73
  return DG.DataFrame.fromColumns([
72
- DG.Column.fromStrings('Source of variance', ['Between groups', 'Within groups', 'Total', '', hypothesis, '', testResult]),
73
- DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, 'Sum of squares', [anova.ssBn, anova.ssWn, anova.ssTot, null, null, null, null]),
74
- DG.Column.fromList(DG.COLUMN_TYPE.INT, 'Degrees of freedom', [anova.dfBn, anova.dfWn, anova.dfTot, null, null, null, null]),
74
+ DG.Column.fromStrings('Source of variance',
75
+ ['Between groups', 'Within groups', 'Total', '', hypothesis, '', testResult]),
76
+ DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, 'Sum of squares',
77
+ [anova.ssBn, anova.ssWn, anova.ssTot, null, null, null, null]),
78
+ DG.Column.fromList(DG.COLUMN_TYPE.INT, 'Degrees of freedom',
79
+ [anova.dfBn, anova.dfWn, anova.dfTot, null, null, null, null]),
75
80
  DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, 'Mean square', [anova.msBn, anova.msWn, null, null, null, null, null]),
76
81
  DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, 'F-statistics', [anova.fStat, null, null, null, null, null, null]),
77
82
  DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, 'p-value', [anova.pValue, null, null, null, null, null, null]),
78
- DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, `${alpha}-critical value`, [fCritical, null, null, null, null, null, null]),
83
+ DG.Column.fromList(DG.COLUMN_TYPE.FLOAT,
84
+ `${alpha}-critical value`, [fCritical, null, null, null, null, null, null]),
79
85
  ]);
80
86
  } // getOneWayAnovaDF
81
87
 
@@ -95,7 +101,7 @@ export function getVariance(data: SampleData): number {
95
101
 
96
102
  if (size === 1)
97
103
  return 0;
98
-
104
+
99
105
  return (data.sumOfSquares - (data.sum) ** 2 / size) / (size - 1);
100
106
  } // getVariance
101
107
 
@@ -103,7 +109,7 @@ export function getVariance(data: SampleData): number {
103
109
  function areVarsEqual(xData: SampleData, yData: SampleData, alpha: number = 0.05): boolean {
104
110
  // The applied approach can be found in [3]
105
111
  checkSignificanceLevel(alpha);
106
-
112
+
107
113
  const xVar = getVariance(xData);
108
114
  const yVar = getVariance(yData);
109
115
 
@@ -118,7 +124,7 @@ function areVarsEqual(xData: SampleData, yData: SampleData, alpha: number = 0.05
118
124
 
119
125
  export class FactorizedData {
120
126
  private isNormDistrib: boolean | undefined = undefined;
121
- private categories: string[] = [];
127
+ private categories: string[] = [];
122
128
  private sums!: Float64Array;
123
129
  private sumsOfSquares!: Float64Array;
124
130
  private subSampleSizes!: Int32Array;
@@ -130,7 +136,7 @@ export class FactorizedData {
130
136
  throw new Error();
131
137
 
132
138
  if (categories.length !== values.length)
133
- throw new Error(ERROR_MSG.NON_EQUAL_FACTORS_VALUES_SIZE);
139
+ throw new Error(ERROR_MSG.NON_EQUAL_FACTORS_VALUES_SIZE);
134
140
 
135
141
  this.setStats(categories, values, checkNormality, alpha);
136
142
  }
@@ -148,11 +154,13 @@ export class FactorizedData {
148
154
 
149
155
  const first: SampleData = {sum: this.sums[0], sumOfSquares: this.sumsOfSquares[0], size: this.subSampleSizes[0]};
150
156
 
151
- for (let i = 1; i < K; ++i)
152
- if(!areVarsEqual(first, {sum: this.sums[i], sumOfSquares: this.sumsOfSquares[i], size: this.subSampleSizes[i]}, alpha))
157
+ for (let i = 1; i < K; ++i) {
158
+ if (!areVarsEqual(first, {sum: this.sums[i], sumOfSquares: this.sumsOfSquares[i],
159
+ size: this.subSampleSizes[i]}, alpha))
153
160
  return false;
161
+ }
154
162
 
155
- return true;
163
+ return true;
156
164
  } // areVarsEqual
157
165
 
158
166
  /** Perform one-way ANOVA computations. */
@@ -163,18 +171,18 @@ export class FactorizedData {
163
171
 
164
172
  if (K === 1)
165
173
  throw new Error(ERROR_MSG.ANOVA_FAILED_JUST_ONE_CAT);
166
-
174
+
167
175
  let sum = 0;
168
176
  let sumOfSquares = 0;
169
- let N = this.size;
177
+ const N = this.size;
170
178
  let buf = 0;
171
179
 
172
180
  for (let i = 0; i < K; ++i) {
173
181
  sum += this.sums[i];
174
182
  sumOfSquares += this.sumsOfSquares[i];
175
- buf += this.sums[i] ** 2 / this.subSampleSizes[i];
183
+ buf += this.sums[i] ** 2 / this.subSampleSizes[i];
176
184
  }
177
-
185
+
178
186
  const ssTot = sumOfSquares - sum ** 2 / N;
179
187
  const ssBn = buf - sum ** 2 / N;
180
188
  const ssWn = ssTot - ssBn;
@@ -182,12 +190,12 @@ export class FactorizedData {
182
190
  const dfBn = K - 1;
183
191
  const dfWn = N - K;
184
192
  const dfTot = N - 1;
185
-
193
+
186
194
  const msBn = ssBn / dfBn;
187
195
  const msWn = ssWn / dfWn;
188
196
 
189
197
  const fStat = msBn / msWn;
190
-
198
+
191
199
  return {
192
200
  ssBn: ssBn,
193
201
  ssWn: ssWn,
@@ -197,61 +205,63 @@ export class FactorizedData {
197
205
  dfTot: dfTot,
198
206
  msBn: msBn,
199
207
  msWn: msWn,
200
- fStat: fStat,
201
- pValue: 1 - jStat.centralF.cdf(fStat, dfBn, dfWn)
208
+ fStat: fStat,
209
+ pValue: 1 - jStat.centralF.cdf(fStat, dfBn, dfWn),
202
210
  };
203
211
  } // getOneWayAnova
204
212
 
205
213
  /** Compute sum & sums of squares with respect to factor levels. */
206
- private setStats(categories: CatCol, values: NumCol, checkNormality: boolean = false, alpha: number = 0.05): void {
207
- // TODO: provide check normality feature
214
+ private setStats(categories: CatCol, values: NumCol, _checkNormality: boolean = false, _alpha: number = 0.05): void {
215
+ // TODO: provide check normality feature
208
216
  const type = values.type;
209
217
  const size = values.length;
210
218
 
211
219
  switch (type) {
212
- case DG.COLUMN_TYPE.INT:
213
- case DG.COLUMN_TYPE.FLOAT:
214
- this.categories = categories.categories;
215
- const catCount = this.categories.length;
216
- this.catCount = catCount;
217
- this.size = size;
218
-
219
- const vals = values.getRawData();
220
- const cats = categories.getRawData();
221
-
222
- const sums = new Float64Array(catCount).fill(0);
223
- const sumsOfSquares = new Float64Array(catCount).fill(0);
224
- const subSampleSizes = new Int32Array(catCount).fill(0);
225
-
226
- for (let i = 0; i < size; ++i) {
227
- const c = cats[i];
228
- sums[c] += vals[i];
229
- sumsOfSquares[c] += vals[i] ** 2;
230
- ++subSampleSizes[c];
231
- }
232
-
233
- this.sums = sums;
234
- this.sumsOfSquares = sumsOfSquares;
235
- this.subSampleSizes = subSampleSizes;
236
-
237
- break;
238
-
239
- default:
240
- throw new Error(ERROR_MSG.UNSUPPORTED_COLUMN_TYPE);
241
- }
220
+ case DG.COLUMN_TYPE.INT:
221
+ case DG.COLUMN_TYPE.FLOAT:
222
+ this.categories = categories.categories;
223
+ const catCount = this.categories.length;
224
+ this.catCount = catCount;
225
+ this.size = size;
226
+
227
+ const vals = values.getRawData();
228
+ const cats = categories.getRawData();
229
+
230
+ const sums = new Float64Array(catCount).fill(0);
231
+ const sumsOfSquares = new Float64Array(catCount).fill(0);
232
+ const subSampleSizes = new Int32Array(catCount).fill(0);
233
+
234
+ for (let i = 0; i < size; ++i) {
235
+ const c = cats[i];
236
+ sums[c] += vals[i];
237
+ sumsOfSquares[c] += vals[i] ** 2;
238
+ ++subSampleSizes[c];
239
+ }
240
+
241
+ this.sums = sums;
242
+ this.sumsOfSquares = sumsOfSquares;
243
+ this.subSampleSizes = subSampleSizes;
244
+
245
+ break;
246
+
247
+ default:
248
+ throw new Error(ERROR_MSG.UNSUPPORTED_COLUMN_TYPE);
249
+ }
242
250
  } // setStats
243
251
  } // FactorizedData
244
252
 
245
253
  /** Perform one-way analysis of variances. */
246
- export function oneWayAnova(categores: CatCol, values: NumCol, alpha: number = 0.05, validate: boolean = false): DG.DataFrame {
247
- checkSignificanceLevel(alpha);
254
+ export function oneWayAnova(
255
+ categores: CatCol, values: NumCol, alpha: number = 0.05, validate: boolean = false,
256
+ ): DG.DataFrame {
257
+ checkSignificanceLevel(alpha);
248
258
 
249
259
  const factorized = new FactorizedData(categores, values, validate, alpha);
250
260
 
251
261
  if (validate) {
252
- if(!factorized.areVarsEqual(alpha))
262
+ if (!factorized.areVarsEqual(alpha))
253
263
  throw new Error(ERROR_MSG.NON_EQUAL_VARIANCES);
254
-
264
+
255
265
  if (!factorized.isNormal())
256
266
  throw new Error(ERROR_MSG.NON_NORMAL_DISTRIB);
257
267
  }
@@ -259,7 +269,8 @@ export function oneWayAnova(categores: CatCol, values: NumCol, alpha: number = 0
259
269
  const anova = factorized.getOneWayAnova();
260
270
  const fCrit = jStat.centralF.inv(1 - alpha, anova.dfBn, anova.dfWn);
261
271
 
262
- const hypothesis = `THE NULL HYPOTHESIS: the "${categores.name}" factor does not produce a significant difference in the "${values.name}" feature.`;
272
+ const hypothesis = `THE NULL HYPOTHESIS: the "${categores.name}"
273
+ factor does not produce a significant difference in the "${values.name}" feature.`;
263
274
  const testResult = `Test result: ${(anova.fStat > fCrit) ? 'REJECTED.' : 'FAILED TO REJECT.'}`;
264
275
 
265
276
  return getOneWayAnovaDF(anova, alpha, fCrit, hypothesis, testResult);