@datagrok/eda 1.2.2 → 1.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. package/CHANGELOG.md +15 -4
  2. package/dist/111.js +1 -1
  3. package/dist/111.js.map +1 -1
  4. package/dist/128.js +2 -0
  5. package/dist/128.js.map +1 -0
  6. package/dist/153.js +1 -1
  7. package/dist/153.js.map +1 -1
  8. package/dist/23.js +1 -1
  9. package/dist/23.js.map +1 -1
  10. package/dist/234.js +1 -1
  11. package/dist/234.js.map +1 -1
  12. package/dist/260.js +1 -1
  13. package/dist/260.js.map +1 -1
  14. package/dist/348.js +1 -1
  15. package/dist/348.js.map +1 -1
  16. package/dist/377.js +1 -1
  17. package/dist/377.js.map +1 -1
  18. package/dist/{12a82b8001995d426ed2.wasm → 3cec7d4ab7dacdcb37e6.wasm} +0 -0
  19. package/dist/412.js +1 -1
  20. package/dist/412.js.map +1 -1
  21. package/dist/531.js +1 -1
  22. package/dist/531.js.map +1 -1
  23. package/dist/583.js +1 -1
  24. package/dist/583.js.map +1 -1
  25. package/dist/603.js +1 -1
  26. package/dist/603.js.map +1 -1
  27. package/dist/656.js +1 -1
  28. package/dist/656.js.map +1 -1
  29. package/dist/682.js +1 -1
  30. package/dist/682.js.map +1 -1
  31. package/dist/705.js +1 -1
  32. package/dist/705.js.map +1 -1
  33. package/dist/727.js +1 -1
  34. package/dist/727.js.map +1 -1
  35. package/dist/763.js +1 -1
  36. package/dist/763.js.map +1 -1
  37. package/dist/778.js +1 -1
  38. package/dist/778.js.map +1 -1
  39. package/dist/783.js +1 -1
  40. package/dist/783.js.map +1 -1
  41. package/dist/793.js +1 -1
  42. package/dist/793.js.map +1 -1
  43. package/dist/860.js +2 -0
  44. package/dist/860.js.map +1 -0
  45. package/dist/950.js +1 -1
  46. package/dist/950.js.map +1 -1
  47. package/dist/980.js +1 -1
  48. package/dist/980.js.map +1 -1
  49. package/dist/990.js +1 -1
  50. package/dist/990.js.map +1 -1
  51. package/dist/package-test.js +1 -1
  52. package/dist/package-test.js.map +1 -1
  53. package/dist/package.js +1 -1
  54. package/dist/package.js.map +1 -1
  55. package/package.json +1 -95
  56. package/scripts/command.txt +1 -1
  57. package/scripts/func.json +1 -664
  58. package/src/anova/anova-tools.ts +0 -4
  59. package/src/eda-tools.ts +52 -17
  60. package/src/eda-ui.ts +0 -114
  61. package/src/global.d.ts +13 -0
  62. package/src/missing-values-imputation/ui-constants.ts +3 -1
  63. package/src/missing-values-imputation/ui.ts +7 -7
  64. package/src/package.ts +21 -17
  65. package/src/pls/pls-constants.ts +7 -7
  66. package/src/pls/pls-ml.ts +2 -1
  67. package/src/pls/pls-tools.ts +8 -3
  68. package/src/tests/anova-tests.ts +1 -1
  69. package/src/tests/linear-methods-tests.ts +6 -1
  70. package/src/utils.ts +90 -0
  71. package/wasm/EDA.js +28 -1
  72. package/wasm/EDA.wasm +0 -0
  73. package/wasm/EDAAPI.js +22 -4
  74. package/wasm/EDAForWebWorker.js +1 -1
  75. package/wasm/PCA/PCA.cpp +49 -58
  76. package/wasm/PCA/PCA.h +19 -0
  77. package/wasm/pcaExport.cpp +25 -1
  78. package/wasm/workers/{principalComponentAnalysisWorker.js → principalComponentAnalysisNipalsWorker.js} +1 -1
  79. package/wasm/workers/principalComponentAnalysisWorkerUpd.js +16 -0
  80. package/dist/91.js +0 -2
  81. package/dist/91.js.map +0 -1
@@ -273,10 +273,6 @@ export class FactorizedData {
273
273
  this.sumsOfSquares = sumsOfSquares;
274
274
  this.subSampleSizes = subSampleSizes;
275
275
 
276
- console.log(sums);
277
- console.log(sumsOfSquares);
278
- console.log(subSampleSizes);
279
-
280
276
  break;
281
277
 
282
278
  default:
package/src/eda-tools.ts CHANGED
@@ -4,32 +4,67 @@ import * as grok from 'datagrok-api/grok';
4
4
  import * as ui from 'datagrok-api/ui';
5
5
  import * as DG from 'datagrok-api/dg';
6
6
 
7
- import {_principalComponentAnalysisInWebWorker,
8
- _partialLeastSquareRegressionInWebWorker} from '../wasm/EDAAPI';
7
+ import {_principalComponentAnalysisInWebWorker, _principalComponentAnalysis,
8
+ _partialLeastSquareRegressionInWebWorker,
9
+ _principalComponentAnalysisNipals, _principalComponentAnalysisNipalsInWebWorker,
10
+ } from '../wasm/EDAAPI';
9
11
 
10
- import {checkWasmDimensionReducerInputs, checkUMAPinputs, checkTSNEinputs,
11
- getRowsOfNumericalColumnns} from './utils';
12
+ import {checkWasmDimensionReducerInputs, checkUMAPinputs, checkTSNEinputs, NIPALS_PREFER_COLS_COUNT,
13
+ getRowsOfNumericalColumnns, centerScaleDataFrame, extractNonConstantColsDf} from './utils';
12
14
 
13
15
  // Principal components analysis (PCA)
14
16
  export async function computePCA(table: DG.DataFrame, features: DG.ColumnList, components: number,
15
- center: boolean, scale: boolean): Promise<DG.DataFrame> {
17
+ toCenter: boolean, toScale: boolean): Promise<DG.DataFrame> {
16
18
  checkWasmDimensionReducerInputs(features, components);
17
19
 
18
- const centerNum = center ? 1 : 0;
19
- const scaleNum = scale ? 1 : 0;
20
+ const rowCount = table.rowCount;
20
21
 
21
- return await _principalComponentAnalysisInWebWorker(table, features, components, centerNum, scaleNum);
22
- }
22
+ // Extract non-const cols dataframe
23
+ const nonConstData = extractNonConstantColsDf(features);
24
+ const nonConstColsCount = nonConstData.columns.length;
23
25
 
24
- // Partial least square regression (PLS): TO REMOVE
25
- export async function computePLS(
26
- table: DG.DataFrame, features: DG.ColumnList, predict: DG.Column, components: number,
27
- ): Promise<any> {
28
- // Inputs are checked in the same manner as in PCA, since the same computations are applied.
29
- checkWasmDimensionReducerInputs(features, components);
26
+ // Return zero columns if data is constant
27
+ if (nonConstColsCount === 0) {
28
+ const cols: DG.Column[] = [];
29
+
30
+ for (let i = 0; i < components; ++i)
31
+ cols.push(DG.Column.fromFloat32Array(`${i + 1}`, new Float32Array(rowCount).fill(0)));
32
+
33
+ return DG.DataFrame.fromColumns(cols);
34
+ }
35
+
36
+ const zeroColsToAdd = (nonConstColsCount < components) ? (components - nonConstColsCount) : 0;
37
+ const componentsToCompute = Math.min(components, nonConstColsCount);
38
+
39
+ let output: DG.DataFrame | undefined = undefined;
40
+
41
+ // PCA
42
+ if (nonConstColsCount > NIPALS_PREFER_COLS_COUNT)
43
+ output = await _principalComponentAnalysisNipalsInWebWorker(table, features, componentsToCompute);
44
+ else {
45
+ //try to apply the classic algorithm
46
+ const res = await _principalComponentAnalysisInWebWorker(table, features, componentsToCompute);
47
+
48
+ if (res !== -1) // the classic succeed
49
+ output = centerScaleDataFrame(res, toCenter, toScale);
50
+ else // the classic failed
51
+ output = await _principalComponentAnalysisNipalsInWebWorker(table, features, componentsToCompute);
52
+ }
53
+
54
+ if (output === undefined)
55
+ throw new Error('Failed to compute PCA');
56
+
57
+ output = centerScaleDataFrame(output, toCenter, toScale);
58
+
59
+ const cols = output.columns;
60
+ const count = cols.length;
61
+
62
+ // Add zero columns (with respect to the const cols count)
63
+ for (let i = 0; i < zeroColsToAdd; ++i)
64
+ cols.add(DG.Column.fromFloat32Array(`${count + i + 1}`, new Float32Array(rowCount).fill(0)));
30
65
 
31
- return await _partialLeastSquareRegressionInWebWorker(table, features, predict, components);
32
- }
66
+ return output;
67
+ } // computePCA
33
68
 
34
69
  // Uniform Manifold Approximation and Projection (UMAP)
35
70
  export async function computeUMAP(features: DG.ColumnList, components: number, epochs: number,
package/src/eda-ui.ts CHANGED
@@ -17,117 +17,3 @@ export function addPrefixToEachColumnName(prefix: string, columns: DG.ColumnList
17
17
  for (const col of columns.toList())
18
18
  col.name = prefix + col.name;
19
19
  }
20
-
21
- // Predicted vs Reference scatter plot
22
- export function predictedVersusReferenceScatterPlot(
23
- samplesNames: DG.Column, reference: DG.Column, prediction: DG.Column,
24
- ): DG.Viewer {
25
- prediction.name = reference.name + '(predicted)';
26
-
27
- const dfReferencePrediction = DG.DataFrame.fromColumns([samplesNames, reference, prediction]);
28
- dfReferencePrediction.name = 'Reference vs. Predicted';
29
-
30
- return DG.Viewer.scatterPlot(dfReferencePrediction,
31
- {title: dfReferencePrediction.name,
32
- x: reference.name,
33
- y: prediction.name,
34
- showRegressionLine: true,
35
- markerType: 'circle',
36
- labels: samplesNames.name,
37
- });
38
- }
39
-
40
- // Regression Coefficients Bar Chart
41
- export function regressionCoefficientsBarChart(features: DG.ColumnList, regressionCoeffs: DG.Column): DG.Viewer {
42
- regressionCoeffs.name = 'regression coefficient';
43
-
44
- const namesOfPredictors = [];
45
- for (const col of features)
46
- namesOfPredictors.push(col.name);
47
-
48
- const predictorNamesColumn = DG.Column.fromStrings('feature', namesOfPredictors);
49
-
50
- const dfRegrCoefs = DG.DataFrame.fromColumns([predictorNamesColumn, regressionCoeffs]);
51
- dfRegrCoefs.name = 'Regression Coefficients';
52
-
53
- return DG.Viewer.barChart(dfRegrCoefs,
54
- {title: dfRegrCoefs.name, split: 'feature',
55
- value: 'regression coefficient', valueAggrType: 'avg'});
56
- }
57
-
58
- // Scores Scatter Plot
59
- export function scoresScatterPlot(
60
- samplesNames: DG.Column, xScores: Array<DG.Column>, yScores: Array<DG.Column>,
61
- ): DG.Viewer {
62
- const scoresColumns = [samplesNames];
63
-
64
- for (let i = 0; i < xScores.length; i++) {
65
- xScores[i].name = `x.score.t${i+1}`;
66
- scoresColumns.push(xScores[i]);
67
- }
68
-
69
- for (let i = 0; i < yScores.length; i++) {
70
- yScores[i].name = `y.score.u${i+1}`;
71
- scoresColumns.push(yScores[i]);
72
- }
73
-
74
- const scores = DG.DataFrame.fromColumns(scoresColumns);
75
- scores.name = 'Scores';
76
- //grok.shell.addTableView(scores);
77
-
78
- const index = xScores.length > 1 ? 1 : 0;
79
-
80
- return DG.Viewer.scatterPlot(scores,
81
- {title: scores.name,
82
- x: xScores[0].name,
83
- y: xScores[index].name,
84
- markerType: 'circle',
85
- labels: samplesNames.name,
86
- });
87
- }
88
-
89
- // Loading Scatter Plot
90
- export function loadingScatterPlot(features: DG.ColumnList, xLoadings: Array<DG.Column>): DG.Viewer {
91
- const loadingCols = [];
92
-
93
- const loadingLabels = [];
94
- for (const col of features)
95
- loadingLabels.push(col.name);
96
-
97
- loadingCols.push(DG.Column.fromStrings('labels', loadingLabels));
98
-
99
- for (let i = 0; i < xLoadings.length; i++) {
100
- xLoadings[i].name = `x.loading.p${i+1}`;
101
- loadingCols.push(xLoadings[i]);
102
- }
103
-
104
- const dfLoadings = DG.DataFrame.fromColumns(loadingCols);
105
- dfLoadings.name = 'Loadings';
106
-
107
- return DG.Viewer.scatterPlot(dfLoadings,
108
- {title: dfLoadings.name,
109
- x: xLoadings[0].name,
110
- y: xLoadings[xLoadings.length - 1].name,
111
- markerType: 'circle',
112
- labels: 'labels',
113
- });
114
- }
115
-
116
- // Add PLS visualization
117
- export function addPLSvisualization(
118
- table: DG.DataFrame, samplesNames: DG.Column, features: DG.ColumnList, predict: DG.Column, plsOutput: any,
119
- ): void {
120
- const view = (table.id !== null) ? grok.shell.getTableView(table.name) : grok.shell.addTableView(table);
121
-
122
- // 1. Predicted vs Reference scatter plot
123
- view.addViewer(predictedVersusReferenceScatterPlot(samplesNames, predict, plsOutput[0]));
124
-
125
- // 2. Regression Coefficients Bar Chart
126
- view.addViewer(regressionCoefficientsBarChart(features, plsOutput[1]));
127
-
128
- // 3. Loading Scatter Plot
129
- view.addViewer(loadingScatterPlot(features, plsOutput[4]));
130
-
131
- // 4. Scores Scatter Plot
132
- view.addViewer(scoresScatterPlot(samplesNames, plsOutput[2], plsOutput[3]));
133
- }
@@ -0,0 +1,13 @@
1
+ import * as grokNamespace from 'datagrok-api/grok';
2
+ import * as uiNamespace from 'datagrok-api/ui';
3
+ import * as DGNamespace from 'datagrok-api/dg';
4
+ import * as rxjsNamespace from 'rxjs';
5
+ import $Namespace from 'cash-dom';
6
+
7
/**
 * Ambient declarations of global constants, each typed after the module namespace
 * imported at the top of this file.
 */
declare global {
  const grok: typeof grokNamespace;
  const ui: typeof uiNamespace;
  const DG: typeof DGNamespace;
  // Correctly spelled RxJS global.
  // NOTE(review): presumably the runtime exposes the library under the name `rxjs` — confirm.
  const rxjs: typeof rxjsNamespace;
  /** @deprecated Misspelling of `rxjs`; retained only so existing references keep type-checking. */
  const rjxs: typeof rxjsNamespace;
  const $: typeof $Namespace;
}
@@ -26,7 +26,7 @@ export const COPY_SUFFIX = 'copy';
26
26
 
27
27
  /** UI titles */
28
28
  export enum TITLE {
29
- KNN_IMPUTER = 'Impute',
29
+ KNN_IMPUTER = 'KNN Imputation',
30
30
  TABLE = 'Table',
31
31
  IN_PLACE = 'In-place',
32
32
  COLUMNS = 'Impute',
@@ -62,3 +62,5 @@ export enum HINT {
62
62
  IMPUTATION_SETTINGS = 'Simple imputation settings',
63
63
  KEEP_EMPTY = 'Defines whether to keep empty missing values failed to be imputed OR fill them using simple imputation',
64
64
  };
65
+
66
+ export const MAX_INPUT_NAME_LENGTH = 15;
@@ -2,7 +2,7 @@ import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
- import {TITLE, KNN_IMPUTER, ERROR_MSG, HINT} from './ui-constants';
5
+ import {TITLE, KNN_IMPUTER, ERROR_MSG, HINT, MAX_INPUT_NAME_LENGTH} from './ui-constants';
6
6
  import {SUPPORTED_COLUMN_TYPES, METRIC_TYPE, DISTANCE_TYPE, MetricInfo, DEFAULT, MIN_NEIGHBORS,
7
7
  impute, getMissingValsIndices, areThereFails, imputeFailed} from './knn-imputer';
8
8
 
@@ -190,7 +190,7 @@ export async function runKNNImputer(df?: DG.DataFrame): Promise<void> {
190
190
 
191
191
  // Metrics components
192
192
  const featuresMetrics = new Map<string, MetricInfo>();
193
- const metricInfoInputs = new Map<string, HTMLDivElement>();
193
+ const metricInfoInputs = new Map<string, HTMLElement>();
194
194
  const metricsDiv = ui.divV([]);
195
195
  metricsDiv.style.overflow = 'auto';
196
196
 
@@ -214,7 +214,7 @@ export async function runKNNImputer(df?: DG.DataFrame): Promise<void> {
214
214
 
215
215
  // The following should provide a slider (see th bug https://reddata.atlassian.net/browse/GROK-14431)
216
216
  const prop = DG.Property.fromOptions({
217
- 'name': name,
217
+ 'name': name.length < MAX_INPUT_NAME_LENGTH ? name : name.slice(0, MAX_INPUT_NAME_LENGTH).concat('...'),
218
218
  'inputType': 'Float',
219
219
  'min': 0,
220
220
  'max': 10,
@@ -229,11 +229,11 @@ export async function runKNNImputer(df?: DG.DataFrame): Promise<void> {
229
229
  distInfo.weight = value ?? settings.defaultWeight;
230
230
  featuresMetrics.set(name, distInfo);
231
231
  });
232
- weightInput.setTooltip(HINT.WEIGHT);
232
+ ui.tooltip.bind(weightInput.captionLabel, name);
233
+ ui.tooltip.bind(weightInput.input, HINT.WEIGHT);
233
234
 
234
- const div = ui.divH([distTypeInput.root, weightInput.root]);
235
- metricInfoInputs.set(name, div);
236
- metricsDiv.append(div);
235
+ metricInfoInputs.set(name, weightInput.root);
236
+ metricsDiv.append(weightInput.root);
237
237
  });
238
238
 
239
239
  // The main dialog
package/src/package.ts CHANGED
@@ -73,23 +73,27 @@ export async function dbScan(df: DG.DataFrame, xCol: DG.Column, yCol: DG.Column,
73
73
  //name: PCA
74
74
  //description: Principal component analysis (PCA)
75
75
  //input: dataframe table
76
- //input: column_list features {type: numerical}
77
- //input: int components = 2 {caption: Components} [Number of components.]
76
+ //input: column_list features {type: numerical; allowNulls: false}
77
+ //input: int components = 2 {caption: Components; nullable: false; min: 1} [Number of components.]
78
78
  //input: bool center = false [Indicating whether the variables should be shifted to be zero centered.]
79
79
  //input: bool scale = false [Indicating whether the variables should be scaled to have unit variance.]
80
80
  export async function PCA(table: DG.DataFrame, features: DG.ColumnList, components: number, center: boolean, scale: boolean): Promise<void> {
81
- const pcaTable = await computePCA(table, features, components, center, scale);
82
- addPrefixToEachColumnName('PC', pcaTable.columns);
83
-
84
- if (table.id === null) // table is loaded from a local file
85
- grok.shell.addTableView(pcaTable);
86
- else {
87
- const cols = table.columns;
88
-
89
- for (const col of pcaTable.columns) {
90
- col.name = cols.getUnusedName(col.name);
91
- cols.add(col);
81
+ try {
82
+ const pcaTable = await computePCA(table, features, components, center, scale);
83
+ addPrefixToEachColumnName('PC', pcaTable.columns);
84
+
85
+ if (table.id === null) // table is loaded from a local file
86
+ grok.shell.addTableView(pcaTable);
87
+ else {
88
+ const cols = table.columns;
89
+
90
+ for (const col of pcaTable.columns) {
91
+ col.name = cols.getUnusedName(col.name);
92
+ cols.add(col);
93
+ }
92
94
  }
95
+ } catch (error) {
96
+ grok.shell.warning(`Failed to compute PCA: ${error instanceof Error ? error.message : 'platform issue'}`);
93
97
  }
94
98
  }
95
99
 
@@ -304,7 +308,7 @@ export async function MVA(): Promise<void> {
304
308
 
305
309
  //name: MVA demo
306
310
  //description: Multidimensional data analysis using partial least squares (PLS) regression. It identifies latent factors and constructs a linear model based on them.
307
- //meta.demoPath: Compute | Multivariate analysis
311
+ //meta.demoPath: Compute | Multivariate Analysis
308
312
  export async function demoMultivariateAnalysis(): Promise<any> {
309
313
  await runDemoMVA();
310
314
  }
@@ -552,15 +556,15 @@ export function anova(): void {
552
556
  runOneWayAnova();
553
557
  }
554
558
 
555
- //top-menu: ML | Missing Values Imputation ...
559
+ //top-menu: ML | Impute Missing Values...
556
560
  //name: KNN impute
557
- //desription: Missing values imputation using the k-nearest neighbors method
561
+ //description: Missing values imputation using the k-nearest neighbors method (KNN)
558
562
  export function kNNImputation() {
559
563
  runKNNImputer();
560
564
  }
561
565
 
562
566
  //name: KNN imputation for a table
563
- //desription: Missing values imputation using the k-nearest neighbors method for a given table
567
+ //description: Missing values imputation using the k-nearest neighbors method
564
568
  //input: dataframe table
565
569
  export async function kNNImputationForTable(table: DG.DataFrame) {
566
570
  await runKNNImputer(table);
@@ -50,13 +50,13 @@ export enum HINT {
50
50
 
51
51
  /** Links to help */
52
52
  export enum LINK {
53
- PLS = 'https://datagrok.ai/help/explore/multivariate-analysis/pls#pls-components',
54
- MVA = 'https://datagrok.ai/help/explore/multivariate-analysis/pls',
55
- MODEL = 'https://datagrok.ai/help/explore/multivariate-analysis/plots/predicted-vs-reference',
56
- COEFFS = 'https://datagrok.ai/help/explore/multivariate-analysis/plots/regression-coefficients',
57
- LOADINGS = 'https://datagrok.ai/help/explore/multivariate-analysis/plots/loadings',
58
- EXPL_VARS = 'https://datagrok.ai/help/explore/multivariate-analysis/plots/explained-variance',
59
- SCORES = 'https://datagrok.ai/help/explore/multivariate-analysis/plots/scores',
53
+ PLS = '/help/explore/multivariate-analysis/pls#pls-components',
54
+ MVA = '/help/explore/multivariate-analysis/pls',
55
+ MODEL = '/help/explore/multivariate-analysis/plots/predicted-vs-reference',
56
+ COEFFS = '/help/explore/multivariate-analysis/plots/regression-coefficients',
57
+ LOADINGS = '/help/explore/multivariate-analysis/plots/loadings',
58
+ EXPL_VARS = '/help/explore/multivariate-analysis/plots/explained-variance',
59
+ SCORES = '/help/explore/multivariate-analysis/plots/scores',
60
60
  }
61
61
 
62
62
  /** Components consts */
package/src/pls/pls-ml.ts CHANGED
@@ -276,7 +276,8 @@ export class PlsModel {
276
276
  xColumnName: columns.byIndex(shift).name,
277
277
  yColumnName: columns.byIndex(shift + (components > 1 ? 1 : 0)).name,
278
278
  markerType: DG.MARKER_TYPE.CIRCLE,
279
- labels: TITLE.FEATURES,
279
+ //@ts-ignore
280
+ labelFormColumnNames: [TITLE.FEATURES],
280
281
  help: LINK.LOADINGS,
281
282
  }));
282
283
 
@@ -161,10 +161,13 @@ async function performMVA(input: PlsInput, analysisType: PLS_ANALYSIS): Promise<
161
161
  yColumnName: pred.name,
162
162
  showRegressionLine: true,
163
163
  markerType: DG.MARKER_TYPE.CIRCLE,
164
- labels: input.names?.name,
164
+ showLabels: 'Always',
165
165
  help: LINK.MODEL,
166
166
  }));
167
167
 
168
+ if ((input.names !== undefined) && (input.names !== null))
169
+ predictVsReferScatter.setOptions({labelFormColumnNames: [input.names?.name]});
170
+
168
171
  // 2. Regression Coefficients Bar Chart
169
172
  result.regressionCoefficients.name = TITLE.REGR_COEFS;
170
173
  const regrCoeffsBar = view.addViewer(DG.Viewer.barChart(buffer, {
@@ -184,7 +187,7 @@ async function performMVA(input: PlsInput, analysisType: PLS_ANALYSIS): Promise<
184
187
  xColumnName: `${TITLE.XLOADING}1`,
185
188
  yColumnName: `${TITLE.XLOADING}${result.xLoadings.length > 1 ? '2' : '1'}`,
186
189
  markerType: DG.MARKER_TYPE.CIRCLE,
187
- labels: TITLE.FEATURE,
190
+ labelFormColumnNames: [TITLE.FEATURE],
188
191
  help: LINK.LOADINGS,
189
192
  }));
190
193
 
@@ -204,11 +207,13 @@ async function performMVA(input: PlsInput, analysisType: PLS_ANALYSIS): Promise<
204
207
  xColumnName: plsCols[0].name,
205
208
  yColumnName: (plsCols.length > 1) ? plsCols[1].name : result.uScores[0].name,
206
209
  markerType: DG.MARKER_TYPE.CIRCLE,
207
- labels: input.names?.name,
208
210
  help: LINK.SCORES,
209
211
  showViewerFormulaLines: true,
210
212
  });
211
213
 
214
+ if ((input.names !== undefined) && (input.names !== null))
215
+ predictVsReferScatter.setOptions({labelFormColumnNames: [input.names?.name]});
216
+
212
217
  // 4.3) create lines & circles
213
218
  scoresScatter.meta.formulaLines.addAll(getLines(scoreNames));
214
219
  view.addViewer(scoresScatter);
@@ -83,5 +83,5 @@ category('ANOVA', () => {
83
83
 
84
84
  // check F-critical
85
85
  expect(eq(analysis.fCritical, EXPECTED.F_CRIT), true, 'Incorrect F-critical');
86
- }, {timeout: TIMEOUT, benchmark: true});
86
+ }, {timeout: TIMEOUT});
87
87
  });
@@ -16,7 +16,7 @@ const ROWS = 100;
16
16
  const ROWS_K = 100;
17
17
  const COLS = 100;
18
18
  const COMPONENTS = 3;
19
- const TIMEOUT = 4000;
19
+ const TIMEOUT = 9000;
20
20
  const INDEP_COLS = 2;
21
21
  const DEP_COLS = 5;
22
22
  const ERROR = 0.1;
@@ -27,6 +27,11 @@ category('Principal component analysis', () => {
27
27
  await computePCA(df, df.columns, COMPONENTS, false, false);
28
28
  }, {timeout: TIMEOUT, benchmark: true});
29
29
 
30
+ test(`Performance: 1K rows, 5K cols, ${COMPONENTS} components`, async () => {
31
+ const df = grok.data.demo.randomWalk(1000, 5000);
32
+ await computePCA(df, df.columns, COMPONENTS, false, false);
33
+ }, {timeout: TIMEOUT, benchmark: true});
34
+
30
35
  test('Correctness', async () => {
31
36
  // Data
32
37
  const df = regressionDataset(ROWS, COMPONENTS, DEP_COLS);
package/src/utils.ts CHANGED
@@ -11,6 +11,9 @@ const FEATURES_COUNT_MIN = 1;
11
11
  const PERCENTAGE_MIN = 0;
12
12
  const PERCENTAGE_MAX = 100;
13
13
  const MAX_ELEMENTS_COUNT = 100000000;
14
+ export const NIPALS_PREFER_COLS_COUNT = 900;
15
+
16
+ const TINY = 0.000001;
14
17
 
15
18
  // Error messages
16
19
  const COMP_POSITVE_MES = 'components must be positive.';
@@ -180,3 +183,90 @@ export function getRowsOfNumericalColumnns(columnList: DG.ColumnList): any[][] {
180
183
 
181
184
  return output;
182
185
  }
186
+
187
/**
 * Centers every numerical column of the dataframe in place.
 *
 * The column average is subtracted from each raw value. Columns whose average is
 * within `TINY` of zero are left untouched.
 *
 * @param df dataframe to process; its raw column data is modified
 * @returns the same dataframe instance
 */
function centerDf(df: DG.DataFrame): DG.DataFrame {
  const n = df.rowCount;

  for (const column of df.columns) {
    if (!column.isNumerical)
      continue;

    const mean = column.stats.avg;

    // Written as a negated `>` so that a NaN average also skips the column
    if (!(Math.abs(mean) > TINY))
      continue;

    const values = column.getRawData();

    for (let row = 0; row < n; ++row)
      values[row] = values[row] - mean;
  }

  return df;
}
205
+
206
/**
 * Centers and scales every numerical column of the dataframe in place.
 *
 * Each raw value becomes `(value - average) / stdev`. When the standard deviation
 * is not positive, the column is only centered.
 *
 * @param df dataframe to process; its raw column data is modified
 * @returns the same dataframe instance
 */
function centerScaleDf(df: DG.DataFrame): DG.DataFrame {
  const n = df.rowCount;

  for (const column of df.columns) {
    if (!column.isNumerical)
      continue;

    const spread = column.stats.stdev;
    const mean = column.stats.avg;
    const values = column.getRawData();

    // Dividing by 1 is exact, so a non-positive deviation reduces to plain centering
    const divisor = (spread > 0) ? spread : 1;

    for (let row = 0; row < n; ++row)
      values[row] = (values[row] - mean) / divisor;
  }

  return df;
}
227
+
228
/**
 * Scales every numerical column of the dataframe in place.
 *
 * Each raw value is divided by the column's standard deviation. Columns are skipped
 * when the deviation is not positive or is already within `TINY` of one.
 *
 * @param df dataframe to process; its raw column data is modified
 * @returns the same dataframe instance
 */
function scaleDf(df: DG.DataFrame): DG.DataFrame {
  const n = df.rowCount;

  for (const column of df.columns) {
    if (!column.isNumerical)
      continue;

    const spread = column.stats.stdev;

    // Negated `>` comparisons keep the original handling of NaN deviations
    const alreadyUnit = !(Math.abs(spread - 1) > TINY);
    if (alreadyUnit || !(spread > 0))
      continue;

    const values = column.getRawData();

    for (let row = 0; row < n; ++row)
      values[row] = values[row] / spread;
  }

  return df;
}
246
+
247
/**
 * Standardizes the dataframe according to the given flags.
 *
 * Dispatches to centering and scaling, centering only, or scaling only.
 * With both flags off the dataframe is returned as is.
 *
 * @param df dataframe to standardize
 * @param toCenter whether to center the columns
 * @param toScale whether to scale the columns
 * @returns the dataframe returned by the selected routine, or `df` itself when both flags are off
 */
export function centerScaleDataFrame(df: DG.DataFrame, toCenter: boolean, toScale: boolean): DG.DataFrame {
  if (toCenter && toScale)
    return centerScaleDf(df);

  if (toCenter)
    return centerDf(df);

  if (toScale)
    return scaleDf(df);

  return df;
}
261
+
262
/**
 * Builds a dataframe from the columns that have a positive standard deviation
 * and no missing values.
 *
 * @param features candidate columns
 * @returns dataframe created from the columns that passed both checks
 */
export function extractNonConstantColsDf(features: DG.ColumnList): DG.DataFrame {
  const kept: DG.Column[] = [];

  for (const column of features) {
    const isConstant = !(column.stats.stdev > 0);
    if (isConstant)
      continue;

    const hasMissingValues = !(column.stats.missingValueCount < 1);
    if (hasMissingValues)
      continue;

    kept.push(column);
  }

  return DG.DataFrame.fromColumns(kept);
}