@datagrok/eda 1.2.2 → 1.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -4
- package/dist/111.js +1 -1
- package/dist/111.js.map +1 -1
- package/dist/128.js +2 -0
- package/dist/128.js.map +1 -0
- package/dist/153.js +1 -1
- package/dist/153.js.map +1 -1
- package/dist/23.js +1 -1
- package/dist/23.js.map +1 -1
- package/dist/234.js +1 -1
- package/dist/234.js.map +1 -1
- package/dist/260.js +1 -1
- package/dist/260.js.map +1 -1
- package/dist/348.js +1 -1
- package/dist/348.js.map +1 -1
- package/dist/377.js +1 -1
- package/dist/377.js.map +1 -1
- package/dist/{12a82b8001995d426ed2.wasm → 3cec7d4ab7dacdcb37e6.wasm} +0 -0
- package/dist/412.js +1 -1
- package/dist/412.js.map +1 -1
- package/dist/531.js +1 -1
- package/dist/531.js.map +1 -1
- package/dist/583.js +1 -1
- package/dist/583.js.map +1 -1
- package/dist/603.js +1 -1
- package/dist/603.js.map +1 -1
- package/dist/656.js +1 -1
- package/dist/656.js.map +1 -1
- package/dist/682.js +1 -1
- package/dist/682.js.map +1 -1
- package/dist/705.js +1 -1
- package/dist/705.js.map +1 -1
- package/dist/727.js +1 -1
- package/dist/727.js.map +1 -1
- package/dist/763.js +1 -1
- package/dist/763.js.map +1 -1
- package/dist/778.js +1 -1
- package/dist/778.js.map +1 -1
- package/dist/783.js +1 -1
- package/dist/783.js.map +1 -1
- package/dist/793.js +1 -1
- package/dist/793.js.map +1 -1
- package/dist/860.js +2 -0
- package/dist/860.js.map +1 -0
- package/dist/950.js +1 -1
- package/dist/950.js.map +1 -1
- package/dist/980.js +1 -1
- package/dist/980.js.map +1 -1
- package/dist/990.js +1 -1
- package/dist/990.js.map +1 -1
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/package.json +1 -95
- package/scripts/command.txt +1 -1
- package/scripts/func.json +1 -664
- package/src/anova/anova-tools.ts +0 -4
- package/src/eda-tools.ts +52 -17
- package/src/eda-ui.ts +0 -114
- package/src/global.d.ts +13 -0
- package/src/missing-values-imputation/ui-constants.ts +3 -1
- package/src/missing-values-imputation/ui.ts +7 -7
- package/src/package.ts +21 -17
- package/src/pls/pls-constants.ts +7 -7
- package/src/pls/pls-ml.ts +2 -1
- package/src/pls/pls-tools.ts +8 -3
- package/src/tests/anova-tests.ts +1 -1
- package/src/tests/linear-methods-tests.ts +6 -1
- package/src/utils.ts +90 -0
- package/wasm/EDA.js +28 -1
- package/wasm/EDA.wasm +0 -0
- package/wasm/EDAAPI.js +22 -4
- package/wasm/EDAForWebWorker.js +1 -1
- package/wasm/PCA/PCA.cpp +49 -58
- package/wasm/PCA/PCA.h +19 -0
- package/wasm/pcaExport.cpp +25 -1
- package/wasm/workers/{principalComponentAnalysisWorker.js → principalComponentAnalysisNipalsWorker.js} +1 -1
- package/wasm/workers/principalComponentAnalysisWorkerUpd.js +16 -0
- package/dist/91.js +0 -2
- package/dist/91.js.map +0 -1
package/src/anova/anova-tools.ts
CHANGED
package/src/eda-tools.ts
CHANGED
|
@@ -4,32 +4,67 @@ import * as grok from 'datagrok-api/grok';
|
|
|
4
4
|
import * as ui from 'datagrok-api/ui';
|
|
5
5
|
import * as DG from 'datagrok-api/dg';
|
|
6
6
|
|
|
7
|
-
import {_principalComponentAnalysisInWebWorker,
|
|
8
|
-
_partialLeastSquareRegressionInWebWorker
|
|
7
|
+
import {_principalComponentAnalysisInWebWorker, _principalComponentAnalysis,
|
|
8
|
+
_partialLeastSquareRegressionInWebWorker,
|
|
9
|
+
_principalComponentAnalysisNipals, _principalComponentAnalysisNipalsInWebWorker,
|
|
10
|
+
} from '../wasm/EDAAPI';
|
|
9
11
|
|
|
10
|
-
import {checkWasmDimensionReducerInputs, checkUMAPinputs, checkTSNEinputs,
|
|
11
|
-
getRowsOfNumericalColumnns} from './utils';
|
|
12
|
+
import {checkWasmDimensionReducerInputs, checkUMAPinputs, checkTSNEinputs, NIPALS_PREFER_COLS_COUNT,
|
|
13
|
+
getRowsOfNumericalColumnns, centerScaleDataFrame, extractNonConstantColsDf} from './utils';
|
|
12
14
|
|
|
13
15
|
// Principal components analysis (PCA)
|
|
14
16
|
export async function computePCA(table: DG.DataFrame, features: DG.ColumnList, components: number,
|
|
15
|
-
|
|
17
|
+
toCenter: boolean, toScale: boolean): Promise<DG.DataFrame> {
|
|
16
18
|
checkWasmDimensionReducerInputs(features, components);
|
|
17
19
|
|
|
18
|
-
const
|
|
19
|
-
const scaleNum = scale ? 1 : 0;
|
|
20
|
+
const rowCount = table.rowCount;
|
|
20
21
|
|
|
21
|
-
|
|
22
|
-
|
|
22
|
+
// Extract non-const cols dataframe
|
|
23
|
+
const nonConstData = extractNonConstantColsDf(features);
|
|
24
|
+
const nonConstColsCount = nonConstData.columns.length;
|
|
23
25
|
|
|
24
|
-
//
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
26
|
+
// Return zero columns if data is constant
|
|
27
|
+
if (nonConstColsCount === 0) {
|
|
28
|
+
const cols: DG.Column[] = [];
|
|
29
|
+
|
|
30
|
+
for (let i = 0; i < components; ++i)
|
|
31
|
+
cols.push(DG.Column.fromFloat32Array(`${i + 1}`, new Float32Array(rowCount).fill(0)));
|
|
32
|
+
|
|
33
|
+
return DG.DataFrame.fromColumns(cols);
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
const zeroColsToAdd = (nonConstColsCount < components) ? (components - nonConstColsCount) : 0;
|
|
37
|
+
const componentsToCompute = Math.min(components, nonConstColsCount);
|
|
38
|
+
|
|
39
|
+
let output: DG.DataFrame | undefined = undefined;
|
|
40
|
+
|
|
41
|
+
// PCA
|
|
42
|
+
if (nonConstColsCount > NIPALS_PREFER_COLS_COUNT)
|
|
43
|
+
output = await _principalComponentAnalysisNipalsInWebWorker(table, features, componentsToCompute);
|
|
44
|
+
else {
|
|
45
|
+
//try to apply the classic algorithm
|
|
46
|
+
const res = await _principalComponentAnalysisInWebWorker(table, features, componentsToCompute);
|
|
47
|
+
|
|
48
|
+
if (res !== -1) // the classic succeed
|
|
49
|
+
output = centerScaleDataFrame(res, toCenter, toScale);
|
|
50
|
+
else // the classic failed
|
|
51
|
+
output = await _principalComponentAnalysisNipalsInWebWorker(table, features, componentsToCompute);
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
if (output === undefined)
|
|
55
|
+
throw new Error('Failed to compute PCA');
|
|
56
|
+
|
|
57
|
+
output = centerScaleDataFrame(output, toCenter, toScale);
|
|
58
|
+
|
|
59
|
+
const cols = output.columns;
|
|
60
|
+
const count = cols.length;
|
|
61
|
+
|
|
62
|
+
// Add zero columns (with respect to the const cols count)
|
|
63
|
+
for (let i = 0; i < zeroColsToAdd; ++i)
|
|
64
|
+
cols.add(DG.Column.fromFloat32Array(`${count + i + 1}`, new Float32Array(rowCount).fill(0)));
|
|
30
65
|
|
|
31
|
-
return
|
|
32
|
-
}
|
|
66
|
+
return output;
|
|
67
|
+
} // computePCA
|
|
33
68
|
|
|
34
69
|
// Uniform Manifold Approximation and Projection (UMAP)
|
|
35
70
|
export async function computeUMAP(features: DG.ColumnList, components: number, epochs: number,
|
package/src/eda-ui.ts
CHANGED
|
@@ -17,117 +17,3 @@ export function addPrefixToEachColumnName(prefix: string, columns: DG.ColumnList
|
|
|
17
17
|
for (const col of columns.toList())
|
|
18
18
|
col.name = prefix + col.name;
|
|
19
19
|
}
|
|
20
|
-
|
|
21
|
-
// Predicted vs Reference scatter plot
|
|
22
|
-
export function predictedVersusReferenceScatterPlot(
|
|
23
|
-
samplesNames: DG.Column, reference: DG.Column, prediction: DG.Column,
|
|
24
|
-
): DG.Viewer {
|
|
25
|
-
prediction.name = reference.name + '(predicted)';
|
|
26
|
-
|
|
27
|
-
const dfReferencePrediction = DG.DataFrame.fromColumns([samplesNames, reference, prediction]);
|
|
28
|
-
dfReferencePrediction.name = 'Reference vs. Predicted';
|
|
29
|
-
|
|
30
|
-
return DG.Viewer.scatterPlot(dfReferencePrediction,
|
|
31
|
-
{title: dfReferencePrediction.name,
|
|
32
|
-
x: reference.name,
|
|
33
|
-
y: prediction.name,
|
|
34
|
-
showRegressionLine: true,
|
|
35
|
-
markerType: 'circle',
|
|
36
|
-
labels: samplesNames.name,
|
|
37
|
-
});
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
// Regression Coefficients Bar Chart
|
|
41
|
-
export function regressionCoefficientsBarChart(features: DG.ColumnList, regressionCoeffs: DG.Column): DG.Viewer {
|
|
42
|
-
regressionCoeffs.name = 'regression coefficient';
|
|
43
|
-
|
|
44
|
-
const namesOfPredictors = [];
|
|
45
|
-
for (const col of features)
|
|
46
|
-
namesOfPredictors.push(col.name);
|
|
47
|
-
|
|
48
|
-
const predictorNamesColumn = DG.Column.fromStrings('feature', namesOfPredictors);
|
|
49
|
-
|
|
50
|
-
const dfRegrCoefs = DG.DataFrame.fromColumns([predictorNamesColumn, regressionCoeffs]);
|
|
51
|
-
dfRegrCoefs.name = 'Regression Coefficients';
|
|
52
|
-
|
|
53
|
-
return DG.Viewer.barChart(dfRegrCoefs,
|
|
54
|
-
{title: dfRegrCoefs.name, split: 'feature',
|
|
55
|
-
value: 'regression coefficient', valueAggrType: 'avg'});
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
// Scores Scatter Plot
|
|
59
|
-
export function scoresScatterPlot(
|
|
60
|
-
samplesNames: DG.Column, xScores: Array<DG.Column>, yScores: Array<DG.Column>,
|
|
61
|
-
): DG.Viewer {
|
|
62
|
-
const scoresColumns = [samplesNames];
|
|
63
|
-
|
|
64
|
-
for (let i = 0; i < xScores.length; i++) {
|
|
65
|
-
xScores[i].name = `x.score.t${i+1}`;
|
|
66
|
-
scoresColumns.push(xScores[i]);
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
for (let i = 0; i < yScores.length; i++) {
|
|
70
|
-
yScores[i].name = `y.score.u${i+1}`;
|
|
71
|
-
scoresColumns.push(yScores[i]);
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
const scores = DG.DataFrame.fromColumns(scoresColumns);
|
|
75
|
-
scores.name = 'Scores';
|
|
76
|
-
//grok.shell.addTableView(scores);
|
|
77
|
-
|
|
78
|
-
const index = xScores.length > 1 ? 1 : 0;
|
|
79
|
-
|
|
80
|
-
return DG.Viewer.scatterPlot(scores,
|
|
81
|
-
{title: scores.name,
|
|
82
|
-
x: xScores[0].name,
|
|
83
|
-
y: xScores[index].name,
|
|
84
|
-
markerType: 'circle',
|
|
85
|
-
labels: samplesNames.name,
|
|
86
|
-
});
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
// Loading Scatter Plot
|
|
90
|
-
export function loadingScatterPlot(features: DG.ColumnList, xLoadings: Array<DG.Column>): DG.Viewer {
|
|
91
|
-
const loadingCols = [];
|
|
92
|
-
|
|
93
|
-
const loadingLabels = [];
|
|
94
|
-
for (const col of features)
|
|
95
|
-
loadingLabels.push(col.name);
|
|
96
|
-
|
|
97
|
-
loadingCols.push(DG.Column.fromStrings('labels', loadingLabels));
|
|
98
|
-
|
|
99
|
-
for (let i = 0; i < xLoadings.length; i++) {
|
|
100
|
-
xLoadings[i].name = `x.loading.p${i+1}`;
|
|
101
|
-
loadingCols.push(xLoadings[i]);
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
const dfLoadings = DG.DataFrame.fromColumns(loadingCols);
|
|
105
|
-
dfLoadings.name = 'Loadings';
|
|
106
|
-
|
|
107
|
-
return DG.Viewer.scatterPlot(dfLoadings,
|
|
108
|
-
{title: dfLoadings.name,
|
|
109
|
-
x: xLoadings[0].name,
|
|
110
|
-
y: xLoadings[xLoadings.length - 1].name,
|
|
111
|
-
markerType: 'circle',
|
|
112
|
-
labels: 'labels',
|
|
113
|
-
});
|
|
114
|
-
}
|
|
115
|
-
|
|
116
|
-
// Add PLS visualization
|
|
117
|
-
export function addPLSvisualization(
|
|
118
|
-
table: DG.DataFrame, samplesNames: DG.Column, features: DG.ColumnList, predict: DG.Column, plsOutput: any,
|
|
119
|
-
): void {
|
|
120
|
-
const view = (table.id !== null) ? grok.shell.getTableView(table.name) : grok.shell.addTableView(table);
|
|
121
|
-
|
|
122
|
-
// 1. Predicted vs Reference scatter plot
|
|
123
|
-
view.addViewer(predictedVersusReferenceScatterPlot(samplesNames, predict, plsOutput[0]));
|
|
124
|
-
|
|
125
|
-
// 2. Regression Coefficients Bar Chart
|
|
126
|
-
view.addViewer(regressionCoefficientsBarChart(features, plsOutput[1]));
|
|
127
|
-
|
|
128
|
-
// 3. Loading Scatter Plot
|
|
129
|
-
view.addViewer(loadingScatterPlot(features, plsOutput[4]));
|
|
130
|
-
|
|
131
|
-
// 4. Scores Scatter Plot
|
|
132
|
-
view.addViewer(scoresScatterPlot(samplesNames, plsOutput[2], plsOutput[3]));
|
|
133
|
-
}
|
package/src/global.d.ts
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import * as grokNamespace from 'datagrok-api/grok';
|
|
2
|
+
import * as uiNamespace from 'datagrok-api/ui';
|
|
3
|
+
import * as DGNamespace from 'datagrok-api/dg';
|
|
4
|
+
import * as rxjsNamespace from 'rxjs';
|
|
5
|
+
import $Namespace from 'cash-dom';
|
|
6
|
+
|
|
7
|
+
declare global {
|
|
8
|
+
const grok: typeof grokNamespace;
|
|
9
|
+
const ui: typeof uiNamespace;
|
|
10
|
+
const DG: typeof DGNamespace;
|
|
11
|
+
const rjxs: typeof rxjsNamespace;
|
|
12
|
+
const $: typeof $Namespace;
|
|
13
|
+
}
|
|
@@ -26,7 +26,7 @@ export const COPY_SUFFIX = 'copy';
|
|
|
26
26
|
|
|
27
27
|
/** UI titles */
|
|
28
28
|
export enum TITLE {
|
|
29
|
-
KNN_IMPUTER = '
|
|
29
|
+
KNN_IMPUTER = 'KNN Imputation',
|
|
30
30
|
TABLE = 'Table',
|
|
31
31
|
IN_PLACE = 'In-place',
|
|
32
32
|
COLUMNS = 'Impute',
|
|
@@ -62,3 +62,5 @@ export enum HINT {
|
|
|
62
62
|
IMPUTATION_SETTINGS = 'Simple imputation settings',
|
|
63
63
|
KEEP_EMPTY = 'Defines whether to keep empty missing values failed to be imputed OR fill them using simple imputation',
|
|
64
64
|
};
|
|
65
|
+
|
|
66
|
+
export const MAX_INPUT_NAME_LENGTH = 15;
|
|
@@ -2,7 +2,7 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
-
import {TITLE, KNN_IMPUTER, ERROR_MSG, HINT} from './ui-constants';
|
|
5
|
+
import {TITLE, KNN_IMPUTER, ERROR_MSG, HINT, MAX_INPUT_NAME_LENGTH} from './ui-constants';
|
|
6
6
|
import {SUPPORTED_COLUMN_TYPES, METRIC_TYPE, DISTANCE_TYPE, MetricInfo, DEFAULT, MIN_NEIGHBORS,
|
|
7
7
|
impute, getMissingValsIndices, areThereFails, imputeFailed} from './knn-imputer';
|
|
8
8
|
|
|
@@ -190,7 +190,7 @@ export async function runKNNImputer(df?: DG.DataFrame): Promise<void> {
|
|
|
190
190
|
|
|
191
191
|
// Metrics components
|
|
192
192
|
const featuresMetrics = new Map<string, MetricInfo>();
|
|
193
|
-
const metricInfoInputs = new Map<string,
|
|
193
|
+
const metricInfoInputs = new Map<string, HTMLElement>();
|
|
194
194
|
const metricsDiv = ui.divV([]);
|
|
195
195
|
metricsDiv.style.overflow = 'auto';
|
|
196
196
|
|
|
@@ -214,7 +214,7 @@ export async function runKNNImputer(df?: DG.DataFrame): Promise<void> {
|
|
|
214
214
|
|
|
215
215
|
// The following should provide a slider (see the bug https://reddata.atlassian.net/browse/GROK-14431)
|
|
216
216
|
const prop = DG.Property.fromOptions({
|
|
217
|
-
'name': name,
|
|
217
|
+
'name': name.length < MAX_INPUT_NAME_LENGTH ? name : name.slice(0, MAX_INPUT_NAME_LENGTH).concat('...'),
|
|
218
218
|
'inputType': 'Float',
|
|
219
219
|
'min': 0,
|
|
220
220
|
'max': 10,
|
|
@@ -229,11 +229,11 @@ export async function runKNNImputer(df?: DG.DataFrame): Promise<void> {
|
|
|
229
229
|
distInfo.weight = value ?? settings.defaultWeight;
|
|
230
230
|
featuresMetrics.set(name, distInfo);
|
|
231
231
|
});
|
|
232
|
-
|
|
232
|
+
ui.tooltip.bind(weightInput.captionLabel, name);
|
|
233
|
+
ui.tooltip.bind(weightInput.input, HINT.WEIGHT);
|
|
233
234
|
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
metricsDiv.append(div);
|
|
235
|
+
metricInfoInputs.set(name, weightInput.root);
|
|
236
|
+
metricsDiv.append(weightInput.root);
|
|
237
237
|
});
|
|
238
238
|
|
|
239
239
|
// The main dialog
|
package/src/package.ts
CHANGED
|
@@ -73,23 +73,27 @@ export async function dbScan(df: DG.DataFrame, xCol: DG.Column, yCol: DG.Column,
|
|
|
73
73
|
//name: PCA
|
|
74
74
|
//description: Principal component analysis (PCA)
|
|
75
75
|
//input: dataframe table
|
|
76
|
-
//input: column_list features {type: numerical}
|
|
77
|
-
//input: int components = 2 {caption: Components} [Number of components.]
|
|
76
|
+
//input: column_list features {type: numerical; allowNulls: false}
|
|
77
|
+
//input: int components = 2 {caption: Components; nullable: false; min: 1} [Number of components.]
|
|
78
78
|
//input: bool center = false [Indicating whether the variables should be shifted to be zero centered.]
|
|
79
79
|
//input: bool scale = false [Indicating whether the variables should be scaled to have unit variance.]
|
|
80
80
|
export async function PCA(table: DG.DataFrame, features: DG.ColumnList, components: number, center: boolean, scale: boolean): Promise<void> {
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
col
|
|
91
|
-
|
|
81
|
+
try {
|
|
82
|
+
const pcaTable = await computePCA(table, features, components, center, scale);
|
|
83
|
+
addPrefixToEachColumnName('PC', pcaTable.columns);
|
|
84
|
+
|
|
85
|
+
if (table.id === null) // table is loaded from a local file
|
|
86
|
+
grok.shell.addTableView(pcaTable);
|
|
87
|
+
else {
|
|
88
|
+
const cols = table.columns;
|
|
89
|
+
|
|
90
|
+
for (const col of pcaTable.columns) {
|
|
91
|
+
col.name = cols.getUnusedName(col.name);
|
|
92
|
+
cols.add(col);
|
|
93
|
+
}
|
|
92
94
|
}
|
|
95
|
+
} catch (error) {
|
|
96
|
+
grok.shell.warning(`Failed to compute PCA: ${error instanceof Error ? error.message : 'platform issue'}`);
|
|
93
97
|
}
|
|
94
98
|
}
|
|
95
99
|
|
|
@@ -304,7 +308,7 @@ export async function MVA(): Promise<void> {
|
|
|
304
308
|
|
|
305
309
|
//name: MVA demo
|
|
306
310
|
//description: Multidimensional data analysis using partial least squares (PLS) regression. It identifies latent factors and constructs a linear model based on them.
|
|
307
|
-
//meta.demoPath: Compute | Multivariate
|
|
311
|
+
//meta.demoPath: Compute | Multivariate Analysis
|
|
308
312
|
export async function demoMultivariateAnalysis(): Promise<any> {
|
|
309
313
|
await runDemoMVA();
|
|
310
314
|
}
|
|
@@ -552,15 +556,15 @@ export function anova(): void {
|
|
|
552
556
|
runOneWayAnova();
|
|
553
557
|
}
|
|
554
558
|
|
|
555
|
-
//top-menu: ML | Missing Values
|
|
559
|
+
//top-menu: ML | Impute Missing Values...
|
|
556
560
|
//name: KNN impute
|
|
557
|
-
//
|
|
561
|
+
//description: Missing values imputation using the k-nearest neighbors method (KNN)
|
|
558
562
|
export function kNNImputation() {
|
|
559
563
|
runKNNImputer();
|
|
560
564
|
}
|
|
561
565
|
|
|
562
566
|
//name: KNN imputation for a table
|
|
563
|
-
//
|
|
567
|
+
//description: Missing values imputation using the k-nearest neighbors method
|
|
564
568
|
//input: dataframe table
|
|
565
569
|
export async function kNNImputationForTable(table: DG.DataFrame) {
|
|
566
570
|
await runKNNImputer(table);
|
package/src/pls/pls-constants.ts
CHANGED
|
@@ -50,13 +50,13 @@ export enum HINT {
|
|
|
50
50
|
|
|
51
51
|
/** Links to help */
|
|
52
52
|
export enum LINK {
|
|
53
|
-
PLS = '
|
|
54
|
-
MVA = '
|
|
55
|
-
MODEL = '
|
|
56
|
-
COEFFS = '
|
|
57
|
-
LOADINGS = '
|
|
58
|
-
EXPL_VARS = '
|
|
59
|
-
SCORES = '
|
|
53
|
+
PLS = '/help/explore/multivariate-analysis/pls#pls-components',
|
|
54
|
+
MVA = '/help/explore/multivariate-analysis/pls',
|
|
55
|
+
MODEL = '/help/explore/multivariate-analysis/plots/predicted-vs-reference',
|
|
56
|
+
COEFFS = '/help/explore/multivariate-analysis/plots/regression-coefficients',
|
|
57
|
+
LOADINGS = '/help/explore/multivariate-analysis/plots/loadings',
|
|
58
|
+
EXPL_VARS = '/help/explore/multivariate-analysis/plots/explained-variance',
|
|
59
|
+
SCORES = '/help/explore/multivariate-analysis/plots/scores',
|
|
60
60
|
}
|
|
61
61
|
|
|
62
62
|
/** Components consts */
|
package/src/pls/pls-ml.ts
CHANGED
|
@@ -276,7 +276,8 @@ export class PlsModel {
|
|
|
276
276
|
xColumnName: columns.byIndex(shift).name,
|
|
277
277
|
yColumnName: columns.byIndex(shift + (components > 1 ? 1 : 0)).name,
|
|
278
278
|
markerType: DG.MARKER_TYPE.CIRCLE,
|
|
279
|
-
|
|
279
|
+
//@ts-ignore
|
|
280
|
+
labelFormColumnNames: [TITLE.FEATURES],
|
|
280
281
|
help: LINK.LOADINGS,
|
|
281
282
|
}));
|
|
282
283
|
|
package/src/pls/pls-tools.ts
CHANGED
|
@@ -161,10 +161,13 @@ async function performMVA(input: PlsInput, analysisType: PLS_ANALYSIS): Promise<
|
|
|
161
161
|
yColumnName: pred.name,
|
|
162
162
|
showRegressionLine: true,
|
|
163
163
|
markerType: DG.MARKER_TYPE.CIRCLE,
|
|
164
|
-
|
|
164
|
+
showLabels: 'Always',
|
|
165
165
|
help: LINK.MODEL,
|
|
166
166
|
}));
|
|
167
167
|
|
|
168
|
+
if ((input.names !== undefined) && (input.names !== null))
|
|
169
|
+
predictVsReferScatter.setOptions({labelFormColumnNames: [input.names?.name]});
|
|
170
|
+
|
|
168
171
|
// 2. Regression Coefficients Bar Chart
|
|
169
172
|
result.regressionCoefficients.name = TITLE.REGR_COEFS;
|
|
170
173
|
const regrCoeffsBar = view.addViewer(DG.Viewer.barChart(buffer, {
|
|
@@ -184,7 +187,7 @@ async function performMVA(input: PlsInput, analysisType: PLS_ANALYSIS): Promise<
|
|
|
184
187
|
xColumnName: `${TITLE.XLOADING}1`,
|
|
185
188
|
yColumnName: `${TITLE.XLOADING}${result.xLoadings.length > 1 ? '2' : '1'}`,
|
|
186
189
|
markerType: DG.MARKER_TYPE.CIRCLE,
|
|
187
|
-
|
|
190
|
+
labelFormColumnNames: [TITLE.FEATURE],
|
|
188
191
|
help: LINK.LOADINGS,
|
|
189
192
|
}));
|
|
190
193
|
|
|
@@ -204,11 +207,13 @@ async function performMVA(input: PlsInput, analysisType: PLS_ANALYSIS): Promise<
|
|
|
204
207
|
xColumnName: plsCols[0].name,
|
|
205
208
|
yColumnName: (plsCols.length > 1) ? plsCols[1].name : result.uScores[0].name,
|
|
206
209
|
markerType: DG.MARKER_TYPE.CIRCLE,
|
|
207
|
-
labels: input.names?.name,
|
|
208
210
|
help: LINK.SCORES,
|
|
209
211
|
showViewerFormulaLines: true,
|
|
210
212
|
});
|
|
211
213
|
|
|
214
|
+
if ((input.names !== undefined) && (input.names !== null))
|
|
215
|
+
predictVsReferScatter.setOptions({labelFormColumnNames: [input.names?.name]});
|
|
216
|
+
|
|
212
217
|
// 4.3) create lines & circles
|
|
213
218
|
scoresScatter.meta.formulaLines.addAll(getLines(scoreNames));
|
|
214
219
|
view.addViewer(scoresScatter);
|
package/src/tests/anova-tests.ts
CHANGED
|
@@ -16,7 +16,7 @@ const ROWS = 100;
|
|
|
16
16
|
const ROWS_K = 100;
|
|
17
17
|
const COLS = 100;
|
|
18
18
|
const COMPONENTS = 3;
|
|
19
|
-
const TIMEOUT =
|
|
19
|
+
const TIMEOUT = 9000;
|
|
20
20
|
const INDEP_COLS = 2;
|
|
21
21
|
const DEP_COLS = 5;
|
|
22
22
|
const ERROR = 0.1;
|
|
@@ -27,6 +27,11 @@ category('Principal component analysis', () => {
|
|
|
27
27
|
await computePCA(df, df.columns, COMPONENTS, false, false);
|
|
28
28
|
}, {timeout: TIMEOUT, benchmark: true});
|
|
29
29
|
|
|
30
|
+
test(`Performance: 1K rows, 5K cols, ${COMPONENTS} components`, async () => {
|
|
31
|
+
const df = grok.data.demo.randomWalk(1000, 5000);
|
|
32
|
+
await computePCA(df, df.columns, COMPONENTS, false, false);
|
|
33
|
+
}, {timeout: TIMEOUT, benchmark: true});
|
|
34
|
+
|
|
30
35
|
test('Correctness', async () => {
|
|
31
36
|
// Data
|
|
32
37
|
const df = regressionDataset(ROWS, COMPONENTS, DEP_COLS);
|
package/src/utils.ts
CHANGED
|
@@ -11,6 +11,9 @@ const FEATURES_COUNT_MIN = 1;
|
|
|
11
11
|
const PERCENTAGE_MIN = 0;
|
|
12
12
|
const PERCENTAGE_MAX = 100;
|
|
13
13
|
const MAX_ELEMENTS_COUNT = 100000000;
|
|
14
|
+
export const NIPALS_PREFER_COLS_COUNT = 900;
|
|
15
|
+
|
|
16
|
+
const TINY = 0.000001;
|
|
14
17
|
|
|
15
18
|
// Error messages
|
|
16
19
|
const COMP_POSITVE_MES = 'components must be positive.';
|
|
@@ -180,3 +183,90 @@ export function getRowsOfNumericalColumnns(columnList: DG.ColumnList): any[][] {
|
|
|
180
183
|
|
|
181
184
|
return output;
|
|
182
185
|
}
|
|
186
|
+
|
|
187
|
+
/** Return centered data */
|
|
188
|
+
function centerDf(df: DG.DataFrame): DG.DataFrame {
|
|
189
|
+
const rowCount = df.rowCount;
|
|
190
|
+
|
|
191
|
+
for (const col of df.columns) {
|
|
192
|
+
if (col.isNumerical) {
|
|
193
|
+
const avg = col.stats.avg;
|
|
194
|
+
|
|
195
|
+
if (Math.abs(avg) > TINY) {
|
|
196
|
+
const raw = col.getRawData();
|
|
197
|
+
|
|
198
|
+
for (let i = 0; i < rowCount; ++i)
|
|
199
|
+
raw[i] -= avg;
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
return df;
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
/** Return scaled & centered data */
|
|
207
|
+
function centerScaleDf(df: DG.DataFrame): DG.DataFrame {
|
|
208
|
+
const rowCount = df.rowCount;
|
|
209
|
+
|
|
210
|
+
for (const col of df.columns) {
|
|
211
|
+
if (col.isNumerical) {
|
|
212
|
+
const stdev = col.stats.stdev;
|
|
213
|
+
const avg = col.stats.avg;
|
|
214
|
+
const raw = col.getRawData();
|
|
215
|
+
|
|
216
|
+
if (stdev > 0) {
|
|
217
|
+
for (let i = 0; i < rowCount; ++i)
|
|
218
|
+
raw[i] = (raw[i] - avg) / stdev;
|
|
219
|
+
} else {
|
|
220
|
+
for (let i = 0; i < rowCount; ++i)
|
|
221
|
+
raw[i] -= avg;
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
return df;
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
/** Return scaled data */
|
|
229
|
+
function scaleDf(df: DG.DataFrame): DG.DataFrame {
|
|
230
|
+
const rowCount = df.rowCount;
|
|
231
|
+
|
|
232
|
+
for (const col of df.columns) {
|
|
233
|
+
if (col.isNumerical) {
|
|
234
|
+
const stdev = col.stats.stdev;
|
|
235
|
+
|
|
236
|
+
if (Math.abs(stdev - 1) > TINY && (stdev > 0)) {
|
|
237
|
+
const raw = col.getRawData();
|
|
238
|
+
|
|
239
|
+
for (let i = 0; i < rowCount; ++i)
|
|
240
|
+
raw[i] /= stdev;
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
return df;
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
/** Return standardized dataframe */
|
|
248
|
+
export function centerScaleDataFrame(df: DG.DataFrame, toCenter: boolean, toScale: boolean): DG.DataFrame {
|
|
249
|
+
if (toCenter) {
|
|
250
|
+
if (toScale)
|
|
251
|
+
return centerScaleDf(df);
|
|
252
|
+
else
|
|
253
|
+
return centerDf(df);
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
if (toScale)
|
|
257
|
+
return scaleDf(df);
|
|
258
|
+
|
|
259
|
+
return df;
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
/** Return table of columns with non-zero variance */
|
|
263
|
+
export function extractNonConstantColsDf(features: DG.ColumnList): DG.DataFrame {
|
|
264
|
+
const cols: DG.Column[]= [];
|
|
265
|
+
|
|
266
|
+
for (const col of features) {
|
|
267
|
+
if ((col.stats.stdev > 0) && (col.stats.missingValueCount < 1))
|
|
268
|
+
cols.push(col);
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
return DG.DataFrame.fromColumns(cols);
|
|
272
|
+
}
|