@datagrok/eda 1.0.3 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/146.js +1 -1
- package/dist/{d711f70338306e5bddc4.wasm → 191bd97af33c713bf78e.wasm} +0 -0
- package/dist/package.js +2 -2
- package/package.json +49 -48
- package/src/EDAui.ts +19 -15
- package/src/package.ts +46 -26
- package/wasm/EDA.wasm +0 -0
- package/wasm/PLS/pls.cpp +2 -173
- package/src/demos.ts +0 -38
package/package.json
CHANGED
|
@@ -1,49 +1,50 @@
|
|
|
1
1
|
{
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
2
|
+
"name": "@datagrok/eda",
|
|
3
|
+
"friendlyName": "EDA",
|
|
4
|
+
"version": "1.1.0",
|
|
5
|
+
"description": "Exploratory Data Analysis Tools",
|
|
6
|
+
"dependencies": {
|
|
7
|
+
"datagrok-api": "latest",
|
|
8
|
+
"cash-dom": "latest",
|
|
9
|
+
"dayjs": "latest",
|
|
10
|
+
"@datagrok-libraries/utils": "latest",
|
|
11
|
+
"@datagrok-libraries/tutorials": "^1.3.3"
|
|
12
|
+
},
|
|
13
|
+
"author": {
|
|
14
|
+
"name": "Viktor Makarichev",
|
|
15
|
+
"email": "vmakarichev@datagrok.ai"
|
|
16
|
+
},
|
|
17
|
+
"devDependencies": {
|
|
18
|
+
"webpack": "latest",
|
|
19
|
+
"webpack-cli": "latest",
|
|
20
|
+
"ts-loader": "latest",
|
|
21
|
+
"typescript": "latest"
|
|
22
|
+
},
|
|
23
|
+
"scripts": {
|
|
24
|
+
"link-all": "npm link datagrok-api @datagrok-libraries/utils @datagrok-libraries/tutorials",
|
|
25
|
+
"debug-eda": "webpack && grok publish",
|
|
26
|
+
"release-eda": "webpack && grok publish --release",
|
|
27
|
+
"build-eda": "webpack",
|
|
28
|
+
"build": "webpack",
|
|
29
|
+
"debug-eda-dev": "webpack && grok publish dev",
|
|
30
|
+
"release-eda-dev": "webpack && grok publish dev --release",
|
|
31
|
+
"debug-eda-local": "webpack && grok publish local",
|
|
32
|
+
"release-eda-local": "webpack && grok publish local --release",
|
|
33
|
+
"build-all": "npm --prefix ./../../js-api run build && npm --prefix ./../../libraries/utils run build && npm --prefix ./../../libraries/tutorials run build && npm run build"
|
|
34
|
+
},
|
|
35
|
+
"canEdit": [
|
|
36
|
+
"Developers"
|
|
37
|
+
],
|
|
38
|
+
"canView": [
|
|
39
|
+
"All users"
|
|
40
|
+
],
|
|
41
|
+
"repository": {
|
|
42
|
+
"type": "git",
|
|
43
|
+
"url": "https://github.com/datagrok-ai/public.git",
|
|
44
|
+
"directory": "packages/EDA"
|
|
45
|
+
},
|
|
46
|
+
"category": "Machine Learning",
|
|
47
|
+
"sources": [
|
|
48
|
+
"wasm/EDA.js"
|
|
49
|
+
]
|
|
50
|
+
}
|
package/src/EDAui.ts
CHANGED
|
@@ -7,16 +7,16 @@ import * as DG from 'datagrok-api/dg';
|
|
|
7
7
|
// Rename PCA columns
|
|
8
8
|
export function renamePCAcolumns(pcaTable: DG.DataFrame): DG.DataFrame {
|
|
9
9
|
for (const col of pcaTable.columns.toList())
|
|
10
|
-
col.name = '
|
|
10
|
+
col.name = 'PCA' + col.name;
|
|
11
11
|
|
|
12
12
|
return pcaTable;
|
|
13
13
|
}
|
|
14
14
|
|
|
15
15
|
// Predicted vs Reference scatter plot
|
|
16
|
-
export function predictedVersusReferenceScatterPlot(reference: DG.Column, prediction: DG.Column): DG.Viewer {
|
|
16
|
+
export function predictedVersusReferenceScatterPlot(samplesNames: DG.Column, reference: DG.Column, prediction: DG.Column): DG.Viewer {
|
|
17
17
|
prediction.name = reference.name + '(predicted)';
|
|
18
18
|
|
|
19
|
-
let dfReferencePrediction = DG.DataFrame.fromColumns([reference, prediction]);
|
|
19
|
+
let dfReferencePrediction = DG.DataFrame.fromColumns([samplesNames, reference, prediction]);
|
|
20
20
|
dfReferencePrediction.name = 'Reference vs. Predicted';
|
|
21
21
|
|
|
22
22
|
return DG.Viewer.scatterPlot(dfReferencePrediction,
|
|
@@ -24,7 +24,8 @@ export function predictedVersusReferenceScatterPlot(reference: DG.Column, predic
|
|
|
24
24
|
x: reference.name,
|
|
25
25
|
y: prediction.name,
|
|
26
26
|
showRegressionLine: true,
|
|
27
|
-
markerType: 'circle'
|
|
27
|
+
markerType: 'circle',
|
|
28
|
+
labels: samplesNames.name
|
|
28
29
|
});
|
|
29
30
|
}
|
|
30
31
|
|
|
@@ -46,9 +47,9 @@ export function regressionCoefficientsBarChart(features: DG.ColumnList, regressi
|
|
|
46
47
|
}
|
|
47
48
|
|
|
48
49
|
// Scores Scatter Plot
|
|
49
|
-
export function scoresScatterPlot(xScores: Array<DG.Column>, yScores: Array<DG.Column>): DG.Viewer {
|
|
50
|
+
export function scoresScatterPlot(samplesNames: DG.Column, xScores: Array<DG.Column>, yScores: Array<DG.Column>): DG.Viewer {
|
|
50
51
|
|
|
51
|
-
let scoresColumns = [];
|
|
52
|
+
let scoresColumns = [samplesNames];
|
|
52
53
|
|
|
53
54
|
for (let i = 0; i < xScores.length; i++) {
|
|
54
55
|
xScores[i].name = `x.score.t${i+1}`;
|
|
@@ -63,12 +64,15 @@ export function scoresScatterPlot(xScores: Array<DG.Column>, yScores: Array<DG.C
|
|
|
63
64
|
let scores = DG.DataFrame.fromColumns(scoresColumns);
|
|
64
65
|
scores.name = 'Scores';
|
|
65
66
|
//grok.shell.addTableView(scores);
|
|
67
|
+
|
|
68
|
+
const index = xScores.length > 1 ? 1 : 0;
|
|
66
69
|
|
|
67
70
|
return DG.Viewer.scatterPlot(scores,
|
|
68
71
|
{ title: scores.name,
|
|
69
72
|
x: xScores[0].name,
|
|
70
|
-
y:
|
|
71
|
-
markerType: 'circle'
|
|
73
|
+
y: xScores[index].name,
|
|
74
|
+
markerType: 'circle',
|
|
75
|
+
labels: samplesNames.name
|
|
72
76
|
});
|
|
73
77
|
}
|
|
74
78
|
|
|
@@ -100,19 +104,19 @@ export function loadingScatterPlot(features: DG.ColumnList, xLoadings: Array<DG.
|
|
|
100
104
|
}
|
|
101
105
|
|
|
102
106
|
// Add PLS visualization
|
|
103
|
-
export function addPLSvisualization(table: DG.DataFrame, features: DG.ColumnList, predict: DG.Column, plsOutput: any): void {
|
|
107
|
+
export function addPLSvisualization(table: DG.DataFrame, samplesNames: DG.Column, features: DG.ColumnList, predict: DG.Column, plsOutput: any): void {
|
|
104
108
|
|
|
105
109
|
let view = grok.shell.getTableView(table.name);
|
|
106
110
|
|
|
107
111
|
// 1. Predicted vs Reference scatter plot
|
|
108
|
-
view.addViewer(predictedVersusReferenceScatterPlot(predict, plsOutput[0]));
|
|
112
|
+
view.addViewer(predictedVersusReferenceScatterPlot(samplesNames, predict, plsOutput[0]));
|
|
109
113
|
|
|
110
114
|
// 2. Regression Coefficients Bar Chart
|
|
111
|
-
view.addViewer(regressionCoefficientsBarChart(features, plsOutput[1]));
|
|
115
|
+
view.addViewer(regressionCoefficientsBarChart(features, plsOutput[1]));
|
|
112
116
|
|
|
113
|
-
// 3.
|
|
114
|
-
view.addViewer(scoresScatterPlot(plsOutput[2], plsOutput[3]));
|
|
115
|
-
|
|
116
|
-
// 4. Loading Scatter Plot
|
|
117
|
+
// 3. Loading Scatter Plot
|
|
117
118
|
view.addViewer(loadingScatterPlot(features, plsOutput[4]));
|
|
119
|
+
|
|
120
|
+
// 4. Scores Scatter Plot
|
|
121
|
+
view.addViewer(scoresScatterPlot(samplesNames, plsOutput[2], plsOutput[3]));
|
|
118
122
|
}
|
package/src/package.ts
CHANGED
|
@@ -7,8 +7,8 @@ import {DemoScript} from '@datagrok-libraries/tutorials/src/demo-script';
|
|
|
7
7
|
|
|
8
8
|
import {_initEDAAPI} from '../wasm/EDAAPI';
|
|
9
9
|
import {computePCA, computePLS} from './EDAtools';
|
|
10
|
-
import {renamePCAcolumns, addPLSvisualization
|
|
11
|
-
|
|
10
|
+
import {renamePCAcolumns, addPLSvisualization, regressionCoefficientsBarChart,
|
|
11
|
+
scoresScatterPlot, predictedVersusReferenceScatterPlot} from './EDAui';
|
|
12
12
|
import {carsDataframe, testDataForBinaryClassification} from './dataGenerators';
|
|
13
13
|
import {LINEAR, RBF, POLYNOMIAL, SIGMOID,
|
|
14
14
|
getTrainedModel, getPrediction, showTrainReport, getPackedModel} from './svm';
|
|
@@ -25,7 +25,7 @@ export async function init(): Promise<void> {
|
|
|
25
25
|
await _initEDAAPI();
|
|
26
26
|
}
|
|
27
27
|
|
|
28
|
-
//top-menu: Tools | Data Science |
|
|
28
|
+
//top-menu: Tools | Data Science | Principal Component Analysis...
|
|
29
29
|
//name: PCA
|
|
30
30
|
//description: Principal component analysis (PCA).
|
|
31
31
|
//input: dataframe table
|
|
@@ -40,50 +40,70 @@ export async function PCA(table: DG.DataFrame, features: DG.ColumnList, componen
|
|
|
40
40
|
return renamePCAcolumns(await computePCA(table, features, components, center, scale));
|
|
41
41
|
}
|
|
42
42
|
|
|
43
|
-
//top-menu: Tools | Data Science | PLS
|
|
44
|
-
//name: PLS
|
|
43
|
+
//top-menu: Tools | Data Science | Multivariate Analysis (PLS)...
|
|
44
|
+
//name: Multivariate Analysis (PLS)
|
|
45
45
|
//description: Partial least square regression (PLS).
|
|
46
46
|
//input: dataframe table
|
|
47
|
+
//input: column names
|
|
47
48
|
//input: column_list features
|
|
48
49
|
//input: column predict
|
|
49
50
|
//input: int components = 3
|
|
50
|
-
export async function PLS(table: DG.DataFrame,
|
|
51
|
+
export async function PLS(table: DG.DataFrame, names: DG.Column, features: DG.ColumnList,
|
|
52
|
+
predict: DG.Column, components: number): Promise<void>
|
|
53
|
+
{
|
|
51
54
|
const plsResults = await computePLS(table, features, predict, components);
|
|
52
|
-
addPLSvisualization(table, features, predict, plsResults);
|
|
55
|
+
addPLSvisualization(table, names, features, predict, plsResults);
|
|
53
56
|
}
|
|
54
57
|
|
|
55
58
|
//name: MVA demo
|
|
56
|
-
//description:
|
|
57
|
-
//meta.demoPath:
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
59
|
+
//description: Multidimensional data analysis using partial least squares (PLS) regression. It reduces the predictors to a smaller set of uncorrelated components and performs least squares regression on them.
|
|
60
|
+
//meta.demoPath: Compute | Multivariate analysis
|
|
61
|
+
//meta.isDemoScript: True
|
|
62
|
+
export async function demoMultivariateAnalysis(): Promise<any> {
|
|
63
|
+
const demoScript = new DemoScript('Partial least squares regression',
|
|
64
|
+
'Analysis of multidimensional data.');
|
|
61
65
|
|
|
62
66
|
const cars = carsDataframe();
|
|
63
67
|
|
|
64
68
|
const components = 3;
|
|
69
|
+
const names = cars.columns.byName('model');
|
|
65
70
|
const predict = cars.columns.byName('price');
|
|
66
71
|
const features = cars.columns.remove('price').remove('model');
|
|
67
72
|
const plsOutput = await computePLS(cars, features, predict, components);
|
|
68
73
|
|
|
69
74
|
const sourceCars = carsDataframe();
|
|
70
|
-
grok.shell.addTableView(sourceCars);
|
|
71
75
|
sourceCars.name = 'Cars';
|
|
72
|
-
let view
|
|
76
|
+
let view: any;
|
|
77
|
+
let dialog: any;
|
|
73
78
|
|
|
74
79
|
await demoScript
|
|
75
|
-
.step('
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
.add(ui.
|
|
84
|
-
.
|
|
85
|
-
.
|
|
86
|
-
|
|
80
|
+
.step('Data', async () => {
|
|
81
|
+
grok.shell.addTableView(sourceCars);
|
|
82
|
+
view = grok.shell.getTableView(sourceCars.name);
|
|
83
|
+
}, {description: 'Each car has many features - patterns extraction is complicated.', delay: 0})
|
|
84
|
+
.step('Model', async () => {
|
|
85
|
+
dialog = ui.dialog({title:'Multivariate Analysis (PLS)'})
|
|
86
|
+
.add(ui.tableInput('Table', sourceCars))
|
|
87
|
+
.add(ui.columnsInput('Features', cars, features.toList, {available: undefined, checked: features.names()}))
|
|
88
|
+
.add(ui.columnInput('Names', cars, names, undefined))
|
|
89
|
+
.add(ui.columnInput('Predict', cars, predict, undefined))
|
|
90
|
+
.add(ui.intInput('Components', components, undefined))
|
|
91
|
+
.onOK(() => {
|
|
92
|
+
grok.shell.info('Multivariate analysis has been already performed.');
|
|
93
|
+
})
|
|
94
|
+
.show({x: 400, y: 140});
|
|
95
|
+
}, {description: 'Predict car price by its other features.', delay: 0})
|
|
96
|
+
.step('Regression coeffcicients', async () =>
|
|
97
|
+
{
|
|
98
|
+
dialog.close();
|
|
99
|
+
view.addViewer(regressionCoefficientsBarChart(features, plsOutput[1]))},
|
|
100
|
+
{description: 'The feature "diesel" affects the price the most.', delay: 0})
|
|
101
|
+
.step('Scores', async () =>
|
|
102
|
+
{view.addViewer(scoresScatterPlot(names, plsOutput[2], plsOutput[3]))},
|
|
103
|
+
{description: 'Similarities & dissimilarities: alfaromeo and mercedes are different.', delay: 0})
|
|
104
|
+
.step('Prediction', async () =>
|
|
105
|
+
{view.addViewer(predictedVersusReferenceScatterPlot(names, predict, plsOutput[0]))},
|
|
106
|
+
{description: 'Closer to the line means better price prediction.', delay: 0})
|
|
87
107
|
.start();
|
|
88
108
|
}
|
|
89
109
|
|
package/wasm/EDA.wasm
CHANGED
|
Binary file
|
package/wasm/PLS/pls.cpp
CHANGED
|
@@ -12,177 +12,6 @@ using namespace Eigen;
|
|
|
12
12
|
using pls::Float;
|
|
13
13
|
using pls::Double;
|
|
14
14
|
|
|
15
|
-
/* Partial Least Square (PLS1).
|
|
16
|
-
predictorColumnsDataPtr - data from columns that are used for prediction
|
|
17
|
-
rowCount - number of rows
|
|
18
|
-
columnCount - number of columns
|
|
19
|
-
responseColumnDataPtr - data from column that is predicted, i.e. responce
|
|
20
|
-
componentsCount - number of components that extracted in PLS
|
|
21
|
-
predictionDataPtr - prediction obtained using PLS (its size is equal to the size of responce)
|
|
22
|
-
regressionCoefficients - coeffcient of linear regression that are computed (their size is eqaul to the number of columns)
|
|
23
|
-
*/
|
|
24
|
-
int pls::partialLeastSquare(Float * predictorColumnsDataPtr,
|
|
25
|
-
const int rowCount,
|
|
26
|
-
const int columnCount,
|
|
27
|
-
Float * responseColumnDataPtr,
|
|
28
|
-
const int componentsCount,
|
|
29
|
-
Float * predictionDataPtr,
|
|
30
|
-
Float * regressionCoefficients) noexcept
|
|
31
|
-
{
|
|
32
|
-
// check correctness of arguments
|
|
33
|
-
if (componentsCount <= 0 || componentsCount > columnCount)
|
|
34
|
-
return UNCORRECT_ARGUMENTS_ERROR;
|
|
35
|
-
|
|
36
|
-
// Further, notation from the paper https://doi.org/10.1002/cem.2589 is used (see Algorithm 2).
|
|
37
|
-
|
|
38
|
-
// create matrix, which is associated with predictor data
|
|
39
|
-
Map < Matrix<Float, Dynamic, Dynamic, ColMajor>> D(predictorColumnsDataPtr, rowCount, columnCount);
|
|
40
|
-
|
|
41
|
-
// compute mean value of each column of D
|
|
42
|
-
Vector<Float, Dynamic> mu = D.colwise().mean();
|
|
43
|
-
|
|
44
|
-
// mean-centered version of D
|
|
45
|
-
Matrix<Float, Dynamic, Dynamic, ColMajor> X = D.rowwise() - mu.transpose();
|
|
46
|
-
|
|
47
|
-
// vector for standard deviations of X
|
|
48
|
-
Vector<Float, Dynamic> stdDevX(columnCount);
|
|
49
|
-
|
|
50
|
-
Float rowCountSqrt = sqrt(static_cast<Float>(rowCount));
|
|
51
|
-
|
|
52
|
-
// normilizing X-columns
|
|
53
|
-
for (int i = 0; i < columnCount; i++)
|
|
54
|
-
{
|
|
55
|
-
stdDevX(i) = X.col(i).norm() / rowCountSqrt;
|
|
56
|
-
X.col(i) = X.col(i) / stdDevX(i);
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
// create a vector, which is associated with responce or predicted data
|
|
60
|
-
Map<Vector<Float, Dynamic>> ySource(responseColumnDataPtr, rowCount);
|
|
61
|
-
|
|
62
|
-
// mean value of the responce
|
|
63
|
-
Vector<Float, 1> meanY;
|
|
64
|
-
meanY(0) = ySource.mean();
|
|
65
|
-
|
|
66
|
-
// mean-centered version of the responce
|
|
67
|
-
Vector<Float, Dynamic> y = ySource.rowwise() - meanY;
|
|
68
|
-
|
|
69
|
-
// standard deviation
|
|
70
|
-
Float stdDevY = sqrt(y.squaredNorm() / rowCount);
|
|
71
|
-
|
|
72
|
-
// normalizing
|
|
73
|
-
y /= stdDevY;
|
|
74
|
-
|
|
75
|
-
// create a vector, which is associtated with regression coefficients
|
|
76
|
-
Map<Vector<Float, Dynamic>> b(regressionCoefficients, columnCount);
|
|
77
|
-
|
|
78
|
-
// create a vector, which is associated with prediction data
|
|
79
|
-
Map<Vector<Float, Dynamic>> prediction(predictionDataPtr, rowCount);
|
|
80
|
-
|
|
81
|
-
// PLS1 algorithm routine
|
|
82
|
-
|
|
83
|
-
Matrix<Float, Dynamic, Dynamic, ColMajor> W(columnCount, componentsCount);
|
|
84
|
-
|
|
85
|
-
Matrix<Float, Dynamic, Dynamic, ColMajor> P(columnCount, componentsCount);
|
|
86
|
-
|
|
87
|
-
Matrix<Float, Dynamic, Dynamic, ColMajor> T(rowCount, componentsCount);
|
|
88
|
-
|
|
89
|
-
Vector<Float, Dynamic> normTau(componentsCount);
|
|
90
|
-
|
|
91
|
-
Vector<Float, Dynamic> q(componentsCount);
|
|
92
|
-
|
|
93
|
-
Vector<Float, Dynamic> normV(componentsCount);
|
|
94
|
-
|
|
95
|
-
// PLS1 algorithm: see Algorithm 2 in https://doi.org/10.1002/cem.2589
|
|
96
|
-
|
|
97
|
-
Vector<Float, Dynamic> w = (X.transpose() * y);
|
|
98
|
-
|
|
99
|
-
normV(0) = w.norm();
|
|
100
|
-
|
|
101
|
-
// prevent division by zero
|
|
102
|
-
if (normV(0) == static_cast<Float>(0))
|
|
103
|
-
return METHOD_ERROR;
|
|
104
|
-
|
|
105
|
-
w = w / normV(0);
|
|
106
|
-
|
|
107
|
-
W.col(0) = w;
|
|
108
|
-
|
|
109
|
-
Vector<Float, Dynamic> t = X * w;
|
|
110
|
-
|
|
111
|
-
normTau(0) = t.norm();
|
|
112
|
-
|
|
113
|
-
// prevent division by zero
|
|
114
|
-
if (normTau(0) == static_cast<Float>(0))
|
|
115
|
-
return METHOD_ERROR;
|
|
116
|
-
|
|
117
|
-
t = t / normTau(0);
|
|
118
|
-
|
|
119
|
-
T.col(0) = t;
|
|
120
|
-
|
|
121
|
-
Vector<Float, Dynamic> p = X.transpose() * t;
|
|
122
|
-
|
|
123
|
-
P.col(0) = p;
|
|
124
|
-
|
|
125
|
-
q(0) = t.transpose() * y;
|
|
126
|
-
|
|
127
|
-
for (int a = 1; a < componentsCount; a++)
|
|
128
|
-
{
|
|
129
|
-
w = normV(a - 1) * (w - p / normTau(a - 1));
|
|
130
|
-
|
|
131
|
-
normV(a) = w.norm();
|
|
132
|
-
|
|
133
|
-
// prevent division by zero
|
|
134
|
-
if (normV(a) == static_cast<Float>(0))
|
|
135
|
-
return METHOD_ERROR;
|
|
136
|
-
|
|
137
|
-
w = w / normV(a);
|
|
138
|
-
|
|
139
|
-
W.col(a) = w;
|
|
140
|
-
|
|
141
|
-
t = X * w;
|
|
142
|
-
|
|
143
|
-
t = t - T.leftCols(a) * (T.leftCols(a).transpose() * t);
|
|
144
|
-
|
|
145
|
-
normTau(a) = t.norm();
|
|
146
|
-
|
|
147
|
-
// prevent division by zero
|
|
148
|
-
if (normTau(a) == static_cast<Float>(0))
|
|
149
|
-
return METHOD_ERROR;
|
|
150
|
-
|
|
151
|
-
t = t / normTau(a);
|
|
152
|
-
|
|
153
|
-
T.col(a) = t;
|
|
154
|
-
|
|
155
|
-
p = X.transpose() * t;
|
|
156
|
-
|
|
157
|
-
P.col(a) = p;
|
|
158
|
-
|
|
159
|
-
q(a) = t.transpose() * y;
|
|
160
|
-
} // for a
|
|
161
|
-
|
|
162
|
-
// compute coefficients of regression
|
|
163
|
-
Matrix<Float, Dynamic, Dynamic> H = P.transpose() * W;
|
|
164
|
-
|
|
165
|
-
// chech existence of inverse matrix
|
|
166
|
-
if (H.determinant() == static_cast<Float>(0))
|
|
167
|
-
return METHOD_ERROR;
|
|
168
|
-
|
|
169
|
-
b = W * H.inverse() * q;
|
|
170
|
-
|
|
171
|
-
for (int i = 0; i < columnCount; i++)
|
|
172
|
-
b(i) *= stdDevY / stdDevX(i);
|
|
173
|
-
|
|
174
|
-
// TODO: to discuss a constant term of the regression
|
|
175
|
-
// a constant term
|
|
176
|
-
//Vector<Float, 1> shift;
|
|
177
|
-
//shift(0) = ySource(0) - D.row(0) * b;
|
|
178
|
-
//q(0) - P.col(0).transpose().dot(b);
|
|
179
|
-
//prediction = (D * b).rowwise() + shift;
|
|
180
|
-
|
|
181
|
-
prediction = D * b;
|
|
182
|
-
|
|
183
|
-
return NO_ERROR;
|
|
184
|
-
} // partialLeastSquare
|
|
185
|
-
|
|
186
15
|
/* Partial Least Square (PLS1) - extended version: scores data is provided.
|
|
187
16
|
predictorColumnsDataPtr - data from columns that are used for prediction (X)
|
|
188
17
|
rowCount - number of rows
|
|
@@ -358,7 +187,7 @@ int pls::partialLeastSquareExtended(Float * predictorColumnsDataPtr,
|
|
|
358
187
|
// compute coefficients of regression
|
|
359
188
|
Matrix<Float, Dynamic, Dynamic> H = P.transpose() * W;
|
|
360
189
|
|
|
361
|
-
//
|
|
190
|
+
// check existence of inverse matrix
|
|
362
191
|
if (H.determinant() == static_cast<Float>(0))
|
|
363
192
|
return METHOD_ERROR;
|
|
364
193
|
|
|
@@ -370,7 +199,7 @@ int pls::partialLeastSquareExtended(Float * predictorColumnsDataPtr,
|
|
|
370
199
|
b(i) *= stdDevY / stdDevX(i);
|
|
371
200
|
|
|
372
201
|
// compute predictions
|
|
373
|
-
prediction = D * b;
|
|
202
|
+
prediction = D * b;
|
|
374
203
|
|
|
375
204
|
// Remove the following comments in order to print and verify results
|
|
376
205
|
//cout << "\nW_star:\n" << Wstar << endl;
|
package/src/demos.ts
DELETED
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
/* Do not change these import lines to match external modules in webpack configuration */
|
|
2
|
-
import * as grok from 'datagrok-api/grok';
|
|
3
|
-
import * as ui from 'datagrok-api/ui';
|
|
4
|
-
import * as DG from 'datagrok-api/dg';
|
|
5
|
-
|
|
6
|
-
import {computePLS} from './EDAtools';
|
|
7
|
-
import {addPLSvisualization} from './EDAui';
|
|
8
|
-
|
|
9
|
-
// Demo multivariate analysis (PLS)
|
|
10
|
-
export async function demoPLS(rowCount: number, colCount: number, componentsCount: number): Promise<void> {
|
|
11
|
-
// check inputs
|
|
12
|
-
if ((rowCount <= 0) || (colCount <= 0) || (componentsCount <= 0) || (componentsCount > colCount)) {
|
|
13
|
-
const bal = new DG.Balloon;
|
|
14
|
-
bal.error('Incorrect inputs.');
|
|
15
|
-
return;
|
|
16
|
-
}
|
|
17
|
-
|
|
18
|
-
// further, custom interface is provided
|
|
19
|
-
|
|
20
|
-
const PREDICT = 'Reference';
|
|
21
|
-
|
|
22
|
-
const bigDemoTable = grok.data.testData('random walk', rowCount, colCount);
|
|
23
|
-
bigDemoTable.name = `${rowCount} x ${colCount}`;
|
|
24
|
-
|
|
25
|
-
for (const col of bigDemoTable.columns)
|
|
26
|
-
col.name = 'Feature ' + col.name;
|
|
27
|
-
bigDemoTable.columns.byIndex(0).name = PREDICT;
|
|
28
|
-
|
|
29
|
-
grok.shell.addTableView(bigDemoTable);
|
|
30
|
-
let predict = bigDemoTable.columns.byName(PREDICT);
|
|
31
|
-
let features = bigDemoTable.columns.remove(PREDICT);
|
|
32
|
-
|
|
33
|
-
const plsResults = await computePLS(bigDemoTable, features, predict, componentsCount);
|
|
34
|
-
|
|
35
|
-
addPLSvisualization(bigDemoTable, features, predict, plsResults);
|
|
36
|
-
|
|
37
|
-
bigDemoTable.columns.add(predict);
|
|
38
|
-
}
|