@datagrok/eda 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -0
- package/detectors.js +9 -0
- package/dist/111.js +2 -0
- package/dist/146.js +2 -0
- package/dist/155.js +2 -0
- package/dist/355.js +2 -0
- package/dist/584.js +2 -0
- package/dist/604.js +2 -0
- package/dist/632.js +2 -0
- package/dist/645.js +2 -0
- package/dist/93.js +2 -0
- package/dist/d711f70338306e5bddc4.wasm +0 -0
- package/dist/package-test.js +2 -0
- package/dist/package.js +2 -0
- package/package.json +49 -0
- package/package.png +0 -0
- package/scripts/command.txt +1 -0
- package/scripts/exportForTS.py +862 -0
- package/scripts/exportForTSConstants.py +93 -0
- package/scripts/func.json +1 -0
- package/scripts/module.json +11 -0
- package/src/EDAtools.ts +46 -0
- package/src/EDAui.ts +118 -0
- package/src/dataGenerators.ts +74 -0
- package/src/demos.ts +38 -0
- package/src/package-test.ts +12 -0
- package/src/package.ts +248 -0
- package/src/svm.ts +485 -0
- package/src/utils.ts +51 -0
- package/tsconfig.json +71 -0
- package/wasm/EDA.js +443 -0
- package/wasm/EDA.wasm +0 -0
- package/wasm/EDAAPI.js +131 -0
- package/wasm/EDAForWebWorker.js +21 -0
- package/wasm/PCA/PCA.cpp +151 -0
- package/wasm/PCA/PCA.h +48 -0
- package/wasm/PLS/PLS.h +64 -0
- package/wasm/PLS/pls.cpp +393 -0
- package/wasm/callWasm.js +475 -0
- package/wasm/callWasmForWebWorker.js +706 -0
- package/wasm/dataGenerators.h +169 -0
- package/wasm/dataMining.h +116 -0
- package/wasm/pcaExport.cpp +64 -0
- package/wasm/plsExport.cpp +75 -0
- package/wasm/svm.h +608 -0
- package/wasm/svmApi.cpp +323 -0
- package/wasm/workers/errorWorker.js +13 -0
- package/wasm/workers/generateDatasetWorker.js +13 -0
- package/wasm/workers/normalizeDatasetWorker.js +13 -0
- package/wasm/workers/partialLeastSquareRegressionWorker.js +13 -0
- package/wasm/workers/predictByLSSVMWorker.js +13 -0
- package/wasm/workers/principalComponentAnalysisWorker.js +13 -0
- package/wasm/workers/trainAndAnalyzeLSSVMWorker.js +13 -0
- package/wasm/workers/trainLSSVMWorker.js +13 -0
- package/webpack.config.js +37 -0
package/wasm/PCA/PCA.cpp
ADDED
@@ -0,0 +1,151 @@
// PCA.cpp
// Principal Component Analysis using the Eigen library: implementations of functions

#include "../../../../../Eigen/Eigen/Dense"
using namespace Eigen;

#include "PCA.h"
using pca::Float;
using pca::Integer;
using pca::Double;

/* Principal Component Analysis of the data, using the correlation matrix.
     data - input matrix;
     height, width - sizes of the input;
     numOfPrincipalComponents - number of principal components to be computed;
     principalComponents - the principal components computed;
     approxData - approximation of the input data using the principal components obtained. */
int pca::pcaUsingCorrelationMatrix(Float * data,
                                   const int height,
                                   const int width,
                                   const int numOfPrincipalComponents,
                                   const int centerNum,
                                   const int scaleNum,
                                   Float * principalComponents,
                                   Float * approxData) noexcept
{
  /* Here, we use a MODIFICATION of the algorithm given in
     Charu C. Aggarwal. Data Mining: The Textbook. Springer, 2015
     (see page 42). */

  // check the number of principal components
  if (height < numOfPrincipalComponents || numOfPrincipalComponents < 1)
    return UNCORRECT_ARGUMENTS_ERROR;

  // map the input data to an Eigen matrix
  Map< Matrix<Float, Dynamic, Dynamic, RowMajor> > dataMatrix(data, height, width);

  Vector<Float, Dynamic> means = dataMatrix.rowwise().mean();

  if (centerNum != 0)
    dataMatrix = dataMatrix.colwise() - means;

  if (scaleNum != 0)
    dataMatrix = dataMatrix.rowwise().normalized() * sqrt(height);

  Matrix<Float, Dynamic, Dynamic> corMatrix = dataMatrix * dataMatrix.transpose();

  // The following solver computes eigenvalues & eigenvectors; the eigenvalues come in increasing order.
  SelfAdjointEigenSolver<Matrix<Float, Dynamic, Dynamic>> eigensolver(corMatrix);

  // Check the result of the eigenvalue & eigenvector computation.
  if (eigensolver.info() != Success)
    return COMPUTATION_ERROR;

  // Check the order of the computed eigenvalues: increasing order is expected.
  Vector<Float, Dynamic> eigenVals = eigensolver.eigenvalues();
  for (int i = 1; i < eigenVals.size(); i++)
    if (eigenVals(i - 1) > eigenVals(i))
      return METHOD_ERROR;

  // get the feature vectors, taking into account the increasing order of the computed eigenvalues
  Matrix<Float, Dynamic, Dynamic, ColMajor> featureVectors
    = (eigensolver.eigenvectors().rowwise().reverse())(all, seq(0, numOfPrincipalComponents - 1));

  // map the principal-components buffer to an Eigen matrix
  Map< Matrix<Float, Dynamic, Dynamic, RowMajor> >
    princCompMatrix(principalComponents, numOfPrincipalComponents, width);

  princCompMatrix = featureVectors.transpose() * dataMatrix;

  // computation of the approximation
  if (approxData != NULL)
  {
    // map the output buffer to an Eigen matrix
    Map< Matrix<Float, Dynamic, Dynamic, RowMajor> > approxMatrix(approxData, height, width);

    approxMatrix = (featureVectors * princCompMatrix).colwise() + means;
  }

  return NO_ERROR;
} // pcaUsingCorrelationMatrix

/*{
  // Here, we use a MODIFICATION of the algorithm given in
  // Charu C. Aggarwal. Data Mining: The Textbook. Springer, 2015
  // (see page 42).

  // check the number of principal components
  if (height < numOfPrincipalComponents || numOfPrincipalComponents < 1)
    return UNCORRECT_ARGUMENTS_ERROR;

  // map the input data to an Eigen matrix
  Map< Matrix<Float, Dynamic, Dynamic, RowMajor> > dataMatrix(data, height, width);

  Vector<Float, Dynamic> means = dataMatrix.rowwise().mean();

  Matrix<Float, Dynamic, Dynamic> corMatrix
    = dataMatrix * dataMatrix.transpose() / width - means * means.transpose();

  // The following solver computes eigenvalues & eigenvectors; the eigenvalues come in increasing order.
  SelfAdjointEigenSolver<Matrix<Float, Dynamic, Dynamic>> eigensolver(corMatrix);

  // Check the result of the eigenvalue & eigenvector computation.
  if (eigensolver.info() != Success)
    return COMPUTATION_ERROR;

  // Check the order of the computed eigenvalues: increasing order is expected.
  Vector<Float, Dynamic> eigenVals = eigensolver.eigenvalues();
  for (int i = 1; i < eigenVals.size(); i++)
    if (eigenVals(i - 1) > eigenVals(i))
      return METHOD_ERROR;

  // get the feature vectors, taking into account the increasing order of the computed eigenvalues
  Matrix<Float, Dynamic, Dynamic, ColMajor> featureVectors
    = (eigensolver.eigenvectors().rowwise().reverse())(all, seq(0, numOfPrincipalComponents - 1));

  // map the principal-components buffer to an Eigen matrix
  Map< Matrix<Float, Dynamic, Dynamic, RowMajor> >
    princCompMatrix(principalComponents, numOfPrincipalComponents, width);

  // compute the principal components
  princCompMatrix = featureVectors.transpose() * (dataMatrix.colwise() - means);

  // computation of the approximation
  if (approxData != NULL)
  {
    // map the output buffer to an Eigen matrix
    Map< Matrix<Float, Dynamic, Dynamic, RowMajor> > approxMatrix(approxData, height, width);

    approxMatrix = (featureVectors * princCompMatrix).colwise() + means;
  }

  return NO_ERROR;
} */

// Maximum absolute deviation between arrays
Float pca::mad(Float * arr1, Float * arr2, const int length) noexcept
{
  // Solution using Eigen: nice, but additional structures are created!
  /*Map<Vector<Float, Dynamic>> vec1(arr1, length);
  Map<Vector<Float, Dynamic>> vec2(arr2, length);
  return ((vec1 - vec2).cwiseAbs()).maxCoeff();*/

  // Naive solution
  Float result = fabs(arr1[0] - arr2[0]);

  for (int i = 1; i < length; i++)
    result = fmax(result, fabs(arr1[i] - arr2[i]));

  return result;
}
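For orientation, here is a minimal caller sketch, assuming the row-per-column layout described in PCA.h and the fact that centering writes through the mapped input buffer. The main() and buffer names are illustrative, not part of the package:

#include <cstdio>
#include "PCA.h"

int main() {
  // two "Datagrok columns" (rows here), four observations each; the second is 2x the first
  pca::Float data[8] = {1, 2, 3, 4,  2, 4, 6, 8};
  pca::Float copy[8];            // keep the input: the routine centers the mapped buffer in place
  for (int i = 0; i < 8; i++) copy[i] = data[i];

  pca::Float pc[4];              // 1 principal component of width 4
  pca::Float approx[8];          // rank-1 reconstruction of the input

  int rc = pca::pcaUsingCorrelationMatrix(data, /*height=*/2, /*width=*/4,
                                          /*numOfPrincipalComponents=*/1,
                                          /*centerNum=*/1, /*scaleNum=*/0, pc, approx);
  if (rc != pca::NO_ERROR) return rc;

  // perfectly correlated rows, so the rank-1 approximation should be near-exact
  std::printf("max |data - approx| = %f\n", pca::mad(copy, approx, 8));
  return 0;
}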
package/wasm/PCA/PCA.h
ADDED
@@ -0,0 +1,48 @@
// PCA.h
// Principal Component Analysis (PCA) using the Eigen library: headers of functions

// REMARK 1. Each row of the input data contains a Datagrok column.
// For this reason, the following convention is used:
//   - height is the number of Datagrok columns to be processed,
//   - width is the length of each Datagrok column.

// REMARK 2. Here, we operate on matrices that have float rows and integer rows;
// each row contains values of the same type.
// Each matrix consists of two blocks: float rows and integer rows.
// In this case, an input is void **.

// REMARK 3. Also, the same methods are implemented for the case when the data is
// given by float *.

#ifndef PCA_H
#define PCA_H

namespace pca {

  typedef float Float;
  typedef int Integer;
  typedef double Double;

  enum ResultCode {NO_ERROR = 0, UNCORRECT_ARGUMENTS_ERROR, COMPUTATION_ERROR, METHOD_ERROR};

  /* Principal Component Analysis of the data, using the correlation matrix.
       data - input matrix;
       height, width - sizes of the input;
       numOfPrincipalComponents - number of principal components to be computed;
       principalComponents - the principal components computed;
       approxData - approximation of the input data using the principal components obtained. */
  int pcaUsingCorrelationMatrix(Float * data,
                                const int height,
                                const int width,
                                const int numOfPrincipalComponents,
                                const int centerNum,
                                const int scaleNum,
                                Float * principalComponents,
                                Float * approxData = 0) noexcept;

  // Maximum absolute deviation between arrays
  Float mad(Float * arr1, Float * arr2, const int length) noexcept;
};

#endif // PCA_H
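REMARK 1's layout is easy to get backwards, so here is a hypothetical packing helper (not part of the package) that turns per-column data into the contiguous row-major buffer the API expects:

#include <vector>
#include "PCA.h"

// Pack each Datagrok column as one contiguous row: buf has height rows of width values.
std::vector<pca::Float> packColumns(const std::vector<std::vector<pca::Float>>& columns) {
  const int height = (int)columns.size();     // number of columns to process
  const int width  = (int)columns[0].size();  // length of each column
  std::vector<pca::Float> buf((size_t)height * width);
  for (int r = 0; r < height; r++)
    for (int c = 0; c < width; c++)
      buf[(size_t)r * width + c] = columns[r][c]; // row-major, one column per row
  return buf;
}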
package/wasm/PLS/PLS.h
ADDED
@@ -0,0 +1,64 @@
// PLS.h
// Declarations of functions that provide Partial Least Squares (PLS) Regression.

// An implementation of the algorithm PLS1 without X-deflation is used.
// Source paper: Ulf G. Indahl, The geometry of PLS1 explained properly:
// 10 key notes on mathematical properties of and some alternative
// algorithmic approaches to PLS1 modelling, DOI: https://doi.org/10.1002/cem.2589
// Also, the following article is used: https://doi.org/10.1016/S0169-7439(01)00155-1

#ifndef PLS_H
#define PLS_H

namespace pls {

  typedef float Float;
  typedef double Double;

  enum ResultCode { NO_ERROR = 0, UNCORRECT_ARGUMENTS_ERROR, COMPUTATION_ERROR, METHOD_ERROR };

  /* Partial Least Squares (PLS1).
       predictorColumnsDataPtr - data from the columns that are used for prediction
       rowCount - number of rows
       columnCount - number of columns
       responseColumnDataPtr - data from the column that is predicted, i.e. the response
       componentsCount - number of components extracted in PLS
       predictionDataPtr - prediction obtained using PLS (its size is equal to the size of the response)
       regressionCoefficients - coefficients of the linear regression that are computed (their size is equal to the number of columns)
  */
  int partialLeastSquare(Float * predictorColumnsDataPtr,
                         const int rowCount,
                         const int columnCount,
                         Float * responseColumnDataPtr,
                         const int componentsCount,
                         Float * predictionDataPtr,
                         Float * regressionCoefficients) noexcept;

  /* Partial Least Squares (PLS1) - extended version: scores data is provided.
       predictorColumnsDataPtr - data from the columns that are used for prediction (X)
       rowCount - number of rows
       columnCount - number of columns
       responseColumnDataPtr - data from the column that is predicted, i.e. the response (Y)
       componentsCount - number of components extracted in PLS (A)
       predictionDataPtr - prediction obtained using PLS (its size is equal to the size of the response)
       regressionCoefficientsPtr - coefficients of the linear regression that are computed (their size is equal to the number of columns) (b)
       predictorScoresPtr - scores of the predictors (T)
       responceScoresPtr - scores of the response (U)
       predictorLoadingsPtr - loadings of the predictors (P)
  */
  int partialLeastSquareExtended(Float * predictorColumnsDataPtr,
                                 const int rowCount,
                                 const int columnCount,
                                 Float * responseColumnDataPtr,
                                 const int componentsCount,
                                 Float * predictionDataPtr,
                                 Float * regressionCoefficientsPtr,
                                 Float * predictorScoresPtr,
                                 Float * responceScoresPtr,
                                 Float * predictorLoadingsPtr) noexcept;
};

#endif
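A minimal caller sketch for the basic PLS1 entry point declared above, assuming the column-major predictor layout used by the implementation in pls.cpp; the main() and the data values are illustrative, not part of the package:

#include <cstdio>
#include "PLS.h"

int main() {
  const int rowCount = 4, columnCount = 2, componentsCount = 2;

  // predictors are stored column-major: each predictor column's rowCount values are contiguous
  pls::Float X[rowCount * columnCount] = {1, 2, 3, 4,   1, 3, 2, 5};
  pls::Float y[rowCount] = {2.1f, 4.0f, 5.8f, 8.2f};

  pls::Float prediction[rowCount];        // same length as the response
  pls::Float coefficients[columnCount];   // one coefficient per predictor column

  int rc = pls::partialLeastSquare(X, rowCount, columnCount, y,
                                   componentsCount, prediction, coefficients);
  if (rc != pls::NO_ERROR) return rc;

  for (int i = 0; i < rowCount; i++)
    std::printf("y = %f, predicted = %f\n", y[i], prediction[i]);
  return 0;
}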
package/wasm/PLS/pls.cpp
ADDED
@@ -0,0 +1,393 @@
// pls.cpp
// Partial Least Squares (PLS) Regression using the Eigen library: implementation of functions

// The following STL library is used for printing and verifying results
//#include<iostream>
//using namespace std;

#include "../../../../../Eigen/Eigen/Dense"
using namespace Eigen;

#include "PLS.h"
using pls::Float;
using pls::Double;

/* Partial Least Squares (PLS1).
     predictorColumnsDataPtr - data from the columns that are used for prediction
     rowCount - number of rows
     columnCount - number of columns
     responseColumnDataPtr - data from the column that is predicted, i.e. the response
     componentsCount - number of components extracted in PLS
     predictionDataPtr - prediction obtained using PLS (its size is equal to the size of the response)
     regressionCoefficients - coefficients of the linear regression that are computed (their size is equal to the number of columns)
*/
int pls::partialLeastSquare(Float * predictorColumnsDataPtr,
                            const int rowCount,
                            const int columnCount,
                            Float * responseColumnDataPtr,
                            const int componentsCount,
                            Float * predictionDataPtr,
                            Float * regressionCoefficients) noexcept
{
  // check correctness of the arguments
  if (componentsCount <= 0 || componentsCount > columnCount)
    return UNCORRECT_ARGUMENTS_ERROR;

  // Further, notation from the paper https://doi.org/10.1002/cem.2589 is used (see Algorithm 2).

  // create a matrix that is associated with the predictor data
  Map< Matrix<Float, Dynamic, Dynamic, ColMajor> > D(predictorColumnsDataPtr, rowCount, columnCount);

  // compute the mean value of each column of D
  Vector<Float, Dynamic> mu = D.colwise().mean();

  // mean-centered version of D
  Matrix<Float, Dynamic, Dynamic, ColMajor> X = D.rowwise() - mu.transpose();

  // vector for the standard deviations of X
  Vector<Float, Dynamic> stdDevX(columnCount);

  Float rowCountSqrt = sqrt(static_cast<Float>(rowCount));

  // normalizing the X-columns
  for (int i = 0; i < columnCount; i++)
  {
    stdDevX(i) = X.col(i).norm() / rowCountSqrt;
    X.col(i) = X.col(i) / stdDevX(i);
  }

  // create a vector that is associated with the response, i.e. the predicted data
  Map<Vector<Float, Dynamic>> ySource(responseColumnDataPtr, rowCount);

  // mean value of the response
  Vector<Float, 1> meanY;
  meanY(0) = ySource.mean();

  // mean-centered version of the response
  Vector<Float, Dynamic> y = ySource.rowwise() - meanY;

  // standard deviation
  Float stdDevY = sqrt(y.squaredNorm() / rowCount);

  // normalizing
  y /= stdDevY;

  // create a vector that is associated with the regression coefficients
  Map<Vector<Float, Dynamic>> b(regressionCoefficients, columnCount);

  // create a vector that is associated with the prediction data
  Map<Vector<Float, Dynamic>> prediction(predictionDataPtr, rowCount);

  // PLS1 algorithm routine

  Matrix<Float, Dynamic, Dynamic, ColMajor> W(columnCount, componentsCount);

  Matrix<Float, Dynamic, Dynamic, ColMajor> P(columnCount, componentsCount);

  Matrix<Float, Dynamic, Dynamic, ColMajor> T(rowCount, componentsCount);

  Vector<Float, Dynamic> normTau(componentsCount);

  Vector<Float, Dynamic> q(componentsCount);

  Vector<Float, Dynamic> normV(componentsCount);

  // PLS1 algorithm: see Algorithm 2 in https://doi.org/10.1002/cem.2589

  Vector<Float, Dynamic> w = (X.transpose() * y);

  normV(0) = w.norm();

  // prevent division by zero
  if (normV(0) == static_cast<Float>(0))
    return METHOD_ERROR;

  w = w / normV(0);

  W.col(0) = w;

  Vector<Float, Dynamic> t = X * w;

  normTau(0) = t.norm();

  // prevent division by zero
  if (normTau(0) == static_cast<Float>(0))
    return METHOD_ERROR;

  t = t / normTau(0);

  T.col(0) = t;

  Vector<Float, Dynamic> p = X.transpose() * t;

  P.col(0) = p;

  q(0) = t.transpose() * y;

  for (int a = 1; a < componentsCount; a++)
  {
    w = normV(a - 1) * (w - p / normTau(a - 1));

    normV(a) = w.norm();

    // prevent division by zero
    if (normV(a) == static_cast<Float>(0))
      return METHOD_ERROR;

    w = w / normV(a);

    W.col(a) = w;

    t = X * w;

    t = t - T.leftCols(a) * (T.leftCols(a).transpose() * t);

    normTau(a) = t.norm();

    // prevent division by zero
    if (normTau(a) == static_cast<Float>(0))
      return METHOD_ERROR;

    t = t / normTau(a);

    T.col(a) = t;

    p = X.transpose() * t;

    P.col(a) = p;

    q(a) = t.transpose() * y;
  } // for a

  // compute the coefficients of the regression
  Matrix<Float, Dynamic, Dynamic> H = P.transpose() * W;

  // check the existence of the inverse matrix
  if (H.determinant() == static_cast<Float>(0))
    return METHOD_ERROR;

  b = W * H.inverse() * q;

  for (int i = 0; i < columnCount; i++)
    b(i) *= stdDevY / stdDevX(i);

  // TODO: to discuss a constant term of the regression
  // a constant term
  //Vector<Float, 1> shift;
  //shift(0) = ySource(0) - D.row(0) * b;
  //q(0) - P.col(0).transpose().dot(b);
  //prediction = (D * b).rowwise() + shift;

  prediction = D * b;

  return NO_ERROR;
} // partialLeastSquare

/* Partial Least Squares (PLS1) - extended version: scores data is provided.
     predictorColumnsDataPtr - data from the columns that are used for prediction (X)
     rowCount - number of rows
     columnCount - number of columns
     responseColumnDataPtr - data from the column that is predicted, i.e. the response (Y)
     componentsCount - number of components extracted in PLS (A)
     predictionDataPtr - prediction obtained using PLS (its size is equal to the size of the response)
     regressionCoefficientsPtr - coefficients of the linear regression that are computed (their size is equal to the number of columns) (b)
     predictorScoresPtr - scores of the predictors (T)
     responceScoresPtr - scores of the response (U)
     predictorLoadingsPtr - loadings of the predictors (P)
*/
int pls::partialLeastSquareExtended(Float * predictorColumnsDataPtr,
                                    const int rowCount,
                                    const int columnCount,
                                    Float * responseColumnDataPtr,
                                    const int componentsCount,
                                    Float * predictionDataPtr,
                                    Float * regressionCoefficientsPtr,
                                    Float * predictorScoresPtr,
                                    Float * responceScoresPtr,
                                    Float * predictorLoadingsPtr) noexcept
{
  // check correctness of the arguments
  if (componentsCount <= 0 || componentsCount > columnCount)
    return UNCORRECT_ARGUMENTS_ERROR;

  // Further, notation from the paper https://doi.org/10.1002/cem.2589 is used (see Algorithm 2).

  // create a matrix that is associated with the predictor data
  Map< Matrix<Float, Dynamic, Dynamic, ColMajor> > D(predictorColumnsDataPtr, rowCount, columnCount);

  // compute the mean value of each column of D
  Vector<Float, Dynamic> mu = D.colwise().mean();

  // mean-centered version of D
  Matrix<Float, Dynamic, Dynamic, ColMajor> X = D.rowwise() - mu.transpose();

  // standard deviations of X
  Vector<Float, Dynamic> stdDevX(columnCount);

  Float rowCountSqrt = sqrt(static_cast<Float>(rowCount));

  // normalizing X
  for (int i = 0; i < columnCount; i++)
  {
    stdDevX(i) = X.col(i).norm() / rowCountSqrt;

    // check the deviation
    if (stdDevX(i) == static_cast<Float>(0))
      return UNCORRECT_ARGUMENTS_ERROR;

    X.col(i) = X.col(i) / stdDevX(i);
  }

  // create a vector that is associated with the response, i.e. the predicted data
  Map<Vector<Float, Dynamic>> ySource(responseColumnDataPtr, rowCount);

  // mean value of Y: an Eigen vector is used in order to provide broadcasting
  Vector<Float, 1> meanY;
  meanY(0) = ySource.mean();

  // centering Y
  Vector<Float, Dynamic> y = ySource.rowwise() - meanY;

  // standard deviation of Y
  Float stdDevY = sqrt(y.squaredNorm() / rowCount);

  // check the deviation
  if (stdDevY == static_cast<Float>(0))
    return UNCORRECT_ARGUMENTS_ERROR;

  // normalizing Y
  y /= stdDevY;

  // create a vector that is associated with the regression coefficients
  Map<Vector<Float, Dynamic>> b(regressionCoefficientsPtr, columnCount);

  // create a vector that is associated with the prediction data
  Map<Vector<Float, Dynamic>> prediction(predictionDataPtr, rowCount);

  // weights matrix, W
  Matrix<Float, Dynamic, Dynamic, ColMajor> W(columnCount, componentsCount);

  // X-loadings matrix, P
  Map<Matrix<Float, Dynamic, Dynamic, ColMajor>> P(predictorLoadingsPtr, columnCount, componentsCount);

  //Matrix<Float, Dynamic, Dynamic, ColMajor> P(columnCount, componentsCount);

  // X-scores, T
  Map<Matrix<Float, Dynamic, Dynamic, ColMajor>> T(predictorScoresPtr, rowCount, componentsCount);

  // Y-scores, U
  Map<Matrix<Float, Dynamic, Dynamic, ColMajor>> U(responceScoresPtr, rowCount, componentsCount);

  // Y-loadings, q
  Vector<Float, Dynamic> q(componentsCount);

  // auxiliary vectors of the PLS1 routine
  Vector<Float, Dynamic> normTau(componentsCount);
  Vector<Float, Dynamic> normV(componentsCount);

  // PLS1 algorithm: see Algorithm 2 in https://doi.org/10.1002/cem.2589

  Vector<Float, Dynamic> w = (X.transpose() * y);

  normV(0) = w.norm();

  // prevent division by zero
  if (normV(0) == static_cast<Float>(0))
    return METHOD_ERROR;

  w = w / normV(0);

  W.col(0) = w;

  Vector<Float, Dynamic> t = X * w;

  normTau(0) = t.norm();

  // prevent division by zero
  if (normTau(0) == static_cast<Float>(0))
    return METHOD_ERROR;

  t = t / normTau(0);

  T.col(0) = t;

  Vector<Float, Dynamic> p = X.transpose() * t;

  P.col(0) = p;

  q(0) = t.transpose() * y;

  for (int a = 1; a < componentsCount; a++)
  {
    w = normV(a - 1) * (w - p / normTau(a - 1));

    normV(a) = w.norm();

    // prevent division by zero
    if (normV(a) == static_cast<Float>(0))
      return METHOD_ERROR;

    w = w / normV(a);

    W.col(a) = w;

    t = X * w;

    t = t - T.leftCols(a) * (T.leftCols(a).transpose() * t);

    normTau(a) = t.norm();

    // prevent division by zero
    if (normTau(a) == static_cast<Float>(0))
      return METHOD_ERROR;

    t = t / normTau(a);

    T.col(a) = t;

    p = X.transpose() * t;

    P.col(a) = p;

    q(a) = t.transpose() * y;
  } // for a

  // compute the Y-scores
  U = y * q.transpose() / q.squaredNorm();

  // compute the coefficients of the regression
  Matrix<Float, Dynamic, Dynamic> H = P.transpose() * W;

  // check the existence of the inverse matrix
  if (H.determinant() == static_cast<Float>(0))
    return METHOD_ERROR;

  // compute the regression coefficients
  b = W * H.inverse() * q;

  // ... also, we take the normalizing into account
  for (int i = 0; i < columnCount; i++)
    b(i) *= stdDevY / stdDevX(i);

  // compute the predictions
  prediction = D * b;

  // Remove the following comments in order to print and verify the results
  //cout << "\nW_star:\n" << Wstar << endl;
  //cout << "\nU:\n" << U << endl;
  //cout << "\nU.tr * U:\n" << U.transpose() * U << endl; // this must be an identity matrix
  //cout << "\nb:\n" << b << endl;
  //cout << "\nq:\n" << q << endl;
  //cout << "\nD:\n" << D << endl;
  //cout << "\nP:\n" << P << endl;
  //cout << "\nT:\n" << T << endl;
  //cout << "\nT.tr * T:\n" << T.transpose() * T << endl; // this must be an identity matrix
  //cout << "\nW:\n" << W << endl;
  //cout << "\nW.tr * W:\n" << W.transpose() * W << endl; // this must be an identity matrix
  //cout << "\nprediction\n" << prediction << endl;

  return NO_ERROR;
} // partialLeastSquareExtended
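The extended entry point writes its scores and loadings through caller-supplied buffers, so the sizing matters: T and U are rowCount x componentsCount, P is columnCount x componentsCount, all column-major. A hypothetical buffer-sizing sketch (not part of the package), assuming the signature declared in PLS.h:

#include <vector>
#include "PLS.h"

int runExtended(pls::Float* X, int rowCount, int columnCount,
                pls::Float* y, int componentsCount) {
  std::vector<pls::Float> prediction(rowCount);             // same length as the response
  std::vector<pls::Float> b(columnCount);                   // one coefficient per predictor
  std::vector<pls::Float> T(rowCount * componentsCount);    // predictor scores, rowCount x A
  std::vector<pls::Float> U(rowCount * componentsCount);    // response scores, rowCount x A
  std::vector<pls::Float> P(columnCount * componentsCount); // predictor loadings, columnCount x A

  return pls::partialLeastSquareExtended(X, rowCount, columnCount, y,
                                         componentsCount, prediction.data(), b.data(),
                                         T.data(), U.data(), P.data());
}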