@datagrok/eda 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/README.md +3 -0
  2. package/detectors.js +9 -0
  3. package/dist/111.js +2 -0
  4. package/dist/146.js +2 -0
  5. package/dist/155.js +2 -0
  6. package/dist/355.js +2 -0
  7. package/dist/584.js +2 -0
  8. package/dist/604.js +2 -0
  9. package/dist/632.js +2 -0
  10. package/dist/645.js +2 -0
  11. package/dist/93.js +2 -0
  12. package/dist/d711f70338306e5bddc4.wasm +0 -0
  13. package/dist/package-test.js +2 -0
  14. package/dist/package.js +2 -0
  15. package/package.json +49 -0
  16. package/package.png +0 -0
  17. package/scripts/command.txt +1 -0
  18. package/scripts/exportForTS.py +862 -0
  19. package/scripts/exportForTSConstants.py +93 -0
  20. package/scripts/func.json +1 -0
  21. package/scripts/module.json +11 -0
  22. package/src/EDAtools.ts +46 -0
  23. package/src/EDAui.ts +118 -0
  24. package/src/dataGenerators.ts +74 -0
  25. package/src/demos.ts +38 -0
  26. package/src/package-test.ts +12 -0
  27. package/src/package.ts +248 -0
  28. package/src/svm.ts +485 -0
  29. package/src/utils.ts +51 -0
  30. package/tsconfig.json +71 -0
  31. package/wasm/EDA.js +443 -0
  32. package/wasm/EDA.wasm +0 -0
  33. package/wasm/EDAAPI.js +131 -0
  34. package/wasm/EDAForWebWorker.js +21 -0
  35. package/wasm/PCA/PCA.cpp +151 -0
  36. package/wasm/PCA/PCA.h +48 -0
  37. package/wasm/PLS/PLS.h +64 -0
  38. package/wasm/PLS/pls.cpp +393 -0
  39. package/wasm/callWasm.js +475 -0
  40. package/wasm/callWasmForWebWorker.js +706 -0
  41. package/wasm/dataGenerators.h +169 -0
  42. package/wasm/dataMining.h +116 -0
  43. package/wasm/pcaExport.cpp +64 -0
  44. package/wasm/plsExport.cpp +75 -0
  45. package/wasm/svm.h +608 -0
  46. package/wasm/svmApi.cpp +323 -0
  47. package/wasm/workers/errorWorker.js +13 -0
  48. package/wasm/workers/generateDatasetWorker.js +13 -0
  49. package/wasm/workers/normalizeDatasetWorker.js +13 -0
  50. package/wasm/workers/partialLeastSquareRegressionWorker.js +13 -0
  51. package/wasm/workers/predictByLSSVMWorker.js +13 -0
  52. package/wasm/workers/principalComponentAnalysisWorker.js +13 -0
  53. package/wasm/workers/trainAndAnalyzeLSSVMWorker.js +13 -0
  54. package/wasm/workers/trainLSSVMWorker.js +13 -0
  55. package/webpack.config.js +37 -0
package/wasm/PCA/PCA.cpp ADDED
@@ -0,0 +1,151 @@
+ // PCA.cpp
+ // Principal Component Analysis using the Eigen library: implementations of functions
+
+ #include "../../../../../Eigen/Eigen/Dense"
+ using namespace Eigen;
+
+ #include "PCA.h"
+ using pca::Float;
+ using pca::Integer;
+ using pca::Double;
+
+ /* Principal Component Analysis of the data using the correlation matrix.
+    data - input matrix;
+    height, width - sizes of the input;
+    numOfPrincipalComponents - number of principal components to be computed;
+    principalComponents - the principal components computed;
+    approxData - approximation of the input data using the principal components obtained. */
+ int pca::pcaUsingCorrelationMatrix(Float * data,
+ const int height,
+ const int width,
+ const int numOfPrincipalComponents,
+ const int centerNum,
+ const int scaleNum,
+ Float * principalComponents,
+ Float * approxData) noexcept
+ {
+ /* Here, we use a MODIFICATION of the algorithm given in
+    Charu C. Aggarwal. Data Mining: The Textbook. Springer, 2015,
+    (see page 42). */
+
+ // check the number of principal components
+ if (height < numOfPrincipalComponents || numOfPrincipalComponents < 1)
+ return UNCORRECT_ARGUMENTS_ERROR;
+
+ // associate the data buffer with an Eigen matrix (no copy)
+ Map< Matrix<Float, Dynamic, Dynamic, RowMajor> > dataMatrix(data, height, width);
+
+ Vector<Float, Dynamic> means = dataMatrix.rowwise().mean();
+
+ if (centerNum != 0)
+ dataMatrix = dataMatrix.colwise() - means;
+
+ if (scaleNum != 0)
+ dataMatrix = dataMatrix.rowwise().normalized() * sqrt(height);
+
+ Matrix<Float, Dynamic, Dynamic> corMatrix = dataMatrix * dataMatrix.transpose();
+
+ // The following solver computes eigenvalues & eigenvectors; eigenvalues come in increasing order.
+ SelfAdjointEigenSolver<Matrix<Float, Dynamic, Dynamic>> eigensolver(corMatrix);
+
+ // Check the result of the eigenvalue & eigenvector computation.
+ if (eigensolver.info() != Success)
+ return COMPUTATION_ERROR;
+
+ // Check the order of the computed eigenvalues: increasing order is expected
+ Vector<Float, Dynamic> eigenVals = eigensolver.eigenvalues();
+ for(int i = 1; i < eigenVals.size(); i++)
+ if(eigenVals(i - 1) > eigenVals(i))
+ return METHOD_ERROR;
+
+ // get the feature vectors, taking into account the increasing order of the computed eigenvalues
+ Matrix<Float, Dynamic, Dynamic, ColMajor> featureVectors
+ = (eigensolver.eigenvectors().rowwise().reverse())(all, seq(0, numOfPrincipalComponents - 1));
+
+ // associate the principal components buffer with an Eigen matrix (no copy)
+ Map< Matrix<Float, Dynamic, Dynamic, RowMajor> >
+ princCompMatrix(principalComponents, numOfPrincipalComponents, width);
+
+ princCompMatrix = featureVectors.transpose() * dataMatrix;
+
+ // computation of the approximation
+ if (approxData != NULL)
+ {
+ // associate the approximation buffer with an Eigen matrix (no copy)
+ Map< Matrix<Float, Dynamic, Dynamic, RowMajor> > approxMatrix(approxData, height, width);
+
+ approxMatrix = (featureVectors * princCompMatrix).colwise() + means;
+ }
+
+ return NO_ERROR;
+ } // pcaUsingCorrelationMatrix
+
+ /*{
+ // Here, we use a MODIFICATION of the algorithm given in
+ // Charu C. Aggarwal. Data Mining: The Textbook. Springer, 2015,
+ // (see page 42).
+
+ // check the number of principal components
+ if (height < numOfPrincipalComponents || numOfPrincipalComponents < 1)
+ return UNCORRECT_ARGUMENTS_ERROR;
+
+ // associate the data buffer with an Eigen matrix (no copy)
+ Map< Matrix<Float, Dynamic, Dynamic, RowMajor> > dataMatrix(data, height, width);
+
+ Vector<Float, Dynamic> means = dataMatrix.rowwise().mean();
+
+ Matrix<Float, Dynamic, Dynamic> corMatrix
+ = dataMatrix * dataMatrix.transpose() / width - means * means.transpose();
+
+ // The following solver computes eigenvalues & eigenvectors; eigenvalues come in increasing order.
+ SelfAdjointEigenSolver<Matrix<Float, Dynamic, Dynamic>> eigensolver(corMatrix);
+
+ // Check the result of the eigenvalue & eigenvector computation.
+ if (eigensolver.info() != Success)
+ return COMPUTATION_ERROR;
+
+ // Check the order of the computed eigenvalues: increasing order is expected
+ Vector<Float, Dynamic> eigenVals = eigensolver.eigenvalues();
+ for(int i = 1; i < eigenVals.size(); i++)
+ if(eigenVals(i - 1) > eigenVals(i))
+ return METHOD_ERROR;
+
+ // get the feature vectors, taking into account the increasing order of the computed eigenvalues
+ Matrix<Float, Dynamic, Dynamic, ColMajor> featureVectors
+ = (eigensolver.eigenvectors().rowwise().reverse())(all, seq(0, numOfPrincipalComponents - 1));
+
+ // associate the principal components buffer with an Eigen matrix (no copy)
+ Map< Matrix<Float, Dynamic, Dynamic, RowMajor> >
+ princCompMatrix(principalComponents, numOfPrincipalComponents, width);
+
+ // compute the principal components
+ princCompMatrix = featureVectors.transpose() * (dataMatrix.colwise() - means);
+
+ // computation of the approximation
+ if (approxData != NULL)
+ {
+ // associate the approximation buffer with an Eigen matrix (no copy)
+ Map< Matrix<Float, Dynamic, Dynamic, RowMajor> > approxMatrix(approxData, height, width);
+
+ approxMatrix = (featureVectors * princCompMatrix).colwise() + means;
+ }
+
+ return NO_ERROR;
+ } */
+
+ // Maximum absolute deviation between arrays
+ Float pca::mad(Float * arr1, Float * arr2, const int length) noexcept
+ {
+ // Solution using Eigen: concise, but additional temporaries are created!
+ /*Map<Vector<Float, Dynamic>> vec1(arr1, length);
+ Map<Vector<Float, Dynamic>> vec2(arr2, length);
+ return ((vec1 - vec2).cwiseAbs()).maxCoeff();*/
+
+ // Naive solution
+ Float result = fabs(arr1[0] - arr2[0]);
+
+ for (int i = 1; i < length; i++)
+ result = fmax(result, fabs(arr1[i] - arr2[i]));
+
+ return result;
+ }
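
For orientation, a minimal sketch of how this API might be called from C++ follows. The file name, sample matrix, and sizes are illustrative assumptions, not part of the package; the sketch presumes PCA.h and Eigen are on the include path. Note that the routine centers/scales the input buffer in place, so a copy is kept for comparison.

// pcaExample.cpp - hypothetical usage sketch, not shipped with the package.
#include <cstdio>
#include <cstring>
#include "PCA.h"

int main() {
  const int height = 3;        // number of Datagrok columns (matrix rows)
  const int width = 5;         // length of each column
  const int numComponents = 2; // principal components to extract

  // each row is one Datagrok column (see REMARK 1 in PCA.h)
  pca::Float data[height * width] = {
    1, 2, 3, 4, 5,
    2, 4, 6, 8, 10,
    5, 3, 1, 4, 2};

  // keep a copy: the routine centers/scales the input buffer in place
  pca::Float original[height * width];
  std::memcpy(original, data, sizeof(data));

  pca::Float components[numComponents * width];
  pca::Float approx[height * width];

  // center the rows (centerNum != 0) but skip scaling (scaleNum == 0),
  // so that approxData reconstructs the original rows
  int code = pca::pcaUsingCorrelationMatrix(data, height, width,
    numComponents, 1, 0, components, approx);
  if (code != pca::NO_ERROR) {
    std::printf("PCA failed with code %d\n", code);
    return 1;
  }

  // mad() reports the worst-case reconstruction error
  std::printf("max abs deviation: %f\n",
    pca::mad(original, approx, height * width));
  return 0;
}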
package/wasm/PCA/PCA.h ADDED
@@ -0,0 +1,48 @@
+ // PCA.h
+ // Principal Component Analysis (PCA) using the Eigen library: function declarations
+
+ // REMARK 1. Each row of the input data contains a Datagrok column.
+ // For this reason, the following convention is used:
+ // - height is the number of Datagrok columns to be processed,
+ // - width is the length of each Datagrok column.
+
+ // REMARK 2. Here, we operate on matrices that have float rows and integer rows;
+ // each row contains values of the same type.
+ // Each matrix consists of two blocks: float rows and integer rows.
+ // In this case, the input is void **.
+
+ // REMARK 3. Also, the same methods are implemented for the case when the data is
+ // given by float *.
+
+ #ifndef PCA_H
+ #define PCA_H
+
+ namespace pca {
+
+ typedef float Float;
+ typedef int Integer;
+ typedef double Double;
+
+ enum ResultCode {NO_ERROR = 0, UNCORRECT_ARGUMENTS_ERROR, COMPUTATION_ERROR, METHOD_ERROR};
+
+ /* Principal Component Analysis of the data using the correlation matrix.
+    data - input matrix;
+    height, width - sizes of the input;
+    numOfPrincipalComponents - number of principal components to be computed;
+    principalComponents - the principal components computed;
+    approxData - approximation of the input data using the principal components obtained. */
+ int pcaUsingCorrelationMatrix(Float * data,
+ const int height,
+ const int width,
+ const int numOfPrincipalComponents,
+ const int centerNum,
+ const int scaleNum,
+ Float * principalComponents,
+ Float * approxData = 0) noexcept;
+
+ // Maximum absolute deviation between arrays
+ Float mad(Float * arr1, Float * arr2, const int length) noexcept;
+ };
+
+ #endif // PCA_H
+
package/wasm/PLS/PLS.h ADDED
@@ -0,0 +1,64 @@
+ // PLS.h
+ // Declarations of functions that provide Partial Least Squares (PLS) regression.
+
+ // An implementation of the algorithm PLS1 without X-deflation is used.
+ // Source paper: Ulf G. Indahl, The geometry of PLS1 explained properly:
+ // 10 key notes on mathematical properties of and some alternative
+ // algorithmic approaches to PLS1 modelling, DOI: https://doi.org/10.1002/cem.2589
+ // Also, the following article is used: https://doi.org/10.1016/S0169-7439(01)00155-1
+
+ #ifndef PLS_H
+ #define PLS_H
+
+ namespace pls {
+
+ typedef float Float;
+ typedef double Double;
+
+ enum ResultCode { NO_ERROR = 0, UNCORRECT_ARGUMENTS_ERROR, COMPUTATION_ERROR, METHOD_ERROR };
+
+ /* Partial Least Squares (PLS1).
+    predictorColumnsDataPtr - data from the columns used for prediction
+    rowCount - number of rows
+    columnCount - number of columns
+    responseColumnDataPtr - data from the column to be predicted, i.e. the response
+    componentsCount - number of components extracted in PLS
+    predictionDataPtr - prediction obtained using PLS (its size is equal to the size of the response)
+    regressionCoefficients - computed coefficients of the linear regression (their number is equal to the number of columns)
+ */
+ int partialLeastSquare(Float * predictorColumnsDataPtr,
+ const int rowCount,
+ const int columnCount,
+ Float * responseColumnDataPtr,
+ const int componentsCount,
+ Float * predictionDataPtr,
+ Float * regressionCoefficients) noexcept;
+
+
+ /* Partial Least Squares (PLS1) - extended version: scores data is provided.
+    predictorColumnsDataPtr - data from the columns used for prediction (X)
+    rowCount - number of rows
+    columnCount - number of columns
+    responseColumnDataPtr - data from the column to be predicted, i.e. the response (Y)
+    componentsCount - number of components extracted in PLS (A)
+    predictionDataPtr - prediction obtained using PLS (its size is equal to the size of the response)
+    regressionCoefficientsPtr - computed coefficients of the linear regression (their number is equal to the number of columns) (b)
+    predictorScoresPtr - scores of the predictors (T)
+    responceScoresPtr - scores of the response (U)
+    predictorLoadingsPtr - loadings of the predictors (P)
+ */
+ int partialLeastSquareExtended(Float * predictorColumnsDataPtr,
+ const int rowCount,
+ const int columnCount,
+ Float * responseColumnDataPtr,
+ const int componentsCount,
+ Float * predictionDataPtr,
+ Float * regressionCoefficientsPtr,
+ Float * predictorScoresPtr,
+ Float * responceScoresPtr,
+ Float * predictorLoadingsPtr) noexcept;
+ };
+
+ #endif
+
+
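
To make the calling convention concrete, here is a minimal usage sketch. The file name and data are illustrative assumptions; it presumes this header and Eigen are available at build time.

// plsExample.cpp - hypothetical usage sketch, not shipped with the package.
#include <cstdio>
#include "PLS.h"

int main() {
  const int rowCount = 4;
  const int columnCount = 3;
  const int componentsCount = 2;

  // predictors are stored column by column (the implementation maps the
  // buffer as a column-major rowCount x columnCount matrix)
  pls::Float predictors[rowCount * columnCount] = {
    1, 2, 3, 4,   // column 1
    2, 1, 4, 3,   // column 2
    1, 1, 2, 2};  // column 3

  pls::Float response[rowCount] = {3, 4, 9, 10};

  pls::Float prediction[rowCount];
  pls::Float coefficients[columnCount];

  int code = pls::partialLeastSquare(predictors, rowCount, columnCount,
    response, componentsCount, prediction, coefficients);
  if (code != pls::NO_ERROR) {
    std::printf("PLS failed with code %d\n", code);
    return 1;
  }

  for (int i = 0; i < rowCount; i++)
    std::printf("y = %.2f, predicted = %.2f\n", response[i], prediction[i]);
  return 0;
}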
package/wasm/PLS/pls.cpp ADDED
@@ -0,0 +1,393 @@
+ // pls.cpp
+ // Partial Least Squares (PLS) regression using the Eigen library: implementation of functions
+
+ // The following standard library header is used for printing and verifying results
+ //#include<iostream>
+ //using namespace std;
+
+ #include "../../../../../Eigen/Eigen/Dense"
+ using namespace Eigen;
+
+ #include "PLS.h"
+ using pls::Float;
+ using pls::Double;
+
+ /* Partial Least Squares (PLS1).
+    predictorColumnsDataPtr - data from the columns used for prediction
+    rowCount - number of rows
+    columnCount - number of columns
+    responseColumnDataPtr - data from the column to be predicted, i.e. the response
+    componentsCount - number of components extracted in PLS
+    predictionDataPtr - prediction obtained using PLS (its size is equal to the size of the response)
+    regressionCoefficients - computed coefficients of the linear regression (their number is equal to the number of columns)
+ */
+ int pls::partialLeastSquare(Float * predictorColumnsDataPtr,
+ const int rowCount,
+ const int columnCount,
+ Float * responseColumnDataPtr,
+ const int componentsCount,
+ Float * predictionDataPtr,
+ Float * regressionCoefficients) noexcept
+ {
+ // check correctness of arguments
+ if (componentsCount <= 0 || componentsCount > columnCount)
+ return UNCORRECT_ARGUMENTS_ERROR;
+
+ // Further, notation from the paper https://doi.org/10.1002/cem.2589 is used (see Algorithm 2).
+
+ // create a matrix associated with the predictor data
+ Map< Matrix<Float, Dynamic, Dynamic, ColMajor> > D(predictorColumnsDataPtr, rowCount, columnCount);
+
+ // compute the mean value of each column of D
+ Vector<Float, Dynamic> mu = D.colwise().mean();
+
+ // mean-centered version of D
+ Matrix<Float, Dynamic, Dynamic, ColMajor> X = D.rowwise() - mu.transpose();
+
+ // vector for the standard deviations of the X-columns
+ Vector<Float, Dynamic> stdDevX(columnCount);
+
+ Float rowCountSqrt = sqrt(static_cast<Float>(rowCount));
+
+ // normalizing the X-columns
+ for (int i = 0; i < columnCount; i++)
+ {
+ stdDevX(i) = X.col(i).norm() / rowCountSqrt;
+ X.col(i) = X.col(i) / stdDevX(i);
+ }
+
+ // create a vector associated with the response, i.e. the predicted data
+ Map<Vector<Float, Dynamic>> ySource(responseColumnDataPtr, rowCount);
+
+ // mean value of the response
+ Vector<Float, 1> meanY;
+ meanY(0) = ySource.mean();
+
+ // mean-centered version of the response
+ Vector<Float, Dynamic> y = ySource.rowwise() - meanY;
+
+ // standard deviation
+ Float stdDevY = sqrt(y.squaredNorm() / rowCount);
+
+ // normalizing
+ y /= stdDevY;
+
+ // create a vector associated with the regression coefficients
+ Map<Vector<Float, Dynamic>> b(regressionCoefficients, columnCount);
+
+ // create a vector associated with the prediction data
+ Map<Vector<Float, Dynamic>> prediction(predictionDataPtr, rowCount);
+
+ // PLS1 algorithm routine
+
+ Matrix<Float, Dynamic, Dynamic, ColMajor> W(columnCount, componentsCount);
+
+ Matrix<Float, Dynamic, Dynamic, ColMajor> P(columnCount, componentsCount);
+
+ Matrix<Float, Dynamic, Dynamic, ColMajor> T(rowCount, componentsCount);
+
+ Vector<Float, Dynamic> normTau(componentsCount);
+
+ Vector<Float, Dynamic> q(componentsCount);
+
+ Vector<Float, Dynamic> normV(componentsCount);
+
+ // PLS1 algorithm: see Algorithm 2 in https://doi.org/10.1002/cem.2589
+
+ Vector<Float, Dynamic> w = (X.transpose() * y);
+
+ normV(0) = w.norm();
+
+ // prevent division by zero
+ if (normV(0) == static_cast<Float>(0))
+ return METHOD_ERROR;
+
+ w = w / normV(0);
+
+ W.col(0) = w;
+
+ Vector<Float, Dynamic> t = X * w;
+
+ normTau(0) = t.norm();
+
+ // prevent division by zero
+ if (normTau(0) == static_cast<Float>(0))
+ return METHOD_ERROR;
+
+ t = t / normTau(0);
+
+ T.col(0) = t;
+
+ Vector<Float, Dynamic> p = X.transpose() * t;
+
+ P.col(0) = p;
+
+ q(0) = t.transpose() * y;
+
+ for (int a = 1; a < componentsCount; a++)
+ {
+ w = normV(a - 1) * (w - p / normTau(a - 1));
+
+ normV(a) = w.norm();
+
+ // prevent division by zero
+ if (normV(a) == static_cast<Float>(0))
+ return METHOD_ERROR;
+
+ w = w / normV(a);
+
+ W.col(a) = w;
+
+ t = X * w;
+
+ t = t - T.leftCols(a) * (T.leftCols(a).transpose() * t);
+
+ normTau(a) = t.norm();
+
+ // prevent division by zero
+ if (normTau(a) == static_cast<Float>(0))
+ return METHOD_ERROR;
+
+ t = t / normTau(a);
+
+ T.col(a) = t;
+
+ p = X.transpose() * t;
+
+ P.col(a) = p;
+
+ q(a) = t.transpose() * y;
+ } // for a
+
+ // compute the regression coefficients
+ Matrix<Float, Dynamic, Dynamic> H = P.transpose() * W;
+
+ // check that the inverse matrix exists
+ if (H.determinant() == static_cast<Float>(0))
+ return METHOD_ERROR;
+
+ b = W * H.inverse() * q;
+
+ for (int i = 0; i < columnCount; i++)
+ b(i) *= stdDevY / stdDevX(i);
+
+ // TODO: discuss the constant term of the regression
+ // a constant term
+ //Vector<Float, 1> shift;
+ //shift(0) = ySource(0) - D.row(0) * b;
+ //q(0) - P.col(0).transpose().dot(b);
+ //prediction = (D * b).rowwise() + shift;
+
+ prediction = D * b;
+
+ return NO_ERROR;
+ } // partialLeastSquare
+
+ /* Partial Least Squares (PLS1) - extended version: scores data is provided.
+    predictorColumnsDataPtr - data from the columns used for prediction (X)
+    rowCount - number of rows
+    columnCount - number of columns
+    responseColumnDataPtr - data from the column to be predicted, i.e. the response (Y)
+    componentsCount - number of components extracted in PLS (A)
+    predictionDataPtr - prediction obtained using PLS (its size is equal to the size of the response)
+    regressionCoefficientsPtr - computed coefficients of the linear regression (their number is equal to the number of columns) (b)
+    predictorScoresPtr - scores of the predictors (T)
+    responceScoresPtr - scores of the response (U)
+    predictorLoadingsPtr - loadings of the predictors (P)
+ */
+ int pls::partialLeastSquareExtended(Float * predictorColumnsDataPtr,
+ const int rowCount,
+ const int columnCount,
+ Float * responseColumnDataPtr,
+ const int componentsCount,
+ Float * predictionDataPtr,
+ Float * regressionCoefficientsPtr,
+ Float * predictorScoresPtr,
+ Float * responceScoresPtr,
+ Float * predictorLoadingsPtr) noexcept
+ {
+ // check correctness of arguments
+ if (componentsCount <= 0 || componentsCount > columnCount)
+ return UNCORRECT_ARGUMENTS_ERROR;
+
+ // Further, notation from the paper https://doi.org/10.1002/cem.2589 is used (see Algorithm 2).
+
+ // create a matrix associated with the predictor data
+ Map< Matrix<Float, Dynamic, Dynamic, ColMajor> > D(predictorColumnsDataPtr, rowCount, columnCount);
+
+ // compute the mean value of each column of D
+ Vector<Float, Dynamic> mu = D.colwise().mean();
+
+ // mean-centered version of D
+ Matrix<Float, Dynamic, Dynamic, ColMajor> X = D.rowwise() - mu.transpose();
+
+ // standard deviations of X
+ Vector<Float, Dynamic> stdDevX(columnCount);
+
+ Float rowCountSqrt = sqrt(static_cast<Float>(rowCount));
+
+ // normalizing X
+ for (int i = 0; i < columnCount; i++)
+ {
+ stdDevX(i) = X.col(i).norm() / rowCountSqrt;
+
+ // check deviation
+ if(stdDevX(i) == static_cast<Float>(0))
+ return UNCORRECT_ARGUMENTS_ERROR;
+
+ X.col(i) = X.col(i) / stdDevX(i);
+ }
+
+ // create a vector associated with the response, i.e. the predicted data
+ Map<Vector<Float, Dynamic>> ySource(responseColumnDataPtr, rowCount);
+
+ // mean value of Y: an Eigen vector is used in order to provide broadcasting
+ Vector<Float, 1> meanY;
+ meanY(0) = ySource.mean();
+
+ // centering Y
+ Vector<Float, Dynamic> y = ySource.rowwise() - meanY;
+
+ // standard deviation of Y
+ Float stdDevY = sqrt(y.squaredNorm() / rowCount);
+
+ // check deviation
+ if(stdDevY == static_cast<Float>(0))
+ return UNCORRECT_ARGUMENTS_ERROR;
+
+ // normalizing Y
+ y /= stdDevY;
+
+ // create a vector associated with the regression coefficients
+ Map<Vector<Float, Dynamic>> b(regressionCoefficientsPtr, columnCount);
+
+ // create a vector associated with the prediction data
+ Map<Vector<Float, Dynamic>> prediction(predictionDataPtr, rowCount);
+
+ // weights matrix, W
+ Matrix<Float, Dynamic, Dynamic, ColMajor> W(columnCount, componentsCount);
+
+ // X-loadings matrix, P
+ Map<Matrix<Float, Dynamic, Dynamic, ColMajor>> P(predictorLoadingsPtr, columnCount, componentsCount);
+
+ //Matrix<Float, Dynamic, Dynamic, ColMajor> P(columnCount, componentsCount);
+
+ // X-scores, T
+ Map<Matrix<Float, Dynamic, Dynamic, ColMajor>> T(predictorScoresPtr, rowCount, componentsCount);
+
+ // Y-scores, U
+ Map<Matrix<Float, Dynamic, Dynamic, ColMajor>> U(responceScoresPtr, rowCount, componentsCount);
+
+ // Y-loadings, q
+ Vector<Float, Dynamic> q(componentsCount);
+
+ // PLS1 routine auxiliary vectors
+ Vector<Float, Dynamic> normTau(componentsCount);
+ Vector<Float, Dynamic> normV(componentsCount);
+
+ // PLS1 algorithm: see Algorithm 2 in https://doi.org/10.1002/cem.2589
+
+ Vector<Float, Dynamic> w = (X.transpose() * y);
+
+ normV(0) = w.norm();
+
+ // prevent division by zero
+ if (normV(0) == static_cast<Float>(0))
+ return METHOD_ERROR;
+
+ w = w / normV(0);
+
+ W.col(0) = w;
+
+ Vector<Float, Dynamic> t = X * w;
+
+ normTau(0) = t.norm();
+
+ // prevent division by zero
+ if (normTau(0) == static_cast<Float>(0))
+ return METHOD_ERROR;
+
+ t = t / normTau(0);
+
+ T.col(0) = t;
+
+ Vector<Float, Dynamic> p = X.transpose() * t;
+
+ P.col(0) = p;
+
+ q(0) = t.transpose() * y;
+
+ for (int a = 1; a < componentsCount; a++)
+ {
+ w = normV(a - 1) * (w - p / normTau(a - 1));
+
+ normV(a) = w.norm();
+
+ // prevent division by zero
+ if (normV(a) == static_cast<Float>(0))
+ return METHOD_ERROR;
+
+ w = w / normV(a);
+
+ W.col(a) = w;
+
+ t = X * w;
+
+ t = t - T.leftCols(a) * (T.leftCols(a).transpose() * t);
+
+ normTau(a) = t.norm();
+
+ // prevent division by zero
+ if (normTau(a) == static_cast<Float>(0))
+ return METHOD_ERROR;
+
+ t = t / normTau(a);
+
+ T.col(a) = t;
+
+ p = X.transpose() * t;
+
+ P.col(a) = p;
+
+ q(a) = t.transpose() * y;
+ } // for a
+
+ // compute Y-scores
+ U = y * q.transpose() / q.squaredNorm();
+
+ // compute the regression coefficients
+ Matrix<Float, Dynamic, Dynamic> H = P.transpose() * W;
+
+ // check that the inverse matrix exists
+ if (H.determinant() == static_cast<Float>(0))
+ return METHOD_ERROR;
+
+ // compute regression coefficients
+ b = W * H.inverse() * q;
+
+ // ... also, we take the normalization into account
+ for (int i = 0; i < columnCount; i++)
+ b(i) *= stdDevY / stdDevX(i);
+
+ // compute predictions
+ prediction = D * b;
+
+ // Remove the following comments in order to print and verify results
+ //cout << "\nW_star:\n" << Wstar << endl;
+ //cout << "\nU:\n" << U << endl;
+ //cout << "\nU.tr * U:\n" << U.transpose() * U << endl; // this must be identity matrix
+ //cout << "\nb:\n" << b << endl;
+ //cout << "\nq:\n" << q << endl;
+ //cout << "\nD:\n" << D << endl;
+ //cout << "\nP:\n" << P << endl;
+ //cout << "\nT:\n" << T << endl;
+ //cout << "\nT.tr * T:\n" << T.transpose() * T << endl; // this must be identity matrix
+ //cout << "\nW:\n" << W << endl;
+ //cout << "\nW.tr * W:\n" << W.transpose() * W << endl; // this must be identity matrix
+ //cout << "\nprediction\n" << prediction << endl;
+
+ return NO_ERROR;
+ } // partialLeastSquareExtended
+
+
+
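
Finally, a hedged sketch of the extended entry point. The buffer shapes follow the comments in PLS.h: T and U are rowCount x componentsCount and are written column-major through Eigen maps, P is columnCount x componentsCount. The file name and data are again illustrative assumptions.

// plsExtendedExample.cpp - hypothetical usage sketch, not shipped with the package.
#include <cstdio>
#include "PLS.h"

int main() {
  const int rowCount = 4;
  const int columnCount = 3;
  const int componentsCount = 2;

  pls::Float predictors[rowCount * columnCount] = {
    1, 2, 3, 4,   // column 1
    2, 1, 4, 3,   // column 2
    1, 1, 2, 2};  // column 3
  pls::Float response[rowCount] = {3, 4, 9, 10};

  pls::Float prediction[rowCount];
  pls::Float coefficients[columnCount];
  pls::Float tScores[rowCount * componentsCount];      // X-scores, T
  pls::Float uScores[rowCount * componentsCount];      // Y-scores, U
  pls::Float pLoadings[columnCount * componentsCount]; // X-loadings, P

  int code = pls::partialLeastSquareExtended(predictors, rowCount,
    columnCount, response, componentsCount, prediction, coefficients,
    tScores, uScores, pLoadings);
  if (code != pls::NO_ERROR)
    return 1;

  // T is written column-major, so component a starts at tScores + a * rowCount;
  // per the debug prints above, distinct T-columns should be orthogonal
  pls::Float dot = 0;
  for (int i = 0; i < rowCount; i++)
    dot += tScores[i] * tScores[i + rowCount];
  std::printf("t1 . t2 = %f (expected ~0)\n", dot);
  return 0;
}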