mlpack-4.6.1-cp313-cp313-win_amd64.whl → mlpack-4.6.2-cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlpack/__init__.py +1 -1
- mlpack/adaboost_classify.cp313-win_amd64.pyd +0 -0
- mlpack/adaboost_probabilities.cp313-win_amd64.pyd +0 -0
- mlpack/adaboost_train.cp313-win_amd64.pyd +0 -0
- mlpack/approx_kfn.cp313-win_amd64.pyd +0 -0
- mlpack/arma_numpy.cp313-win_amd64.pyd +0 -0
- mlpack/bayesian_linear_regression.cp313-win_amd64.pyd +0 -0
- mlpack/cf.cp313-win_amd64.pyd +0 -0
- mlpack/dbscan.cp313-win_amd64.pyd +0 -0
- mlpack/decision_tree.cp313-win_amd64.pyd +0 -0
- mlpack/det.cp313-win_amd64.pyd +0 -0
- mlpack/emst.cp313-win_amd64.pyd +0 -0
- mlpack/fastmks.cp313-win_amd64.pyd +0 -0
- mlpack/gmm_generate.cp313-win_amd64.pyd +0 -0
- mlpack/gmm_probability.cp313-win_amd64.pyd +0 -0
- mlpack/gmm_train.cp313-win_amd64.pyd +0 -0
- mlpack/hmm_generate.cp313-win_amd64.pyd +0 -0
- mlpack/hmm_loglik.cp313-win_amd64.pyd +0 -0
- mlpack/hmm_train.cp313-win_amd64.pyd +0 -0
- mlpack/hmm_viterbi.cp313-win_amd64.pyd +0 -0
- mlpack/hoeffding_tree.cp313-win_amd64.pyd +0 -0
- mlpack/image_converter.cp313-win_amd64.pyd +0 -0
- mlpack/include/mlpack/core/cv/k_fold_cv.hpp +21 -12
- mlpack/include/mlpack/core/cv/k_fold_cv_impl.hpp +49 -39
- mlpack/include/mlpack/core/data/detect_file_type_impl.hpp +9 -46
- mlpack/include/mlpack/core/data/save_impl.hpp +315 -315
- mlpack/include/mlpack/core/data/utilities.hpp +158 -158
- mlpack/include/mlpack/core/math/ccov.hpp +1 -0
- mlpack/include/mlpack/core/math/ccov_impl.hpp +4 -5
- mlpack/include/mlpack/core/math/make_alias.hpp +98 -3
- mlpack/include/mlpack/core/util/arma_traits.hpp +19 -2
- mlpack/include/mlpack/core/util/gitversion.hpp +1 -1
- mlpack/include/mlpack/core/util/sfinae_utility.hpp +24 -2
- mlpack/include/mlpack/core/util/version.hpp +1 -1
- mlpack/include/mlpack/methods/ann/dists/bernoulli_distribution_impl.hpp +1 -2
- mlpack/include/mlpack/methods/ann/init_rules/network_init.hpp +5 -5
- mlpack/include/mlpack/methods/ann/layer/batch_norm.hpp +3 -2
- mlpack/include/mlpack/methods/ann/layer/batch_norm_impl.hpp +19 -20
- mlpack/include/mlpack/methods/ann/layer/concat.hpp +1 -0
- mlpack/include/mlpack/methods/ann/layer/concat_impl.hpp +6 -7
- mlpack/include/mlpack/methods/ann/layer/convolution_impl.hpp +3 -3
- mlpack/include/mlpack/methods/ann/layer/grouped_convolution_impl.hpp +3 -3
- mlpack/include/mlpack/methods/ann/layer/linear3d.hpp +1 -0
- mlpack/include/mlpack/methods/ann/layer/linear3d_impl.hpp +11 -14
- mlpack/include/mlpack/methods/ann/layer/max_pooling.hpp +5 -4
- mlpack/include/mlpack/methods/ann/layer/max_pooling_impl.hpp +15 -14
- mlpack/include/mlpack/methods/ann/layer/mean_pooling.hpp +3 -2
- mlpack/include/mlpack/methods/ann/layer/mean_pooling_impl.hpp +14 -15
- mlpack/include/mlpack/methods/ann/layer/multihead_attention.hpp +6 -5
- mlpack/include/mlpack/methods/ann/layer/multihead_attention_impl.hpp +24 -25
- mlpack/include/mlpack/methods/ann/layer/nearest_interpolation.hpp +1 -0
- mlpack/include/mlpack/methods/ann/layer/nearest_interpolation_impl.hpp +4 -4
- mlpack/include/mlpack/methods/ann/layer/padding.hpp +1 -0
- mlpack/include/mlpack/methods/ann/layer/padding_impl.hpp +12 -13
- mlpack/include/mlpack/methods/ann/layer/recurrent_layer.hpp +3 -2
- mlpack/include/mlpack/methods/ann/loss_functions/cosine_embedding_loss_impl.hpp +5 -4
- mlpack/include/mlpack/methods/ann/rnn.hpp +19 -18
- mlpack/include/mlpack/methods/ann/rnn_impl.hpp +15 -15
- mlpack/include/mlpack/methods/bayesian_linear_regression/bayesian_linear_regression_impl.hpp +3 -8
- mlpack/include/mlpack/methods/decision_tree/fitness_functions/gini_gain.hpp +5 -8
- mlpack/include/mlpack/methods/decision_tree/fitness_functions/information_gain.hpp +5 -8
- mlpack/include/mlpack/methods/gmm/diagonal_gmm_impl.hpp +2 -1
- mlpack/include/mlpack/methods/gmm/eigenvalue_ratio_constraint.hpp +3 -3
- mlpack/include/mlpack/methods/gmm/gmm_impl.hpp +2 -1
- mlpack/include/mlpack/methods/hmm/hmm_impl.hpp +10 -5
- mlpack/include/mlpack/methods/random_forest/random_forest.hpp +57 -37
- mlpack/include/mlpack/methods/random_forest/random_forest_impl.hpp +69 -59
- mlpack/kde.cp313-win_amd64.pyd +0 -0
- mlpack/kernel_pca.cp313-win_amd64.pyd +0 -0
- mlpack/kfn.cp313-win_amd64.pyd +0 -0
- mlpack/kmeans.cp313-win_amd64.pyd +0 -0
- mlpack/knn.cp313-win_amd64.pyd +0 -0
- mlpack/krann.cp313-win_amd64.pyd +0 -0
- mlpack/lars.cp313-win_amd64.pyd +0 -0
- mlpack/linear_regression_predict.cp313-win_amd64.pyd +0 -0
- mlpack/linear_regression_train.cp313-win_amd64.pyd +0 -0
- mlpack/linear_svm.cp313-win_amd64.pyd +0 -0
- mlpack/lmnn.cp313-win_amd64.pyd +0 -0
- mlpack/local_coordinate_coding.cp313-win_amd64.pyd +0 -0
- mlpack/logistic_regression.cp313-win_amd64.pyd +0 -0
- mlpack/lsh.cp313-win_amd64.pyd +0 -0
- mlpack/mean_shift.cp313-win_amd64.pyd +0 -0
- mlpack/nbc.cp313-win_amd64.pyd +0 -0
- mlpack/nca.cp313-win_amd64.pyd +0 -0
- mlpack/nmf.cp313-win_amd64.pyd +0 -0
- mlpack/pca.cp313-win_amd64.pyd +0 -0
- mlpack/perceptron.cp313-win_amd64.pyd +0 -0
- mlpack/preprocess_binarize.cp313-win_amd64.pyd +0 -0
- mlpack/preprocess_describe.cp313-win_amd64.pyd +0 -0
- mlpack/preprocess_one_hot_encoding.cp313-win_amd64.pyd +0 -0
- mlpack/preprocess_scale.cp313-win_amd64.pyd +0 -0
- mlpack/preprocess_split.cp313-win_amd64.pyd +0 -0
- mlpack/radical.cp313-win_amd64.pyd +0 -0
- mlpack/random_forest.cp313-win_amd64.pyd +0 -0
- mlpack/softmax_regression.cp313-win_amd64.pyd +0 -0
- mlpack/sparse_coding.cp313-win_amd64.pyd +0 -0
- mlpack-4.6.2.dist-info/DELVEWHEEL +2 -0
- {mlpack-4.6.1.dist-info → mlpack-4.6.2.dist-info}/METADATA +2 -2
- {mlpack-4.6.1.dist-info → mlpack-4.6.2.dist-info}/RECORD +101 -101
- {mlpack-4.6.1.dist-info → mlpack-4.6.2.dist-info}/WHEEL +1 -1
- mlpack-4.6.1.dist-info/DELVEWHEEL +0 -2
- {mlpack-4.6.1.dist-info → mlpack-4.6.2.dist-info}/top_level.txt +0 -0
mlpack/include/mlpack/methods/ann/layer/batch_norm_impl.hpp

@@ -202,14 +202,14 @@ void BatchNormType<MatType>::Forward(
 
   // Input corresponds to output from previous layer.
   // Used a cube for simplicity.
-  arma::Cube<typename MatType::elem_type> inputTemp(
-      const_cast<MatType&>(input).memptr(), inputSize, size,
-      batchSize * higherDimension, false, false);
+  CubeType inputTemp;
+  MakeAlias(inputTemp, input, inputSize, size,
+      batchSize * higherDimension, 0, false);
 
   // Initialize output to same size and values for convenience.
-  arma::Cube<typename MatType::elem_type> outputTemp(
-      const_cast<MatType&>(output).memptr(), inputSize, size,
-      batchSize * higherDimension, false, false);
+  CubeType outputTemp;
+  MakeAlias(outputTemp, output, inputSize, size,
+      batchSize * higherDimension, 0, false);
   outputTemp = inputTemp;
 
   // Calculate mean and variance over all channels.

@@ -251,9 +251,9 @@ void BatchNormType<MatType>::Forward(
   {
     // Normalize the input and scale and shift the output.
     output = input;
-    arma::Cube<typename MatType::elem_type> outputTemp(
-        const_cast<MatType&>(output).memptr(), inputSize, size,
-        batchSize * higherDimension, false, false);
+    CubeType outputTemp;
+    MakeAlias(outputTemp, output, inputSize, size,
+        batchSize * higherDimension, 0, false);
 
     outputTemp.each_slice() -= repmat(runningMean.t(), inputSize, 1);
     outputTemp.each_slice() /= sqrt(repmat(runningVariance.t(),

@@ -276,16 +276,15 @@ void BatchNormType<MatType>::Backward(
   const size_t inputSize = inputDimension;
   const size_t m = inputSize * batchSize * higherDimension;
 
-  arma::Cube<typename MatType::elem_type> gyTemp(
-      const_cast<MatType&>(gy).memptr(), inputSize, size,
-      batchSize * higherDimension, false, false);
-  arma::Cube<typename MatType::elem_type> gTemp(
-      const_cast<MatType&>(g).memptr(), inputSize, size,
-      batchSize * higherDimension, false, false);
+  CubeType gyTemp;
+  MakeAlias(gyTemp, gy, inputSize, size,
+      batchSize * higherDimension, 0, false);
+  CubeType gTemp;
+  MakeAlias(gTemp, g, inputSize, size,
+      batchSize * higherDimension, 0, false);
 
   // Step 1: dl / dxhat.
-  arma::Cube<typename MatType::elem_type> norm =
-      gyTemp.each_slice() % repmat(gamma.t(), inputSize, 1);
+  CubeType norm = gyTemp.each_slice() % repmat(gamma.t(), inputSize, 1);
 
   // Step 2: sum dl / dxhat * (x - mu) * -0.5 * stdInv^3.
   MatType temp = sum(sum(norm % inputMean, 2), 0);

@@ -313,9 +312,9 @@ void BatchNormType<MatType>::Gradient(
 {
   const size_t inputSize = inputDimension;
 
-  arma::Cube<typename MatType::elem_type> errorTemp(
-      const_cast<MatType&>(error).memptr(), inputSize, size,
-      error.n_cols * higherDimension, false, false);
+  CubeType errorTemp;
+  MakeAlias(errorTemp, error, inputSize, size,
+      error.n_cols * higherDimension, 0, false);
 
   // Step 5: dl / dy * xhat.
   MatType temp = sum(sum(normalized % errorTemp, 0), 2);
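Every hunk in this file follows the pattern that recurs through the rest of the release: 4.6.1 aliased a matrix's memory as a cube via Armadillo's advanced Cube constructor, while 4.6.2 routes the same view through mlpack's MakeAlias() (extended per the make_alias.hpp +98 -3 entry above), which takes an element offset and a strictness flag in place of the constructor's copy_aux_mem/strict pair. A minimal standalone sketch of the two forms, assuming only the seven-argument overload visible in these hunks:

#include <mlpack/core.hpp>  // Pulls in Armadillo and mlpack::MakeAlias().

int main()
{
  arma::mat input(24, 2, arma::fill::randu);  // 48 elements in one buffer.

  // 4.6.1 style: advanced constructor; (false, false) means "do not copy
  // the memory, and do not bind strictly to this geometry".
  arma::cube oldView(input.memptr(), 4, 6, 2, false, false);

  // 4.6.2 style: MakeAlias(alias, source, rows, cols, slices, offset,
  // strict); offset 0 starts the view at input's first element.
  arma::cube newView;
  mlpack::MakeAlias(newView, input, 4, 6, 2, 0, false);

  // Both are views over the same buffer: a write through one alias is
  // visible through the other and through `input` itself.
  newView(0, 0, 0) = 1.0;
  return (input(0, 0) == 1.0 && oldView(0, 0, 0) == 1.0) ? 0 : 1;
}

The offset argument is what lets the MultiheadAttention hunks further down carve q, k, and v out of a single packed input matrix without raw memptr() arithmetic.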
mlpack/include/mlpack/methods/ann/layer/concat_impl.hpp

@@ -119,15 +119,14 @@ void ConcatType<MatType>::Forward(const MatType& input, MatType& output)
   for (size_t i = axis + 1; i < this->outputDimensions.size(); ++i)
     slices *= this->outputDimensions[i];
 
-  std::vector<arma::Cube<typename MatType::elem_type>> layerOutputAliases(
-      this->layerOutputs.size());
+  std::vector<CubeType> layerOutputAliases(this->layerOutputs.size());
   for (size_t i = 0; i < this->layerOutputs.size(); ++i)
   {
     MakeAlias(layerOutputAliases[i], this->layerOutputs[i], rows,
         this->network[i]->OutputDimensions()[axis], slices);
   }
 
-  arma::Cube<typename MatType::elem_type> outputAlias;
+  CubeType outputAlias;
   MakeAlias(outputAlias, output, rows, this->outputDimensions[axis], slices);
 
   // Now get the columns from each output.

@@ -163,7 +162,7 @@ void ConcatType<MatType>::Backward(
   for (size_t i = axis + 1; i < this->outputDimensions.size(); ++i)
     slices *= this->outputDimensions[i];
 
-  arma::Cube<typename MatType::elem_type> gyTmp;
+  CubeType gyTmp;
   MakeAlias(gyTmp, gy, rows, this->outputDimensions[axis], slices);
 
   size_t startCol = 0;

@@ -209,7 +208,7 @@ void ConcatType<MatType>::Backward(
   for (size_t i = axis + 1; i < this->outputDimensions.size(); ++i)
     slices *= this->outputDimensions[i];
 
-  arma::Cube<typename MatType::elem_type> gyTmp;
+  CubeType gyTmp;
   MakeAlias(gyTmp, gy, rows, this->outputDimensions[axis], slices);
 
   size_t startCol = 0;

@@ -243,7 +242,7 @@ void ConcatType<MatType>::Gradient(
   for (size_t i = axis + 1; i < this->outputDimensions.size(); ++i)
     slices *= this->outputDimensions[i];
 
-  arma::Cube<typename MatType::elem_type> errorTmp;
+  CubeType errorTmp;
   MakeAlias(errorTmp, error, rows, this->outputDimensions[axis], slices);
 
   size_t startCol = 0;

@@ -282,7 +281,7 @@ void ConcatType<MatType>::Gradient(
   for (size_t i = axis + 1; i < this->outputDimensions.size(); ++i)
     slices *= this->outputDimensions[i];
 
-  arma::Cube<typename MatType::elem_type> errorTmp;
+  CubeType errorTmp;
   MakeAlias(errorTmp, error, rows, this->outputDimensions[axis], slices);
 
   size_t startCol = 0;
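Note that ConcatType already used MakeAlias() in 4.6.1; only the alias declarations changed here, and the calls rely on MakeAlias()'s defaulted trailing arguments (no explicit offset or strictness). A hedged sketch of the same bookkeeping, with illustrative dimensions that are not taken from the diff:

#include <mlpack/core.hpp>
#include <vector>

int main()
{
  // One cube alias per stored layer output, as in ConcatType::Forward();
  // every alias shares memory with its underlying matrix.
  std::vector<arma::mat> layerOutputs(3, arma::mat(12, 2, arma::fill::randu));
  std::vector<arma::cube> aliases(layerOutputs.size());
  for (size_t i = 0; i < layerOutputs.size(); ++i)
    mlpack::MakeAlias(aliases[i], layerOutputs[i], 3, 4, 2);
  return (aliases[0](0, 0, 0) == layerOutputs[0](0, 0)) ? 0 : 1;
}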
mlpack/include/mlpack/methods/ann/layer/convolution_impl.hpp

@@ -500,9 +500,9 @@ void ConvolutionType<
   const size_t paddedRows = this->inputDimensions[0] + padWLeft + padWRight;
   const size_t paddedCols = this->inputDimensions[1] + padHTop + padHBottom;
 
-  CubeType inputTemp(
-      const_cast<MatType&>(usingPadding ? inputPadded : input).memptr(),
-      paddedRows, paddedCols, inMaps * batchSize, false, false);
+  CubeType inputTemp;
+  MakeAlias(inputTemp, (usingPadding ? inputPadded : input),
+      paddedRows, paddedCols, inMaps * batchSize, 0, false);
 
   MatType temp(apparentWidth * apparentHeight * inMaps * higherInDimensions,
       batchSize);
mlpack/include/mlpack/methods/ann/layer/grouped_convolution_impl.hpp

@@ -527,9 +527,9 @@ void GroupedConvolutionType<
   const size_t paddedRows = this->inputDimensions[0] + padWLeft + padWRight;
   const size_t paddedCols = this->inputDimensions[1] + padHTop + padHBottom;
 
-  CubeType inputTemp(
-      const_cast<MatType&>(usingPadding ? inputPadded : input).memptr(),
-      paddedRows, paddedCols, inMaps * batchSize, false, false);
+  CubeType inputTemp;
+  MakeAlias(inputTemp, (usingPadding ? inputPadded : input),
+      paddedRows, paddedCols, inMaps * batchSize, 0, false);
 
   MatType temp(apparentWidth * apparentHeight * inMaps * higherInDimensions,
       batchSize);
mlpack/include/mlpack/methods/ann/layer/linear3d_impl.hpp

@@ -98,13 +98,12 @@ template<typename MatType, typename RegularizerType>
 void Linear3DType<MatType, RegularizerType>::Forward(
     const MatType& input, MatType& output)
 {
-  using CubeType = arma::Cube<typename MatType::elem_type>;
-
   const size_t nPoints = input.n_rows / this->inputDimensions[0];
   const size_t batchSize = input.n_cols;
 
-  const CubeType inputTemp(const_cast<MatType&>(input).memptr(),
-      this->inputDimensions[0], nPoints, batchSize, false, false);
+  const CubeType inputTemp;
+  MakeAlias(const_cast<CubeType&>(inputTemp), input, this->inputDimensions[0],
+      nPoints, batchSize, 0, false);
 
   for (size_t i = 0; i < batchSize; ++i)
   {

@@ -123,8 +122,6 @@ void Linear3DType<MatType, RegularizerType>::Backward(
     const MatType& gy,
     MatType& g)
 {
-  using CubeType = arma::Cube<typename MatType::elem_type>;
-
   if (gy.n_rows % outSize != 0)
   {
     Log::Fatal << "Number of rows in propagated error must be divisible by "

@@ -134,8 +131,9 @@ void Linear3DType<MatType, RegularizerType>::Backward(
   const size_t nPoints = gy.n_rows / outSize;
   const size_t batchSize = gy.n_cols;
 
-  const CubeType gyTemp(const_cast<MatType&>(gy).memptr(), outSize, nPoints,
-      batchSize, false, false);
+  const CubeType gyTemp;
+  MakeAlias(const_cast<CubeType&>(gyTemp), gy, outSize, nPoints, batchSize,
+      0, false);
 
   for (size_t i = 0; i < gyTemp.n_slices; ++i)
   {

@@ -151,18 +149,17 @@ void Linear3DType<MatType, RegularizerType>::Gradient(
     const MatType& error,
     MatType& gradient)
 {
-  using CubeType = arma::Cube<typename MatType::elem_type>;
-
   if (error.n_rows % outSize != 0)
     Log::Fatal << "Propagated error matrix has invalid dimension!" << std::endl;
 
   const size_t nPoints = input.n_rows / this->inputDimensions[0];
   const size_t batchSize = input.n_cols;
 
-  const CubeType inputTemp(const_cast<MatType&>(input).memptr(),
-      this->inputDimensions[0], nPoints, batchSize, false, false);
-  const CubeType errorTemp(const_cast<MatType&>(error).memptr(), outSize,
-      nPoints, batchSize, false, false);
+  const CubeType inputTemp, errorTemp;
+  MakeAlias(const_cast<CubeType&>(inputTemp), input, this->inputDimensions[0],
+      nPoints, batchSize, 0, false);
+  MakeAlias(const_cast<CubeType&>(errorTemp), error, outSize, nPoints,
+      batchSize, 0, false);
 
   CubeType dW(outSize, this->inputDimensions[0], batchSize);
   for (size_t i = 0; i < batchSize; ++i)
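Linear3D introduces the const-alias idiom that MultiheadAttention also uses further down: the alias is declared const so the rest of the function cannot write through it, and const_cast is applied only for the single MakeAlias() call that binds it to the input's memory. A minimal sketch of the idiom, with hypothetical dimension parameters:

#include <mlpack/core.hpp>

// Bind a read-only cube view over `input` (rows x points x batch). The
// const_cast is confined to the MakeAlias() call; afterwards, writes
// through `view` will not compile, documenting that `input` stays intact.
double FirstElement(const arma::mat& input, size_t rows, size_t points,
                    size_t batch)
{
  const arma::cube view;
  mlpack::MakeAlias(const_cast<arma::cube&>(view), input, rows, points,
      batch, 0, false);
  return view(0, 0, 0);
}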
mlpack/include/mlpack/methods/ann/layer/max_pooling.hpp

@@ -59,6 +59,7 @@ template<typename MatType = arma::mat>
 class MaxPoolingType : public Layer<MatType>
 {
  public:
+  using CubeType = typename GetCubeType<MatType>::type;
   //! Create the MaxPooling object.
   MaxPoolingType();
 

@@ -160,8 +161,8 @@ class MaxPoolingType : public Layer<MatType>
    * @param poolingIndices The pooled indices.
    */
   void PoolingOperation(
-      const arma::Cube<typename MatType::elem_type>& input,
-      arma::Cube<typename MatType::elem_type>& output,
+      const CubeType& input,
+      CubeType& output,
       arma::Cube<size_t>& poolingIndices)
   {
     // Iterate over all slices individually.

@@ -222,8 +223,8 @@ class MaxPoolingType : public Layer<MatType>
    * @param output The pooled result.
    */
   void PoolingOperation(
-      const arma::Cube<typename MatType::elem_type>& input,
-      arma::Cube<typename MatType::elem_type>& output)
+      const CubeType& input,
+      CubeType& output)
   {
     // Iterate over all slices individually.
     #pragma omp parallel for
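The new class-level using pulls the cube type from a GetCubeType<MatType> trait instead of spelling out arma::Cube<typename MatType::elem_type> at every call site; the trait presumably comes in via the arma_traits.hpp / sfinae_utility.hpp changes listed above. A hedged sketch of what such a trait looks like for the dense Armadillo case only; mlpack's real trait may cover more type families:

#include <armadillo>
#include <type_traits>

// Map a dense matrix type to the cube type with the same element type, so
// layer code can write `typename GetCubeTypeSketch<MatType>::type` once.
template<typename MatType>
struct GetCubeTypeSketch
{
  using type = arma::Cube<typename MatType::elem_type>;
};

static_assert(
    std::is_same_v<GetCubeTypeSketch<arma::fmat>::type, arma::fcube>,
    "fmat should map to fcube");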
mlpack/include/mlpack/methods/ann/layer/max_pooling_impl.hpp

@@ -114,13 +114,14 @@ MaxPoolingType<MatType>::operator=(MaxPoolingType&& other)
 template<typename MatType>
 void MaxPoolingType<MatType>::Forward(const MatType& input, MatType& output)
 {
-  arma::Cube<typename MatType::elem_type> inputTemp(
-      const_cast<MatType&>(input).memptr(), this->inputDimensions[0],
-      this->inputDimensions[1], input.n_cols * channels, false, false);
+  using CubeType = typename GetCubeType<MatType>::type;
+  CubeType inputTemp;
+  MakeAlias(inputTemp, input, this->inputDimensions[0],
+      this->inputDimensions[1], input.n_cols * channels, 0, false);
 
-  arma::Cube<typename MatType::elem_type> outputTemp(output.memptr(),
-      this->outputDimensions[0], this->outputDimensions[1],
-      input.n_cols * channels, false, true);
+  CubeType outputTemp;
+  MakeAlias(outputTemp, output, this->outputDimensions[0],
+      this->outputDimensions[1], input.n_cols * channels, 0, true);
 
   if (this->training)
   {

@@ -144,14 +145,14 @@ void MaxPoolingType<MatType>::Backward(
     const MatType& gy,
     MatType& g)
 {
-  arma::Cube<typename MatType::elem_type> mappedError(
-      const_cast<MatType&>(gy).memptr(), this->outputDimensions[0],
-      this->outputDimensions[1],
-      channels * input.n_cols, false, false);
-
-  arma::Cube<typename MatType::elem_type> gTemp(g.memptr(),
-      this->inputDimensions[0], this->inputDimensions[1],
-      channels * input.n_cols, false, true);
+  using CubeType = typename GetCubeType<MatType>::type;
+  CubeType mappedError;
+  MakeAlias(mappedError, gy, this->outputDimensions[0],
+      this->outputDimensions[1], channels * input.n_cols, 0, false);
+
+  CubeType gTemp;
+  MakeAlias(gTemp, g, this->inputDimensions[0], this->inputDimensions[1],
+      channels * input.n_cols, 0, true);
 
   gTemp.zeros();
 
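Throughout these pooling hunks, aliases over inputs end in `0, false` while aliases over outputs and gradients end in `0, true`. The final argument corresponds to Armadillo's strict flag: a strict alias stays bound to the memory it wraps and refuses to change geometry, while a non-strict alias silently reallocates and detaches if resized. A small standalone sketch of that difference, assuming standard Armadillo advanced-constructor semantics:

#include <armadillo>
#include <stdexcept>

int main()
{
  arma::mat m(12, 1, arma::fill::zeros);

  // Non-strict alias: resizing is legal but detaches the cube from m's
  // memory (it reallocates and stops being a view).
  arma::cube loose(m.memptr(), 3, 4, 1, false, false);
  loose.set_size(5, 5, 1);

  // Strict alias: the geometry is pinned, so resizing throws instead of
  // silently detaching.
  arma::cube pinned(m.memptr(), 3, 4, 1, false, true);
  try { pinned.set_size(5, 5, 1); return 1; }
  catch (const std::logic_error&) { return 0; }  // Expected path.
}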
mlpack/include/mlpack/methods/ann/layer/mean_pooling.hpp

@@ -29,6 +29,7 @@ template <typename MatType = arma::mat>
 class MeanPoolingType : public Layer<MatType>
 {
  public:
+  using CubeType = typename GetCubeType<MatType>::type;
   //! Create the MeanPoolingType object.
   MeanPoolingType();
 

@@ -129,8 +130,8 @@ class MeanPoolingType : public Layer<MatType>
    * @param output The pooled result.
    */
   void PoolingOperation(
-      const arma::Cube<typename MatType::elem_type>& input,
-      arma::Cube<typename MatType::elem_type>& output);
+      const CubeType& input,
+      CubeType& output);
 
   /**
    * Apply unpooling to the input and store the results.
mlpack/include/mlpack/methods/ann/layer/mean_pooling_impl.hpp

@@ -113,14 +113,14 @@ void MeanPoolingType<MatType>::Forward(
     const MatType& input, MatType& output)
 {
   // Create Alias of input as 2D image as input is 1D vector.
-  arma::Cube<typename MatType::elem_type> inputTemp(
-      const_cast<MatType&>(input).memptr(), this->inputDimensions[0],
-      this->inputDimensions[1], input.n_cols * channels, false, false);
+  CubeType inputTemp;
+  MakeAlias(inputTemp, input, this->inputDimensions[0],
+      this->inputDimensions[1], input.n_cols * channels, 0, false);
 
   // Create Alias of output as 2D image as output is 1D vector.
-  arma::Cube<typename MatType::elem_type> outputTemp(output.memptr(),
-      this->outputDimensions[0], this->outputDimensions[1],
-      input.n_cols * channels, false, true);
+  CubeType outputTemp;
+  MakeAlias(outputTemp, output, this->outputDimensions[0],
+      this->outputDimensions[1], input.n_cols * channels, 0, true);
 
   // Apply Pooling to the input.
   PoolingOperation(inputTemp, outputTemp);

@@ -134,15 +134,14 @@ void MeanPoolingType<MatType>::Backward(
     MatType& g)
 {
   // Create Alias of gy as 2D matrix as gy is 1D vector.
-  arma::Cube<typename MatType::elem_type> mappedError(
-      const_cast<MatType&>(gy).memptr(), this->outputDimensions[0],
-      this->outputDimensions[1],
-      channels * input.n_cols, false, false);
+  CubeType mappedError;
+  MakeAlias(mappedError, gy, this->outputDimensions[0],
+      this->outputDimensions[1], channels * input.n_cols, 0, false);
 
   // Create Alias of g as 2D matrix as g is 1D vector.
-  arma::Cube<typename MatType::elem_type> gTemp(g.memptr(),
-      this->inputDimensions[0], this->inputDimensions[1],
-      channels * input.n_cols, false, true);
+  CubeType gTemp;
+  MakeAlias(gTemp, g, this->inputDimensions[0],
+      this->inputDimensions[1], channels * input.n_cols, 0, true);
 
   // Initialize the gradient with zero.
   gTemp.zeros();

@@ -201,8 +200,8 @@ void MeanPoolingType<MatType>::serialize(
 
 template<typename MatType>
 void MeanPoolingType<MatType>::PoolingOperation(
-    const arma::Cube<typename MatType::elem_type>& input,
-    arma::Cube<typename MatType::elem_type>& output)
+    const CubeType& input,
+    CubeType& output)
 {
   // Iterate over all slices individually.
   #pragma omp parallel for
mlpack/include/mlpack/methods/ann/layer/multihead_attention.hpp

@@ -69,6 +69,7 @@ template <
 class MultiheadAttentionType : public Layer<MatType>
 {
  public:
+  using CubeType = typename GetCubeType<MatType>::type;
   /**
    * Default constructor.
    */

@@ -321,19 +322,19 @@ class MultiheadAttentionType : public Layer<MatType>
   MatType weights;
 
   //! Locally-stored projected query matrix over linear layer.
-  arma::Cube<typename MatType::elem_type> qProj;
+  CubeType qProj;
 
   //! Locally-stored projected key matrix over linear layer.
-  arma::Cube<typename MatType::elem_type> kProj;
+  CubeType kProj;
 
   //! Locally-stored projected value matrix over linear layer.
-  arma::Cube<typename MatType::elem_type> vProj;
+  CubeType vProj;
 
   //! Locally-stored result of output of dropout layer.
-  arma::Cube<typename MatType::elem_type> scores;
+  CubeType scores;
 
   //! Locally-stored attention output weight to be fed to last linear layer.
-  arma::Cube<typename MatType::elem_type> attnOut;
+  CubeType attnOut;
 
   //! Softmax layer to represent the probabilities of next sequence.
   SoftmaxType<MatType> softmax;
mlpack/include/mlpack/methods/ann/layer/multihead_attention_impl.hpp

@@ -73,8 +73,6 @@ template <typename MatType, typename RegularizerType>
 void MultiheadAttentionType<MatType, RegularizerType>::
 Forward(const MatType& input, MatType& output)
 {
-  using CubeType = arma::Cube<typename MatType::elem_type>;
-
   if (input.n_rows != embedDim *
       (selfAttention ? srcSeqLen : (tgtSeqLen + 2 * srcSeqLen)))
   {

@@ -97,14 +95,14 @@ Forward(const MatType& input, MatType& output)
   // The shape of q : (embedDim, tgtSeqLen, batchSize).
   // The shape of k : (embedDim, srcSeqLen, batchSize).
   // The shape of v : (embedDim, srcSeqLen, batchSize).
-  const CubeType q(const_cast<MatType&>(input).memptr(), embedDim,
-      tgtSeqLen, batchSize, false, false);
-  const CubeType k(const_cast<MatType&>(input).memptr() +
-      (selfAttention ? 0 : embedDim * tgtSeqLen * batchSize),
-      embedDim, srcSeqLen, batchSize, false, false);
-  const CubeType v(const_cast<MatType&>(input).memptr() +
+  const CubeType q, k, v;
+  MakeAlias(const_cast<CubeType&>(q), input, embedDim, tgtSeqLen, batchSize,
+      0, false);
+  MakeAlias(const_cast<CubeType&>(k), input, embedDim, srcSeqLen, batchSize,
+      (selfAttention ? 0 : embedDim * tgtSeqLen * batchSize), false);
+  MakeAlias(const_cast<CubeType&>(v), input, embedDim, srcSeqLen, batchSize,
       (selfAttention ? 0 : embedDim * (tgtSeqLen + srcSeqLen) * batchSize),
-      embedDim, srcSeqLen, batchSize, false, false);
+      false);
 
   // qProj, kProj, and vProj are the linearly projected query, key and value
   // respectively.

@@ -187,8 +185,6 @@ Backward(const MatType& /* input */,
     const MatType& gy,
     MatType& g)
 {
-  using CubeType = arma::Cube<typename MatType::elem_type>;
-
   if (gy.n_rows != tgtSeqLen * embedDim)
   {
     Log::Fatal << "Backpropagated error has incorrect dimensions!" << std::endl;

@@ -202,8 +198,11 @@ Backward(const MatType& /* input */,
   // The shape of gyTemp : (tgtSeqLen, embedDim, batchSize).
   // We need not split it into n heads now because this is the part when
   // output were concatenated from n heads.
-  CubeType gyTemp(const_cast<MatType&>(gy).memptr(), embedDim, tgtSeqLen,
-      batchSize, true, false);
+  const CubeType gyTempAlias;
+  MakeAlias(const_cast<CubeType&>(gyTempAlias), gy, embedDim, tgtSeqLen,
+      batchSize, 0, false);
+  // Make a copy of the alias so gy actually remains constant
+  CubeType gyTemp = gyTempAlias;
 
   // The shape of gyTemp : (embedDim, tgtSeqLen, batchSize).
   // The shape of outWt : (embedDim, embedDim).

@@ -306,8 +305,6 @@ Gradient(const MatType& input,
     const MatType& error,
     MatType& gradient)
 {
-  using CubeType = arma::Cube<typename MatType::elem_type>;
-
   if (input.n_rows != embedDim * (selfAttention ? srcSeqLen :
       (tgtSeqLen + 2 * srcSeqLen)))
   {

@@ -332,19 +329,21 @@ Gradient(const MatType& input,
   // The shape of gradient : (4 * embedDim * embedDim + 4 * embedDim, 1).
   gradient.set_size(arma::size(weights));
 
-  const CubeType q(const_cast<MatType&>(input).memptr(), embedDim, tgtSeqLen,
-      batchSize, false, false);
-  const CubeType k(const_cast<MatType&>(input).memptr() +
-      (selfAttention ? 0 : q.n_elem), embedDim, srcSeqLen, batchSize, false,
-      false);
-  const CubeType v(const_cast<MatType&>(input).memptr() +
-      (selfAttention ? 0 : (q.n_elem + k.n_elem)), embedDim, srcSeqLen,
-      batchSize, false, false);
+  const CubeType q, k, v;
+  MakeAlias(const_cast<CubeType&>(q), input, embedDim, tgtSeqLen, batchSize,
+      0, false);
+  MakeAlias(const_cast<CubeType&>(k), input, embedDim, srcSeqLen, batchSize,
+      (selfAttention ? 0 : q.n_elem), false);
+  MakeAlias(const_cast<CubeType&>(v), input, embedDim, srcSeqLen, batchSize,
+      (selfAttention ? 0 : (q.n_elem + k.n_elem)), false);
 
   // Reshape the propagated error into a cube.
   // The shape of errorTemp : (embedDim, tgtSeqLen, batchSize).
-  CubeType errorTemp(const_cast<MatType&>(error).memptr(), embedDim,
-      tgtSeqLen, batchSize, true, false);
+  const CubeType errorTempAlias;
+  MakeAlias(const_cast<CubeType&>(errorTempAlias), error, embedDim, tgtSeqLen,
+      batchSize, 0, false);
+  // Make a copy of the alias so error actually remains constant
+  CubeType errorTemp = errorTempAlias;
 
   // Gradient wrt. outBias, i.e. dL/d(outBias).
   gradient.rows(4 * wtSize + 3 * embedDim, 4 * wtSize + 4 * embedDim - 1)
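The Backward() and Gradient() hunks replace constructors that passed copy_aux_mem = true, i.e. the 4.6.1 code already copied the buffer; 4.6.2 reproduces that with a read-only alias followed by an explicit copy, which is what the "so gy actually remains constant" comments refer to. A sketch of the alias-then-copy idiom with hypothetical shape parameters:

#include <mlpack/core.hpp>

// View `gy` as (embedDim x tgtSeqLen x batchSize) without copying, then
// deep-copy the view into a cube the backward pass may modify in place.
arma::cube CopyAsCube(const arma::mat& gy, size_t embedDim,
                      size_t tgtSeqLen, size_t batchSize)
{
  const arma::cube gyAlias;
  mlpack::MakeAlias(const_cast<arma::cube&>(gyAlias), gy, embedDim,
      tgtSeqLen, batchSize, 0, false);
  arma::cube gyTemp = gyAlias;  // Deep copy; `gy` itself is never written.
  return gyTemp;
}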
mlpack/include/mlpack/methods/ann/layer/nearest_interpolation_impl.hpp

@@ -92,8 +92,8 @@ void NearestInterpolationType<MatType>::Forward(
   const size_t inRowSize = this->inputDimensions[0];
   const size_t inColSize = this->inputDimensions[1];
 
-  arma::Cube<typename MatType::elem_type> inputAsCube;
-  arma::Cube<typename MatType::elem_type> outputAsCube;
+  CubeType inputAsCube;
+  CubeType outputAsCube;
 
   MakeAlias(inputAsCube, input, inRowSize, inColSize, channels, 0, false);
   MakeAlias(outputAsCube, output, outRowSize, outColSize, channels, 0, true);

@@ -126,8 +126,8 @@ void NearestInterpolationType<MatType>::Backward(
   const size_t inRowSize = this->inputDimensions[0];
   const size_t inColSize = this->inputDimensions[1];
 
-  arma::Cube<typename MatType::elem_type> outputAsCube;
-  arma::Cube<typename MatType::elem_type> gradientAsCube;
+  CubeType outputAsCube;
+  CubeType gradientAsCube;
 
   MakeAlias(outputAsCube, output, inRowSize, inColSize, channels, 0, true);
   MakeAlias(gradientAsCube, gradient, outRowSize, outColSize, channels, 0,
mlpack/include/mlpack/methods/ann/layer/padding_impl.hpp

@@ -97,13 +97,12 @@ void PaddingType<MatType>::Forward(const MatType& input, MatType& output)
 {
   // Make an alias of the input and output so that we can deal with the first
   // two dimensions directly.
-  arma::Cube<typename MatType::elem_type> reshapedInput(
-      const_cast<MatType&>(input).memptr(),
-      this->inputDimensions[0], this->inputDimensions[1],
-      totalInMaps * input.n_cols, false, true);
-  arma::Cube<typename MatType::elem_type> reshapedOutput(output.memptr(),
-      this->outputDimensions[0], this->outputDimensions[1],
-      totalInMaps * output.n_cols, false, true);
+  CubeType reshapedInput;
+  MakeAlias(reshapedInput, input, this->inputDimensions[0],
+      this->inputDimensions[1], totalInMaps * input.n_cols, 0, true);
+  CubeType reshapedOutput;
+  MakeAlias(reshapedOutput, output, this->outputDimensions[0],
+      this->outputDimensions[1], totalInMaps * output.n_cols, 0, true);
 
   // Set the padding parts to 0.
   if (padHTop > 0)

@@ -154,12 +153,12 @@ void PaddingType<MatType>::Backward(
 {
   // Reshape g and gy so that extracting the un-padded input is easier to
   // understand.
-  arma::Cube<typename MatType::elem_type> reshapedGy(
-      const_cast<MatType&>(gy).memptr(), this->outputDimensions[0],
-      this->outputDimensions[1], totalInMaps * gy.n_cols, false, true);
-  arma::Cube<typename MatType::elem_type> reshapedG(g.memptr(),
-      this->inputDimensions[0], this->inputDimensions[1],
-      totalInMaps * g.n_cols, false, true);
+  CubeType reshapedGy;
+  MakeAlias(reshapedGy, gy, this->outputDimensions[0],
+      this->outputDimensions[1], totalInMaps * gy.n_cols, 0, true);
+  CubeType reshapedG;
+  MakeAlias(reshapedG, g, this->inputDimensions[0],
+      this->inputDimensions[1], totalInMaps * g.n_cols, 0, true);
 
   reshapedG = reshapedGy.tube(padWLeft,
       padHTop,
mlpack/include/mlpack/methods/ann/layer/recurrent_layer.hpp

@@ -58,6 +58,7 @@ template<typename MatType = arma::mat>
 class RecurrentLayer : public Layer<MatType>
 {
  public:
+  using CubeType = typename GetCubeType<MatType>::type;
   /**
    * Create the RecurrentLayer.
    */

@@ -152,10 +153,10 @@ class RecurrentLayer : public Layer<MatType>
   // This holds the recurrent state at each time step for BPTT. If BPTT is not
   // being used (e.g. if we are only running the network in forward mode and not
   // training), then only one previous time step is held.
-  arma::Cube<typename MatType::elem_type> recurrentState;
+  CubeType recurrentState;
   // This holds the recurrent gradient for BPTT. If BPTT is not being used,
   // this is empty.
-  arma::Cube<typename MatType::elem_type> recurrentGradient;
+  CubeType recurrentGradient;
 };
 
 } // namespace mlpack
mlpack/include/mlpack/methods/ann/loss_functions/cosine_embedding_loss_impl.hpp

@@ -67,6 +67,7 @@ void CosineEmbeddingLossType<MatType>::Backward(
     const MatType& target,
     MatType& loss)
 {
+  using ColType = typename GetColType<MatType>::type;
   using ElemType = typename MatType::elem_type;
 
   const size_t cols = prediction.n_cols;

@@ -74,12 +75,12 @@ void CosineEmbeddingLossType<MatType>::Backward(
   if (arma::size(prediction) != arma::size(target))
     Log::Fatal << "Input Tensors must have same dimensions." << std::endl;
 
-  arma::Col<ElemType> inputTemp1 = vectorise(prediction);
-  arma::Col<ElemType> inputTemp2 = vectorise(target);
+  ColType inputTemp1 = vectorise(prediction);
+  ColType inputTemp2 = vectorise(target);
   loss.set_size(arma::size(inputTemp1));
 
-  arma::Col<ElemType> outputTemp(loss.memptr(), inputTemp1.n_elem,
-      false, false);
+  ColType outputTemp;
+  MakeAlias(outputTemp, loss, inputTemp1.n_elem, 0, false);
   for (size_t i = 0; i < inputTemp1.n_elem; i += cols)
   {
     const ElemType cosDist = CosineDistance::Evaluate(inputTemp1(
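This last hunk shows the vector form of the same helper: MakeAlias(alias, source, n_elem, offset, strict) views a matrix as a flat column, replacing the old arma::Col advanced constructor. A short sketch of that overload as it is used here:

#include <mlpack/core.hpp>

int main()
{
  // View all six elements of `loss` as one flat column, starting at
  // offset 0, exactly as cosine_embedding_loss_impl.hpp does above.
  arma::mat loss(2, 3, arma::fill::zeros);
  arma::vec flat;
  mlpack::MakeAlias(flat, loss, loss.n_elem, 0, false);
  flat.fill(1.0);  // Writes land in `loss` itself.
  return (loss(1, 2) == 1.0) ? 0 : 1;
}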
|