PyPI - mlpack - Versions diffs - 4.6.2__cp313-cp313-win_amd64.whl → 4.7.0__cp313-cp313-win_amd64.whl - Mend

mlpack 4.6.2__cp313-cp313-win_amd64.whl → 4.7.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (415)

mlpack/include/mlpack/methods/ann/layer/base_layer.hpp CHANGED Viewed

@@ -24,6 +24,7 @@
 #include <mlpack/methods/ann/activation_functions/mish_function.hpp>
 #include <mlpack/methods/ann/activation_functions/lisht_function.hpp>
 #include <mlpack/methods/ann/activation_functions/gelu_function.hpp>
+#include <mlpack/methods/ann/activation_functions/gelu_exact_function.hpp>
 #include <mlpack/methods/ann/activation_functions/elliot_function.hpp>
 #include <mlpack/methods/ann/activation_functions/elish_function.hpp>
 #include <mlpack/methods/ann/activation_functions/gaussian_function.hpp>
@@ -51,6 +52,7 @@ namespace mlpack {
  *  - Mish
  *  - LiSHT
  *  - GELU
+ *  - GELUExact
  *  - ELiSH
  *  - Elliot
  *  - Gaussian
@@ -68,6 +70,9 @@ template <
 class BaseLayer : public Layer<MatType>
 {
  public:
+  // Convenience typedef to access the element type of the weights and data.
+  using ElemType = typename MatType::elem_type;
   /**
    * Create the BaseLayer object.
    */
@@ -83,7 +88,7 @@ class BaseLayer : public Layer<MatType>
   // members.
   //! Clone the BaseLayer object. This handles polymorphism correctly.
-  BaseLayer* Clone() const { return new BaseLayer(*this); }
+  virtual BaseLayer* Clone() const { return new BaseLayer(*this); }
   /**
    * Forward pass: apply the activation to the inputs.
@@ -131,138 +136,110 @@ class BaseLayer : public Layer<MatType>
 /**
  * Standard Sigmoid-Layer using the logistic activation function.
  */
-using Sigmoid = BaseLayer<LogisticFunction, arma::mat>;
 template<typename MatType = arma::mat>
-using SigmoidType = BaseLayer<LogisticFunction, MatType>;
+using Sigmoid = BaseLayer<LogisticFunction, MatType>;
 /**
  * Standard rectified linear unit non-linearity layer.
  */
-using ReLU = BaseLayer<RectifierFunction, arma::mat>;
 template<typename MatType = arma::mat>
-using ReLUType = BaseLayer<RectifierFunction, MatType>;
+using ReLU = BaseLayer<RectifierFunction, MatType>;
 /**
  * Standard hyperbolic tangent layer.
  */
-using TanH = BaseLayer<TanhFunction, arma::mat>;
 template<typename MatType = arma::mat>
-using TanHType = BaseLayer<TanhFunction, MatType>;
+using TanH = BaseLayer<TanhFunction, MatType>;
 /**
  * Standard Softplus-Layer using the Softplus activation function.
  */
-using SoftPlus = BaseLayer<SoftplusFunction, arma::mat>;
 template<typename MatType = arma::mat>
-using SoftPlusType = BaseLayer<SoftplusFunction, MatType>;
+using SoftPlus = BaseLayer<SoftplusFunction, MatType>;
 /**
  * Standard HardSigmoid-Layer using the HardSigmoid activation function.
  */
-using HardSigmoid = BaseLayer<HardSigmoidFunction, arma::mat>;
 template<typename MatType = arma::mat>
-using HardSigmoidType = BaseLayer<HardSigmoidFunction, MatType>;
+using HardSigmoid = BaseLayer<HardSigmoidFunction, MatType>;
 /**
  * Standard Swish-Layer using the Swish activation function.
  */
-using Swish = BaseLayer<SwishFunction, arma::mat>;
 template<typename MatType = arma::mat>
-using SwishType = BaseLayer<SwishFunction, MatType>;
+using Swish = BaseLayer<SwishFunction, MatType>;
 /**
  * Standard Mish-Layer using the Mish activation function.
  */
-using Mish = BaseLayer<MishFunction, arma::mat>;
 template<typename MatType = arma::mat>
-using MishType = BaseLayer<MishFunction, MatType>;
+using Mish = BaseLayer<MishFunction, MatType>;
 /**
  * Standard LiSHT-Layer using the LiSHT activation function.
  */
-using LiSHT = BaseLayer<LiSHTFunction, arma::mat>;
 template<typename MatType = arma::mat>
-using LiSHTType = BaseLayer<LiSHTFunction, MatType>;
+using LiSHT = BaseLayer<LiSHTFunction, MatType>;
 /**
  * Standard GELU-Layer using the GELU activation function.
  */
-using GELU = BaseLayer<GELUFunction, arma::mat>;
+template<typename MatType = arma::mat>
+using GELU = BaseLayer<GELUFunction, MatType>;
+/**
+ * Standard GELUExact-Layer using the GELUExact activation function.
+ */
 template<typename MatType = arma::mat>
-using GELUType = BaseLayer<GELUFunction, MatType>;
+using GELUExact = BaseLayer<GELUExactFunction, MatType>;
 /**
  * Standard Elliot-Layer using the Elliot activation function.
  */
-using Elliot = BaseLayer<ElliotFunction, arma::mat>;
 template<typename MatType = arma::mat>
-using ElliotType = BaseLayer<ElliotFunction, MatType>;
+using Elliot = BaseLayer<ElliotFunction, MatType>;
 /**
  * Standard ELiSH-Layer using the ELiSH activation function.
  */
-using Elish = BaseLayer<ElishFunction, arma::mat>;
 template<typename MatType = arma::mat>
-using ElishType = BaseLayer<ElishFunction, MatType>;
+using Elish = BaseLayer<ElishFunction, MatType>;
 /**
  * Standard Gaussian-Layer using the Gaussian activation function.
  */
-using Gaussian = BaseLayer<GaussianFunction, arma::mat>;
 template<typename MatType = arma::mat>
-using GaussianType = BaseLayer<GaussianFunction, MatType>;
+using Gaussian = BaseLayer<GaussianFunction, MatType>;
 /**
  * Standard HardSwish-Layer using the HardSwish activation function.
  */
-using HardSwish = BaseLayer<HardSwishFunction, arma::mat>;
 template <typename MatType = arma::mat>
-using HardSwishType = BaseLayer<HardSwishFunction, MatType>;
+using HardSwish = BaseLayer<HardSwishFunction, MatType>;
 /**
  * Standard TanhExp-Layer using the TanhExp activation function.
  */
-using TanhExp = BaseLayer<TanhExpFunction, arma::mat>;
 template<typename MatType = arma::mat>
-using TanhExpType = BaseLayer<TanhExpFunction, MatType>;
+using TanhExp = BaseLayer<TanhExpFunction, MatType>;
 /**
  * Standard SILU-Layer using the SILU activation function.
  */
-using SILU = BaseLayer<SILUFunction, arma::mat>;
 template<typename MatType = arma::mat>
-using SILUType = BaseLayer<SILUFunction, MatType>;
+using SILU = BaseLayer<SILUFunction, MatType>;
 /**
  * Standard Hyper Sinh layer.
  */
-using HyperSinh = BaseLayer<HyperSinhFunction, arma::mat>;
 template<typename MatType = arma::mat>
-using HyperSinhType = BaseLayer<HyperSinhFunction, MatType>;
+using HyperSinh = BaseLayer<HyperSinhFunction, MatType>;
 /**
  * Standard Bipolar Sigmoid layer.
  */
-using BipolarSigmoid = BaseLayer<BipolarSigmoidFunction, arma::mat>;
 template<typename MatType = arma::mat>
-using BipolarSigmoidType = BaseLayer<BipolarSigmoidFunction, MatType>;
+using BipolarSigmoid = BaseLayer<BipolarSigmoidFunction, MatType>;
 } // namespace mlpack

mlpack/include/mlpack/methods/ann/layer/batch_norm.hpp CHANGED Viewed

@@ -50,10 +50,13 @@ namespace mlpack {
  *         computation.
  */
 template <typename MatType = arma::mat>
-class BatchNormType : public Layer<MatType>
+class BatchNorm : public Layer<MatType>
 {
  public:
+  // Convenience typedefs to access the element type of the weights and data.
+  using ElemType = typename MatType::elem_type;
   using CubeType = typename GetCubeType<MatType>::type;
   /**
    * Create the BatchNorm object.
    *
@@ -72,7 +75,7 @@ class BatchNormType : public Layer<MatType>
    * three dimensions rows, columns and slices), and `minAxis` & `maxAxis` is
    * 2, then we apply the same normalization across different slices.
    */
-  BatchNormType();
+  BatchNorm();
   /**
    * Create the BatchNorm layer object for a specified axis of input units as
@@ -93,30 +96,30 @@ class BatchNormType : public Layer<MatType>
    *                updating the parameters or momentum is used.
    * @param momentum Parameter used to to update the running mean and variance.
    */
-  BatchNormType(const size_t minAxis,
+  BatchNorm(const size_t minAxis,
                 const size_t maxAxis,
                 const double eps = 1e-8,
                 const bool average = true,
                 const double momentum = 0.1);
-  virtual ~BatchNormType() { }
+  virtual ~BatchNorm() { }
-  //! Clone the BatchNormType object. This handles polymorphism correctly.
-  BatchNormType* Clone() const { return new BatchNormType(*this); }
+  //! Clone the BatchNorm object. This handles polymorphism correctly.
+  BatchNorm* Clone() const { return new BatchNorm(*this); }
   //! Copy the other BatchNorm layer (but not weights).
-  BatchNormType(const BatchNormType& layer);
+  BatchNorm(const BatchNorm& layer);
   //! Take ownership of the members of the other BatchNorm layer (but not
   //! weights).
-  BatchNormType(BatchNormType&& layer);
+  BatchNorm(BatchNorm&& layer);
   //! Copy the other BatchNorm layer (but not weights).
-  BatchNormType& operator=(const BatchNormType& layer);
+  BatchNorm& operator=(const BatchNorm& layer);
   //! Take ownership of the members of the other BatchNorm layer (but not
   //! weights).
-  BatchNormType& operator=(BatchNormType&& layer);
+  BatchNorm& operator=(BatchNorm&& layer);
   /**
    * Reset the layer parameters.
@@ -189,7 +192,7 @@ class BatchNormType : public Layer<MatType>
   MatType& TrainingVariance() { return runningVariance; }
   //! Get the number of input units / channels.
-  size_t InputSize() const { return size; }
+  size_t InputSize() const { return inputUnits; }
   //! Get the epsilon value.
   const double &Epsilon() const { return eps; }
@@ -203,7 +206,7 @@ class BatchNormType : public Layer<MatType>
   bool Average() const { return average; }
   //! Get size of weights.
-  size_t WeightSize() const { return 2 * size; }
+  size_t WeightSize() const { return 2 * inputUnits; }
   //! Compute the output dimensions of the layer given `InputDimensions()`.
   void ComputeOutputDimensions();
@@ -253,7 +256,7 @@ class BatchNormType : public Layer<MatType>
   //! Locally-stored number of input units.  (This is the product of all
   //! dimensions between minAxis and maxAxis, inclusive.)
-  size_t size;
+  size_t inputUnits;
   //! Locally-stored number of higher dimension we are not applying
   //! batch normalization to.  This is the product of this->inputDimensions
@@ -273,11 +276,6 @@ class BatchNormType : public Layer<MatType>
   CubeType inputMean;
 }; // class BatchNorm
-// Convenience typedefs.
-// Standard Adaptive max pooling layer.
-using BatchNorm = BatchNormType<arma::mat>;
 } // namespace mlpack
 // Include the implementation.

mlpack/include/mlpack/methods/ann/layer/batch_norm_impl.hpp CHANGED Viewed

@@ -22,7 +22,7 @@
 namespace mlpack {
 template<typename MatType>
-BatchNormType<MatType>::BatchNormType() :
+BatchNorm<MatType>::BatchNorm() :
     Layer<MatType>(),
     minAxis(2),
     maxAxis(2),
@@ -31,14 +31,14 @@ BatchNormType<MatType>::BatchNormType() :
     momentum(0.0),
     count(0),
     inputDimension(1),
-    size(0),
+    inputUnits(0),
     higherDimension(1)
 {
   // Nothing to do here.
 }
 template <typename MatType>
-BatchNormType<MatType>::BatchNormType(
+BatchNorm<MatType>::BatchNorm(
     const size_t minAxis,
     const size_t maxAxis,
     const double eps,
@@ -52,7 +52,7 @@ BatchNormType<MatType>::BatchNormType(
     momentum(momentum),
     count(0),
     inputDimension(1),
-    size(0),
+    inputUnits(0),
     higherDimension(1)
 {
   // Nothing to do here.
@@ -60,7 +60,7 @@ BatchNormType<MatType>::BatchNormType(
 // Copy constructor.
 template<typename MatType>
-BatchNormType<MatType>::BatchNormType(const BatchNormType& layer) :
+BatchNorm<MatType>::BatchNorm(const BatchNorm& layer) :
     Layer<MatType>(layer),
     minAxis(layer.minAxis),
     maxAxis(layer.maxAxis),
@@ -70,7 +70,7 @@ BatchNormType<MatType>::BatchNormType(const BatchNormType& layer) :
     variance(layer.variance),
     count(layer.count),
     inputDimension(layer.inputDimension),
-    size(layer.size),
+    inputUnits(layer.inputUnits),
     higherDimension(layer.higherDimension),
     runningMean(layer.runningMean),
     runningVariance(layer.runningVariance)
@@ -80,7 +80,7 @@ BatchNormType<MatType>::BatchNormType(const BatchNormType& layer) :
 // Move constructor.
 template<typename MatType>
-BatchNormType<MatType>::BatchNormType(BatchNormType&& layer) :
+BatchNorm<MatType>::BatchNorm(BatchNorm&& layer) :
     Layer<MatType>(std::move(layer)),
     minAxis(std::move(layer.minAxis)),
     maxAxis(std::move(layer.maxAxis)),
@@ -90,7 +90,7 @@ BatchNormType<MatType>::BatchNormType(BatchNormType&& layer) :
     variance(std::move(layer.variance)),
     count(std::move(layer.count)),
     inputDimension(std::move(layer.inputDimension)),
-    size(std::move(layer.size)),
+    inputUnits(std::move(layer.inputUnits)),
     higherDimension(std::move(layer.higherDimension)),
     runningMean(std::move(layer.runningMean)),
     runningVariance(std::move(layer.runningVariance))
@@ -99,8 +99,8 @@ BatchNormType<MatType>::BatchNormType(BatchNormType&& layer) :
 }
 template<typename MatType>
-BatchNormType<MatType>&
-BatchNormType<MatType>::operator=(const BatchNormType& layer)
+BatchNorm<MatType>&
+BatchNorm<MatType>::operator=(const BatchNorm& layer)
 {
   if (&layer != this)
   {
@@ -113,7 +113,7 @@ BatchNormType<MatType>::operator=(const BatchNormType& layer)
     variance = layer.variance;
     count = layer.count;
     inputDimension = layer.inputDimension;
-    size = layer.size;
+    inputUnits = layer.inputUnits;
     higherDimension = layer.higherDimension;
     runningMean = layer.runningMean;
     runningVariance = layer.runningVariance;
@@ -123,9 +123,9 @@ BatchNormType<MatType>::operator=(const BatchNormType& layer)
 }
 template<typename MatType>
-BatchNormType<MatType>&
-BatchNormType<MatType>::operator=(
-    BatchNormType&& layer)
+BatchNorm<MatType>&
+BatchNorm<MatType>::operator=(
+    BatchNorm&& layer)
 {
   if (&layer != this)
   {
@@ -138,7 +138,7 @@ BatchNormType<MatType>::operator=(
     variance = std::move(layer.variance);
     count = std::move(layer.count);
     inputDimension = std::move(layer.inputDimension);
-    size = std::move(layer.size);
+    inputUnits = std::move(layer.inputUnits);
     higherDimension = std::move(layer.higherDimension);
     runningMean = std::move(layer.runningMean);
     runningVariance = std::move(layer.runningVariance);
@@ -148,40 +148,40 @@ BatchNormType<MatType>::operator=(
 }
 template<typename MatType>
-void BatchNormType<MatType>::SetWeights(const MatType& weightsIn)
+void BatchNorm<MatType>::SetWeights(const MatType& weightsIn)
 {
   MakeAlias(weights, weightsIn, WeightSize(), 1);
   // Gamma acts as the scaling parameters for the normalized output.
-  MakeAlias(gamma, weightsIn, size, 1);
+  MakeAlias(gamma, weightsIn, inputUnits, 1);
   // Beta acts as the shifting parameters for the normalized output.
-  MakeAlias(beta, weightsIn, size, 1, gamma.n_elem);
+  MakeAlias(beta, weightsIn, inputUnits, 1, gamma.n_elem);
 }
 template<typename MatType>
-void BatchNormType<MatType>::CustomInitialize(
+void BatchNorm<MatType>::CustomInitialize(
     MatType& W,
     const size_t elements)
 {
-  if (elements != 2 * size) {
-    throw std::invalid_argument("BatchNormType::CustomInitialize(): wrong "
+  if (elements != 2 * inputUnits) {
+    throw std::invalid_argument("BatchNorm::CustomInitialize(): wrong "
         "elements size!");
   }
   MatType gammaTemp;
   MatType betaTemp;
   // Gamma acts as the scaling parameters for the normalized output.
-  MakeAlias(gammaTemp, W, size, 1);
+  MakeAlias(gammaTemp, W, inputUnits, 1);
   // Beta acts as the shifting parameters for the normalized output.
-  MakeAlias(betaTemp, W, size, 1, gammaTemp.n_elem);
+  MakeAlias(betaTemp, W, inputUnits, 1, gammaTemp.n_elem);
-  gammaTemp.fill(1.0);
-  betaTemp.fill(0.0);
+  gammaTemp.ones();
+  betaTemp.zeros();
-  runningMean.zeros(size, 1);
-  runningVariance.ones(size, 1);
+  runningMean.zeros(inputUnits, 1);
+  runningVariance.ones(inputUnits, 1);
 }
 template<typename MatType>
-void BatchNormType<MatType>::Forward(
+void BatchNorm<MatType>::Forward(
     const MatType& input,
     MatType& output)
 {
@@ -203,31 +203,32 @@ void BatchNormType<MatType>::Forward(
     // Input corresponds to output from previous layer.
     // Used a cube for simplicity.
     CubeType inputTemp;
-    MakeAlias(inputTemp, input, inputSize, size,
+    MakeAlias(inputTemp, input, inputSize, inputUnits,
         batchSize * higherDimension, 0, false);
     // Initialize output to same size and values for convenience.
     CubeType outputTemp;
-    MakeAlias(outputTemp, output, inputSize, size,
+    MakeAlias(outputTemp, output, inputSize, inputUnits,
         batchSize * higherDimension, 0, false);
     outputTemp = inputTemp;
     // Calculate mean and variance over all channels.
     MatType mean = sum(sum(inputTemp, 2), 0) / m;
-    variance = sum(sum(pow(
-        inputTemp.each_slice() - repmat(mean, inputSize, 1), 2), 2), 0) / m;
+    variance = sum(sum(square(
+        inputTemp.each_slice() - repmat(mean, inputSize, 1)), 2), 0) / m;
     outputTemp.each_slice() -= repmat(mean, inputSize, 1);
     // Used in backward propagation.
-    inputMean.set_size(arma::size(inputTemp));
+    inputMean.set_size(size(inputTemp));
     inputMean = outputTemp;
     // Normalize output.
-    outputTemp.each_slice() /= sqrt(repmat(variance, inputSize, 1) + eps);
+    outputTemp.each_slice() /= sqrt(repmat(variance, inputSize, 1) +
+        ElemType(eps));
     // Re-used in backward propagation.
-    normalized.set_size(arma::size(inputTemp));
+    normalized.set_size(size(inputTemp));
     normalized = outputTemp;
     outputTemp.each_slice() %= repmat(gamma.t(), inputSize, 1);
@@ -235,11 +236,11 @@ void BatchNormType<MatType>::Forward(
     count += 1;
     // Value for average factor which used to update running parameters.
-    double averageFactor = average ? 1.0 / count : momentum;
+    ElemType averageFactor = ElemType(average ? 1.0 / count : momentum);
-    double nElements = 0.0;
+    ElemType nElements = 0;
     if (m - 1 != 0)
-      nElements = m * (1.0 / (m - 1));
+      nElements = m * (ElemType(1) / (m - 1));
     // Update running mean and running variance.
     runningMean = (1 - averageFactor) * runningMean + averageFactor *
@@ -252,35 +253,35 @@ void BatchNormType<MatType>::Forward(
     // Normalize the input and scale and shift the output.
     output = input;
     CubeType outputTemp;
-    MakeAlias(outputTemp, output, inputSize, size,
+    MakeAlias(outputTemp, output, inputSize, inputUnits,
         batchSize * higherDimension, 0, false);
     outputTemp.each_slice() -= repmat(runningMean.t(), inputSize, 1);
     outputTemp.each_slice() /= sqrt(repmat(runningVariance.t(),
-        inputSize, 1) + eps);
+        inputSize, 1) + ElemType(eps));
     outputTemp.each_slice() %= repmat(gamma.t(), inputSize, 1);
     outputTemp.each_slice() += repmat(beta.t(), inputSize, 1);
   }
 }
 template<typename MatType>
-void BatchNormType<MatType>::Backward(
+void BatchNorm<MatType>::Backward(
     const MatType& /* input */,
     const MatType& /* output */,
     const MatType& gy,
     MatType& g)
 {
-  const MatType stdInv = 1.0 / sqrt(variance + eps);
+  const MatType stdInv = 1 / sqrt(variance + ElemType(eps));
   const size_t batchSize = gy.n_cols;
   const size_t inputSize = inputDimension;
   const size_t m = inputSize * batchSize * higherDimension;
   CubeType gyTemp;
-  MakeAlias(gyTemp, gy, inputSize, size,
+  MakeAlias(gyTemp, gy, inputSize, inputUnits,
       batchSize * higherDimension, 0, false);
   CubeType gTemp;
-  MakeAlias(gTemp, g, inputSize, size,
+  MakeAlias(gTemp, g, inputSize, inputUnits,
       batchSize * higherDimension, 0, false);
   // Step 1: dl / dxhat.
@@ -288,24 +289,24 @@ void BatchNormType<MatType>::Backward(
   // Step 2: sum dl / dxhat * (x - mu) * -0.5 * stdInv^3.
   MatType temp = sum(sum(norm % inputMean, 2), 0);
-  MatType vars = temp % pow(stdInv, 3) * (-0.5);
+  MatType vars = -temp % pow(stdInv, 3) / 2;
   // Step 3: dl / dxhat * 1 / stdInv + variance * 2 * (x - mu) / m +
   // dl / dmu * 1 / m.
   gTemp = (norm.each_slice() % repmat(stdInv, inputSize, 1)) +
-      ((inputMean.each_slice() % repmat(vars, inputSize, 1) * 2.0) / m);
+      ((inputMean.each_slice() % repmat(vars, inputSize, 1) * 2) / m);
   // Step 4: sum (dl / dxhat * -1 / stdInv) + variance *
   // sum (-2 * (x - mu)) / m.
   MatType normTemp = sum(sum((norm.each_slice() %
       repmat(-stdInv, inputSize, 1)) +
-      (inputMean.each_slice() % repmat(vars, inputSize, 1) * (-2.0) / m),
+      -2 * (inputMean.each_slice() % repmat(vars, inputSize, 1) / m),
       2), 0) / m;
   gTemp.each_slice() += repmat(normTemp, inputSize, 1);
 }
 template<typename MatType>
-void BatchNormType<MatType>::Gradient(
+void BatchNorm<MatType>::Gradient(
     const MatType& /* input */,
     const MatType& error,
     MatType& gradient)
@@ -313,7 +314,7 @@ void BatchNormType<MatType>::Gradient(
   const size_t inputSize = inputDimension;
   CubeType errorTemp;
-  MakeAlias(errorTemp, error, inputSize, size,
+  MakeAlias(errorTemp, error, inputSize, inputUnits,
       error.n_cols * higherDimension, 0, false);
   // Step 5: dl / dy * xhat.
@@ -326,7 +327,7 @@ void BatchNormType<MatType>::Gradient(
 }
 template<typename MatType>
-void BatchNormType<MatType>::ComputeOutputDimensions()
+void BatchNorm<MatType>::ComputeOutputDimensions()
 {
   if (minAxis > maxAxis)
   {
@@ -354,9 +355,9 @@ void BatchNormType<MatType>::ComputeOutputDimensions()
   for (size_t i = 0; i < mainMinAxis; i++)
     inputDimension *= this->inputDimensions[i];
-  size = this->inputDimensions[mainMinAxis];
+  inputUnits = this->inputDimensions[mainMinAxis];
   for (size_t i = mainMinAxis + 1; i <= mainMaxAxis; i++)
-    size *= this->inputDimensions[i];
+    inputUnits *= this->inputDimensions[i];
   higherDimension = 1;
   for (size_t i = mainMaxAxis + 1; i < this->inputDimensions.size(); i++)
@@ -365,7 +366,7 @@ void BatchNormType<MatType>::ComputeOutputDimensions()
 template<typename MatType>
 template<typename Archive>
-void BatchNormType<MatType>::serialize(
+void BatchNorm<MatType>::serialize(
     Archive& ar, const uint32_t /* version */)
 {
   ar(cereal::base_class<Layer<MatType>>(this));
@@ -380,7 +381,7 @@ void BatchNormType<MatType>::serialize(
   ar(CEREAL_NVP(runningVariance));
   ar(CEREAL_NVP(inputMean));
   ar(CEREAL_NVP(inputDimension));
-  ar(CEREAL_NVP(size));
+  ar(CEREAL_NVP(inputUnits));
   ar(CEREAL_NVP(higherDimension));
 }