PyPI - mlpack - Versions diffs - 4.6.2__cp313-cp313-win_amd64.whl → 4.7.0__cp313-cp313-win_amd64.whl - Mend

mlpack 4.6.2__cp313-cp313-win_amd64.whl → 4.7.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (415) hide show

mlpack/include/mlpack/methods/ann/activation_functions/elish_function.hpp CHANGED Viewed

@@ -54,9 +54,10 @@ class ElishFunction
    * @param x Input data.
    * @return f(x).
    */
-  static double Fn(const double x)
+  template<typename ElemType>
+  static ElemType Fn(const ElemType x)
   {
-    if (x < 0.0)
+    if (x < 0)
       return (std::exp(x) - 1) / (1 + std::exp(-x));
     return x / (1 + std::exp(-x));
@@ -71,8 +72,8 @@ class ElishFunction
   template<typename InputVecType, typename OutputVecType>
   static void Fn(const InputVecType& x, OutputVecType& y)
   {
-    y = ((x < 0.0) % ((exp(x) - 1) / (1 + exp(-x))))
-        + ((x >= 0.0) % (x / (1 + exp(-x))));
+    y = (conv_to<InputVecType>::from(x < 0) % ((exp(x) - 1) / (1 + exp(-x))))
+        + (conv_to<InputVecType>::from(x >= 0) % (x / (1 + exp(-x))));
   }
   /**
@@ -82,17 +83,19 @@ class ElishFunction
    * @param y Result of Fn(x).
    * @return f'(x).
    */
-  static double Deriv(const double x, const double y)
+  template<typename ElemType>
+  static ElemType Deriv(const ElemType x, const ElemType y)
   {
-    if (x < 0.0)
+    if (x < 0)
     {
       return std::exp(x) - 2 / (1 + std::exp(x)) +
           2 / std::pow(1 + std::exp(x) , 2);
     }
     else if (x == 0)
     {
-      return 0.5; // the expression below is indeterminate at 0, even though
-                  // the expression solely in terms of x is defined (= 0.5)
+      // The expression below is indeterminate at 0, even though the expression
+      // solely in terms of x is defined (= 0.5).
+      return ElemType(0.5);
     }
     else
     {
@@ -118,12 +121,14 @@ class ElishFunction
     // the expression solely in terms of x is defined (= 0.5)
     // only calculate exp(x) once for each element where x < 0
     // this gives approx 3x speedup, despite allocating the temp vector
-    DerivVecType ex = (x < 0) % exp(x);
-    dy = ((x < 0) % ((ex - 2 / (1 + ex) + 2 / pow(1 + ex, 2)))) +
-         ((x > 0) % ((y / x) % (1.0 + x - y)));
+    DerivVecType ex = conv_to<DerivVecType>::from(x < 0) % exp(x);
+    dy = (conv_to<InputVecType>::from(x < 0) %
+            ((ex - 2 / (1 + ex) + 2 / square(1 + ex)))) +
+         (conv_to<InputVecType>::from(x > 0) %
+            ((y / x) % (1 + x - y)));
     // need to do this here, because the /x above gives nans even when the
     // condition is not met (e.g. when x > 0 is false)
-    dy(arma::find(x == 0)).fill(0.5);
+    dy(arma::find(x == 0)).fill(typename InputVecType::elem_type(0.5));
   }
 }; // class ElishFunction

mlpack/include/mlpack/methods/ann/activation_functions/elliot_function.hpp CHANGED Viewed

@@ -45,9 +45,10 @@ class ElliotFunction
    * @param x Input data.
    * @return f(x).
    */
-  static double Fn(const double x)
+  template<typename ElemType>
+  static ElemType Fn(const ElemType x)
   {
-    return x / (1.0 + std::abs(x));
+    return x / (1 + std::abs(x));
   }
   /**
@@ -56,10 +57,10 @@ class ElliotFunction
    * @param x Input data.
    * @param y The resulting output activation.
    */
-  template <typename InputVecType, typename OutputVecType>
+  template<typename InputVecType, typename OutputVecType>
   static void Fn(const InputVecType &x, OutputVecType &y)
   {
-    y = x / (1.0 + arma::abs(x));
+    y = x / (1 + arma::abs(x));
   }
   /**
@@ -69,9 +70,10 @@ class ElliotFunction
    * @param y Result of Fn(x).
    * @return f'(x).
    */
-  static double Deriv(const double x, const double /* y */)
+  template<typename ElemType>
+  static ElemType Deriv(const ElemType x, const ElemType /* y */)
   {
-    return 1.0 / std::pow(1.0 + std::abs(x), 2);
+    return 1 / std::pow(1 + std::abs(x), ElemType(2));
   }
   /**
@@ -86,7 +88,7 @@ class ElliotFunction
                     const OutputVecType& /* y */,
                     DerivVecType &dy)
   {
-    dy = 1.0 / pow(1.0 + arma::abs(x), 2);
+    dy = 1 / square(1 + abs(x));
   }
 }; // class ElliotFunction

mlpack/include/mlpack/methods/ann/activation_functions/gaussian_function.hpp CHANGED Viewed

@@ -22,7 +22,7 @@ namespace mlpack {
  *
  * @f{eqnarray*}{
  * f(x) &=& e^{-1 * x^2} \\
- * f'(x) &=& 2 * -x * f(x)
+ * f'(x) &=& 2 * -x * f(x)
  * @f}
  */
 class GaussianFunction
@@ -34,10 +34,10 @@ class GaussianFunction
    * @param x Input data.
    * @return f(x).
    */
-  template<typename eT>
-  static double Fn(const eT x)
+  template<typename ElemType>
+  static ElemType Fn(const ElemType x)
   {
-    return std::exp(-1 * std::pow(x, 2));
+    return std::exp(-std::pow(x, ElemType(2)));
   }
   /**
@@ -49,7 +49,7 @@ class GaussianFunction
   template<typename InputVecType, typename OutputVecType>
   static void Fn(const InputVecType& x, OutputVecType& y)
   {
-    y = exp(-1 * pow(x, 2));
+    y = exp(-square(x));
   }
   /**
@@ -59,7 +59,8 @@ class GaussianFunction
    * @param y Result of Fn(x).
    * @return f'(x)
    */
-  static double Deriv(const double x, const double y)
+  template<typename ElemType>
+  static ElemType Deriv(const ElemType x, const ElemType y)
   {
     return -2 * x * y;
   }

mlpack/include/mlpack/methods/ann/activation_functions/gelu_exact_function.hpp ADDED Viewed

@@ -0,0 +1,73 @@
+/**
+ * @file methods/ann/activation_functions/gelu_exact_function.hpp
+ * @author Kumar Utkarsh
+ *
+ * Definition and implementation of the exact Gaussian Error Linear Unit (GELU)
+ * function.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_ACTIVATION_FUNCTIONS_GELU_EXACT_FUNCTION_HPP
+#define MLPACK_METHODS_ANN_ACTIVATION_FUNCTIONS_GELU_EXACT_FUNCTION_HPP
+#include <mlpack/prereqs.hpp>
+namespace mlpack {
+/**
+ * The exact (erf-based) GELU function and its first derivative, defined by
+ *
+ * @f{eqnarray*}{
+ * f(x) &=& x \Phi(x) \\
+ * \Phi(x) &=& 0.5 (1 + \mathrm{erf}(x / \sqrt{2})) \\
+ * f'(x) &=& \Phi(x) + x \phi(x) \\
+ * \phi(x) &=& \frac{1}{\sqrt{2 \pi}} e^{-x^2 / 2}
+ * @f}
+ */
+class GELUExactFunction
+{
+ public:
+  //! Evaluate f(x) = x * Phi(x) for a single value, using std::erf for Phi.
+  static double Fn(const double x)
+  {
+    return 0.5 * x * (1.0 + std::erf(x / std::sqrt(2.0)));
+  }
+  //! Evaluate the exact GELU function for matrices/vectors; assigns to y.
+  template<typename InputVecType, typename OutputVecType>
+  static void Fn(const InputVecType& x, OutputVecType& y)
+  {
+    y = 0.5 * x % (1.0 + erf(x / std::sqrt(2.0)));
+  }
+  //! Compute f'(x) for a single value; y must be Fn(x), since Phi(x) = y / x.
+  static double Deriv(const double x, const double y )
+  {
+    const double phi = std::exp(-0.5 * x * x) / std::sqrt(2.0 * M_PI);
+    // Recover Phi(x) as y / x; at x == 0 that is 0 / 0, so return f'(0) = 0.5.
+    return (x == 0.0) ? 0.5 : (y / x + x * phi);
+  }
+  //! Compute f'(x) for each element into dy; y must hold Fn(x) for the same x.
+  template<typename InputVecType, typename OutputVecType, typename DerivVecType>
+  static void Deriv(const InputVecType& x,
+                    const OutputVecType& y,
+                    DerivVecType& dy)
+  {
+    dy.set_size(x.n_elem);
+    // Reuse y: Phi(x) = y / x. NOTE(review): dy sized by n_elem, not size(x).
+    for (size_t i = 0; i < x.n_elem; ++i)
+    {
+      if (x[i] == 0.0) dy[i] = 0.5;
+      else dy[i] = y[i] / x[i] +
+          x[i] * std::exp(-0.5 * x[i] * x[i]) / std::sqrt(2.0 * M_PI);
+    }
+  }
+}; // class GELUExactFunction
+} // namespace mlpack
+#endif

mlpack/include/mlpack/methods/ann/activation_functions/gelu_function.hpp CHANGED Viewed

@@ -37,10 +37,12 @@ class GELUFunction
    * @param x Input data.
    * @return f(x).
    */
-  static double Fn(const double x)
+  template<typename ElemType>
+  static ElemType Fn(const ElemType x)
   {
-    return 0.5 * x * (1 + std::tanh(std::sqrt(2 / M_PI) *
-           (x + 0.044715 * std::pow(x, 3))));
+    return (x / 2) *
+        (1 + std::tanh(std::sqrt(2 / arma::Datum<ElemType>::pi) *
+        (x + ElemType(0.044715) * std::pow(x, ElemType(3)))));
   }
   /**
@@ -52,8 +54,11 @@ class GELUFunction
   template<typename InputVecType, typename OutputVecType>
   static void Fn(const InputVecType& x, OutputVecType& y)
   {
-    y = 0.5 * x % (1 + arma::tanh(std::sqrt(2 / M_PI) *
-        (x + 0.044715 * pow(x, 3))));
+    typedef typename InputVecType::elem_type ElemType;
+    y = (x / 2) %
+        (1 + tanh(std::sqrt(2 / arma::Datum<ElemType>::pi) *
+        (x + ElemType(0.044715) * pow(x, ElemType(3)))));
   }
   /**
@@ -63,13 +68,16 @@ class GELUFunction
    * @param y Result of Fn(x).
    * @return f'(x)
    */
-  static double Deriv(const double x, const double /* y */)
+  template<typename ElemType>
+  static ElemType Deriv(const ElemType x, const ElemType /* y */)
   {
-    if (x < -10) return 0.0; // catch overflows
-    return 0.5 * std::tanh(0.0356774 * std::pow(x, 3) + 0.797885 * x) +
-           (0.0535161 * std::pow(x, 3) + 0.398942 * x) *
-           std::pow(1 / std::cosh(0.0356774 * std::pow(x, 3) +
-           0.797885 * x), 2) + 0.5;
+    if (x < -10) return 0; // catch overflows
+    return ElemType(0.5) * std::tanh(ElemType(0.0356774) *
+        std::pow(x, ElemType(3)) + ElemType(0.797885) * x) +
+        (ElemType(0.0535161) * std::pow(x, ElemType(3)) +
+         ElemType(0.398942) * x) *
+         std::pow(1 / std::cosh(ElemType(0.0356774) * std::pow(x, 3) +
+         ElemType(0.797885) * x), 2) + ElemType(0.5);
   }
   /**
@@ -84,11 +92,14 @@ class GELUFunction
                     const OutputVecType& /* y */,
                     DerivVecType& dy)
   {
-    dy = 0.5 * arma::tanh(0.0356774 * pow(x, 3) + 0.797885 * x) +
-        (0.0535161 * pow(x, 3) + 0.398942 * x) %
-        pow(1 / arma::cosh(0.0356774 * pow(x, 3) +
-        0.797885 * x), 2) + 0.5;
-    dy(arma::find(x < -10)).fill(0); // catch overflows
+    typedef typename InputVecType::elem_type ElemType;
+    dy = ElemType(0.5) * tanh(ElemType(0.0356774) * pow(x, ElemType(3)) +
+        ElemType(0.797885) * x) + (ElemType(0.0535161) * pow(x, ElemType(3)) +
+        ElemType(0.398942) * x) %
+        pow(1 / cosh(ElemType(0.0356774) * pow(x, ElemType(3)) +
+        ElemType(0.797885) * x), 2) + ElemType(0.5);
+    dy(find(x < -10)).fill(0); // catch overflows
   }
 }; // class GELUFunction

mlpack/include/mlpack/methods/ann/activation_functions/hard_sigmoid_function.hpp CHANGED Viewed

@@ -39,9 +39,10 @@ class HardSigmoidFunction
    * @param x Input data.
    * @return f(x).
    */
-  static double Fn(const double x)
+  template<typename ElemType>
+  static ElemType Fn(const ElemType x)
   {
-    return std::min(1.0, std::max(0.0, 0.2 * x + 0.5));
+    return std::min(ElemType(1), std::max(ElemType(0), x / 5 + ElemType(0.5)));
   }
   /**
@@ -67,13 +68,14 @@ class HardSigmoidFunction
    * @param y Result of Fn(x).
    * @return f'(x)
    */
-  static double Deriv(const double /* x */, const double y)
+  template<typename ElemType>
+  static ElemType Deriv(const ElemType /* x */, const ElemType y)
   {
-    if (y == 0.0 || y == 1.0)
+    if (y == 0 || y == 1)
     {
-      return 0.0;
+      return 0;
     }
-    return 0.2;
+    return ElemType(0.2);
   }
   /**

mlpack/include/mlpack/methods/ann/activation_functions/hard_swish_function.hpp CHANGED Viewed

@@ -52,7 +52,8 @@ class HardSwishFunction
    * @param x Input data.
    * @return f(x).
    */
-  static double Fn(const double x)
+  template<typename ElemType>
+  static ElemType Fn(const ElemType x)
   {
     if (x <= -3)
       return 0;
@@ -68,7 +69,7 @@ class HardSwishFunction
    * @param x Input data.
    * @param y The resulting output activation.
    */
-  template <typename InputVecType, typename OutputVecType>
+  template<typename InputVecType, typename OutputVecType>
   static void Fn(const InputVecType &x, OutputVecType &y)
   {
     y.set_size(size(x));
@@ -85,14 +86,15 @@ class HardSwishFunction
    * @param * (y) Result of Fn(x).
    * @return f'(x).
    */
-  static double Deriv(const double x, const double /* y */)
+  template<typename ElemType>
+  static ElemType Deriv(const ElemType x, const ElemType /* y */)
   {
     if (x <= -3)
       return 0;
     else if (x >= 3)
       return 1;
-    return (2 * x + 3.0) / 6.0;
+    return (2 * x + 3) / 6;
   }
   /**

mlpack/include/mlpack/methods/ann/activation_functions/hyper_sinh_function.hpp CHANGED Viewed

@@ -56,12 +56,13 @@ class HyperSinhFunction
    * @param x Input data.
    * @return f(x).
    */
-  static double Fn(const double x)
+  template<typename ElemType>
+  static ElemType Fn(const ElemType x)
   {
     if (x > 0)
-      return (std::sinh(x) / 3.0);
+      return (std::sinh(x) / 3);
     else
-      return (std::pow(x, 3.0) / 4.0);
+      return (std::pow(x, ElemType(3)) / 4);
   }
   /**
@@ -94,12 +95,13 @@ class HyperSinhFunction
    * @param y Input activation.
    * @return f'(x)
    */
-  static double Deriv(const double /* x */, const double y)
+  template<typename ElemType>
+  static ElemType Deriv(const ElemType /* x */, const ElemType y)
   {
     if (y > 0)
-      return (std::pow((1.0 / 9.0) + (y * y), 0.5));
+      return (std::pow(ElemType(1.0 / 9.0) + (y * y), ElemType(0.5)));
     else
-      return (3.0  * std::pow(std::pow(y, 2) / 4, 1.0 / 3.0));
+      return (3 * std::pow(std::pow(y, ElemType(2)) / 4, ElemType(1.0 / 3.0)));
   }
   /**
@@ -113,17 +115,20 @@ class HyperSinhFunction
                     const OutputVecType& y,
                     DerivVecType& dy)
   {
+    typedef typename InputVecType::elem_type ElemType;
     dy.set_size(size(y));
     #pragma omp for
     for (size_t i = 0; i < y.n_elem; ++i)
     {
       if (y(i) > 0)
       {
-        dy(i) = (std::pow((1.0 / 9.0) + (y(i) * y(i)), 0.5));
+        dy(i) = (std::pow(ElemType(1.0 / 9.0) + (y(i) * y(i)), ElemType(0.5)));
       }
       else
       {
-        dy(i) = (3.0 * std::pow(std::pow(y(i), 2) / 4, 1.0 / 3.0));
+        dy(i) = (3 * std::pow(std::pow(y(i), ElemType(2)) / 4,
+            ElemType(1.0 / 3.0)));
       }
     }
   }

mlpack/include/mlpack/methods/ann/activation_functions/identity_function.hpp CHANGED Viewed

@@ -33,7 +33,8 @@ class IdentityFunction
    * @param x Input data.
    * @return f(x).
    */
-  static double Fn(const double x)
+  template<typename ElemType>
+  static ElemType Fn(const ElemType x)
   {
     return x;
   }
@@ -59,9 +60,10 @@ class IdentityFunction
    * @param * (y) Result of Fn(x).
    * @return f'(x)
    */
-  static double Deriv(const double /* x */, const double /* y */)
+  template<typename ElemType>
+  static ElemType Deriv(const ElemType /* x */, const ElemType /* y */)
   {
-    return 1.0;
+    return 1;
   }
   /**
@@ -76,7 +78,7 @@ class IdentityFunction
                     const OutputVecType& /* y */,
                     DerivVecType& dy)
   {
-    dy.ones(arma::size(x));
+    dy.ones(size(x));
   }
   /**

mlpack/include/mlpack/methods/ann/activation_functions/inverse_quadratic_function.hpp CHANGED Viewed

@@ -33,9 +33,10 @@ class InvQuadFunction
    * @param x Input data.
    * @return f(x).
    */
-  static double Fn(const double x)
+  template<typename ElemType>
+  static ElemType Fn(const ElemType x)
   {
-    return 1 / ( 1 + x * x);
+    return 1 / (1 + x * x);
   }
   /**
@@ -47,7 +48,7 @@ class InvQuadFunction
   template<typename InputVecType, typename OutputVecType>
   static void Fn(const InputVecType& x, OutputVecType& y)
   {
-    y = 1 / (1 + pow(x, 2));
+    y = 1 / (1 + square(x));
   }
   /**
@@ -57,9 +58,10 @@ class InvQuadFunction
    * @param y Result of Fn(x).
    * @return f'(x)
    */
-  static double Deriv(const double x, const double /* y */)
+  template<typename ElemType>
+  static ElemType Deriv(const ElemType x, const ElemType /* y */)
   {
-    return -2 * x / std::pow(1 + std::pow(x, 2), 2);
+    return -2 * x / std::pow(1 + std::pow(x, ElemType(2)), ElemType(2));
   }
   /**
@@ -74,7 +76,7 @@ class InvQuadFunction
                     const OutputVecType& /* y */,
                     DerivVecType &dy)
   {
-    dy = - 2 * x / pow(1 + pow(x, 2), 2);
+    dy = -2 * x / square(1 + square(x));
   }
 }; // class InvQuadFunction

mlpack/include/mlpack/methods/ann/activation_functions/lisht_function.hpp CHANGED Viewed

@@ -47,7 +47,8 @@ class LiSHTFunction
    * @param x Input data.
    * @return f(x).
    */
-  static double Fn(const double x)
+  template<typename ElemType>
+  static ElemType Fn(const ElemType x)
   {
     return x * std::tanh(x);
   }
@@ -61,7 +62,7 @@ class LiSHTFunction
   template <typename InputVecType, typename OutputVecType>
   static void Fn(const InputVecType &x, OutputVecType &y)
   {
-    y = x % arma::tanh(x);
+    y = x % tanh(x);
   }
   /**
@@ -71,9 +72,10 @@ class LiSHTFunction
    * @param y Result of Fn(x).
    * @return f'(x)
    */
-  static double Deriv(const double x, const double /* y */)
+  template<typename ElemType>
+  static ElemType Deriv(const ElemType x, const ElemType /* y */)
   {
-    return std::tanh(x) + x * (1 - std::pow(std::tanh(x), 2));
+    return std::tanh(x) + x * (1 - std::pow(std::tanh(x), ElemType(2)));
   }
   /**
@@ -88,7 +90,7 @@ class LiSHTFunction
                     const OutputVecType& /* y */,
                     DerivVecType& dy)
   {
-    dy = arma::tanh(x) + x % (1 - pow(arma::tanh(x), 2));
+    dy = tanh(x) + x % (1 - square(tanh(x)));
   }
 }; // class LishtFunction

mlpack/include/mlpack/methods/ann/activation_functions/logistic_function.hpp CHANGED Viewed

@@ -34,18 +34,18 @@ class LogisticFunction
    * @param x Input data.
    * @return f(x).
    */
-  template<typename eT>
-  static double Fn(const eT x)
+  template<typename ElemType>
+  static ElemType Fn(const ElemType x)
   {
-    if (x < arma::Datum<eT>::log_max)
+    if (x < arma::Datum<ElemType>::log_max)
     {
-      if (x > -arma::Datum<eT>::log_max)
-        return 1.0 / (1.0 + std::exp(-x));
+      if (x > -arma::Datum<ElemType>::log_max)
+        return 1 / (1 + std::exp(-x));
-      return 0.0;
+      return 0;
     }
-    return 1.0;
+    return 1;
   }
   /**
@@ -57,7 +57,7 @@ class LogisticFunction
   template<typename InputVecType, typename OutputVecType>
   static void Fn(const InputVecType& x, OutputVecType& y)
   {
-    y = (1.0 / (1 + exp(-x)));
+    y = (1 / (1 + exp(-x)));
   }
   /**
@@ -67,9 +67,10 @@ class LogisticFunction
    * @param y Result of Fn(x).
    * @return f'(x)
    */
-  static double Deriv(const double /* x */, const double y)
+  template<typename ElemType>
+  static ElemType Deriv(const ElemType /* x */, const ElemType y)
   {
-    return y * (1.0 - y);
+    return y * (1 - y);
   }
   /**
@@ -84,7 +85,7 @@ class LogisticFunction
                     const OutputVecType& y,
                     DerivVecType& dy)
   {
-    dy = y % (1.0 - y);
+    dy = y % (1 - y);
   }
   /**
@@ -93,7 +94,8 @@ class LogisticFunction
    * @param y Input data.
    * @return f^{-1}(y)
    */
-  static double Inv(const double y)
+  template<typename ElemType>
+  static ElemType Inv(const ElemType y)
   {
     return arma::trunc_log(y / (1 - y));
   }

mlpack/include/mlpack/methods/ann/activation_functions/mish_function.hpp CHANGED Viewed

@@ -45,7 +45,8 @@ class MishFunction
    * @param x Input data.
    * @return f(x).
    */
-  static double Fn(const double x)
+  template<typename ElemType>
+  static ElemType Fn(const ElemType x)
   {
     return x * (std::exp(2 * x) + 2 * std::exp(x)) /
            (2 + 2 * std::exp(x) + std::exp(2 * x));
@@ -57,7 +58,7 @@ class MishFunction
    * @param x Input data.
    * @param y The resulting output activation.
    */
-  template <typename InputVecType, typename OutputVecType>
+  template<typename InputVecType, typename OutputVecType>
   static void Fn(const InputVecType &x, OutputVecType &y)
   {
     y = x % (exp(2 * x) + 2 * exp(x)) / (2 + 2 * exp(x) + exp(2 * x));
@@ -70,11 +71,12 @@ class MishFunction
    * @param y Result of Fn(x).
    * @return f'(x)
    */
-  static double Deriv(const double x, const double /* y */)
+  template<typename ElemType>
+  static ElemType Deriv(const ElemType x, const ElemType /* y */)
   {
     return std::exp(x) * (4 * (x + 1) + std::exp(x) * (4 * x + 6) +
            4 * std::exp(2 * x) + std::exp(3 * x)) /
-           std::pow(std::exp(2 * x) + 2 * std::exp(x) + 2, 2);
+           std::pow(std::exp(2 * x) + 2 * std::exp(x) + 2, ElemType(2));
   }
   /**
@@ -91,7 +93,7 @@ class MishFunction
   {
     dy = exp(x) % (4 * (x + 1) + exp(x) % (4 * x + 6) +
         4 * exp(2 * x) + exp(3 * x)) /
-        pow(exp(2 * x) + 2 * exp(x) + 2, 2);
+        square(exp(2 * x) + 2 * exp(x) + 2);
   }
 }; // class MishFunction