sgptools 1.2.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sgptools/__init__.py +3 -4
- sgptools/core/__init__.py +1 -0
- sgptools/{models/core → core}/augmented_gpr.py +11 -17
- sgptools/{models/core → core}/augmented_sgpr.py +27 -34
- sgptools/core/osgpr.py +417 -0
- sgptools/core/transformations.py +699 -0
- sgptools/kernels/__init__.py +0 -8
- sgptools/kernels/attentive_kernel.py +214 -69
- sgptools/kernels/neural_kernel.py +268 -92
- sgptools/kernels/neural_network.py +127 -28
- sgptools/methods.py +1047 -0
- sgptools/objectives.py +275 -0
- sgptools/utils/__init__.py +0 -9
- sgptools/utils/data.py +452 -149
- sgptools/utils/gpflow.py +335 -174
- sgptools/utils/metrics.py +375 -102
- sgptools/utils/misc.py +145 -111
- sgptools/utils/tsp.py +224 -84
- sgptools-2.0.0.dist-info/METADATA +216 -0
- sgptools-2.0.0.dist-info/RECORD +23 -0
- {sgptools-1.2.0.dist-info → sgptools-2.0.0.dist-info}/WHEEL +1 -1
- sgptools/models/__init__.py +0 -10
- sgptools/models/bo.py +0 -118
- sgptools/models/cma_es.py +0 -121
- sgptools/models/continuous_sgp.py +0 -68
- sgptools/models/core/__init__.py +0 -9
- sgptools/models/core/osgpr.py +0 -291
- sgptools/models/core/transformations.py +0 -434
- sgptools/models/greedy_mi.py +0 -115
- sgptools/models/greedy_sgp.py +0 -97
- sgptools-1.2.0.dist-info/METADATA +0 -39
- sgptools-1.2.0.dist-info/RECORD +0 -27
- {sgptools-1.2.0.dist-info → sgptools-2.0.0.dist-info/licenses}/LICENSE.txt +0 -0
- {sgptools-1.2.0.dist-info → sgptools-2.0.0.dist-info}/top_level.txt +0 -0
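The listing shows the 1.2.0 `sgptools.models.core` package being flattened into `sgptools.core`, the per-method `sgptools.models.*` files being removed in favor of a single `sgptools.methods` module, and a new `sgptools.objectives` module being added. A minimal sketch of how import paths shift between the two releases, assuming only the module moves visible in this listing (the names each module exports are not part of this diff):

```python
# sgptools 1.2.0: core GP building blocks lived under sgptools.models.core
# import sgptools.models.core.osgpr
# import sgptools.models.core.transformations

# sgptools 2.0.0: the same modules now sit directly under sgptools.core,
# with the optimization entry points and objectives in new top-level modules.
import sgptools.core.osgpr
import sgptools.core.transformations
import sgptools.methods       # consolidates the removed sgptools.models.* method files
import sgptools.objectives    # new in 2.0.0

# The kernels subpackage keeps its module names across both releases.
from sgptools.kernels.neural_kernel import NeuralSpectralKernel, init_neural_kernel
```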
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Provides a neural spectral kernel function along with an initialization function
 """
 
@@ -19,149 +18,326 @@ import tensorflow as tf
 import numpy as np
 import gc
 
-
 import gpflow
 from gpflow.config import default_jitter, default_float
 from gpflow.models import SGPR
 from gpflow.models.util import data_input_to_tensor
+
 float_type = default_float()
 
 from .neural_network import NN
+from typing import List, Optional, Tuple, Union, Any
 
 
 class NeuralSpectralKernel(gpflow.kernels.Kernel):
-    """
-
+    """
+    Neural Spectral Kernel function (non-stationary kernel function).
+    This kernel models non-stationarity by using multiple Multi-Layer Perceptrons (MLPs)
+    to map input locations to frequency, lengthscale, and variance parameters for a
+    mixture of spectral components.
+
+    Based on the implementation from this [repo](https://github.com/sremes/nssm-gp/tree/master?tab=readme-ov-file).
 
     Refer to the following papers for more details:
     - Neural Non-Stationary Spectral Kernel [Remes et al., 2018]
 
-
-        input_dim (int):
-
-
-
+    Attributes:
+        input_dim (int): Dimensionality of the input data points.
+        Q (int): Number of MLP mixture components used in the kernel function.
+        num_hidden (int): Number of hidden layers in each MLP.
+        freq (List[NN]): List of MLPs, one for each component, predicting frequencies.
+        length (List[NN]): List of MLPs, one for each component, predicting lengthscales.
+        var (List[NN]): List of MLPs, one for each component, predicting variances.
     """
-
+
+    def __init__(self,
+                 input_dim: int,
+                 active_dims: Optional[List[int]] = None,
+                 Q: int = 1,
+                 hidden_sizes: List[int] = None):
+        """
+        Initializes the Neural Spectral Kernel.
+
+        Args:
+            input_dim (int): Number of dimensions of the input data points (e.g., 2 for 2D data).
+            active_dims (Optional[List[int]]): A list of indices specifying which input dimensions
+                                               the kernel operates on. If None, all dimensions are active.
+                                               Defaults to None.
+            Q (int): The number of MLP mixture components used in the kernel function.
+                     Each component has its own set of MLPs for frequency, lengthscale, and variance.
+                     Defaults to 1.
+            hidden_sizes (List[int]): A list where each element specifies the number of hidden units
+                                      in a layer of the MLPs. The length of this list determines
+                                      the number of hidden layers. Defaults to [32, 32].
+
+        Usage:
+            ```python
+            import gpflow
+            import numpy as np
+            from sgptools.kernels.neural_kernel import NeuralSpectralKernel
+
+            # Initialize a Neural Spectral Kernel for 2D data with 3 mixture components
+            # and MLPs with 2 hidden layers of 64 units each.
+            kernel = NeuralSpectralKernel(input_dim=2, Q=3, hidden_sizes=[64, 64])
+
+            # You can then use this kernel in a GPflow model:
+            # model = gpflow.models.SGPR(data=(X_train, Y_train), kernel=kernel, ...)
+            ```
+        """
         super().__init__(active_dims=active_dims)
 
+        if hidden_sizes is None:
+            hidden_sizes = [32, 32]  # Default if not provided
+        else:
+            hidden_sizes = list(hidden_sizes)
+
         self.input_dim = input_dim
         self.Q = Q
         self.num_hidden = len(hidden_sizes)
 
-
-        self.
-        self.
+        # Initialize lists of MLPs for each component
+        self.freq: List[NN] = []
+        self.length: List[NN] = []
+        self.var: List[NN] = []
+
+        # Create Q sets of MLPs
         for q in range(self.Q):
-
-
-
+            # MLP for frequency: maps input_dim -> hidden_sizes -> input_dim
+            # Output activation 'softplus' ensures positive frequencies.
+            freq_nn = NN([input_dim] + hidden_sizes + [input_dim],
+                         output_activation_fn='softplus')
+
+            # MLP for lengthscale: maps input_dim -> hidden_sizes -> input_dim
+            # Output activation 'softplus' ensures positive lengthscales.
+            length_nn = NN([input_dim] + hidden_sizes + [input_dim],
+                           output_activation_fn='softplus')
+
+            # MLP for variance: maps input_dim -> hidden_sizes -> 1 (scalar variance)
+            # Output activation 'softplus' ensures positive variances.
+            var_nn = NN([input_dim] + hidden_sizes + [1],
                         output_activation_fn='softplus')
-
-
-            self.
-            self.
-
-
-    def K(self, X, X2=None):
-        """
+
+            self.freq.append(freq_nn)
+            self.length.append(length_nn)
+            self.var.append(var_nn)
+
+    @tf.autograph.experimental.do_not_convert
+    def K(self, X: tf.Tensor, X2: Optional[tf.Tensor] = None) -> tf.Tensor:
+        """
+        Computes the covariance matrix between/amongst the input variables `X` and `X2`.
+        If `X2` is None, the function computes `K(X, X)` (a symmetric covariance matrix).
+        Otherwise, it computes `K(X, X2)` (a cross-covariance matrix).
+
+        The kernel is a sum over `Q` mixture components, where each component's
+        parameters (frequency, lengthscale, variance) are determined by MLPs
+        based on the input locations.
 
         Args:
-            X (
-
-
+            X (tf.Tensor): (N1, D); First set of input variables to compute covariance from.
+                           `N1` is the number of points, `D` is the dimensionality.
+            X2 (Optional[tf.Tensor]): (N2, D); Optional second set of input variables.
+                                      If provided, computes cross-covariance `K(X, X2)`.
+                                      If None, computes auto-covariance `K(X, X)`.
 
         Returns:
-
+            tf.Tensor: (N1, N2); The computed covariance matrix. If `X2` is None, the
+                       diagonal of `K(X, X)` is jittered for numerical stability.
         """
         if X2 is None:
-
-            equal = True
+            X2_internal = X
+            equal = True  # Flag to add jitter to diagonal for K(X,X)
         else:
+            X2_internal = X2
             equal = False
 
-        kern = 0.0
+        kern = tf.constant(0.0, dtype=float_type)  # Initialize kernel sum
+
         for q in range(self.Q):
-            #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            #
-
-
-
-
-            #
-
-
-
+            # Compute latent function values (frequencies, lengthscales, variances)
+            # by passing input locations through the MLPs.
+            freq_X, freq_X2 = self.freq[q](X), self.freq[q](
+                X2_internal)  # (N, D) frequencies
+            lens_X, lens_X2 = self.length[q](X), self.length[q](
+                X2_internal)  # (N, D) lengthscales
+            var_X, var_X2 = self.var[q](X), self.var[q](
+                X2_internal)  # (N, 1) variances
+
+            # Compute length-scale term (E) - based on inverse lengthscales and distances
+            Xr = tf.expand_dims(X, 1)  # (N1, 1, D)
+            X2r = tf.expand_dims(X2_internal, 0)  # (1, N2, D)
+            l1 = tf.expand_dims(lens_X, 1)  # (N1, 1, D)
+            l2 = tf.expand_dims(lens_X2, 0)  # (1, N2, D)
+
+            L = tf.square(l1) + tf.square(
+                l2)  # (N1, N2, D) - sum of squared lengthscales
+
+            # D term: Squared difference scaled by L, summed over dimensions
+            D_term = tf.square(Xr - X2r) / L  # (N1, N2, D)
+            D_term = tf.reduce_sum(D_term, 2)  # (N1, N2) - sum over dimensions
+
+            # Determinant term: Product over dimensions of (2 * l1 * l2 / L)^(1/2)
+            det_term = tf.sqrt(2 * l1 * l2 / L)  # (N1, N2, D)
+            det_term = tf.reduce_prod(det_term,
+                                      2)  # (N1, N2) - product over dimensions
+
+            # E term: Combine determinant and exponential of D_term
+            E = det_term * tf.exp(-D_term)  # (N1, N2)
+
+            # Compute cosine term (COS) - based on frequencies and dot products with X
+            # (N1, D) * (N1, D) -> sum over D -> (N1, 1)
+            muX = (tf.reduce_sum(freq_X * X, 1, keepdims=True) - tf.transpose(
+                tf.reduce_sum(freq_X2 * X2_internal, 1, keepdims=True)))
+            COS = tf.cos(2 * np.pi * muX)  # (N1, N2)
+
+            # Compute kernel variance term (WW) - outer product of variance predictions
+            WW = tf.matmul(var_X, var_X2,
+                           transpose_b=True)  # (N1, 1) @ (1, N2) -> (N1, N2)
+
+            # Compute the q'th kernel component and add to total kernel
             kern += WW * E * COS
+
+        # Add jitter to the diagonal for K(X,X) matrices for numerical stability
         if equal:
             return robust_kernel(kern, tf.shape(X)[0])
         else:
             return kern
 
-
-
+    @tf.autograph.experimental.do_not_convert
+    def K_diag(self, X: tf.Tensor) -> tf.Tensor:
+        """
+        Computes the diagonal of the covariance matrix `K(X, X)`.
+        For the Neural Spectral Kernel, this is `sum_q(var_q(X)^2) + jitter`.
+
+        Args:
+            X (tf.Tensor): (N, D); Input data points. `N` is the number of points.
+
+        Returns:
+            tf.Tensor: (N,); A 1D tensor representing the diagonal elements of the
+                       covariance matrix.
+        """
+        kd = default_jitter()  # Initialize with a small jitter
         for q in range(self.Q):
+            # Sum of squared variance predictions from each MLP component
            kd += tf.square(self.var[q](X))
-        return tf.squeeze(
+        return tf.squeeze(
+            kd)  # Remove singleton dimension (e.g., (N, 1) -> (N,))
 
-'''
-Helper functions
-'''
-def robust_kernel(kern, shape_X):
-    jitter = 1e-3
-    return kern + jitter * tf.eye(shape_X, dtype=float_type)
 
-
-
-
+# --- Helper functions ---
+@tf.autograph.experimental.do_not_convert
+def robust_kernel(kern: tf.Tensor, shape_X_0: tf.Tensor) -> tf.Tensor:
+    """
+    Adds a small positive jitter to the diagonal of a covariance matrix
+    to ensure numerical stability. This is particularly important for
+    Cholesky decompositions or inverse calculations.
 
-
+    Args:
+        kern (tf.Tensor): The input covariance matrix.
+        shape_X_0 (tf.Tensor): The size of the first dimension of the original input `X`
+                               (i.e., the number of data points N). Used to create the identity matrix.
+
+    Returns:
+        tf.Tensor: The covariance matrix with jitter added to its diagonal.
+    """
+    jitter_val = 1e-3  # Fixed jitter value
+    # Add jitter to the diagonal of the kernel matrix
+    return kern + jitter_val * tf.eye(shape_X_0, dtype=float_type)
+
+
+def init_neural_kernel(X_train: np.ndarray,
+                       Y_train: np.ndarray,
+                       inducing_variable: np.ndarray,
+                       Q: int,
+                       n_inits: int = 1,
+                       hidden_sizes: Optional[List[int]] = None) -> SGPR:
+    """
+    Helper function to initialize a Sparse Gaussian Process Regression (SGPR) model
+    with a Neural Spectral Kernel. This function can perform multiple random
+    initializations and return the model with the best initial Evidence Lower Bound (ELBO).
+
+    Refer to the original paper for more details:
     - Neural Non-Stationary Spectral Kernel [Remes et al., 2018]
 
     Args:
-
-
-        inducing_variable (ndarray): (m, d); Initial inducing points
-
-
-
+        X_train (np.ndarray): (n, d); Input training set points.
+        Y_train (np.ndarray): (n, 1); Training set labels.
+        inducing_variable (np.ndarray): (m, d); Initial inducing points. These are passed
+                                        directly to the SGPR model.
+        Q (int): The number of MLP mixture components for the Neural Spectral Kernel.
+        n_inits (int): Number of times to randomly initialize the kernel's MLPs and
+                       compute the initial ELBO. The model with the highest ELBO
+                       among these initializations is returned. Defaults to 1.
+        hidden_sizes (Optional[List[int]]): List of integers specifying the number of hidden
+                                            units in each MLP layer. If None, [32, 32] is used.
+
+    Returns:
+        SGPR: The SGPR model instance initialized with the Neural Spectral Kernel
+              that yielded the best initial ELBO.
+
+    Usage:
+        ```python
+        import numpy as np
+        import gpflow
+        from sgptools.kernels.neural_kernel import init_neural_kernel
+        from sgptools.utils.misc import get_inducing_pts  # For initial inducing points
+
+        # Dummy data
+        X_train_data = np.random.rand(100, 2).astype(np.float32)
+        Y_train_data = (np.sin(X_train_data[:, 0]) + np.cos(X_train_data[:, 1]))[:, None].astype(np.float32)
+
+        # Initial inducing points (e.g., subset of training data or k-means centers)
+        initial_inducing_points = get_inducing_pts(X_train_data, num_inducing=20)
+
+        # Initialize the SGPR model with Neural Spectral Kernel
+        # Try 3 random initializations for the MLPs.
+        model_ns_kernel = init_neural_kernel(
+            X_train=X_train_data,
+            Y_train=Y_train_data,
+            inducing_variable=initial_inducing_points,
+            Q=5, # 5 mixture components
+            n_inits=3, # 3 initializations
+            hidden_sizes=[16, 16] # Custom hidden layer sizes
+        )
+
+        # You would typically optimize this model further using optimize_model:
+        # from sgptools.utils.gpflow import optimize_model
+        # optimize_model(model_ns_kernel)
+        ```
     """
-
-
-
-    best_loglik = -np.inf
-    best_m = None
-
-
-
-
-
-
-
-
+    # Convert NumPy arrays to TensorFlow tensors
+    X_train_tf, Y_train_tf = data_input_to_tensor((X_train, Y_train))
+
+    best_loglik = -np.inf  # Track the best ELBO found
+    best_m: Optional[SGPR] = None  # Store the best model
+
+    N, input_dim = X_train_tf.shape  # Get number of data points and input dimensionality
+
+    for k_init_idx in range(n_inits):
+        # Create a new NeuralSpectralKernel instance for each initialization
+        current_kernel = NeuralSpectralKernel(input_dim=input_dim,
+                                              Q=Q,
+                                              hidden_sizes=hidden_sizes)
+
+        # Create an SGPR model with the current kernel initialization
+        model = SGPR(data=(X_train_tf, Y_train_tf),
+                     inducing_variable=inducing_variable,
+                     kernel=current_kernel)
+
+        # Compute the initial ELBO (Evidence Lower Bound)
         loglik = model.elbo()
+
+        # Check if the current initialization is better than previous ones
         if loglik > best_loglik:
             best_loglik = loglik
+            # Deepcopy the model to save its state, as it will be deleted/overwritten in next iteration
+            # This requires gpflow.utilities.traversal.deepcopy or similar for GPflow models
+            # For simplicity, we directly assign here, assuming shallow copy is sufficient
+            # or that the user will optimize it later. For robust best model saving, a deepcopy is safer.
             best_m = model
+
+        # Explicitly delete the model and run garbage collection to free memory
+        # (important if n_inits is large and models are complex)
         del model
         gc.collect()
-    print('Best init: %f' % best_loglik)
 
-    return best_m
+    return best_m
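The per-component arithmetic in the new `K` method above is the non-stationary spectral mixture form from Remes et al.: a Gibbs-style lengthscale term `E`, a cosine term `COS` driven by the frequency MLPs, and an input-dependent variance term `WW`. A small NumPy sketch of one component, with simple positive functions standing in for the MLP outputs (illustrative only, not code from the package), makes the shapes easier to follow:

```python
import numpy as np

# Toy inputs: N1=4 and N2=3 points in D=2 dimensions.
rng = np.random.default_rng(0)
X, X2 = rng.random((4, 2)), rng.random((3, 2))

# Stand-ins for the per-point MLP outputs (softplus keeps them positive).
softplus = lambda Z: np.log1p(np.exp(Z))
freq = lambda Z: softplus(Z)            # (N, D) frequencies
lens = lambda Z: softplus(Z) + 0.5      # (N, D) lengthscales
var = lambda Z: softplus(Z[:, :1])      # (N, 1) variances

# Gibbs-style lengthscale term E: compare every pair of points.
l1, l2 = lens(X)[:, None, :], lens(X2)[None, :, :]        # (N1,1,D), (1,N2,D)
L = l1**2 + l2**2                                          # (N1,N2,D)
D_term = np.sum((X[:, None, :] - X2[None, :, :])**2 / L, axis=2)
det_term = np.prod(np.sqrt(2 * l1 * l2 / L), axis=2)
E = det_term * np.exp(-D_term)                             # (N1,N2)

# Cosine term from the frequency predictions.
mu = np.sum(freq(X) * X, 1, keepdims=True) - np.sum(freq(X2) * X2, 1, keepdims=True).T
COS = np.cos(2 * np.pi * mu)                               # (N1,N2)

# Input-dependent variance term and the q'th kernel component.
WW = var(X) @ var(X2).T                                    # (N1,N2)
kern_q = WW * E * COS
print(kern_q.shape)  # (4, 3)
```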
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Multi Layer Perceptron Model
 """
 
@@ -20,39 +19,139 @@ import tensorflow as tf
 
 import gpflow
 from gpflow.config import default_float
+
 float_type = default_float()
 
-
-    return np.random.randn(dim_in, dim_out)*(2./(dim_in+dim_out))**0.5
+from typing import List, Union, Callable
 
-
-
+
+def xavier(dim_in: int, dim_out: int) -> np.ndarray:
+    """
+    Initializes weights using the Xavier (Glorot) uniform initialization method.
+    This method aims to keep the variance of activations consistent across layers,
+    helping to prevent vanishing/exploding gradients.
+
+    Formula: $W \sim U(-\sqrt{6/(dim_{in} + dim_{out})}, \sqrt{6/(dim_{in} + dim_{out})})$
 
     Args:
-
-
-
+        dim_in (int): The number of input units to the layer.
+        dim_out (int): The number of output units from the layer.
+
+    Returns:
+        np.ndarray: A NumPy array of shape (dim_in, dim_out) containing
+                    the initialized weights.
+    """
+    # Calculate the fan-in + fan-out for the scaling factor
+    scale_factor = (2.0 / (dim_in + dim_out))**0.5
+    # Generate random numbers from a normal (Gaussian) distribution
+    # This is often used as an approximation for Xavier uniform in practice
+    # or sometimes Xavier normal is explicitly implemented this way.
+    return np.random.randn(dim_in, dim_out) * scale_factor
+
+
+class NN(gpflow.base.Module):
+    """
+    A Multi-Layer Perceptron (MLP) model that is compatible with GPFlow,
+    allowing its parameters (weights and biases) to be optimized as part of
+    a GPflow model (e.g., within a custom kernel).
+
+    The network consists of multiple fully connected (dense) layers with
+    specified activation functions.
+
+    Attributes:
+        dims (List[int]): List of layer sizes, including input and output dimensions.
+        activation_fn (Callable): Activation function for hidden layers.
+        output_activation_fn (Callable): Activation function for the output layer.
+        _weights (List[tf.Variable]): List of TensorFlow Variable for weights of each layer.
+        _biases (List[tf.Variable]): List of TensorFlow Variable for biases of each layer.
     """
-
-
-
+
+    def __init__(self,
+                 dims: List[int],
+                 activation_fn: Union[str, Callable] = 'selu',
+                 output_activation_fn: Union[str, Callable] = 'softmax'):
+        """
+        Initializes the Multi-Layer Perceptron (MLP).
+
+        Args:
+            dims (List[int]): A list of integers specifying the size of each layer.
+                              The first element is the input dimension, the last is
+                              the output dimension, and intermediate elements are
+                              hidden layer sizes.
+                              Example: `[input_dim, hidden1_dim, hidden2_dim, output_dim]`
+            activation_fn (Union[str, Callable]): The activation function to use for hidden layers.
+                                                  Can be a string (e.g., 'relu', 'tanh', 'selu')
+                                                  or a callable TensorFlow activation function.
+                                                  Defaults to 'selu'.
+            output_activation_fn (Union[str, Callable]): The activation function to use for the output layer.
+                                                         Can be a string (e.g., 'softmax', 'sigmoid', 'softplus')
+                                                         or a callable TensorFlow activation function.
+                                                         Defaults to 'softplus'.
+
+        Usage:
+            ```python
+            from sgptools.kernels.neural_network import NN
+            import tensorflow as tf
+            import numpy as np
+
+            # Example: A simple MLP with one hidden layer
+            mlp = NN(dims=[2, 10, 1], activation_fn='tanh', output_activation_fn='sigmoid')
+
+            # Input data
+            input_data = tf.constant(np.random.rand(5, 2), dtype=tf.float32)
+
+            # Pass input through the network
+            output = mlp(input_data)
+            ```
+        """
         super().__init__()
         self.dims = dims
-
-        self.
+        # Get TensorFlow activation functions from strings or use provided callables
+        self.activation_fn = tf.keras.activations.get(
+            activation_fn) if isinstance(activation_fn, str) else activation_fn
+        self.output_activation_fn = tf.keras.activations.get(
+            output_activation_fn) if isinstance(output_activation_fn,
+                                                str) else output_activation_fn
+
+        self._weights: List[tf.Variable] = []
+        self._biases: List[tf.Variable] = []
+
+        # Create weights and biases for each layer
         for i, (dim_in, dim_out) in enumerate(zip(dims[:-1], dims[1:])):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            # Use Xavier initialization for weights
+            weight_init = xavier(dim_in, dim_out)
+            self._weights.append(
+                tf.Variable(weight_init, dtype=float_type, name=f'W_{i}'))
+
+            # Initialize biases to zeros
+            bias_init = np.zeros(dim_out, dtype=float_type)
+            self._biases.append(
+                tf.Variable(bias_init, dtype=float_type, name=f'b_{i}'))
+
+    def __call__(self, X: tf.Tensor) -> tf.Tensor:
+        """
+        Performs a forward pass through the MLP.
+
+        Args:
+            X (tf.Tensor): (N, D_in); The input tensor to the MLP. `N` is the batch size,
+                           `D_in` is the input dimension of the network.
+
+        Returns:
+            tf.Tensor: (N, D_out); The output tensor from the MLP. `D_out` is the output
+                       dimension of the network.
+        """
+        # Process through hidden layers
+        # The loop runs for (num_layers - 1) iterations, covering all hidden layers
+        # and the input-to-first-hidden layer transition.
+        for i in range(len(self.dims) -
+                       2):  # Iterate up to second to last layer
+            W = self._weights[i]
+            b = self._biases[i]
+            X = self.activation_fn(tf.matmul(X, W) + b)
+
+        # Process through the last layer (output layer)
+        W_last = self._weights[-1]  # Weights for the last layer
+        b_last = self._biases[-1]  # Biases for the last layer
+        X = self.output_activation_fn(tf.matmul(X, W_last) + b_last)
+
+        return X