gpjax 0.11.2__py3-none-any.whl → 0.12.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gpjax/__init__.py +1 -4
- gpjax/distributions.py +16 -56
- gpjax/fit.py +11 -6
- gpjax/gps.py +61 -73
- gpjax/kernels/approximations/rff.py +2 -5
- gpjax/kernels/base.py +2 -5
- gpjax/kernels/computations/base.py +7 -7
- gpjax/kernels/computations/basis_functions.py +7 -6
- gpjax/kernels/computations/constant_diagonal.py +10 -12
- gpjax/kernels/computations/diagonal.py +6 -6
- gpjax/kernels/computations/eigen.py +1 -1
- gpjax/kernels/non_euclidean/graph.py +10 -11
- gpjax/kernels/nonstationary/arccosine.py +13 -21
- gpjax/kernels/nonstationary/polynomial.py +7 -8
- gpjax/kernels/stationary/periodic.py +3 -6
- gpjax/kernels/stationary/powered_exponential.py +3 -8
- gpjax/kernels/stationary/rational_quadratic.py +5 -8
- gpjax/likelihoods.py +11 -14
- gpjax/linalg/__init__.py +37 -0
- gpjax/linalg/operations.py +237 -0
- gpjax/linalg/operators.py +411 -0
- gpjax/linalg/utils.py +65 -0
- gpjax/mean_functions.py +8 -7
- gpjax/objectives.py +22 -21
- gpjax/parameters.py +11 -23
- gpjax/variational_families.py +93 -67
- {gpjax-0.11.2.dist-info → gpjax-0.12.2.dist-info}/METADATA +50 -18
- gpjax-0.12.2.dist-info/RECORD +52 -0
- gpjax/lower_cholesky.py +0 -69
- gpjax-0.11.2.dist-info/RECORD +0 -49
- {gpjax-0.11.2.dist-info → gpjax-0.12.2.dist-info}/WHEEL +0 -0
- {gpjax-0.11.2.dist-info → gpjax-0.12.2.dist-info}/licenses/LICENSE.txt +0 -0
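The headline change in this release is the removal of the CoLA dependency: a new in-house `gpjax/linalg` package (operators, operations, utils) replaces `cola`, and the standalone `gpjax/lower_cholesky.py` module is deleted. The sketch below summarises the migration as it appears in the per-file diffs that follow; the call signatures are inferred from those diffs rather than from any documentation, so treat it as an assumption-laden sketch, not a reference:

```python
import jax.numpy as jnp

# 0.12.x replacements for the cola-based calls removed below (names taken
# verbatim from the diffs; exact semantics are assumed, not documented here).
from gpjax.linalg import Dense, psd, solve
from gpjax.linalg.operations import diag, logdet, lower_cholesky
from gpjax.linalg.utils import add_jitter

K = jnp.array([[2.0, 0.5], [0.5, 1.5]])
y = jnp.array([1.0, -1.0])

Sigma = psd(Dense(add_jitter(K, 1e-6)))  # was: PSD(...) plus I_like(K) * jitter
alpha = solve(Sigma, y)                  # was: cola.solve(Sigma, y, Cholesky())
L = lower_cholesky(Sigma)                # was: gpjax.lower_cholesky.lower_cholesky
ld = logdet(Sigma)                       # was: cola.logdet(Sigma, Cholesky(), Cholesky())
v = diag(Sigma)                          # was: cola.diag(Sigma)
```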
gpjax/__init__.py
CHANGED

```diff
@@ -40,10 +40,9 @@ __license__ = "MIT"
 __description__ = "Gaussian processes in JAX and Flax"
 __url__ = "https://github.com/JaxGaussianProcesses/GPJax"
 __contributors__ = "https://github.com/JaxGaussianProcesses/GPJax/graphs/contributors"
-__version__ = "0.11.2"
+__version__ = "0.12.2"
 
 __all__ = [
-    "base",
     "gps",
     "integrators",
     "kernels",
@@ -55,8 +54,6 @@ __all__ = [
     "Dataset",
     "cite",
     "fit",
-    "Module",
-    "param_field",
     "fit_lbfgs",
     "fit_scipy",
 ]
```
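Alongside the version bump, `base`, `Module`, and `param_field` leave the top-level namespace. A quick sanity check against 0.12.2, using only what the diff above shows:

```python
import gpjax as gpx

assert gpx.__version__ == "0.12.2"
# `Module` and `param_field` were dropped from __all__ in this release:
assert "Module" not in gpx.__all__
assert "param_field" not in gpx.__all__
```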
gpjax/distributions.py
CHANGED

```diff
@@ -17,11 +17,6 @@
 from beartype.typing import (
     Optional,
 )
-import cola
-from cola.linalg.decompositions import Cholesky
-from cola.ops import (
-    LinearOperator,
-)
 from jax import vmap
 import jax.numpy as jnp
 import jax.random as jr
@@ -30,7 +25,14 @@ from numpyro.distributions import constraints
 from numpyro.distributions.distribution import Distribution
 from numpyro.distributions.util import is_prng_key
 
-from gpjax.lower_cholesky import lower_cholesky
+from gpjax.linalg.operations import (
+    diag,
+    logdet,
+    lower_cholesky,
+    solve,
+)
+from gpjax.linalg.operators import LinearOperator
+from gpjax.linalg.utils import psd
 from gpjax.typing import (
     Array,
     ScalarFloat,
@@ -47,7 +49,7 @@ class GaussianDistribution(Distribution):
         validate_args=None,
     ):
         self.loc = loc
-        self.scale = cola.PSD(scale)
+        self.scale = psd(scale)
         batch_shape = ()
         event_shape = jnp.shape(self.loc)
         super().__init__(batch_shape, event_shape, validate_args=validate_args)
@@ -76,13 +78,12 @@
     @property
     def variance(self) -> Float[Array, " N"]:
         r"""Calculates the variance."""
-        return cola.diag(self.scale)
+        return diag(self.scale)
 
     def entropy(self) -> ScalarFloat:
         r"""Calculates the entropy of the distribution."""
         return 0.5 * (
-            self.event_shape[0] * (1.0 + jnp.log(2.0 * jnp.pi))
-            + cola.logdet(self.scale, Cholesky(), Cholesky())
+            self.event_shape[0] * (1.0 + jnp.log(2.0 * jnp.pi)) + logdet(self.scale)
         )
 
     def median(self) -> Float[Array, " N"]:
@@ -104,7 +105,7 @@
 
     def stddev(self) -> Float[Array, " N"]:
         r"""Calculates the standard deviation."""
-        return jnp.sqrt(cola.diag(self.scale))
+        return jnp.sqrt(diag(self.scale))
 
     # @property
     # def event_shape(self) -> Tuple:
@@ -129,9 +130,7 @@
 
         # compute the pdf, -1/2[ n log(2π) + log|Σ| + (y - µ)ᵀΣ⁻¹(y - µ) ]
         return -0.5 * (
-            n * jnp.log(2.0 * jnp.pi)
-            + cola.logdet(sigma, Cholesky(), Cholesky())
-            + diff.T @ cola.solve(sigma, diff, Cholesky())
+            n * jnp.log(2.0 * jnp.pi) + logdet(sigma) + diff.T @ solve(sigma, diff)
         )
 
     # def _sample_n(self, key: KeyArray, n: int) -> Float[Array, "n N"]:
@@ -219,53 +218,14 @@ def _kl_divergence(q: GaussianDistribution, p: GaussianDistribution) -> ScalarFloat:
 
     # trace term, tr[Σp⁻¹ Σq] = tr[(LpLpᵀ)⁻¹(LqLqᵀ)] = tr[(Lp⁻¹Lq)(Lp⁻¹Lq)ᵀ] = (fr[LqLp⁻¹])²
     trace = _frobenius_norm_squared(
-        cola.solve(sqrt_p, sqrt_q.to_dense(), Cholesky())
+        solve(sqrt_p, sqrt_q.to_dense())
     )  # TODO: Not most efficient, given the `to_dense()` call (e.g., consider diagonal p and q). Need to abstract solving linear operator against another linear operator.
 
     # Mahalanobis term, (μp - μq)ᵀ Σp⁻¹ (μp - μq) = tr [(μp - μq)ᵀ [LpLpᵀ]⁻¹ (μp - μq)] = (fr[Lp⁻¹(μp - μq)])²
-    mahalanobis = jnp.sum(jnp.square(cola.solve(sqrt_p, diff, Cholesky())))
+    mahalanobis = jnp.sum(jnp.square(solve(sqrt_p, diff)))
 
     # KL[q(x)||p(x)] = [ [(μp - μq)ᵀ Σp⁻¹ (μp - μq)] - n - log|Σq| + log|Σp| + tr[Σp⁻¹ Σq] ] / 2
-    return (
-        mahalanobis
-        - n_dim
-        - cola.logdet(sigma_q, Cholesky(), Cholesky())
-        + cola.logdet(sigma_p, Cholesky(), Cholesky())
-        + trace
-    ) / 2.0
-
-
-# def _check_loc_scale(loc: Optional[Any], scale: Optional[Any]) -> None:
-#     r"""Checks that the inputs are correct."""
-#     if loc is None and scale is None:
-#         raise ValueError("At least one of `loc` or `scale` must be specified.")
-
-#     if loc is not None and loc.ndim < 1:
-#         raise ValueError("The parameter `loc` must have at least one dimension.")
-
-#     if scale is not None and len(scale.shape) < 2:  # scale.ndim < 2:
-#         raise ValueError(
-#             "The `scale` must have at least two dimensions, but "
-#             f"`scale.shape = {scale.shape}`."
-#         )
-
-#     if scale is not None and not isinstance(scale, LinearOperator):
-#         raise ValueError(
-#             f"The `scale` must be a CoLA LinearOperator but got {type(scale)}"
-#         )
-
-#     if scale is not None and (scale.shape[-1] != scale.shape[-2]):
-#         raise ValueError(
-#             f"The `scale` must be a square matrix, but `scale.shape = {scale.shape}`."
-#         )
-
-#     if loc is not None:
-#         num_dims = loc.shape[-1]
-#         if scale is not None and (scale.shape[-1] != num_dims):
-#             raise ValueError(
-#                 f"Shapes are not compatible: `loc.shape = {loc.shape}` and "
-#                 f"`scale.shape = {scale.shape}`."
-#             )
+    return (mahalanobis - n_dim - logdet(sigma_q) + logdet(sigma_p) + trace) / 2.0
 
 
 __all__ = [
```
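The `GaussianDistribution` interface itself is unchanged; only its backend moved from `cola` to `gpjax.linalg`. A minimal sketch of constructing one under the new API, assuming `psd` and `Dense` compose as the diff above indicates:

```python
import jax.numpy as jnp
from gpjax.distributions import GaussianDistribution
from gpjax.linalg import Dense, psd

loc = jnp.zeros(2)
cov = jnp.array([[1.0, 0.3], [0.3, 1.0]])

# __init__ re-annotates the operator itself via self.scale = psd(scale)
dist = GaussianDistribution(loc, psd(Dense(cov)))

var = dist.variance   # diag(Σ), via gpjax.linalg.operations.diag
ent = dist.entropy()  # 0.5 * (n * (1 + log 2π) + logdet(Σ))
```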
gpjax/fit.py
CHANGED

```diff
@@ -48,6 +48,7 @@ def fit(  # noqa: PLR0913
     train_data: Dataset,
     optim: ox.GradientTransformation,
     params_bijection: tp.Union[dict[Parameter, Transform], None] = DEFAULT_BIJECTION,
+    trainable: nnx.filterlib.Filter = Parameter,
     key: KeyArray = jr.PRNGKey(42),
     num_iters: int = 100,
     batch_size: int = -1,
@@ -65,7 +66,7 @@
         >>> import jax.random as jr
         >>> import optax as ox
         >>> import gpjax as gpx
-        >>> from gpjax.parameters import PositiveReal, Static
+        >>> from gpjax.parameters import PositiveReal
         >>>
         >>> # (1) Create a dataset:
         >>> X = jnp.linspace(0.0, 10.0, 100)[:, None]
@@ -75,10 +76,10 @@
         >>> class LinearModel(nnx.Module):
         >>>     def __init__(self, weight: float, bias: float):
         >>>         self.weight = PositiveReal(weight)
-        >>>         self.bias = Static(bias)
+        >>>         self.bias = bias
         >>>
         >>>     def __call__(self, x):
-        >>>         return self.weight.value * x + self.bias.value
+        >>>         return self.weight.value * x + self.bias
         >>>
         >>> model = LinearModel(weight=1.0, bias=1.0)
         >>>
@@ -100,6 +101,8 @@
         train_data (Dataset): The training data to be used for the optimisation.
         optim (GradientTransformation): The Optax optimiser that is to be used for
             learning a parameter set.
+        trainable (nnx.filterlib.Filter): Filter to determine which parameters are trainable.
+            Defaults to nnx.Param (all Parameter instances).
         num_iters (int): The number of optimisation steps to run. Defaults
             to 100.
         batch_size (int): The size of the mini-batch to use. Defaults to -1
@@ -127,7 +130,7 @@
     _check_verbose(verbose)
 
     # Model state filtering
-    graphdef, params, *static_state = nnx.split(model, Parameter, ...)
+    graphdef, params, *static_state = nnx.split(model, trainable, ...)
 
     # Parameters bijection to unconstrained space
     if params_bijection is not None:
@@ -182,6 +185,7 @@ def fit_scipy(  # noqa: PLR0913
     model: Model,
     objective: Objective,
     train_data: Dataset,
+    trainable: nnx.filterlib.Filter = Parameter,
     max_iters: int = 500,
     verbose: bool = True,
     safe: bool = True,
@@ -210,7 +214,7 @@
     _check_verbose(verbose)
 
     # Model state filtering
-    graphdef, params, *static_state = nnx.split(model, Parameter, ...)
+    graphdef, params, *static_state = nnx.split(model, trainable, ...)
 
     # Parameters bijection to unconstrained space
     params = transform(params, DEFAULT_BIJECTION, inverse=True)
@@ -258,6 +262,7 @@ def fit_lbfgs(
     objective: Objective,
     train_data: Dataset,
     params_bijection: tp.Union[dict[Parameter, Transform], None] = DEFAULT_BIJECTION,
+    trainable: nnx.filterlib.Filter = Parameter,
     max_iters: int = 100,
     safe: bool = True,
     max_linesearch_steps: int = 32,
@@ -290,7 +295,7 @@
     _check_num_iters(max_iters)
 
     # Model state filtering
-    graphdef, params, *static_state = nnx.split(model, Parameter, ...)
+    graphdef, params, *static_state = nnx.split(model, trainable, ...)
 
     # Parameters bijection to unconstrained space
     if params_bijection is not None:
```
gpjax/gps.py
CHANGED

```diff
@@ -16,14 +16,9 @@
 from abc import abstractmethod
 
 import beartype.typing as tp
-from cola.annotations import PSD
-from cola.linalg.algorithm_base import Algorithm
-from cola.linalg.decompositions.decompositions import Cholesky
-from cola.linalg.inverse.inv import solve
-from cola.ops.operators import I_like
-from flax import nnx
 import jax.numpy as jnp
 import jax.random as jr
+from flax import nnx
 from jaxtyping import (
     Float,
     Num,
@@ -38,12 +33,17 @@ from gpjax.likelihoods import (
     Gaussian,
     NonGaussian,
 )
-from gpjax.lower_cholesky import lower_cholesky
+from gpjax.linalg import (
+    Dense,
+    psd,
+    solve,
+)
+from gpjax.linalg.operations import lower_cholesky
+from gpjax.linalg.utils import add_jitter
 from gpjax.mean_functions import AbstractMeanFunction
 from gpjax.parameters import (
     Parameter,
     Real,
-    Static,
 )
 from gpjax.typing import (
     Array,
@@ -77,7 +77,7 @@ class AbstractPrior(nnx.Module, tp.Generic[M, K]):
         self.mean_function = mean_function
         self.jitter = jitter
 
-    def __call__(self, *args: tp.Any, **kwargs: tp.Any) -> GaussianDistribution:
+    def __call__(self, test_inputs: Num[Array, "N D"]) -> GaussianDistribution:
         r"""Evaluate the Gaussian process at the given points.
 
         The output of this function is a
@@ -90,17 +90,16 @@
         `__call__` method and should instead define a `predict` method.
 
         Args:
-            *args (Any): The arguments to pass to the GP's `predict` method.
-            **kwargs (Any): The keyword arguments to pass to the GP's `predict` method.
+            test_inputs: Input locations where the GP should be evaluated.
 
         Returns:
             GaussianDistribution: A multivariate normal random variable representation
                 of the Gaussian process.
         """
-        return self.predict(*args, **kwargs)
+        return self.predict(test_inputs)
 
     @abstractmethod
-    def predict(self, *args: tp.Any, **kwargs: tp.Any) -> GaussianDistribution:
+    def predict(self, test_inputs: Num[Array, "N D"]) -> GaussianDistribution:
         r"""Evaluate the predictive distribution.
 
         Compute the latent function's multivariate normal distribution for a
@@ -108,8 +107,7 @@
         this method must be implemented.
 
         Args:
-            *args (Any): The arguments to the predict method.
-            **kwargs (Any): Keyword arguments to the predict method.
+            test_inputs: Input locations where the GP should be evaluated.
 
         Returns:
             GaussianDistribution: A multivariate normal random variable representation
@@ -248,13 +246,12 @@ class Prior(AbstractPrior[M, K]):
             GaussianDistribution: A multivariate normal random variable representation
                 of the Gaussian process.
         """
-        x = test_inputs
-        mx = self.mean_function(x)
-        Kxx = self.kernel.gram(x)
-        Kxx += I_like(Kxx) * self.jitter
-        Kxx = PSD(Kxx)
+        mean_at_test = self.mean_function(test_inputs)
+        Kxx = self.kernel.gram(test_inputs)
+        Kxx_dense = add_jitter(Kxx.to_dense(), self.jitter)
+        Kxx = psd(Dense(Kxx_dense))
 
-        return GaussianDistribution(jnp.atleast_1d(mx.squeeze()), Kxx)
+        return GaussianDistribution(jnp.atleast_1d(mean_at_test.squeeze()), Kxx)
 
     def sample_approx(
         self,
@@ -315,15 +312,13 @@
         if (not isinstance(num_samples, int)) or num_samples <= 0:
             raise ValueError("num_samples must be a positive integer")
 
-        # sample fourier features
         fourier_feature_fn = _build_fourier_features_fn(self, num_features, key)
 
-        # sample feature weights
-        feature_weights = jr.normal(key, [num_samples, 2 * num_features])  # [B, L]
+        feature_weights = jr.normal(key, [num_samples, 2 * num_features])
 
         def sample_fn(test_inputs: Float[Array, "N D"]) -> Float[Array, "N B"]:
-            feature_evals = fourier_feature_fn(test_inputs)
-            evaluated_sample = jnp.inner(feature_evals, feature_weights)
+            feature_evals = fourier_feature_fn(test_inputs)
+            evaluated_sample = jnp.inner(feature_evals, feature_weights)
             return self.mean_function(test_inputs) + evaluated_sample
 
         return sample_fn
@@ -360,7 +355,9 @@ class AbstractPosterior(nnx.Module, tp.Generic[P, L]):
         self.likelihood = likelihood
         self.jitter = jitter
 
-    def __call__(self, *args: tp.Any, **kwargs: tp.Any) -> GaussianDistribution:
+    def __call__(
+        self, test_inputs: Num[Array, "N D"], train_data: Dataset
+    ) -> GaussianDistribution:
         r"""Evaluate the Gaussian process posterior at the given points.
 
         The output of this function is a
@@ -369,28 +366,30 @@
         evaluated and the distribution can be sampled.
 
         Under the hood, `__call__` is calling the objects `predict` method. For this
-        reasons, classes inheriting the `AbstractPrior` class, should not overwrite the
+        reasons, classes inheriting the `AbstractPosterior` class, should not overwrite the
         `__call__` method and should instead define a `predict` method.
 
         Args:
-            *args (Any): The arguments to pass to the GP's `predict` method.
-            **kwargs (Any): The keyword arguments to pass to the GP's `predict` method.
+            test_inputs: Input locations where the GP should be evaluated.
+            train_data: Training dataset to condition on.
 
         Returns:
             GaussianDistribution: A multivariate normal random variable representation
                 of the Gaussian process.
         """
-        return self.predict(*args, **kwargs)
+        return self.predict(test_inputs, train_data)
 
     @abstractmethod
-    def predict(self, *args: tp.Any, **kwargs: tp.Any) -> GaussianDistribution:
+    def predict(
+        self, test_inputs: Num[Array, "N D"], train_data: Dataset
+    ) -> GaussianDistribution:
         r"""Compute the latent function's multivariate normal distribution for a
-        given set of parameters. For any class inheriting the `AbstractPrior` class,
+        given set of parameters. For any class inheriting the `AbstractPosterior` class,
         this method must be implemented.
 
         Args:
-            *args (Any): The arguments to the predict method.
-            **kwargs (Any): Keyword arguments to the predict method.
+            test_inputs: Input locations where the GP should be evaluated.
+            train_data: Training dataset to condition on.
 
         Returns:
             GaussianDistribution: A multivariate normal random variable representation
@@ -504,24 +503,25 @@ class ConjugatePosterior(AbstractPosterior[P, GL]):
 
         # Precompute Gram matrix, Kxx, at training inputs, x
         Kxx = self.prior.kernel.gram(x)
-        Kxx += I_like(Kxx) * self.jitter
+        Kxx_dense = add_jitter(Kxx.to_dense(), self.jitter)
+        Kxx = Dense(Kxx_dense)
 
-        # Σ = Kxx + Io²
-        Sigma = Kxx + I_like(Kxx) * obs_noise
-        Sigma = PSD(Sigma)
+        Sigma_dense = Kxx.to_dense() + jnp.eye(Kxx.shape[0]) * obs_noise
+        Sigma = psd(Dense(Sigma_dense))
+        L_sigma = lower_cholesky(Sigma)
 
         mean_t = self.prior.mean_function(t)
         Ktt = self.prior.kernel.gram(t)
         Kxt = self.prior.kernel.cross_covariance(x, t)
-        Sigma_inv_Kxt = solve(Sigma, Kxt, Cholesky())
 
-        # μt + Ktx (Kxx + Io²)⁻¹ (y - μx)
-        mean = mean_t + jnp.matmul(Sigma_inv_Kxt.T, y - mx)
+        L_inv_Kxt = solve(L_sigma, Kxt)
+        L_inv_y_diff = solve(L_sigma, y - mx)
+
+        mean = mean_t + jnp.matmul(L_inv_Kxt.T, L_inv_y_diff)
 
-        # Ktt - Ktx (Kxx + Io²)⁻¹ Kxt
-        covariance = Ktt - jnp.matmul(Kxt.T, Sigma_inv_Kxt)
-        covariance += I_like(covariance) * self.prior.jitter
-        covariance = PSD(covariance)
+        covariance = Ktt.to_dense() - jnp.matmul(L_inv_Kxt.T, L_inv_Kxt)
+        covariance = add_jitter(covariance, self.prior.jitter)
+        covariance = psd(Dense(covariance))
 
         return GaussianDistribution(jnp.atleast_1d(mean.squeeze()), covariance)
 
@@ -531,7 +531,6 @@ class ConjugatePosterior(AbstractPosterior[P, GL]):
         train_data: Dataset,
         key: KeyArray,
         num_features: int | None = 100,
-        solver_algorithm: tp.Optional[Algorithm] = Cholesky(),
     ) -> FunctionalSample:
         r"""Draw approximate samples from the Gaussian process posterior.
 
@@ -565,11 +564,6 @@
             key (KeyArray): The random seed used for the sample(s).
             num_features (int): The number of features used when approximating the
                 kernel.
-            solver_algorithm (Optional[Algorithm], optional): The algorithm to use for the solves of
-                the inverse of the covariance matrix. See the
-                [CoLA documentation](https://cola.readthedocs.io/en/latest/package/cola.linalg.html#algorithms)
-                for which solver to pick. For PSD matrices, CoLA currently recommends Cholesky() for small
-                matrices and CG() for larger matrices. Select Auto() to let CoLA decide. Defaults to Cholesky().
 
         Returns:
             FunctionalSample: A function representing an approximate sample from the Gaussian
@@ -581,31 +575,25 @@
         # sample fourier features
         fourier_feature_fn = _build_fourier_features_fn(self.prior, num_features, key)
 
-        # sample fourier weights
-        fourier_weights = jr.normal(key, [num_samples, 2 * num_features])  # [B, L]
+        fourier_weights = jr.normal(key, [num_samples, 2 * num_features])
 
-        # sample weights v for canonical features
-        # v = Σ⁻¹ (y + ε - ɸ⍵) for Σ = Kxx + Io² and ε ~ N(0, o²)
         obs_var = self.likelihood.obs_stddev.value**2
-        Kxx = self.prior.kernel.gram(train_data.X)
-        Sigma = Kxx + I_like(Kxx) * (obs_var + self.jitter)
-        eps = jnp.sqrt(obs_var) * jr.normal(key, [train_data.n, num_samples])
-        y = train_data.y - self.prior.mean_function(train_data.X)
+        Kxx = self.prior.kernel.gram(train_data.X)
+        Sigma = Dense(add_jitter(Kxx.to_dense(), obs_var + self.jitter))
+        eps = jnp.sqrt(obs_var) * jr.normal(key, [train_data.n, num_samples])
+        y = train_data.y - self.prior.mean_function(train_data.X)
         Phi = fourier_feature_fn(train_data.X)
         canonical_weights = solve(
             Sigma,
             y + eps - jnp.inner(Phi, fourier_weights),
-            solver_algorithm,
         )  # [N, B]
 
         def sample_fn(test_inputs: Float[Array, "n D"]) -> Float[Array, "n B"]:
-            fourier_features = fourier_feature_fn(test_inputs)
-            weight_space_contribution = jnp.inner(
-                fourier_features, fourier_weights
-            )  # [n, B]
+            fourier_features = fourier_feature_fn(test_inputs)
+            weight_space_contribution = jnp.inner(fourier_features, fourier_weights)
             canonical_features = self.prior.kernel.cross_covariance(
                 test_inputs, train_data.X
-        )
+            )
             function_space_contribution = jnp.matmul(
                 canonical_features, canonical_weights
             )
@@ -657,7 +645,7 @@ class NonConjugatePosterior(AbstractPosterior[P, NGL]):
 
         # TODO: static or intermediate?
         self.latent = latent if isinstance(latent, Parameter) else Real(latent)
-        self.key = Static(key)
+        self.key = key
 
     def predict(
         self, test_inputs: Num[Array, "N D"], train_data: Dataset
@@ -689,8 +677,8 @@
 
         # Precompute lower triangular of Gram matrix, Lx, at training inputs, x
         Kxx = kernel.gram(x)
-        Kxx += I_like(Kxx) * self.prior.jitter
-        Kxx = PSD(Kxx)
+        Kxx_dense = add_jitter(Kxx.to_dense(), self.prior.jitter)
+        Kxx = psd(Dense(Kxx_dense))
         Lx = lower_cholesky(Kxx)
 
         # Unpack test inputs
@@ -702,7 +690,7 @@
         mean_t = mean_function(t)
 
         # Lx⁻¹ Kxt
-        Lx_inv_Kxt = solve(Lx, Ktx.T, Cholesky())
+        Lx_inv_Kxt = solve(Lx, Ktx.T)
 
         # Whitened function values, wx, corresponding to the inputs, x
         wx = self.latent.value
@@ -711,9 +699,9 @@
         mean = mean_t + jnp.matmul(Lx_inv_Kxt.T, wx)
 
         # Ktt - Ktx Kxx⁻¹ Kxt, TODO: Take advantage of covariance structure to compute Schur complement more efficiently.
-        covariance = Ktt - jnp.matmul(Lx_inv_Kxt.T, Lx_inv_Kxt)
-        covariance += I_like(covariance) * self.prior.jitter
-        covariance = PSD(covariance)
+        covariance = Ktt.to_dense() - jnp.matmul(Lx_inv_Kxt.T, Lx_inv_Kxt)
+        covariance = add_jitter(covariance, self.prior.jitter)
+        covariance = psd(Dense(covariance))
 
         return GaussianDistribution(jnp.atleast_1d(mean.squeeze()), covariance)
 
```
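The user-facing consequence of the new signatures: priors and posteriors are called with explicit arguments instead of `*args`/`**kwargs`, and `sample_approx` no longer takes a `solver_algorithm`. A sketch reusing the `prior`, `posterior`, `D`, and `key` from the fitting example above; the keyword names for `sample_approx` are assumed from the signature shown in the diff:

```python
import jax.numpy as jnp

X_test = jnp.linspace(-1.0, 11.0, 50)[:, None]

prior_dist = prior(X_test)        # AbstractPrior.__call__(test_inputs)
post_dist = posterior(X_test, D)  # AbstractPosterior.__call__(test_inputs, train_data)

# solver_algorithm (a cola Algorithm) is gone; the Cholesky path is built in:
sample_fn = posterior.sample_approx(num_samples=5, train_data=D, key=key)
samples = sample_fn(X_test)       # shape [50, 5]
```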
gpjax/kernels/approximations/rff.py
CHANGED

```diff
@@ -7,7 +7,6 @@ from jaxtyping import Float
 from gpjax.kernels.base import AbstractKernel
 from gpjax.kernels.computations import BasisFunctionComputation
 from gpjax.kernels.stationary.base import StationaryKernel
-from gpjax.parameters import Static
 from gpjax.typing import (
     Array,
     KeyArray,
@@ -66,10 +65,8 @@ class RFF(AbstractKernel):
                 "Please specify the n_dims argument for the base kernel."
             )
 
-        self.frequencies = Static(
-            self.base_kernel.spectral_density.sample(
-                key=key, sample_shape=(self.num_basis_fns, n_dims)
-            )
+        self.frequencies = self.base_kernel.spectral_density.sample(
+            key=key, sample_shape=(self.num_basis_fns, n_dims)
         )
         self.name = f"{self.base_kernel.name} (RFF)"
 
```
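With `Static` gone, `RFF.frequencies` is now a plain JAX array rather than a wrapped parameter. A small sketch, assuming the constructor arguments keep their pre-0.12 names:

```python
import jax.random as jr
from gpjax.kernels import RFF, RBF

rff = RFF(base_kernel=RBF(n_dims=1), num_basis_fns=32, key=jr.PRNGKey(0))
print(rff.frequencies.shape)  # (32, 1): a raw sample from the spectral density,
                              # no longer wrapped in Static(...)
```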
gpjax/kernels/base.py
CHANGED

```diff
@@ -17,7 +17,6 @@ import abc
 import functools as ft
 
 import beartype.typing as tp
-from cola.ops.operator_base import LinearOperator
 from flax import nnx
 import jax.numpy as jnp
 from jaxtyping import (
@@ -29,10 +28,10 @@ from gpjax.kernels.computations import (
     AbstractKernelComputation,
     DenseKernelComputation,
 )
+from gpjax.linalg import LinearOperator
 from gpjax.parameters import (
     Parameter,
     Real,
-    Static,
 )
 from gpjax.typing import (
     Array,
@@ -221,9 +220,7 @@ class Constant(AbstractKernel):
     def __init__(
         self,
         active_dims: tp.Union[list[int], slice, None] = None,
-        constant: tp.Union[
-            ScalarFloat, Parameter[ScalarFloat], Static[ScalarFloat]
-        ] = jnp.array(0.0),
+        constant: tp.Union[ScalarFloat, Parameter[ScalarFloat]] = jnp.array(0.0),
         compute_engine: AbstractKernelComputation = DenseKernelComputation(),
     ):
         if isinstance(constant, Parameter):
```
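The `Constant` kernel's `constant` argument now accepts a raw scalar or a `Parameter`, with the `Static[ScalarFloat]` option removed. A minimal sketch:

```python
import jax.numpy as jnp
from gpjax.kernels.base import Constant

k = Constant(constant=jnp.array(2.0))         # plain scalar is accepted directly
print(k(jnp.array([0.0]), jnp.array([1.0])))  # 2.0 for any input pair
```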
gpjax/kernels/computations/base.py
CHANGED

```diff
@@ -16,11 +16,6 @@
 import abc
 import typing as tp
 
-from cola.annotations import PSD
-from cola.ops.operators import (
-    Dense,
-    Diagonal,
-)
 from jax import vmap
 from jaxtyping import (
     Float,
@@ -28,6 +23,11 @@ from jaxtyping import (
 )
 
 import gpjax
+from gpjax.linalg import (
+    Dense,
+    Diagonal,
+    psd,
+)
 from gpjax.typing import Array
 
 K = tp.TypeVar("K", bound="gpjax.kernels.base.AbstractKernel")  # noqa: F821
@@ -69,7 +69,7 @@ class AbstractKernelComputation:
             The Gram covariance of the kernel function as a linear operator.
         """
         Kxx = self.cross_covariance(kernel, x, x)
-        return PSD(Dense(Kxx))
+        return psd(Dense(Kxx))
 
     @abc.abstractmethod
     def _cross_covariance(
@@ -93,7 +93,7 @@
         return self._cross_covariance(kernel, x, y)
 
     def _diagonal(self, kernel: K, inputs: Num[Array, "N D"]) -> Diagonal:
-        return PSD(Diagonal(vmap(lambda x: kernel(x, x))(inputs)))
+        return psd(Diagonal(vmap(lambda x: kernel(x, x))(inputs)))
 
     def diagonal(self, kernel: K, inputs: Num[Array, "N D"]) -> Diagonal:
         r"""For a given kernel, compute the elementwise diagonal of the
```
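Gram and diagonal computations now return `gpjax.linalg` operators annotated PSD via `psd(...)` rather than CoLA's `PSD(...)`. A sketch of driving the compute engine directly, assuming the `gram`/`diagonal` entry points behave as the diff shows:

```python
import jax.numpy as jnp
from gpjax.kernels import RBF
from gpjax.kernels.computations import DenseKernelComputation

X = jnp.linspace(0.0, 1.0, 10)[:, None]
engine = DenseKernelComputation()

K = engine.gram(RBF(), X)       # psd(Dense(...)) linear operator
d = engine.diagonal(RBF(), X)   # psd(Diagonal(...)) linear operator
K_dense = K.to_dense()          # materialise when a plain array is needed
```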
gpjax/kernels/computations/basis_functions.py
CHANGED

```diff
@@ -1,18 +1,19 @@
 import typing as tp
 
-from cola.annotations import PSD
-from cola.ops.operators import Dense
 import jax.numpy as jnp
 from jaxtyping import Float
 
 import gpjax
 from gpjax.kernels.computations.base import AbstractKernelComputation
+from gpjax.linalg import (
+    Dense,
+    Diagonal,
+    psd,
+)
 from gpjax.typing import Array
 
 K = tp.TypeVar("K", bound="gpjax.kernels.approximations.RFF")  # noqa: F821
 
-from cola.ops import Diagonal
-
 # TODO: Use low rank linear operator!
 
 
@@ -28,7 +29,7 @@ class BasisFunctionComputation(AbstractKernelComputation):
 
     def _gram(self, kernel: K, inputs: Float[Array, "N D"]) -> Dense:
         z1 = self.compute_features(kernel, inputs)
-        return PSD(Dense(self.scaling(kernel) * jnp.matmul(z1, z1.T)))
+        return psd(Dense(self.scaling(kernel) * jnp.matmul(z1, z1.T)))
 
     def diagonal(self, kernel: K, inputs: Float[Array, "N D"]) -> Diagonal:
         r"""For a given kernel, compute the elementwise diagonal of the
@@ -56,7 +57,7 @@
         Returns:
             A matrix of shape $N \times L$ representing the random fourier features where $L = 2M$.
         """
-        frequencies = kernel.frequencies.value
+        frequencies = kernel.frequencies
         scaling_factor = kernel.base_kernel.lengthscale.value
         z = jnp.matmul(x, (frequencies / scaling_factor).T)
         z = jnp.concatenate([jnp.cos(z), jnp.sin(z)], axis=-1)
```