gpjax 0.13.3__tar.gz → 0.13.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gpjax-0.13.3 → gpjax-0.13.4}/.gitignore +5 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/PKG-INFO +2 -2
- {gpjax-0.13.3 → gpjax-0.13.4}/README.md +1 -1
- gpjax-0.13.4/examples/heteroscedastic_inference.py +389 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/examples/regression.py +24 -23
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/__init__.py +1 -1
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/citation.py +13 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/gps.py +77 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/likelihoods.py +234 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/mean_functions.py +2 -2
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/objectives.py +56 -1
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/parameters.py +8 -1
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/variational_families.py +129 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/mkdocs.yml +1 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/pyproject.toml +2 -1
- {gpjax-0.13.3 → gpjax-0.13.4}/tests/conftest.py +7 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/tests/integration_tests.py +9 -2
- {gpjax-0.13.3 → gpjax-0.13.4}/tests/test_citations.py +16 -0
- gpjax-0.13.4/tests/test_heteroscedastic.py +407 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/tests/test_mean_functions.py +16 -1
- {gpjax-0.13.3 → gpjax-0.13.4}/uv.lock +23 -0
- gpjax-0.13.3/.github/workflows/pr_greeting.yml +0 -62
- {gpjax-0.13.3 → gpjax-0.13.4}/.github/CODE_OF_CONDUCT.md +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/.github/FUNDING.yml +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/.github/ISSUE_TEMPLATE/01_BUG_REPORT.md +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/.github/ISSUE_TEMPLATE/02_FEATURE_REQUEST.md +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/.github/ISSUE_TEMPLATE/03_CODEBASE_IMPROVEMENT.md +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/.github/ISSUE_TEMPLATE/04_DOCS_IMPROVEMENT.md +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/.github/codecov.yml +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/.github/commitlint.config.js +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/.github/dependabot.yml +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/.github/labeler.yml +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/.github/labels.yml +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/.github/pull_request_template.md +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/.github/release-drafter.yml +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/.github/workflows/auto-label.yml +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/.github/workflows/build_docs.yml +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/.github/workflows/commit-lint.yml +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/.github/workflows/integration.yml +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/.github/workflows/release.yml +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/.github/workflows/ruff.yml +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/.github/workflows/security-analysis.yml +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/.github/workflows/test_docs.yml +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/.github/workflows/tests.yml +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/CITATION.bib +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/LICENSE.txt +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/Makefile +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/CODE_OF_CONDUCT.md +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/GOVERNANCE.md +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/contributing.md +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/design.md +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/index.md +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/index.rst +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/installation.md +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/javascripts/katex.js +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/refs.bib +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/scripts/gen_examples.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/scripts/gen_pages.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/scripts/notebook_converter.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/scripts/sharp_bits_figure.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/sharp_bits.md +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/static/GP.pdf +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/static/GP.svg +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/static/bijector_figure.svg +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/static/css/gpjax_theme.css +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/static/favicon.ico +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/static/gpjax.mplstyle +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/static/gpjax_logo.pdf +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/static/gpjax_logo.svg +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/static/jaxkern/lato.ttf +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/static/jaxkern/logo.png +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/static/jaxkern/logo.svg +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/static/jaxkern/main.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/static/step_size_figure.png +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/static/step_size_figure.svg +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/stylesheets/extra.css +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/docs/stylesheets/permalinks.css +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/examples/backend.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/examples/barycentres/barycentre_gp.gif +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/examples/barycentres.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/examples/classification.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/examples/collapsed_vi.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/examples/constructing_new_kernels.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/examples/data/max_tempeature_switzerland.csv +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/examples/data/yacht_hydrodynamics.data +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/examples/deep_kernels.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/examples/gpjax.mplstyle +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/examples/graph_kernels.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/examples/intro_to_gps/decomposed_mll.png +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/examples/intro_to_gps/generating_process.png +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/examples/intro_to_gps.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/examples/intro_to_kernels.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/examples/likelihoods_guide.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/examples/oceanmodelling.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/examples/poisson.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/examples/uncollapsed_vi.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/examples/utils.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/examples/yacht.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/dataset.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/distributions.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/fit.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/integrators.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/kernels/__init__.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/kernels/approximations/__init__.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/kernels/approximations/rff.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/kernels/base.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/kernels/computations/__init__.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/kernels/computations/base.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/kernels/computations/basis_functions.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/kernels/computations/constant_diagonal.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/kernels/computations/dense.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/kernels/computations/diagonal.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/kernels/computations/eigen.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/kernels/non_euclidean/__init__.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/kernels/non_euclidean/graph.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/kernels/non_euclidean/utils.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/kernels/nonstationary/__init__.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/kernels/nonstationary/arccosine.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/kernels/nonstationary/linear.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/kernels/nonstationary/polynomial.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/kernels/stationary/__init__.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/kernels/stationary/base.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/kernels/stationary/matern12.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/kernels/stationary/matern32.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/kernels/stationary/matern52.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/kernels/stationary/periodic.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/kernels/stationary/powered_exponential.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/kernels/stationary/rational_quadratic.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/kernels/stationary/rbf.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/kernels/stationary/utils.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/kernels/stationary/white.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/linalg/__init__.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/linalg/operations.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/linalg/operators.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/linalg/utils.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/numpyro_extras.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/scan.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/gpjax/typing.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/static/CONTRIBUTING.md +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/static/paper.bib +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/static/paper.md +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/static/paper.pdf +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/tests/__init__.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/tests/test_dataset.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/tests/test_fit.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/tests/test_gaussian_distribution.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/tests/test_gps.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/tests/test_imports.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/tests/test_integrators.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/tests/test_kernels/__init__.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/tests/test_kernels/test_approximations.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/tests/test_kernels/test_base.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/tests/test_kernels/test_computation.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/tests/test_kernels/test_non_euclidean.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/tests/test_kernels/test_nonstationary.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/tests/test_kernels/test_stationary.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/tests/test_kernels/test_utils.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/tests/test_likelihoods.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/tests/test_linalg.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/tests/test_markdown.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/tests/test_numpyro_extras.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/tests/test_objectives.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/tests/test_parameters.py +0 -0
- {gpjax-0.13.3 → gpjax-0.13.4}/tests/test_variational_families.py +0 -0
{gpjax-0.13.3 → gpjax-0.13.4}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: gpjax
-Version: 0.13.3
+Version: 0.13.4
 Summary: Gaussian processes in JAX.
 Project-URL: Documentation, https://docs.jaxgaussianprocesses.com/
 Project-URL: Issues, https://github.com/thomaspinder/GPJax/issues
@@ -141,7 +141,7 @@ GPJax into the package it is today.
 > - [**Laplace Approximation**](https://docs.jaxgaussianprocesses.com/_examples/classification/#laplace-approximation)
 > - [**Inference on Non-Euclidean Spaces**](https://docs.jaxgaussianprocesses.com/_examples/constructing_new_kernels/#custom-kernel)
 > - [**Inference on Graphs**](https://docs.jaxgaussianprocesses.com/_examples/graph_kernels/)
-> - [**
+> - [**Heteroscedastic Inference**](https://docs.jaxgaussianprocesses.com/_examples/heteroscedastic_inference/)
 > - [**Learning Gaussian Process Barycentres**](https://docs.jaxgaussianprocesses.com/_examples/barycentres/)
 > - [**Deep Kernel Regression**](https://docs.jaxgaussianprocesses.com/_examples/deep_kernels/)
 > - [**Poisson Regression**](https://docs.jaxgaussianprocesses.com/_examples/poisson/)
{gpjax-0.13.3 → gpjax-0.13.4}/README.md

@@ -70,7 +70,7 @@ GPJax into the package it is today.
 > - [**Laplace Approximation**](https://docs.jaxgaussianprocesses.com/_examples/classification/#laplace-approximation)
 > - [**Inference on Non-Euclidean Spaces**](https://docs.jaxgaussianprocesses.com/_examples/constructing_new_kernels/#custom-kernel)
 > - [**Inference on Graphs**](https://docs.jaxgaussianprocesses.com/_examples/graph_kernels/)
-> - [**
+> - [**Heteroscedastic Inference**](https://docs.jaxgaussianprocesses.com/_examples/heteroscedastic_inference/)
 > - [**Learning Gaussian Process Barycentres**](https://docs.jaxgaussianprocesses.com/_examples/barycentres/)
 > - [**Deep Kernel Regression**](https://docs.jaxgaussianprocesses.com/_examples/deep_kernels/)
 > - [**Poisson Regression**](https://docs.jaxgaussianprocesses.com/_examples/poisson/)
gpjax-0.13.4/examples/heteroscedastic_inference.py

@@ -0,0 +1,389 @@
+# -*- coding: utf-8 -*-
+# ---
+# jupyter:
+#   jupytext:
+#     cell_metadata_filter: -all
+#     custom_cell_magics: kql
+#     text_representation:
+#       extension: .py
+#       format_name: percent
+#       format_version: '1.3'
+#       jupytext_version: 1.17.3
+#   kernelspec:
+#     display_name: .venv
+#     language: python
+#     name: python3
+# ---
+
+# %% [markdown]
+# # Heteroscedastic inference for regression and classification
+#
+# This notebook shows how to fit a heteroscedastic Gaussian process (GP) that
+# allows one to perform regression where there exists non-constant, or
+# input-dependent, noise.
+#
+#
+# ## Background
+# A heteroscedastic GP couples two latent functions:
+# - A **signal GP** $f(\cdot)$ for the mean response.
+# - A **noise GP** $g(\cdot)$ that maps to a positive variance
+# $\sigma^2(x) = \phi(g(x))$ via a positivity transform $\phi$ (typically
+# ${\rm exp}$ or ${\rm softplus}$). Intuitively, we are introducing a pair of GPs;
+# one to model the latent mean, and a second that models the log-noise variance. This
+# is in direct contrast to a
+# [homoscedastic GP](https://docs.jaxgaussianprocesses.com/_examples/regression/)
+# where we learn a constant value for the noise.
+#
+# In the Gaussian case, the observed response follows
+# $$y \mid f, g \sim \mathcal{N}(f, \sigma^2(x)).$$
+# Variational inference works with independent posteriors $q(f)q(g)$, combining the
+# moments of each into an ELBO. For non-Gaussian likelihoods the same structure
+# remains; only the expected log-likelihood changes.
+
+# %%
+from jax import config
+import jax.numpy as jnp
+import jax.random as jr
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import optax as ox
+
+from examples.utils import use_mpl_style
+import gpjax as gpx
+from gpjax.likelihoods import (
+    HeteroscedasticGaussian,
+    LogNormalTransform,
+    SoftplusTransform,
+)
+from gpjax.variational_families import (
+    HeteroscedasticVariationalFamily,
+    VariationalGaussianInit,
+)
+
+# Enable Float64 for stable linear algebra.
+config.update("jax_enable_x64", True)
+
+
+use_mpl_style()
+key = jr.key(123)
+cols = mpl.rcParams["axes.prop_cycle"].by_key()["color"]
+
+
+# %% [markdown]
+# ## Dataset simulation
+# We simulate a dataset whose mean and noise levels vary with
+# the input. We sample inputs $x \sim \mathcal{U}(0, 1)$ and define the
+# latent signal to be
+# $$f(x) = (x - 0.5)^2 + 0.05;$$
+# a smooth bowl-shaped curve. The observation standard deviation is chosen to be
+# proportional to the signal,
+# $$\sigma(x) = 0.5\,f(x),$$
+# which yields the heteroscedastic generative model
+# $$y \mid x \sim \mathcal{N}\!\big(f(x), \sigma^2(x)\big).$$
+# This construction makes the noise small near the minimum of the bowl and much
+# larger in the tails. We also create a dense test grid that we shall use later for
+# visualising posterior fits and predictive uncertainty.
+
+# %%
+# Create data with input-dependent variance.
+key, x_key, noise_key = jr.split(key, 3)
+n = 200
+x = jr.uniform(x_key, (n, 1), minval=0.0, maxval=1.0)
+signal = (x - 0.5) ** 2 + 0.05
+noise_scale = 0.5 * signal
+noise = noise_scale * jr.normal(noise_key, shape=(n, 1))
+y = signal + noise
+train = gpx.Dataset(X=x, y=y)
+
+xtest = jnp.linspace(-0.1, 1.1, 200)[:, None]
+signal_test = (xtest - 0.5) ** 2 + 0.05
+noise_scale_test = 0.5 * signal_test
+noise_test = noise_scale_test * jr.normal(noise_key, shape=(200, 1))
+ytest = signal_test + noise_test
+
+fig, ax = plt.subplots()
+ax.plot(x, y, "o", label="Observations", alpha=0.7, color=cols[0])
+ax.plot(xtest, signal_test, label="Signal", alpha=0.7, color=cols[1])
+ax.plot(xtest, noise_scale_test, label="Noise scale", alpha=0.7, color=cols[2])
+ax.set_xlabel("$x$")
+ax.set_ylabel("$y$")
+ax.legend(loc="upper left")
+
+# %% [markdown]
+# For a homoscedastic baseline, compare this figure with the
+# [Gaussian process regression notebook](https://docs.jaxgaussianprocesses.com/_examples/regression/)
+# (`examples/regression.py`), where a single latent GP is paired with constant
+# observation noise.
+
+# %% [markdown]
+# ## Prior specification
+# We place independent Gaussian process priors on the signal and noise processes:
+# $$f \sim \mathcal{GP}\big(0, k_f\big), \qquad g \sim \mathcal{GP}\big(0, k_g\big),$$
+# where $k_f$ and $k_g$ are stationary squared-exponential kernels with unit
+# variance and lengthscale of one. The noise process $g$ is mapped to the variance
+# via the logarithmic transform in `LogNormalTransform`, giving
+# $\sigma^2(x) = \exp\big(g(x)\big)$. The joint prior over $(f, g)$ combines with
+# the heteroscedastic Gaussian likelihood,
+# $$p(\mathbf{y} \mid f, g) = \prod_{i=1}^n
+# \mathcal{N}\!\big(y_i \mid f(x_i), \exp(g(x_i))\big),$$
+# to form the posterior target that we shall approximate variationally. The product
+# syntax `signal_prior * likelihood` used below constructs this augmented GP model.
+
+# %%
+# Signal and noise priors.
+signal_prior = gpx.gps.Prior(
+    mean_function=gpx.mean_functions.Zero(),
+    kernel=gpx.kernels.RBF(),
+)
+noise_prior = gpx.gps.Prior(
+    mean_function=gpx.mean_functions.Zero(),
+    kernel=gpx.kernels.RBF(),
+)
+likelihood = HeteroscedasticGaussian(
+    num_datapoints=train.n,
+    noise_prior=noise_prior,
+    noise_transform=LogNormalTransform(),
+)
+posterior = signal_prior * likelihood
+
+# Variational family over both processes.
+z = jnp.linspace(-3.2, 3.2, 25)[:, None]
+q = HeteroscedasticVariationalFamily(
+    posterior=posterior,
+    inducing_inputs=z,
+    inducing_inputs_g=z,
+)
+
+# %% [markdown]
+# The variational family introduces inducing variables for both latent functions,
+# located at the set $Z = \{z_m\}_{m=1}^M$. These inducing variables summarise the
+# infinite-dimensional GP priors in terms of multivariate Gaussian parameters.
+# Optimising the evidence lower bound (ELBO) corresponds to adjusting the means and
+# covariances of the variational posteriors $q(f)$ and $q(g)$ so that they best
+# explain the observed data whilst remaining close to the prior. For a deeper look at
+# these constructions in the homoscedastic setting, refer to the
+# [Sparse Gaussian Process Regression](https://docs.jaxgaussianprocesses.com/_examples/collapsed_vi/)
+# (`examples/collapsed_vi.py`) and
+# [Sparse Stochastic Variational Inference](https://docs.jaxgaussianprocesses.com/_examples/uncollapsed_vi/)
+# (`examples/uncollapsed_vi.py`) notebooks.
+
+# %% [markdown]
+# ### Optimisation
+# With the model specified, we minimise the negative ELBO,
+# $$\mathcal{L} = \mathbb{E}_{q(f)q(g)}\!\big[\log p(\mathbf{y}\mid f, g)\big]
+# - \mathrm{KL}\!\left[q(f) \,\|\, p(f)\right]
+# - \mathrm{KL}\!\left[q(g) \,\|\, p(g)\right],$$
+# using the Adam optimiser. GPJax automatically selects the tight bound of
+# Lázaro-Gredilla & Titsias (2011) when the likelihood is Gaussian, yielding an
+# analytically tractable expectation over the latent noise process. The resulting
+# optimisation iteratively updates the inducing posteriors for both latent GPs.
+
+# %%
+# Optimise the heteroscedastic ELBO (selects tighter bound).
+objective = lambda model, data: -gpx.objectives.heteroscedastic_elbo(model, data)
+optimiser = ox.adam(1e-2)
+q_trained, history = gpx.fit(
+    model=q,
+    objective=objective,
+    train_data=train,
+    optim=optimiser,
+    num_iters=10000,
+    verbose=False,
+)
+
+loss_trace = jnp.asarray(history)
+print(f"Final regression ELBO: {-loss_trace[-1]:.3f}")
+
+# %% [markdown]
+# ## Prediction
+# After training we obtain posterior marginals for both latent functions. To make a
+# prediction we evaluate two quantities:
+# 1. The latent posterior over $f$ (mean and variance), which reflects uncertainty
+# in the latent function **prior** to observing noise.
+# 2. The marginal predictive over observations, which integrates out both $f$ and
+# $g$ to provide predictive intervals for future noisy measurements.
+# The helper method `likelihood.predict` performs the second integration for us.
+
+# %%
+# Predict on a dense grid.
+xtest = jnp.linspace(-0.1, 1.1, 200)[:, None]
+mf, vf, mg, vg = q_trained.predict(xtest)
+
+signal_pred, noise_pred = q_trained.predict_latents(xtest)
+predictive = likelihood.predict(signal_pred, noise_pred)
+
+fig, ax = plt.subplots()
+ax.plot(train.X, train.y, "o", label="Observations", alpha=0.5)
+ax.plot(xtest, mf, color="C0", label="Posterior mean")
+ax.fill_between(
+    xtest.squeeze(),
+    (mf.squeeze() - 2 * jnp.sqrt(vf.squeeze())).squeeze(),
+    (mf.squeeze() + 2 * jnp.sqrt(vf.squeeze())).squeeze(),
+    color="C0",
+    alpha=0.15,
+    label="±2 std (latent)",
+)
+ax.fill_between(
+    xtest.squeeze(),
+    predictive.mean - 2 * jnp.sqrt(jnp.diag(predictive.covariance_matrix)),
+    predictive.mean + 2 * jnp.sqrt(jnp.diag(predictive.covariance_matrix)),
+    color="C1",
+    alpha=0.15,
+    label="±2 std (observed)",
+)
+ax.set_xlabel("$x$")
+ax.set_ylabel("$y$")
+ax.legend(loc="upper left")
+ax.set_title("Heteroscedastic regression")
+
+# %% [markdown]
+# The latent intervals quantify epistemic uncertainty about $f$, whereas the broader
+# observed band adds the aleatoric noise predicted by $g$. The widening of the orange
+# band in the right half matches the ground-truth construction of the dataset.
+
+# %% [markdown]
+# ## Sparse Heteroscedastic Regression
+#
+# We now demonstrate how the aforementioned heteroscedastic approach can be extended
+# into sparse scenarios, thus offering more favourable scalability as the size of our
+# dataset grows. To achieve this we define inducing points for both the signal and
+# noise processes. Decoupling these grids allows us to focus modelling
+# capacity where each latent function varies the most. The synthetic dataset below
+# contains a smooth sinusoidal signal but exhibits a sharply peaked noise shock,
+# mimicking the situation where certain regions of the input space are far noisier
+# than others.
+
+# %%
+# Generate data
+key, x_key, noise_key = jr.split(key, 3)
+n = 300
+x = jr.uniform(x_key, (n, 1), minval=-2.0, maxval=2.0)
+signal = jnp.sin(2.0 * x)
+# Gaussian bump of noise
+noise_std = 0.1 + 0.5 * jnp.exp(-0.5 * ((x - 0.5) / 0.4) ** 2)
+y = signal + noise_std * jr.normal(noise_key, shape=(n, 1))
+data_adv = gpx.Dataset(X=x, y=y)
+
+# %% [markdown]
+# ### Model components
+# We again adopt RBF priors for both processes but now apply a `SoftplusTransform`
+# to the noise GP. This alternative map enforces positivity whilst avoiding the
+# heavier tails induced by the log-normal transform. The `HeteroscedasticGaussian`
+# likelihood seamlessly accepts the new transform.
+
+# %%
+# Define model components
+mean_prior = gpx.gps.Prior(
+    mean_function=gpx.mean_functions.Zero(),
+    kernel=gpx.kernels.RBF(),
+)
+noise_prior_adv = gpx.gps.Prior(
+    mean_function=gpx.mean_functions.Zero(),
+    kernel=gpx.kernels.RBF(),
+)
+likelihood_adv = HeteroscedasticGaussian(
+    num_datapoints=data_adv.n,
+    noise_prior=noise_prior_adv,
+    noise_transform=SoftplusTransform(),
+)
+posterior_adv = mean_prior * likelihood_adv
+
+# %%
+# Configure variational family
+# The signal requires a richer inducing set to capture its oscillations, whereas the
+# noise process can be summarised with fewer points because the burst is localised.
+z_signal = jnp.linspace(-2.0, 2.0, 30)[:, None]
+z_noise = jnp.linspace(-2.0, 2.0, 15)[:, None]
+
+# Use VariationalGaussianInit to pass specific configurations
+q_init_f = VariationalGaussianInit(inducing_inputs=z_signal)
+q_init_g = VariationalGaussianInit(inducing_inputs=z_noise)
+
+q_adv = HeteroscedasticVariationalFamily(
+    posterior=posterior_adv,
+    signal_init=q_init_f,
+    noise_init=q_init_g,
+)
+
+# %% [markdown]
+# The initialisation objects `VariationalGaussianInit` allow us to prescribe
+# different inducing grids and initial covariance structures for $f$ and $g$. This
+# flexibility is invaluable when working with large datasets where the latent
+# functions have markedly different smoothness properties.
+
+# %%
+# Optimize
+objective_adv = lambda model, data: -gpx.objectives.heteroscedastic_elbo(model, data)
+optimiser_adv = ox.adam(1e-2)
+q_adv_trained, _ = gpx.fit(
+    model=q_adv,
+    objective=objective_adv,
+    train_data=data_adv,
+    optim=optimiser_adv,
+    num_iters=8000,
+    verbose=False,
+)
+
+# %%
+# Plotting
+xtest = jnp.linspace(-2.2, 2.2, 200)[:, None]
+pred = q_adv_trained.predict(xtest)
+
+# Unpack the named tuple
+mf = pred.mean_f
+vf = pred.variance_f
+mg = pred.mean_g
+vg = pred.variance_g
+
+# Calculate total predictive variance
+# The likelihood expects the *latent* noise distribution to compute the predictive
+# but here we can just use the transformed expected variance for plotting.
+# For accurate predictive intervals, we should use likelihood.predict.
+signal_dist, noise_dist = q_adv_trained.predict_latents(xtest)
+predictive_dist = likelihood_adv.predict(signal_dist, noise_dist)
+predictive_mean = predictive_dist.mean
+predictive_std = jnp.sqrt(jnp.diag(predictive_dist.covariance_matrix))
+
+fig, ax = plt.subplots()
+ax.plot(x, y, "o", color="black", alpha=0.3, label="Data")
+ax.plot(xtest, mf, color="C0", label="Signal Mean")
+ax.fill_between(
+    xtest.squeeze(),
+    mf.squeeze() - 2 * jnp.sqrt(vf.squeeze()),
+    mf.squeeze() + 2 * jnp.sqrt(vf.squeeze()),
+    color="C0",
+    alpha=0.2,
+    label="Signal Uncertainty",
+)
+
+# Plot total uncertainty (signal + noise)
+ax.plot(xtest, predictive_mean, "--", color="C1", alpha=0.5)
+ax.fill_between(
+    xtest.squeeze(),
+    predictive_mean - 2 * predictive_std,
+    predictive_mean + 2 * predictive_std,
+    color="C1",
+    alpha=0.1,
+    label="Predictive Uncertainty (95%)",
+)
+
+ax.set_title("Heteroscedastic Regression with Custom Inducing Points")
+ax.legend(loc="upper left", fontsize="small")
+
+# %% [markdown]
+# ## Takeaways
+# - The heteroscedastic GP model couples two latent GPs, enabling separate control of
+# epistemic and aleatoric uncertainties.
+# - We support multiple positivity transforms for the noise process; the choice
+# affects the implied variance tails and should reflect prior beliefs.
+# - Inducing points for the signal and noise processes can be tuned independently to
+# balance computational budget against the local complexity of each function.
+# - The ELBO implementation automatically selects the tightest analytical bound
+# available, streamlining heteroscedastic inference workflows.
+
+# %% [markdown]
+# ## System configuration
+
+# %%
+# %reload_ext watermark
+# %watermark -n -u -v -iv -w -a 'Thomas Pinder'
{gpjax-0.13.3 → gpjax-0.13.4}/examples/regression.py

@@ -29,7 +29,6 @@ import matplotlib as mpl
 import matplotlib.pyplot as plt
 
 from examples.utils import (
-    clean_legend,
     use_mpl_style,
 )
 
@@ -129,26 +128,26 @@ prior = gpx.gps.Prior(mean_function=meanf, kernel=kernel)
 
 # %%
 # %% [markdown]
-prior_dist = prior.predict(xtest, return_covariance_type="dense")
-
-prior_mean = prior_dist.mean
-prior_std = prior_dist.variance
-samples = prior_dist.sample(key=key, sample_shape=(20,))
-
-
-fig, ax = plt.subplots()
-ax.plot(xtest, samples.T, alpha=0.5, color=cols[0], label="Prior samples")
-ax.plot(xtest, prior_mean, color=cols[1], label="Prior mean")
-ax.fill_between(
-    xtest.flatten(),
-    prior_mean - prior_std,
-    prior_mean + prior_std,
-    alpha=0.3,
-    color=cols[1],
-    label="Prior variance",
-)
-ax.legend(loc="best")
-ax = clean_legend(ax)
+# prior_dist = prior.predict(xtest, return_covariance_type="dense")
+#
+# prior_mean = prior_dist.mean
+# prior_std = prior_dist.variance
+# samples = prior_dist.sample(key=key, sample_shape=(20,))
+#
+#
+# fig, ax = plt.subplots()
+# ax.plot(xtest, samples.T, alpha=0.5, color=cols[0], label="Prior samples")
+# ax.plot(xtest, prior_mean, color=cols[1], label="Prior mean")
+# ax.fill_between(
+#     xtest.flatten(),
+#     prior_mean - prior_std,
+#     prior_mean + prior_std,
+#     alpha=0.3,
+#     color=cols[1],
+#     label="Prior variance",
+# )
+# ax.legend(loc="best")
+# ax = clean_legend(ax)
 
 # %% [markdown]
 # ## Constructing the posterior
@@ -217,13 +216,15 @@ print(-gpx.objectives.conjugate_mll(opt_posterior, D))
 # this, we use our defined `posterior` and `likelihood` at our test inputs to obtain
 # the predictive distribution as a `Distrax` multivariate Gaussian upon which `mean`
 # and `stddev` can be used to extract the predictive mean and standard deviatation.
-#
+#
 # We are only concerned here about the variance between the test points and themselves, so
 # we can just copute the diagonal version of the covariance. We enforce this by using
 # `return_covariance_type = "diagonal"` in the `predict` call.
 
 # %%
-latent_dist = opt_posterior.predict(
+latent_dist = opt_posterior.predict(
+    xtest, train_data=D, return_covariance_type="diagonal"
+)
 predictive_dist = opt_posterior.likelihood(latent_dist)
 
 predictive_mean = predictive_dist.mean
{gpjax-0.13.3 → gpjax-0.13.4}/gpjax/__init__.py

@@ -40,7 +40,7 @@ __license__ = "MIT"
 __description__ = "Gaussian processes in JAX and Flax"
 __url__ = "https://github.com/thomaspinder/GPJax"
 __contributors__ = "https://github.com/thomaspinder/GPJax/graphs/contributors"
-__version__ = "0.13.3"
+__version__ = "0.13.4"
 
 __all__ = [
     "gps",
{gpjax-0.13.3 → gpjax-0.13.4}/gpjax/citation.py

@@ -23,6 +23,7 @@ from gpjax.kernels import (
     Matern32,
     Matern52,
 )
+from gpjax.likelihoods import HeteroscedasticGaussian
 
 CitationType = Union[None, str, Dict[str, str]]
 
@@ -149,3 +150,15 @@ def _(tree) -> PaperCitation:
         booktitle="Advances in neural information processing systems",
         citation_type="article",
     )
+
+
+@cite.register(HeteroscedasticGaussian)
+def _(tree) -> PaperCitation:
+    return PaperCitation(
+        citation_key="lazaro2011variational",
+        authors="Lázaro-Gredilla, Miguel and Titsias, Michalis",
+        title="Variational heteroscedastic Gaussian process regression",
+        year="2011",
+        booktitle="Proceedings of the 28th International Conference on Machine Learning (ICML)",
+        citation_type="inproceedings",
+    )
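The hunk above registers `HeteroscedasticGaussian` with GPJax's citation dispatcher so that the Lázaro-Gredilla & Titsias (2011) reference is attached to the new likelihood. As a minimal usage sketch (not part of the release diff; it assumes `cite` is the dispatcher defined in gpjax/citation.py and reuses the constructor arguments shown in the new example notebook):

import gpjax as gpx
from gpjax.citation import cite
from gpjax.likelihoods import HeteroscedasticGaussian, LogNormalTransform

# Build a heteroscedastic likelihood as in examples/heteroscedastic_inference.py.
noise_prior = gpx.gps.Prior(
    mean_function=gpx.mean_functions.Zero(),
    kernel=gpx.kernels.RBF(),
)
likelihood = HeteroscedasticGaussian(
    num_datapoints=100,
    noise_prior=noise_prior,
    noise_transform=LogNormalTransform(),
)

# With the registration above, the dispatcher should resolve to the
# "lazaro2011variational" PaperCitation entry.
print(cite(likelihood))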
{gpjax-0.13.3 → gpjax-0.13.4}/gpjax/gps.py

@@ -32,8 +32,10 @@ from gpjax.distributions import GaussianDistribution
 from gpjax.kernels import RFF
 from gpjax.kernels.base import AbstractKernel
 from gpjax.likelihoods import (
+    AbstractHeteroscedasticLikelihood,
     AbstractLikelihood,
     Gaussian,
+    HeteroscedasticGaussian,
     NonGaussian,
 )
 from gpjax.linalg import (
@@ -62,6 +64,7 @@ M = tp.TypeVar("M", bound=AbstractMeanFunction)
 L = tp.TypeVar("L", bound=AbstractLikelihood)
 NGL = tp.TypeVar("NGL", bound=NonGaussian)
 GL = tp.TypeVar("GL", bound=Gaussian)
+HL = tp.TypeVar("HL", bound=AbstractHeteroscedasticLikelihood)
 
 
 class AbstractPrior(nnx.Module, tp.Generic[M, K]):
@@ -476,6 +479,22 @@ class AbstractPosterior(nnx.Module, tp.Generic[P, L]):
         raise NotImplementedError
 
 
+class LatentPosterior(AbstractPosterior[P, L]):
+    r"""A posterior shell used to expose prior structure without inference."""
+
+    def predict(
+        self,
+        test_inputs: Num[Array, "N D"],
+        train_data: Dataset,
+        *,
+        return_covariance_type: Literal["dense", "diagonal"] = "dense",
+    ) -> GaussianDistribution:
+        raise NotImplementedError(
+            "LatentPosteriors are a lightweight wrapper for priors and do not "
+            "implement predictive distributions. Use a variational family for inference."
+        )
+
+
 class ConjugatePosterior(AbstractPosterior[P, GL]):
     r"""A Conjuate Gaussian process posterior object.
 
@@ -839,6 +858,40 @@ class NonConjugatePosterior(AbstractPosterior[P, NGL]):
         return GaussianDistribution(jnp.atleast_1d(mean.squeeze()), cov)
 
 
+class HeteroscedasticPosterior(LatentPosterior[P, HL]):
+    r"""Posterior shell for heteroscedastic likelihoods.
+
+    The posterior retains both the signal and noise priors; inference is delegated
+    to variational families and specialised objectives.
+    """
+
+    def __init__(
+        self,
+        prior: AbstractPrior[M, K],
+        likelihood: HL,
+        jitter: float = 1e-6,
+    ):
+        if likelihood.noise_prior is None:
+            raise ValueError("Heteroscedastic likelihoods require a noise_prior.")
+        super().__init__(prior=prior, likelihood=likelihood, jitter=jitter)
+        self.noise_prior = likelihood.noise_prior
+        self.noise_posterior = LatentPosterior(
+            prior=self.noise_prior, likelihood=likelihood, jitter=jitter
+        )
+
+
+class ChainedPosterior(HeteroscedasticPosterior[P, HL]):
+    r"""Posterior routed for heteroscedastic likelihoods using chained bounds."""
+
+    def __init__(
+        self,
+        prior: AbstractPrior[M, K],
+        likelihood: HL,
+        jitter: float = 1e-6,
+    ):
+        super().__init__(prior=prior, likelihood=likelihood, jitter=jitter)
+
+
 #######################
 # Utils
 #######################
@@ -854,6 +907,18 @@ def construct_posterior( # noqa: F811
 ) -> NonConjugatePosterior[P, NGL]: ...
 
 
+@tp.overload
+def construct_posterior(  # noqa: F811
+    prior: P, likelihood: HeteroscedasticGaussian
+) -> HeteroscedasticPosterior[P, HeteroscedasticGaussian]: ...
+
+
+@tp.overload
+def construct_posterior(  # noqa: F811
+    prior: P, likelihood: AbstractHeteroscedasticLikelihood
+) -> ChainedPosterior[P, AbstractHeteroscedasticLikelihood]: ...
+
+
 def construct_posterior(prior, likelihood):  # noqa: F811
     r"""Utility function for constructing a posterior object from a prior and
     likelihood. The function will automatically select the correct posterior
@@ -873,6 +938,15 @@ def construct_posterior(prior, likelihood): # noqa: F811
     if isinstance(likelihood, Gaussian):
         return ConjugatePosterior(prior=prior, likelihood=likelihood)
 
+    if (
+        isinstance(likelihood, HeteroscedasticGaussian)
+        and likelihood.supports_tight_bound()
+    ):
+        return HeteroscedasticPosterior(prior=prior, likelihood=likelihood)
+
+    if isinstance(likelihood, AbstractHeteroscedasticLikelihood):
+        return ChainedPosterior(prior=prior, likelihood=likelihood)
+
     return NonConjugatePosterior(prior=prior, likelihood=likelihood)
 
 
@@ -911,7 +985,10 @@ __all__ = [
     "AbstractPrior",
     "Prior",
     "AbstractPosterior",
+    "LatentPosterior",
     "ConjugatePosterior",
     "NonConjugatePosterior",
+    "HeteroscedasticPosterior",
+    "ChainedPosterior",
     "construct_posterior",
 ]
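Taken together, the overloads and routing added to `construct_posterior` mean that pairing a prior with the new likelihood yields a `HeteroscedasticPosterior` (or a `ChainedPosterior` when the tight bound is unavailable). A minimal sketch of that routing (not part of the release diff; it assumes the constructor arguments used in the new example notebook and the `supports_tight_bound` behaviour implied by the hunk above):

import gpjax as gpx
from gpjax.gps import HeteroscedasticPosterior, construct_posterior
from gpjax.likelihoods import HeteroscedasticGaussian, LogNormalTransform

signal_prior = gpx.gps.Prior(
    mean_function=gpx.mean_functions.Zero(), kernel=gpx.kernels.RBF()
)
noise_prior = gpx.gps.Prior(
    mean_function=gpx.mean_functions.Zero(), kernel=gpx.kernels.RBF()
)
likelihood = HeteroscedasticGaussian(
    num_datapoints=200,
    noise_prior=noise_prior,
    noise_transform=LogNormalTransform(),
)

# Explicit construction; the `signal_prior * likelihood` syntax used in the
# example notebook routes through the same factory function.
posterior = construct_posterior(signal_prior, likelihood)
print(isinstance(posterior, HeteroscedasticPosterior))  # expected: True when the tight bound applies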