qpytorch-0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of qpytorch might be problematic.
- qpytorch/__init__.py +327 -0
- qpytorch/constraints/__init__.py +3 -0
- qpytorch/distributions/__init__.py +21 -0
- qpytorch/distributions/delta.py +86 -0
- qpytorch/distributions/multitask_multivariate_qexponential.py +435 -0
- qpytorch/distributions/multivariate_qexponential.py +581 -0
- qpytorch/distributions/power.py +113 -0
- qpytorch/distributions/qexponential.py +153 -0
- qpytorch/functions/__init__.py +58 -0
- qpytorch/kernels/__init__.py +80 -0
- qpytorch/kernels/grid_interpolation_kernel.py +213 -0
- qpytorch/kernels/inducing_point_kernel.py +151 -0
- qpytorch/kernels/kernel.py +695 -0
- qpytorch/kernels/matern32_kernel_grad.py +155 -0
- qpytorch/kernels/matern52_kernel_grad.py +194 -0
- qpytorch/kernels/matern52_kernel_gradgrad.py +248 -0
- qpytorch/kernels/polynomial_kernel_grad.py +88 -0
- qpytorch/kernels/qexponential_symmetrized_kl_kernel.py +61 -0
- qpytorch/kernels/rbf_kernel_grad.py +125 -0
- qpytorch/kernels/rbf_kernel_gradgrad.py +186 -0
- qpytorch/kernels/rff_kernel.py +153 -0
- qpytorch/lazy/__init__.py +9 -0
- qpytorch/likelihoods/__init__.py +66 -0
- qpytorch/likelihoods/bernoulli_likelihood.py +75 -0
- qpytorch/likelihoods/beta_likelihood.py +76 -0
- qpytorch/likelihoods/gaussian_likelihood.py +472 -0
- qpytorch/likelihoods/laplace_likelihood.py +59 -0
- qpytorch/likelihoods/likelihood.py +437 -0
- qpytorch/likelihoods/likelihood_list.py +60 -0
- qpytorch/likelihoods/multitask_gaussian_likelihood.py +542 -0
- qpytorch/likelihoods/multitask_qexponential_likelihood.py +545 -0
- qpytorch/likelihoods/noise_models.py +184 -0
- qpytorch/likelihoods/qexponential_likelihood.py +494 -0
- qpytorch/likelihoods/softmax_likelihood.py +97 -0
- qpytorch/likelihoods/student_t_likelihood.py +90 -0
- qpytorch/means/__init__.py +23 -0
- qpytorch/metrics/__init__.py +17 -0
- qpytorch/mlls/__init__.py +53 -0
- qpytorch/mlls/_approximate_mll.py +79 -0
- qpytorch/mlls/deep_approximate_mll.py +30 -0
- qpytorch/mlls/deep_predictive_log_likelihood.py +32 -0
- qpytorch/mlls/exact_marginal_log_likelihood.py +96 -0
- qpytorch/mlls/gamma_robust_variational_elbo.py +106 -0
- qpytorch/mlls/inducing_point_kernel_added_loss_term.py +69 -0
- qpytorch/mlls/kl_qexponential_added_loss_term.py +41 -0
- qpytorch/mlls/leave_one_out_pseudo_likelihood.py +73 -0
- qpytorch/mlls/marginal_log_likelihood.py +48 -0
- qpytorch/mlls/predictive_log_likelihood.py +76 -0
- qpytorch/mlls/sum_marginal_log_likelihood.py +40 -0
- qpytorch/mlls/variational_elbo.py +77 -0
- qpytorch/models/__init__.py +72 -0
- qpytorch/models/approximate_qep.py +115 -0
- qpytorch/models/deep_qeps/__init__.py +22 -0
- qpytorch/models/deep_qeps/deep_qep.py +155 -0
- qpytorch/models/deep_qeps/dspp.py +114 -0
- qpytorch/models/exact_prediction_strategies.py +880 -0
- qpytorch/models/exact_qep.py +349 -0
- qpytorch/models/model_list.py +100 -0
- qpytorch/models/pyro/__init__.py +28 -0
- qpytorch/models/pyro/_pyro_mixin.py +57 -0
- qpytorch/models/pyro/distributions/__init__.py +5 -0
- qpytorch/models/pyro/pyro_qep.py +105 -0
- qpytorch/models/qep.py +7 -0
- qpytorch/models/qeplvm/__init__.py +6 -0
- qpytorch/models/qeplvm/bayesian_qeplvm.py +40 -0
- qpytorch/models/qeplvm/latent_variable.py +102 -0
- qpytorch/module.py +30 -0
- qpytorch/optim/__init__.py +5 -0
- qpytorch/priors/__init__.py +42 -0
- qpytorch/priors/qep_priors.py +81 -0
- qpytorch/test/__init__.py +22 -0
- qpytorch/test/base_likelihood_test_case.py +106 -0
- qpytorch/test/model_test_case.py +150 -0
- qpytorch/test/variational_test_case.py +400 -0
- qpytorch/utils/__init__.py +38 -0
- qpytorch/utils/warnings.py +37 -0
- qpytorch/variational/__init__.py +47 -0
- qpytorch/variational/_variational_distribution.py +61 -0
- qpytorch/variational/_variational_strategy.py +391 -0
- qpytorch/variational/additive_grid_interpolation_variational_strategy.py +90 -0
- qpytorch/variational/batch_decoupled_variational_strategy.py +256 -0
- qpytorch/variational/cholesky_variational_distribution.py +65 -0
- qpytorch/variational/ciq_variational_strategy.py +352 -0
- qpytorch/variational/delta_variational_distribution.py +41 -0
- qpytorch/variational/grid_interpolation_variational_strategy.py +113 -0
- qpytorch/variational/independent_multitask_variational_strategy.py +114 -0
- qpytorch/variational/lmc_variational_strategy.py +248 -0
- qpytorch/variational/mean_field_variational_distribution.py +58 -0
- qpytorch/variational/multitask_variational_strategy.py +317 -0
- qpytorch/variational/natural_variational_distribution.py +152 -0
- qpytorch/variational/nearest_neighbor_variational_strategy.py +487 -0
- qpytorch/variational/orthogonally_decoupled_variational_strategy.py +128 -0
- qpytorch/variational/tril_natural_variational_distribution.py +130 -0
- qpytorch/variational/uncorrelated_multitask_variational_strategy.py +114 -0
- qpytorch/variational/unwhitened_variational_strategy.py +225 -0
- qpytorch/variational/variational_strategy.py +280 -0
- qpytorch/version.py +4 -0
- qpytorch-0.1.dist-info/LICENSE +21 -0
- qpytorch-0.1.dist-info/METADATA +177 -0
- qpytorch-0.1.dist-info/RECORD +102 -0
- qpytorch-0.1.dist-info/WHEEL +5 -0
- qpytorch-0.1.dist-info/top_level.txt +1 -0
qpytorch/distributions/power.py
@@ -0,0 +1,113 @@
#!/usr/bin/env python3

from __future__ import annotations

from typing import Optional

import torch

from ..module import Module
from ..constraints import Interval, Positive
from ..priors import Prior


class Power(Module):
    """
    Constructs a power parameter for the (multivariate) q-exponential distribution.
    See :class:`qpytorch.distributions.QExponential` or :class:`qpytorch.distributions.MultivariateQExponential`
    for a description of the power parameter.

    .. note::

        This object behaves like a kernel hyperparameter: it can be given a prior and optimized over.

    :param power_init: initial value of the power parameter of the QEP distribution. (Default: 1.0)
    :param power_constraint: Set this if you want to apply a constraint to the power parameter.
        (Default: :class:`~qpytorch.constraints.Positive`.)
    :param power_prior: Set this if you want to apply a prior to the power parameter.
        (Default: `None`.)

    :ivar torch.Size shape:
        The dimension of the power object.
    :ivar torch.Tensor power:
        The power parameter. The size/shape is the same as the `power_init` argument.
    :ivar torch.Tensor data:
        The data of the power object in :obj:`torch.tensor` format.

    Example:
        >>> power_init = torch.tensor(1.0)
        >>> power_prior = qpytorch.priors.GammaPrior(4.0, 2.0)
        >>> power = qpytorch.distributions.Power(power_init, power_prior=power_prior)
    """

    def __init__(
        self,
        power_init: torch.Tensor = torch.tensor(1.0),
        power_constraint: Optional[Interval] = None,
        power_prior: Optional[Prior] = None,
    ):
        super(Power, self).__init__()
        if power_constraint is None:
            power_constraint = Positive()

        # Set the (unconstrained) raw parameter.
        self.register_parameter(
            name="raw_power",
            parameter=torch.nn.Parameter(power_constraint.inverse_transform(power_init)),
        )
        self.shape = self.raw_power.shape
        # Set the constraint.
        self.register_constraint("raw_power", power_constraint)
        # Set the prior, if given.
        if power_prior is not None:
            if not isinstance(power_prior, Prior):
                raise TypeError("Expected qpytorch.priors.Prior but got " + type(power_prior).__name__)
            self.register_prior("power_prior", power_prior, self._power_param, self._power_closure)

    def _power_param(self, q: Power) -> torch.Tensor:
        # Prior closure: read the (transformed) power value.
        return q.power

    def _power_closure(self, q: Power, v: torch.Tensor) -> None:
        # Prior closure: set the power value.
        return q._set_power(v)

    @property
    def power(self) -> torch.Tensor:
        return self.raw_power_constraint.transform(self.raw_power)

    @power.setter
    def power(self, value: torch.Tensor) -> None:
        self._set_power(value)

    def _set_power(self, value: torch.Tensor):
        if not torch.is_tensor(value):
            value = torch.as_tensor(value).to(self.raw_power)
        self.initialize(raw_power=self.raw_power_constraint.inverse_transform(value))

    @property
    def data(self) -> torch.Tensor:
        return self.power.data

    def __truediv__(self, other):
        return self.power / other

    def __rtruediv__(self, other):
        return other / self.power

    def __rpow__(self, other):
        return other ** self.power

    def __ne__(self, other):
        return self.power != other

    def __lt__(self, other):
        return self.power < other

    def __gt__(self, other):
        return self.power > other

    def numel(self):
        return self.power.numel()

    def size(self):
        return self.power.size()
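For orientation, here is a minimal usage sketch of the Power module above (an illustration, not part of the packaged diff): the learnable parameter is the unconstrained raw_power, while the power property applies the constraint's transform, and the operator overloads delegate to the transformed tensor.

import torch
import qpytorch

# The docstring's constructor, with the default Positive() constraint.
power = qpytorch.distributions.Power(torch.tensor(1.0))
print(power.power)   # ~1.0: raw_power_constraint.transform(raw_power)
power.power = 1.5    # setter routes through _set_power / inverse_transform
print(2.0 ** power)  # __rpow__ delegates to the transformed power tensor
print(power < 2.0)   # __lt__ compares the transformed power tensor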
qpytorch/distributions/qexponential.py
@@ -0,0 +1,153 @@
#!/usr/bin/env python3

import math
from numbers import Number, Real

import torch
from torch.distributions import constraints, Chi2
from torch.distributions.exp_family import ExponentialFamily
from torch.distributions.kl import register_kl
from torch.distributions.utils import _standard_normal, broadcast_all

from gpytorch.distributions.distribution import Distribution

__all__ = ["QExponential"]


class QExponential(ExponentialFamily, Distribution):
    r"""
    Creates a q-exponential distribution parameterized by
    :attr:`loc`, :attr:`scale`, and :attr:`power`, with the following density:

    .. math::

        p(x; \mu, \sigma^2) = \frac{q}{2}(2\pi\sigma^2)^{-\frac{1}{2}}
        \left|\frac{x-\mu}{\sigma}\right|^{\frac{q}{2}-1}
        \exp\left\{-\frac{1}{2}\left|\frac{x-\mu}{\sigma}\right|^q\right\}

    Example::

        >>> # xdoctest: +IGNORE_WANT("non-deterministic")
        >>> m = QExponential(torch.tensor([0.0]), torch.tensor([1.0]))
        >>> m.sample()  # q-exponentially distributed with loc=0, scale=1 and power=2
        tensor([ 0.1046])

    Args:
        loc (float or Tensor): mean of the distribution (often referred to as mu)
        scale (float or Tensor): standard deviation of the distribution
            (often referred to as sigma)
        power (float or Tensor): power of the distribution
    """

    arg_constraints = {"loc": constraints.real, "scale": constraints.positive, "power": constraints.positive}
    support = constraints.real
    has_rsample = True
    _mean_carrier_measure = 0

    @property
    def mean(self):
        return self.loc

    @property
    def mode(self):
        return self.loc

    @property
    def stddev(self):
        return self.scale

    @property
    def variance(self):
        return self.stddev.pow(2)

    @property
    def rescalor(self):
        return torch.exp((2. / self.power * math.log(2) + torch.lgamma(0.5 + 2. / self.power) - math.log(math.pi) / 2.) / 2.)

    def __init__(self, loc, scale, power=torch.tensor(2.0), validate_args=None):
        self.loc, self.scale = broadcast_all(loc, scale)
        if isinstance(loc, Number) and isinstance(scale, Number):
            batch_shape = torch.Size()
        else:
            batch_shape = self.loc.size()
        self.power = power
        super().__init__(batch_shape, validate_args=validate_args)

    def confidence(self, alpha=0.05):
        lower = self.icdf(torch.tensor(alpha / 2))
        upper = self.icdf(torch.tensor(1 - alpha / 2))
        return lower, upper

    def expand(self, batch_shape, _instance=None):
        new = self._get_checked_instance(QExponential, _instance)
        batch_shape = torch.Size(batch_shape)
        new.loc = self.loc.expand(batch_shape)
        new.scale = self.scale.expand(batch_shape)
        super(QExponential, new).__init__(batch_shape, validate_args=False)
        new._validate_args = self._validate_args
        return new

    def sample(self, sample_shape=torch.Size(), rescale=False):
        shape = self._extended_shape(sample_shape)
        with torch.no_grad():
            eps = Chi2(1).sample(shape).to(self.loc.device) ** (1. / self.power) * _standard_normal(shape, dtype=self.loc.dtype, device=self.loc.device).sign()
            if rescale:
                eps /= self.rescalor
            return self.loc.expand(shape) + eps * self.scale.expand(shape)

    def rsample(self, sample_shape=torch.Size(), rescale=False):
        shape = self._extended_shape(sample_shape)
        eps = _standard_normal(shape, dtype=self.loc.dtype, device=self.loc.device)
        if self.power != 2:
            eps = eps.abs() ** (2. / self.power - 1) * eps
        if rescale:
            eps /= self.rescalor
        return self.loc + eps * self.scale

    def log_prob(self, value):
        if self._validate_args:
            self._validate_sample(value)
        log_scale = (
            math.log(self.scale) if isinstance(self.scale, Real) else self.scale.log()
        )
        scaled_diff = ((value - self.loc) / self.scale).abs()
        res = -0.5 * (scaled_diff ** self.power + math.log(2 * math.pi)) - log_scale
        if self.power != 2:
            res += (self.power / 2. - 1) * scaled_diff.log() + torch.log(self.power / 2.)
        return res

    def cdf(self, value):
        if self._validate_args:
            self._validate_sample(value)
        scaled_diff = (value - self.loc) * self.scale.reciprocal()
        if self.power != 2:
            scaled_diff *= scaled_diff.abs() ** (self.power / 2. - 1)
        return 0.5 * (
            1 + torch.erf(scaled_diff / math.sqrt(2))
        )

    def icdf(self, value):
        erfinv = torch.erfinv(2 * value - 1) * math.sqrt(2)
        if self.power != 2:
            erfinv *= erfinv.abs() ** (2. / self.power - 1)
        return self.loc + self.scale * erfinv

    def entropy(self, exact=False):
        res = 0.5 + 0.5 * math.log(2 * math.pi) + torch.log(self.scale)
        if self.power != 2:
            res += 0.5 * (self.power / 2. - 1) * (2. / self.power * Chi2(1).entropy() if exact else 0) - torch.log(self.power / 2.)
        return res

    @property
    def _natural_params(self):
        if self.power != 2:
            raise ValueError(f"Q-Exponential distribution with power {self.power} does not belong to the exponential family!")
        else:
            return (self.loc / self.scale.pow(2), -0.5 * self.scale.pow(2).reciprocal())

    def _log_normalizer(self, x, y):
        if self.power != 2:
            raise ValueError(f"Q-Exponential distribution with power {self.power} does not belong to the exponential family!")
        else:
            return -0.25 * x.pow(2) / y + 0.5 * torch.log(-math.pi / y)


@register_kl(QExponential, QExponential)
def _kl_qexponential_qexponential(p, q, exact=False):
    var_ratio = (p.scale / q.scale).pow(2)
    t1 = ((p.loc - q.loc) / q.scale).pow(2)
    res = 0.5 * ((var_ratio + t1).pow(q.power / 2.) - 1 - var_ratio.log())
    if q.power != 2:
        res += 0.5 * (-(q.power / 2. - 1) * torch.log(var_ratio + t1) + (p.power / 2. - 1) * (-2. / p.power * Chi2(1).entropy() if exact else 0))
    return res
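As a sanity check on the density and KL formulas above (an illustration, not part of the package): with power = 2 both reduce exactly to the Gaussian N(mu, sigma^2) case, so log_prob and the registered KL agree with torch.distributions.Normal.

import torch
from torch.distributions import Normal, kl_divergence

from qpytorch.distributions.qexponential import QExponential

# power defaults to 2.0, which recovers the Gaussian case exactly.
p = QExponential(torch.tensor([0.0]), torch.tensor([1.0]))
q = QExponential(torch.tensor([1.0]), torch.tensor([2.0]))
n0 = Normal(torch.tensor([0.0]), torch.tensor([1.0]))
n1 = Normal(torch.tensor([1.0]), torch.tensor([2.0]))

x = torch.linspace(-3.0, 3.0, 7)
assert torch.allclose(p.log_prob(x), n0.log_prob(x))               # same density
assert torch.allclose(kl_divergence(p, q), kl_divergence(n0, n1))  # same KL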
qpytorch/functions/__init__.py
@@ -0,0 +1,58 @@
#!/usr/bin/env python3

from __future__ import annotations

import warnings
from typing import Any

import linear_operator
import torch

from gpytorch.functions._log_normal_cdf import LogNormalCDF
from gpytorch.functions.matern_covariance import MaternCovariance
from gpytorch.functions.rbf_covariance import RBFCovariance


def log_normal_cdf(x):
    """
    Computes the element-wise log standard normal CDF of an input tensor x.

    This function should always be preferred over calling normal_cdf and taking the log
    manually, as it is more numerically stable.
    """
    return LogNormalCDF.apply(x)


def logdet(mat):
    warnings.warn("gpytorch.logdet is deprecated. Use torch.logdet instead.", DeprecationWarning)
    return torch.logdet(mat)


def matmul(mat, rhs):
    warnings.warn("gpytorch.matmul is deprecated. Use torch.matmul instead.", DeprecationWarning)
    return torch.matmul(mat, rhs)


def inv_matmul(mat, right_tensor, left_tensor=None):
    warnings.warn("gpytorch.inv_matmul is deprecated. Use gpytorch.solve instead.", DeprecationWarning)
    return linear_operator.solve(mat, right_tensor, left_tensor)


__all__ = [
    "MaternCovariance",
    "RBFCovariance",
    "inv_matmul",
    "logdet",
    "log_normal_cdf",
    "matmul",
]


def __getattr__(name: str) -> Any:
    if hasattr(linear_operator.functions, name):
        warnings.warn(
            f"gpytorch.functions.{name} is deprecated. Use linear_operator.functions.{name} instead.",
            DeprecationWarning,
        )
        return getattr(linear_operator.functions, name)
    raise AttributeError(f"module gpytorch.functions has no attribute {name}.")
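The numerical-stability claim in log_normal_cdf's docstring is easy to see in single precision (a sketch for illustration only): computing Phi(x) via erf cancels to 0 in float32 for moderately negative x, so the naive log gives -inf, while the dedicated log-CDF stays finite.

import torch
from torch.distributions import Normal

from qpytorch.functions import log_normal_cdf

x = torch.tensor([-1.0, -5.0, -30.0])
naive = Normal(0.0, 1.0).cdf(x).log()  # float32: the -30 entry underflows to -inf
stable = log_normal_cdf(x)             # finite for all entries (~ -x^2/2 for large |x|)
print(naive)
print(stable)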
qpytorch/kernels/__init__.py
@@ -0,0 +1,80 @@
#!/usr/bin/env python3
from gpytorch.kernels import keops
from gpytorch.kernels.additive_structure_kernel import AdditiveStructureKernel
from gpytorch.kernels.arc_kernel import ArcKernel
from gpytorch.kernels.constant_kernel import ConstantKernel
from gpytorch.kernels.cosine_kernel import CosineKernel
from gpytorch.kernels.cylindrical_kernel import CylindricalKernel
from gpytorch.kernels.distributional_input_kernel import DistributionalInputKernel
from gpytorch.kernels.gaussian_symmetrized_kl_kernel import GaussianSymmetrizedKLKernel
from .grid_interpolation_kernel import GridInterpolationKernel
from gpytorch.kernels.grid_kernel import GridKernel
from gpytorch.kernels.hamming_kernel import HammingIMQKernel
from gpytorch.kernels.index_kernel import IndexKernel
from .inducing_point_kernel import InducingPointKernel
from .kernel import AdditiveKernel, Kernel, ProductKernel
from gpytorch.kernels.lcm_kernel import LCMKernel
from gpytorch.kernels.linear_kernel import LinearKernel
from .matern32_kernel_grad import Matern32KernelGrad
from .matern52_kernel_grad import Matern52KernelGrad
from .matern52_kernel_gradgrad import Matern52KernelGradGrad
from gpytorch.kernels.matern_kernel import MaternKernel
from gpytorch.kernels.multi_device_kernel import MultiDeviceKernel
from gpytorch.kernels.multitask_kernel import MultitaskKernel
from gpytorch.kernels.newton_girard_additive_kernel import NewtonGirardAdditiveKernel
from gpytorch.kernels.periodic_kernel import PeriodicKernel
from gpytorch.kernels.piecewise_polynomial_kernel import PiecewisePolynomialKernel
from gpytorch.kernels.polynomial_kernel import PolynomialKernel
from .polynomial_kernel_grad import PolynomialKernelGrad
from gpytorch.kernels.product_structure_kernel import ProductStructureKernel
from .qexponential_symmetrized_kl_kernel import QExponentialSymmetrizedKLKernel
from gpytorch.kernels.rbf_kernel import RBFKernel
from .rbf_kernel_grad import RBFKernelGrad
from .rbf_kernel_gradgrad import RBFKernelGradGrad
from .rff_kernel import RFFKernel
from gpytorch.kernels.rq_kernel import RQKernel
from gpytorch.kernels.scale_kernel import ScaleKernel
from gpytorch.kernels.spectral_delta_kernel import SpectralDeltaKernel
from gpytorch.kernels.spectral_mixture_kernel import SpectralMixtureKernel

__all__ = [
    "keops",
    "Kernel",
    "ArcKernel",
    "AdditiveKernel",
    "AdditiveStructureKernel",
    "ConstantKernel",
    "CylindricalKernel",
    "MultiDeviceKernel",
    "CosineKernel",
    "DistributionalInputKernel",
    "GaussianSymmetrizedKLKernel",
    "GridKernel",
    "GridInterpolationKernel",
    "HammingIMQKernel",
    "IndexKernel",
    "InducingPointKernel",
    "LCMKernel",
    "LinearKernel",
    "MaternKernel",
    "MultitaskKernel",
    "NewtonGirardAdditiveKernel",
    "PeriodicKernel",
    "PiecewisePolynomialKernel",
    "PolynomialKernel",
    "PolynomialKernelGrad",
    "ProductKernel",
    "ProductStructureKernel",
    "QExponentialSymmetrizedKLKernel",
    "RBFKernel",
    "RFFKernel",
    "RBFKernelGrad",
    "RBFKernelGradGrad",
    "RQKernel",
    "ScaleKernel",
    "SpectralDeltaKernel",
    "SpectralMixtureKernel",
    "Matern32KernelGrad",
    "Matern52KernelGrad",
    "Matern52KernelGradGrad",
]
qpytorch/kernels/grid_interpolation_kernel.py
@@ -0,0 +1,213 @@
#!/usr/bin/env python3

from typing import List, Optional, Tuple, Union

import torch
from linear_operator import to_linear_operator
from linear_operator.operators import InterpolatedLinearOperator

from ..models.exact_prediction_strategies import InterpolatedPredictionStrategy
from gpytorch.utils.grid import create_grid
from gpytorch.utils.interpolation import Interpolation
from gpytorch.kernels.grid_kernel import GridKernel
from .kernel import Kernel


class GridInterpolationKernel(GridKernel):
    r"""
    Implements the KISS-QEP (or SKI) approximation for a given kernel.
    It was proposed in `Kernel Interpolation for Scalable Structured Gaussian Processes`_,
    and offers extremely fast and accurate kernel approximations for large datasets.

    Given a base kernel `k`, the covariance :math:`k(\mathbf{x_1}, \mathbf{x_2})` is approximated
    using a grid of regularly spaced *inducing points*:

    .. math::

        \begin{equation*}
            k(\mathbf{x_1}, \mathbf{x_2}) = \mathbf{w_{x_1}}^\top K_{U,U} \mathbf{w_{x_2}}
        \end{equation*}

    where

    * :math:`U` is the set of gridded inducing points

    * :math:`K_{U,U}` is the kernel matrix between the inducing points

    * :math:`\mathbf{w_{x_1}}` and :math:`\mathbf{w_{x_2}}` are sparse vectors based on
      :math:`\mathbf{x_1}` and :math:`\mathbf{x_2}` that apply cubic interpolation.

    The user should supply the size of the grid (using the `grid_size` argument).
    To choose a reasonable grid value, we highly recommend using the
    :func:`gpytorch.utils.grid.choose_grid_size` helper function.
    The bounds of the grid will automatically be determined by the data.

    (Alternatively, you can hard-code bounds using the `grid_bounds` argument, which
    will speed up this kernel's computations.)

    .. note::

        `GridInterpolationKernel` can only wrap **stationary kernels** (such as RBF, Matern,
        Periodic, Spectral Mixture, etc.)

    Args:
        base_kernel (Kernel):
            The kernel to approximate with KISS-QEP.
        grid_size (Union[int, List[int]]):
            The size of the grid in each dimension.
            If a single int is provided, then every dimension will have the same grid size.
        num_dims (int):
            The dimension of the input data. Required if `grid_bounds=None`.
        grid_bounds (tuple(float, float), optional):
            The bounds of the grid, if known (high performance mode).
            The length of the tuple must match the number of dimensions.
            The entries represent the min/max values for each dimension.
        active_dims (tuple of ints, optional):
            Passed down to the `base_kernel`.

    .. _Kernel Interpolation for Scalable Structured Gaussian Processes:
        http://proceedings.mlr.press/v37/wilson15.pdf
    """

    def __init__(
        self,
        base_kernel: Kernel,
        grid_size: Union[int, List[int]],
        num_dims: Optional[int] = None,
        grid_bounds: Optional[Tuple[float, float]] = None,
        active_dims: Optional[Tuple[int, ...]] = None,
    ):
        has_initialized_grid = 0
        grid_is_dynamic = True

        # Make some temporary grid bounds, if none exist
        if grid_bounds is None:
            if num_dims is None:
                raise RuntimeError("num_dims must be supplied if grid_bounds is None")
            else:
                # Create some temporary grid bounds - they'll be changed soon
                grid_bounds = tuple((-1.0, 1.0) for _ in range(num_dims))
        else:
            has_initialized_grid = 1
            grid_is_dynamic = False
            if num_dims is None:
                num_dims = len(grid_bounds)
            elif num_dims != len(grid_bounds):
                raise RuntimeError(
                    "num_dims ({}) disagrees with the number of supplied "
                    "grid_bounds ({})".format(num_dims, len(grid_bounds))
                )

        if isinstance(grid_size, int):
            grid_sizes = [grid_size for _ in range(num_dims)]
        else:
            grid_sizes = list(grid_size)

        if len(grid_sizes) != num_dims:
            raise RuntimeError("The number of grid sizes provided through grid_size does not match num_dims.")

        # Initialize values and the grid
        self.grid_is_dynamic = grid_is_dynamic
        self.num_dims = num_dims
        self.grid_sizes = grid_sizes
        self.grid_bounds = grid_bounds
        grid = create_grid(self.grid_sizes, self.grid_bounds)

        super(GridInterpolationKernel, self).__init__(
            base_kernel=base_kernel,
            grid=grid,
            interpolation_mode=True,
            active_dims=active_dims,
        )
        self.register_buffer("has_initialized_grid", torch.tensor(has_initialized_grid, dtype=torch.bool))

    @property
    def _tight_grid_bounds(self):
        grid_spacings = tuple((bound[1] - bound[0]) / self.grid_sizes[i] for i, bound in enumerate(self.grid_bounds))
        return tuple(
            (bound[0] + 2.01 * spacing, bound[1] - 2.01 * spacing)
            for bound, spacing in zip(self.grid_bounds, grid_spacings)
        )

    def _compute_grid(self, inputs, last_dim_is_batch=False):
        n_data, n_dimensions = inputs.size(-2), inputs.size(-1)
        if last_dim_is_batch:
            inputs = inputs.transpose(-1, -2).unsqueeze(-1)
            n_dimensions = 1
        batch_shape = inputs.shape[:-2]

        inputs = inputs.reshape(-1, n_dimensions)
        interp_indices, interp_values = Interpolation().interpolate(self.grid, inputs)
        interp_indices = interp_indices.view(*batch_shape, n_data, -1)
        interp_values = interp_values.view(*batch_shape, n_data, -1)
        return interp_indices, interp_values

    def _inducing_forward(self, last_dim_is_batch, **params):
        return super().forward(self.grid, self.grid, last_dim_is_batch=last_dim_is_batch, **params)

    def forward(self, x1, x2, diag=False, last_dim_is_batch=False, **params):
        # See if we need to update the grid or not
        if self.grid_is_dynamic:  # This is true if a grid_bounds wasn't passed in
            if torch.equal(x1, x2):
                x = x1.reshape(-1, self.num_dims)
            else:
                x = torch.cat([x1.reshape(-1, self.num_dims), x2.reshape(-1, self.num_dims)])
            x_maxs = x.max(0)[0].tolist()
            x_mins = x.min(0)[0].tolist()

            # We need to update the grid if
            # 1) it hasn't ever been initialized, or
            # 2) if any of the grid points are "out of bounds"
            update_grid = (not self.has_initialized_grid.item()) or any(
                x_min < bound[0] or x_max > bound[1]
                for x_min, x_max, bound in zip(x_mins, x_maxs, self._tight_grid_bounds)
            )

            # Update the grid if needed
            if update_grid:
                grid_spacings = tuple(
                    (x_max - x_min) / (gs - 4.02) for gs, x_min, x_max in zip(self.grid_sizes, x_mins, x_maxs)
                )
                self.grid_bounds = tuple(
                    (x_min - 2.01 * spacing, x_max + 2.01 * spacing)
                    for x_min, x_max, spacing in zip(x_mins, x_maxs, grid_spacings)
                )
                grid = create_grid(
                    self.grid_sizes,
                    self.grid_bounds,
                    dtype=self.grid[0].dtype,
                    device=self.grid[0].device,
                )
                self.update_grid(grid)

        base_lazy_tsr = to_linear_operator(self._inducing_forward(last_dim_is_batch=last_dim_is_batch, **params))
        if last_dim_is_batch and base_lazy_tsr.size(-3) == 1:
            base_lazy_tsr = base_lazy_tsr.repeat(*x1.shape[:-2], x1.size(-1), 1, 1)

        left_interp_indices, left_interp_values = self._compute_grid(x1, last_dim_is_batch)
        if torch.equal(x1, x2):
            right_interp_indices = left_interp_indices
            right_interp_values = left_interp_values
        else:
            right_interp_indices, right_interp_values = self._compute_grid(x2, last_dim_is_batch)

        batch_shape = torch.broadcast_shapes(
            base_lazy_tsr.batch_shape,
            left_interp_indices.shape[:-2],
            right_interp_indices.shape[:-2],
        )
        res = InterpolatedLinearOperator(
            base_lazy_tsr.expand(*batch_shape, *base_lazy_tsr.matrix_shape),
            left_interp_indices.detach().expand(*batch_shape, *left_interp_indices.shape[-2:]),
            left_interp_values.expand(*batch_shape, *left_interp_values.shape[-2:]),
            right_interp_indices.detach().expand(*batch_shape, *right_interp_indices.shape[-2:]),
            right_interp_values.expand(*batch_shape, *right_interp_values.shape[-2:]),
        )

        if diag:
            return res.diagonal(dim1=-1, dim2=-2)
        else:
            return res

    def prediction_strategy(self, train_inputs, train_prior_dist, train_labels, likelihood):
        return InterpolatedPredictionStrategy(train_inputs, train_prior_dist, train_labels, likelihood)
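To make the constructor contract above concrete, here is a minimal usage sketch (an illustration, not from the package): a 1-D KISS approximation with dynamic grid bounds, which are fit to the data on the first forward pass.

import torch
import qpytorch

# num_dims is required here because grid_bounds is left dynamic.
kernel = qpytorch.kernels.GridInterpolationKernel(
    qpytorch.kernels.RBFKernel(), grid_size=64, num_dims=1
)

x = torch.linspace(0.0, 1.0, 100).unsqueeze(-1)  # 100 one-dimensional inputs
K = kernel(x, x)  # a lazy InterpolatedLinearOperator, never densified here
print(K.shape)    # torch.Size([100, 100])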