PyPI - pyautoencoder - Versions diffs - 1.1.2__tar.gz → 1.1.4__tar.gz - Mend

pyautoencoder 1.1.2.tar.gz → 1.1.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

{pyautoencoder-1.1.2 → pyautoencoder-1.1.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pyautoencoder
-Version: 1.1.2
+Version: 1.1.4
 Summary: A Python package offering implementations of state-of-the-art autoencoder architectures in PyTorch.
 Author: Andrea Pollastro
 License: MIT

{pyautoencoder-1.1.2 → pyautoencoder-1.1.4}/pyautoencoder/_base/base.py RENAMED Viewed

@@ -156,7 +156,7 @@ class BuildGuardMixin(ABC):
             @wraps(_orig_build)
             def _wrapped_build(self, *args: Any, **kwargs: Any) -> None:
-                if getattr(self, "_built", True):
+                if getattr(self, "_built", False):
                     return
                 with torch.no_grad():

{pyautoencoder-1.1.2 → pyautoencoder-1.1.4}/pyautoencoder/_version.py RENAMED Viewed

@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
 commit_id: str | None
 __commit_id__: str | None
-__version__ = version = '1.1.2'
-__version_tuple__ = version_tuple = (1, 1, 2)
+__version__ = version = '1.1.4'
+__version_tuple__ = version_tuple = (1, 1, 4)
-__commit_id__ = commit_id = 'g2d2766837'
+__commit_id__ = commit_id = 'g21e483730'

{pyautoencoder-1.1.2 → pyautoencoder-1.1.4}/pyautoencoder/loss/base.py RENAMED Viewed

@@ -1,4 +1,3 @@
-import math
 import torch
 import torch.nn.functional as F
 from dataclasses import dataclass
@@ -20,9 +19,6 @@ class LikelihoodType(Enum):
     GAUSSIAN = 'gaussian'
     BERNOULLI = 'bernoulli'
-# Cache for log(2pi) constants per (device, dtype)
-_LOG2PI_CACHE = {}
 @dataclass(slots=True, repr=True)
 class LossResult:
     r"""Container for loss computation results with objective and diagnostics.
@@ -45,30 +41,6 @@ class LossResult:
     objective: torch.Tensor
     diagnostics: Dict[str, float]
-def _get_log2pi(x: torch.Tensor) -> torch.Tensor:
-    r"""Return a cached value of :math:`\log(2\pi)` for the given device and dtype.
-    This avoids repeatedly allocating the constant for different devices or
-    precisions. A separate tensor is cached for each ``(device, dtype)`` pair.
-    Parameters
-    ----------
-    x : torch.Tensor
-        A tensor whose ``device`` and ``dtype`` determine which cached value is
-        returned or created.
-    Returns
-    -------
-    torch.Tensor
-        A scalar tensor equal to :math:`\log(2\pi)` with the same device and
-        dtype as ``x``.
-    """
-    key = (x.device, x.dtype)
-    if key not in _LOG2PI_CACHE:
-        _LOG2PI_CACHE[key] = torch.tensor(2.0 * math.pi, device=x.device, dtype=x.dtype).log()
-    return _LOG2PI_CACHE[key]
 def log_likelihood(x: torch.Tensor,
                    x_hat: torch.Tensor,
                    likelihood: Union[str, LikelihoodType] = LikelihoodType.GAUSSIAN) -> torch.Tensor:
@@ -118,9 +90,9 @@ def log_likelihood(x: torch.Tensor,
     Notes
     -----
-    - The Gaussian case includes the normalization constant
-      :math:`\log(2\pi)`, cached per ``(device, dtype)`` with
-      :func:`_get_log2pi`.
+    - The Gaussian case omits the normalization constant
+      :math:`-\tfrac{1}{2}\log(2\pi)`, which is constant with respect to
+      the model parameters and has no effect on optimization.
     - The Bernoulli case is fully numerically stable because it operates
       directly in log-space.
     """

{pyautoencoder-1.1.2 → pyautoencoder-1.1.4}/pyautoencoder/variational/stochastic_layers.py RENAMED Viewed

@@ -62,7 +62,7 @@ class FullyFactorizedGaussian(nn.Module):
             raise TypeError("build(x) expects a torch.Tensor.")
         if input_sample.ndim != 2:
             raise ValueError(f"build(x): expected shape [B, F], got {tuple(input_sample.shape)}. Flatten upstream.")
-        if input_sample.shape[1] <= 0:
+        if input_sample.shape[1] == 0:
             raise ValueError("build(x): F (feature dimension) must be > 0.")
         in_features = int(input_sample.shape[1])
@@ -128,7 +128,28 @@ class FullyFactorizedGaussian(nn.Module):
         return z, mu, log_var
-    def reparametrize(self, mu: torch.Tensor, log_var: torch.Tensor, S: int = 1):
+    def reparametrize(self, mu: torch.Tensor, log_var: torch.Tensor, S: int = 1) -> torch.Tensor:
+        r"""Draw ``S`` latent samples via the reparameterization trick.
+        .. math::
+            z^{(s)} = \mu + \sigma \odot \epsilon^{(s)},
+            \qquad \epsilon^{(s)} \sim \mathcal{N}(0, I).
+        Parameters
+        ----------
+        mu : torch.Tensor
+            Mean of the posterior, shape ``[B, D_z]``.
+        log_var : torch.Tensor
+            Log-variance of the posterior, shape ``[B, D_z]``.
+        S : int, optional
+            Number of samples to draw. Defaults to ``1``.
+        Returns
+        -------
+        torch.Tensor
+            Sampled latent codes of shape ``[B, S, D_z]``.
+        """
         std = torch.exp(0.5 * log_var)              # [B, Dz]
         mu_e  = mu.unsqueeze(1).expand(-1, S, -1)   # [B, S, Dz]
         std_e = std.unsqueeze(1).expand(-1, S, -1)  # [B, S, Dz]

{pyautoencoder-1.1.2 → pyautoencoder-1.1.4}/pyautoencoder/variational/vae.py RENAMED Viewed

@@ -322,11 +322,14 @@ class VAE(BaseAutoencoder):
 class AdaGVAE(VAE):
     r"""Adaptive Group Variational Autoencoder (Ada-GVAE), from Locatello et al. (2020).
-    This class extends the VAE class and enables feature disentanglement in the latent space.
-    For inference, use the .encode() and .decode() methods, as the forward method expects pairs of images,
-    following the formulation introduced by Locatello et al.
+    This class extends the VAE class and enables feature disentanglement in the latent space.
+    Use :meth:`forward_pair` for the paired training pass and :meth:`compute_pair_loss` for
+    its loss. The inherited :meth:`encode` / :meth:`decode` methods work normally for inference
+    on single inputs.
     """
+    _GUARDED = VAE._GUARDED | {"_encode_pair"}
     def __init__(
         self,
         encoder: nn.Module,
@@ -418,7 +421,7 @@ class AdaGVAE(VAE):
                VAEEncodeOutput(z=z2, mu=mu_tilde2, log_var=log_var_tilde2)
-    def forward(self, x1: torch.Tensor, x2: torch.Tensor, S: int = 1) -> Tuple[VAEOutput, VAEOutput]:
+    def forward_pair(self, x1: torch.Tensor, x2: torch.Tensor, S: int = 1) -> Tuple[VAEOutput, VAEOutput]:
         """Full AdaGVAE forward pass: encode pairs with adaptive grouping, sample, and decode.
         Parameters
@@ -445,14 +448,14 @@ class AdaGVAE(VAE):
         x2_dec = self._decode(x2_enc.z)
         return VAEOutput(x_hat=x1_dec.x_hat, z=x1_enc.z, mu=x1_enc.mu, log_var=x1_enc.log_var), \
                VAEOutput(x_hat=x2_dec.x_hat, z=x2_enc.z, mu=x2_enc.mu, log_var=x2_enc.log_var)
-    def compute_loss(self,
-                     x1: torch.Tensor,
-                     x1_vae_output: VAEOutput,
-                     x2: torch.Tensor,
-                     x2_vae_output: VAEOutput,
-                     beta: float = 1,
-                     likelihood: Union[str, LikelihoodType] = LikelihoodType.GAUSSIAN) -> LossResult:
+    def compute_pair_loss(self,
+                          x1: torch.Tensor,
+                          x1_vae_output: VAEOutput,
+                          x2: torch.Tensor,
+                          x2_vae_output: VAEOutput,
+                          beta: float = 1,
+                          likelihood: Union[str, LikelihoodType] = LikelihoodType.GAUSSIAN) -> LossResult:
         r"""Compute the combined ELBO for a pair of inputs with adaptive posteriors.
         This method computes the sum of the standard VAE ELBOs for both inputs:

{pyautoencoder-1.1.2 → pyautoencoder-1.1.4}/pyautoencoder.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pyautoencoder
-Version: 1.1.2
+Version: 1.1.4
 Summary: A Python package offering implementations of state-of-the-art autoencoder architectures in PyTorch.
 Author: Andrea Pollastro
 License: MIT

{pyautoencoder-1.1.2 → pyautoencoder-1.1.4}/pyautoencoder.egg-info/SOURCES.txt RENAMED Viewed

@@ -32,10 +32,6 @@ pyautoencoder.egg-info/requires.txt
 pyautoencoder.egg-info/top_level.txt
 pyautoencoder/_base/__init__.py
 pyautoencoder/_base/base.py
-pyautoencoder/experimental/__init__.py
-pyautoencoder/experimental/hypernetworks.py
-pyautoencoder/experimental/benchmark_datasets/__init__.py
-pyautoencoder/experimental/benchmark_datasets/disentanglement.py
 pyautoencoder/loss/__init__.py
 pyautoencoder/loss/base.py
 pyautoencoder/vanilla/__init__.py

{pyautoencoder-1.1.2 → pyautoencoder-1.1.4}/test/_base/test_base.py RENAMED Viewed

@@ -14,7 +14,6 @@ from pyautoencoder._base.base import (
 # ================= ModelOutput =================
 def test_model_output_repr_tensors_and_non_tensors():
-    @torch.no_grad()
     @dataclass(slots=True, repr=False)
     class MyOutput(ModelOutput):
         logits: torch.Tensor

{pyautoencoder-1.1.2 → pyautoencoder-1.1.4}/test/loss/test_base_loss.py RENAMED Viewed

@@ -1,5 +1,4 @@
 import pytest
-import math
 import torch
 import torch.nn.functional as F
@@ -252,22 +251,22 @@ def test_kl_divergence_preserves_device():
     assert kl.device == mu_q.device
-def test_kl_divergence_symmetric_when_p_and_q_swapped_with_custom_prior():
-    """Test asymmetry property: KL(q||p) != KL(p||q) in general."""
+def test_kl_divergence_is_asymmetric():
+    """KL(q||p) != KL(p||q) in general — KL is not a symmetric distance."""
     B, Dz = 2, 3
+    torch.manual_seed(0)
     mu_q = torch.randn(B, Dz)
     log_var_q = torch.randn(B, Dz)
     mu_p = torch.randn(B, Dz)
     log_var_p = torch.randn(B, Dz)
     kl_q_p = kl_divergence_diag_gaussian(mu_q, log_var_q, mu_p, log_var_p)
     kl_p_q = kl_divergence_diag_gaussian(mu_p, log_var_p, mu_q, log_var_q)
-    # KL divergence is asymmetric in general
-    # (unless the distributions happen to be very similar)
-    # Just check both are valid
     assert torch.isfinite(kl_q_p).all()
     assert torch.isfinite(kl_p_q).all()
+    # For random, distinct Gaussians this will virtually never be equal
+    assert not torch.allclose(kl_q_p, kl_p_q)
 def test_kl_divergence_backward_flows_gradients():
@@ -306,22 +305,6 @@ def test_kl_divergence_with_custom_prior_backward():
     assert log_var_p.grad is not None
-def test_kl_divergence_matches_pytorch_implementation():
-    """Compare with a reference PyTorch implementation."""
-    B, Dz = 3, 4
-    mu_q = torch.randn(B, Dz)
-    log_var_q = torch.randn(B, Dz)
-    # Our implementation
-    kl_ours = kl_divergence_diag_gaussian(mu_q, log_var_q)
-    # Reference implementation (standard VAE KL)
-    var_q = log_var_q.exp()
-    kl_ref = 0.5 * torch.sum(-log_var_q + var_q + mu_q.pow(2) - 1, dim=-1)
-    assert torch.allclose(kl_ours, kl_ref, atol=1e-5)
 def test_kl_divergence_large_batch():
     """Test with larger batch size."""
     B, Dz = 128, 32

{pyautoencoder-1.1.2 → pyautoencoder-1.1.4}/test/vanilla/test_autoencoder.py RENAMED Viewed

@@ -247,8 +247,8 @@ def test_ae_compute_loss_gaussian_likelihood_returns_correct_type():
     assert isinstance(loss_result.diagnostics['log_likelihood'], float)
-def test_ae_compute_loss_gaussian_likelihood_is_nonnegative():
-    """Test that NLL (objective) is non-negative for Gaussian likelihood."""
+def test_ae_compute_loss_gaussian_nll_equals_half_mse():
+    """Test that the Gaussian NLL objective equals 0.5 * mean MSE."""
     batch_size = 5
     in_features = 6
     latent_features = 2
@@ -265,12 +265,15 @@ def test_ae_compute_loss_gaussian_likelihood_is_nonnegative():
     loss_result = ae.compute_loss(x, ae_output, likelihood='gaussian')
-    # NLL should be non-negative (it's -log_likelihood)
-    assert loss_result.objective.item() >= 0
+    # Gaussian NLL (without normalization constant) == 0.5 * per-sample MSE, batch-averaged
+    expected = 0.5 * ((ae_output.x_hat - x) ** 2).reshape(batch_size, -1).sum(-1).mean()
+    assert torch.allclose(loss_result.objective, expected, atol=1e-6)
-def test_ae_compute_loss_bernoulli_likelihood():
-    """Test compute_loss with Bernoulli likelihood."""
+def test_ae_compute_loss_bernoulli_nll_equals_bce():
+    """Bernoulli NLL equals sum-over-features BCE, batch-averaged."""
+    import torch.nn.functional as F
     batch_size = 4
     in_features = 8
     latent_features = 3
@@ -279,7 +282,7 @@ def test_ae_compute_loss_bernoulli_likelihood():
     decoder = SimpleDecoder(latent_features=latent_features, out_features=in_features)
     ae = AE(encoder=encoder, decoder=decoder)
-    x = torch.sigmoid(torch.randn(batch_size, in_features))  # Bernoulli needs [0, 1]
+    x = torch.sigmoid(torch.randn(batch_size, in_features))  # targets in [0, 1]
     ae.build(x)
     torch.set_grad_enabled(True)
@@ -287,16 +290,18 @@ def test_ae_compute_loss_bernoulli_likelihood():
     loss_result = ae.compute_loss(x, ae_output, likelihood='bernoulli')
-    # Check return structure
-    from pyautoencoder.loss.base import LossResult
-    assert isinstance(loss_result, LossResult)
+    # NLL = mean over batch of (sum over features of BCE)
+    expected = F.binary_cross_entropy_with_logits(
+        ae_output.x_hat, x, reduction='none'
+    ).reshape(batch_size, -1).sum(-1).mean()
     assert loss_result.objective.dim() == 0
+    assert torch.allclose(loss_result.objective, expected, atol=1e-6)
     assert 'log_likelihood' in loss_result.diagnostics
-    assert loss_result.objective.item() >= 0
-def test_ae_compute_loss_backward_flows_through_x_hat():
-    """Test that gradients flow properly through the loss."""
+def test_ae_compute_loss_backward_flows_through_all_params():
+    """Test that gradients flow through both encoder and decoder."""
     batch_size = 2
     in_features = 4
     latent_features = 2
@@ -314,8 +319,9 @@ def test_ae_compute_loss_backward_flows_through_x_hat():
     loss_result = ae.compute_loss(x, ae_output)
     loss_result.objective.backward()
-    # Check that decoder params have gradients
+    enc_grads = [p.grad for p in encoder.parameters() if p.requires_grad]
     dec_grads = [p.grad for p in decoder.parameters() if p.requires_grad]
+    assert any(g is not None and torch.any(g != 0) for g in enc_grads)
     assert any(g is not None and torch.any(g != 0) for g in dec_grads)

{pyautoencoder-1.1.2 → pyautoencoder-1.1.4}/test/variational/test_stochastic_layers.py RENAMED Viewed

@@ -87,6 +87,23 @@ def test_ffg_build_can_be_called_twice_with_same_feature_dim():
     assert isinstance(head.mu, nn.Linear)
     assert head.mu.in_features == F
+def test_ffg_build_replaces_layers_on_different_feature_dim():
+    """Rebuilding with a different F must replace mu and log_var layers."""
+    latent_dim = 3
+    head = FullyFactorizedGaussian(latent_dim=latent_dim)
+    head.build(torch.randn(2, 5))
+    assert head.in_features == 5
+    assert isinstance(head.mu, nn.Linear) and head.mu.in_features == 5
+    assert isinstance(head.log_var, nn.Linear) and head.log_var.in_features == 5
+    head.build(torch.randn(2, 8))
+    assert head.in_features == 8
+    assert isinstance(head.mu, nn.Linear) and head.mu.in_features == 8
+    assert isinstance(head.log_var, nn.Linear) and head.log_var.in_features == 8
+    assert head.built is True
 def test_ffg_forward_raises_if_not_built():
     head = FullyFactorizedGaussian(latent_dim=3)
     x = torch.randn(2, 5)
@@ -178,6 +195,8 @@ def test_ffg_eval_forward_respects_default_S_equals_1():
     z, mu, log_var = head(x)  # S default = 1
     assert z.shape == (B, 1, Dz)
+    assert mu.shape == (B, Dz)
+    assert log_var.shape == (B, Dz)
     expected_z = mu.unsqueeze(1)  # [B, 1, Dz]
     assert torch.allclose(z, expected_z)

{pyautoencoder-1.1.2 → pyautoencoder-1.1.4}/test/variational/test_vae.py RENAMED Viewed

@@ -323,6 +323,22 @@ def test_vae_output_repr_uses_modeloutput_smart_repr():
     assert f"shape={tuple(log_var.shape)}" in s
+def test_vae_build_runs_encoder_under_no_grad():
+    """The build wrapper executes under torch.no_grad(); encoder must see grad disabled."""
+    B, in_features, feat_dim, latent_dim = 3, 5, 7, 4
+    x = torch.randn(B, in_features)
+    encoder = DummyEncoder(in_features=in_features, feat_dim=feat_dim)
+    decoder = DummyDecoder(latent_dim=latent_dim, out_features=in_features)
+    vae = VAE(encoder=encoder, decoder=decoder, latent_dim=latent_dim)
+    torch.set_grad_enabled(True)
+    vae.build(x)
+    assert encoder.last_grad_enabled is False   # encoder saw no_grad during build
+    assert torch.is_grad_enabled() is True      # global state restored afterwards
 # ================= compute_loss =================
 def test_vae_compute_loss_gaussian_likelihood_returns_correct_type():
@@ -460,8 +476,8 @@ def test_vae_compute_loss_bernoulli_likelihood():
     assert 'kl_divergence' in loss_result.diagnostics
-def test_vae_compute_loss_multiple_samples():
-    """Test compute_loss with S > 1 samples for Monte Carlo estimation."""
+def test_vae_compute_loss_eval_mode_elbo_independent_of_S():
+    """In eval mode z = tiled mu, so ELBO must be identical for any S >= 1."""
     B, in_features, feat_dim, latent_dim = 2, 4, 6, 2
     x = torch.randn(B, in_features)
@@ -470,24 +486,19 @@ def test_vae_compute_loss_multiple_samples():
     vae = VAE(encoder=encoder, decoder=decoder, latent_dim=latent_dim)
     vae.build(x)
-    vae.train()
-    torch.set_grad_enabled(True)
+    vae.eval()
+    torch.set_grad_enabled(False)
-    # Forward with S=1
-    vae_output_s1 = vae.forward(x, S=1)
-    loss_s1 = vae.compute_loss(x, vae_output_s1)
+    out_s1 = vae.forward(x, S=1)
+    loss_s1 = vae.compute_loss(x, out_s1)
-    # Forward with S=5 (more MC samples)
-    vae_output_s5 = vae.forward(x, S=5)
-    loss_s5 = vae.compute_loss(x, vae_output_s5)
+    out_s5 = vae.forward(x, S=5)
+    loss_s5 = vae.compute_loss(x, out_s5)
-    # Both should produce valid LossResult
-    assert isinstance(loss_s1.objective, torch.Tensor)
-    assert isinstance(loss_s5.objective, torch.Tensor)
-    # Shapes should match input batch size
-    assert vae_output_s1.x_hat.shape[0] == B
-    assert vae_output_s5.x_hat.shape[0] == B
+    # All S copies of z are identical (tiled mu), so the MC average is exact
+    assert torch.allclose(loss_s1.objective, loss_s5.objective, atol=1e-5)
+    assert abs(loss_s1.diagnostics['elbo'] - loss_s5.diagnostics['elbo']) < 1e-5
+    assert abs(loss_s1.diagnostics['log_likelihood'] - loss_s5.diagnostics['log_likelihood']) < 1e-5
 def test_vae_compute_loss_backward_flows_through_all_params():
@@ -538,6 +549,29 @@ def test_vae_compute_loss_batch_size_one():
     assert not torch.isinf(loss_result.objective)
+def test_vae_compute_loss_diagnostics_elbo_consistency():
+    """elbo diagnostic must equal log_likelihood - kl_divergence."""
+    B, in_features, feat_dim, latent_dim, S = 3, 5, 7, 2, 2
+    x = torch.randn(B, in_features)
+    encoder = DummyEncoder(in_features=in_features, feat_dim=feat_dim)
+    decoder = DummyDecoder(latent_dim=latent_dim, out_features=in_features)
+    vae = VAE(encoder=encoder, decoder=decoder, latent_dim=latent_dim)
+    vae.build(x)
+    vae.train()
+    torch.set_grad_enabled(True)
+    vae_output = vae.forward(x, S=S)
+    loss_result = vae.compute_loss(x, vae_output)
+    ll = loss_result.diagnostics['log_likelihood']
+    kl = loss_result.diagnostics['kl_divergence']
+    elbo = loss_result.diagnostics['elbo']
+    assert abs(elbo - (ll - kl)) < 1e-5
 def test_vae_compute_loss_with_different_likelihood_formats():
     """Test that compute_loss handles both string and LikelihoodType inputs."""
     B, in_features, feat_dim, latent_dim, S = 3, 5, 7, 2, 2
@@ -572,10 +606,10 @@ def test_adagvae_inherits_from_vae():
     """Test that AdaGVAE is a subclass of VAE."""
     from pyautoencoder.variational.vae import AdaGVAE
-    B, in_features, latent_dim = 4, 6, 3
+    in_features, latent_dim = 6, 3
     encoder = DummyEncoder(in_features=in_features, feat_dim=10)
     decoder = DummyDecoder(latent_dim=latent_dim, out_features=in_features)
     adagvae = AdaGVAE(encoder=encoder, decoder=decoder, latent_dim=latent_dim)
     assert isinstance(adagvae, VAE)
@@ -594,10 +628,10 @@ def test_adagvae_raises_before_build():
     x1 = torch.randn(B, in_features)
     x2 = torch.randn(B, in_features)
-    with pytest.raises(NotBuiltError):
-        adagvae.forward(x1, x2)
+    with pytest.raises(NotBuiltError, match="Model is not built"):
+        adagvae.forward_pair(x1, x2)
-    with pytest.raises(NotBuiltError):
+    with pytest.raises(NotBuiltError, match="Model is not built"):
         adagvae._encode_pair(x1, x2)
@@ -618,7 +652,7 @@ def test_adagvae_forward_pair_shapes_and_types():
     torch.set_grad_enabled(True)
     # Forward returns tuple of two VAEOutput
-    out1, out2 = adagvae.forward(x1, x2, S=S)
+    out1, out2 = adagvae.forward_pair(x1, x2, S=S)
     assert isinstance(out1, VAEOutput)
     assert isinstance(out2, VAEOutput)
@@ -682,8 +716,8 @@ def test_adagvae_compute_loss_returns_correct_structure():
     adagvae.train()
     torch.set_grad_enabled(True)
-    out1, out2 = adagvae.forward(x1, x2, S=S)
-    loss_result = adagvae.compute_loss(x1, out1, x2, out2)
+    out1, out2 = adagvae.forward_pair(x1, x2, S=S)
+    loss_result = adagvae.compute_pair_loss(x1, out1, x2, out2)
     # Check return type
     assert isinstance(loss_result, LossResult)
@@ -726,8 +760,8 @@ def test_adagvae_compute_loss_backward_flows():
     adagvae.train()
     torch.set_grad_enabled(True)
-    out1, out2 = adagvae.forward(x1, x2, S=S)
-    loss_result = adagvae.compute_loss(x1, out1, x2, out2)
+    out1, out2 = adagvae.forward_pair(x1, x2, S=S)
+    loss_result = adagvae.compute_pair_loss(x1, out1, x2, out2)
     loss_result.objective.backward()
     # Check gradients in all components
@@ -756,13 +790,13 @@ def test_adagvae_compute_loss_with_beta():
     adagvae.train()
     torch.set_grad_enabled(True)
-    out1, out2 = adagvae.forward(x1, x2, S=S)
+    out1, out2 = adagvae.forward_pair(x1, x2, S=S)
     # Compute with beta=1
-    loss_beta1 = adagvae.compute_loss(x1, out1, x2, out2, beta=1.0)
+    loss_beta1 = adagvae.compute_pair_loss(x1, out1, x2, out2, beta=1.0)
     # Compute with beta=0.5
-    loss_beta05 = adagvae.compute_loss(x1, out1, x2, out2, beta=0.5)
+    loss_beta05 = adagvae.compute_pair_loss(x1, out1, x2, out2, beta=0.5)
     # ELBO should be different
     elbo_beta1 = loss_beta1.diagnostics['elbo']
@@ -772,33 +806,34 @@ def test_adagvae_compute_loss_with_beta():
     assert elbo_beta05 > elbo_beta1
-def test_adagvae_adaptive_grouping_aligns_similar_inputs():
-    """Test that AdaGVAE adaptive grouping works with similar inputs."""
+def test_adagvae_identical_inputs_produce_no_grouping():
+    """When x1 == x2, KL(q1||q2) = 0 everywhere, tau = 0, mask is all-False.
+    The adapted posteriors must equal the individual (unadapted) posteriors."""
     from pyautoencoder.variational.vae import AdaGVAE
-    B, in_features, feat_dim, latent_dim, S = 3, 5, 7, 2, 1
-    # Create similar inputs (nearly identical)
-    x_base = torch.randn(B, in_features)
-    x1 = x_base.clone()
-    x2 = x_base + 0.01 * torch.randn_like(x_base)  # Add small noise
+    B, in_features, feat_dim, latent_dim = 3, 5, 7, 4
+    x = torch.randn(B, in_features)
     encoder = DummyEncoder(in_features=in_features, feat_dim=feat_dim)
     decoder = DummyDecoder(latent_dim=latent_dim, out_features=in_features)
     adagvae = AdaGVAE(encoder=encoder, decoder=decoder, latent_dim=latent_dim)
-    adagvae.build(x1)
+    adagvae.build(x)
     adagvae.eval()
     torch.set_grad_enabled(False)
-    out1, out2 = adagvae.forward(x1, x2, S=S)
+    # Identical inputs → mu1 == mu2, log_var1 == log_var2 → KL(q1||q2) = 0 everywhere
+    # → max_delta = min_delta = 0 → tau = 0 → mask = (0 < 0) = False
+    # → no grouping: adapted posteriors equal the original individual posteriors
+    enc1_pair, enc2_pair = adagvae._encode_pair(x, x.clone(), S=1)
-    # For similar inputs, posteriors should be relatively close
-    assert out1.mu.shape == (B, latent_dim)
-    assert out2.mu.shape == (B, latent_dim)
-    # The adaptive mechanism should produce outputs (shape check is the main test)
-    assert out1.z.shape == out2.z.shape == (B, S, latent_dim)
+    # Reference: single-input encode
+    single_enc = adagvae._encode(x, S=1)
+    assert torch.allclose(enc1_pair.mu, single_enc.mu, atol=1e-6)
+    assert torch.allclose(enc1_pair.log_var, single_enc.log_var, atol=1e-6)
+    assert torch.allclose(enc2_pair.mu, single_enc.mu, atol=1e-6)
+    assert torch.allclose(enc2_pair.log_var, single_enc.log_var, atol=1e-6)
 def test_adagvae_encode_pair_with_different_s():
@@ -831,7 +866,7 @@ def test_adagvae_encode_pair_with_different_s():
 def test_adagvae_compute_loss_bernoulli_likelihood():
     """Test AdaGVAE compute_loss with Bernoulli likelihood."""
     from pyautoencoder.variational.vae import AdaGVAE
     B, in_features, feat_dim, latent_dim, S = 3, 5, 7, 2, 2
     x1 = torch.sigmoid(torch.randn(B, in_features))
     x2 = torch.sigmoid(torch.randn(B, in_features))
@@ -844,10 +879,40 @@ def test_adagvae_compute_loss_bernoulli_likelihood():
     adagvae.train()
     torch.set_grad_enabled(True)
-    out1, out2 = adagvae.forward(x1, x2, S=S)
-    loss_result = adagvae.compute_loss(x1, out1, x2, out2, likelihood='bernoulli')
+    out1, out2 = adagvae.forward_pair(x1, x2, S=S)
+    loss_result = adagvae.compute_pair_loss(x1, out1, x2, out2, likelihood='bernoulli')
     assert isinstance(loss_result.objective, torch.Tensor)
     assert loss_result.objective.dim() == 0
     assert 'elbo' in loss_result.diagnostics
+def test_adagvae_compute_pair_loss_equals_sum_of_individual_losses():
+    """compute_pair_loss objective == sum of the two individual VAE ELBO losses."""
+    from pyautoencoder.variational.vae import AdaGVAE
+    B, in_features, feat_dim, latent_dim, S = 3, 5, 7, 2, 2
+    x1 = torch.randn(B, in_features)
+    x2 = torch.randn(B, in_features)
+    encoder = DummyEncoder(in_features=in_features, feat_dim=feat_dim)
+    decoder = DummyDecoder(latent_dim=latent_dim, out_features=in_features)
+    adagvae = AdaGVAE(encoder=encoder, decoder=decoder, latent_dim=latent_dim)
+    adagvae.build(x1)
+    adagvae.train()
+    torch.set_grad_enabled(True)
+    out1, out2 = adagvae.forward_pair(x1, x2, S=S)
+    pair_loss = adagvae.compute_pair_loss(x1, out1, x2, out2)
+    # compute_pair_loss calls VAE.compute_loss twice and adds objectives
+    loss1 = adagvae.compute_loss(x1, out1)
+    loss2 = adagvae.compute_loss(x2, out2)
+    assert torch.allclose(pair_loss.objective, loss1.objective + loss2.objective, atol=1e-5)
+    assert abs(
+        pair_loss.diagnostics['elbo']
+        - (loss1.diagnostics['elbo'] + loss2.diagnostics['elbo'])
+    ) < 1e-5

pyautoencoder-1.1.2/pyautoencoder/experimental/__init__.py DELETED Viewed

File without changes

pyautoencoder-1.1.2/pyautoencoder/experimental/benchmark_datasets/__init__.py DELETED Viewed

File without changes

pyautoencoder-1.1.2/pyautoencoder/experimental/benchmark_datasets/disentanglement.py DELETED Viewed

@@ -1,84 +0,0 @@
-from __future__ import annotations
-import torch
-from torch.utils.data import Dataset
-from pathlib import Path
-from typing import Optional, Union, Tuple, Callable
-import numpy as np
-import wget
-class DSprite(Dataset):
-    """PyTorch dataset wrapper for the dSprites factorized shapes dataset.
-    Source:
-        Matthey et al., "dSprites: Disentanglement test sprites."
-        Original files hosted by DeepMind on GitHub: https://github.com/google-deepmind/dsprites-dataset
-    Files:
-        - dsprites_ndarray_co1sh3sc6or40x32y32_64x64.npz
-    Contents (loaded into memory on init):
-        - X                (torch.Tensor[int8]): Binary images, shape [N, 64, 64].
-        - latents_values  (torch.Tensor[float64]): Continuous latent values per sample,
-                                                    shape [N, 6].
-        - latents_classes (torch.Tensor[int64]): Discrete latent indices per sample,
-                                                 shape [N, 6].
-    Latent factor order (size):
-        [color (1), shape (3), scale (6), orientation (40), posX (32), posY (32)]
-    Notes:
-        - Images are binary (0/1) stored as int8; most models will want them converted
-          to float and possibly normalized. Provide a `transform` to handle this.
-        - All arrays are fully loaded into CPU memory at construction for fast access.
-        - Set `download=True` (default) to fetch the NPZ if missing at `root`.
-    """
-    _NPZ_URL = "https://github.com/deepmind/dsprites-dataset/raw/master/"
-    _NPZ_FILENAME = 'dsprites_ndarray_co1sh3sc6or40x32y32_64x64.npz'
-    def __init__(self,
-                 root: Optional[Union[str, Path]] = None,
-                 transform: Optional[Callable] = None,
-                 download: bool = True):
-        """Initialize the dSprites dataset.
-        Args:
-            root (str | pathlib.Path | None): Directory to store/find the NPZ file.
-                Defaults to "./data/dSprites" when None.
-            transform (Callable | None): Optional transform applied to each image.
-            download (bool): If True and the dataset file is not present at `root`,
-                it will be downloaded from the official GitHub URL.
-        """
-        # Assign default if no root is provided
-        if root is None:
-            root = Path('data') / 'dSprites'
-        elif isinstance(root, str):
-            root = Path(root)
-        self.root = root
-        self.root.mkdir(parents=True, exist_ok=True)
-        self.filepath = self.root / DSprite._NPZ_FILENAME
-        if download and not self.filepath.exists():
-            url = DSprite._NPZ_URL + DSprite._NPZ_FILENAME
-            print(f'Downloading dSprites from {url}')
-            wget.download(url, out=str(self.filepath))
-            print('\nDownload completed')
-        data = np.load(self.filepath, allow_pickle=True)
-        self.X = torch.as_tensor(data['imgs'], dtype=torch.int8).unsqueeze(1)
-        self.latents_values = torch.as_tensor(data['latents_values'], dtype=torch.float64)
-        self.latents_classes = torch.as_tensor(data['latents_classes'], dtype=torch.int64)
-        self.transform = transform
-    def __len__(self) -> int:
-        return self.X.shape[0]
-    def __getitem__(self, idx) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
-        img = self.X[idx]
-        lv = self.latents_values[idx]
-        lc = self.latents_classes[idx]
-        if self.transform:
-            img = self.transform(img)
-        return img, lv, lc

pyautoencoder-1.1.2/pyautoencoder/experimental/hypernetworks.py DELETED Viewed

@@ -1,261 +0,0 @@
-import torch
-import torch.nn as nn
-from typing import Iterable, Tuple, List, Dict, Optional, Any, Callable
-from dataclasses import fields, Field
-from .._base.base import BaseAutoencoder, BuildGuardMixin
-class HyperAE(BuildGuardMixin, nn.Module):
-    """
-    Hypernetwork wrapper around a base autoencoder.
-    This module:
-      - wraps a `BaseAutoencoder` and uses a hypernetwork (an MLP with two hidden
-        ReLU layers) to generate a subset of its parameters on a per-input basis
-      - keeps the remaining parameters shared across the batch
-    """
-    _GUARDED = {"forward"}
-    def __init__(
-        self,
-        base_ae: BaseAutoencoder,
-        target_modules: Iterable[str] = ("encoder", "sampling_layer", "decoder"),
-        hidden_dim: int = 256,
-    ):
-        """
-        Initialize the HyperAE wrapper.
-        Args:
-            base_ae:
-                The underlying autoencoder instance to be controlled by the hypernetwork.
-            target_modules:
-                Iterable of module name prefixes inside `base_ae` whose parameters
-                will be generated by the hypernetwork (e.g. "encoder", "decoder").
-            hidden_dim:
-                Hidden dimension for the MLP hypernetwork.
-        """
-        super().__init__()
-        self.base_ae = base_ae
-        # Materialized as a tuple so a one-shot iterable can be reused in build().
-        self.target_modules = tuple(target_modules)
-        self.hidden_dim = hidden_dim
-        # (param_name, original_shape, flat_start, flat_end)
-        self._param_info: List[Tuple[str, torch.Size, int, int]] = []
-        # Parameters that remain shared (usual trainable params).
-        self._shared_param_dict: Dict[str, torch.Tensor] = {}
-        # Total number of scalar parameters generated by the hypernetwork.
-        self.total_generated_params: int = 0
-        # Hypernetwork (built lazily in build()).
-        self.hypernet: Optional[nn.Module] = None
-        # Output type information inferred from a sample call in build().
-        self._output_type: Optional[type] = None
-        self._output_field_names: Optional[List[str]] = None
-        # Cached ModelOutput field metadata (set in build()).
-        self._output_fields: Optional[Tuple[Field, ...]] = None
-        # Cached vmap metadata / function (set in build()).
-        self._in_dims_params: Optional[Dict[str, Optional[int]]] = None
-        self._vmapped_call: Optional[Callable[..., Any]] = None
-    @torch.no_grad()
-    def build(self, input_sample: torch.Tensor) -> None:
-        """
-        Build the hypernetwork and prepare parameter bookkeeping.
-        This method:
-          - builds the underlying `base_ae`
-          - infers the output `ModelOutput` type and field names
-          - splits `base_ae` parameters into generated vs shared sets
-          - constructs the MLP hypernetwork that outputs all generated parameters
-        Must be called once before using `forward` (enforced by `BuildGuardMixin`).
-        Args:
-            input_sample:
-                Sample batch; its first element fixes the hypernetwork input size
-                and its device is where the hypernetwork is placed.
-        Raises:
-            ValueError: If no parameter of `base_ae` falls under `target_modules`.
-        """
-        # Ensure the base autoencoder is built (and warmed up via its own build).
-        self.base_ae.build(input_sample)
-        # Inspect a sample output to record the output structure.
-        sample_out = self.base_ae(input_sample)
-        self._output_type = type(sample_out)
-        self._output_fields = fields(sample_out)
-        self._output_field_names = [f.name for f in self._output_fields]
-        # Reset metadata so that a repeated build starts from a clean state.
-        self._param_info.clear()
-        # str.startswith accepts a tuple, so every prefix is tested in one call.
-        target_prefixes = tuple(m + "." for m in self.target_modules)
-        # Single pass over the parameters: each one is either generated or shared.
-        shared_params: Dict[str, torch.Tensor] = {}
-        flat_offset = 0
-        for name, p in self.base_ae.named_parameters():
-            if name.startswith(target_prefixes):
-                # Parameters inside target modules are generated by the hypernet;
-                # record where each one lives in the flat hypernet output.
-                numel = p.numel()
-                self._param_info.append(
-                    (name, p.shape, flat_offset, flat_offset + numel)
-                )
-                flat_offset += numel
-                # Generated parameters are "owned" by the hypernet, so we freeze them.
-                p.requires_grad_(False)
-            else:
-                # Remaining parameters are shared and stay trainable as usual.
-                shared_params[name] = p
-                p.requires_grad_(True)
-        self.total_generated_params = flat_offset
-        if self.total_generated_params == 0:
-            raise ValueError(
-                f"Zero parameters defined by the hypernetwork. "
-                f"Check target_modules: {self.target_modules}."
-            )
-        # Cache the shared parameters for quick access in forward().
-        self._shared_param_dict = shared_params
-        # Hypernetwork input dimension is the flattened per-sample input.
-        in_dims = input_sample[0].numel()
-        self.hypernet = nn.Sequential(
-            nn.Flatten(1),
-            nn.Linear(in_dims, self.hidden_dim),
-            nn.ReLU(),
-            nn.Linear(self.hidden_dim, self.hidden_dim),
-            nn.ReLU(),
-            nn.Linear(self.hidden_dim, self.total_generated_params),
-        )
-        # Place the hypernet on the sample's device so that forward() works
-        # right after build() without an extra .to(...) call.
-        self.hypernet.to(input_sample.device)
-        # in_dims mapping for vmap: generated params vary on dim 0, shared are None.
-        # Same key ordering as `params` in forward: shared first, then generated.
-        generated_names = [name for (name, _, _, _) in self._param_info]
-        self._in_dims_params = {
-            **dict.fromkeys(shared_params, None),
-            **dict.fromkeys(generated_names, 0),
-        }
-        # Prebuild the vmapped single-sample call.
-        self._vmapped_call = torch.func.vmap(
-            self._call_single,
-            in_dims=(self._in_dims_params, 0),
-            out_dims=0,
-            randomness="different",
-        )
-        self._built = True
-    def _generated_params(self, x: torch.Tensor) -> Dict[str, torch.Tensor]:
-        """
-        Run the hypernetwork and reshape its output into per-parameter tensors.
-        Args:
-            x:
-                Input batch of shape (B, ...) used to condition the hypernetwork.
-        Returns:
-            A dictionary mapping parameter names (inside `target_modules`) to
-            tensors of shape (B, *original_shape), i.e. one generated parameter
-            tensor per sample in the batch.
-        """
-        B = x.size(0)
-        flat = self.hypernet(x) # type: ignore -- (B, total_generated_params)
-        # Each parameter owns the column range [start, end) of the flat output.
-        return {
-            name: flat[:, start:end].view(B, *shape)
-            for name, shape, start, end in self._param_info
-        }
-    def _call_single(self, params_i: Dict[str, torch.Tensor], x_i: torch.Tensor, **kwargs: Any):
-        """
-        Apply the base autoencoder to a single sample with a single param set.
-        Args:
-            params_i:
-                Parameter dict for this particular sample.
-            x_i:
-                Input tensor of shape (...,) for this sample.
-            **kwargs:
-                Additional keyword arguments forwarded to `base_ae.forward`.
-        Returns:
-            A tuple of output tensors corresponding to the `ModelOutput` fields,
-            with the leading batch dimension (size 1) removed.
-        """
-        # The base AE expects a batch, so the sample gets a size-1 leading axis.
-        out = torch.func.functional_call(
-            self.base_ae,
-            params_i,
-            (x_i.unsqueeze(0),),
-            kwargs=kwargs,
-        )
-        # Flatten the structured output into a tuple (for vmap), dropping the
-        # size-1 batch axis from every tensor field.
-        field_values = (getattr(out, name) for name in self._output_field_names) # type: ignore
-        return tuple(
-            v.squeeze(0) if torch.is_tensor(v) else v
-            for v in field_values
-        )
-    def forward(self, x: torch.Tensor, **kwargs: Any):
-        """
-        Forward pass with per-sample generated parameters.
-        For each sample in the batch:
-          - generate a distinct set of parameters for `target_modules`
-          - combine them with the shared parameters
-          - call the underlying `base_ae` via `torch.func.functional_call`
-        The underlying `base_ae` is evaluated in a batched, vectorized way using
-        `torch.func.vmap`.
-        Args:
-            x:
-                Input batch of shape (B, ...).
-            **kwargs:
-                Additional keyword arguments forwarded to `base_ae.forward`.
-        Returns:
-            A `ModelOutput` instance of the same type/structure as produced by
-            `base_ae`, but with each field batched over the leading dimension.
-        """
-        # Shared parameters first, then the per-sample (B, *shape) generated ones;
-        # this matches the key order used for the vmap in_dims built in build().
-        params: Dict[str, torch.Tensor] = {
-            **self._shared_param_dict,
-            **self._generated_params(x),
-        }
-        # Vectorized application over batch of (params, x) using prebuilt vmap.
-        batched_tensors_tuple = self._vmapped_call(params, x, **kwargs) # type: ignore
-        # Reconstruct a batched `ModelOutput` of the same type as the base AE.
-        batched_kwargs = dict(zip(self._output_field_names, batched_tensors_tuple))  # type: ignore
-        return self._output_type(**batched_kwargs)  # type: ignore