PyPI - dct-autoencoder - Versions diffs - 0.2.0__tar.gz → 0.3.0__tar.gz - Mend

dct-autoencoder 0.2.0 tar.gz → 0.3.0 tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

{dct_autoencoder-0.2.0 → dct_autoencoder-0.3.0}/PKG-INFO RENAMED Viewed

@@ -1,17 +1,17 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.3
 Name: dct-autoencoder
-Version: 0.2.0
-Summary:
+Version: 0.3.0
+Summary: Discrete Cosine Transform in PyTorch
 Author: Dariush Bahrami
-Author-email: dariushbahrami1993@gmail.com
-Requires-Python: >=3.10,<4.0
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Requires-Dist: matplotlib (>=3.9.2,<4.0.0)
-Requires-Dist: numpy (>=2.1.1,<3.0.0)
-Requires-Dist: torch (>=2.4.1,<3.0.0)
+Author-email: Dariush Bahrami <dariushbahrami1993@gmail.com>
+License: MIT
+Requires-Dist: numpy>=2.4.6
+Requires-Dist: torch>=2.0.0 ; extra == 'torch'
+Requires-Dist: torchvision>=0.15.0 ; extra == 'torch'
+Requires-Python: >=3.12
+Project-URL: Homepage, https://github.com/dariush-bahrami/dct-autoencoder
+Project-URL: Repository, https://github.com/dariush-bahrami/dct-autoencoder
+Provides-Extra: torch
 Description-Content-Type: text/markdown
 # DCT-Autoencoder
@@ -57,4 +57,3 @@ DCT basis functions for a block size of 16:
 - [x] Improve documentation
 - [ ] Add unit tests
 - [x] Distribute package on PyPI

dct_autoencoder-0.3.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,36 @@
+[project]
+name = "dct-autoencoder"
+version = "0.3.0"
+description = "Discrete Cosine Transform in PyTorch"
+readme = "README.md"
+license = { text = "MIT" }
+authors = [
+    { name = "Dariush Bahrami", email = "dariushbahrami1993@gmail.com" }
+]
+requires-python = ">=3.12"
+dependencies = [
+    "numpy>=2.4.6",
+]
+[project.optional-dependencies]
+torch = [
+    "torch>=2.0.0",
+    "torchvision>=0.15.0",
+]
+[project.urls]
+Homepage = "https://github.com/dariush-bahrami/dct-autoencoder"
+Repository = "https://github.com/dariush-bahrami/dct-autoencoder"
+[build-system]
+requires = ["uv_build>=0.11.16,<0.12.0"]
+build-backend = "uv_build"
+[dependency-groups]
+dev = [
+    "dct-autoencoder[torch]",
+    "ipykernel>=7.2.0",
+    "ipywidgets>=8.1.8",
+    "matplotlib>=3.10.9",
+    "pillow>=12.2.0",
+]

{dct_autoencoder-0.2.0 → dct_autoencoder-0.3.0/src}/dct_autoencoder/core.py RENAMED Viewed

@@ -1,5 +1,3 @@
-import math
 import numpy as np
 import torch
 from torch import nn
@@ -14,15 +12,38 @@ class DCTAutoencoder(nn.Module):
     Args:
         block_size (int, optional): The block size. Defaults to 8.
+        num_luminance_compressed_channels (int | None, optional): Number of lowest-frequency
+            luminance channels to retain after compression. ``None`` keeps all channels
+            (no compression). Defaults to ``None``.
+        num_chrominance_compressed_channels (int | None, optional): Number of lowest-frequency
+            chrominance channels to retain after compression. ``None`` keeps all channels
+            (no compression). Defaults to ``None``.
     """
     def __init__(
         self,
         block_size: int = 8,
-        luminance_compression_ratio: float = 1 / 2,
-        chrominance_compression_ratio: float = 1 / 4,
+        num_luminance_compressed_channels: int | None = None,
+        num_chrominance_compressed_channels: int | None = None,
     ) -> None:
         super().__init__()
+        total_channels = block_size**2
+        if num_luminance_compressed_channels is not None and not (
+            1 <= num_luminance_compressed_channels <= total_channels
+        ):
+            raise ValueError(
+                f"num_luminance_compressed_channels must be between 1 and {total_channels}, "
+                f"got {num_luminance_compressed_channels}"
+            )
+        if num_chrominance_compressed_channels is not None and not (
+            1 <= num_chrominance_compressed_channels <= total_channels
+        ):
+            raise ValueError(
+                f"num_chrominance_compressed_channels must be between 1 and {total_channels}, "
+                f"got {num_chrominance_compressed_channels}"
+            )
         dct_basis = get_dct_basis(block_size)
         basis_functions = dct_basis.basis_functions
         kernels = basis_functions.reshape(-1, block_size, block_size)
@@ -32,7 +53,7 @@ class DCTAutoencoder(nn.Module):
         spatial_frequencies_components = (
             dct_basis.spatial_frequencies_components.reshape(-1, 2)
         )
-        sort_indices = np.argsort(spatial_frequencies_magnitude)
+        sort_indices = np.argsort(spatial_frequencies_magnitude, kind="stable")
         kernels = kernels[sort_indices]
         spatial_frequencies_magnitude = spatial_frequencies_magnitude[sort_indices]
         spatial_frequencies_components = spatial_frequencies_components[sort_indices]
@@ -62,64 +83,32 @@ class DCTAutoencoder(nn.Module):
             torch.from_numpy(multiplication_factor_matrix),
         )
-        self.embedding_dimension = (block_size**2) * 3
+        self.embedding_dimension = total_channels * 3
-        # compressor initialization
-        if luminance_compression_ratio == 1 and chrominance_compression_ratio == 1:
-            self.do_compression = False
-            self.compression_luminance_mask = torch.ones(
-                block_size**2,
-                dtype=bool,
-                device=self.spatial_frequencies_components.device,
-            )
-            self.compression_chrominance_mask = torch.ones(
-                block_size**2,
-                dtype=bool,
-                device=self.spatial_frequencies_components.device,
-            )
-            self.compression_luminance_passband = block_size**2
-            self.compression_chrominance_passband = block_size**2
-        else:
-            original_frequencies = self.spatial_frequencies_components.to(
-                dtype=torch.float32
-            )
-            luminance_block_size = math.ceil(block_size * luminance_compression_ratio)
-            chrominance_block_size = math.ceil(
-                block_size * chrominance_compression_ratio
-            )
-            luminance_frequencies = get_dct_basis(
-                luminance_block_size
-            ).spatial_frequencies_components.reshape(-1, 2)
-            luminance_frequencies = torch.from_numpy(luminance_frequencies).to(
-                device=original_frequencies.device, dtype=torch.float32
-            )
-            chrominance_frequencies = get_dct_basis(
-                chrominance_block_size
-            ).spatial_frequencies_components.reshape(-1, 2)
-            chrominance_frequencies = torch.from_numpy(chrominance_frequencies).to(
-                device=original_frequencies.device, dtype=torch.float32
-            )
-            indices = torch.arange(block_size**2, device=original_frequencies.device)
-            luminance_mask = torch.isin(
-                indices,
-                torch.cdist(original_frequencies, luminance_frequencies, p=2).argmin(
-                    dim=0
-                ),
-            )
-            chrominance_mask = torch.isin(
-                indices,
-                torch.cdist(original_frequencies, chrominance_frequencies, p=2).argmin(
-                    dim=0
-                ),
-            )
-            luminance_passband = luminance_mask.sum()
-            chrominance_passband = chrominance_mask.sum()
+        # compressor initialization — kernels are already sorted by ascending frequency,
+        # so keeping the first N channels retains the N lowest frequencies exactly.
+        lum_n = num_luminance_compressed_channels
+        chr_n = num_chrominance_compressed_channels
+        self.do_compression = (lum_n is not None and lum_n < total_channels) or (
+            chr_n is not None and chr_n < total_channels
+        )
+        lum_passband = lum_n if lum_n is not None else total_channels
+        chr_passband = chr_n if chr_n is not None else total_channels
+        lum_mask = torch.zeros(total_channels, dtype=torch.bool)
+        lum_mask[:lum_passband] = True
+        chr_mask = torch.zeros(total_channels, dtype=torch.bool)
+        chr_mask[:chr_passband] = True
-            self.do_compression = True
-            self.compression_luminance_mask = luminance_mask
-            self.compression_chrominance_mask = chrominance_mask
-            self.compression_luminance_passband = luminance_passband
-            self.compression_chrominance_passband = chrominance_passband
+        self.register_buffer("compression_luminance_mask", lum_mask)
+        self.register_buffer("compression_chrominance_mask", chr_mask)
+        self.register_buffer(
+            "compression_luminance_passband", torch.tensor(lum_passband)
+        )
+        self.register_buffer(
+            "compression_chrominance_passband", torch.tensor(chr_passband)
+        )
     def encode(self, rgb_images_batch: torch.Tensor) -> torch.Tensor:
         """Encodes the input RGB images.
@@ -156,12 +145,14 @@ class DCTAutoencoder(nn.Module):
         encodings_batch = encodings_batch / self.block_size
         return encodings_batch
-    def decode(self, encodings_batch: torch.Tensor) -> torch.Tensor:
+    def decode(
+        self, encodings_batch: torch.Tensor, clamp_output: bool = True
+    ) -> torch.Tensor:
         """Decodes the input encoded images.
         Args:
             encodings_batch (torch.Tensor): The input encoded images.
+            clamp_output (bool, optional): Whether to clamp the output to the range [0, 1]. Defaults to True.
         Returns:
             torch.Tensor: The decoded images.
         """
@@ -187,6 +178,9 @@ class DCTAutoencoder(nn.Module):
         ycbcr_tsr = torch.cat([y, cb, cr], dim=1)
         ycbcr_tsr = ycbcr_tsr / 2 + 0.5
         rgb_images_batch = ycbcr_to_rgb(ycbcr_tsr)
+        if clamp_output:
+            # clamp is expected for display; note it makes decode non-linear for out-of-range values
+            rgb_images_batch = rgb_images_batch.clamp(0, 1)
         return rgb_images_batch
     def get_num_compressed_channels(self) -> int:
@@ -198,7 +192,7 @@ class DCTAutoencoder(nn.Module):
                 + 2 * self.compression_chrominance_passband.item()
             )
-    def compress(self, encodings):
+    def compress(self, encodings: torch.Tensor) -> torch.Tensor:
         if not self.do_compression:
             return encodings
         else:
@@ -211,7 +205,7 @@ class DCTAutoencoder(nn.Module):
             compressed_encoding = torch.cat([l, c1, c2], dim=1)
             return compressed_encoding
-    def decompress(self, compressed_encoding):
+    def decompress(self, compressed_encoding: torch.Tensor) -> torch.Tensor:
         if not self.do_compression:
             return compressed_encoding
         else:

{dct_autoencoder-0.2.0 → dct_autoencoder-0.3.0/src}/dct_autoencoder/utils.py RENAMED Viewed

@@ -19,10 +19,11 @@ def ycbcr_to_rgb(image: torch.Tensor) -> torch.Tensor:
     cb_shifted = cb - delta
     cr_shifted = cr - delta
-    r = y + 1.403 * cr_shifted
-    g = y - 0.714 * cr_shifted - 0.344 * cb_shifted
-    b = y + 1.773 * cb_shifted
-    return torch.stack([r, g, b], -3).clamp(0, 1)
+    # Exact inverse of the forward matrix: 1/0.713, 0.299/(0.713*0.587), 0.114/(0.564*0.587), 1/0.564
+    r = y + 1.40252 * cr_shifted
+    g = y - 0.71440 * cr_shifted - 0.34434 * cb_shifted
+    b = y + 1.77305 * cb_shifted
+    return torch.stack([r, g, b], -3)
 def rgb_to_ycbcr(image) -> torch.Tensor:

dct_autoencoder-0.2.0/LICENSE DELETED Viewed

@@ -1,21 +0,0 @@
-MIT License
-Copyright (c) 2024 dariush-bahrami
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.

dct_autoencoder-0.2.0/dct_autoencoder/visualization.py DELETED Viewed

@@ -1,52 +0,0 @@
-import matplotlib.pyplot as plt
-import numpy as np
-from .basis import DCTBasis
-def visualize_dct_basis_functions(
-    dct_constants: DCTBasis,
-    figsize: int = 8,
-    fig_facecolor: str = "#fb6a2c",
-    title_color: str = "k",
-    title_fontsize: int = 20,
-    cmap: str = "gray",
-) -> tuple:
-    """Visualize the DCT basis functions.
-    Args:
-        dct_constants (DCTBasis): The DCT basis constants.
-        figsize (int, optional): The figure size. Defaults to 8.
-        fig_facecolor (str, optional): The figure facecolor. Defaults to "#fb6a2c".
-        title_color (str, optional): The title color. Defaults to "k".
-        title_fontsize (int, optional): The title fontsize. Defaults to 20.
-        cmap (str, optional): The colormap. Defaults to "gray".
-    Returns:
-        tuple: The figure and axis.
-    """
-    block_size = dct_constants.block_size
-    basis_functions = dct_constants.basis_functions
-    basis_functions_image = np.zeros((block_size * block_size, block_size * block_size))
-    for v in range(block_size):
-        for u in range(block_size):
-            basis_functions_image[
-                v * block_size : (v + 1) * block_size,
-                u * block_size : (u + 1) * block_size,
-            ] = basis_functions[v, u]
-    plt.figure(figsize=(figsize, figsize), facecolor=fig_facecolor)
-    plt.title(
-        f"DCT Basis functions (block size: {block_size}x{block_size})",
-        color=title_color,
-        fontsize=title_fontsize,
-        fontweight="bold",
-    )
-    plt.imshow(basis_functions_image, cmap=cmap)
-    plt.axis("off")
-    for i in range(block_size):
-        plt.axhline(i * block_size - 0.5, color=fig_facecolor)
-        plt.axvline(i * block_size - 0.5, color=fig_facecolor)
-    plt.tight_layout()
-    fig = plt.gcf()
-    ax = plt.gca()
-    return fig, ax

dct_autoencoder-0.2.0/pyproject.toml DELETED Viewed

@@ -1,16 +0,0 @@
-[tool.poetry]
-name = "dct-autoencoder"
-version = "0.2.0"
-description = ""
-authors = ["Dariush Bahrami <dariushbahrami1993@gmail.com>"]
-readme = "README.md"
-[tool.poetry.dependencies]
-python = "^3.10"
-numpy = "^2.1.1"
-matplotlib = {version = "^3.9.2", optional = true}
-torch = "^2.4.1"
-[build-system]
-requires = ["poetry-core"]
-build-backend = "poetry.core.masonry.api"

{dct_autoencoder-0.2.0 → dct_autoencoder-0.3.0}/README.md RENAMED Viewed

File without changes

{dct_autoencoder-0.2.0 → dct_autoencoder-0.3.0/src}/dct_autoencoder/__init__.py RENAMED Viewed

File without changes

{dct_autoencoder-0.2.0 → dct_autoencoder-0.3.0/src}/dct_autoencoder/basis.py RENAMED Viewed

File without changes

dct-autoencoder 0.2.0__tar.gz → 0.3.0__tar.gz

dct-autoencoder 0.2.0 tar.gz → 0.3.0 tar.gz