heavyedge-dataset 0.2.0__py3-none-any.whl → 1.0.0.post0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- heavyedge_dataset/__init__.py +71 -224
- {heavyedge_dataset-0.2.0.dist-info → heavyedge_dataset-1.0.0.post0.dist-info}/METADATA +4 -5
- heavyedge_dataset-1.0.0.post0.dist-info/RECORD +6 -0
- heavyedge_dataset-0.2.0.dist-info/RECORD +0 -6
- {heavyedge_dataset-0.2.0.dist-info → heavyedge_dataset-1.0.0.post0.dist-info}/WHEEL +0 -0
- {heavyedge_dataset-0.2.0.dist-info → heavyedge_dataset-1.0.0.post0.dist-info}/licenses/LICENSE +0 -0
- {heavyedge_dataset-0.2.0.dist-info → heavyedge_dataset-1.0.0.post0.dist-info}/top_level.txt +0 -0
heavyedge_dataset/__init__.py
CHANGED
@@ -1,97 +1,31 @@
-"""
+"""Package to load edge profile data using PyTorch dataset.
 
-Refer to `PyTorch tutorial <tutorial>`_ for information about custom
+Refer to `PyTorch tutorial <tutorial>`_ for information about custom dataset.
 
 .. _tutorial: https://docs.pytorch.org/tutorials/beginner/data_loading_tutorial.html
 """
 
-import abc
 import numbers
 from collections.abc import Sequence
 
 import numpy as np
-from heavyedge.api import landmarks_type3
 from torch.utils.data import Dataset
 
 __all__ = [
     "ProfileDataset",
-    "PseudoLandmarkDataset",
-    "MathematicalLandmarkDataset",
 ]
 
 
-class
-    """
+class ProfileDataset(Dataset):
+    """Edge profile dataset.
 
-
-    @abc.abstractmethod
-    def file(self):
-        """Profile data file.
+    Loads data as a tuple of two numpy arrays:
 
-
-
-        heavyedge.ProfileData
-        """
+    1. Profile data, shape: (N, m, L).
+    2. Length of each profile, shape: (N,).
 
-
-
-    def transform(self):
-        """Optional transformation to be applied on samples.
-
-        Returns
-        -------
-        Callable
-        """
-
-    def __len__(self):
-        return len(self.file)
-
-    def __getitem__(self, idx):
-        if isinstance(idx, numbers.Integral):
-            Y, L, _ = self.file[idx]
-            Ys, Ls = [Y], [L]
-        else:
-            # Support multi-indexing
-            idxs = idx
-            needs_sort = isinstance(idx, (Sequence, np.ndarray))
-            if needs_sort:
-                # idxs must be sorted for h5py
-                idxs = np.array(idxs)
-                sort_idx = np.argsort(idxs)
-                idxs = idxs[sort_idx]
-            Ys, Ls, _ = self.file[idxs]
-            if needs_sort:
-                reverse_idx = np.argsort(sort_idx)
-                Ys = Ys[reverse_idx]
-                Ls = Ls[reverse_idx]
-
-        ret = self.default_transform(Ys, Ls)
-        if self.transform:
-            ret = self.transform(ret)
-        return ret
-
-    def __getitems__(self, idxs):
-        # PyTorch API
-        return self.__getitem__(idxs)
-
-    @abc.abstractmethod
-    def default_transform(self, profiles, lengths):
-        """Default data transformation.
-
-        Subclass must implement this method to transform profile data into target data.
-
-        Parameters
-        ----------
-        profiles : (N, M) array
-            Profile data.
-        lengths : (N,) array
-            Length of each profile in *profiles*.
-        """
-        pass
-
-
-class ProfileDataset(ProfileDatasetBase, Dataset):
-    """Full profile dataset in 1-D or 2-D.
+    N is the number of loaded data, m is dimension of coordinates, and
+    L is the maximum length of profiles.
 
     Parameters
     ----------
@@ -108,165 +42,78 @@ class ProfileDataset(ProfileDatasetBase, Dataset):
     >>> from heavyedge import get_sample_path, ProfileData
     >>> from heavyedge_dataset import ProfileDataset
     >>> with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
-    ...
-    >>>
-
-
-
-
-    def __init__(self, file, m, transform=None):
-        self._file = file
-        self.m = m
-        self._transform = transform
-
-        self.x = file.x()
-
-    @property
-    def file(self):
-        return self._file
-
-    @property
-    def transform(self):
-        return self._transform
-
-    def default_transform(self, profiles, lengths):
-        """Crop profiles by their contact points.
+    ...     profiles, lengths = ProfileDataset(file, m=2)[:]
+    >>> profiles.shape
+    (22, 2, 3200)
+    >>> lengths.shape
+    (22,)
 
-
-
-        profiles : (N, M) array
-            Profile data.
-        lengths : (N,) array
-            Length of each profile in *profiles*.
-        """
-        if self.m == 1:
-            ret = [Y[:L].reshape(1, -1) for Y, L in zip(profiles, lengths)]
-        elif self.m == 2:
-            ret = [np.stack([self.x[:L], Y[:L]]) for Y, L in zip(profiles, lengths)]
-        else:
-            raise ValueError(f"Invalid dimension: {self.m}")
-        return ret
-
-
-class PseudoLandmarkDataset(ProfileDatasetBase, Dataset):
-    """Pseudo-landmark dataset in 1-D or 2-D.
-
-    Pseudo-landmarks are points that are equidistantly sampled.
-
-    Parameters
-    ----------
-    file : heavyedge.ProfileData
-        Open hdf5 file.
-    k : int
-        Number of landmarks to sample.
-    m : {1, 2}
-        Profile data dimension.
-        1 means only y coordinates, and 2 means both x and y coordinates.
-    transform : callable, optional
-        Optional transformation to be applied on samples.
+    Should this dataset be used for :class:`torch.utils.data.DataLoader`,
+    ``collate_fn`` argument should be passed to the data loader.
 
-
-    --------
-    >>> from heavyedge import get_sample_path, ProfileData
-    >>> from heavyedge_dataset import PseudoLandmarkDataset
+    >>> from torch.utils.data import DataLoader
     >>> with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
-    ...
-
-    ...
+    ...     dataset = ProfileDataset(file, m=2)
+    ...     loader = DataLoader(dataset, collate_fn=lambda x: x)
+    ...     profiles, lengths = next(iter(loader))
+    >>> profiles.shape
+    (1, 2, 3200)
+    >>> lengths.shape
+    (1,)
+
+    If data should be loaded as :class:`torch.Tensor`, pass ``transform`` argument.
+
+    >>> import torch
+    >>> def to_tensor(sample):
+    ...     return (torch.from_numpy(sample[0]), torch.from_numpy(sample[1]))
+    >>> with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
+    ...     dataset = ProfileDataset(file, m=2, transform=to_tensor)
+    ...     loader = DataLoader(dataset, collate_fn=lambda x: x)
+    ...     profiles, lengths = next(iter(loader))
+    >>> type(profiles)
+    <class 'torch.Tensor'>
     """
 
-    def __init__(self, file,
-        self.
-        self.k = k
+    def __init__(self, file, m=1, transform=None):
+        self.file = file
         self.m = m
-        self.
-
+        self.transform = transform
         self.x = file.x()
 
-
-
-        return self._file
-
-    @property
-    def transform(self):
-        return self._transform
-
-    def default_transform(self, profiles, lengths):
-        """Sample pseudo-landmarks from profiles.
+    def __len__(self):
+        return len(self.file)
 
-
-
-
-
-
-
-
-
+    def __getitem__(self, idx):
+        if isinstance(idx, numbers.Integral):
+            Y, L, _ = self.file[idx]
+            Y = Y[np.newaxis, :]
+        else:
+            # Support multi-indexing
+            idxs = idx
+            needs_sort = isinstance(idx, (Sequence, np.ndarray))
+            if needs_sort:
+                # idxs must be sorted for h5py
+                idxs = np.array(idxs)
+                sort_idx = np.argsort(idxs)
+                idxs = idxs[sort_idx]
+            Y, L, _ = self.file[idxs]
+            if needs_sort:
+                reverse_idx = np.argsort(sort_idx)
+                Y = Y[reverse_idx]
+                L = L[reverse_idx]
+            Y = Y[:, np.newaxis, :]
         if self.m == 1:
-
-            idxs = np.linspace(0, L - 1, self.k, dtype=int)
-            ret.append(Y[idxs].reshape(1, -1))
+            pass
         elif self.m == 2:
-
-
-            ret.append(np.stack([self.x[idxs], Y[idxs]]))
+            x = np.tile(self.x, Y.shape[:-1] + (1,))
+            Y = np.concatenate([x, Y], axis=-2)
         else:
-            raise ValueError(f"
-
-
-
-
-    """Mathematical landmark dataset in 1-D.
-
-    Mathematical landmarks are points which are choosed by their
-    mathematical properties, i.e., slope or curvature.
-
-    Parameters
-    ----------
-    file : heavyedge.ProfileData
-        Open hdf5 file.
-    sigma : scalar
-        Standard deviation of Gaussian kernel for landmark detection.
-    transform : callable, optional
-        Optional transformation to be applied on samples.
-
-    Examples
-    --------
-    >>> from heavyedge import get_sample_path, ProfileData
-    >>> from heavyedge_dataset import MathematicalLandmarkDataset
-    >>> with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
-    ...     data = MathematicalLandmarkDataset(file, 32)[:]
-    >>> import matplotlib.pyplot as plt  # doctest: +SKIP
-    ...     plt.plot(*data.transpose(1, 2, 0), color="gray")
-    """
-
-    def __init__(self, file, sigma, transform=None):
-        self._file = file
-        self.sigma = sigma
-        self._transform = transform
-
-    @property
-    def file(self):
-        return self._file
-
-    @property
-    def transform(self):
-        return self._transform
-
-    def default_transform(self, profiles, lengths):
-        """Detect mathematical landmarks from profiles.
+            raise ValueError(f"Unsupported dimension: {self.m} (Must be 1 or 2).")
+        ret = (Y, L)
+        if self.transform is not None:
+            ret = self.transform(ret)
+        return ret
 
-
-
-
-            Profile data.
-        lengths : (N,) array
-            Length of each profile in *profiles*.
-        """
-        ret = []
-        for Y, L in zip(profiles, lengths):
-            Y = Y[:L]
-            indices = np.flip(landmarks_type3(Y, self.sigma))
-            y = np.concat([[np.mean(Y[: indices[0]])], Y[indices]])
-            ret.append(y.reshape(1, -1))
-        return np.array(ret)
+    def __getitems__(self, idxs):
+        # PyTorch API
+        return self.__getitem__(idxs)
{heavyedge_dataset-0.2.0.dist-info → heavyedge_dataset-1.0.0.post0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: heavyedge-dataset
-Version: 0.2.0
+Version: 1.0.0.post0
 Summary: PyTorch-compatible edge profile dataset API
 Author-email: Jisoo Song <jeesoo9595@snu.ac.kr>
 License-Expression: MIT
@@ -28,7 +28,6 @@ Provides-Extra: doc
 Requires-Dist: sphinx; extra == "doc"
 Requires-Dist: numpydoc; extra == "doc"
 Requires-Dist: pydata_sphinx_theme; extra == "doc"
-Requires-Dist: matplotlib; extra == "doc"
 Provides-Extra: dev
 Requires-Dist: flake8; extra == "dev"
 Requires-Dist: black; extra == "dev"
@@ -49,14 +48,14 @@ Package to load edge profile data as PyTorch dataset.
 
 ## Usage
 
-HeavyEdge-Dataset provides
+HeavyEdge-Dataset provides `ProfileDataset` which wraps profile data file.
 
-A simple use case to load
+A simple use case to load two-dimensional coordinates of profiles and their lengths:
 
 ```python
 from heavyedge import get_sample_path, ProfileData
 from heavyedge_dataset import ProfileDataset
-with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
+with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
     data = ProfileDataset(file, 2)[:]
 ```
 
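
The README example loads everything eagerly with `[:]`; for batched loading the new docstring recommends passing `collate_fn` to `torch.utils.data.DataLoader`. A minimal sketch of that workflow follows, assuming a PyTorch version whose DataLoader dispatches to `__getitems__` (as the doctest examples above rely on); the batch size and tensor conversion are illustrative choices, not package defaults:

```python
# Assumed batching workflow (not from the package docs), built on the
# DataLoader/collate_fn usage shown in the docstring above.
import torch
from torch.utils.data import DataLoader

from heavyedge import get_sample_path, ProfileData
from heavyedge_dataset import ProfileDataset

def to_tensors(sample):
    # ``sample`` is the (profiles, lengths) tuple returned by ProfileDataset.
    profiles, lengths = sample
    return torch.as_tensor(profiles), torch.as_tensor(lengths)

with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
    dataset = ProfileDataset(file, m=2, transform=to_tensors)
    # The identity collate_fn keeps the already-collated (profiles, lengths)
    # tuple instead of letting PyTorch re-collate it.
    loader = DataLoader(dataset, batch_size=4, collate_fn=lambda batch: batch)
    for profiles, lengths in loader:
        # profiles: (batch, 2, L) float tensor; lengths: (batch,) int tensor
        pass
```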
heavyedge_dataset-1.0.0.post0.dist-info/RECORD
ADDED
@@ -0,0 +1,6 @@
+heavyedge_dataset/__init__.py,sha256=CmquaxuwXJYTCGInmzI8Tp0Z9hJBVtEK9eH3NLyoREY,3737
+heavyedge_dataset-1.0.0.post0.dist-info/licenses/LICENSE,sha256=pBq2E7YJkUcEINdYeERL4RVFOQICd_MwJq6OusuAPGc,1066
+heavyedge_dataset-1.0.0.post0.dist-info/METADATA,sha256=34mMpniE5Nn4E6mwdVKhT4m6mnEUKRoCce9Tv6aGNHI,3434
+heavyedge_dataset-1.0.0.post0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+heavyedge_dataset-1.0.0.post0.dist-info/top_level.txt,sha256=wpRFI8TlkYFGetc17appkyybauBvzhGGvyueunsdJTc,18
+heavyedge_dataset-1.0.0.post0.dist-info/RECORD,,
heavyedge_dataset-0.2.0.dist-info/RECORD
REMOVED
@@ -1,6 +0,0 @@
-heavyedge_dataset/__init__.py,sha256=iBFvupZMaHpTGUxZOJvVKfD4bAOAdikK60a8bd4oxB0,7746
-heavyedge_dataset-0.2.0.dist-info/licenses/LICENSE,sha256=pBq2E7YJkUcEINdYeERL4RVFOQICd_MwJq6OusuAPGc,1066
-heavyedge_dataset-0.2.0.dist-info/METADATA,sha256=vZF2VwCYUmf7OzXfCO-pg9cHEeE7kCYIfl1JGmfTad0,3496
-heavyedge_dataset-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-heavyedge_dataset-0.2.0.dist-info/top_level.txt,sha256=wpRFI8TlkYFGetc17appkyybauBvzhGGvyueunsdJTc,18
-heavyedge_dataset-0.2.0.dist-info/RECORD,,
{heavyedge_dataset-0.2.0.dist-info → heavyedge_dataset-1.0.0.post0.dist-info}/WHEEL
RENAMED
File without changes
{heavyedge_dataset-0.2.0.dist-info → heavyedge_dataset-1.0.0.post0.dist-info}/licenses/LICENSE
RENAMED
File without changes
{heavyedge_dataset-0.2.0.dist-info → heavyedge_dataset-1.0.0.post0.dist-info}/top_level.txt
RENAMED
File without changes