heavyedge-dataset 0.2.0__tar.gz → 1.0.0.post0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: heavyedge-dataset
3
- Version: 0.2.0
3
+ Version: 1.0.0.post0
4
4
  Summary: PyTorch-compatible edge profile dataset API
5
5
  Author-email: Jisoo Song <jeesoo9595@snu.ac.kr>
6
6
  License-Expression: MIT
@@ -28,7 +28,6 @@ Provides-Extra: doc
28
28
  Requires-Dist: sphinx; extra == "doc"
29
29
  Requires-Dist: numpydoc; extra == "doc"
30
30
  Requires-Dist: pydata_sphinx_theme; extra == "doc"
31
- Requires-Dist: matplotlib; extra == "doc"
32
31
  Provides-Extra: dev
33
32
  Requires-Dist: flake8; extra == "dev"
34
33
  Requires-Dist: black; extra == "dev"
@@ -49,14 +48,14 @@ Package to load edge profile data as PyTorch dataset.
49
48
 
50
49
  ## Usage
51
50
 
52
- HeavyEdge-Dataset provides custom dataset classes which wraps profile data file.
51
+ HeavyEdge-Dataset provides `ProfileDataset`, which wraps a profile data file.
53
52
 
54
- A simple use case to load a list of profiles as two-dimensional coordinates:
53
+ A simple use case to load two-dimensional coordinates of profiles and their lengths:
55
54
 
56
55
  ```python
57
56
  from heavyedge import get_sample_path, ProfileData
58
57
  from heavyedge_dataset import ProfileDataset
59
- with ProfileData(get_sample_path("Prep-Type2.h5")) as file: # Profile data file object
58
+ with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
60
59
  data = ProfileDataset(file, 2)[:]
61
60
  ```
62
61
 
@@ -11,14 +11,14 @@ Package to load edge profile data as PyTorch dataset.
11
11
 
12
12
  ## Usage
13
13
 
14
- HeavyEdge-Dataset provides custom dataset classes which wraps profile data file.
14
+ HeavyEdge-Dataset provides `ProfileDataset`, which wraps a profile data file.
15
15
 
16
- A simple use case to load a list of profiles as two-dimensional coordinates:
16
+ A simple use case to load two-dimensional coordinates of profiles and their lengths:
17
17
 
18
18
  ```python
19
19
  from heavyedge import get_sample_path, ProfileData
20
20
  from heavyedge_dataset import ProfileDataset
21
- with ProfileData(get_sample_path("Prep-Type2.h5")) as file: # Profile data file object
21
+ with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
22
22
  data = ProfileDataset(file, 2)[:]
23
23
  ```
24
24
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "heavyedge-dataset"
7
- version = "0.2.0"
7
+ version = "1.0.0.post0"
8
8
  authors = [
9
9
  {name = "Jisoo Song", email = "jeesoo9595@snu.ac.kr"}
10
10
  ]
@@ -45,7 +45,6 @@ doc = [
45
45
  "sphinx",
46
46
  "numpydoc",
47
47
  "pydata_sphinx_theme",
48
- "matplotlib",
49
48
  ]
50
49
  dev = [
51
50
  "flake8",
@@ -0,0 +1,119 @@
1
+ """Package to load edge profile data using PyTorch dataset.
2
+
3
+ Refer to `PyTorch tutorial <tutorial_>`_ for information about custom datasets.
4
+
5
+ .. _tutorial: https://docs.pytorch.org/tutorials/beginner/data_loading_tutorial.html
6
+ """
7
+
8
+ import numbers
9
+ from collections.abc import Sequence
10
+
11
+ import numpy as np
12
+ from torch.utils.data import Dataset
13
+
14
+ __all__ = [
15
+ "ProfileDataset",
16
+ ]
17
+
18
+
19
class ProfileDataset(Dataset):
    """Edge profile dataset.

    Loads data as a tuple of two numpy arrays:

    1. Profile data, shape: (N, m, L).
    2. Length of each profile, shape: (N,).

    N is the number of loaded data, m is dimension of coordinates, and
    L is the maximum length of profiles.
    Indexing with a single integer drops the leading N axis of the profile data.

    Parameters
    ----------
    file : heavyedge.ProfileData
        Open hdf5 file.
    m : {1, 2}
        Profile data dimension.
        1 means only y coordinates, and 2 means both x and y coordinates.
    transform : callable, optional
        Optional transformation to be applied on samples.

    Notes
    -----
    Index sequences and index arrays may be unordered and may contain repeated
    entries; the file is read with sorted, unique indices and the requested
    order (including repetitions) is restored afterwards.
    Boolean masks are converted to the positions of their true entries.

    Examples
    --------
    >>> from heavyedge import get_sample_path, ProfileData
    >>> from heavyedge_dataset import ProfileDataset
    >>> with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
    ...     profiles, lengths = ProfileDataset(file, m=2)[:]
    >>> profiles.shape
    (22, 2, 3200)
    >>> lengths.shape
    (22,)

    Should this dataset be used for :class:`torch.utils.data.DataLoader`,
    ``collate_fn`` argument should be passed to the data loader.

    >>> from torch.utils.data import DataLoader
    >>> with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
    ...     dataset = ProfileDataset(file, m=2)
    ...     loader = DataLoader(dataset, collate_fn=lambda x: x)
    ...     profiles, lengths = next(iter(loader))
    >>> profiles.shape
    (1, 2, 3200)
    >>> lengths.shape
    (1,)

    If data should be loaded as :class:`torch.Tensor`, pass ``transform`` argument.

    >>> import torch
    >>> def to_tensor(sample):
    ...     return (torch.from_numpy(sample[0]), torch.from_numpy(sample[1]))
    >>> with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
    ...     dataset = ProfileDataset(file, m=2, transform=to_tensor)
    ...     loader = DataLoader(dataset, collate_fn=lambda x: x)
    ...     profiles, lengths = next(iter(loader))
    >>> type(profiles)
    <class 'torch.Tensor'>
    """

    def __init__(self, file, m=1, transform=None):
        self.file = file
        self.m = m
        self.transform = transform
        # x coordinates are read once and reused for every sample.
        self.x = file.x()

    def __len__(self):
        """Return the number of profiles reported by the wrapped file."""
        return len(self.file)

    def __getitem__(self, idx):
        """Load one or several profiles.

        Parameters
        ----------
        idx : int, slice, sequence of int, or numpy.ndarray
            Index, slice, index collection or boolean mask selecting profiles.

        Returns
        -------
        tuple
            ``(profiles, lengths)``, or the result of ``transform`` applied
            to that tuple when a transform is set.

        Raises
        ------
        ValueError
            If ``m`` is neither 1 nor 2.
        """
        # Reject an unsupported dimension before touching the file.
        if self.m not in (1, 2):
            raise ValueError(f"Unsupported dimension: {self.m} (Must be 1 or 2).")

        if isinstance(idx, numbers.Integral):
            Y, L, _ = self.file[idx]
            Y = Y[np.newaxis, :]
        else:
            # Support multi-indexing
            idxs = idx
            inverse = None
            if isinstance(idx, (Sequence, np.ndarray)):
                idxs = np.asarray(idx)
                if idxs.dtype == np.bool_:
                    # A mask selects by position; sorting its values would
                    # select the wrong rows.
                    idxs = np.flatnonzero(idxs)
                # h5py needs strictly increasing indices, so sort and drop
                # repetitions, remembering how to rebuild the request.
                idxs, inverse = np.unique(idxs, return_inverse=True)
            Y, L, _ = self.file[idxs]
            if inverse is not None:
                Y = Y[inverse]
                L = L[inverse]
            Y = Y[:, np.newaxis, :]

        if self.m == 2:
            # Prepend the shared x coordinates as an extra coordinate row.
            x = np.tile(self.x, Y.shape[:-1] + (1,))
            Y = np.concatenate([x, Y], axis=-2)

        ret = (Y, L)
        if self.transform is not None:
            ret = self.transform(ret)
        return ret

    def __getitems__(self, idxs):
        """Load a batch of profiles in one call (PyTorch batched-fetch API)."""
        return self.__getitem__(idxs)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: heavyedge-dataset
3
- Version: 0.2.0
3
+ Version: 1.0.0.post0
4
4
  Summary: PyTorch-compatible edge profile dataset API
5
5
  Author-email: Jisoo Song <jeesoo9595@snu.ac.kr>
6
6
  License-Expression: MIT
@@ -28,7 +28,6 @@ Provides-Extra: doc
28
28
  Requires-Dist: sphinx; extra == "doc"
29
29
  Requires-Dist: numpydoc; extra == "doc"
30
30
  Requires-Dist: pydata_sphinx_theme; extra == "doc"
31
- Requires-Dist: matplotlib; extra == "doc"
32
31
  Provides-Extra: dev
33
32
  Requires-Dist: flake8; extra == "dev"
34
33
  Requires-Dist: black; extra == "dev"
@@ -49,14 +48,14 @@ Package to load edge profile data as PyTorch dataset.
49
48
 
50
49
  ## Usage
51
50
 
52
- HeavyEdge-Dataset provides custom dataset classes which wraps profile data file.
51
+ HeavyEdge-Dataset provides `ProfileDataset`, which wraps a profile data file.
53
52
 
54
- A simple use case to load a list of profiles as two-dimensional coordinates:
53
+ A simple use case to load two-dimensional coordinates of profiles and their lengths:
55
54
 
56
55
  ```python
57
56
  from heavyedge import get_sample_path, ProfileData
58
57
  from heavyedge_dataset import ProfileDataset
59
- with ProfileData(get_sample_path("Prep-Type2.h5")) as file: # Profile data file object
58
+ with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
60
59
  data = ProfileDataset(file, 2)[:]
61
60
  ```
62
61
 
@@ -11,7 +11,6 @@ heavyedge-dataset[doc,test]
11
11
  sphinx
12
12
  numpydoc
13
13
  pydata_sphinx_theme
14
- matplotlib
15
14
 
16
15
  [test]
17
16
  pytest
@@ -1,272 +0,0 @@
1
- """Custom dataset classes for edge profiles.
2
-
3
- Refer to `PyTorch tutorial <tutorial>`_ for information about custom PyTorch dataset.
4
-
5
- .. _tutorial: https://docs.pytorch.org/tutorials/beginner/data_loading_tutorial.html
6
- """
7
-
8
- import abc
9
- import numbers
10
- from collections.abc import Sequence
11
-
12
- import numpy as np
13
- from heavyedge.api import landmarks_type3
14
- from torch.utils.data import Dataset
15
-
16
- __all__ = [
17
- "ProfileDataset",
18
- "PseudoLandmarkDataset",
19
- "MathematicalLandmarkDataset",
20
- ]
21
-
22
-
23
class ProfileDatasetBase(abc.ABC):
    """Abstract base class for profile dataset.

    Subclasses provide the ``file`` and ``transform`` properties and
    implement :meth:`default_transform`; this class supplies length and
    indexing on top of them.
    """

    @property
    @abc.abstractmethod
    def file(self):
        """Profile data file.

        Returns
        -------
        heavyedge.ProfileData
        """

    @property
    @abc.abstractmethod
    def transform(self):
        """Optional transformation to be applied on samples.

        Returns
        -------
        Callable
        """

    def __len__(self):
        """Return the number of profiles reported by the wrapped file."""
        return len(self.file)

    def __getitem__(self, idx):
        """Load profiles, apply the default transform, then ``transform``.

        Parameters
        ----------
        idx : int, slice, sequence of int, or numpy.ndarray
            Index, slice, index collection or boolean mask selecting profiles.
            Index collections may be unordered and may contain repeated
            entries.
        """
        if isinstance(idx, numbers.Integral):
            Y, L, _ = self.file[idx]
            # Wrap the single sample so default_transform always sees a batch.
            Ys, Ls = [Y], [L]
        else:
            # Support multi-indexing
            idxs = idx
            inverse = None
            if isinstance(idx, (Sequence, np.ndarray)):
                idxs = np.asarray(idx)
                if idxs.dtype == np.bool_:
                    # A mask selects by position; sorting its values would
                    # select the wrong rows.
                    idxs = np.flatnonzero(idxs)
                # h5py needs strictly increasing indices, so sort and drop
                # repetitions, remembering how to rebuild the request.
                idxs, inverse = np.unique(idxs, return_inverse=True)
            Ys, Ls, _ = self.file[idxs]
            if inverse is not None:
                Ys = Ys[inverse]
                Ls = Ls[inverse]

        ret = self.default_transform(Ys, Ls)
        if self.transform:
            ret = self.transform(ret)
        return ret

    def __getitems__(self, idxs):
        """Load a batch of samples in one call (PyTorch batched-fetch API)."""
        return self.__getitem__(idxs)

    @abc.abstractmethod
    def default_transform(self, profiles, lengths):
        """Default data transformation.

        Subclass must implement this method to transform profile data into target data.

        Parameters
        ----------
        profiles : (N, M) array
            Profile data.
        lengths : (N,) array
            Length of each profile in *profiles*.
        """
        pass
91
-
92
-
93
class ProfileDataset(ProfileDatasetBase, Dataset):
    """Dataset of complete profiles with one or two coordinate rows.

    Parameters
    ----------
    file : heavyedge.ProfileData
        Open hdf5 file.
    m : {1, 2}
        Number of coordinate rows per profile.
        1 keeps y coordinates only; 2 puts x coordinates above them.
    transform : callable, optional
        Extra transformation applied to the loaded samples.

    Examples
    --------
    >>> from heavyedge import get_sample_path, ProfileData
    >>> from heavyedge_dataset import ProfileDataset
    >>> with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
    ...     data = ProfileDataset(file, 2)[:]
    >>> import matplotlib.pyplot as plt  # doctest: +SKIP
    >>> for coords in data:  # doctest: +SKIP
    ...     plt.plot(*coords, color="gray")
    """

    def __init__(self, file, m, transform=None):
        self._file = file
        self.m = m
        self._transform = transform

        # Shared x coordinates, read once from the file.
        self.x = file.x()

    @property
    def file(self):
        return self._file

    @property
    def transform(self):
        return self._transform

    def default_transform(self, profiles, lengths):
        """Cut every profile down to its own length.

        Parameters
        ----------
        profiles : (N, M) array
            Profile data.
        lengths : (N,) array
            Length of each profile in *profiles*.

        Returns
        -------
        list of arrays
            One ``(m, length)`` array per profile.

        Raises
        ------
        ValueError
            If ``m`` is neither 1 nor 2.
        """
        if self.m == 1:
            cropped = []
            for profile, length in zip(profiles, lengths):
                cropped.append(profile[:length].reshape(1, -1))
            return cropped
        if self.m == 2:
            cropped = []
            for profile, length in zip(profiles, lengths):
                cropped.append(np.stack([self.x[:length], profile[:length]]))
            return cropped
        raise ValueError(f"Invalid dimension: {self.m}")
149
-
150
-
151
class PseudoLandmarkDataset(ProfileDatasetBase, Dataset):
    """Dataset of pseudo-landmarks with one or two coordinate rows.

    Pseudo-landmarks are taken at evenly spaced sample positions between the
    first and the last point of each profile.

    Parameters
    ----------
    file : heavyedge.ProfileData
        Open hdf5 file.
    k : int
        Number of landmarks to sample.
    m : {1, 2}
        Number of coordinate rows per profile.
        1 keeps y coordinates only; 2 puts x coordinates above them.
    transform : callable, optional
        Extra transformation applied to the loaded samples.

    Examples
    --------
    >>> from heavyedge import get_sample_path, ProfileData
    >>> from heavyedge_dataset import PseudoLandmarkDataset
    >>> with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
    ...     data = PseudoLandmarkDataset(file, 10, 2)[:]
    >>> import matplotlib.pyplot as plt  # doctest: +SKIP
    >>> plt.plot(*data.transpose(1, 2, 0), color="gray")  # doctest: +SKIP
    """

    def __init__(self, file, k, m, transform=None):
        self._file = file
        self.k = k
        self.m = m
        self._transform = transform

        # Shared x coordinates, read once from the file.
        self.x = file.x()

    @property
    def file(self):
        return self._file

    @property
    def transform(self):
        return self._transform

    def default_transform(self, profiles, lengths):
        """Pick ``k`` evenly spaced points from every profile.

        Parameters
        ----------
        profiles : (N, M) array
            Profile data.
        lengths : (N,) array
            Length of each profile in *profiles*.

        Returns
        -------
        numpy.ndarray
            Sampled landmarks of all profiles, stacked into one array.

        Raises
        ------
        ValueError
            If ``m`` is neither 1 nor 2.
        """
        if self.m == 1:

            def pick(profile, positions):
                return profile[positions].reshape(1, -1)

        elif self.m == 2:

            def pick(profile, positions):
                return np.stack([self.x[positions], profile[positions]])

        else:
            raise ValueError(f"Invalid dimension: {self.m}")

        landmarks = [
            # Integer positions from the first to the last valid sample.
            pick(profile, np.linspace(0, length - 1, self.k, dtype=int))
            for profile, length in zip(profiles, lengths)
        ]
        return np.array(landmarks)
216
-
217
-
218
class MathematicalLandmarkDataset(ProfileDatasetBase, Dataset):
    """Mathematical landmark dataset in 1-D.

    Mathematical landmarks are points which are chosen by their
    mathematical properties, i.e., slope or curvature.

    Parameters
    ----------
    file : heavyedge.ProfileData
        Open hdf5 file.
    sigma : scalar
        Standard deviation of Gaussian kernel for landmark detection.
    transform : callable, optional
        Optional transformation to be applied on samples.

    Examples
    --------
    >>> from heavyedge import get_sample_path, ProfileData
    >>> from heavyedge_dataset import MathematicalLandmarkDataset
    >>> with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
    ...     data = MathematicalLandmarkDataset(file, 32)[:]
    >>> import matplotlib.pyplot as plt  # doctest: +SKIP
    >>> plt.plot(*data.transpose(1, 2, 0), color="gray")  # doctest: +SKIP
    """

    def __init__(self, file, sigma, transform=None):
        self._file = file
        self.sigma = sigma
        self._transform = transform

    @property
    def file(self):
        return self._file

    @property
    def transform(self):
        return self._transform

    def default_transform(self, profiles, lengths):
        """Detect mathematical landmarks from profiles.

        Each profile is cropped to its length and passed to
        ``landmarks_type3``. The result row holds the mean of the profile
        values before the first index of the flipped landmark indices,
        followed by the profile values at those indices.

        Parameters
        ----------
        profiles : (N, M) array
            Profile data.
        lengths : (N,) array
            Length of each profile in *profiles*.

        Returns
        -------
        numpy.ndarray
            Landmark values of all profiles, one ``(1, k)`` row block each.
        """
        ret = []
        for Y, L in zip(profiles, lengths):
            Y = Y[:L]
            # NOTE(review): the flip presumably puts the indices in ascending
            # order; confirm against landmarks_type3.
            indices = np.flip(landmarks_type3(Y, self.sigma))
            # np.concatenate instead of np.concat: the latter exists only in
            # NumPy >= 2.0.
            y = np.concatenate([[np.mean(Y[: indices[0]])], Y[indices]])
            ret.append(y.reshape(1, -1))
        return np.array(ret)