heavyedge-dataset 1.0.0.post0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,10 +9,13 @@ import numbers
9
9
  from collections.abc import Sequence
10
10
 
11
11
  import numpy as np
12
+ from heavyedge.api import landmarks_type3
12
13
  from torch.utils.data import Dataset
13
14
 
14
15
  __all__ = [
15
16
  "ProfileDataset",
17
+ "PseudoLandmarkDataset",
18
+ "MathematicalLandmarkDataset",
16
19
  ]
17
20
 
18
21
 
@@ -47,8 +50,10 @@ class ProfileDataset(Dataset):
47
50
  (22, 2, 3200)
48
51
  >>> lengths.shape
49
52
  (22,)
53
+ >>> import matplotlib.pyplot as plt # doctest: +SKIP
54
+ ... plt.plot(*profiles.transpose(1, 2, 0))
50
55
 
51
- Should this dataset be used for :class:`torch.utils.data.DataLoader`,
56
+ Should the dataset be used for :class:`torch.utils.data.DataLoader`,
52
57
  ``collate_fn`` argument should be passed to the data loader.
53
58
 
54
59
  >>> from torch.utils.data import DataLoader
@@ -117,3 +122,151 @@ class ProfileDataset(Dataset):
117
122
  def __getitems__(self, idxs):
118
123
  # PyTorch API
119
124
  return self.__getitem__(idxs)
125
+
126
+
127
+ class PseudoLandmarkDataset(Dataset):
128
+ """Dataset for pseudo-landmarks of edge profiles
129
+
130
+ Parameters
131
+ ----------
132
+ file : heavyedge.ProfileData
133
+ Open hdf5 file.
134
+ m : {1, 2}
135
+ Dimension of landmark coordinates.
136
+ k : int
137
+ Number of landmarks to sample.
138
+ transform : callable, optional
139
+ Optional transformation to be applied on samples.
140
+
141
+ Examples
142
+ --------
143
+ >>> from heavyedge import ProfileData, get_sample_path
144
+ >>> from heavyedge_dataset import PseudoLandmarkDataset
145
+ >>> with ProfileData(get_sample_path("Prep-Type1.h5")) as file:
146
+ ... dataset = PseudoLandmarkDataset(file, 1, 10)
147
+ ... data = dataset[:]
148
+ >>> data.shape
149
+ (18, 1, 10)
150
+ >>> import matplotlib.pyplot as plt # doctest: +SKIP
151
+ ... plt.plot(*data.transpose(1, 2, 0))
152
+
153
+ Because sampling pseudo-landmarks requires loading full profile data,
154
+ loading a large dataset can cause memory failure even if the output data is manageable.
155
+ This can be avoided by batched loading with :class:`torch.utils.data.DataLoader`.
156
+
157
+ >>> import numpy as np
158
+ >>> from torch.utils.data import DataLoader
159
+ >>> with ProfileData(get_sample_path("Prep-Type1.h5")) as file:
160
+ ... dataset = PseudoLandmarkDataset(file, 1, 10)
161
+ ... loader = DataLoader(dataset, batch_size=10)
162
+ ... data = np.concatenate(list(loader))
163
+ >>> data.shape
164
+ (18, 1, 10)
165
+ """
166
+
167
+ def __init__(self, file, m, k, transform=None):
168
+ self.profiles = ProfileDataset(file, m=m)
169
+ self.k = k
170
+ self.transform = transform
171
+
172
+ def __len__(self):
173
+ return len(self.profiles)
174
+
175
+ def __getitem__(self, idx):
176
+ if isinstance(idx, numbers.Integral):
177
+ Y, L = self.profiles[idx]
178
+ Ys, Ls = [Y], [L]
179
+ else:
180
+ Ys, Ls = self.profiles[idx]
181
+
182
+ X = []
183
+ for Y, L in zip(Ys, Ls):
184
+ idxs = np.linspace(0, L - 1, self.k, dtype=int)
185
+ X.append(Y[:, idxs])
186
+ ret = np.array(X)
187
+ if self.transform is not None:
188
+ ret = self.transform(ret)
189
+ return ret
190
+
191
+ def __getitems__(self, idxs):
192
+ # PyTorch API
193
+ return self.__getitem__(idxs)
194
+
195
+
196
+ class MathematicalLandmarkDataset(Dataset):
197
+ """Dataset for mathematical landmarks of edge profiles.
198
+
199
+ Loads data as a tuple of two numpy arrays:
200
+
201
+ 1. Landmark coordinates, shape: (N, m, 4).
202
+ 2. Average plateau height, shape: (N,).
203
+
204
+ N is the number of loaded samples and m is the dimension of the coordinates.
205
+
206
+ Parameters
207
+ ----------
208
+ file : heavyedge.ProfileData
209
+ Open hdf5 file.
210
+ m : {1, 2}
211
+ Dimension of landmark coordinates.
212
+ sigma : scalar
213
+ Standard deviation of Gaussian kernel for landmark detection.
214
+ transform : callable, optional
215
+ Optional transformation to be applied on samples.
216
+
217
+ Examples
218
+ --------
219
+ >>> from heavyedge import ProfileData, get_sample_path
220
+ >>> from heavyedge_dataset import MathematicalLandmarkDataset
221
+ >>> with ProfileData(get_sample_path("Prep-Type3.h5")) as file:
222
+ ... dataset = MathematicalLandmarkDataset(file, 2, 32)
223
+ ... landmarks, height = dataset[:]
224
+ >>> landmarks.shape
225
+ (35, 2, 4)
226
+ >>> height.shape
227
+ (35,)
228
+
229
+ Because sampling mathematical landmarks requires loading full profile data,
230
+ loading a large dataset can cause memory failure even if the output data is manageable.
231
+ This can be avoided by batched loading with :class:`torch.utils.data.DataLoader`.
232
+ Note that ``collate_fn`` argument should be passed to the data loader.
233
+
234
+ >>> import numpy as np
235
+ >>> from torch.utils.data import DataLoader
236
+ >>> with ProfileData(get_sample_path("Prep-Type3.h5")) as file:
237
+ ... dataset = MathematicalLandmarkDataset(file, 2, 32)
238
+ ... loader = DataLoader(dataset, batch_size=10, collate_fn=lambda x: x)
239
+ ... landmarks = np.concatenate([lm for lm, _ in loader])
240
+ >>> landmarks.shape
241
+ (35, 2, 4)
242
+ """
243
+
244
+ def __init__(self, file, m, sigma, transform=None):
245
+ self.profiles = ProfileDataset(file, m=m)
246
+ self.sigma = sigma
247
+ self.transform = transform
248
+
249
+ def __len__(self):
250
+ return len(self.profiles)
251
+
252
+ def __getitem__(self, idx):
253
+ if isinstance(idx, numbers.Integral):
254
+ Y, L = self.profiles[idx]
255
+ Ys, Ls = [Y], [L]
256
+ else:
257
+ Ys, Ls = self.profiles[idx]
258
+
259
+ X, H = [], []
260
+ for Y, L in zip(Ys, Ls):
261
+ idxs = np.flip(landmarks_type3(Y[-1, :L], self.sigma))
262
+ X.append(Y[:, idxs])
263
+ H.append(np.mean(Y[-1, : idxs[0]]))
264
+
265
+ ret = np.array(X), np.array(H)
266
+ if self.transform is not None:
267
+ ret = self.transform(ret)
268
+ return ret
269
+
270
+ def __getitems__(self, idxs):
271
+ # PyTorch API
272
+ return self.__getitem__(idxs)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: heavyedge-dataset
3
- Version: 1.0.0.post0
3
+ Version: 1.1.0
4
4
  Summary: PyTorch-compatible edge profile dataset API
5
5
  Author-email: Jisoo Song <jeesoo9595@snu.ac.kr>
6
6
  License-Expression: MIT
@@ -28,6 +28,7 @@ Provides-Extra: doc
28
28
  Requires-Dist: sphinx; extra == "doc"
29
29
  Requires-Dist: numpydoc; extra == "doc"
30
30
  Requires-Dist: pydata_sphinx_theme; extra == "doc"
31
+ Requires-Dist: matplotlib; extra == "doc"
31
32
  Provides-Extra: dev
32
33
  Requires-Dist: flake8; extra == "dev"
33
34
  Requires-Dist: black; extra == "dev"
@@ -48,7 +49,7 @@ Package to load edge profile data as PyTorch dataset.
48
49
 
49
50
  ## Usage
50
51
 
51
- HeavyEdge-Dataset provides `ProfileDataset` which wraps profile data file.
52
+ HeavyEdge-Dataset provides dataset classes which wrap a profile data file.
52
53
 
53
54
  A simple use case to load two-dimensional coordinates of profiles and their lengths:
54
55
 
@@ -0,0 +1,6 @@
1
+ heavyedge_dataset/__init__.py,sha256=V7EonirGRfibsJAQbrEmpOpz53v4zpylWHR2zrMaNPE,8772
2
+ heavyedge_dataset-1.1.0.dist-info/licenses/LICENSE,sha256=pBq2E7YJkUcEINdYeERL4RVFOQICd_MwJq6OusuAPGc,1066
3
+ heavyedge_dataset-1.1.0.dist-info/METADATA,sha256=38Jf4MZANfXyZNQS2GH_JZZ4MzrSzAhN2UeqWTFMzK8,3457
4
+ heavyedge_dataset-1.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
5
+ heavyedge_dataset-1.1.0.dist-info/top_level.txt,sha256=wpRFI8TlkYFGetc17appkyybauBvzhGGvyueunsdJTc,18
6
+ heavyedge_dataset-1.1.0.dist-info/RECORD,,
@@ -1,6 +0,0 @@
1
- heavyedge_dataset/__init__.py,sha256=CmquaxuwXJYTCGInmzI8Tp0Z9hJBVtEK9eH3NLyoREY,3737
2
- heavyedge_dataset-1.0.0.post0.dist-info/licenses/LICENSE,sha256=pBq2E7YJkUcEINdYeERL4RVFOQICd_MwJq6OusuAPGc,1066
3
- heavyedge_dataset-1.0.0.post0.dist-info/METADATA,sha256=34mMpniE5Nn4E6mwdVKhT4m6mnEUKRoCce9Tv6aGNHI,3434
4
- heavyedge_dataset-1.0.0.post0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
5
- heavyedge_dataset-1.0.0.post0.dist-info/top_level.txt,sha256=wpRFI8TlkYFGetc17appkyybauBvzhGGvyueunsdJTc,18
6
- heavyedge_dataset-1.0.0.post0.dist-info/RECORD,,