heavyedge-dataset 0.2.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,97 +1,34 @@
1
- """Custom dataset classes for edge profiles.
1
+ """Package to load :class:`heavyedge.ProfileData` using PyTorch dataset scheme.
2
2
 
3
3
  Refer to `PyTorch tutorial <tutorial_>`_ for information about custom PyTorch datasets.
4
4
 
5
5
  .. _tutorial: https://docs.pytorch.org/tutorials/beginner/data_loading_tutorial.html
6
6
  """
7
7
 
8
- import abc
9
8
  import numbers
10
9
  from collections.abc import Sequence
11
10
 
12
11
  import numpy as np
13
- from heavyedge.api import landmarks_type3
14
12
  from torch.utils.data import Dataset
15
13
 
16
14
  __all__ = [
17
15
  "ProfileDataset",
18
- "PseudoLandmarkDataset",
19
- "MathematicalLandmarkDataset",
20
16
  ]
21
17
 
22
18
 
23
- class ProfileDatasetBase(abc.ABC):
24
- """Abstract base class for profile dataset."""
19
+ class ProfileDataset(Dataset):
20
+ """Edge profile dataset.
25
21
 
26
- @property
27
- @abc.abstractmethod
28
- def file(self):
29
- """Profile data file.
22
+ Loads data as a tuple of two numpy arrays:
30
23
 
31
- Returns
32
- -------
33
- heavyedge.ProfileData
34
- """
24
+ 1. Profile data, shape: (N, m, L).
25
+ 2. Length of each profile, shape: (N,).
35
26
 
36
- @property
37
- @abc.abstractmethod
38
- def transform(self):
39
- """Optional transformation to be applied on samples.
27
+ N is the number of loaded profiles, m is the dimension of coordinates, and
28
+ L is the maximum length of profiles.
40
29
 
41
- Returns
42
- -------
43
- Callable
44
- """
45
-
46
- def __len__(self):
47
- return len(self.file)
48
-
49
- def __getitem__(self, idx):
50
- if isinstance(idx, numbers.Integral):
51
- Y, L, _ = self.file[idx]
52
- Ys, Ls = [Y], [L]
53
- else:
54
- # Support multi-indexing
55
- idxs = idx
56
- needs_sort = isinstance(idx, (Sequence, np.ndarray))
57
- if needs_sort:
58
- # idxs must be sorted for h5py
59
- idxs = np.array(idxs)
60
- sort_idx = np.argsort(idxs)
61
- idxs = idxs[sort_idx]
62
- Ys, Ls, _ = self.file[idxs]
63
- if needs_sort:
64
- reverse_idx = np.argsort(sort_idx)
65
- Ys = Ys[reverse_idx]
66
- Ls = Ls[reverse_idx]
67
-
68
- ret = self.default_transform(Ys, Ls)
69
- if self.transform:
70
- ret = self.transform(ret)
71
- return ret
72
-
73
- def __getitems__(self, idxs):
74
- # PyTorch API
75
- return self.__getitem__(idxs)
76
-
77
- @abc.abstractmethod
78
- def default_transform(self, profiles, lengths):
79
- """Default data transformation.
80
-
81
- Subclass must implement this method to transform profile data into target data.
82
-
83
- Parameters
84
- ----------
85
- profiles : (N, M) array
86
- Profile data.
87
- lengths : (N,) array
88
- Length of each profile in *profiles*.
89
- """
90
- pass
91
-
92
-
93
- class ProfileDataset(ProfileDatasetBase, Dataset):
94
- """Full profile dataset in 1-D or 2-D.
30
+ Data can be indexed either by a single integer, by a slice, or by a sequence.
31
+ When a single integer index is used, the returned data omit the first (N) axis.
95
32
 
96
33
  Parameters
97
34
  ----------
@@ -108,165 +45,51 @@ class ProfileDataset(ProfileDatasetBase, Dataset):
108
45
  >>> from heavyedge import get_sample_path, ProfileData
109
46
  >>> from heavyedge_dataset import ProfileDataset
110
47
  >>> with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
111
- ... data = ProfileDataset(file, 2)[:]
112
- >>> import matplotlib.pyplot as plt # doctest: +SKIP
113
- ... for coords in data:
114
- ... plt.plot(*coords, color="gray")
48
+ ... profiles, _ = ProfileDataset(file, m=2)[:]
49
+ >>> profiles.shape
50
+ (22, 2, 3200)
115
51
  """
116
52
 
117
- def __init__(self, file, m, transform=None):
118
- self._file = file
53
+ def __init__(self, file, m=1, transform=None):
54
+ self.file = file
119
55
  self.m = m
120
- self._transform = transform
121
-
56
+ self.transform = transform
122
57
  self.x = file.x()
123
58
 
124
- @property
125
- def file(self):
126
- return self._file
127
-
128
- @property
129
- def transform(self):
130
- return self._transform
131
-
132
- def default_transform(self, profiles, lengths):
133
- """Crop profiles by their contact points.
59
+ def __len__(self):
60
+ return len(self.file)
134
61
 
135
- Parameters
136
- ----------
137
- profiles : (N, M) array
138
- Profile data.
139
- lengths : (N,) array
140
- Length of each profile in *profiles*.
141
- """
142
- if self.m == 1:
143
- ret = [Y[:L].reshape(1, -1) for Y, L in zip(profiles, lengths)]
144
- elif self.m == 2:
145
- ret = [np.stack([self.x[:L], Y[:L]]) for Y, L in zip(profiles, lengths)]
62
+ def __getitem__(self, idx):
63
+ if isinstance(idx, numbers.Integral):
64
+ Y, L, _ = self.file[idx]
65
+ Y = Y[np.newaxis, :]
146
66
  else:
147
- raise ValueError(f"Invalid dimension: {self.m}")
148
- return ret
149
-
150
-
151
- class PseudoLandmarkDataset(ProfileDatasetBase, Dataset):
152
- """Pseudo-landmark dataset in 1-D or 2-D.
153
-
154
- Pseudo-landmarks are points that are equidistantly sampled.
155
-
156
- Parameters
157
- ----------
158
- file : heavyedge.ProfileData
159
- Open hdf5 file.
160
- k : int
161
- Number of landmarks to sample.
162
- m : {1, 2}
163
- Profile data dimension.
164
- 1 means only y coordinates, and 2 means both x and y coordinates.
165
- transform : callable, optional
166
- Optional transformation to be applied on samples.
167
-
168
- Examples
169
- --------
170
- >>> from heavyedge import get_sample_path, ProfileData
171
- >>> from heavyedge_dataset import PseudoLandmarkDataset
172
- >>> with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
173
- ... data = PseudoLandmarkDataset(file, 10, 2)[:]
174
- >>> import matplotlib.pyplot as plt # doctest: +SKIP
175
- ... plt.plot(*data.transpose(1, 2, 0), color="gray")
176
- """
177
-
178
- def __init__(self, file, k, m, transform=None):
179
- self._file = file
180
- self.k = k
181
- self.m = m
182
- self._transform = transform
183
-
184
- self.x = file.x()
185
-
186
- @property
187
- def file(self):
188
- return self._file
189
-
190
- @property
191
- def transform(self):
192
- return self._transform
193
-
194
- def default_transform(self, profiles, lengths):
195
- """Sample pseudo-landmarks from profiles.
196
-
197
- Parameters
198
- ----------
199
- profiles : (N, M) array
200
- Profile data.
201
- lengths : (N,) array
202
- Length of each profile in *profiles*.
203
- """
204
- ret = []
67
+ # Support multi-indexing
68
+ idxs = idx
69
+ needs_sort = isinstance(idx, (Sequence, np.ndarray))
70
+ if needs_sort:
71
+ # idxs must be sorted for h5py
72
+ idxs = np.array(idxs)
73
+ sort_idx = np.argsort(idxs)
74
+ idxs = idxs[sort_idx]
75
+ Y, L, _ = self.file[idxs]
76
+ if needs_sort:
77
+ reverse_idx = np.argsort(sort_idx)
78
+ Y = Y[reverse_idx]
79
+ L = L[reverse_idx]
80
+ Y = Y[:, np.newaxis, :]
205
81
  if self.m == 1:
206
- for Y, L in zip(profiles, lengths):
207
- idxs = np.linspace(0, L - 1, self.k, dtype=int)
208
- ret.append(Y[idxs].reshape(1, -1))
82
+ pass
209
83
  elif self.m == 2:
210
- for Y, L in zip(profiles, lengths):
211
- idxs = np.linspace(0, L - 1, self.k, dtype=int)
212
- ret.append(np.stack([self.x[idxs], Y[idxs]]))
84
+ x = np.tile(self.x, Y.shape[:-1] + (1,))
85
+ Y = np.concatenate([x, Y], axis=-2)
213
86
  else:
214
- raise ValueError(f"Invalid dimension: {self.m}")
215
- return np.array(ret)
216
-
217
-
218
- class MathematicalLandmarkDataset(ProfileDatasetBase, Dataset):
219
- """Mathematical landmark dataset in 1-D.
220
-
221
- Mathematical landmarks are points which are choosed by their
222
- mathematical properties, i.e., slope or curvature.
223
-
224
- Parameters
225
- ----------
226
- file : heavyedge.ProfileData
227
- Open hdf5 file.
228
- sigma : scalar
229
- Standard deviation of Gaussian kernel for landmark detection.
230
- transform : callable, optional
231
- Optional transformation to be applied on samples.
232
-
233
- Examples
234
- --------
235
- >>> from heavyedge import get_sample_path, ProfileData
236
- >>> from heavyedge_dataset import MathematicalLandmarkDataset
237
- >>> with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
238
- ... data = MathematicalLandmarkDataset(file, 32)[:]
239
- >>> import matplotlib.pyplot as plt # doctest: +SKIP
240
- ... plt.plot(*data.transpose(1, 2, 0), color="gray")
241
- """
242
-
243
- def __init__(self, file, sigma, transform=None):
244
- self._file = file
245
- self.sigma = sigma
246
- self._transform = transform
247
-
248
- @property
249
- def file(self):
250
- return self._file
251
-
252
- @property
253
- def transform(self):
254
- return self._transform
255
-
256
- def default_transform(self, profiles, lengths):
257
- """Detect mathematical landmarks from profiles.
87
+ raise ValueError(f"Unsupported dimension: {self.m} (Must be 1 or 2).")
88
+ ret = (Y, L)
89
+ if self.transform is not None:
90
+ ret = self.transform(ret)
91
+ return ret
258
92
 
259
- Parameters
260
- ----------
261
- profiles : (N, M) array
262
- Profile data.
263
- lengths : (N,) array
264
- Length of each profile in *profiles*.
265
- """
266
- ret = []
267
- for Y, L in zip(profiles, lengths):
268
- Y = Y[:L]
269
- indices = np.flip(landmarks_type3(Y, self.sigma))
270
- y = np.concat([[np.mean(Y[: indices[0]])], Y[indices]])
271
- ret.append(y.reshape(1, -1))
272
- return np.array(ret)
93
+ def __getitems__(self, idxs):
94
+ # PyTorch API
95
+ return self.__getitem__(idxs)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: heavyedge-dataset
3
- Version: 0.2.0
3
+ Version: 1.0.0
4
4
  Summary: PyTorch-compatible edge profile dataset API
5
5
  Author-email: Jisoo Song <jeesoo9595@snu.ac.kr>
6
6
  License-Expression: MIT
@@ -28,7 +28,6 @@ Provides-Extra: doc
28
28
  Requires-Dist: sphinx; extra == "doc"
29
29
  Requires-Dist: numpydoc; extra == "doc"
30
30
  Requires-Dist: pydata_sphinx_theme; extra == "doc"
31
- Requires-Dist: matplotlib; extra == "doc"
32
31
  Provides-Extra: dev
33
32
  Requires-Dist: flake8; extra == "dev"
34
33
  Requires-Dist: black; extra == "dev"
@@ -49,14 +48,14 @@ Package to load edge profile data as PyTorch dataset.
49
48
 
50
49
  ## Usage
51
50
 
52
- HeavyEdge-Dataset provides custom dataset classes which wraps profile data file.
51
+ HeavyEdge-Dataset provides `ProfileDataset`, which wraps a profile data file.
53
52
 
54
- A simple use case to load a list of profiles as two-dimensional coordinates:
53
+ A simple use case to load two-dimensional coordinates of profiles and their lengths:
55
54
 
56
55
  ```python
57
56
  from heavyedge import get_sample_path, ProfileData
58
57
  from heavyedge_dataset import ProfileDataset
59
- with ProfileData(get_sample_path("Prep-Type2.h5")) as file: # Profile data file object
58
+ with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
60
59
  data = ProfileDataset(file, 2)[:]
61
60
  ```
62
61
 
@@ -0,0 +1,6 @@
1
+ heavyedge_dataset/__init__.py,sha256=T0vxz7uPabdvdYFlaupULUami0f1u0DFSHRmBkr-plc,2873
2
+ heavyedge_dataset-1.0.0.dist-info/licenses/LICENSE,sha256=pBq2E7YJkUcEINdYeERL4RVFOQICd_MwJq6OusuAPGc,1066
3
+ heavyedge_dataset-1.0.0.dist-info/METADATA,sha256=MLPF2FDERW-fDQpvqJhDElDbLHdhrCaPtCnN_dwPbuQ,3428
4
+ heavyedge_dataset-1.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
5
+ heavyedge_dataset-1.0.0.dist-info/top_level.txt,sha256=wpRFI8TlkYFGetc17appkyybauBvzhGGvyueunsdJTc,18
6
+ heavyedge_dataset-1.0.0.dist-info/RECORD,,
@@ -1,6 +0,0 @@
1
- heavyedge_dataset/__init__.py,sha256=iBFvupZMaHpTGUxZOJvVKfD4bAOAdikK60a8bd4oxB0,7746
2
- heavyedge_dataset-0.2.0.dist-info/licenses/LICENSE,sha256=pBq2E7YJkUcEINdYeERL4RVFOQICd_MwJq6OusuAPGc,1066
3
- heavyedge_dataset-0.2.0.dist-info/METADATA,sha256=vZF2VwCYUmf7OzXfCO-pg9cHEeE7kCYIfl1JGmfTad0,3496
4
- heavyedge_dataset-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
5
- heavyedge_dataset-0.2.0.dist-info/top_level.txt,sha256=wpRFI8TlkYFGetc17appkyybauBvzhGGvyueunsdJTc,18
6
- heavyedge_dataset-0.2.0.dist-info/RECORD,,