heavyedge-dataset 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ """PyTorch-compatible dataset API for edge profiles."""
2
+
3
+ __all__ = [
4
+ "ProfileDataset",
5
+ "PseudoLmDataset",
6
+ "MathLm1dDataset",
7
+ "MathLm2dDataset",
8
+ ]
9
+
10
+ from .datasets import (
11
+ MathLm1dDataset,
12
+ MathLm2dDataset,
13
+ ProfileDataset,
14
+ PseudoLmDataset,
15
+ )
@@ -0,0 +1,293 @@
1
+ """
2
+ Dataset classes
3
+ ---------------
4
+
5
+ PyTorch dataset classes for edge profiles.
6
+ """
7
+
8
+ import abc
9
+ import numbers
10
+ from collections.abc import Sequence
11
+
12
+ import numpy as np
13
+ from heavyedge.api import landmarks_type3
14
+ from torch.utils.data import Dataset
15
+
16
+ from .landmarks import math_landmarks_1d, pseudo_landmarks_1d, pseudo_landmarks_2d
17
+
18
+ __all__ = [
19
+ "ProfileDataset",
20
+ "PseudoLmDataset",
21
+ "MathLm1dDataset",
22
+ "MathLm2dDataset",
23
+ ]
24
+
25
+
26
class ProfileDatasetBase(abc.ABC):
    """Abstract base class for profile dataset.

    Subclasses provide the data file, a dataset-specific default transform
    and an optional user transform. This class builds ``__len__``,
    ``__getitem__`` and the batched ``__getitems__`` on top of them.
    """

    @property
    @abc.abstractmethod
    def file(self):
        """Profile data file.

        Returns
        -------
        heavyedge.ProfileData
        """

    @abc.abstractmethod
    def default_transform(self, profiles, lengths):
        """Default transform by the dataset class.

        Parameters
        ----------
        profiles : (N, M) array
            Profile data.
        lengths : (N,) array
            Length of each profile.
        """
        pass

    @property
    @abc.abstractmethod
    def transform(self):
        """Optional transformation passed to the dataset instance.

        Returns
        -------
        Callable
        """

    def __len__(self):
        """Return the number of profiles, as reported by the file."""
        return len(self.file)

    def __getitem__(self, idx):
        """Return the transformed sample(s) selected by *idx*.

        An integral *idx* is loaded as a batch of one, transformed, and then
        unwrapped with ``[0]``. Any other index is delegated to
        :meth:`__getitems__`.
        """
        if isinstance(idx, numbers.Integral):
            Y, L, _ = self.file[idx]
            ret = self.default_transform([Y], [L])
            if self.transform:
                ret = self.transform(ret)
            ret = ret[0]
        else:
            ret = self.__getitems__(idx)
        return ret

    def __getitems__(self, idxs):
        """Return the transformed batch selected by *idxs* (PyTorch API).

        Parameters
        ----------
        idxs : sequence, ndarray, slice or other index
            Sequences and arrays may be unsorted and may contain repeated
            indices; a boolean array is treated as a mask. Anything else is
            passed to the file unchanged.

        Returns
        -------
        object
            Result of :meth:`default_transform`, passed through
            :attr:`transform` when one is set. Samples are in the order
            requested by *idxs*.
        """
        is_index_array = isinstance(idxs, (Sequence, np.ndarray))
        if is_index_array:
            idxs = np.asarray(idxs)
            if idxs.dtype == bool:
                # A mask selects positions; sorting it as if it held integer
                # indices would select the wrong rows.
                idxs = np.flatnonzero(idxs)
            # h5py needs strictly increasing indices: sort *and* de-duplicate,
            # remembering how to rebuild the requested order afterwards.
            idxs, restore = np.unique(idxs, return_inverse=True)
            restore = restore.reshape(-1)
        Ys, Ls, _ = self.file[idxs]
        if is_index_array:
            Ys = Ys[restore]
            Ls = Ls[restore]
        ret = self.default_transform(Ys, Ls)
        if self.transform:
            ret = self.transform(ret)
        return ret
95
+
96
+
97
class ProfileDataset(ProfileDatasetBase, Dataset):
    """Dataset of full profiles, each trimmed to its own length.

    Parameters
    ----------
    file : heavyedge.ProfileData
        Open hdf5 file.
    m : {1, 2}
        Profile data dimension.
        1 yields y coordinates only, 2 yields both x and y coordinates.
    transform : callable, optional
        Optional transform to be applied on a sample.

    Notes
    -----
    ``file.x()`` is evaluated once at construction and kept as ``self.x``.
    The value of `m` is only checked when samples are transformed.

    Examples
    --------
    >>> from heavyedge import get_sample_path, ProfileData
    >>> from heavyedge_dataset import ProfileDataset
    >>> with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
    ...     data = ProfileDataset(file, 2)[:]
    >>> import matplotlib.pyplot as plt  # doctest: +SKIP
    >>> for coords in data:  # doctest: +SKIP
    ...     plt.plot(*coords, color="gray")
    """

    def __init__(self, file, m, transform=None):
        self._file, self.m, self._transform = file, m, transform
        self.x = file.x()

    @property
    def file(self):
        """Profile data file given at construction."""
        return self._file

    @property
    def transform(self):
        """User transform given at construction, or None."""
        return self._transform

    def default_transform(self, profiles, lengths):
        """Trim every profile to its length and shape it as ``(m, length)``.

        Returns
        -------
        list of ndarray
            One array per profile; arrays may differ in length.

        Raises
        ------
        ValueError
            If ``self.m`` is neither 1 nor 2.
        """
        pairs = zip(profiles, lengths)
        if self.m == 1:
            return [profile[:size].reshape(1, -1) for profile, size in pairs]
        if self.m == 2:
            return [
                np.stack([self.x[:size], profile[:size]])
                for profile, size in pairs
            ]
        raise ValueError(f"Invalid dimension: {self.m}")
144
+
145
+
146
class PseudoLmDataset(ProfileDatasetBase, Dataset):
    """Dataset of pseudo-landmarks sampled from each profile.

    Parameters
    ----------
    file : heavyedge.ProfileData
        Open hdf5 file.
    k : int
        Number of landmarks to sample.
    m : {1, 2}
        Profile data dimension.
        1 yields y coordinates only, 2 yields both x and y coordinates.
    transform : callable, optional
        Optional transform to be applied on a sample.

    Notes
    -----
    ``file.x()`` is evaluated once at construction and kept as ``self.x``.
    The value of `m` is only checked when samples are transformed.

    Examples
    --------
    >>> from heavyedge import get_sample_path, ProfileData
    >>> from heavyedge_dataset import PseudoLmDataset
    >>> with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
    ...     data = PseudoLmDataset(file, 10, 2)[:]
    >>> import matplotlib.pyplot as plt  # doctest: +SKIP
    >>> plt.plot(*data.transpose(1, 2, 0), color="gray")  # doctest: +SKIP
    """

    def __init__(self, file, k, m, transform=None):
        self._file, self._transform = file, transform
        self.k, self.m = k, m
        self.x = file.x()

    @property
    def file(self):
        """Profile data file given at construction."""
        return self._file

    @property
    def transform(self):
        """User transform given at construction, or None."""
        return self._transform

    def default_transform(self, profiles, lengths):
        """Sample ``self.k`` pseudo-landmarks from every profile.

        Delegates to ``pseudo_landmarks_1d`` when ``self.m == 1`` and to
        ``pseudo_landmarks_2d`` when ``self.m == 2``.

        Raises
        ------
        ValueError
            If ``self.m`` is neither 1 nor 2.
        """
        if self.m == 1:
            return pseudo_landmarks_1d(profiles, lengths, self.k)
        if self.m == 2:
            return pseudo_landmarks_2d(self.x, profiles, lengths, self.k)
        raise ValueError(f"Invalid dimension: {self.m}")
195
+
196
+
197
class MathLm1dDataset(ProfileDatasetBase, Dataset):
    """Dataset of 1-D mathematical landmarks.

    Parameters
    ----------
    file : heavyedge.ProfileData
        Open hdf5 file.
    sigma : scalar
        Standard deviation of Gaussian kernel for landmark detection.
    transform : callable, optional
        Optional transform to be applied on a sample.

    Examples
    --------
    >>> from heavyedge import get_sample_path, ProfileData
    >>> from heavyedge_dataset import MathLm1dDataset
    >>> with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
    ...     data = MathLm1dDataset(file, 32)[:]
    >>> import matplotlib.pyplot as plt  # doctest: +SKIP
    >>> plt.plot(*data.transpose(1, 2, 0), color="gray")  # doctest: +SKIP
    """

    def __init__(self, file, sigma, transform=None):
        self._file, self.sigma, self._transform = file, sigma, transform

    @property
    def file(self):
        """Profile data file given at construction."""
        return self._file

    @property
    def transform(self):
        """User transform given at construction, or None."""
        return self._transform

    def default_transform(self, profiles, lengths):
        """Locate landmarks by delegating to ``math_landmarks_1d``."""
        landmarks = math_landmarks_1d(profiles, lengths, self.sigma)
        return landmarks
234
+
235
+
236
class MathLm2dDataset(ProfileDatasetBase, Dataset):
    """Dataset of 2-D mathematical landmarks.

    Each sample is a pair ``(landmarks, center_height)``.

    Parameters
    ----------
    file : heavyedge.ProfileData
        Open hdf5 file.
    sigma : scalar
        Standard deviation of Gaussian kernel for landmark detection.
    transform : callable, optional
        Optional transform to be applied on a sample.

    Notes
    -----
    ``file.x()`` is evaluated once at construction and kept as ``self.x``.

    Examples
    --------
    >>> from heavyedge import get_sample_path, ProfileData
    >>> from heavyedge_dataset import MathLm2dDataset
    >>> with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
    ...     lm, _ = MathLm2dDataset(file, 32)[:]
    >>> import matplotlib.pyplot as plt  # doctest: +SKIP
    >>> plt.plot(*lm.transpose(1, 2, 0), color="gray")  # doctest: +SKIP
    """

    def __init__(self, file, sigma, transform=None):
        self._file, self.sigma, self._transform = file, sigma, transform
        self.x = file.x()

    @property
    def file(self):
        """Profile data file given at construction."""
        return self._file

    @property
    def transform(self):
        """User transform given at construction, or None."""
        return self._transform

    def default_transform(self, profiles, lengths):
        """Locate landmarks and center heights for every profile.

        Returns
        -------
        landmarks : ndarray
            Stacked ``[x, y]`` coordinates at the landmark indices, one entry
            per profile.
        center_height : ndarray
            Mean of the profile values before the first landmark index (after
            the indices from ``landmarks_type3`` are flipped), one per
            profile.
        """
        # TODO: cythonize this method to avoid the Python loop.
        # This will require cythonizing landmarks_type3().
        landmarks = []
        heights = []
        for profile, size in zip(profiles, lengths):
            trimmed = profile[:size]
            lm_idx = np.flip(landmarks_type3(trimmed, self.sigma))
            coords = np.stack([self.x[lm_idx], trimmed[lm_idx]])
            landmarks.append(coords)
            heights.append(np.mean(trimmed[: lm_idx[0]]))
        return np.array(landmarks), np.array(heights)

    def __getitem__(self, idx):
        """Return ``(landmarks, center_height)`` for the selected sample(s).

        An integral *idx* is loaded as a batch of one and unwrapped with
        ``[0]``. Any other index is delegated to ``__getitems__``.
        """
        if not isinstance(idx, numbers.Integral):
            lm, ch = self.__getitems__(idx)
            return (lm, ch)

        Y, L, _ = self.file[idx]
        lm, ch = self.default_transform([Y], [L])
        if self.transform:
            # NOTE(review): the transform receives two positional arguments
            # here, whereas the inherited __getitems__ passes the
            # (lm, ch) tuple as a single argument - confirm which call
            # signature user transforms are expected to support.
            lm, ch = self.transform(lm, ch)
        return (lm[0], ch[0])
@@ -0,0 +1,41 @@
1
+ """
2
+ Landmark locators
3
+ -----------------
4
+
5
+ Functions to locate landmarks from edge profiles.
6
+ """
7
+
8
+ import numpy as np
9
+ from heavyedge.api import landmarks_type3
10
+
11
+ __all__ = [
12
+ "pseudo_landmarks_1d",
13
+ "pseudo_landmarks_2d",
14
+ "math_landmarks_1d",
15
+ ]
16
+
17
+
18
def pseudo_landmarks_1d(Ys, Ls, k):
    """Sample `k` evenly spaced y values from each profile.

    Parameters
    ----------
    Ys : iterable of 1-D arrays
        Profile data.
    Ls : iterable of int
        Length of each profile; sampling spans indices ``0`` to ``L - 1``.
    k : int
        Number of landmarks to sample.

    Returns
    -------
    ndarray
        One ``(1, k)`` entry per profile.
    """
    sampled = [
        Y[np.linspace(0, L - 1, k, dtype=int)].reshape(1, -1)
        for Y, L in zip(Ys, Ls)
    ]
    return np.array(sampled)
24
+
25
+
26
def pseudo_landmarks_2d(x, Ys, Ls, k):
    """Sample `k` evenly spaced (x, y) points from each profile.

    Parameters
    ----------
    x : 1-D array
        x coordinates shared by all profiles.
    Ys : iterable of 1-D arrays
        Profile data.
    Ls : iterable of int
        Length of each profile; sampling spans indices ``0`` to ``L - 1``.
    k : int
        Number of landmarks to sample.

    Returns
    -------
    ndarray
        One ``(2, k)`` entry per profile, x row first and y row second.
    """
    sampled = []
    for Y, L in zip(Ys, Ls):
        picks = np.linspace(0, L - 1, k, dtype=int)
        xs, ys = x[picks], Y[picks]
        sampled.append(np.stack([xs, ys]))
    return np.array(sampled)
32
+
33
+
34
def math_landmarks_1d(Ys, Ls, sigma):
    """Locate 1-D mathematical landmarks of each profile.

    Parameters
    ----------
    Ys : iterable of 1-D arrays
        Profile data.
    Ls : iterable of int
        Length of each profile; each profile is trimmed to ``Y[:L]``.
    sigma : scalar
        Passed to ``landmarks_type3`` for landmark detection.

    Returns
    -------
    ndarray
        One ``(1, n)`` entry per profile. The first value is the mean of the
        profile before the first landmark index (after the indices from
        ``landmarks_type3`` are flipped); the rest are the profile values at
        the landmark indices.
    """
    ret = []
    for Y, L in zip(Ys, Ls):
        Y = Y[:L]
        indices = np.flip(landmarks_type3(Y, sigma))
        center_height = np.mean(Y[: indices[0]])
        # np.concatenate rather than np.concat: the latter alias only exists
        # in NumPy >= 2.0, and the package does not require that version.
        y = np.concatenate([[center_height], Y[indices]])
        ret.append(y.reshape(1, -1))
    return np.array(ret)
@@ -0,0 +1,39 @@
1
+ Metadata-Version: 2.4
2
+ Name: heavyedge-dataset
3
+ Version: 0.1.0
4
+ Summary: PyTorch-compatible edge profile dataset API
5
+ Author-email: Jisoo Song <jeesoo9595@snu.ac.kr>
6
+ License-Expression: MIT
7
+ Project-URL: homepage, https://pypi.python.org/pypi/heavyedge-dataset/
8
+ Project-URL: source, https://github.com/heavyedge/heavyedge-dataset
9
+ Project-URL: documentation, https://heavyedge-dataset.readthedocs.io
10
+ Classifier: Development Status :: 5 - Production/Stable
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: Programming Language :: Python
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3 :: Only
18
+ Classifier: Topic :: Scientific/Engineering
19
+ Classifier: Operating System :: OS Independent
20
+ Requires-Python: >=3.10
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Requires-Dist: heavyedge>=1.1.2
24
+ Requires-Dist: torch
25
+ Provides-Extra: test
26
+ Requires-Dist: pytest; extra == "test"
27
+ Provides-Extra: doc
28
+ Requires-Dist: sphinx; extra == "doc"
29
+ Requires-Dist: numpydoc; extra == "doc"
30
+ Requires-Dist: pydata_sphinx_theme; extra == "doc"
31
+ Requires-Dist: matplotlib; extra == "doc"
32
+ Provides-Extra: dev
33
+ Requires-Dist: flake8; extra == "dev"
34
+ Requires-Dist: black; extra == "dev"
35
+ Requires-Dist: isort; extra == "dev"
36
+ Requires-Dist: heavyedge-dataset[doc,test]; extra == "dev"
37
+ Dynamic: license-file
38
+
39
+ # HeavyEdge-Dataset
@@ -0,0 +1,8 @@
1
+ heavyedge_dataset/__init__.py,sha256=X9li4ScLZs9TSlbH4ApoctQa6g7vglH0nkQadcb7Gq4,273
2
+ heavyedge_dataset/datasets.py,sha256=j8a3KRXzWd0QpceAvP7ATvNUj5VSjyx445ksMMMdJio,8150
3
+ heavyedge_dataset/landmarks.py,sha256=Tn7Pd91cWLs9txYu9mkNxC6OX-Y6VTNMzwTNzzCE4Bo,937
4
+ heavyedge_dataset-0.1.0.dist-info/licenses/LICENSE,sha256=pBq2E7YJkUcEINdYeERL4RVFOQICd_MwJq6OusuAPGc,1066
5
+ heavyedge_dataset-0.1.0.dist-info/METADATA,sha256=QsFgB89UjLFlONWWcgWvCaxhlSCgK535OxBXeB_oMrg,1515
6
+ heavyedge_dataset-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
7
+ heavyedge_dataset-0.1.0.dist-info/top_level.txt,sha256=wpRFI8TlkYFGetc17appkyybauBvzhGGvyueunsdJTc,18
8
+ heavyedge_dataset-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 heavyedge
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ heavyedge_dataset