heavyedge-dataset 0.2.0__py3-none-any.whl → 1.0.0.post0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- heavyedge_dataset/__init__.py +71 -224
- {heavyedge_dataset-0.2.0.dist-info → heavyedge_dataset-1.0.0.post0.dist-info}/METADATA +4 -5
- heavyedge_dataset-1.0.0.post0.dist-info/RECORD +6 -0
- heavyedge_dataset-0.2.0.dist-info/RECORD +0 -6
- {heavyedge_dataset-0.2.0.dist-info → heavyedge_dataset-1.0.0.post0.dist-info}/WHEEL +0 -0
- {heavyedge_dataset-0.2.0.dist-info → heavyedge_dataset-1.0.0.post0.dist-info}/licenses/LICENSE +0 -0
- {heavyedge_dataset-0.2.0.dist-info → heavyedge_dataset-1.0.0.post0.dist-info}/top_level.txt +0 -0
heavyedge_dataset/__init__.py
CHANGED
@@ -1,97 +1,31 @@
-"""
+"""Package to load edge profile data using PyTorch dataset.
 
-Refer to `PyTorch tutorial <tutorial>`_ for information about custom
+Refer to `PyTorch tutorial <tutorial>`_ for information about custom dataset.
 
 .. _tutorial: https://docs.pytorch.org/tutorials/beginner/data_loading_tutorial.html
 """
 
-import abc
 import numbers
 from collections.abc import Sequence
 
 import numpy as np
-from heavyedge.api import landmarks_type3
 from torch.utils.data import Dataset
 
 __all__ = [
     "ProfileDataset",
-    "PseudoLandmarkDataset",
-    "MathematicalLandmarkDataset",
 ]
 
 
-class
-    """
+class ProfileDataset(Dataset):
+    """Edge profile dataset.
 
-
-    @abc.abstractmethod
-    def file(self):
-        """Profile data file.
+    Loads data as a tuple of two numpy arrays:
 
-
-
-        heavyedge.ProfileData
-        """
+    1. Profile data, shape: (N, m, L).
+    2. Length of each profile, shape: (N,).
 
-
-
-    def transform(self):
-        """Optional transformation to be applied on samples.
-
-        Returns
-        -------
-        Callable
-        """
-
-    def __len__(self):
-        return len(self.file)
-
-    def __getitem__(self, idx):
-        if isinstance(idx, numbers.Integral):
-            Y, L, _ = self.file[idx]
-            Ys, Ls = [Y], [L]
-        else:
-            # Support multi-indexing
-            idxs = idx
-            needs_sort = isinstance(idx, (Sequence, np.ndarray))
-            if needs_sort:
-                # idxs must be sorted for h5py
-                idxs = np.array(idxs)
-                sort_idx = np.argsort(idxs)
-                idxs = idxs[sort_idx]
-            Ys, Ls, _ = self.file[idxs]
-            if needs_sort:
-                reverse_idx = np.argsort(sort_idx)
-                Ys = Ys[reverse_idx]
-                Ls = Ls[reverse_idx]
-
-        ret = self.default_transform(Ys, Ls)
-        if self.transform:
-            ret = self.transform(ret)
-        return ret
-
-    def __getitems__(self, idxs):
-        # PyTorch API
-        return self.__getitem__(idxs)
-
-    @abc.abstractmethod
-    def default_transform(self, profiles, lengths):
-        """Default data transformation.
-
-        Subclass must implement this method to transform profile data into target data.
-
-        Parameters
-        ----------
-        profiles : (N, M) array
-            Profile data.
-        lengths : (N,) array
-            Length of each profile in *profiles*.
-        """
-        pass
-
-
-class ProfileDataset(ProfileDatasetBase, Dataset):
-    """Full profile dataset in 1-D or 2-D.
+    N is the number of loaded data, m is dimension of coordinates, and
+    L is the maximum length of profiles.
 
     Parameters
     ----------
@@ -108,165 +42,78 @@ class ProfileDataset(ProfileDatasetBase, Dataset):
     >>> from heavyedge import get_sample_path, ProfileData
     >>> from heavyedge_dataset import ProfileDataset
     >>> with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
-    ...
-    >>>
-
-
-
-
-    def __init__(self, file, m, transform=None):
-        self._file = file
-        self.m = m
-        self._transform = transform
-
-        self.x = file.x()
-
-    @property
-    def file(self):
-        return self._file
-
-    @property
-    def transform(self):
-        return self._transform
-
-    def default_transform(self, profiles, lengths):
-        """Crop profiles by their contact points.
+    ...     profiles, lengths = ProfileDataset(file, m=2)[:]
+    >>> profiles.shape
+    (22, 2, 3200)
+    >>> lengths.shape
+    (22,)
 
-
-
-        profiles : (N, M) array
-            Profile data.
-        lengths : (N,) array
-            Length of each profile in *profiles*.
-        """
-        if self.m == 1:
-            ret = [Y[:L].reshape(1, -1) for Y, L in zip(profiles, lengths)]
-        elif self.m == 2:
-            ret = [np.stack([self.x[:L], Y[:L]]) for Y, L in zip(profiles, lengths)]
-        else:
-            raise ValueError(f"Invalid dimension: {self.m}")
-        return ret
-
-
-class PseudoLandmarkDataset(ProfileDatasetBase, Dataset):
-    """Pseudo-landmark dataset in 1-D or 2-D.
-
-    Pseudo-landmarks are points that are equidistantly sampled.
-
-    Parameters
-    ----------
-    file : heavyedge.ProfileData
-        Open hdf5 file.
-    k : int
-        Number of landmarks to sample.
-    m : {1, 2}
-        Profile data dimension.
-        1 means only y coordinates, and 2 means both x and y coordinates.
-    transform : callable, optional
-        Optional transformation to be applied on samples.
+    Should this dataset be used for :class:`torch.utils.data.DataLoader`,
+    ``collate_fn`` argument should be passed to the data loader.
 
-
-    --------
-    >>> from heavyedge import get_sample_path, ProfileData
-    >>> from heavyedge_dataset import PseudoLandmarkDataset
+    >>> from torch.utils.data import DataLoader
     >>> with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
-    ...
-
-    ...
+    ...     dataset = ProfileDataset(file, m=2)
+    ...     loader = DataLoader(dataset, collate_fn=lambda x: x)
+    ...     profiles, lengths = next(iter(loader))
+    >>> profiles.shape
+    (1, 2, 3200)
+    >>> lengths.shape
+    (1,)
+
+    If data should be loaded as :class:`torch.Tensor`, pass ``transform`` argument.
+
+    >>> import torch
+    >>> def to_tensor(sample):
+    ...     return (torch.from_numpy(sample[0]), torch.from_numpy(sample[1]))
+    >>> with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
+    ...     dataset = ProfileDataset(file, m=2, transform=to_tensor)
+    ...     loader = DataLoader(dataset, collate_fn=lambda x: x)
+    ...     profiles, lengths = next(iter(loader))
+    >>> type(profiles)
+    <class 'torch.Tensor'>
     """
 
-    def __init__(self, file,
-        self.
-        self.k = k
+    def __init__(self, file, m=1, transform=None):
+        self.file = file
         self.m = m
-        self.
-
+        self.transform = transform
         self.x = file.x()
 
-
-
-        return self._file
-
-    @property
-    def transform(self):
-        return self._transform
-
-    def default_transform(self, profiles, lengths):
-        """Sample pseudo-landmarks from profiles.
+    def __len__(self):
+        return len(self.file)
 
-
-
-
-
-
-
-
-
+    def __getitem__(self, idx):
+        if isinstance(idx, numbers.Integral):
+            Y, L, _ = self.file[idx]
+            Y = Y[np.newaxis, :]
+        else:
+            # Support multi-indexing
+            idxs = idx
+            needs_sort = isinstance(idx, (Sequence, np.ndarray))
+            if needs_sort:
+                # idxs must be sorted for h5py
+                idxs = np.array(idxs)
+                sort_idx = np.argsort(idxs)
+                idxs = idxs[sort_idx]
+            Y, L, _ = self.file[idxs]
+            if needs_sort:
+                reverse_idx = np.argsort(sort_idx)
+                Y = Y[reverse_idx]
+                L = L[reverse_idx]
+            Y = Y[:, np.newaxis, :]
         if self.m == 1:
-
-            idxs = np.linspace(0, L - 1, self.k, dtype=int)
-            ret.append(Y[idxs].reshape(1, -1))
+            pass
         elif self.m == 2:
-
-
-            ret.append(np.stack([self.x[idxs], Y[idxs]]))
+            x = np.tile(self.x, Y.shape[:-1] + (1,))
+            Y = np.concatenate([x, Y], axis=-2)
         else:
-            raise ValueError(f"
-
-
-
-
-    """Mathematical landmark dataset in 1-D.
-
-    Mathematical landmarks are points which are choosed by their
-    mathematical properties, i.e., slope or curvature.
-
-    Parameters
-    ----------
-    file : heavyedge.ProfileData
-        Open hdf5 file.
-    sigma : scalar
-        Standard deviation of Gaussian kernel for landmark detection.
-    transform : callable, optional
-        Optional transformation to be applied on samples.
-
-    Examples
-    --------
-    >>> from heavyedge import get_sample_path, ProfileData
-    >>> from heavyedge_dataset import MathematicalLandmarkDataset
-    >>> with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
-    ...     data = MathematicalLandmarkDataset(file, 32)[:]
-    >>> import matplotlib.pyplot as plt  # doctest: +SKIP
-    ...     plt.plot(*data.transpose(1, 2, 0), color="gray")
-    """
-
-    def __init__(self, file, sigma, transform=None):
-        self._file = file
-        self.sigma = sigma
-        self._transform = transform
-
-    @property
-    def file(self):
-        return self._file
-
-    @property
-    def transform(self):
-        return self._transform
-
-    def default_transform(self, profiles, lengths):
-        """Detect mathematical landmarks from profiles.
+            raise ValueError(f"Unsupported dimension: {self.m} (Must be 1 or 2).")
+        ret = (Y, L)
+        if self.transform is not None:
+            ret = self.transform(ret)
+        return ret
 
-
-
-
-            Profile data.
-        lengths : (N,) array
-            Length of each profile in *profiles*.
-        """
-        ret = []
-        for Y, L in zip(profiles, lengths):
-            Y = Y[:L]
-            indices = np.flip(landmarks_type3(Y, self.sigma))
-            y = np.concat([[np.mean(Y[: indices[0]])], Y[indices]])
-            ret.append(y.reshape(1, -1))
-        return np.array(ret)
+    def __getitems__(self, idxs):
+        # PyTorch API
+        return self.__getitem__(idxs)
{heavyedge_dataset-0.2.0.dist-info → heavyedge_dataset-1.0.0.post0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: heavyedge-dataset
-Version: 0.2.0
+Version: 1.0.0.post0
 Summary: PyTorch-compatible edge profile dataset API
 Author-email: Jisoo Song <jeesoo9595@snu.ac.kr>
 License-Expression: MIT
@@ -28,7 +28,6 @@ Provides-Extra: doc
 Requires-Dist: sphinx; extra == "doc"
 Requires-Dist: numpydoc; extra == "doc"
 Requires-Dist: pydata_sphinx_theme; extra == "doc"
-Requires-Dist: matplotlib; extra == "doc"
 Provides-Extra: dev
 Requires-Dist: flake8; extra == "dev"
 Requires-Dist: black; extra == "dev"
@@ -49,14 +48,14 @@ Package to load edge profile data as PyTorch dataset.
 
 ## Usage
 
-HeavyEdge-Dataset provides
+HeavyEdge-Dataset provides `ProfileDataset` which wraps profile data file.
 
-A simple use case to load
+A simple use case to load two-dimensional coordinates of profiles and their lengths:
 
 ```python
 from heavyedge import get_sample_path, ProfileData
 from heavyedge_dataset import ProfileDataset
-with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
+with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
     data = ProfileDataset(file, 2)[:]
 ```
 
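
The README example loads everything eagerly with `[:]`; for batched loading the new docstring recommends passing `collate_fn` to `torch.utils.data.DataLoader`. A minimal sketch of that workflow follows, assuming a PyTorch version whose DataLoader dispatches to `__getitems__` (as the doctest examples above rely on); the batch size and tensor conversion are illustrative choices, not package defaults:

```python
# Assumed batching workflow (not from the package docs), built on the
# DataLoader/collate_fn usage shown in the docstring above.
import torch
from torch.utils.data import DataLoader

from heavyedge import get_sample_path, ProfileData
from heavyedge_dataset import ProfileDataset

def to_tensors(sample):
    # ``sample`` is the (profiles, lengths) tuple returned by ProfileDataset.
    profiles, lengths = sample
    return torch.as_tensor(profiles), torch.as_tensor(lengths)

with ProfileData(get_sample_path("Prep-Type2.h5")) as file:
    dataset = ProfileDataset(file, m=2, transform=to_tensors)
    # The identity collate_fn keeps the already-collated (profiles, lengths)
    # tuple instead of letting PyTorch re-collate it.
    loader = DataLoader(dataset, batch_size=4, collate_fn=lambda batch: batch)
    for profiles, lengths in loader:
        # profiles: (batch, 2, L) float tensor; lengths: (batch,) int tensor
        pass
```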
heavyedge_dataset-1.0.0.post0.dist-info/RECORD
ADDED
@@ -0,0 +1,6 @@
+heavyedge_dataset/__init__.py,sha256=CmquaxuwXJYTCGInmzI8Tp0Z9hJBVtEK9eH3NLyoREY,3737
+heavyedge_dataset-1.0.0.post0.dist-info/licenses/LICENSE,sha256=pBq2E7YJkUcEINdYeERL4RVFOQICd_MwJq6OusuAPGc,1066
+heavyedge_dataset-1.0.0.post0.dist-info/METADATA,sha256=34mMpniE5Nn4E6mwdVKhT4m6mnEUKRoCce9Tv6aGNHI,3434
+heavyedge_dataset-1.0.0.post0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+heavyedge_dataset-1.0.0.post0.dist-info/top_level.txt,sha256=wpRFI8TlkYFGetc17appkyybauBvzhGGvyueunsdJTc,18
+heavyedge_dataset-1.0.0.post0.dist-info/RECORD,,
heavyedge_dataset-0.2.0.dist-info/RECORD
REMOVED
@@ -1,6 +0,0 @@
-heavyedge_dataset/__init__.py,sha256=iBFvupZMaHpTGUxZOJvVKfD4bAOAdikK60a8bd4oxB0,7746
-heavyedge_dataset-0.2.0.dist-info/licenses/LICENSE,sha256=pBq2E7YJkUcEINdYeERL4RVFOQICd_MwJq6OusuAPGc,1066
-heavyedge_dataset-0.2.0.dist-info/METADATA,sha256=vZF2VwCYUmf7OzXfCO-pg9cHEeE7kCYIfl1JGmfTad0,3496
-heavyedge_dataset-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-heavyedge_dataset-0.2.0.dist-info/top_level.txt,sha256=wpRFI8TlkYFGetc17appkyybauBvzhGGvyueunsdJTc,18
-heavyedge_dataset-0.2.0.dist-info/RECORD,,
{heavyedge_dataset-0.2.0.dist-info → heavyedge_dataset-1.0.0.post0.dist-info}/WHEEL
RENAMED
File without changes
{heavyedge_dataset-0.2.0.dist-info → heavyedge_dataset-1.0.0.post0.dist-info}/licenses/LICENSE
RENAMED
File without changes
{heavyedge_dataset-0.2.0.dist-info → heavyedge_dataset-1.0.0.post0.dist-info}/top_level.txt
RENAMED
File without changes