sgptools 1.2.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sgptools/__init__.py +3 -4
- sgptools/core/__init__.py +1 -0
- sgptools/{models/core → core}/augmented_gpr.py +11 -17
- sgptools/{models/core → core}/augmented_sgpr.py +27 -34
- sgptools/core/osgpr.py +417 -0
- sgptools/core/transformations.py +699 -0
- sgptools/kernels/__init__.py +0 -8
- sgptools/kernels/attentive_kernel.py +214 -69
- sgptools/kernels/neural_kernel.py +268 -92
- sgptools/kernels/neural_network.py +127 -28
- sgptools/methods.py +1047 -0
- sgptools/objectives.py +275 -0
- sgptools/utils/__init__.py +0 -9
- sgptools/utils/data.py +452 -149
- sgptools/utils/gpflow.py +335 -174
- sgptools/utils/metrics.py +375 -102
- sgptools/utils/misc.py +145 -111
- sgptools/utils/tsp.py +224 -84
- sgptools-2.0.0.dist-info/METADATA +216 -0
- sgptools-2.0.0.dist-info/RECORD +23 -0
- {sgptools-1.2.0.dist-info → sgptools-2.0.0.dist-info}/WHEEL +1 -1
- sgptools/models/__init__.py +0 -10
- sgptools/models/bo.py +0 -118
- sgptools/models/cma_es.py +0 -121
- sgptools/models/continuous_sgp.py +0 -68
- sgptools/models/core/__init__.py +0 -9
- sgptools/models/core/osgpr.py +0 -291
- sgptools/models/core/transformations.py +0 -434
- sgptools/models/greedy_mi.py +0 -115
- sgptools/models/greedy_sgp.py +0 -97
- sgptools-1.2.0.dist-info/METADATA +0 -39
- sgptools-1.2.0.dist-info/RECORD +0 -27
- {sgptools-1.2.0.dist-info → sgptools-2.0.0.dist-info/licenses}/LICENSE.txt +0 -0
- {sgptools-1.2.0.dist-info → sgptools-2.0.0.dist-info}/top_level.txt +0 -0
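Reading the rename entries above: the core model components (`augmented_gpr.py`, `augmented_sgpr.py`, `osgpr.py`, `transformations.py`) moved from `sgptools.models.core` to `sgptools.core`, and the per-method model files under `sgptools/models/` were removed in favor of the new `sgptools/methods.py` and `sgptools/objectives.py`. A hypothetical compatibility sketch for downstream code that must import across both versions follows; the module paths come from the rename entries above, while the try/except fallback pattern is illustrative and not part of the package:

```python
# Hypothetical import shim (module paths per the rename entries above;
# not an official sgptools API -- verify names against the 2.0.0 release).
try:
    # sgptools >= 2.0.0: core components live in sgptools.core
    from sgptools.core import osgpr, transformations
except ImportError:
    # sgptools 1.x: the same modules lived under sgptools.models.core
    from sgptools.models.core import osgpr, transformations
```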
sgptools/utils/data.py
CHANGED
@@ -14,199 +14,502 @@
 
 import numpy as np
 from matplotlib import path
-from .
+from skimage.draw import line
+from .misc import get_inducing_pts
 from sklearn.preprocessing import StandardScaler
 from hkb_diamondsquare.DiamondSquare import diamond_square
 
 import PIL
-PIL.Image.MAX_IMAGE_PIXELS = 317500000
 
-
-# Utils used to prepare synthetic datasets
+PIL.Image.MAX_IMAGE_PIXELS = 900000000
 
-
-
-
+from typing import List, Tuple, Optional, Any
+
+
+def remove_polygons(
+        X: np.ndarray, Y: np.ndarray,
+        polygons: List[path.Path]) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Removes points that fall inside a list of matplotlib Path polygons.
 
     Args:
-        X
-        Y
-        polygons (
+        X (np.ndarray): (N,); Array of x-coordinates.
+        Y (np.ndarray): (N,); Array of y-coordinates.
+        polygons (List[path.Path]): A list of `matplotlib.path.Path` objects.
+            Points within these polygons will be removed.
 
     Returns:
-
-
-
+        Tuple[np.ndarray, np.ndarray]: A tuple containing two 1D NumPy arrays:
+            (filtered_X_coordinates, filtered_Y_coordinates).
+
+    Usage:
+        ```python
+        import matplotlib.path as mpath
+        import numpy as np
+
+        # Example points
+        X_coords = np.array([0, 1, 2, 3, 4, 5])
+        Y_coords = np.array([0, 1, 2, 3, 4, 5])
+
+        # Define a square polygon (points inside will be removed)
+        polygon_vertices = np.array([[1, 1], [1, 3], [3, 3], [3, 1]])
+        square_polygon = mpath.Path(polygon_vertices)
+
+        filtered_X, filtered_Y = remove_polygons(X_coords, Y_coords, [square_polygon])
+        ```
+    """
     points = np.array([X.flatten(), Y.flatten()]).T
     for polygon in polygons:
         p = path.Path(polygon)
         points = points[~p.contains_points(points)]
     return points[:, 0], points[:, 1]
 
-
-
-
+
+def remove_circle_patches(
+        X: np.ndarray, Y: np.ndarray,
+        circle_patches: List[Any]) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Removes points that fall inside a list of matplotlib Circle patches.
+
+    Note: This function assumes that the `circle_patch` objects have a `contains_points` method,
+    similar to `matplotlib.patches.Circle` or `matplotlib.path.Path`.
 
     Args:
-        X
-        Y
-        circle_patches (
+        X (np.ndarray): (N,); Array of x-coordinates.
+        Y (np.ndarray): (N,); Array of y-coordinates.
+        circle_patches (List[Any]): A list of objects representing circle patches.
+            Each object must have a `contains_points(points)` method.
 
     Returns:
-
-
-
+        Tuple[np.ndarray, np.ndarray]: A tuple containing two 1D NumPy arrays:
+            (filtered_X_coordinates, filtered_Y_coordinates).
+
+    Usage:
+        ```python
+        import numpy as np
+        from matplotlib.patches import Circle
+        from matplotlib.collections import PatchCollection
+
+        # Example points
+        X_coords = np.array([0, 1, 2, 3, 4, 5])
+        Y_coords = np.array([0, 1, 2, 3, 4, 5])
+
+        # Define a circle patch centered at (2,2) with radius 1.5
+        circle = Circle((2, 2), 1.5)
+
+        filtered_X, filtered_Y = remove_circle_patches(X_coords, Y_coords, [circle])
+        ```
+    """
     points = np.array([X.flatten(), Y.flatten()]).T
     for circle_patch in circle_patches:
         points = points[~circle_patch.contains_points(points)]
     return points[:, 0], points[:, 1]
 
-
-
-
+
+def point_pos(point: np.ndarray, d: float, theta: float) -> np.ndarray:
+    """
+    Generates a new point at a specified distance `d` and angle `theta`
+    (in radians) from an existing point. This function applies the
+    transformation to multiple points simultaneously.
 
     Args:
-        point (ndarray): (N, 2);
-        d (float): distance
-        theta (float): angle in radians
+        point (np.ndarray): (N, 2); Array of original 2D points (x, y).
+        d (float): The distance from the original point to the new point.
+        theta (float): The angle in radians for the direction of displacement.
 
     Returns:
-
-
-
-
+        np.ndarray: (N, 2); An array of new points after displacement.
+
+    Usage:
+        ```python
+        import numpy as np
+
+        # Example points (N=2)
+        initial_points = np.array([[0.0, 0.0], [1.0, 1.0]])
+        # Displace by distance 5.0 at angle pi/4 (45 degrees)
+        new_points = point_pos(initial_points, 5.0, np.pi/4)
+        # Expected:
+        # New points:
+        # [[3.53553391 3.53553391]
+        #  [4.53553391 4.53553391]]
+        ```
+    """
+    return np.c_[point[:, 0] + d * np.cos(theta),
+                 point[:, 1] + d * np.sin(theta)]
 
-####################################################
 
-def prep_tif_dataset(dataset_path
-
-
-
+def prep_tif_dataset(dataset_path: str,
+                     dim_max: int = 2500,
+                     verbose: bool = True) -> np.ndarray:
+    """
+    Loads and preprocesses a dataset from a GeoTIFF (.tif) file.
+    The function handles downsampling for large files and replaces NoData values (-999999.0) with NaN.
 
-
-
-    ```gdalwarp -tr 50 50 <input>.tif <output>.tif```
+    For very large .tif files, it's recommended to downsample them externally using GDAL:
+    `gdalwarp -tr 50 50 <input>.tif <output>.tif`
 
     Args:
-        dataset_path (str): Path to the dataset file
+        dataset_path (str): Path to the GeoTIFF dataset file.
+        dim_max (int): Maximum allowed dimension (width or height) for the loaded dataset.
+            If either dimension exceeds `dim_max`, the image will be downsampled
+            to fit, maintaining aspect ratio. Defaults to 2500.
+        verbose (bool): If `True`, print details about loading and downsampling. Defaults to True.
 
     Returns:
-
-
-
+        np.ndarray: (H, W); The preprocessed 2D NumPy array representing the dataset,
+            with NoData values converted to NaN.
+
+    Usage:
+        ```python
+        # Assuming 'path/to/your/dataset.tif' exists
+        # from sgptools.utils.data import prep_tif_dataset
+        # dataset_array = prep_tif_dataset('path/to/your/dataset.tif', dim_max=1000)
+        ```
+    """
     data = PIL.Image.open(dataset_path)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-def prep_synthetic_dataset(shape=(
-                           min_height=0.0,
-                           max_height=30.0,
-                           roughness=0.5,
-                           random_seed=None,
-                           **kwargs):
-
-
-
-
-
+    data_array = np.array(data)
+    if verbose:
+        print(
+            f"Loaded dataset from {dataset_path} with shape {data_array.shape}"
+        )
+
+    downsample_factor = np.ceil(np.max(data_array.shape) / dim_max).astype(int)
+    if downsample_factor <= 1:
+        downsample_factor = 1
+    elif verbose:
+        print(
+            f'Downsampling by a factor of {downsample_factor} to fit the maximum dimension of {dim_max}'
+        )
+
+    # Downsample and convert to float, replace specific NoData value with NaN
+    data_array = data_array[::downsample_factor, ::downsample_factor].astype(
+        float)
+    data_array[np.where(data_array == -999999.0)] = np.nan
+    return data_array
+
+
+def prep_synthetic_dataset(shape: Tuple[int, int] = (1000, 1000),
+                           min_height: float = 0.0,
+                           max_height: float = 30.0,
+                           roughness: float = 0.5,
+                           random_seed: Optional[int] = None,
+                           **kwargs: Any) -> np.ndarray:
+    """
+    Generates a 2D synthetic elevation (or similar) dataset using the diamond-square algorithm.
+
+    Reference: [https://github.com/buckinha/DiamondSquare](https://github.com/buckinha/DiamondSquare)
+
     Args:
-        shape (
-        min_height (float): Minimum allowed
-        max_height (float): Maximum allowed
-        roughness (float):
-
+        shape (Tuple[int, int]): (width, height); The dimensions of the generated grid. Defaults to (1000, 1000).
+        min_height (float): Minimum allowed value in the generated data. Defaults to 0.0.
+        max_height (float): Maximum allowed value in the generated data. Defaults to 30.0.
+        roughness (float): Controls the fractal dimension of the generated terrain. Higher
+            values produce rougher terrain. Defaults to 0.5.
+        random_seed (Optional[int]): Seed for reproducibility of the generated data. Defaults to None.
+        **kwargs: Additional keyword arguments passed directly to the `diamond_square` function.
 
     Returns:
-
-
-
+        np.ndarray: (height, width); The generated 2D synthetic dataset.
+
+    Usage:
+        ```python
+        # from sgptools.utils.data import prep_synthetic_dataset
+        # synthetic_data = prep_synthetic_dataset(shape=(256, 256), roughness=0.7, random_seed=42)
+        ```
+    """
     data = diamond_square(shape=shape,
-                          min_height=min_height,
-                          max_height=max_height,
+                          min_height=min_height,
+                          max_height=max_height,
                           roughness=roughness,
                           random_seed=random_seed,
                           **kwargs)
+    return data.astype(float)
 
-    # create x and y coordinates from the extent
-    x_coords = np.arange(0, data.shape[0])/10
-    y_coords = np.arange(0, data.shape[1])/10
-    xx, yy = np.meshgrid(x_coords, y_coords)
-    X = np.c_[xx.ravel(), yy.ravel()]
-    y = data.ravel()
-    y = y.reshape(-1, 1)
-
-    return X.astype(float), y.astype(float)
-
-####################################################
-
-def get_dataset(dataset_path=None,
-                num_train=1000,
-                num_test=2500,
-                num_candidates=150,
-                **kwargs):
-    """Method to generate/load datasets and preprocess them for SP/IPP. The method uses kmeans to
-    generate train and test sets.
-
-    Args:
-        dataset_path (str): Path to a tif dataset file. If None, the method will generate synthetic data.
-        num_train (int): Number of training samples to generate.
-        num_test (int): Number of testing samples to generate.
-        num_candidates (int): Number of candidate locations to generate.
 
-
-        X_train (ndarray): (n, d); Training set inputs
-        y_train (ndarray): (n, 1); Training set labels
-        X_test (ndarray): (n, d); Testing set inputs
-        y_test (ndarray): (n, 1); Testing set labels
-        candidates (ndarray): (n, d); Candidate sensor placement locations
-        X (ndarray): (n, d); Full dataset inputs
-        y (ndarray): (n, 1); Full dataset labels
+class Dataset:
     """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    A class to load, preprocess, and manage access to a dataset for sensor placement
+    and informative path planning tasks.
+
+    It handles the following operations:
+
+    * Loading from a GeoTIFF file, loading from a numpy array, and generating a synthetic dataset.
+    * Sampling training, testing, and candidate points from valid (non-NaN) locations.
+    * Standardizing both the input coordinates (X) and the labels (y) using `StandardScaler`.
+    * Providing methods to retrieve different subsets of the data (train, test, candidates)
+      and to sample sensor data at specified locations or along a path.
+
+    The dataset is expected to be a 2D array where each element represents a label
+    (e.g., elevation, temperature, environmental reading).
+    """
+
+    def __init__(self,
+                 dataset_path: Optional[str] = None,
+                 num_train: int = 1000,
+                 num_test: int = 2500,
+                 num_candidates: int = 150,
+                 verbose: bool = True,
+                 data=None,
+                 dtype=np.float64,
+                 **kwargs: Any):
+        """
+        Initializes the Dataset class.
+
+        Args:
+            dataset_path (Optional[str]): Path to the dataset file (e.g., '.tif'). If None,
+                a synthetic dataset will be generated. Defaults to None.
+                Alternatively, pass an array of data to the constructor
+                with the `data` argument to use a custom dataset.
+            num_train (int): Number of training points to sample from the dataset. Defaults to 1000.
+            num_test (int): Number of testing points to sample from the dataset. Defaults to 2500.
+            num_candidates (int): Number of candidate points for potential sensor placements
+                to sample from the dataset. Defaults to 150.
+            verbose (bool): If `True`, print details about dataset loading, sampling, and preprocessing.
+                Defaults to True.
+            data (Optional[np.ndarray]): (height, width, d); 2D n-dimensional array of data.
+            dtype (Optional[np.dtype]): The type of the output arrays. If dtype is not given,
+                it will be set to np.float64.
+            **kwargs: Additional keyword arguments passed to `prep_tif_dataset` or `prep_synthetic_dataset`.
+        """
+        self.verbose = verbose
+        self.dtype = dtype
+
+        # Load/Create the data
+        if data is not None:
+            self.y = data
+        elif dataset_path is not None:
+            self.y = prep_tif_dataset(dataset_path=dataset_path,
+                                      verbose=verbose,
+                                      **kwargs)
+        else:
+            self.y = prep_synthetic_dataset(**kwargs)
+
+        # Store original dimensions for reshaping
+        w, h = self.y.shape[0], self.y.shape[1]
+        if self.verbose:
+            print(f"Original dataset shape: {self.y.shape}")
+
+        # Get valid points (non-NaN labels)
+        mask = np.where(np.isfinite(self.y))
+        X_valid_pixel_coords = np.column_stack((mask[0], mask[1]))
+
+        # Sample training, testing, and candidate points from valid pixel coordinates
+        # `get_inducing_pts` with random=True is used for random sampling
+        X_train_pixel_coords = get_inducing_pts(X_valid_pixel_coords,
+                                                num_train,
+                                                random=True)
+        y_train_raw = self.y[X_train_pixel_coords[:, 0],
+                             X_train_pixel_coords[:, 1]].reshape(-1, 1)
+
+        # If num_test is equal to dataset size, return test data in original order, enables plotting with imshow
+        if self.y.shape[0] * self.y.shape[1] == num_test:
+            X_test_pixel_coords = X_valid_pixel_coords
+            y_test_raw = self.y.reshape(-1, 1)
+        else:
+            X_test_pixel_coords = get_inducing_pts(X_valid_pixel_coords,
+                                                   num_test,
+                                                   random=True)
+            y_test_raw = self.y[X_test_pixel_coords[:, 0],
+                                X_test_pixel_coords[:, 1]].reshape(-1, 1)
+
+        X_candidates_pixel_coords = get_inducing_pts(X_valid_pixel_coords,
+                                                     num_candidates,
+                                                     random=True)
+
+        # Standardize dataset X coordinates (pixel coords to normalized space)
+        self.X_scaler = StandardScaler()
+        self.X_scaler.fit(X_train_pixel_coords)
+
+        # Adjust X_scaler's variance/scale to ensure uniform scaling across dimensions
+        # and to scale the data to have an extent of at least 10.0 in each dimension.
+        # This ensures consistency and prevents issues with very small scales.
+        ind = np.argmax(self.X_scaler.var_)
+        self.X_scaler.var_ = np.ones_like(
+            self.X_scaler.var_) * self.X_scaler.var_[ind]
+        self.X_scaler.scale_ = np.ones_like(
+            self.X_scaler.scale_) * self.X_scaler.scale_[ind]
+        self.X_scaler.scale_ /= 10.0  # Scale to ensure an extent of ~10 units
+
+        self.X_train = self.X_scaler.transform(X_train_pixel_coords)
+        self.X_train = self.X_train.astype(self.dtype)
+        self.X_test = self.X_scaler.transform(X_test_pixel_coords)
+        self.X_test = self.X_test.astype(self.dtype)
+        self.candidates = self.X_scaler.transform(X_candidates_pixel_coords)
+        self.candidates = self.candidates.astype(self.dtype)
+
+        # Standardize dataset labels (y values)
+        self.y_scaler = StandardScaler()
+        self.y_scaler.fit(y_train_raw)
+
+        self.y_train = self.y_scaler.transform(y_train_raw)
+        self.y_train = self.y_train.astype(self.dtype)
+        self.y_test = self.y_scaler.transform(y_test_raw)
+        self.y_test = self.y_test.astype(self.dtype)
+
+        # Transform the entire dataset's labels for consistency
+        self.y = self.y_scaler.transform(self.y.reshape(-1, 1)).reshape(w, h)
+        self.y = self.y.astype(self.dtype)
+
+        if self.verbose:
+            print(
+                f"Training data shapes (X, y): {self.X_train.shape}, {self.y_train.shape}"
+            )
+            print(
+                f"Testing data shapes (X, y): {self.X_test.shape}, {self.y_test.shape}"
+            )
+            print(f"Candidate data shape (X): {self.candidates.shape}")
+            print("Dataset loaded and preprocessed successfully.")
+
+    def get_train(self) -> Tuple[np.ndarray, np.ndarray]:
+        """
+        Retrieves the preprocessed training data.
+
+        Returns:
+            Tuple[np.ndarray, np.ndarray]: A tuple containing:
+                - X_train (np.ndarray): (num_train, 2); Normalized training input features.
+                - y_train (np.ndarray): (num_train, 1); Standardized training labels.
+
+        Usage:
+            ```python
+            # dataset_obj = Dataset(...)
+            # X_train, y_train = dataset_obj.get_train()
+            ```
+        """
+        return self.X_train, self.y_train
+
+    def get_test(self) -> Tuple[np.ndarray, np.ndarray]:
+        """
+        Retrieves the preprocessed testing data.
+
+        Returns:
+            Tuple[np.ndarray, np.ndarray]: A tuple containing:
+                - X_test (np.ndarray): (num_test, 2); Normalized testing input features.
+                - y_test (np.ndarray): (num_test, 1); Standardized testing labels.
+
+        Usage:
+            ```python
+            # dataset_obj = Dataset(...)
+            # X_test, y_test = dataset_obj.get_test()
+            ```
+        """
+        return self.X_test, self.y_test
+
+    def get_candidates(self) -> np.ndarray:
+        """
+        Retrieves the preprocessed candidate locations for sensor placement.
+
+        Returns:
+            np.ndarray: (num_candidates, 2); Normalized candidate locations.
+
+        Usage:
+            ```python
+            # dataset_obj = Dataset(...)
+            # candidates = dataset_obj.get_candidates()
+            ```
+        """
+        return self.candidates
+
+    def get_sensor_data(
+            self,
+            locations: np.ndarray,
+            continuous_sening: bool = False,
+            max_samples: int = 500) -> Tuple[np.ndarray, np.ndarray]:
+        """
+        Samples sensor data (labels) at specified normalized locations.
+        Can simulate discrete point sensing or continuous path sensing by interpolation.
+
+        Args:
+            locations (np.ndarray): (N, 2); Array of locations (normalized x, y coordinates)
+                where sensor data is to be sampled.
+            continuous_sening (bool): If `True`, interpolates additional points between
+                the given `locations` to simulate sensing along a path.
+                Defaults to `False`.
+            max_samples (int): Maximum number of samples to return if `continuous_sening`
+                results in too many points. If the number of interpolated
+                points exceeds `max_samples`, a random subset will be returned.
+                Defaults to 500.
+
+        Returns:
+            Tuple[np.ndarray, np.ndarray]: A tuple containing:
+                - sampled_locations (np.ndarray): (M, 2); Normalized locations
+                  where sensor data was effectively sampled.
+                - sampled_data (np.ndarray): (M, 1); Standardized sensor data
+                  sampled at these locations.
+                Returns empty arrays if no valid data points are found.
+
+        Usage:
+            ```python
+            # dataset_obj = Dataset(...)
+            # X_path_normalized = np.array([[0.1, 0.2], [0.5, 0.7], [0.9, 0.8]])
+            # # Discrete sensing
+            # sensed_X_discrete, sensed_y_discrete = dataset_obj.get_sensor_data(X_path_normalized)
+            # # Continuous sensing with interpolation
+            # sensed_X_continuous, sensed_y_continuous = dataset_obj.get_sensor_data(X_path_normalized, continuous_sening=True, max_samples=100)
+            ```
+        """
+        # Convert normalized locations back to original pixel coordinates
+        locations_pixel_coords = self.X_scaler.inverse_transform(locations)
+
+        # Round locations to nearest integer and clip to valid dataset boundaries
+        locations_pixel_coords = np.round(locations_pixel_coords).astype(int)
+        locations_pixel_coords[:, 0] = np.clip(locations_pixel_coords[:, 0], 0,
+                                               self.y.shape[0] - 1)
+        locations_pixel_coords[:, 1] = np.clip(locations_pixel_coords[:, 1], 0,
+                                               self.y.shape[1] - 1)
+
+        # If continuous sensing is enabled, interpolate between points using skimage.draw.line
+        if continuous_sening:
+            interpolated_locs: List[np.ndarray] = []
+            if locations_pixel_coords.shape[0] > 1:
+                # Iterate through pairs of consecutive points to draw lines
+                for i in range(locations_pixel_coords.shape[0] - 1):
+                    loc1 = locations_pixel_coords[i]
+                    loc2 = locations_pixel_coords[i + 1]
+                    # line returns (row_coords, col_coords)
+                    rr, cc = line(loc1[0], loc1[1], loc2[0], loc2[1])
+                    interpolated_locs.append(np.column_stack((rr, cc)))
+
+            # If there's only one point, or if no lines were drawn (e.g., due to identical consecutive points),
+            # still include the initial locations.
+            if not interpolated_locs:
+                # If continuous sensing is true but no path, just return the initial locations if any
+                if locations_pixel_coords.shape[0] > 0:
+                    locations_pixel_coords = locations_pixel_coords
+                else:
+                    return np.empty((0, 2)), np.empty((0, 1))
+            else:
+                locations_pixel_coords = np.concatenate(interpolated_locs,
+                                                        axis=0)
+
+        # Ensure that locations_pixel_coords is not empty before indexing
+        if locations_pixel_coords.shape[0] == 0:
+            return np.empty((0, 2)), np.empty((0, 1))
+
+        # Ensure indices are within bounds (should be handled by clip, but double check)
+        valid_rows = np.clip(locations_pixel_coords[:, 0], 0,
+                             self.y.shape[0] - 1)
+        valid_cols = np.clip(locations_pixel_coords[:, 1], 0,
+                             self.y.shape[1] - 1)
+
+        # Extract data at the specified pixel locations
+        data = self.y[valid_rows, valid_cols].reshape(-1, 1)
+
+        # Drop NaN values from data and corresponding locations
+        valid_mask = np.isfinite(data.ravel())
+        locations_pixel_coords = locations_pixel_coords[valid_mask]
+        data = data[valid_mask]
+
+        # Re-normalize valid locations
+        if locations_pixel_coords.shape[0] == 0:
+            return np.empty((0, 2)), np.empty((0, 1))
+        locations_normalized = self.X_scaler.transform(locations_pixel_coords)
+
+        # Limit the number of samples to max_samples if needed
+        if len(locations_normalized) > max_samples:
+            indices = np.random.choice(len(locations_normalized),
+                                       max_samples,
+                                       replace=False)
+            locations_normalized = locations_normalized[indices]
+            data = data[indices]
+
+        return locations_normalized.astype(self.dtype), data.astype(self.dtype)
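Taken together, this diff replaces the old module-level `get_dataset` function with the `Dataset` class. The following is a minimal usage sketch assembled from the signatures and docstrings shown above, not an example shipped with the package; it takes the synthetic-data route (no `dataset_path` or `data`), so the extra keyword arguments are forwarded to `prep_synthetic_dataset`:

```python
import numpy as np
from sgptools.utils.data import Dataset

# No dataset_path and no data array: __init__ falls through to
# prep_synthetic_dataset, so shape/random_seed reach diamond_square.
dataset = Dataset(num_train=1000,
                  num_test=2500,
                  num_candidates=150,
                  shape=(256, 256),
                  random_seed=42)

X_train, y_train = dataset.get_train()  # (1000, 2), (1000, 1)
X_test, y_test = dataset.get_test()     # (2500, 2), (2500, 1)
candidates = dataset.get_candidates()   # (150, 2)

# Simulate sensing along a path between three waypoints; note that the
# parameter is spelled `continuous_sening` in the released code.
waypoints = candidates[:3]
X_sensed, y_sensed = dataset.get_sensor_data(waypoints,
                                             continuous_sening=True,
                                             max_samples=100)
```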