sgptools 1.2.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sgptools/utils/data.py CHANGED
@@ -14,199 +14,502 @@
 
 import numpy as np
 from matplotlib import path
-from .misc import get_inducing_pts, cont2disc
+from skimage.draw import line
+from .misc import get_inducing_pts
 from sklearn.preprocessing import StandardScaler
 from hkb_diamondsquare.DiamondSquare import diamond_square
 
 import PIL
-PIL.Image.MAX_IMAGE_PIXELS = 317500000
 
-####################################################
-# Utils used to prepare synthetic datasets
+PIL.Image.MAX_IMAGE_PIXELS = 900000000
 
-def remove_polygons(X, Y, polygons):
-    '''
-    Remove points inside polygons.
+from typing import List, Tuple, Optional, Any
+
+
+def remove_polygons(
+        X: np.ndarray, Y: np.ndarray,
+        polygons: List[path.Path]) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Removes points that fall inside a list of matplotlib Path polygons.
 
     Args:
-        X (ndarray): (N,); array of x-coordinate
-        Y (ndarray): (N,); array of y-coordinate
-        polygons (list of matplotlib path polygon): Polygons to remove from the X, Y points
+        X (np.ndarray): (N,); Array of x-coordinates.
+        Y (np.ndarray): (N,); Array of y-coordinates.
+        polygons (List[path.Path]): A list of `matplotlib.path.Path` objects.
+                                    Points within these polygons will be removed.
 
     Returns:
-        X (ndarray): (N,); array of x-coordinate
-        Y (ndarray): (N,); array of y-coordinate
-    '''
+        Tuple[np.ndarray, np.ndarray]: A tuple containing two 1D NumPy arrays:
+                                       (filtered_X_coordinates, filtered_Y_coordinates).
+
+    Usage:
+        ```python
+        import matplotlib.path as mpath
+        import numpy as np
+
+        # Example points
+        X_coords = np.array([0, 1, 2, 3, 4, 5])
+        Y_coords = np.array([0, 1, 2, 3, 4, 5])
+
+        # Define a square polygon (points inside will be removed)
+        polygon_vertices = np.array([[1, 1], [1, 3], [3, 3], [3, 1]])
+        square_polygon = mpath.Path(polygon_vertices)
+
+        filtered_X, filtered_Y = remove_polygons(X_coords, Y_coords, [square_polygon])
+        ```
+    """
     points = np.array([X.flatten(), Y.flatten()]).T
     for polygon in polygons:
         p = path.Path(polygon)
         points = points[~p.contains_points(points)]
     return points[:, 0], points[:, 1]
 
-def remove_circle_patches(X, Y, circle_patches):
-    '''
-    Remove points inside polycircle patchesgons.
+
+def remove_circle_patches(
+        X: np.ndarray, Y: np.ndarray,
+        circle_patches: List[Any]) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Removes points that fall inside a list of matplotlib Circle patches.
+
+    Note: This function assumes that the `circle_patch` objects have a `contains_points` method,
+    similar to `matplotlib.patches.Circle` or `matplotlib.path.Path`.
 
     Args:
-        X (ndarray): (N,); array of x-coordinate
-        Y (ndarray): (N,); array of y-coordinate
-        circle_patches (list of matplotlib circle patches): Circle patches to remove from the X, Y points
+        X (np.ndarray): (N,); Array of x-coordinates.
+        Y (np.ndarray): (N,); Array of y-coordinates.
+        circle_patches (List[Any]): A list of objects representing circle patches.
+                                    Each object must have a `contains_points(points)` method.
 
     Returns:
-        X (ndarray): (N,); array of x-coordinate
-        Y (ndarray): (N,); array of y-coordinate
-    '''
+        Tuple[np.ndarray, np.ndarray]: A tuple containing two 1D NumPy arrays:
+                                       (filtered_X_coordinates, filtered_Y_coordinates).
+
+    Usage:
+        ```python
+        import numpy as np
+        from matplotlib.patches import Circle
+        from matplotlib.collections import PatchCollection
+
+        # Example points
+        X_coords = np.array([0, 1, 2, 3, 4, 5])
+        Y_coords = np.array([0, 1, 2, 3, 4, 5])
+
+        # Define a circle patch centered at (2,2) with radius 1.5
+        circle = Circle((2, 2), 1.5)
+
+        filtered_X, filtered_Y = remove_circle_patches(X_coords, Y_coords, [circle])
+        ```
+    """
    points = np.array([X.flatten(), Y.flatten()]).T
    for circle_patch in circle_patches:
        points = points[~circle_patch.contains_points(points)]
    return points[:, 0], points[:, 1]
 
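The two removal helpers compose naturally when carving no-go regions out of a uniform candidate grid. A minimal sketch, not part of this diff: the grid and obstacle shapes are illustrative, and since the implementation wraps each polygon entry in `path.Path`, a raw vertex array is also accepted.

```python
import numpy as np
from matplotlib.patches import Circle
from sgptools.utils.data import remove_polygons, remove_circle_patches

# Uniform 20x20 grid of candidate locations
xx, yy = np.meshgrid(np.linspace(0, 10, 20), np.linspace(0, 10, 20))
X, Y = xx.ravel(), yy.ravel()

# A rectangular obstacle (vertex array) and a circular no-go zone
obstacle_vertices = np.array([[2, 2], [2, 4], [4, 4], [4, 2]])
no_go_circle = Circle((7, 7), 1.0)

X, Y = remove_polygons(X, Y, [obstacle_vertices])
X, Y = remove_circle_patches(X, Y, [no_go_circle])
print(X.shape)  # fewer than the original 400 candidates remain
```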
-def point_pos(point, d, theta):
-    '''
-    Generate a point at a distance d from a point at angle theta.
+
+def point_pos(point: np.ndarray, d: float, theta: float) -> np.ndarray:
+    """
+    Generates a new point at a specified distance `d` and angle `theta`
+    (in radians) from an existing point. This function applies the
+    transformation to multiple points simultaneously.
 
     Args:
-        point (ndarray): (N, 2); array of points
-        d (float): distance
-        theta (float): angle in radians
+        point (np.ndarray): (N, 2); Array of original 2D points (x, y).
+        d (float): The distance from the original point to the new point.
+        theta (float): The angle in radians for the direction of displacement.
 
     Returns:
-        X (ndarray): (N,); array of x-coordinate
-        Y (ndarray): (N,); array of y-coordinate
-    '''
-    return np.c_[point[:, 0] + d*np.cos(theta), point[:, 1] + d*np.sin(theta)]
+        np.ndarray: (N, 2); An array of new points after displacement.
+
+    Usage:
+        ```python
+        import numpy as np
+
+        # Example points (N=2)
+        initial_points = np.array([[0.0, 0.0], [1.0, 1.0]])
+        # Displace by distance 5.0 at angle pi/4 (45 degrees)
+        new_points = point_pos(initial_points, 5.0, np.pi/4)
+        # Expected:
+        # New points:
+        # [[3.53553391 3.53553391]
+        #  [4.53553391 4.53553391]]
+        ```
+    """
+    return np.c_[point[:, 0] + d * np.cos(theta),
+                 point[:, 1] + d * np.sin(theta)]
 
-####################################################
 
-def prep_tif_dataset(dataset_path):
-    '''Load and preprocess a dataset from a GeoTIFF file (.tif file). The input features
-    are set to the x and y pixel block coordinates and the labels are read from the file.
-    The method also removes all invalid points.
+def prep_tif_dataset(dataset_path: str,
+                     dim_max: int = 2500,
+                     verbose: bool = True) -> np.ndarray:
+    """
+    Loads and preprocesses a dataset from a GeoTIFF (.tif) file.
+    The function handles downsampling for large files and replaces NoData values (-999999.0) with NaN.
 
-    Large tif files
-    need to be downsampled using the following command:
-    ```gdalwarp -tr 50 50 <input>.tif <output>.tif```
+    For very large .tif files, it's recommended to downsample them externally using GDAL:
+    `gdalwarp -tr 50 50 <input>.tif <output>.tif`
 
     Args:
-        dataset_path (str): Path to the dataset file, used only when dataset_type is 'tif'.
+        dataset_path (str): Path to the GeoTIFF dataset file.
+        dim_max (int): Maximum allowed dimension (width or height) for the loaded dataset.
+                       If either dimension exceeds `dim_max`, the image will be downsampled
+                       to fit, maintaining aspect ratio. Defaults to 2500.
+        verbose (bool): If `True`, print details about loading and downsampling. Defaults to True.
 
     Returns:
-        X (ndarray): (n, d); Dataset input features
-        y (ndarray): (n, 1); Dataset labels
-    '''
+        np.ndarray: (H, W); The preprocessed 2D NumPy array representing the dataset,
+                    with NoData values converted to NaN.
+
+    Usage:
+        ```python
+        # Assuming 'path/to/your/dataset.tif' exists
+        # from sgptools.utils.data import prep_tif_dataset
+        # dataset_array = prep_tif_dataset('path/to/your/dataset.tif', dim_max=1000)
+        ```
+    """
     data = PIL.Image.open(dataset_path)
-    data = np.array(data)
-
-    # create x and y coordinates from the extent
-    x_coords = np.arange(0, data.shape[1])/10
-    y_coords = np.arange(data.shape[0], 0, -1)/10
-    xx, yy = np.meshgrid(x_coords, y_coords)
-    X = np.c_[xx.ravel(), yy.ravel()]
-    y = data.ravel()
-
-    # Remove invalid labels
-    y[np.where(y==-999999.0)] = np.nan
-    X = X[~np.isnan(y)]
-    y = y[~np.isnan(y)]
-
-    X = X.reshape(-1, 2)
-    y = y.reshape(-1, 1)
-
-    return X.astype(float), y.astype(float)
-
-####################################################
-
-def prep_synthetic_dataset(shape=(50, 50),
-                           min_height=0.0,
-                           max_height=30.0,
-                           roughness=0.5,
-                           random_seed=None,
-                           **kwargs):
-    '''Generates a 50x50 grid of synthetic elevation data using the diamond square algorithm.
-
-    Refer to the following repo for more details:
-    - [https://github.com/buckinha/DiamondSquare](https://github.com/buckinha/DiamondSquare)
-
+    data_array = np.array(data)
+    if verbose:
+        print(
+            f"Loaded dataset from {dataset_path} with shape {data_array.shape}"
+        )
+
+    downsample_factor = np.ceil(np.max(data_array.shape) / dim_max).astype(int)
+    if downsample_factor <= 1:
+        downsample_factor = 1
+    elif verbose:
+        print(
+            f'Downsampling by a factor of {downsample_factor} to fit the maximum dimension of {dim_max}'
+        )
+
+    # Downsample and convert to float, replace specific NoData value with NaN
+    data_array = data_array[::downsample_factor, ::downsample_factor].astype(
+        float)
+    data_array[np.where(data_array == -999999.0)] = np.nan
+    return data_array
+
+
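A runnable sketch of the new loader, not part of this diff; the temporary file and array are illustrative. It exercises the `dim_max` downsampling path; real GeoTIFFs additionally get their -999999.0 NoData pixels replaced with NaN:

```python
import numpy as np
import PIL.Image
from sgptools.utils.data import prep_tif_dataset

# Write a small float32 array to a .tif file
arr = np.random.rand(100, 80).astype(np.float32)
PIL.Image.fromarray(arr).save('/tmp/demo.tif')

# ceil(max(100, 80) / dim_max) = 2, so every second pixel is kept
data = prep_tif_dataset('/tmp/demo.tif', dim_max=50)
print(data.shape)  # (50, 40)
```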
+def prep_synthetic_dataset(shape: Tuple[int, int] = (1000, 1000),
+                           min_height: float = 0.0,
+                           max_height: float = 30.0,
+                           roughness: float = 0.5,
+                           random_seed: Optional[int] = None,
+                           **kwargs: Any) -> np.ndarray:
+    """
+    Generates a 2D synthetic elevation (or similar) dataset using the diamond-square algorithm.
+
+    Reference: [https://github.com/buckinha/DiamondSquare](https://github.com/buckinha/DiamondSquare)
+
     Args:
-        shape (tuple): (x, y); Grid size along the x and y axis
-        min_height (float): Minimum allowed height in the sampled data
-        max_height (float): Maximum allowed height in the sampled data
-        roughness (float): Roughness of the sampled data
-        random_seed (int): Random seed for reproducibility
+        shape (Tuple[int, int]): (width, height); The dimensions of the generated grid. Defaults to (1000, 1000).
+        min_height (float): Minimum allowed value in the generated data. Defaults to 0.0.
+        max_height (float): Maximum allowed value in the generated data. Defaults to 30.0.
+        roughness (float): Controls the fractal dimension of the generated terrain. Higher
+                           values produce rougher terrain. Defaults to 0.5.
+        random_seed (Optional[int]): Seed for reproducibility of the generated data. Defaults to None.
+        **kwargs: Additional keyword arguments passed directly to the `diamond_square` function.
 
     Returns:
-        X (ndarray): (n, d); Dataset input features
-        y (ndarray): (n, 1); Dataset labels
-    '''
+        np.ndarray: (height, width); The generated 2D synthetic dataset.
+
+    Usage:
+        ```python
+        # from sgptools.utils.data import prep_synthetic_dataset
+        # synthetic_data = prep_synthetic_dataset(shape=(256, 256), roughness=0.7, random_seed=42)
+        ```
+    """
     data = diamond_square(shape=shape,
-                        min_height=min_height,
-                        max_height=max_height,
+                          min_height=min_height,
+                          max_height=max_height,
                           roughness=roughness,
                           random_seed=random_seed,
                           **kwargs)
+    return data.astype(float)
 
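Unlike its 1.x counterpart, which returned an `(X, y)` coordinate/label pair, the new function returns the raw 2D grid. A minimal sketch, not part of this diff; parameter values are illustrative:

```python
from sgptools.utils.data import prep_synthetic_dataset

terrain = prep_synthetic_dataset(shape=(128, 128),
                                 roughness=0.5,
                                 random_seed=0)
print(terrain.shape, terrain.dtype)  # (128, 128) float64
# Values are scaled into [min_height, max_height], i.e. [0.0, 30.0] by default
print(terrain.min(), terrain.max())
```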
-    # create x and y coordinates from the extent
-    x_coords = np.arange(0, data.shape[0])/10
-    y_coords = np.arange(0, data.shape[1])/10
-    xx, yy = np.meshgrid(x_coords, y_coords)
-    X = np.c_[xx.ravel(), yy.ravel()]
-    y = data.ravel()
-    y = y.reshape(-1, 1)
-
-    return X.astype(float), y.astype(float)
-
-####################################################
-
-def get_dataset(dataset_path=None,
-                num_train=1000,
-                num_test=2500,
-                num_candidates=150,
-                **kwargs):
-    """Method to generate/load datasets and preprocess them for SP/IPP. The method uses kmeans to
-    generate train and test sets.
-
-    Args:
-        dataset_path (str): Path to a tif dataset file. If None, the method will generate synthetic data.
-        num_train (int): Number of training samples to generate.
-        num_test (int): Number of testing samples to generate.
-        num_candidates (int): Number of candidate locations to generate.
 
-    Returns:
-        X_train (ndarray): (n, d); Training set inputs
-        y_train (ndarray): (n, 1); Training set labels
-        X_test (ndarray): (n, d); Testing set inputs
-        y_test (ndarray): (n, 1); Testing set labels
-        candidates (ndarray): (n, d); Candidate sensor placement locations
-        X (ndarray): (n, d); Full dataset inputs
-        y (ndarray): (n, 1); Full dataset labels
+class Dataset:
     """
-    # Load the data
-    if dataset_path is not None:
-        X, y = prep_tif_dataset(dataset_path=dataset_path)
-    else:
-        X, y = prep_synthetic_dataset(**kwargs)
-
-    X_train = get_inducing_pts(X, num_train)
-    X_train, y_train = cont2disc(X_train, X, y)
-
-    X_test = get_inducing_pts(X, num_test)
-    X_test, y_test = cont2disc(X_test, X, y)
-
-    candidates = get_inducing_pts(X, num_candidates)
-    candidates = cont2disc(candidates, X)
-
-    # Standardize data
-    X_scaler = StandardScaler()
-    X_scaler.fit(X_train)
-    X_train = X_scaler.transform(X_train)*10.0
-    X_test = X_scaler.transform(X_test)*10.0
-    X = X_scaler.transform(X)*10.0
-    candidates = X_scaler.transform(candidates)*10.0
-
-    y_scaler = StandardScaler()
-    y_scaler.fit(y_train)
-    y_train = y_scaler.transform(y_train)
-    y_test = y_scaler.transform(y_test)
-    y = y_scaler.transform(y)
-
-    return X_train, y_train, X_test, y_test, candidates, X, y
+    A class to load, preprocess, and manage access to a dataset for sensor placement
+    and informative path planning tasks.
+
+    It handles the following operations:
+
+    * Loading from a GeoTIFF file, loading from a numpy array, and generating a synthetic dataset.
+    * Sampling training, testing, and candidate points from valid (non-NaN) locations.
+    * Standardizing both the input coordinates (X) and the labels (y) using `StandardScaler`.
+    * Providing methods to retrieve different subsets of the data (train, test, candidates)
+      and to sample sensor data at specified locations or along a path.
+
+    The dataset is expected to be a 2D array where each element represents a label
+    (e.g., elevation, temperature, environmental reading).
+    """
+
+    def __init__(self,
+                 dataset_path: Optional[str] = None,
+                 num_train: int = 1000,
+                 num_test: int = 2500,
+                 num_candidates: int = 150,
+                 verbose: bool = True,
+                 data=None,
+                 dtype=np.float64,
+                 **kwargs: Any):
+        """
+        Initializes the Dataset class.
+
+        Args:
+            dataset_path (Optional[str]): Path to the dataset file (e.g., '.tif'). If None,
+                                          a synthetic dataset will be generated. Defaults to None.
+                                          Alternatively, pass an array of data to the constructor
+                                          with the `data` argument to use a custom dataset.
+            num_train (int): Number of training points to sample from the dataset. Defaults to 1000.
+            num_test (int): Number of testing points to sample from the dataset. Defaults to 2500.
+            num_candidates (int): Number of candidate points for potential sensor placements
+                                  to sample from the dataset. Defaults to 150.
+            verbose (bool): If `True`, print details about dataset loading, sampling, and preprocessing.
+                            Defaults to True.
+            data (Optional[np.ndarray]): (height, width, d); 2D n-dimensional array of data.
+            dtype (Optional[np.dtype]): The type of the output arrays. If dtype is not given,
+                                        it will be set to np.float64.
+            **kwargs: Additional keyword arguments passed to `prep_tif_dataset` or `prep_synthetic_dataset`.
+        """
+        self.verbose = verbose
+        self.dtype = dtype
+
+        # Load/Create the data
+        if data is not None:
+            self.y = data
+        elif dataset_path is not None:
+            self.y = prep_tif_dataset(dataset_path=dataset_path,
+                                      verbose=verbose,
+                                      **kwargs)
+        else:
+            self.y = prep_synthetic_dataset(**kwargs)
+
+        # Store original dimensions for reshaping
+        w, h = self.y.shape[0], self.y.shape[1]
+        if self.verbose:
+            print(f"Original dataset shape: {self.y.shape}")
+
+        # Get valid points (non-NaN labels)
+        mask = np.where(np.isfinite(self.y))
+        X_valid_pixel_coords = np.column_stack((mask[0], mask[1]))
+
+        # Sample training, testing, and candidate points from valid pixel coordinates
+        # `get_inducing_pts` with random=True is used for random sampling
+        X_train_pixel_coords = get_inducing_pts(X_valid_pixel_coords,
+                                                num_train,
+                                                random=True)
+        y_train_raw = self.y[X_train_pixel_coords[:, 0],
+                             X_train_pixel_coords[:, 1]].reshape(-1, 1)
+
+        # If num_test is equal to dataset size, return test data in original order, enables plotting with imshow
+        if self.y.shape[0] * self.y.shape[1] == num_test:
+            X_test_pixel_coords = X_valid_pixel_coords
+            y_test_raw = self.y.reshape(-1, 1)
+        else:
+            X_test_pixel_coords = get_inducing_pts(X_valid_pixel_coords,
+                                                   num_test,
+                                                   random=True)
+            y_test_raw = self.y[X_test_pixel_coords[:, 0],
+                                X_test_pixel_coords[:, 1]].reshape(-1, 1)
+
+        X_candidates_pixel_coords = get_inducing_pts(X_valid_pixel_coords,
+                                                     num_candidates,
+                                                     random=True)
+
+        # Standardize dataset X coordinates (pixel coords to normalized space)
+        self.X_scaler = StandardScaler()
+        self.X_scaler.fit(X_train_pixel_coords)
+
+        # Adjust X_scaler's variance/scale to ensure uniform scaling across dimensions
+        # and to scale the data to have an extent of at least 10.0 in each dimension.
+        # This ensures consistency and prevents issues with very small scales.
+        ind = np.argmax(self.X_scaler.var_)
+        self.X_scaler.var_ = np.ones_like(
+            self.X_scaler.var_) * self.X_scaler.var_[ind]
+        self.X_scaler.scale_ = np.ones_like(
+            self.X_scaler.scale_) * self.X_scaler.scale_[ind]
+        self.X_scaler.scale_ /= 10.0  # Scale to ensure an extent of ~10 units
+
+        self.X_train = self.X_scaler.transform(X_train_pixel_coords)
+        self.X_train = self.X_train.astype(self.dtype)
+        self.X_test = self.X_scaler.transform(X_test_pixel_coords)
+        self.X_test = self.X_test.astype(self.dtype)
+        self.candidates = self.X_scaler.transform(X_candidates_pixel_coords)
+        self.candidates = self.candidates.astype(self.dtype)
+
+        # Standardize dataset labels (y values)
+        self.y_scaler = StandardScaler()
+        self.y_scaler.fit(y_train_raw)
+
+        self.y_train = self.y_scaler.transform(y_train_raw)
+        self.y_train = self.y_train.astype(self.dtype)
+        self.y_test = self.y_scaler.transform(y_test_raw)
+        self.y_test = self.y_test.astype(self.dtype)
+
+        # Transform the entire dataset's labels for consistency
+        self.y = self.y_scaler.transform(self.y.reshape(-1, 1)).reshape(w, h)
+        self.y = self.y.astype(self.dtype)
+
+        if self.verbose:
+            print(
+                f"Training data shapes (X, y): {self.X_train.shape}, {self.y_train.shape}"
+            )
+            print(
+                f"Testing data shapes (X, y): {self.X_test.shape}, {self.y_test.shape}"
+            )
+            print(f"Candidate data shape (X): {self.candidates.shape}")
+            print("Dataset loaded and preprocessed successfully.")
+
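The constructor replaces the removed `get_dataset` function. A minimal end-to-end sketch, not part of this diff, using the new `data` argument with an in-memory array; note that the scaler adjustment above gives both coordinate axes one shared (isotropic) scale taken from the higher-variance axis, so relative distances are preserved:

```python
import numpy as np
from sgptools.utils.data import Dataset

grid = np.random.rand(100, 100)  # any 2D label field works
ds = Dataset(data=grid,
             num_train=200,
             num_test=400,
             num_candidates=50,
             verbose=False)

print(ds.X_train.shape, ds.y_train.shape)  # (200, 2) (200, 1)
print(ds.candidates.shape)                 # (50, 2)
# Both axes were standardized with the same scale_ value
print(ds.X_scaler.scale_)
```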
+    def get_train(self) -> Tuple[np.ndarray, np.ndarray]:
+        """
+        Retrieves the preprocessed training data.
+
+        Returns:
+            Tuple[np.ndarray, np.ndarray]: A tuple containing:
+                - X_train (np.ndarray): (num_train, 2); Normalized training input features.
+                - y_train (np.ndarray): (num_train, 1); Standardized training labels.
+
+        Usage:
+            ```python
+            # dataset_obj = Dataset(...)
+            # X_train, y_train = dataset_obj.get_train()
+            ```
+        """
+        return self.X_train, self.y_train
+
+    def get_test(self) -> Tuple[np.ndarray, np.ndarray]:
+        """
+        Retrieves the preprocessed testing data.
+
+        Returns:
+            Tuple[np.ndarray, np.ndarray]: A tuple containing:
+                - X_test (np.ndarray): (num_test, 2); Normalized testing input features.
+                - y_test (np.ndarray): (num_test, 1); Standardized testing labels.
+
+        Usage:
+            ```python
+            # dataset_obj = Dataset(...)
+            # X_test, y_test = dataset_obj.get_test()
+            ```
+        """
+        return self.X_test, self.y_test
+
+    def get_candidates(self) -> np.ndarray:
+        """
+        Retrieves the preprocessed candidate locations for sensor placement.
+
+        Returns:
+            np.ndarray: (num_candidates, 2); Normalized candidate locations.
+
+        Usage:
+            ```python
+            # dataset_obj = Dataset(...)
+            # candidates = dataset_obj.get_candidates()
+            ```
+        """
+        return self.candidates
+
+    def get_sensor_data(
+            self,
+            locations: np.ndarray,
+            continuous_sening: bool = False,
+            max_samples: int = 500) -> Tuple[np.ndarray, np.ndarray]:
+        """
+        Samples sensor data (labels) at specified normalized locations.
+        Can simulate discrete point sensing or continuous path sensing by interpolation.
+
+        Args:
+            locations (np.ndarray): (N, 2); Array of locations (normalized x, y coordinates)
+                                    where sensor data is to be sampled.
+            continuous_sening (bool): If `True`, interpolates additional points between
+                                      the given `locations` to simulate sensing along a path.
+                                      Defaults to `False`.
+            max_samples (int): Maximum number of samples to return if `continuous_sening`
+                               results in too many points. If the number of interpolated
+                               points exceeds `max_samples`, a random subset will be returned.
+                               Defaults to 500.
+
+        Returns:
+            Tuple[np.ndarray, np.ndarray]: A tuple containing:
+                - sampled_locations (np.ndarray): (M, 2); Normalized locations
+                                                  where sensor data was effectively sampled.
+                - sampled_data (np.ndarray): (M, 1); Standardized sensor data
+                                             sampled at these locations.
+                Returns empty arrays if no valid data points are found.
+
+        Usage:
+            ```python
+            # dataset_obj = Dataset(...)
+            # X_path_normalized = np.array([[0.1, 0.2], [0.5, 0.7], [0.9, 0.8]])
+            # # Discrete sensing
+            # sensed_X_discrete, sensed_y_discrete = dataset_obj.get_sensor_data(X_path_normalized)
+            # # Continuous sensing with interpolation
+            # sensed_X_continuous, sensed_y_continuous = dataset_obj.get_sensor_data(X_path_normalized, continuous_sening=True, max_samples=100)
+            ```
+        """
+        # Convert normalized locations back to original pixel coordinates
+        locations_pixel_coords = self.X_scaler.inverse_transform(locations)
+
+        # Round locations to nearest integer and clip to valid dataset boundaries
+        locations_pixel_coords = np.round(locations_pixel_coords).astype(int)
+        locations_pixel_coords[:, 0] = np.clip(locations_pixel_coords[:, 0], 0,
+                                               self.y.shape[0] - 1)
+        locations_pixel_coords[:, 1] = np.clip(locations_pixel_coords[:, 1], 0,
+                                               self.y.shape[1] - 1)
+
+        # If continuous sensing is enabled, interpolate between points using skimage.draw.line
+        if continuous_sening:
+            interpolated_locs: List[np.ndarray] = []
+            if locations_pixel_coords.shape[0] > 1:
+                # Iterate through pairs of consecutive points to draw lines
+                for i in range(locations_pixel_coords.shape[0] - 1):
+                    loc1 = locations_pixel_coords[i]
+                    loc2 = locations_pixel_coords[i + 1]
+                    # line returns (row_coords, col_coords)
+                    rr, cc = line(loc1[0], loc1[1], loc2[0], loc2[1])
+                    interpolated_locs.append(np.column_stack((rr, cc)))
+
+            # If there's only one point, or if no lines were drawn (e.g., due to identical consecutive points),
+            # still include the initial locations.
+            if not interpolated_locs:
+                # If continuous sensing is true but no path, just return the initial locations if any
+                if locations_pixel_coords.shape[0] > 0:
+                    locations_pixel_coords = locations_pixel_coords
+                else:
+                    return np.empty((0, 2)), np.empty((0, 1))
+            else:
+                locations_pixel_coords = np.concatenate(interpolated_locs,
+                                                        axis=0)
+
+        # Ensure that locations_pixel_coords is not empty before indexing
+        if locations_pixel_coords.shape[0] == 0:
+            return np.empty((0, 2)), np.empty((0, 1))
+
+        # Ensure indices are within bounds (should be handled by clip, but double check)
+        valid_rows = np.clip(locations_pixel_coords[:, 0], 0,
+                             self.y.shape[0] - 1)
+        valid_cols = np.clip(locations_pixel_coords[:, 1], 0,
+                             self.y.shape[1] - 1)
+
+        # Extract data at the specified pixel locations
+        data = self.y[valid_rows, valid_cols].reshape(-1, 1)
+
+        # Drop NaN values from data and corresponding locations
+        valid_mask = np.isfinite(data.ravel())
+        locations_pixel_coords = locations_pixel_coords[valid_mask]
+        data = data[valid_mask]
+
+        # Re-normalize valid locations
+        if locations_pixel_coords.shape[0] == 0:
+            return np.empty((0, 2)), np.empty((0, 1))
+        locations_normalized = self.X_scaler.transform(locations_pixel_coords)
+
+        # Limit the number of samples to max_samples if needed
+        if len(locations_normalized) > max_samples:
+            indices = np.random.choice(len(locations_normalized),
+                                       max_samples,
+                                       replace=False)
+            locations_normalized = locations_normalized[indices]
+            data = data[indices]
+
+        return locations_normalized.astype(self.dtype), data.astype(self.dtype)
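Taken together, the class supersedes the removed seven-tuple `get_dataset` API. A hedged migration sketch, not part of this diff; the 1.x call is reproduced from the removed code above, the keyword values are illustrative, and note that the parameter is spelled `continuous_sening` in this release:

```python
from sgptools.utils.data import Dataset

# sgptools 1.x:
#   X_train, y_train, X_test, y_test, candidates, X, y = get_dataset(path)

# sgptools 2.0: extra kwargs flow through to prep_synthetic_dataset here
ds = Dataset(num_train=500, verbose=False, shape=(128, 128), random_seed=0)
X_train, y_train = ds.get_train()
X_test, y_test = ds.get_test()
candidates = ds.get_candidates()

# Simulate sensing along a path, interpolating pixels between waypoints
waypoints = X_train[:3]
X_path, y_path = ds.get_sensor_data(waypoints,
                                    continuous_sening=True,
                                    max_samples=100)
print(X_path.shape, y_path.shape)  # at most (100, 2) / (100, 1)
```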