sgspy 1.0.2__cp312-cp312-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. sgspy/__init__.py +82 -0
  2. sgspy/_sgs.cpython-312-x86_64-linux-gnu.so +0 -0
  3. sgspy/calculate/__init__.py +18 -0
  4. sgspy/calculate/pca/__init__.py +2 -0
  5. sgspy/calculate/pca/pca.py +158 -0
  6. sgspy/calculate/representation/__init__.py +2 -0
  7. sgspy/calculate/representation/representation.py +3 -0
  8. sgspy/sample/__init__.py +30 -0
  9. sgspy/sample/ahels/__init__.py +2 -0
  10. sgspy/sample/ahels/ahels.py +3 -0
  11. sgspy/sample/clhs/__init__.py +2 -0
  12. sgspy/sample/clhs/clhs.py +202 -0
  13. sgspy/sample/nc/__init__.py +2 -0
  14. sgspy/sample/nc/nc.py +3 -0
  15. sgspy/sample/srs/__init__.py +2 -0
  16. sgspy/sample/srs/srs.py +228 -0
  17. sgspy/sample/strat/__init__.py +2 -0
  18. sgspy/sample/strat/strat.py +394 -0
  19. sgspy/sample/systematic/__init__.py +2 -0
  20. sgspy/sample/systematic/systematic.py +233 -0
  21. sgspy/stratify/__init__.py +27 -0
  22. sgspy/stratify/breaks/__init__.py +2 -0
  23. sgspy/stratify/breaks/breaks.py +222 -0
  24. sgspy/stratify/kmeans/__init__.py +2 -0
  25. sgspy/stratify/kmeans/kmeans.py +3 -0
  26. sgspy/stratify/map/__init__.py +2 -0
  27. sgspy/stratify/map/map_stratifications.py +244 -0
  28. sgspy/stratify/poly/__init__.py +2 -0
  29. sgspy/stratify/poly/poly.py +170 -0
  30. sgspy/stratify/quantiles/__init__.py +2 -0
  31. sgspy/stratify/quantiles/quantiles.py +276 -0
  32. sgspy/utils/__init__.py +18 -0
  33. sgspy/utils/plot.py +143 -0
  34. sgspy/utils/raster.py +605 -0
  35. sgspy/utils/vector.py +268 -0
  36. sgspy-1.0.2.data/data/sgspy/libonedal.so.3 +0 -0
  37. sgspy-1.0.2.data/data/sgspy/proj.db +0 -0
  38. sgspy-1.0.2.dist-info/METADATA +13 -0
  39. sgspy-1.0.2.dist-info/RECORD +40 -0
  40. sgspy-1.0.2.dist-info/WHEEL +5 -0
@@ -0,0 +1,276 @@
1
+ # ******************************************************************************
2
+ #
3
+ # Project: sgs
4
+ # Purpose: stratification by user defined quantiles
5
+ # Author: Joseph Meyer
6
+ # Date: September, 2025
7
+ #
8
+ # ******************************************************************************
9
+
10
+ ##
11
+ # @defgroup user_quantiles quantiles
12
+ # @ingroup user_stratify
13
+
14
+ import os
15
+ import sys
16
+ import site
17
+ import tempfile
18
+ import numpy as np
19
+ from sgspy.utils import SpatialRaster
20
+
21
# Ensure the compiled _sgs extension module can be found at import time.
#
# Two candidate directories are appended to sys.path: the 'sgspy' directory
# inside the first site-packages path reported by the interpreter, and the
# parent directory of this file's directory.
#
# site.getsitepackages() is not guaranteed to return an entry containing
# 'site-packages' (for example on layouts that use 'dist-packages'), so the
# lookup must not assume one exists; in that case only the path relative to
# this file is added and site_packages is None.
site_packages = next(
    (path for path in site.getsitepackages() if 'site-packages' in path),
    None,
)
if site_packages is not None:
    sys.path.append(os.path.join(site_packages, "sgspy"))
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from _sgs import quantiles_cpp

# Number of bytes in 2**30; used as the unit for the large-raster size thresholds.
GIGABYTE = 1073741824
28
+
29
+ ##
30
+ # @ingroup user_quantiles
31
+ # This function conducts stratification on the raster given by generating quantile
32
+ # probabilities according to the 'num_strata' argument given by the user.
33
+ #
34
+ # The quantiles may be defined as an integer, indicating the number of quantiles
35
+ # of equal size. Quantiles may also be defined as a list of probabilities between 0
36
+ # and 1. In the case of a raster with a single band, the quantiles may be passed directly
37
+ # to the num_strata argument as either type: int | list[float].
38
+ #
39
+ # In the case of a multi-band raster image, the specific bands can be specified by the index
40
+ # of a list containing an equal number of quantiles as bands (list[int | list[float]]).
41
+ #
42
+ # If not all raster bands should be stratified, specific bands can be selected in
43
+ # the form of a dict where the key is the name of a raster band and the value is the
44
+ # quantiles (dict[str, int | list[float]]).
45
+ #
46
+ # if the map parameter is given, an extra output band will be used which combines
47
+ # all stratifications from the bands used into an extra output band. A single
48
+ # value in the mapped output band corresponds to a single combination
49
+ # of values from the previous bands.
50
+ #
51
+ # The thread_count parameter specifies the number of threads which this function
52
+ # will utilize in the case where the raster is large and may not fit in memory. If
53
+ # the full raster can fit in memory and does not need to be processed in blocks, this
54
+ # argument will be ignored. The default is 8 threads, although the optimal number
55
+ # will depend significantly on the hardware being used and may be more or less
56
+ # than 8.
57
+ #
58
+ # the driver_options parameter is used to specify creation options for the output
59
+ # raster. See options for the Gtiff driver here:
60
+ # https://gdal.org/en/stable/drivers/raster/gtiff.html#creation-options
61
+ # The keys in the driver_options dict must be strings, the values are converted to
62
+ # string. The options must be valid for the driver corresponding to the filename,
63
+ # and if filename is not given they must be valid for the GTiff format, as that
64
+ # is the format used to store temporary raster files. Note that if this parameter
65
+ # is given, but filename is not and the raster fits entirely in memory, the
66
+ # driver_options parameter will be ignored.
67
+ #
68
+ # the eps parameter is used only if batch processing is used to calculate the quantiles
69
+ # for a raster. Quantile streaming algorithms cannot be perfectly accurate, as this
70
+ # would necessitate having the entire raster in memory at once. A good approximation
71
+ # can be made, and the error is controlled by this epsilon (eps) value.
72
+ # The Quantile streaming method is the method introduced by Zhang et al. and utilized by MKL:
73
+ # https://web.cs.ucla.edu/~weiwang/paper/SSDBM07_2.pdf
74
+ # https://www.intel.com/content/www/us/en/docs/onemkl/developer-reference-summary-statistics-notes/2021-1/computing-quantiles-with-vsl-ss-method-squants-zw.html
75
+ #
76
+ # Examples
77
+ # --------------------
78
+ # rast = sgspy.SpatialRaster('rast.tif') @n
79
+ # srast = sgspy.stratify.quantiles(rast, num_strata=5)
80
+ #
81
+ # rast = sgspy.SpatialRaster('rast.tif') @n
82
+ # srast = sgspy.stratify.quantiles(rast, num_strata=[.1, .2, .3, .5, .7], filename="srast.tif")
83
+ #
84
+ # rast = sgspy.SpatialRaster('multi_band_rast.tif') @n
85
+ # srast = sgspy.stratify.quantiles(rast, num_strata=[5, 5, [.5, .75]], map=True)
86
+ #
87
+ # rast = sgspy.SpatialRaster('multi_band_rast.tif') @n
88
+ # srast = sgspy.stratify.quantiles(rast, num_strata={'zq90': 5})
89
+ #
90
+ # Parameters
91
+ # --------------------
92
+ # rast : SpatialRaster @n
93
+ # raster data structure containing the raster to stratify @n @n
94
+ # num_strata : int | list[float] | list[int|list[float]] | dict[str,int|list[float]] @n
95
+ # specification of the quantiles to stratify @n @n
96
+ # map : bool @n
97
+ # whether to map the stratification of multiple raster bands onto a single band @n @n
98
+ # filename : str @n
99
+ # filename to write to or '' if no file should be written @n @n
100
+ # thread_count : int @n
101
+ # the number of threads to use when multithreading large images @n @n
102
+ # driver_options : dict[] @n
103
+ # the creation options as defined by GDAL which will be passed when creating output files @n @n
104
+ # eps : float @n
105
+ # the epsilon value, controlling the error of stream-processed quantiles @n @n
106
+ #
107
+ # Returns
108
+ # --------------------
109
+ # a SpatialRaster object containing stratified raster bands.
110
def quantiles(
    rast: SpatialRaster,
    num_strata: int | list[float] | list[int|list[float]] | dict[str,int|list[float]],
    map: bool = False,
    filename: str = '',
    thread_count: int = 8,
    driver_options: dict = None,
    eps: float = .001):
    """
    Stratify raster bands by quantiles and return the result as a SpatialRaster.

    The 'num_strata' argument is normalised into a dict that maps a band index
    to its interior quantile probabilities (0 and 1 are dropped), which is then
    handed to the C++ implementation (quantiles_cpp). The caller's 'num_strata'
    object is never modified.

    Parameters
    --------------------
    rast : SpatialRaster
        raster to stratify
    num_strata : int | list[float] | list[int | list[float]] | dict[str, int | list[float]]
        an int or list[float] applies to a single-band raster; a list with one
        entry per band applies to every band in order; a dict selects bands by name
    map : bool
        forwarded to the C++ function; also enables the overflow check on the
        product of all per-band strata counts
    filename : str
        output filename, or '' if no file should be written
    thread_count : int
        number of threads, must be at least 1
    driver_options : dict
        creation options; keys must be str, values are converted with str()
    eps : float
        epsilon value forwarded to the C++ function

    Raises
    --------------------
    TypeError:
        if an argument is not exactly of the expected type
    RuntimeError:
        if the raster has been closed
    ValueError:
        if num_strata does not match the raster's bands, contains a probability
        outside [0, 1], contains an int smaller than 1, would overflow a 32-bit
        signed integer, or if thread_count < 1, or a driver_options key is not str
    """
    MAX_STRATA_VAL = 2147483647 #maximum value stored within a 32-bit signed integer to ensure no overflow

    # Exact type checks are intentional: with isinstance() a bool would be
    # accepted wherever an int is expected.
    if type(rast) is not SpatialRaster:
        raise TypeError("'rast' parameter must be of type sgspy.SpatialRaster")

    if type(num_strata) not in [int, list, dict]:
        raise TypeError("'num_strata' parameter must be of type int, list, or dict.")

    if type(map) is not bool:
        raise TypeError("'map' parameter must be of type bool.")

    if type(filename) is not str:
        raise TypeError("'filename' parameter must be of type str.")

    if type(thread_count) is not int:
        raise TypeError("'thread_count' parameter must be of type int.")

    if type(eps) is not float:
        raise TypeError("'eps' parameter must be of type float.")

    if rast.closed:
        raise RuntimeError("the C++ object which the raster object wraps has been cleaned up and closed.")

    if type(num_strata) is list and len(num_strata) < 1:
        raise ValueError("num_strata list must contain at least one element")

    # band index -> interior quantile probabilities for that band
    probabilities_dict = {}
    if type(num_strata) is int:
        #error check number of raster bands
        if rast.band_count != 1:
            raise ValueError("num_strata int is for a single rast band, but the raster has {}".format(rast.band_count))

        probabilities_dict[0] = _interior_probabilities(num_strata)

    elif type(num_strata) is list and type(num_strata[0]) is float:
        #error check number of raster bands
        if rast.band_count != 1:
            raise ValueError("num_strata list[float] type is for a single raster band, but the raster has {}".format(rast.band_count))

        #error check list values
        if min(num_strata) < 0:
            raise ValueError("list[float] must not contain a value less than 0")
        elif max(num_strata) > 1:
            raise ValueError("list[float] must not contain a value greater than 1")

        probabilities_dict[0] = _interior_probabilities(num_strata)

    elif type(num_strata) is list:
        #error checking number of raster bands
        if (len(num_strata)) != rast.band_count:
            raise ValueError("number of lists in num_strata must be equal to the number of raster bands.")

        #one specification per band, in band order
        for band_num, spec in enumerate(num_strata):
            if type(spec) is not int: #list of float
                if min(spec) < 0:
                    raise ValueError("list[float] must not contain value less than 0")
                elif max(spec) > 1:
                    raise ValueError("list[float] must not contain value greater than 1")
            probabilities_dict[band_num] = _interior_probabilities(spec)

    else: #type dict
        for key, spec in num_strata.items():
            if key not in rast.bands:
                raise ValueError("probabilities dict key must be valid band name (see SpatialRaster.bands for list of names)")

            band_num = rast.band_name_dict[key]
            if type(spec) is not int: #list of float
                if min(spec) < 0:
                    raise ValueError("list[float] must not contain value less than 0")
                elif max(spec) > 1:
                    raise ValueError("list[float] must not contain value greater than 1")
            probabilities_dict[band_num] = _interior_probabilities(spec)

    #error check max value for potential overflow error
    #(when map is False the running product starts at 0 and therefore stays 0)
    max_mapped_strata = int(map)
    for probabilities in probabilities_dict.values():
        strata_count = len(probabilities) + 1
        if strata_count > MAX_STRATA_VAL:
            raise ValueError("one of the quantiles given will cause an integer overflow error because the max strata number is too large.")
        max_mapped_strata = max_mapped_strata * strata_count

    if max_mapped_strata > MAX_STRATA_VAL:
        raise ValueError("the mapped strata will cause an overflow error because the max strata number is too large.")

    if thread_count < 1:
        raise ValueError("number of threads can't be less than 1.")

    #creation options are passed to C++ as str -> str
    driver_options_str = {}
    if driver_options:
        for (key, val) in driver_options.items():
            if type(key) is not str:
                raise ValueError("the key for all key/value pairs in the driver_options dict must be a string.")
            driver_options_str[key] = str(val)

    #a raster is 'large' if any single selected band is at least 1 GB, or if the
    #selected bands together exceed 4 GB
    large_raster = False
    raster_size_bytes = 0
    height = rast.height
    width = rast.width
    for band_num in probabilities_dict:
        pixel_size = rast.cpp_raster.get_raster_band_type_size(band_num)
        band_size = height * width * pixel_size
        raster_size_bytes += band_size
        if band_size >= GIGABYTE:
            large_raster = True
            break

    #if large_raster is true, the C++ function will process the raster in blocks
    large_raster = large_raster or (raster_size_bytes > GIGABYTE * 4)

    #make a temp directory and record it on the input raster while the C++ call runs
    #NOTE(review): presumably SpatialRaster removes rast.temp_dir while
    #have_temp_dir is True if the call below fails - confirm in utils/raster.py
    temp_dir = tempfile.mkdtemp()
    rast.have_temp_dir = True
    rast.temp_dir = temp_dir

    #call stratify quantiles function
    srast = SpatialRaster(quantiles_cpp(
        rast.cpp_raster,
        probabilities_dict,
        map,
        filename,
        temp_dir,
        large_raster,
        thread_count,
        driver_options_str,
        eps
    ))

    #now that it's created, give the cpp raster object ownership of the temporary directory
    rast.have_temp_dir = False
    srast.cpp_raster.set_temp_dir(temp_dir)
    srast.temp_dataset = filename == "" and large_raster
    srast.filename = filename

    return srast


def _interior_probabilities(spec):
    """
    Convert one band's quantile specification into its interior probabilities.

    An int n yields the numpy array [1/n, 2/n, ..., (n-1)/n]. Any other value is
    treated as a sequence of probabilities and returned as a NEW list with every
    0 and 1 removed, so the sequence supplied by the caller is left untouched.
    Range validation of sequences is done by the caller.

    Raises ValueError if an int smaller than 1 is given.
    """
    if type(spec) is int:
        if spec < 1:
            raise ValueError("the number of quantiles given as an int must be at least 1.")
        return np.array(range(1, spec)) / spec

    return [prob for prob in spec if prob != 0.0 and prob != 1.0]
@@ -0,0 +1,18 @@
1
+ ##
2
+ # @defgroup user_utils utils
3
+ # @ingroup user
4
+ #
5
+ # Explanations of both the SpatialRaster and SpatialVector classes.
6
+
7
+ from . import (
8
+ raster,
9
+ vector,
10
+ )
11
+
12
+ from .raster import SpatialRaster
13
+ from .vector import SpatialVector
14
+
15
# Names exported by a star-import of this package. Every entry must match a
# name bound in this module, otherwise the star-import raises AttributeError.
__all__ = [
    "SpatialRaster",
    "SpatialVector",
]
sgspy/utils/plot.py ADDED
@@ -0,0 +1,143 @@
1
+ # ******************************************************************************
2
+ #
3
+ # Project: sgs
4
+ # Purpose: Plotting rasters and vectors with matplotlib.pyplot
5
+ # Author: Joseph Meyer
6
+ # Date: June, 2025
7
+ #
8
+ # ******************************************************************************
9
+
10
+ from typing import Optional
11
+
12
+ import numpy as np
13
+ import matplotlib.pyplot as plt
14
+ import matplotlib #for typing matplotlib.axes.Axes
15
+
16
def plot_raster(raster,
                ax: matplotlib.axes.Axes,
                target_width: int = 1000,
                target_height: int = 1000,
                band: Optional[int | str] = None,
                **kwargs):
    """
    Plots a single raster band on the given axis using its imshow method,
    and sets the axis title to the name of the band.

    Parameters
    --------------------
    raster : SpatialRaster
        raster to plot
    ax : matplotlib axis
        the axis to plot the image on
    target_width : int
        maximum width in pixels for the image (after downsampling)
    target_height : int
        maximum height in pixels for the image (after downsampling)
    band (optional) : int or str
        specification of which band to plot
    **kwargs (optional)
        any parameters which may be passed to matplotlib.pyplot.imshow

    Raises
    --------------------
    ValueError:
        if no band was specified, and the raster contains more than one band
    """
    #resolve the band argument to a band index
    if band is None:
        if raster.band_count > 1:
            raise ValueError("'band' argument must be given if raster contains more than one band.")
        band = 0
    else:
        band = raster.get_band_index(band)
    title = raster.bands[band]

    #calculate downsampled resolution and get downsampled raster
    #for info on downsample resolution calculation:
    #https://gdal.org/en/stable/api/gdaldataset_cpp.html#classGDALDataset_1ae66e21b09000133a0f4d99baabf7a0ec
    target_downscaling_factor = min(raster.width / target_width, raster.height / target_height)
    if target_downscaling_factor <= 2 / 1.2:
        divisor = 1
    elif target_downscaling_factor <= 4 / 1.2:
        divisor = 2
    elif target_downscaling_factor <= 8 / 1.2:
        divisor = 4
    else:
        divisor = 8

    if divisor == 1:
        #no downsampling, use the raster dimensions unchanged
        downsampled_width = raster.width
        downsampled_height = raster.height
    else:
        downsampled_width = int(raster.width / divisor)
        downsampled_height = int(raster.height / divisor)

    #get the raster data from the cpp object as a float64 copy, so that the
    #no data value can be replaced with nan
    no_data_val = raster.cpp_raster.get_band_nodata_value(band)
    arr = np.asarray(
        raster.cpp_raster.get_raster_as_memoryview(downsampled_width, downsampled_height, band),
        copy=False
    ).astype(np.float64, copy=True)
    arr[arr == no_data_val] = np.nan

    #raster extent in the order matplotlib expects: (left, right, bottom, top)
    extent = (raster.xmin, raster.xmax, raster.ymin, raster.ymax)

    #title the axis which was passed in, rather than whichever axis pyplot
    #currently considers active
    ax.set_title(title)
    ax.imshow(arr, origin='upper', extent=extent, **kwargs)
80
+
81
def plot_vector(vector,
                ax: matplotlib.axes.Axes,
                geomtype: str,
                layer: Optional[int | str] = None,
                **kwargs):
    """
    Plots the specified layer on the given axis using its plot method.
    The parameter given by geomtype must be one of:
    'Point', 'MultiPoint', 'LineString', 'MultiLineString'.

    The layer must contain only geometries of type Point and
    MultiPoint in the case where 'Point' or 'MultiPoint' is given,
    or geometries of type LineString and MultiLineString
    in the case where 'LineString' or 'MultiLineString' is given.

    Parameters
    --------------------
    vector : SpatialVector
        vector to plot
    ax : matplotlib axis
        the axis to plot the image on
    geomtype : str
        geometry type of the layer
    layer : None | int | str
        layer to plot, either by name or by index into vector.layers
    **kwargs (optional)
        any parameter which may be passed to matplotlib.pyplot.plot. A 'fmt'
        entry is used as the format string; without it points are drawn as
        red dots ('.r') and lines as solid black ('-k')

    Raises
    --------------------
    ValueError:
        if no layer was specified, and the image contains more than one layer
    ValueError:
        if geomtype is not one of 'Point', 'MultiPoint', 'LineString', 'MultiLineString'
    RuntimeError (from C++):
        if the layer contains a geometry NOT of an acceptable type
    """
    if isinstance(layer, str):
        layer_name = layer
    elif isinstance(layer, int):
        layer_name = vector.layers[layer]
    elif len(vector.layers) == 1: #layer is None
        layer_name = vector.layers[0]
    else:
        raise ValueError("no layer was specified, and there is more than one layer in the vector. Specify a layer to plot.")

    #the format string has to be handed to ax.plot positionally, so it is taken
    #out of kwargs before the remaining keyword arguments are forwarded
    if geomtype in ("Point", "MultiPoint"):
        fmt = kwargs.pop('fmt', '.r') #red points if format was not given
        points = vector.cpp_vector.get_points(layer_name)
        ax.plot(points[0], points[1], fmt, **kwargs)
    elif geomtype in ("LineString", "MultiLineString"):
        fmt = kwargs.pop('fmt', '-k') #black lines if format was not given
        lines = vector.cpp_vector.get_linestrings(layer_name)
        for line in lines:
            ax.plot(line[0], line[1], fmt, **kwargs)
    else:
        raise ValueError("geomtype must be of type 'Point', 'MultiPoint', 'LineString', or 'MultiLineString'")