sgspy 1.0.1__cp313-cp313-manylinux_2_39_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. sgspy/__init__.py +82 -0
  2. sgspy/_sgs.cpython-313-x86_64-linux-gnu.so +0 -0
  3. sgspy/calculate/__init__.py +18 -0
  4. sgspy/calculate/pca/__init__.py +2 -0
  5. sgspy/calculate/pca/pca.py +152 -0
  6. sgspy/calculate/representation/__init__.py +2 -0
  7. sgspy/calculate/representation/representation.py +3 -0
  8. sgspy/sample/__init__.py +30 -0
  9. sgspy/sample/ahels/__init__.py +2 -0
  10. sgspy/sample/ahels/ahels.py +3 -0
  11. sgspy/sample/clhs/__init__.py +2 -0
  12. sgspy/sample/clhs/clhs.py +198 -0
  13. sgspy/sample/nc/__init__.py +2 -0
  14. sgspy/sample/nc/nc.py +3 -0
  15. sgspy/sample/srs/__init__.py +2 -0
  16. sgspy/sample/srs/srs.py +224 -0
  17. sgspy/sample/strat/__init__.py +2 -0
  18. sgspy/sample/strat/strat.py +390 -0
  19. sgspy/sample/systematic/__init__.py +2 -0
  20. sgspy/sample/systematic/systematic.py +229 -0
  21. sgspy/stratify/__init__.py +27 -0
  22. sgspy/stratify/breaks/__init__.py +2 -0
  23. sgspy/stratify/breaks/breaks.py +218 -0
  24. sgspy/stratify/kmeans/__init__.py +2 -0
  25. sgspy/stratify/kmeans/kmeans.py +3 -0
  26. sgspy/stratify/map/__init__.py +2 -0
  27. sgspy/stratify/map/map_stratifications.py +240 -0
  28. sgspy/stratify/poly/__init__.py +2 -0
  29. sgspy/stratify/poly/poly.py +166 -0
  30. sgspy/stratify/quantiles/__init__.py +2 -0
  31. sgspy/stratify/quantiles/quantiles.py +272 -0
  32. sgspy/utils/__init__.py +18 -0
  33. sgspy/utils/plot.py +143 -0
  34. sgspy/utils/raster.py +602 -0
  35. sgspy/utils/vector.py +262 -0
  36. sgspy-1.0.1.data/data/sgspy/libonedal.so.3 +0 -0
  37. sgspy-1.0.1.data/data/sgspy/proj.db +0 -0
  38. sgspy-1.0.1.dist-info/METADATA +13 -0
  39. sgspy-1.0.1.dist-info/RECORD +40 -0
  40. sgspy-1.0.1.dist-info/WHEEL +5 -0
@@ -0,0 +1,229 @@
1
+ # ******************************************************************************
2
+ #
3
+ # Project: sgs
4
+ # Purpose: systematic sampling
5
+ # Author: Joseph Meyer
6
+ # Date: June, 2025
7
+ #
8
+ # ******************************************************************************
9
+
10
+ ##
11
+ # @defgroup user_systematic systematic
12
+ # @ingroup user_sample
13
+
14
+ import os
15
+ import sys
16
+ from typing import Optional
17
+
18
+ import numpy as np
19
+ import matplotlib.pyplot as plt
20
+
21
+ from sgspy.utils import (
22
+ SpatialRaster,
23
+ SpatialVector,
24
+ plot,
25
+ )
26
+
27
+ sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
28
+ from _sgs import systematic_cpp
29
+
30
def _is_real_number(value) -> bool:
    """Return True for int/float values (subclasses included) but never for bool."""
    # bool is a subclass of int, so it has to be excluded explicitly
    return isinstance(value, (int, float)) and not isinstance(value, bool)


##
# @ingroup user_systematic
# This function conducts systematic sampling within the extent of
# the raster given. The 'cellsize' parameter specifies the grid size,
# the 'shape' parameter specifies the grid shape, and the 'location'
# parameter specifies where in the grid a sample should fall into.
#
# shape can be one of 'square' or 'hexagon'.
# location can be one of 'corners', 'centers', 'random'.
#
# An access vector of LineString or MultiLineString type can be provided.
# buff_outer specifies the buffer distance around the geometry which is
# allowed to be included in the sampling, buff_inner specifies the geometry
# which is not allowed to be included in the sampling. buff_outer must
# be greater than 0 and larger than buff_inner. A missing or negative
# buff_inner is treated as 0. For a multi-layer vector, layer_name
# must be provided. If no access vector is given, layer_name, buff_inner
# and buff_outer are ignored.
#
# A vector containing existing sample points can be provided through the
# 'existing' parameter; it is forwarded to the underlying C++ routine.
# NOTE(review): how the existing points are combined with the systematic
# grid is decided by the C++ implementation -- confirm before relying on it.
#
# If the force parameter is True, then the samples are forced to
# fall on an index which is NOT a no data value. This may result
# in some grids not being sampled.
#
# Examples
# --------------------
# rast = sgspy.SpatialRaster("raster.tif") @n
# samples = sgspy.sample.systematic(rast, 500, "hexagon", "centers")
#
# rast = sgspy.SpatialRaster("raster.tif") @n
# samples = sgspy.sample.systematic(rast, 500, "square", "corners", plot=True, filename="systematic_samples.shp")
#
# rast = sgspy.SpatialRaster("raster.tif") @n
# samples = sgspy.sample.systematic(rast, 500, "hexagon", "random", force=True)
#
# rast = sgspy.SpatialRaster("raster.tif") @n
# access = sgspy.SpatialVector("access_network.shp") @n
# samples = sgspy.sample.systematic(rast, 500, "hexagon", "corners", access=access, buff_outer=300)
#
# rast = sgspy.SpatialRaster("raster.tif") @n
# existing = sgspy.SpatialVector("existing_samples.shp") @n
# samples = sgspy.sample.systematic(rast, 500, "hexagon", "corners", existing=existing)
#
# Parameters
# --------------------
# rast : SpatialRaster @n
#     the raster to be sampled @n @n
# cellsize : int | float @n
#     the size of the grid cells to be sampled, must be greater than 0 @n @n
# shape : str @n
#     the shape of the grid cells to be sampled @n @n
# location : str @n
#     the location within the grid cell to be sampled @n @n
# existing (optional) : SpatialVector @n
#     a vector specifying existing sample points @n @n
# access (optional) : SpatialVector @n
#     a vector specifying access network @n @n
# layer_name (optional) : str @n
#     the layer within access that is to be used for sampling @n @n
# buff_inner (optional) : int | float @n
#     buffer boundary specifying distance from access which CANNOT be sampled @n @n
# buff_outer (optional) : int | float @n
#     buffer boundary specifying distance from access which CAN be sampled @n @n
# force : bool @n
#     True if samples are not allowed to fall on a nodata pixel @n @n
# plot : bool @n
#     whether or not to plot the resulting samples @n @n
# filename : str @n
#     the filename to write to or "" if not to write @n @n
#
# Returns
# --------------------
# a SpatialVector object containing point geometries of sample locations
#
# Raises
# --------------------
# TypeError @n
#     if any argument has the wrong type @n @n
# ValueError @n
#     if cellsize, shape, location, layer_name or the buffers are invalid @n @n
# RuntimeError @n
#     if the raster has already been closed
def systematic(
    rast: SpatialRaster,
    cellsize: int | float,
    shape: str = "square",
    location: str = "centers",
    existing: Optional[SpatialVector] = None,
    access: Optional[SpatialVector] = None,
    layer_name: Optional[str] = None,
    buff_inner: Optional[int | float] = None,
    buff_outer: Optional[int | float] = None,
    force: bool = False,
    plot: bool = False,
    filename: str = ""):

    # ---- type validation -------------------------------------------------
    if not isinstance(rast, SpatialRaster):
        raise TypeError("'rast' parameter must be of type sgspy.SpatialRaster.")

    if not _is_real_number(cellsize):
        raise TypeError("'cellsize' parameter must be of type int or float.")

    if not isinstance(shape, str):
        raise TypeError("'shape' paramter must be of type str.")

    if not isinstance(location, str):
        raise TypeError("'location' parameter must be of type str.")

    if existing is not None and not isinstance(existing, SpatialVector):
        raise TypeError("'existing' parameter, if given, must be of type sgspy.SpatialVector.")

    if access is not None and not isinstance(access, SpatialVector):
        raise TypeError("'access' parameter, if given, must be of type sgspy.SpatialVector.")

    if layer_name is not None and not isinstance(layer_name, str):
        raise TypeError("'layer_name' parameter, if given, must be of type str.")

    if buff_inner is not None and not _is_real_number(buff_inner):
        raise TypeError("'buff_inner' parameter, if given, must be of type int or float.")

    if buff_outer is not None and not _is_real_number(buff_outer):
        raise TypeError("'buff_outer' parameter, if given, must be of type int or float.")

    if not isinstance(force, bool):
        raise TypeError("'force' parameter must be of type bool.")

    if not isinstance(plot, bool):
        raise TypeError("'plot' parameter must be of type bool.")

    if not isinstance(filename, str):
        raise TypeError("'filename' parameter must be of type str.")

    # ---- value validation ------------------------------------------------
    if rast.closed:
        raise RuntimeError("the C++ object which the raster object wraps has been cleaned up and closed.")

    if cellsize <= 0:
        raise ValueError("cellsize must be greater than 0")

    if shape not in ("square", "hexagon"):
        raise ValueError("shape parameter must be one of 'square', 'hexagon'")

    if location not in ("centers", "corners", "random"):
        raise ValueError("location parameter must be one of 'centers', 'corners', 'random'")

    # ---- access network --------------------------------------------------
    # test against None explicitly: whether a SpatialVector is "truthy" must
    # not decide if the access restriction is applied
    if access is not None:
        if layer_name is None:
            if len(access.layers) > 1:
                raise ValueError("if there are multiple layers in the access vector, layer_name parameter must be passed.")
            layer_name = access.layers[0]

        if layer_name not in access.layers:
            raise ValueError("layer specified by 'layer_name' does not exist in the access vector")

        # a missing or negative inner buffer means "no exclusion zone"
        if buff_inner is None or buff_inner < 0:
            buff_inner = 0

        # 0 is rejected here as well so the message matches the condition
        if buff_outer is None or buff_outer <= 0:
            raise ValueError("if an access vector is given, buff_outer must be a float greater than 0.")

        if buff_inner >= buff_outer:
            raise ValueError("buff_outer must be greater than buff_inner")

        access_vector = access.cpp_vector
    else:
        # placeholder values handed to the C++ function when no access vector is used
        access_vector = None
        layer_name = ""
        buff_inner = -1
        buff_outer = -1

    existing_vector = existing.cpp_vector if existing is not None else None

    samples, points, grid = systematic_cpp(
        rast.cpp_raster,
        cellsize,
        shape,
        location,
        existing_vector,
        access_vector,
        layer_name,
        buff_inner,
        buff_outer,
        force,
        plot,
        filename
    )

    #plot new vector if requested
    if plot:
        _, ax = plt.subplots()
        ax.set_xlim([rast.xmin, rast.xmax])
        ax.set_ylim([rast.ymin, rast.ymax])
        rast.plot(ax, band=rast.bands[0])
        title = "samples on " + rast.bands[0]

        #plot grid; each entry is indexed as [0] -> x values, [1] -> y values
        for cell_outline in grid:
            ax.plot(cell_outline[0], cell_outline[1], '-k')

        #plot sample points
        ax.plot(points[0], points[1], '.r')
        ax.set_title(label=title)
        plt.show()

    return SpatialVector(samples)
@@ -0,0 +1,27 @@
1
+ ##
2
+ # @defgroup user_stratify stratify
3
+ # @ingroup user
4
+ #
5
+ # Documentation for the stratification functions.
6
+
7
+ from . import (
8
+ breaks,
9
+ kmeans,
10
+ poly,
11
+ quantiles,
12
+ map,
13
+ )
14
+
15
+ from .breaks import breaks
16
+ from .kmeans import kmeans
17
+ from .poly import poly
18
+ from .quantiles import quantiles
19
+ from .map import map
20
+
21
+ __all__ = [
22
+ "breaks",
23
+ "kmeans",
24
+ "poly",
25
+ "quantiles",
26
+ "map",
27
+ ]
@@ -0,0 +1,2 @@
1
+ from . import breaks
2
+ from .breaks import breaks
@@ -0,0 +1,218 @@
1
+ # ******************************************************************************
2
+ #
3
+ # Project: sgs
4
+ # Purpose: stratification using user defined breaks
5
+ # Author: Joseph Meyer
6
+ # Date: June, 2025
7
+ #
8
+ # ******************************************************************************
9
+
10
+ ##
11
+ # @defgroup user_breaks breaks
12
+ # @ingroup user_stratify
13
+
14
+ import os
15
+ import sys
16
+ import tempfile
17
+ import numpy as np
18
+ from sgspy.utils import SpatialRaster
19
+
20
+ sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
21
+ from _sgs import breaks_cpp
22
+
23
+ GIGABYTE = 1073741824  # 2**30 bytes; breaks() compares band and total raster sizes against this
24
+
25
##
# @ingroup user_breaks
# This function conducts stratification on the raster given
# according to the user defined breaks.
#
# The breaks may be defined as a single list of ints or floats
# in the case of a raster with a single band. Or, they may be defined
# as a list of lists of ints or floats, one inner list per raster band,
# where the index of the inner list indicates the raster band.
# Or, they may be defined as a dict where the (str) key represents
# the raster band and the value is a list of ints or floats.
#
# if the map parameter is given, an extra output band will be used which combines
# all stratifications from the previous bands used. A single value in the mapped
# output band corresponds to a single combination of values from the previous
# bands.
#
# the filename parameter specifies an output file name. Right now the only file format
# accepted is GTiff (.tif).
#
# the thread_count parameter specifies the number of threads which this function will
# utilize in the case where the raster is large and may not fit in memory. If the full
# raster can fit in memory and does not need to be processed in blocks, this argument
# will be ignored. The default is 8 threads, although the optimal number will depend significantly
# on the hardware being used and may be less or more than 8.
#
# The driver_options parameter is used to specify creation options for the output raster.
# See options for the Gtiff driver here: https://gdal.org/en/stable/drivers/raster/gtiff.html#creation-options
# The keys in the driver_options dict must be strings, the values are converted to string.
# The options must be valid for the driver corresponding to the filename, and if filename is not given
# they must be valid for the GTiff format, as that is the format used to store temporary raster files.
# Note that if this parameter is given, but filename is not and the raster fits entirely in memory, the
# driver_options parameter will be ignored.
#
# Examples
# --------------------
# rast = sgspy.SpatialRaster("multi_band_rast.tif") @n
# srast = sgspy.stratify.breaks(rast, breaks={"band_name1": [3, 5, 11, 18]})
#
# rast = sgspy.SpatialRaster("single_band_rast.tif") @n
# srast = sgspy.stratify.breaks(rast, breaks=[20, 40, 60, 80], filename="breaks.tif", driver_options={"COMPRESS": "LZW"})
#
# rast = sgspy.SpatialRaster("multi_band_rast.tif") @n
# srast = sgspy.stratify.breaks(rast, breaks={"band_name1": [3, 5, 11, 18], "band_name2": [20, 40, 60, 80]}, map=True)
#
# rast = sgspy.SpatialRaster("multi_band_rast.tif") @n
# srast = sgspy.stratify.breaks(rast, breaks=[[3, 5, 11, 18], [40, 60, 80], [2, 5]])
#
# Parameters
# --------------------
# rast : SpatialRaster @n
#     raster data structure containing the raster to stratify @n @n
# breaks : list[float | list[float]] | dict[str, list[float]], @n
#     user defined breaks to stratify, must not be empty @n @n
# map : bool @n
#     whether to map the stratification of multiple raster bands onto a single band @n @n
# filename : str @n
#     filename to write to or '' if no file should be written @n @n
# thread_count : int @n
#     the number of threads to use when multithreading large images, at least 1 @n @n
# driver_options (optional) : dict @n
#     the creation options as defined by GDAL which will be passed when creating output files @n @n
#
# Returns
# --------------------
# a SpatialRaster object containing stratified raster bands.
#
# Raises
# --------------------
# TypeError @n
#     if an argument, a breaks element or a breaks dict key/value has the wrong type @n @n
# ValueError @n
#     if breaks is empty, does not match the raster bands, would overflow a 32-bit
#     strata value, if thread_count is less than 1, or a driver_options key is not a string @n @n
# RuntimeError @n
#     if the raster has already been closed
def breaks(
    rast: SpatialRaster,
    breaks: list[float | list[float]] | dict[str, list[float]],
    map: bool = False,
    filename: str = '',
    thread_count: int = 8,
    driver_options: dict | None = None
    ):

    MAX_STRATA_VAL = 2147483647 #maximum value stored within a 32-bit signed integer to ensure no overflow

    # ---- type validation -------------------------------------------------
    if not isinstance(rast, SpatialRaster):
        raise TypeError("'rast' parameter must be of type sgspy.SpatialRaster")

    if not isinstance(breaks, (list, dict)):
        raise TypeError("'breaks' parameter must be of type list or dict.")

    if not isinstance(map, bool):
        raise TypeError("'map' parameter must be of type bool.")

    if not isinstance(filename, str):
        raise TypeError("'filename' parameter must be of type str.")

    # bool is a subclass of int, so it is rejected explicitly
    if isinstance(thread_count, bool) or not isinstance(thread_count, int):
        raise TypeError("'thread_count' parameter must be of type int.")

    if driver_options is not None and not isinstance(driver_options, dict):
        raise TypeError("'driver_options' parameter, if givne, must be of type dict.")

    if rast.closed:
        raise RuntimeError("the C++ object which the raster object wraps has been cleaned up and closed.")

    # ---- normalise breaks into {band index: list of breaks} ---------------
    breaks_dict = {}

    if isinstance(breaks, list):
        if len(breaks) < 1:
            raise ValueError("breaks list must contain at least one element.")

        # the first element decides which of the two list forms is in use
        first = breaks[0]
        if isinstance(first, list):
            #error check number of rasters bands
            if len(breaks) != rast.band_count:
                raise ValueError("number of lists of breaks must be equal to the number of raster bands.")

            breaks_dict = dict(enumerate(breaks))

        elif isinstance(first, (int, float)) and not isinstance(first, bool):
            #error check number of raster bands
            if rast.band_count != 1:
                raise ValueError("if breaks is a single list, raster must have a single band (has {}).".format(rast.band_count))

            breaks_dict[0] = breaks

        else:
            raise TypeError("if 'breaks' parameter is of type list, it must be filled with with values of type list, int, or float.")

    else: #breaks is a dict
        # mirror the empty-list check above: no bands means nothing to stratify
        if len(breaks) < 1:
            raise ValueError("breaks dict must contain at least one element.")

        for key, val in breaks.items():
            if not isinstance(key, str):
                raise TypeError("if 'breaks' parameter is a dict, all keys must be of type str.")
            if not isinstance(val, list):
                raise TypeError("if 'breaks' parameter is a dict, all values in the key values pairs must be of type list[float].")
            if key not in rast.bands:
                raise ValueError("breaks dict key must be a valid band name (see SpatialRaster.bands for list of names)")

            breaks_dict[rast.band_name_dict[key]] = val

    #error check max value for potential overflow error
    # int(map) is 0 when no mapped band is requested, which keeps the product at 0
    max_mapped_strata = int(map)
    for band_breaks in breaks_dict.values():
        strata_count = len(band_breaks) + 1
        if strata_count > MAX_STRATA_VAL:
            raise ValueError("one of the breaks given will cause an integer overflow error because the max strata number is too large.")

        max_mapped_strata = max_mapped_strata * strata_count

    if max_mapped_strata > MAX_STRATA_VAL:
        raise ValueError("the mapped strata will cause an overflow error because the max strata number is too large.")

    if thread_count < 1:
        raise ValueError("number of threads can't be less than 1.")

    #ensure driver options keys are string, and convert driver options vals to string
    driver_options_str = {}
    if driver_options:
        for option_name, option_val in driver_options.items():
            if not isinstance(option_name, str):
                raise ValueError("the key for all key/value pairs in the driver_options dict must be a string.")
            driver_options_str[option_name] = str(option_val)

    # ---- decide whether the raster has to be processed in blocks ---------
    large_raster = False
    raster_size_bytes = 0
    height = rast.height
    width = rast.width
    for band_index in breaks_dict:
        pixel_size = rast.cpp_raster.get_raster_band_type_size(band_index)
        band_size = height * width * pixel_size
        raster_size_bytes += band_size
        # a single band of 1 GiB or more is enough to force block processing
        if band_size >= GIGABYTE:
            large_raster = True
            break

    #if large_raster is true, the C++ function will process the raster in blocks
    large_raster = large_raster or (raster_size_bytes > GIGABYTE * 4)

    #make a temp directory and register it on the input raster while the C++ call runs
    # NOTE(review): removal of this directory on failure presumably happens in
    # SpatialRaster's own cleanup via have_temp_dir/temp_dir -- confirm.
    temp_dir = tempfile.mkdtemp()
    rast.have_temp_dir = True
    rast.temp_dir = temp_dir

    #call stratify breaks function
    srast = SpatialRaster(breaks_cpp(
        rast.cpp_raster,
        breaks_dict,
        map,
        filename,
        large_raster,
        thread_count,
        temp_dir,
        driver_options_str
    ))

    #now that it's created, give the cpp raster object ownership of the temporary directory
    rast.have_temp_dir = False
    srast.cpp_raster.set_temp_dir(temp_dir)
    srast.temp_dataset = filename == "" and large_raster
    srast.filename = filename

    return srast
@@ -0,0 +1,2 @@
1
+ from . import kmeans
2
+ from .kmeans import kmeans
@@ -0,0 +1,3 @@
1
##
# Placeholder for k-means stratification, which is not implemented.
#
# Raises
# --------------------
# NotImplementedError @n
#     always
def kmeans():
    # the stub used to print this module's path before raising; a library
    # function should not write debugging output to stdout
    raise NotImplementedError("sgspy.stratify.kmeans is not implemented.")
@@ -0,0 +1,2 @@
1
+ from . import map_stratifications
2
+ from .map_stratifications import map