sgspy 1.0.2__cp310-cp310-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. sgspy/__init__.py +82 -0
  2. sgspy/_sgs.cpython-310-x86_64-linux-gnu.so +0 -0
  3. sgspy/calculate/__init__.py +18 -0
  4. sgspy/calculate/pca/__init__.py +2 -0
  5. sgspy/calculate/pca/pca.py +158 -0
  6. sgspy/calculate/representation/__init__.py +2 -0
  7. sgspy/calculate/representation/representation.py +3 -0
  8. sgspy/sample/__init__.py +30 -0
  9. sgspy/sample/ahels/__init__.py +2 -0
  10. sgspy/sample/ahels/ahels.py +3 -0
  11. sgspy/sample/clhs/__init__.py +2 -0
  12. sgspy/sample/clhs/clhs.py +202 -0
  13. sgspy/sample/nc/__init__.py +2 -0
  14. sgspy/sample/nc/nc.py +3 -0
  15. sgspy/sample/srs/__init__.py +2 -0
  16. sgspy/sample/srs/srs.py +228 -0
  17. sgspy/sample/strat/__init__.py +2 -0
  18. sgspy/sample/strat/strat.py +394 -0
  19. sgspy/sample/systematic/__init__.py +2 -0
  20. sgspy/sample/systematic/systematic.py +233 -0
  21. sgspy/stratify/__init__.py +27 -0
  22. sgspy/stratify/breaks/__init__.py +2 -0
  23. sgspy/stratify/breaks/breaks.py +222 -0
  24. sgspy/stratify/kmeans/__init__.py +2 -0
  25. sgspy/stratify/kmeans/kmeans.py +3 -0
  26. sgspy/stratify/map/__init__.py +2 -0
  27. sgspy/stratify/map/map_stratifications.py +244 -0
  28. sgspy/stratify/poly/__init__.py +2 -0
  29. sgspy/stratify/poly/poly.py +170 -0
  30. sgspy/stratify/quantiles/__init__.py +2 -0
  31. sgspy/stratify/quantiles/quantiles.py +276 -0
  32. sgspy/utils/__init__.py +18 -0
  33. sgspy/utils/plot.py +143 -0
  34. sgspy/utils/raster.py +605 -0
  35. sgspy/utils/vector.py +268 -0
  36. sgspy-1.0.2.data/data/sgspy/libonedal.so.3 +0 -0
  37. sgspy-1.0.2.data/data/sgspy/proj.db +0 -0
  38. sgspy-1.0.2.dist-info/METADATA +13 -0
  39. sgspy-1.0.2.dist-info/RECORD +40 -0
  40. sgspy-1.0.2.dist-info/WHEEL +5 -0
@@ -0,0 +1,244 @@
1
+ # ******************************************************************************
2
+ #
3
+ # Project: sgs
4
+ # Purpose: map multiple stratification rasters
5
+ # Author: Joseph Meyer
6
+ # Date: September, 2025
7
+ #
8
+ # ******************************************************************************
9
+
10
+ ##
11
+ # @defgroup user_map map
12
+ # @ingroup user_stratify
13
+
14
+ import os
15
+ import sys
16
+ import site
17
+ import tempfile
18
+ from sgspy.utils import SpatialRaster
19
+
20
+ #ensure _sgs binary can be found
21
+ site_packages = list(filter(lambda x : 'site-packages' in x, site.getsitepackages()))[0]
22
+ sys.path.append(os.path.join(site_packages, "sgspy"))
23
+ sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
24
+ from _sgs import map_cpp
25
+
26
+ GIGABYTE = 1073741824
27
+
28
##
# @ingroup user_map
# This function conducts mapping on existing stratifications.
#
# The pre-existing stratifications are passed in the form of a raster, bands, and num_stratum.
# The bands argument specifies which bands within the raster should be used, the num_stratum
# argument specifies the number of strata within each of those bands.
#
# The arguments are passed in the form of tuples, of which there can be any number
# (at least one is required). For example, both of the following are valid:
#  - map((rast1, bands1, num_stratum1))
#  - map((rast1, bands1, num_stratum1), (rast1, bands2, num_stratum2))
#
# The raster within the tuple MUST be of type sgspy.SpatialRaster.
# The bands argument MUST be:
#  - an int, specifying a single band.
#  - a str, specifying a single band.
#  - a list of ints, specifying the indexes of bands.
#  - a list of strings, specifying the names of bands.
#
# The num_stratum argument MUST be
#  - an int, if bands is an int or string, specifying the exact number of strata in the
#       selected band.
#  - a list of ints of the same length as bands, specifying the exact number of strata in
#       each of the bands specified by the bands list.
#
# The filename parameter specifies an output file name. Right now the only file format
# accepted is GTiff (.tiff).
#
# The thread_count parameter specifies the number of threads which this function will
# utilize in the case where the raster is large and may not fit in memory. If the full
# raster can fit in memory and does not need to be processed in blocks, this argument
# will be ignored. The default is 8 threads, although the optimal number will depend
# significantly on the hardware being used and may be more or less than 8.
#
# The driver_options parameter is used to specify creation options for the output
# raster, such as compression. See options for the GTiff driver here:
# https://gdal.org/en/stable/drivers/raster/gtiff.html#creation-options
# The keys in the driver_options dict must be strings, the values are converted to
# string. The options must be valid for the driver corresponding to the filename,
# and if filename is not given they must be valid for the GTiff format, as that
# is the format used to store temporary raster files. Note that if this parameter
# is given, but filename is not and the raster fits entirely in memory, the
# driver_options parameter will be ignored.
#
# Examples
# --------------------
# rast = sgspy.SpatialRaster("rast.tif") @n
# breaks = sgspy.stratify.breaks(rast, breaks={'zq90': [3, 5, 11, 18], 'pzabove2': [20, 40, 60, 80]}) @n
# quantiles = sgspy.stratify.quantiles(rast, num_strata={'zsd': 25}) @n
# srast = sgspy.stratify.map((breaks, ['strat_zq90', 'strat_pzabove2'], [5, 5]), (quantiles, 'strat_zsd', 25))
#
# rast = sgspy.SpatialRaster("rast.tif") @n
# inventory = sgspy.SpatialVector("inventory_polygons.shp") @n
# breaks = sgspy.stratify.breaks(rast, breaks={'zq90': [3, 5, 11, 18], 'pzabove2': [20, 40, 60, 80]}) @n
# poly = sgspy.stratify.poly(rast, inventory, attribute="NUTRIENTS", layer_name="inventory_polygons", features=['poor', 'medium', 'rich']) @n
# srast = sgspy.stratify.map((breaks, [0, 1], [5, 5]), (poly, 0, 3), filename="mapped_srast.tif", driver_options={"COMPRESS": "LZW"})
#
# Parameters
# --------------------
# *args : tuple[SpatialRaster, int|str|list[int]|list[str], int|list[int]] @n
#     tuples specifying raster bands and their number of stratifications @n @n
# filename : str @n
#     filename to write to or '' if no file should be written @n @n
# thread_count : int @n
#     the number of threads to use when multithreading large images @n @n
# driver_options : dict[str] @n
#     the creation options as defined by GDAL which will be passed when creating output files @n @n
#
# Returns
# --------------------
# a SpatialRaster object containing a band of mapped stratifications from the input raster(s).
#
# Raises
# --------------------
# TypeError @n
#     if a parameter or a tuple member has the wrong type, or if only one of bands and
#     num_stratum is a list @n @n
# ValueError @n
#     if no tuples are given, raster dimensions differ, a band does not exist, the bands and
#     num_stratum lists differ in length, the product of all strata counts exceeds the 32-bit
#     signed integer maximum, or a driver_options key is not a string @n @n
# RuntimeError @n
#     if one of the rasters has already been closed @n @n
def map(*args: tuple[SpatialRaster, int|str|list[int]|list[str], int|list[int]],
        filename: str = '',
        thread_count: int = 8,
        driver_options: dict = None):

    MAX_STRATA_VAL = 2147483647 #maximum value stored within a 32-bit signed integer to ensure no overflow

    if type(filename) is not str:
        raise TypeError("'filename' parameter must be of type str.")

    if type(thread_count) is not int:
        raise TypeError("'thread_count' parameter must be of type int.")

    if driver_options is not None and type(driver_options) is not dict:
        raise TypeError("'driver_options' parameter, if given, must be of type dict.")

    #the first raster supplies the reference dimensions and temporarily owns the temp
    #directory, so at least one tuple is required
    if len(args) == 0:
        raise ValueError("at least one (raster, bands, num_stratum) tuple must be given.")

    #helper function which checks int/str value and returns int band index
    def get_band_int(raster: SpatialRaster, band: int|str) -> int:
        #if an int is passed, check and return
        if type(band) is int:
            if band not in range(raster.band_count):
                raise ValueError("band {} is out of range.".format(band))
            return band

        #if a string is passed, check and return corresponding int
        if band not in raster.bands:
            msg = "band {} is not a band within the raster.".format(band)
            raise ValueError(msg)
        return raster.band_name_dict[band]

    raster_list = []
    band_lists = []
    strata_lists = []

    #reference dimensions, taken from the first raster once its type has been validated
    height = None
    width = None

    raster_size_bytes = 0
    large_raster = False
    for (raster, bands, num_stratum) in args:
        if type(raster) is not SpatialRaster:
            raise TypeError("first value in each tuple argument must be of type sgspy.SpatialRaster.")

        if type(bands) not in [int, str, list]:
            raise TypeError("second value in each tuple argument must be of type int, str, or list.")

        if type(num_stratum) not in [int, list]:
            raise TypeError("third value in each tuple argument must be of type int or list.")

        if raster.closed:
            raise RuntimeError("the C++ object which the raster object wraps has been cleaned up and closed.")

        if height is None:
            height = raster.height
            width = raster.width

        if raster.height != height:
            raise ValueError("height is not the same across all rasters.")

        if raster.width != width:
            raise ValueError("width is not the same across all rasters.")

        #error checking on bands and num_stratum lists
        if type(bands) is list and type(num_stratum) is list and len(bands) != len(num_stratum):
            raise ValueError("if bands and num_stratum arguments are lists, they must have the same length.")

        if (type(bands) is list) ^ (type(num_stratum) is list): #XOR
            raise TypeError("if one of bands and num_stratum is list, the other one must be a list of the same length.")

        if type(bands) is list and len(bands) > raster.band_count:
            raise ValueError("bands list cannot have more bands than raster contains.")

        #normalize the scalar form to the list form so both are handled by the same code
        if type(bands) is list:
            requested_bands = bands
            requested_strata = num_stratum
        else:
            requested_bands = [bands]
            requested_strata = [num_stratum]

        #error checking on band int/string values
        band_list = []
        stratum_list = []
        for (band, strata_count) in zip(requested_bands, requested_strata):
            band_int = get_band_int(raster, band)
            band_list.append(band_int)
            stratum_list.append(strata_count)

            #check for large raster
            pixel_size = raster.cpp_raster.get_raster_band_type_size(band_int)
            band_size = height * width * pixel_size
            raster_size_bytes += band_size
            if band_size > GIGABYTE:
                #this must be an assignment; a comparison here would leave the flag unset
                large_raster = True

        #prepare cpp function arguments
        raster_list.append(raster.cpp_raster)
        band_lists.append(band_list)
        strata_lists.append(stratum_list)

    #if any 1 band is larger than a gigabyte, or all bands together are larger than 4
    #large_raster is defined to let the C++ function know to process in blocks rather
    #than putting the entire raster into memory.
    large_raster = large_raster or (raster_size_bytes > GIGABYTE * 4)

    #error check max value for potential overflow error
    max_mapped_strata = 1
    for strata_list in strata_lists:
        for strata_count in strata_list:
            max_mapped_strata = max_mapped_strata * strata_count
    if max_mapped_strata > MAX_STRATA_VAL:
        raise ValueError("the mapped strata will cause an overflow error because the max strata number is too large.")

    #ensure driver options keys are strings, and convert driver options vals to strings
    driver_options_str = {}
    if driver_options:
        for (key, val) in driver_options.items():
            if type(key) is not str:
                raise ValueError("the key for all key/value pairs in teh driver_options dict must be a string")
            driver_options_str[key] = str(val)

    #make a temp directory which will be deleted if there is any problem when calling the cpp function
    first_raster = args[0][0]
    temp_dir = tempfile.mkdtemp()
    first_raster.have_temp_dir = True
    first_raster.temp_dir = temp_dir

    #call cpp map function
    srast = SpatialRaster(map_cpp(
        raster_list,
        band_lists,
        strata_lists,
        filename,
        large_raster,
        thread_count,
        temp_dir,
        driver_options_str
    ))

    #now that it's created, give the cpp raster object ownership of the temporary directory
    first_raster.have_temp_dir = False
    srast.cpp_raster.set_temp_dir(temp_dir)
    srast.temp_dataset = filename == "" and large_raster
    srast.filename = filename

    return srast
@@ -0,0 +1,2 @@
1
+ from . import poly
2
+ from .poly import poly
@@ -0,0 +1,170 @@
1
+ # ******************************************************************************
2
+ #
3
+ # Project: sgs
4
+ # Purpose: stratification using polygons
5
+ # Author: Joseph Meyer
6
+ # Date: June, 2025
7
+ #
8
+ # ******************************************************************************
9
+
10
+ ##
11
+ # @defgroup user_poly poly
12
+ # @ingroup user_stratify
13
+
14
+ import os
15
+ import sys
16
+ import site
17
+ import tempfile
18
+
19
+ from sgspy.utils import (
20
+ SpatialRaster,
21
+ SpatialVector,
22
+ )
23
+
24
+ #ensure _sgs binary can be found
25
+ site_packages = list(filter(lambda x : 'site-packages' in x, site.getsitepackages()))[0]
26
+ sys.path.append(os.path.join(site_packages, "sgspy"))
27
+ sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
28
+ from _sgs import poly_cpp
29
+
30
+ GIGABYTE = 1073741824
31
+
32
##
# @ingroup user_poly
# This function conducts stratification on a vector dataset by rasterizing a polygon
# layer, and using its attribute values to determine stratifications.
#
# The layer_name parameter is the layer to be rasterized, and the attribute
# is the attribute within the layer to check. The features parameter specifies
# which feature value corresponds to which stratification.
#
# The features parameter is a list containing strings and lists of strings.
# The index within this list determines the stratification value. For example:
#
# features = ["low", "medium", "high"] @n
# would result in 3 stratifications (0, 1, 2) where 'low' would correspond
# to stratification 0, medium to 1, and high to 2.
#
# features = ["low", ["medium", "high"]] @n
# would result in 2 stratifications (0, 1) where 'low' would correspond
# to stratification 0, and both medium and high to stratification 1.
#
# Examples
# --------------------
# rast = sgspy.SpatialRaster('rast.tif') @n
# vect = sgspy.SpatialVector('inventory_polygons.shp') @n
# srast = sgspy.stratify.poly(rast, vect, attribute='NUTRIENTS', layer_name='inventory_polygons', features=['poor', 'medium', 'rich'])
#
# rast = sgspy.SpatialRaster('rast.tif') @n
# vect = sgspy.SpatialVector('inventory_polygons.shp') @n
# srast = sgspy.stratify.poly(rast, vect, attribute='NUTRIENTS', layer_name='inventory_polygons', features=['poor', ['medium', 'rich']], filename='nutrient_stratification.tif')
#
# Parameters
# --------------------
# rast : SpatialRaster @n
#     raster data structure which will determine height, width, geotransform, and projection @n @n
# vect : SpatialVector @n
#     the vector of polygons to stratify @n @n
# layer_name : str @n
#     the layer in the vector to be stratified @n @n
# attribute : str @n
#     the attribute in the layer to be stratified @n @n
# features : list[str|list[str]] @n
#     the stratification values of each feature value, represented as the index in the list @n @n
# filename : str @n
#     the output filename to write to, if desired @n @n
# driver_options : dict[str] @n
#     options forwarded to the C++ layer; keys must be strings, values are converted to strings @n @n
#
# Returns
# --------------------
# a SpatialRaster object containing the rasterized polygon.
#
# Raises
# --------------------
# TypeError @n
#     if any parameter is not of the type listed above @n @n
# ValueError @n
#     if features contains no feature values, if there are too many features, or if a
#     driver_options key is not a string @n @n
# RuntimeError @n
#     if rast has already been closed @n @n
def poly(
    rast: SpatialRaster,
    vect: SpatialVector,
    layer_name: str,
    attribute: str,
    features: list[str|list[str]],
    filename:str = '',
    driver_options: dict = None):

    MAX_STRATA_VAL = 2147483647 #maximum value stored within a 32-bit signed integer to ensure no overflow

    if type(rast) is not SpatialRaster:
        raise TypeError("'rast' parameter must be of type sgspy.SpatialRaster")

    if type(vect) is not SpatialVector:
        raise TypeError("'vect' parameter must be of type sgspy.SpatialVector")

    if type(layer_name) is not str:
        raise TypeError("'layer_name' parameter must be of type str.")

    if type(attribute) is not str:
        raise TypeError("'attribute' parameter must be of type str.")

    if type(features) is not list:
        raise TypeError("'features' parameter must be of type list.")

    if type(filename) is not str:
        raise TypeError("'filename' parameter must be of type str.")

    if driver_options is not None and type(driver_options) is not dict:
        raise TypeError("'driver_options' parameter, if givne, must be of type dict.")

    if rast.closed:
        raise RuntimeError("the C++ object which the rast object wraps has been cleaned up and closed.")

    num_strata = len(features)

    if num_strata >= MAX_STRATA_VAL:
        raise ValueError("the number of features (and resulting max strata) will cause an overflow error because the max strata number is too large.")

    #generate query cases and where clause using features and attribute
    case_entries = []
    where_entries = []
    for (stratum, feature) in enumerate(features):
        #a plain value maps alone to its stratum, a list maps every member to the same stratum
        values = feature if type(feature) is list else [feature]
        for value in values:
            #double any single quote so the value stays inside its SQL string literal
            literal = str(value).replace("'", "''")
            case_entries.append("WHEN '{}' THEN {} ".format(literal, stratum))
            where_entries.append("{}='{}'".format(attribute, literal))

    #an empty features list (or one holding only empty lists) would produce a query with an
    #empty WHERE clause, which is not valid SQL
    if not where_entries:
        raise ValueError("'features' parameter must contain at least one feature value.")

    cases = "".join(case_entries)
    where_clause = " OR ".join(where_entries)

    #generate SQL query
    sql_query = f"""SELECT CASE {attribute} {cases}ELSE NULL END AS strata, {layer_name}.* FROM {layer_name} WHERE {where_clause}"""

    #ensure driver options keys are strings, and convert driver options vals to strings
    driver_options_str = {}
    if driver_options:
        for (key, val) in driver_options.items():
            if type(key) is not str:
                raise ValueError("the key for al key/value pairs in teh driver_options dict must be a string.")
            driver_options_str[key] = str(val)

    #NOTE(review): this compares a pixel count (not a byte count) against GIGABYTE - confirm
    #that this is the intended threshold for the output raster
    large_raster = rast.height * rast.width > GIGABYTE

    #make temp directory which will be deleted if there is any problem when calling the cpp function
    temp_dir = tempfile.mkdtemp()
    rast.have_temp_dir = True
    rast.temp_dir = temp_dir

    srast = SpatialRaster(poly_cpp(
        vect.cpp_vector,
        rast.cpp_raster,
        num_strata,
        layer_name,
        sql_query,
        filename,
        large_raster,
        temp_dir,
        driver_options_str
    ))

    #now that it's created, give the cpp raster object ownership of the temporary directory
    rast.have_temp_dir = False
    srast.cpp_raster.set_temp_dir(temp_dir)
    srast.temp_dataset = filename == "" and large_raster
    srast.filename = filename

    return srast
170
+
@@ -0,0 +1,2 @@
1
+ from . import quantiles
2
+ from .quantiles import quantiles