sgspy 1.0.1__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. sgspy/__init__.py +82 -0
  2. sgspy/_sgs.cp311-win_amd64.lib +0 -0
  3. sgspy/_sgs.cp311-win_amd64.pyd +0 -0
  4. sgspy/calculate/__init__.py +18 -0
  5. sgspy/calculate/pca/__init__.py +2 -0
  6. sgspy/calculate/pca/pca.py +152 -0
  7. sgspy/calculate/representation/__init__.py +2 -0
  8. sgspy/calculate/representation/representation.py +3 -0
  9. sgspy/sample/__init__.py +30 -0
  10. sgspy/sample/ahels/__init__.py +2 -0
  11. sgspy/sample/ahels/ahels.py +3 -0
  12. sgspy/sample/clhs/__init__.py +2 -0
  13. sgspy/sample/clhs/clhs.py +198 -0
  14. sgspy/sample/nc/__init__.py +2 -0
  15. sgspy/sample/nc/nc.py +3 -0
  16. sgspy/sample/srs/__init__.py +2 -0
  17. sgspy/sample/srs/srs.py +224 -0
  18. sgspy/sample/strat/__init__.py +2 -0
  19. sgspy/sample/strat/strat.py +390 -0
  20. sgspy/sample/systematic/__init__.py +2 -0
  21. sgspy/sample/systematic/systematic.py +229 -0
  22. sgspy/stratify/__init__.py +27 -0
  23. sgspy/stratify/breaks/__init__.py +2 -0
  24. sgspy/stratify/breaks/breaks.py +218 -0
  25. sgspy/stratify/kmeans/__init__.py +2 -0
  26. sgspy/stratify/kmeans/kmeans.py +3 -0
  27. sgspy/stratify/map/__init__.py +2 -0
  28. sgspy/stratify/map/map_stratifications.py +240 -0
  29. sgspy/stratify/poly/__init__.py +2 -0
  30. sgspy/stratify/poly/poly.py +166 -0
  31. sgspy/stratify/quantiles/__init__.py +2 -0
  32. sgspy/stratify/quantiles/quantiles.py +272 -0
  33. sgspy/utils/__init__.py +18 -0
  34. sgspy/utils/plot.py +143 -0
  35. sgspy/utils/raster.py +602 -0
  36. sgspy/utils/vector.py +262 -0
  37. sgspy-1.0.1.data/data/sgspy/Lerc.dll +0 -0
  38. sgspy-1.0.1.data/data/sgspy/aec.dll +0 -0
  39. sgspy-1.0.1.data/data/sgspy/charset-1.dll +0 -0
  40. sgspy-1.0.1.data/data/sgspy/freexl-1.dll +0 -0
  41. sgspy-1.0.1.data/data/sgspy/gdal.dll +0 -0
  42. sgspy-1.0.1.data/data/sgspy/geos.dll +0 -0
  43. sgspy-1.0.1.data/data/sgspy/geos_c.dll +0 -0
  44. sgspy-1.0.1.data/data/sgspy/geotiff.dll +0 -0
  45. sgspy-1.0.1.data/data/sgspy/gif.dll +0 -0
  46. sgspy-1.0.1.data/data/sgspy/hdf5.dll +0 -0
  47. sgspy-1.0.1.data/data/sgspy/hdf5_cpp.dll +0 -0
  48. sgspy-1.0.1.data/data/sgspy/hdf5_hl.dll +0 -0
  49. sgspy-1.0.1.data/data/sgspy/hdf5_hl_cpp.dll +0 -0
  50. sgspy-1.0.1.data/data/sgspy/iconv-2.dll +0 -0
  51. sgspy-1.0.1.data/data/sgspy/jpeg62.dll +0 -0
  52. sgspy-1.0.1.data/data/sgspy/json-c.dll +0 -0
  53. sgspy-1.0.1.data/data/sgspy/legacy.dll +0 -0
  54. sgspy-1.0.1.data/data/sgspy/libcrypto-3-x64.dll +0 -0
  55. sgspy-1.0.1.data/data/sgspy/libcurl.dll +0 -0
  56. sgspy-1.0.1.data/data/sgspy/libecpg.dll +0 -0
  57. sgspy-1.0.1.data/data/sgspy/libecpg_compat.dll +0 -0
  58. sgspy-1.0.1.data/data/sgspy/libexpat.dll +0 -0
  59. sgspy-1.0.1.data/data/sgspy/liblzma.dll +0 -0
  60. sgspy-1.0.1.data/data/sgspy/libpgtypes.dll +0 -0
  61. sgspy-1.0.1.data/data/sgspy/libpng16.dll +0 -0
  62. sgspy-1.0.1.data/data/sgspy/libpq.dll +0 -0
  63. sgspy-1.0.1.data/data/sgspy/libsharpyuv.dll +0 -0
  64. sgspy-1.0.1.data/data/sgspy/libssl-3-x64.dll +0 -0
  65. sgspy-1.0.1.data/data/sgspy/libwebp.dll +0 -0
  66. sgspy-1.0.1.data/data/sgspy/libwebpdecoder.dll +0 -0
  67. sgspy-1.0.1.data/data/sgspy/libwebpdemux.dll +0 -0
  68. sgspy-1.0.1.data/data/sgspy/libwebpmux.dll +0 -0
  69. sgspy-1.0.1.data/data/sgspy/libxml2.dll +0 -0
  70. sgspy-1.0.1.data/data/sgspy/lz4.dll +0 -0
  71. sgspy-1.0.1.data/data/sgspy/minizip.dll +0 -0
  72. sgspy-1.0.1.data/data/sgspy/netcdf.dll +0 -0
  73. sgspy-1.0.1.data/data/sgspy/openjp2.dll +0 -0
  74. sgspy-1.0.1.data/data/sgspy/pcre2-16.dll +0 -0
  75. sgspy-1.0.1.data/data/sgspy/pcre2-32.dll +0 -0
  76. sgspy-1.0.1.data/data/sgspy/pcre2-8.dll +0 -0
  77. sgspy-1.0.1.data/data/sgspy/pcre2-posix.dll +0 -0
  78. sgspy-1.0.1.data/data/sgspy/proj.db +0 -0
  79. sgspy-1.0.1.data/data/sgspy/proj_9.dll +0 -0
  80. sgspy-1.0.1.data/data/sgspy/qhull_r.dll +0 -0
  81. sgspy-1.0.1.data/data/sgspy/spatialite.dll +0 -0
  82. sgspy-1.0.1.data/data/sgspy/sqlite3.dll +0 -0
  83. sgspy-1.0.1.data/data/sgspy/szip.dll +0 -0
  84. sgspy-1.0.1.data/data/sgspy/tiff.dll +0 -0
  85. sgspy-1.0.1.data/data/sgspy/turbojpeg.dll +0 -0
  86. sgspy-1.0.1.data/data/sgspy/uriparser.dll +0 -0
  87. sgspy-1.0.1.data/data/sgspy/zlib1.dll +0 -0
  88. sgspy-1.0.1.data/data/sgspy/zstd.dll +0 -0
  89. sgspy-1.0.1.dist-info/METADATA +13 -0
  90. sgspy-1.0.1.dist-info/RECORD +91 -0
  91. sgspy-1.0.1.dist-info/WHEEL +4 -0
@@ -0,0 +1,240 @@
1
+ # ******************************************************************************
2
+ #
3
+ # Project: sgs
4
+ # Purpose: map multiple stratification rasters
5
+ # Author: Joseph Meyer
6
+ # Date: September, 2025
7
+ #
8
+ # ******************************************************************************
9
+
10
+ ##
11
+ # @defgroup user_map map
12
+ # @ingroup user_stratify
13
+
14
+ import os
15
+ import sys
16
+ import tempfile
17
+ from sgspy.utils import SpatialRaster
18
+
19
+ sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
20
+ from _sgs import map_cpp
21
+
22
+ GIGABYTE = 1073741824
23
+
24
+ ##
25
+ # @ingroup user_map
26
+ # This function conducts mapping on existing stratifications.
27
+ #
28
+ # The pre-existing stratifications are passed in the form of a raster, band, and num_stratum.
29
+ # The bands argument specifies which bands within the raster should be used, the num_stratum
30
+ # argument specifies the number of stratum within one particular band.
31
+ #
32
+ # the arguments are passed in the form of a tuple, of which there can be any number.
33
+ # For example, both of the following are valid:
34
+ # - map((rast1, bands1, num_stratum1))
35
+ # - map((rast1, bands1, num_stratum1), (rast1, bands2, num_stratum2))
36
+ #
37
+ # the raster within the tuple MUST be of type sgs.utils.SpatialRaster.
38
+ # The bands argument MUST be:
39
+ # - an int, specifying a single band.
40
+ # - a str, specifying a single band.
41
+ # - a list of ints, specifying the indexes of bands.
42
+ # - a list of strings, specifying the names of bands.
43
+ #
44
+ # The num_stratum argument MUST be
45
+ # - an int, if bands is an int or string, specifying the exact number of stratum in the
46
+ # selected band.
47
+ # - a list of ints of the same length of bands, specifying the exact number of stratum in
48
+ # each of the indexes specified by the bands list.
49
+ #
50
+ # the filename parameter specifies an output file name. Right now the only file format
51
+ # accepted is GTiff (.tiff).
52
+ #
53
+ # The thread_count parameter specifies the number of threads which this function will
54
+ # utilize in the case where the raster is large an may not fit in memory. If the full
55
+ # raster can fit in memory and does not need to be processed in blocks, this argument
56
+ # will be ignored. The default is 8 threads, although the optimal number will depend
57
+ # significantly on the hardware being used and may be more or less than 8.
58
+ #
59
+ # the driver_options parameter is used to specify creation options for the output
60
+ # raster, such as compression. See options for the GTiff driver here:
61
+ # https://gdal.org/en/stable/drivers/raster/gtiff.html#creation-options
62
+ # The keys in the driver_options dict must be strings, the values are converted to
63
+ # string. The options must be valid for the driver corresponding to the filename,
64
+ # and if filename is not given they must be valid for the GTiff format, as that
65
+ # is the format used to store temporary raster files. Note that if this parameter
66
+ # is given, but filename is not and the raster fits entirely in memory, the
67
+ # driver_options parameter will be ignored.
68
+ #
69
+ # Examples
70
+ # --------------------
71
+ # rast = sgspy.SpatialRaster("rast.tif") @n
72
+ # breaks = sgspy.stratify.breaks(rast, breaks={'zq90': [3, 5, 11, 18], 'pzabove2': [20, 40, 60, 80]}) @n
73
+ # quantiles = sgspy.stratify.quantiles(rast, num_strata={'zsd': 25}) @n
74
+ # srast = sgspy.stratify.map((breaks, ['strat_zq90', 'strat_pzabove2'], [5, 5]), (quantiles, 'strat_zsd', 25))
75
+ #
76
+ # rast = sgspy.SpatialRaster("rast.tif") @n
77
+ # inventory = sgspy.SpatialVector("inventory_polygons.shp") @n
78
+ # breaks = sgspy.stratify.breaks(rast, breaks={'zq90': [3, 5, 11, 18], 'pzabove2': [20, 40, 60, 80]}) @n
79
+ # poly = sgspy.stratify.poly(rast, inventory, attribute="NUTRIENTS", layer_name="inventory_polygons", features=['poor', 'medium', 'rich']) @n
80
+ # srast = sgspy.stratify.map((breaks, [0, 1], [5, 5]), (poly, 0, 3), filename="mapped_srast.tif", driver_options={"COMPRESS": "LZW"})
81
+ #
82
+ # Parameters
83
+ # --------------------
84
+ # *args : tuple[SpatialRaster, int|list[int]|list[str], int|list[int]] @n
85
+ # tuples specifying raster bands and their number of stratifications @n @n
86
+ # filename : str @n
87
+ # filename to write to or '' if no file should be written @n @n
88
+ # thread_count : int @n
89
+ # the number of threads to use when multithreading large images @n @n
90
+ # driver_options : dict[str] @n
91
+ # the creation options as defined by GDAL which will be passed when creating output files @n @n
92
+ #
93
+ # Returns
94
+ # --------------------
95
+ # a SpatialRaster object containing a band of mapped stratifications from the input raster(s).
96
def map(*args: tuple[SpatialRaster, int|str|list[int]|list[str], int|list[int]],
        filename: str = '',
        thread_count: int = 8,
        driver_options: dict | None = None):
    """
    Combine existing stratification bands into one mapped stratification raster.

    Each positional argument is a ``(raster, bands, num_stratum)`` tuple. ``bands``
    is a single band (int index or str name) or a list of them; ``num_stratum`` is
    the matching int or list of ints giving the number of strata in each band.

    Parameters
    ----------
    *args : tuple[SpatialRaster, int|str|list[int]|list[str], int|list[int]]
        one tuple per raster whose bands take part in the mapping.
    filename : str
        output file name, or '' if no file should be written.
    thread_count : int
        thread count forwarded to the C++ implementation.
    driver_options : dict | None
        creation options; keys must be str, values are converted with str().

    Returns
    -------
    SpatialRaster
        wraps the raster object returned by ``map_cpp``.

    Raises
    ------
    TypeError
        if an argument (or a tuple member) has the wrong type.
    ValueError
        if no tuple is given, raster dimensions differ, a band is unknown or out
        of range, list lengths differ, the product of all strata counts exceeds
        a signed 32-bit integer, or a driver option key is not a str.
    RuntimeError
        if one of the rasters has already been closed.
    """
    MAX_STRATA_VAL = 2147483647 #maximum value stored within a 32-bit signed integer to ensure no overflow

    if type(filename) is not str:
        raise TypeError("'filename' parameter must be of type str.")

    if type(thread_count) is not int:
        raise TypeError("'thread_count' parameter must be of type int.")

    if driver_options is not None and type(driver_options) is not dict:
        raise TypeError("'driver_options' parameter, if given, must be of type dict.")

    #without at least one tuple there is no raster to take the dimensions from
    if not args:
        raise ValueError("at least one (raster, bands, num_stratum) tuple must be given.")

    #helper function which checks an int/str band value and returns the int band index
    def get_band_int(raster: SpatialRaster, band: int|str) -> int:
        #if an int is passed, check and return
        if type(band) is int:
            if band not in range(raster.band_count):
                raise ValueError("band {} is out of range.".format(band))
            return band

        #otherwise treat it as a band name, check and return the corresponding int
        if band not in raster.bands:
            msg = "band {} is not a band within the raster.".format(band)
            raise ValueError(msg)
        return raster.band_name_dict[band]

    raster_list = []
    band_lists = []
    strata_lists = []

    #reference dimensions are taken from the first raster, once it has been validated
    height = None
    width = None

    raster_size_bytes = 0
    large_raster = False
    for (raster, bands, num_stratum) in args:
        if type(raster) is not SpatialRaster:
            raise TypeError("first value in each tuple argument must be of type sgspy.SpatialRaster.")

        if type(bands) not in [int, str, list]:
            raise TypeError("second value in each tuple argument must be of type int, str, or list.")

        if type(num_stratum) not in [int, list]:
            raise TypeError("third value in each tuple argument must be of type int or list.")

        if raster.closed:
            raise RuntimeError("the C++ object which the raster object wraps has been cleaned up and closed.")

        if height is None:
            height = raster.height
            width = raster.width

        if raster.height != height:
            raise ValueError("height is not the same across all rasters.")

        if raster.width != width:
            raise ValueError("width is not the same across all rasters.")

        #error checking on bands and num_stratum lists
        bands_is_list = type(bands) is list
        strata_is_list = type(num_stratum) is list

        if bands_is_list and strata_is_list and len(bands) != len(num_stratum):
            raise ValueError("if bands and num_stratum arguments are lists, they must have the same length.")

        if bands_is_list ^ strata_is_list: #XOR
            raise TypeError("if one of bands and num_stratum is list, the other one must be a list of the same length.")

        if bands_is_list and len(bands) > raster.band_count:
            raise ValueError("bands list cannot have more bands than raster contains.")

        #treat the single-band form as a one-element list so both forms share one code path
        selected_bands = bands if bands_is_list else [bands]
        selected_strata = num_stratum if strata_is_list else [num_stratum]

        band_list = []
        stratum_list = []
        for band, strata_count in zip(selected_bands, selected_strata):
            band_int = get_band_int(raster, band)
            band_list.append(band_int)
            stratum_list.append(strata_count)

            #check for large raster
            pixel_size = raster.cpp_raster.get_raster_band_type_size(band_int)
            band_size = height * width * pixel_size
            raster_size_bytes += band_size
            if band_size > GIGABYTE:
                large_raster = True

        #prepare cpp function arguments
        raster_list.append(raster.cpp_raster)
        band_lists.append(band_list)
        strata_lists.append(stratum_list)

    #if any 1 band is larger than a gigabyte, or all bands together are larger than 4
    #large_raster is defined to let the C++ function know to process in blocks rather
    #than putting the entire raster into memory.
    large_raster = large_raster or (raster_size_bytes > GIGABYTE * 4)

    #error check max value for potential overflow error
    max_mapped_strata = 1
    for strata_list in strata_lists:
        for strata_count in strata_list:
            max_mapped_strata = max_mapped_strata * strata_count
    if max_mapped_strata > MAX_STRATA_VAL:
        raise ValueError("the mapped strata will cause an overflow error because the max strata number is too large.")

    #ensure driver options keys are strings, and convert driver options vals to strings
    driver_options_str = {}
    if driver_options:
        for (key, val) in driver_options.items():
            if type(key) is not str:
                raise ValueError("the key for all key/value pairs in the driver_options dict must be a string")
            driver_options_str[key] = str(val)

    #make a temp directory which will be deleted if there is any problem when calling the cpp function
    first_raster = args[0][0]
    temp_dir = tempfile.mkdtemp()
    first_raster.have_temp_dir = True
    first_raster.temp_dir = temp_dir

    #call cpp map function
    srast = SpatialRaster(map_cpp(
        raster_list,
        band_lists,
        strata_lists,
        filename,
        large_raster,
        thread_count,
        temp_dir,
        driver_options_str
    ))

    #now that it's created, give the cpp raster object ownership of the temporary directory
    first_raster.have_temp_dir = False
    srast.cpp_raster.set_temp_dir(temp_dir)
    srast.temp_dataset = filename == "" and large_raster
    srast.filename = filename

    return srast
@@ -0,0 +1,2 @@
1
+ from . import poly
2
+ from .poly import poly
@@ -0,0 +1,166 @@
1
+ # ******************************************************************************
2
+ #
3
+ # Project: sgs
4
+ # Purpose: stratification using polygons
5
+ # Author: Joseph Meyer
6
+ # Date: June, 2025
7
+ #
8
+ # ******************************************************************************
9
+
10
+ ##
11
+ # @defgroup user_poly poly
12
+ # @ingroup user_stratify
13
+
14
+ import os
15
+ import sys
16
+ import tempfile
17
+
18
+ from sgspy.utils import (
19
+ SpatialRaster,
20
+ SpatialVector,
21
+ )
22
+
23
+ sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
24
+ from _sgs import poly_cpp
25
+
26
+ GIGABYTE = 1073741824
27
+
28
+ ##
29
+ # @ingroup user_poly
30
+ # This function conducts stratification on a vector dataset by rasterizing a polygon
31
+ # layer, and using its attribute values to determine stratifications.
32
+ #
33
+ # the layer_name parameter is the layer to be rasterized, and the attribute
34
+ # is the attribute within the layer to check. The features parameter specifies
35
+ # which feature value corresponds to which stratification.
36
+ #
37
+ # The features parameter is a list containing strings and lists of strings.
38
+ # The index within this list determines the stratification value. For example:
39
+ #
40
+ # features = ["low", "medium", "high"] @n
41
+ # would result in 3 stratifications (0, 1, 2) where 'low' would correspond
42
+ # to stratification 0, medium to 1, and high to 2.
43
+ #
44
+ # features = ["low", ["medium", "high"]] @n
45
+ # would result in 2 stratifications (0, 1) where 'low' would correspond
46
+ # to stratification 0, and both medium and high to stratification 1.
47
+ #
48
+ # Examples
49
+ # --------------------
50
+ # rast = sgspy.SpatialRaster('rast.tif') @n
51
+ # vect = sgspy.SpatialVector('inventory_polygons.shp') @n
52
+ # srast = sgspy.stratify.poly(rast, vect, attribute='NUTRIENTS', layer_name='inventory_polygons', features=['poor', 'medium', 'rich'])
53
+ #
54
+ # rast = sgspy.SpatialRaster('rast.tif') @n
55
+ # vect = sgspy.SpatialVector('inventory_polygons.shp') @n
56
+ # srast = sgspy.stratify.poly(rast, vect, attribute='NUTRIENTS', layer_name='inventory_polygons', features=['poor', ['medium', 'rich']], filename='nutrient_stratification.shp')
57
+ #
58
+ # Parameters
59
+ # --------------------
60
+ # rast : SpatialRaster @n
61
+ # raster data structure which will determine height, width, geotransform, and projection @n @n
62
+ # vect : SpatialVector @n
63
+ # the vector of polygons to stratify @n @n
64
+ # layer_name : str @n
65
+ # the layer in the vector to be stratified @n @n
66
+ # attribute : str @n
67
+ # the attribute in the layer to be stratified @n @n
68
+ # features : list[str|list[str]] @n
69
+ # the stratification values of each feature value, represented as the index in the list @n @n
70
+ # filename : str @n
71
+ # the output filename to write to, if desired @n @n
72
+ #
73
+ # Returns
74
+ # --------------------
75
+ # a SpatialRaster object containing the rasterized polygon.
76
def poly(
    rast: SpatialRaster,
    vect: SpatialVector,
    layer_name: str,
    attribute: str,
    features: list[str|list[str]],
    filename: str = '',
    driver_options: dict | None = None):
    """
    Stratify by polygon attribute values via a generated SQL query.

    The index of an entry in ``features`` is the stratum assigned to that
    attribute value; a nested list assigns several values to the same stratum.

    Parameters
    ----------
    rast : SpatialRaster
        raster whose ``cpp_raster`` is passed to ``poly_cpp``.
    vect : SpatialVector
        vector whose ``cpp_vector`` is passed to ``poly_cpp``.
    layer_name : str
        layer name used in the generated query.
    attribute : str
        attribute name used in the generated query.
    features : list[str|list[str]]
        attribute values per stratum; must contain at least one value.
    filename : str
        output file name, or '' if none.
    driver_options : dict | None
        creation options; keys must be str, values are converted with str().

    Returns
    -------
    SpatialRaster
        wraps the raster object returned by ``poly_cpp``.

    Raises
    ------
    TypeError
        if a parameter has the wrong type.
    ValueError
        if ``features`` yields no values, has too many entries, or a driver
        option key is not a str.
    RuntimeError
        if ``rast`` has already been closed.
    """
    MAX_STRATA_VAL = 2147483647 #maximum value stored within a 32-bit signed integer to ensure no overflow

    if type(rast) is not SpatialRaster:
        raise TypeError("'rast' parameter must be of type sgspy.SpatialRaster")

    if type(vect) is not SpatialVector:
        raise TypeError("'vect' parameter must be of type sgspy.SpatialVector")

    if type(layer_name) is not str:
        raise TypeError("'layer_name' parameter must be of type str.")

    if type(attribute) is not str:
        raise TypeError("'attribute' parameter must be of type str.")

    if type(features) is not list:
        raise TypeError("'features' parameter must be of type list.")

    if type(filename) is not str:
        raise TypeError("'filename' parameter must be of type str.")

    if driver_options is not None and type(driver_options) is not dict:
        raise TypeError("'driver_options' parameter, if given, must be of type dict.")

    if rast.closed:
        raise RuntimeError("the C++ object which the rast object wraps has been cleaned up and closed.")

    num_strata = len(features)

    if num_strata >= MAX_STRATA_VAL:
        raise ValueError("the number of features (and resulting max strata) will cause an overflow error because the max strata number is too large.")

    #generate query cases and where clause using features and attribute
    #NOTE(review): values are interpolated into the query without quote escaping, so a
    #value containing a single quote produces a malformed query -- confirm the SQL
    #dialect used by poly_cpp before adding escaping.
    case_parts = []
    where_entries = []
    for strata_index, feature in enumerate(features):
        #a bare value and a list of values are handled the same way
        values = feature if type(feature) is list else [feature]
        for value in values:
            case_parts.append("WHEN '{}' THEN {} ".format(str(value), strata_index))
            where_entries.append("{}='{}'".format(attribute, str(value)))

    #an empty where clause would make the generated query invalid
    if not where_entries:
        raise ValueError("'features' parameter must contain at least one feature value.")

    cases = "".join(case_parts)
    where_clause = " OR ".join(where_entries)

    #generate SQL query
    sql_query = f"""SELECT CASE {attribute} {cases}ELSE NULL END AS strata, {layer_name}.* FROM {layer_name} WHERE {where_clause}"""

    driver_options_str = {}
    if driver_options:
        for (key, val) in driver_options.items():
            if type(key) is not str:
                raise ValueError("the key for all key/value pairs in the driver_options dict must be a string.")
            driver_options_str[key] = str(val)

    #note: this compares the pixel count (height * width) against GIGABYTE
    large_raster = rast.height * rast.width > GIGABYTE

    #make temp directory which will be deleted if there is any problem when calling the cpp function
    temp_dir = tempfile.mkdtemp()
    rast.have_temp_dir = True
    rast.temp_dir = temp_dir

    srast = SpatialRaster(poly_cpp(
        vect.cpp_vector,
        rast.cpp_raster,
        num_strata,
        layer_name,
        sql_query,
        filename,
        large_raster,
        temp_dir,
        driver_options_str
    ))

    #now that it's created, give the cpp raster object ownership of the temporary directory
    rast.have_temp_dir = False
    srast.cpp_raster.set_temp_dir(temp_dir)
    srast.temp_dataset = filename == "" and large_raster
    srast.filename = filename

    return srast
166
+
@@ -0,0 +1,2 @@
1
+ from . import quantiles
2
+ from .quantiles import quantiles
@@ -0,0 +1,272 @@
1
+ # ******************************************************************************
2
+ #
3
+ # Project: sgs
4
+ # Purpose: stratification by user defined quantiles
5
+ # Author: Joseph Meyer
6
+ # Date: September, 2025
7
+ #
8
+ # ******************************************************************************
9
+
10
+ ##
11
+ # @defgroup user_quantiles quantiles
12
+ # @ingroup user_stratify
13
+
14
+ import os
15
+ import sys
16
+ import tempfile
17
+ import numpy as np
18
+ from sgspy.utils import SpatialRaster
19
+
20
+ sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
21
+ from _sgs import quantiles_cpp
22
+
23
+ GIGABYTE = 1073741824
24
+
25
+ ##
26
+ # @ingroup user_quantiles
27
+ # This function conducts stratification on the raster given by generating quantile
28
+ # probabilities according to the 'num_strata' argument given by the user.
29
+ #
30
+ # The quantiles may be defined as an integer, indicating the number of quantiles
31
+ # of equal size. Quantiles may also be defined as a list of probabilities between 0
32
+ # and 1. In the case of a raster with a single band, the quantiles may be passed directly
33
+ # to the num_strata argument as either type: int | list[float].
34
+ #
35
+ # In the case of a multi-band raster image, the specific bands can be specified by the index
36
+ # of a list containing an equal number of quantiles as bands (list[int | list[float]]).
37
+ #
38
+ # If not all raster bands should be stratified, specific bands can be selected in
39
+ # the form of a dict where the key is the name of a raster band and the value is the
40
+ # quantiles (dict[str, int | list[float]]).
41
+ #
42
+ # if the map parameter is given, an extra output band will be used which combines
43
+ # all stratifications from the bands used into an extra output band. A single
44
+ # value in the mapped output band corresponds to a single combination
45
+ # of values from the previous bands.
46
+ #
47
+ # The thread_count parameter specifies the number of threads which this function
48
+ # will utilize in the case where the raster is large and may not fit in memory. If
49
+ # the full raster can fit in memory and does not need to be processed in blocks, this
50
+ # argument will be ignored. The default is 8 threads, although the optimal number
51
+ # will depend significantly on the hardware being used and may be more or less
52
+ # than 8.
53
+ #
54
+ # the driver_options parameter is used to specify creation options for the output
55
+ # raster. See options for the Gtiff driver here:
56
+ # https://gdal.org/en/stable/drivers/raster/gtiff.html#creation-options
57
+ # The keys in the driver_options dict must be strings, the values are converted to
58
+ # string. The options must be valid for the driver corresponding to the filename,
59
+ # and if filename is not given they must be valid for the GTiff format, as that
60
+ # is the format used to store temporary raster files. Note that if this parameter
61
+ # is given, but filename is not and the raster fits entirely in memory, the
62
+ # driver_options parameter will be ignored.
63
+ #
64
+ # the eps parameter is used only if batch processing is used to calculate the quantiles
65
+ # for a raster. Quantile streaming algorithms cannot be perfectly accurate, as this
66
+ # would necessitate having the entire raster in memory at once. A good approximation
67
+ # can be made, and the error is controlled by this epsilon (eps) value.
68
+ # The Quantile streaming method is the method introduced by Zhang et al. and utilized by MKL:
69
+ # https://web.cs.ucla.edu/~weiwang/paper/SSDBM07_2.pdf
70
+ # https://www.intel.com/content/www/us/en/docs/onemkl/developer-reference-summary-statistics-notes/2021-1/computing-quantiles-with-vsl-ss-method-squants-zw.html
71
+ #
72
+ # Examples
73
+ # --------------------
74
+ # rast = sgspy.SpatialRaster('rast.tif') @n
75
+ # srast = sgspy.stratify.quantiles(rast, num_strata=5)
76
+ #
77
+ # rast = sgspy.SpatialRaster('rast.tif') @n
78
+ # srast = sgspy.stratify.quantiles(rast, num_strata=[.1, .2, .3, .5, .7], filename="srast.tif")
79
+ #
80
+ # rast = sgspy.SpatialRaster('multi_band_rast.tif') @n
81
+ # srast = sgspy.stratify.quantiles(rast, num_strata=[5, 5, [.5, .75]], map=True)
82
+ #
83
+ # rast = sgspy.SpatialRaster('multi_band_rast.tif') @n
84
+ # srast = sgspy.stratify.quantiles(rast, num_strata={'zq90': 5})
85
+ #
86
+ # Parameters
87
+ # --------------------
88
+ # rast : SpatialRaster @n
89
+ # raster data structure containing the raster to stratify @n @n
90
+ # num_strata : int | list[float] | list[int|list[float]] | dict[str,int|list[float]] @n
91
+ # specification of the quantiles to stratify @n @n
92
+ # map : bool @n
93
+ # whether to map the stratification of multiple raster bands onto a single band @n @n
94
+ # filename : str @n
95
+ # filename to write to or '' if no file should be written @n @n
96
+ # thread_count : int @n
97
+ # the number of threads to use when multithreading large images @n @n
98
+ # driver_options : dict[] @n
99
+ # the creation options as defined by GDAL which will be passed when creating output files @n @n
100
+ # eps : float @n
101
+ # the epsilon value, controlling the error of stream-processed quantiles @n @n
102
+ #
103
+ # Returns
104
+ # --------------------
105
+ # a SpatialRaster object containing stratified raster bands.
106
def _quantile_probabilities(spec):
    """
    Convert one band's quantile specification into its interior probabilities.

    An int ``n`` becomes the ``n - 1`` evenly spaced probabilities strictly
    between 0 and 1. A list of probabilities is validated and returned as a new
    list with every 0 and 1 removed; the caller's list is left untouched.

    Raises
    ------
    TypeError
        if ``spec`` is neither an int nor a list.
    ValueError
        if an int is less than 1, a list is empty, or a list value lies outside [0, 1].
    """
    if type(spec) is int:
        if spec < 1:
            raise ValueError("the number of strata must be at least 1.")
        return np.array(range(1, spec)) / spec

    if type(spec) is not list:
        raise TypeError("quantiles must be given as an int or as a list of float.")

    if not spec:
        raise ValueError("list[float] must contain at least one element")

    #for lists, error check max and min values
    if min(spec) < 0:
        raise ValueError("list[float] must not contain a value less than 0")
    if max(spec) > 1:
        raise ValueError("list[float] must not contain a value greater than 1")

    #build a new list so the caller's list is never modified
    return [prob for prob in spec if prob != 0.0 and prob != 1.0]


def quantiles(
    rast: SpatialRaster,
    num_strata: int | list[float] | list[int|list[float]] | dict[str,int|list[float]],
    map: bool = False,
    filename: str = '',
    thread_count: int = 8,
    driver_options: dict | None = None,
    eps: float = .001):
    """
    Stratify raster bands by quantiles.

    Parameters
    ----------
    rast : SpatialRaster
        raster whose ``cpp_raster`` is passed to ``quantiles_cpp``.
    num_strata : int | list[float] | list[int|list[float]] | dict[str,int|list[float]]
        an int or list of float for a single-band raster, a list with one entry
        per band, or a dict keyed by band name.
    map : bool
        forwarded to ``quantiles_cpp``; when True the product of all strata
        counts is also checked against the 32-bit limit.
    filename : str
        output file name, or '' if none.
    thread_count : int
        thread count forwarded to the C++ implementation; must be at least 1.
    driver_options : dict | None
        creation options; keys must be str, values are converted with str().
    eps : float
        forwarded unchanged to ``quantiles_cpp``.

    Returns
    -------
    SpatialRaster
        wraps the raster object returned by ``quantiles_cpp``.

    Raises
    ------
    TypeError
        if a parameter has the wrong type.
    ValueError
        if ``num_strata`` does not match the raster's bands, contains invalid
        values, would overflow a signed 32-bit integer, ``thread_count`` is less
        than 1, or a driver option key is not a str.
    RuntimeError
        if ``rast`` has already been closed.
    """
    MAX_STRATA_VAL = 2147483647 #maximum value stored within a 32-bit signed integer to ensure no overflow

    if type(rast) is not SpatialRaster:
        raise TypeError("'rast' parameter must be of type sgspy.SpatialRaster")

    if type(num_strata) not in [int, list, dict]:
        raise TypeError("'num_strata' parameter must be of type int, list, or dict.")

    if type(map) is not bool:
        raise TypeError("'map' parameter must be of type bool.")

    if type(filename) is not str:
        raise TypeError("'filename' parameter must be of type str.")

    if type(thread_count) is not int:
        raise TypeError("'thread_count' parameter must be of type int.")

    if driver_options is not None and type(driver_options) is not dict:
        raise TypeError("'driver_options' parameter, if given, must be of type dict.")

    if type(eps) is not float:
        raise TypeError("'eps' parameter must be of type float.")

    if rast.closed:
        raise RuntimeError("the C++ object which the raster object wraps has been cleaned up and closed.")

    if type(num_strata) is list and len(num_strata) < 1:
        raise ValueError("num_strata list must contain at least one element")

    #maps band index -> interior quantile probabilities for that band
    probabilities_dict = {}
    if type(num_strata) is int:
        #error check number of raster bands
        if rast.band_count != 1:
            raise ValueError("num_strata int is for a single rast band, but the raster has {}".format(rast.band_count))

        probabilities_dict[0] = _quantile_probabilities(num_strata)

    elif type(num_strata) is list and type(num_strata[0]) is float:
        #error check number of raster bands
        if rast.band_count != 1:
            raise ValueError("num_strata list[float] type is for a single raster band, but the raster has {}".format(rast.band_count))

        probabilities_dict[0] = _quantile_probabilities(num_strata)

    elif type(num_strata) is list:
        #error checking number of raster bands
        if len(num_strata) != rast.band_count:
            raise ValueError("number of lists in num_strata must be equal to the number of raster bands.")

        #the position in the list is the band index
        for band_index, spec in enumerate(num_strata):
            probabilities_dict[band_index] = _quantile_probabilities(spec)

    else: #type dict
        for band_name, spec in num_strata.items():
            if band_name not in rast.bands:
                raise ValueError("probabilities dict key must be valid band name (see SpatialRaster.bands for list of names)")
            probabilities_dict[rast.band_name_dict[band_name]] = _quantile_probabilities(spec)

    #error check max value for potential overflow error
    #when map is False the product starts at 0, so only the per-band check can trigger
    max_mapped_strata = int(map)
    for probabilities in probabilities_dict.values():
        strata_count = len(probabilities) + 1
        if strata_count > MAX_STRATA_VAL:
            raise ValueError("one of the quantiles given will cause an integer overflow error because the max strata number is too large.")
        max_mapped_strata = max_mapped_strata * strata_count

    if max_mapped_strata > MAX_STRATA_VAL:
        raise ValueError("the mapped strata will cause an overflow error because the max strata number is too large.")

    if thread_count < 1:
        raise ValueError("number of threads can't be less than 1.")

    driver_options_str = {}
    if driver_options:
        for (key, val) in driver_options.items():
            if type(key) is not str:
                raise ValueError("the key for all key/value pairs in the driver_options dict must be a string.")
            driver_options_str[key] = str(val)

    large_raster = False
    raster_size_bytes = 0
    height = rast.height
    width = rast.width
    for band_index in probabilities_dict:
        pixel_size = rast.cpp_raster.get_raster_band_type_size(band_index)
        band_size = height * width * pixel_size
        raster_size_bytes += band_size
        if band_size >= GIGABYTE:
            #one oversized band is enough, no need to look at the remaining bands
            large_raster = True
            break

    #if large_raster is true, the C++ function will process the raster in blocks
    large_raster = large_raster or (raster_size_bytes > GIGABYTE * 4)

    #make a temp directory which will be deleted if there is any problem when calling the cpp function
    temp_dir = tempfile.mkdtemp()
    rast.have_temp_dir = True
    rast.temp_dir = temp_dir

    #call stratify quantiles function
    srast = SpatialRaster(quantiles_cpp(
        rast.cpp_raster,
        probabilities_dict,
        map,
        filename,
        temp_dir,
        large_raster,
        thread_count,
        driver_options_str,
        eps
    ))

    #now that it's created, give the cpp raster object ownership of the temporary directory
    rast.have_temp_dir = False
    srast.cpp_raster.set_temp_dir(temp_dir)
    srast.temp_dataset = filename == "" and large_raster
    srast.filename = filename

    return srast