sgspy 1.0.1__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sgspy/__init__.py +82 -0
- sgspy/_sgs.cp313-win_amd64.lib +0 -0
- sgspy/_sgs.cp313-win_amd64.pyd +0 -0
- sgspy/calculate/__init__.py +18 -0
- sgspy/calculate/pca/__init__.py +2 -0
- sgspy/calculate/pca/pca.py +152 -0
- sgspy/calculate/representation/__init__.py +2 -0
- sgspy/calculate/representation/representation.py +3 -0
- sgspy/sample/__init__.py +30 -0
- sgspy/sample/ahels/__init__.py +2 -0
- sgspy/sample/ahels/ahels.py +3 -0
- sgspy/sample/clhs/__init__.py +2 -0
- sgspy/sample/clhs/clhs.py +198 -0
- sgspy/sample/nc/__init__.py +2 -0
- sgspy/sample/nc/nc.py +3 -0
- sgspy/sample/srs/__init__.py +2 -0
- sgspy/sample/srs/srs.py +224 -0
- sgspy/sample/strat/__init__.py +2 -0
- sgspy/sample/strat/strat.py +390 -0
- sgspy/sample/systematic/__init__.py +2 -0
- sgspy/sample/systematic/systematic.py +229 -0
- sgspy/stratify/__init__.py +27 -0
- sgspy/stratify/breaks/__init__.py +2 -0
- sgspy/stratify/breaks/breaks.py +218 -0
- sgspy/stratify/kmeans/__init__.py +2 -0
- sgspy/stratify/kmeans/kmeans.py +3 -0
- sgspy/stratify/map/__init__.py +2 -0
- sgspy/stratify/map/map_stratifications.py +240 -0
- sgspy/stratify/poly/__init__.py +2 -0
- sgspy/stratify/poly/poly.py +166 -0
- sgspy/stratify/quantiles/__init__.py +2 -0
- sgspy/stratify/quantiles/quantiles.py +272 -0
- sgspy/utils/__init__.py +18 -0
- sgspy/utils/plot.py +143 -0
- sgspy/utils/raster.py +602 -0
- sgspy/utils/vector.py +262 -0
- sgspy-1.0.1.data/data/sgspy/Lerc.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/aec.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/charset-1.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/freexl-1.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/gdal.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/geos.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/geos_c.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/geotiff.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/gif.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/hdf5.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/hdf5_cpp.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/hdf5_hl.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/hdf5_hl_cpp.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/iconv-2.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/jpeg62.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/json-c.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/legacy.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/libcrypto-3-x64.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/libcurl.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/libecpg.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/libecpg_compat.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/libexpat.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/liblzma.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/libpgtypes.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/libpng16.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/libpq.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/libsharpyuv.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/libssl-3-x64.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/libwebp.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/libwebpdecoder.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/libwebpdemux.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/libwebpmux.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/libxml2.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/lz4.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/minizip.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/netcdf.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/openjp2.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/pcre2-16.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/pcre2-32.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/pcre2-8.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/pcre2-posix.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/proj.db +0 -0
- sgspy-1.0.1.data/data/sgspy/proj_9.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/qhull_r.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/spatialite.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/sqlite3.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/szip.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/tiff.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/turbojpeg.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/uriparser.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/zlib1.dll +0 -0
- sgspy-1.0.1.data/data/sgspy/zstd.dll +0 -0
- sgspy-1.0.1.dist-info/METADATA +13 -0
- sgspy-1.0.1.dist-info/RECORD +91 -0
- sgspy-1.0.1.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
# ******************************************************************************
|
|
2
|
+
#
|
|
3
|
+
# Project: sgs
|
|
4
|
+
# Purpose: map multiple stratification rasters
|
|
5
|
+
# Author: Joseph Meyer
|
|
6
|
+
# Date: September, 2025
|
|
7
|
+
#
|
|
8
|
+
# ******************************************************************************
|
|
9
|
+
|
|
10
|
+
##
|
|
11
|
+
# @defgroup user_map map
|
|
12
|
+
# @ingroup user_stratify
|
|
13
|
+
|
|
14
|
+
import os
|
|
15
|
+
import sys
|
|
16
|
+
import tempfile
|
|
17
|
+
from sgspy.utils import SpatialRaster
|
|
18
|
+
|
|
19
|
+
sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
|
|
20
|
+
from _sgs import map_cpp
|
|
21
|
+
|
|
22
|
+
GIGABYTE = 1073741824
|
|
23
|
+
|
|
24
|
+
##
|
|
25
|
+
# @ingroup user_map
|
|
26
|
+
# This function conducts mapping on existing stratifications.
|
|
27
|
+
#
|
|
28
|
+
# The pre-existing stratifications are passed in the form of a raster, band, and num_stratum.
|
|
29
|
+
# The bands argument specifies which bands within the raster should be used, the num_stratum
|
|
30
|
+
# argument specifies the number of stratum within one particular band.
|
|
31
|
+
#
|
|
32
|
+
# the arguments are passed in the form of a tuple, of which there can be any number.
|
|
33
|
+
# For example, both of the following are valid:
|
|
34
|
+
# - map((rast1, bands1, num_stratum1))
|
|
35
|
+
# - map((rast1, bands1, num_stratum1), (rast1, bands2, num_stratum2))
|
|
36
|
+
#
|
|
37
|
+
# the raster within the tuple MUST be of type sgs.utils.SpatialRaster.
|
|
38
|
+
# The bands argument MUST be:
|
|
39
|
+
# - an int, specifying a single band.
|
|
40
|
+
# - a str, specifying a single band.
|
|
41
|
+
# - a list of ints, specifying the indexes of bands.
|
|
42
|
+
# - a list of strings, specifying the names of bands.
|
|
43
|
+
#
|
|
44
|
+
# The num_stratum argument MUST be
|
|
45
|
+
# - an int, if bands is an int or string, specifying the exact number of stratum in the
|
|
46
|
+
# selected band.
|
|
47
|
+
# - a list of ints of the same length of bands, specifying the exact number of stratum in
|
|
48
|
+
# each of the indexes specified by the bands list.
|
|
49
|
+
#
|
|
50
|
+
# the filename parameter specifies an output file name. Right now the only file format
|
|
51
|
+
# accepted is GTiff (.tiff).
|
|
52
|
+
#
|
|
53
|
+
# The thread_count parameter specifies the number of threads which this function will
|
|
54
|
+
# utilize in the case where the raster is large and may not fit in memory. If the full
|
|
55
|
+
# raster can fit in memory and does not need to be processed in blocks, this argument
|
|
56
|
+
# will be ignored. The default is 8 threads, although the optimal number will depend
|
|
57
|
+
# significantly on the hardware being used and may be more or less than 8.
|
|
58
|
+
#
|
|
59
|
+
# the driver_options parameter is used to specify creation options for the output
|
|
60
|
+
# raster, such as compression. See options for the GTiff driver here:
|
|
61
|
+
# https://gdal.org/en/stable/drivers/raster/gtiff.html#creation-options
|
|
62
|
+
# The keys in the driver_options dict must be strings, the values are converted to
|
|
63
|
+
# string. The options must be valid for the driver corresponding to the filename,
|
|
64
|
+
# and if filename is not given they must be valid for the GTiff format, as that
|
|
65
|
+
# is the format used to store temporary raster files. Note that if this parameter
|
|
66
|
+
# is given, but filename is not and the raster fits entirely in memory, the
|
|
67
|
+
# driver_options parameter will be ignored.
|
|
68
|
+
#
|
|
69
|
+
# Examples
|
|
70
|
+
# --------------------
|
|
71
|
+
# rast = sgspy.SpatialRaster("rast.tif") @n
|
|
72
|
+
# breaks = sgspy.stratify.breaks(rast, breaks={'zq90': [3, 5, 11, 18], 'pzabove2': [20, 40, 60, 80]}) @n
|
|
73
|
+
# quantiles = sgspy.stratify.quantiles(rast, num_strata={'zsd': 25}) @n
|
|
74
|
+
# srast = sgspy.stratify.map((breaks, ['strat_zq90', 'strat_pzabove2'], [5, 5]), (quantiles, 'strat_zsd', 25))
|
|
75
|
+
#
|
|
76
|
+
# rast = sgspy.SpatialRaster("rast.tif") @n
|
|
77
|
+
# inventory = sgspy.SpatialVector("inventory_polygons.shp") @n
|
|
78
|
+
# breaks = sgspy.stratify.breaks(rast, breaks={'zq90': [3, 5, 11, 18], 'pzabove2': [20, 40, 60, 80]}) @n
|
|
79
|
+
# poly = sgspy.stratify.poly(rast, inventory, attribute="NUTRIENTS", layer_name="inventory_polygons", features=['poor', 'medium', 'rich']) @n
|
|
80
|
+
# srast = sgspy.stratify.map((breaks, [0, 1], [5, 5]), (poly, 0, 3), filename="mapped_srast.tif", driver_options={"COMPRESS": "LZW"})
|
|
81
|
+
#
|
|
82
|
+
# Parameters
|
|
83
|
+
# --------------------
|
|
84
|
+
# *args : tuple[SpatialRaster, int|list[int]|list[str], int|list[int]] @n
|
|
85
|
+
# tuples specifying raster bands and their number of stratifications @n @n
|
|
86
|
+
# filename : str @n
|
|
87
|
+
# filename to write to or '' if no file should be written @n @n
|
|
88
|
+
# thread_count : int @n
|
|
89
|
+
# the number of threads to use when multithreading large images @n @n
|
|
90
|
+
# driver_options : dict[str] @n
|
|
91
|
+
# the creation options as defined by GDAL which will be passed when creating output files @n @n
|
|
92
|
+
#
|
|
93
|
+
# Returns
|
|
94
|
+
# --------------------
|
|
95
|
+
# a SpatialRaster object containing a band of mapped stratifications from the input raster(s).
|
|
96
|
+
def map(*args: tuple[SpatialRaster, int|str|list[int]|list[str], int|list[int]],
        filename: str = '',
        thread_count: int = 8,
        driver_options: dict = None):
    # Validates every (raster, bands, num_stratum) tuple, flattens the selections into the
    # parallel lists expected by map_cpp, and returns the mapped result wrapped in a
    # SpatialRaster.
    #
    # Raises:
    #   TypeError    - a parameter or tuple element has the wrong type.
    #   ValueError   - no tuples were given, raster dimensions differ, bands/num_stratum
    #                  lengths disagree, a band does not exist, the product of all strata
    #                  counts exceeds a signed 32-bit integer, or a driver option key is
    #                  not a string.
    #   RuntimeError - one of the rasters has already been closed.

    MAX_STRATA_VAL = 2147483647 #maximum value stored within a 32-bit signed integer to ensure no overflow

    if type(filename) is not str:
        raise TypeError("'filename' parameter must be of type str.")

    if type(thread_count) is not int:
        raise TypeError("'thread_count' parameter must be of type int.")

    if driver_options is not None and type(driver_options) is not dict:
        raise TypeError("'driver_options' parameter, if given, must be of type dict.")

    #the first raster supplies the reference dimensions (and temporarily owns the temp
    #directory), so at least one tuple is required
    if not args:
        raise ValueError("at least one (raster, bands, num_stratum) tuple must be given.")

    #helper function which checks int/str value and returns int band index
    def get_band_int(raster: SpatialRaster, band: int|str) -> int:
        #if an int is passed, check and return
        if type(band) is int:
            if band not in range(raster.band_count):
                raise ValueError("band {} is out of range.".format(band))
            return band

        #if a string is passed, check and return corresponding int
        if band not in raster.bands:
            msg = "band {} is not a band within the raster.".format(band)
            raise ValueError(msg)
        return raster.band_name_dict[band]

    raster_list = []
    band_lists = []
    strata_lists = []

    #reference dimensions, taken from the first raster once it has passed validation
    height = None
    width = None

    raster_size_bytes = 0
    large_raster = False
    for (raster, bands, num_stratum) in args:
        if type(raster) is not SpatialRaster:
            raise TypeError("first value in each tuple argument must be of type sgspy.SpatialRaster.")

        if type(bands) not in [int, str, list]:
            raise TypeError("second value in each tuple argument must be of type int, str, or list.")

        if type(num_stratum) not in [int, list]:
            raise TypeError("third value in each tuple argument must be of type int or list.")

        if raster.closed:
            raise RuntimeError("the C++ object which the raster object wraps has been cleaned up and closed.")

        if height is None:
            height = raster.height
            width = raster.width

        if raster.height != height:
            raise ValueError("height is not the same across all rasters.")

        if raster.width != width:
            raise ValueError("width is not the same across all rasters.")

        #error checking on bands and num_stratum lists
        if type(bands) is list and type(num_stratum) is list and len(bands) != len(num_stratum):
            raise ValueError("if bands and num_stratum arguments are lists, they must have the same length.")

        if (type(bands) is list) ^ (type(num_stratum) is list): #XOR
            raise TypeError("if one of bands and num_stratum is list, the other one must be a list of the same length.")

        if type(bands) is list and len(bands) > raster.band_count:
            raise ValueError("bands list cannot have more bands than raster contains.")

        #treat a single band / stratum count as a one element list so that both forms
        #share the same validation and size checks
        if type(bands) is list:
            selected_bands = bands
            selected_strata = num_stratum
        else:
            selected_bands = [bands]
            selected_strata = [num_stratum]

        #error checking on band int/string values
        band_list = []
        stratum_list = []
        for (band, strata_count) in zip(selected_bands, selected_strata):
            band_int = get_band_int(raster, band)
            band_list.append(band_int)
            stratum_list.append(strata_count)

            #check for large raster
            pixel_size = raster.cpp_raster.get_raster_band_type_size(band_int)
            band_size = height * width * pixel_size
            raster_size_bytes += band_size
            if band_size > GIGABYTE:
                #must be an assignment: a comparison here would silently leave the flag unset
                large_raster = True

        #prepare cpp function arguments
        raster_list.append(raster.cpp_raster)
        band_lists.append(band_list)
        strata_lists.append(stratum_list)

    #if any 1 band is larger than a gigabyte, or all bands together are larger than 4
    #large_raster is defined to let the C++ function know to process in blocks rather
    #than putting the entire raster into memory.
    large_raster = large_raster or (raster_size_bytes > GIGABYTE * 4)

    #error check max value for potential overflow error
    max_mapped_strata = 1
    for strata_list in strata_lists:
        for strata_count in strata_list:
            max_mapped_strata = max_mapped_strata * strata_count
    if max_mapped_strata > MAX_STRATA_VAL:
        raise ValueError("the mapped strata will cause an overflow error because the max strata number is too large.")

    #ensure driver options keys are strings, and convert driver options vals to strings
    driver_options_str = {}
    if driver_options:
        for (key, val) in driver_options.items():
            if type(key) is not str:
                raise ValueError("the key for all key/value pairs in teh driver_options dict must be a string")
            driver_options_str[key] = str(val)

    #make a temp directory which will be deleted if there is any problem when calling the cpp function
    #NOTE(review): the deletion itself is presumably done by SpatialRaster's cleanup using
    #have_temp_dir/temp_dir - confirm against sgspy.utils.raster
    temp_dir = tempfile.mkdtemp()
    args[0][0].have_temp_dir = True
    args[0][0].temp_dir = temp_dir

    #call cpp map function
    srast = SpatialRaster(map_cpp(
        raster_list,
        band_lists,
        strata_lists,
        filename,
        large_raster,
        thread_count,
        temp_dir,
        driver_options_str
    ))

    #now that it's created, give the cpp raster object ownership of the temporary directory
    args[0][0].have_temp_dir = False
    srast.cpp_raster.set_temp_dir(temp_dir)
    srast.temp_dataset = filename == "" and large_raster
    srast.filename = filename

    return srast
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
# ******************************************************************************
|
|
2
|
+
#
|
|
3
|
+
# Project: sgs
|
|
4
|
+
# Purpose: stratification using polygons
|
|
5
|
+
# Author: Joseph Meyer
|
|
6
|
+
# Date: June, 2025
|
|
7
|
+
#
|
|
8
|
+
# ******************************************************************************
|
|
9
|
+
|
|
10
|
+
##
|
|
11
|
+
# @defgroup user_poly poly
|
|
12
|
+
# @ingroup user_stratify
|
|
13
|
+
|
|
14
|
+
import os
|
|
15
|
+
import sys
|
|
16
|
+
import tempfile
|
|
17
|
+
|
|
18
|
+
from sgspy.utils import (
|
|
19
|
+
SpatialRaster,
|
|
20
|
+
SpatialVector,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
|
|
24
|
+
from _sgs import poly_cpp
|
|
25
|
+
|
|
26
|
+
GIGABYTE = 1073741824
|
|
27
|
+
|
|
28
|
+
##
|
|
29
|
+
# @ingroup user_poly
|
|
30
|
+
# This function conducts stratification on a vector dataset by rasterizing a polygon
|
|
31
|
+
# layer, and using its attribute values to determine stratifications.
|
|
32
|
+
#
|
|
33
|
+
# the layer_name parameter is the layer to be rasterized, and the attribute
|
|
34
|
+
# is the attribute within the layer to check. The features parameter specifies
|
|
35
|
+
# which feature value corresponds to which stratification.
|
|
36
|
+
#
|
|
37
|
+
# The features parameter is a list containing strings and lists of strings.
|
|
38
|
+
# The index within this list determines the stratification value. For example:
|
|
39
|
+
#
|
|
40
|
+
# features = ["low", "medium", "high"] @n
|
|
41
|
+
# would result in 3 stratifications (0, 1, 2) where 'low' would correspond
|
|
42
|
+
# to stratification 0, medium to 1, and high to 2.
|
|
43
|
+
#
|
|
44
|
+
# features = ["low", ["medium", "high"]] @n
|
|
45
|
+
# would result in 2 stratifications (0, 1) where 'low' would correspond
|
|
46
|
+
# to stratification 0, and both medium and high to stratification 1.
|
|
47
|
+
#
|
|
48
|
+
# Examples
|
|
49
|
+
# --------------------
|
|
50
|
+
# rast = sgspy.SpatialRaster('rast.tif') @n
|
|
51
|
+
# vect = sgspy.SpatialVector('inventory_polygons.shp') @n
|
|
52
|
+
# srast = sgspy.stratify.poly(rast, vect, attribute='NUTRIENTS', layer_name='inventory_polygons', features=['poor', 'medium', 'rich'])
|
|
53
|
+
#
|
|
54
|
+
# rast = sgspy.SpatialRaster('rast.tif') @n
|
|
55
|
+
# vect = sgspy.SpatialVector('inventory_polygons.shp') @n
|
|
56
|
+
# srast = sgspy.stratify.poly(rast, vect, attribute='NUTRIENTS', layer_name='inventory_polygons', features=['poor', ['medium', 'rich']], filename='nutrient_stratification.tif')
|
|
57
|
+
#
|
|
58
|
+
# Parameters
|
|
59
|
+
# --------------------
|
|
60
|
+
# rast : SpatialRaster @n
|
|
61
|
+
# raster data structure which will determine height, width, geotransform, and projection @n @n
|
|
62
|
+
# vect : SpatialVector @n
|
|
63
|
+
# the vector of polygons to stratify @n @n
|
|
64
|
+
# layer_name : str @n
|
|
65
|
+
# the layer in the vector to be stratified @n @n
|
|
66
|
+
# attribute : str @n
|
|
67
|
+
# the attribute in the layer to be stratified @n @n
|
|
68
|
+
# features : list[str|list[str]] @n
|
|
69
|
+
# the stratification values of each feature value, represented as the index in the list @n @n
|
|
70
|
+
# filename : str @n
|
|
71
|
+
# the output filename to write to, if desired @n @n
|
|
72
|
+
#
|
|
73
|
+
# Returns
|
|
74
|
+
# --------------------
|
|
75
|
+
# a SpatialRaster object containing the rasterized polygon.
|
|
76
|
+
def poly(
    rast: SpatialRaster,
    vect: SpatialVector,
    layer_name: str,
    attribute: str,
    features: list[str|list[str]],
    filename:str = '',
    driver_options: dict = None):
    # Builds an SQL query which maps each listed value of 'attribute' to the index of its
    # entry in 'features', then passes the query to poly_cpp together with the vector,
    # the raster and the output settings. The result is returned wrapped in a SpatialRaster.
    #
    # Raises:
    #   TypeError    - a parameter has the wrong type.
    #   ValueError   - 'features' yields no values, has too many entries to be stored in a
    #                  signed 32-bit integer, or a driver option key is not a string.
    #   RuntimeError - 'rast' has already been closed.

    MAX_STRATA_VAL = 2147483647 #maximum value stored within a 32-bit signed integer to ensure no overflow

    if type(rast) is not SpatialRaster:
        raise TypeError("'rast' parameter must be of type sgspy.SpatialRaster")

    if type(vect) is not SpatialVector:
        raise TypeError("'vect' parameter must be of type sgspy.SpatialVector")

    if type(layer_name) is not str:
        raise TypeError("'layer_name' parameter must be of type str.")

    if type(attribute) is not str:
        raise TypeError("'attribute' parameter must be of type str.")

    if type(features) is not list:
        raise TypeError("'features' parameter must be of type list.")

    if type(filename) is not str:
        raise TypeError("'filename' parameter must be of type str.")

    if driver_options is not None and type(driver_options) is not dict:
        raise TypeError("'driver_options' parameter, if givne, must be of type dict.")

    if rast.closed:
        raise RuntimeError("the C++ object which the rast object wraps has been cleaned up and closed.")

    num_strata = len(features)

    if num_strata >= MAX_STRATA_VAL:
        raise ValueError("the number of features (and resulting max strata) will cause an overflow error because the max strata number is too large.")

    #quote a feature value as an SQL string literal; embedded single quotes are doubled so
    #that a value such as "O'Neil" cannot terminate the literal early and corrupt the query
    def sql_literal(value) -> str:
        return "'{}'".format(str(value).replace("'", "''"))

    #generate query cases and where clause using features and attribute
    case_entries = []
    where_entries = []
    for (stratum, feature) in enumerate(features):
        #a nested list assigns several feature values to the same stratum
        values = feature if type(feature) is list else [feature]
        for value in values:
            literal = sql_literal(value)
            case_entries.append("WHEN {} THEN {} ".format(literal, stratum))
            where_entries.append("{}={}".format(attribute, literal))

    #without any value the query would end in a dangling WHERE
    if not where_entries:
        raise ValueError("'features' parameter must contain at least one feature value.")

    cases = "".join(case_entries)
    where_clause = " OR ".join(where_entries)

    #generate SQL query
    sql_query = f"""SELECT CASE {attribute} {cases}ELSE NULL END AS strata, {layer_name}.* FROM {layer_name} WHERE {where_clause}"""

    #ensure driver options keys are strings, and convert driver options vals to strings
    driver_options_str = {}
    if driver_options:
        for (key, val) in driver_options.items():
            if type(key) is not str:
                raise ValueError("the key for al key/value pairs in teh driver_options dict must be a string.")
            driver_options_str[key] = str(val)

    #NOTE(review): this compares a pixel count (not a byte count) against GIGABYTE - confirm intended
    large_raster = rast.height * rast.width > GIGABYTE

    #make temp directory which will be deleted if there is any problem when calling the cpp function
    temp_dir = tempfile.mkdtemp()
    rast.have_temp_dir = True
    rast.temp_dir = temp_dir

    srast = SpatialRaster(poly_cpp(
        vect.cpp_vector,
        rast.cpp_raster,
        num_strata,
        layer_name,
        sql_query,
        filename,
        large_raster,
        temp_dir,
        driver_options_str
    ))

    #now that it's created, give the cpp raster object ownership of the temporary directory
    rast.have_temp_dir = False
    srast.cpp_raster.set_temp_dir(temp_dir)
    srast.temp_dataset = filename == "" and large_raster
    srast.filename = filename

    return srast
|
|
166
|
+
|
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
# ******************************************************************************
|
|
2
|
+
#
|
|
3
|
+
# Project: sgs
|
|
4
|
+
# Purpose: stratification by user defined quantiles
|
|
5
|
+
# Author: Joseph Meyer
|
|
6
|
+
# Date: September, 2025
|
|
7
|
+
#
|
|
8
|
+
# ******************************************************************************
|
|
9
|
+
|
|
10
|
+
##
|
|
11
|
+
# @defgroup user_quantiles quantiles
|
|
12
|
+
# @ingroup user_stratify
|
|
13
|
+
|
|
14
|
+
import os
|
|
15
|
+
import sys
|
|
16
|
+
import tempfile
|
|
17
|
+
import numpy as np
|
|
18
|
+
from sgspy.utils import SpatialRaster
|
|
19
|
+
|
|
20
|
+
sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
|
|
21
|
+
from _sgs import quantiles_cpp
|
|
22
|
+
|
|
23
|
+
GIGABYTE = 1073741824
|
|
24
|
+
|
|
25
|
+
##
|
|
26
|
+
# @ingroup user_quantiles
|
|
27
|
+
# This function conducts stratification on the raster given by generating quantile
|
|
28
|
+
# probabilities according to the 'num_strata' argument given by the user.
|
|
29
|
+
#
|
|
30
|
+
# The quantiles may be defined as an integer, indicating the number of quantiles
|
|
31
|
+
# of equal size. Quantiles may also be defined as a list of probabilities between 0
|
|
32
|
+
# and 1. In the case of a raster with a single band, the quantiles may be passed directly
|
|
33
|
+
# to the num_strata argument as either type: int | list[float].
|
|
34
|
+
#
|
|
35
|
+
# In the case of a multi-band raster image, the specific bands can be specified by the index
|
|
36
|
+
# of a list containing an equal number of quantiles as bands (list[int | list[float]]).
|
|
37
|
+
#
|
|
38
|
+
# If not all raster bands should be stratified, specific bands can be selected in
|
|
39
|
+
# the form of a dict where the key is the name of a raster band and the value is the
|
|
40
|
+
# quantiles (dict[str, int | list[float]]).
|
|
41
|
+
#
|
|
42
|
+
# if the map parameter is given, an extra output band will be used which combines
|
|
43
|
+
# all stratifications from the bands used into an extra output band. A single
|
|
44
|
+
# value in the mapped output band corresponds to a single combination
|
|
45
|
+
# of values from the previous bands.
|
|
46
|
+
#
|
|
47
|
+
# The thread_count parameter specifies the number of threads which this function
|
|
48
|
+
# will utilize in the case where the raster is large and may not fit in memory. If
|
|
49
|
+
# the full raster can fit in memory and does not need to be processed in blocks, this
|
|
50
|
+
# argument will be ignored. The default is 8 threads, although the optimal number
|
|
51
|
+
# will depend significantly on the hardware being used and may be more or less
|
|
52
|
+
# than 8.
|
|
53
|
+
#
|
|
54
|
+
# the driver_options parameter is used to specify creation options for the output
|
|
55
|
+
# raster. See options for the Gtiff driver here:
|
|
56
|
+
# https://gdal.org/en/stable/drivers/raster/gtiff.html#creation-options
|
|
57
|
+
# The keys in the driver_options dict must be strings, the values are converted to
|
|
58
|
+
# string. The options must be valid for the driver corresponding to the filename,
|
|
59
|
+
# and if filename is not given they must be valid for the GTiff format, as that
|
|
60
|
+
# is the format used to store temporary raster files. Note that if this parameter
|
|
61
|
+
# is given, but filename is not and the raster fits entirely in memory, the
|
|
62
|
+
# driver_options parameter will be ignored.
|
|
63
|
+
#
|
|
64
|
+
# the eps parameter is used only if batch processing is used to calculate the quantiles
|
|
65
|
+
# for a raster. Quantile streaming algorithms cannot be perfectly accurate, as this
|
|
66
|
+
# would necessitate having the entire raster in memory at once. A good approximation
|
|
67
|
+
# can be made, and the error is controlled by this epsilon (eps) value.
|
|
68
|
+
# The Quantile streaming method is the method introduced by Zhang et al. and utilized by MKL:
|
|
69
|
+
# https://web.cs.ucla.edu/~weiwang/paper/SSDBM07_2.pdf
|
|
70
|
+
# https://www.intel.com/content/www/us/en/docs/onemkl/developer-reference-summary-statistics-notes/2021-1/computing-quantiles-with-vsl-ss-method-squants-zw.html
|
|
71
|
+
#
|
|
72
|
+
# Examples
|
|
73
|
+
# --------------------
|
|
74
|
+
# rast = sgspy.SpatialRaster('rast.tif') @n
|
|
75
|
+
# srast = sgspy.stratify.quantiles(rast, num_strata=5)
|
|
76
|
+
#
|
|
77
|
+
# rast = sgspy.SpatialRaster('rast.tif') @n
|
|
78
|
+
# srast = sgspy.stratify.quantiles(rast, num_strata=[.1, .2, .3, .5, .7], filename="srast.tif")
|
|
79
|
+
#
|
|
80
|
+
# rast = sgspy.SpatialRaster('multi_band_rast.tif') @n
|
|
81
|
+
# srast = sgspy.stratify.quantiles(rast, num_strata=[5, 5, [.5, .75]], map=True)
|
|
82
|
+
#
|
|
83
|
+
# rast = sgspy.SpatialRaster('multi_band_rast.tif') @n
|
|
84
|
+
# srast = sgspy.stratify.quantiles(rast, num_strata={'zq90': 5})
|
|
85
|
+
#
|
|
86
|
+
# Parameters
|
|
87
|
+
# --------------------
|
|
88
|
+
# rast : SpatialRaster @n
|
|
89
|
+
# raster data structure containing the raster to stratify @n @n
|
|
90
|
+
# num_strata : int | list[float] | list[int|list[float]] | dict[str,int|list[float]] @n
|
|
91
|
+
# specification of the quantiles to stratify @n @n
|
|
92
|
+
# map : bool @n
|
|
93
|
+
# whether to map the stratification of multiple raster bands onto a single band @n @n
|
|
94
|
+
# filename : str @n
|
|
95
|
+
# filename to write to or '' if no file should be written @n @n
|
|
96
|
+
# thread_count : int @n
|
|
97
|
+
# the number of threads to use when multithreading large images @n @n
|
|
98
|
+
# driver_options : dict[] @n
|
|
99
|
+
# the creation options as defined by GDAL which will be passed when creating output files @n @n
|
|
100
|
+
# eps : float @n
|
|
101
|
+
# the epsilon value, controlling the error of stream-processed quantiles @n @n
|
|
102
|
+
#
|
|
103
|
+
# Returns
|
|
104
|
+
# --------------------
|
|
105
|
+
# a SpatialRaster object containing stratified raster bands.
|
|
106
|
+
def quantiles(
|
|
107
|
+
rast: SpatialRaster,
|
|
108
|
+
num_strata: int | list[float] | list[int|list[float]] | dict[str,int|list[float]],
|
|
109
|
+
map: bool = False,
|
|
110
|
+
filename: str = '',
|
|
111
|
+
thread_count: int = 8,
|
|
112
|
+
driver_options: dict = None,
|
|
113
|
+
eps: float = .001):
|
|
114
|
+
|
|
115
|
+
MAX_STRATA_VAL = 2147483647 #maximum value stored within a 32-bit signed integer to ensure no overflow
|
|
116
|
+
|
|
117
|
+
if type(rast) is not SpatialRaster:
|
|
118
|
+
raise TypeError("'rast' parameter must be of type sgspy.SpatialRaster")
|
|
119
|
+
|
|
120
|
+
if type(num_strata) not in [int, list, dict]:
|
|
121
|
+
raise TypeError("'num_strata' parameter must be of type int, list, or dict.")
|
|
122
|
+
|
|
123
|
+
if type(map) is not bool:
|
|
124
|
+
raise TypeError("'map' parameter must be of type bool.")
|
|
125
|
+
|
|
126
|
+
if type(filename) is not str:
|
|
127
|
+
raise TypeError("'filename' parameter must be of type str.")
|
|
128
|
+
|
|
129
|
+
if type(thread_count) is not int:
|
|
130
|
+
raise TypeError("'thread_count' parameter must be of type int.")
|
|
131
|
+
|
|
132
|
+
if type(eps) is not float:
|
|
133
|
+
raise TypeError("'eps' parameter must be of type float.")
|
|
134
|
+
|
|
135
|
+
if rast.closed:
|
|
136
|
+
raise RuntimeError("the C++ object which the raster object wraps has been cleaned up and closed.")
|
|
137
|
+
|
|
138
|
+
if type(num_strata) is list and len(num_strata) < 1:
|
|
139
|
+
raise ValueError("num_strata list must contain at least one element")
|
|
140
|
+
|
|
141
|
+
probabilities_dict = {}
|
|
142
|
+
if type(num_strata) is int:
|
|
143
|
+
#error check number of raster bands
|
|
144
|
+
if rast.band_count != 1:
|
|
145
|
+
raise ValueError("num_strata int is for a single rast band, but the raster has {}".format(rast.band_count))
|
|
146
|
+
|
|
147
|
+
#add quantiles to probabilities_dict
|
|
148
|
+
inc = 1 / num_strata
|
|
149
|
+
probabilities_dict[0] = np.array(range(1, num_strata)) / num_strata
|
|
150
|
+
|
|
151
|
+
elif type(num_strata) is list and type(num_strata[0]) is float:
|
|
152
|
+
#error check number of raster bands
|
|
153
|
+
if rast.band_count != 1:
|
|
154
|
+
raise ValueError("num_strata list[float] type is for a single raster band, but the raster has {}".format(rast.band_count))
|
|
155
|
+
|
|
156
|
+
#error check list values
|
|
157
|
+
if min(num_strata) < 0:
|
|
158
|
+
raise ValueError("list[float] must not contain a value less than 0")
|
|
159
|
+
elif max(num_strata) > 1:
|
|
160
|
+
raise ValueError("list[float] must not contain a value greater than 1")
|
|
161
|
+
|
|
162
|
+
#add quantiles to probabilities_dict and ensure 1 and 0 are removed
|
|
163
|
+
probabilities_dict[0] = num_strata
|
|
164
|
+
if 0.0 in probabilities_dict[0]:
|
|
165
|
+
probabilities_dict[0].remove(0.0)
|
|
166
|
+
if 1.0 in probabilities_dict[0]:
|
|
167
|
+
probabilities_dict[0].remove(1.0)
|
|
168
|
+
|
|
169
|
+
elif type(num_strata) is list:
|
|
170
|
+
#error checking number of raster bands
|
|
171
|
+
if (len(num_strata)) != rast.band_count:
|
|
172
|
+
raise ValueError("number of lists in num_strata must be equal to the number of raster bands.")
|
|
173
|
+
|
|
174
|
+
#for each given num_strata, add it to probabilities_dict depending on type
|
|
175
|
+
for i in range(len(num_strata)):
|
|
176
|
+
if type(num_strata[i]) is int:
|
|
177
|
+
inc = 1 / num_strata[i]
|
|
178
|
+
probabilities_dict[i] = np.array(range(1, num_strata[i])) / num_strata[i]
|
|
179
|
+
else: #list of float
|
|
180
|
+
#for lists, error check max and min values
|
|
181
|
+
if min(num_strata[i]) < 0:
|
|
182
|
+
raise ValueError("list[float] must not contain value less than 0")
|
|
183
|
+
elif max(num_strata[i]) > 1:
|
|
184
|
+
raise ValueError("list[float] must not contain value greater than 1")
|
|
185
|
+
probabilities_dict[i] = num_strata[i]
|
|
186
|
+
if 0.0 in probabilities_dict[i]:
|
|
187
|
+
probabilities_dict[i].remove(0.0)
|
|
188
|
+
if 1.0 in probabilities_dict[i]:
|
|
189
|
+
probabilities_dict[i].remove(1.0)
|
|
190
|
+
|
|
191
|
+
else: #type dict
|
|
192
|
+
for key, val in num_strata.items():
|
|
193
|
+
if key not in rast.bands:
|
|
194
|
+
raise ValueError("probabilities dict key must be valid band name (see SpatialRaster.bands for list of names)")
|
|
195
|
+
else:
|
|
196
|
+
band_num = rast.band_name_dict[key]
|
|
197
|
+
if type(val) is int:
|
|
198
|
+
inc = 1 / val
|
|
199
|
+
probabilities_dict[band_num] = np.array(range(1, val)) / val
|
|
200
|
+
else: #list of float
|
|
201
|
+
#for lists, error check max and min values
|
|
202
|
+
if min(val) < 0:
|
|
203
|
+
raise ValueError("list[float] must not contain value less than 0")
|
|
204
|
+
elif max(val) > 1:
|
|
205
|
+
raise ValueError("list[float] must not contain value greater than 1")
|
|
206
|
+
probabilities_dict[band_num] = val
|
|
207
|
+
if 0.0 in probabilities_dict[band_num]:
|
|
208
|
+
probabilities_dict[band_num].remove(0.0)
|
|
209
|
+
if 1.0 in probabilities_dict[band_num]:
|
|
210
|
+
probabilities_dict[band_num].remove(1.0)
|
|
211
|
+
|
|
212
|
+
#error check max value for potential overflow error
|
|
213
|
+
max_mapped_strata = int(map)
|
|
214
|
+
for _, val in probabilities_dict.items():
|
|
215
|
+
strata_count = len(val) + 1
|
|
216
|
+
if strata_count > MAX_STRATA_VAL:
|
|
217
|
+
raise ValueError("one of the quantiles given will cause an integer overflow error because the max strata number is too large.")
|
|
218
|
+
max_mapped_strata = max_mapped_strata * strata_count
|
|
219
|
+
|
|
220
|
+
if max_mapped_strata > MAX_STRATA_VAL:
|
|
221
|
+
raise ValueError("the mapped strata will cause an overflow error because the max strata number is too large.")
|
|
222
|
+
|
|
223
|
+
if thread_count < 1:
|
|
224
|
+
raise ValueError("number of threads can't be less than 1.")
|
|
225
|
+
|
|
226
|
+
driver_options_str = {}
|
|
227
|
+
if driver_options:
|
|
228
|
+
for (key, val) in driver_options.items():
|
|
229
|
+
if type(key) is not str:
|
|
230
|
+
raise ValueError("the key for all key/value pairs in the driver_options dict must be a string.")
|
|
231
|
+
driver_options_str[key] = str(val)
|
|
232
|
+
|
|
233
|
+
large_raster = False
|
|
234
|
+
raster_size_bytes = 0
|
|
235
|
+
height = rast.height
|
|
236
|
+
width = rast.width
|
|
237
|
+
for key, _ in probabilities_dict.items():
|
|
238
|
+
pixel_size = rast.cpp_raster.get_raster_band_type_size(key)
|
|
239
|
+
band_size = height * width * pixel_size
|
|
240
|
+
raster_size_bytes += band_size
|
|
241
|
+
if band_size >= GIGABYTE:
|
|
242
|
+
large_raster = True
|
|
243
|
+
break
|
|
244
|
+
|
|
245
|
+
#if large_raster is true, the C++ function will process the raster in blocks
|
|
246
|
+
large_raster = large_raster or (raster_size_bytes > GIGABYTE * 4)
|
|
247
|
+
|
|
248
|
+
#make a temp directory which will be deleted if there is any problem when calling the cpp function
|
|
249
|
+
temp_dir = tempfile.mkdtemp()
|
|
250
|
+
rast.have_temp_dir = True
|
|
251
|
+
rast.temp_dir = temp_dir
|
|
252
|
+
|
|
253
|
+
#call stratify quantiles function
|
|
254
|
+
srast = SpatialRaster(quantiles_cpp(
|
|
255
|
+
rast.cpp_raster,
|
|
256
|
+
probabilities_dict,
|
|
257
|
+
map,
|
|
258
|
+
filename,
|
|
259
|
+
temp_dir,
|
|
260
|
+
large_raster,
|
|
261
|
+
thread_count,
|
|
262
|
+
driver_options_str,
|
|
263
|
+
eps
|
|
264
|
+
))
|
|
265
|
+
|
|
266
|
+
#now that it's created, give the cpp raster object ownership of the temporary directory
|
|
267
|
+
rast.have_temp_dir = False
|
|
268
|
+
srast.cpp_raster.set_temp_dir(temp_dir)
|
|
269
|
+
srast.temp_dataset = filename == "" and large_raster
|
|
270
|
+
srast.filename = filename
|
|
271
|
+
|
|
272
|
+
return srast
|